Accessing an unordered_map
How can I improve the access time to this unordered map? If I instead allocate an 8 x 10 x 30 x 30 x 24000

std::vector<std::vector<std::vector<std::vector<std::vector<float> > > > >

each access is about 0.0001 ms. Using an unordered_map implementation, each access is about 0.001 ms, 10x slower. What can I do to reduce the access time for this unordered_map? I'm doing a lot of insertions and modifications of existing entries.
#include <cstdio>
#include <random>
#include <unordered_map>
#include "boost/functional/hash.hpp"
#include "boost/date_time/posix_time/posix_time.hpp"

struct Key {
  Key(int a, int b, int x, int y, int z) :
      a_(a), b_(b), x_(x), y_(y), z_(z) {
  }

  bool operator==(const Key& other) const {
    return (a_ == other.a_ &&
            b_ == other.b_ &&
            x_ == other.x_ &&
            y_ == other.y_ &&
            z_ == other.z_);
  }

  int a_, b_, x_, y_, z_;
};

namespace std {
template <>
struct hash<Key> {
  typedef Key argument_type;
  typedef std::size_t result_type;

  result_type operator()(const Key& key) const {
    std::size_t val = 0;
    boost::hash_combine(val, key.a_);
    boost::hash_combine(val, key.b_);
    boost::hash_combine(val, key.x_);
    boost::hash_combine(val, key.y_);
    boost::hash_combine(val, key.z_);
    return val;
  }
};
}  // namespace std.

int main(int argc, char** argv) {
  std::default_random_engine generator;
  std::uniform_int_distribution<int> random_z(1, 24000);
  std::uniform_real_distribution<float> random_vote(0.0, 1.0);
  std::unordered_map<Key, float> votes;
  int total_accesses = 0;
  boost::posix_time::ptime start =
      boost::posix_time::microsec_clock::local_time();
  for (int i = 0; i < 200000; ++i) {
    int z = random_z(generator);  // z in [1, 24000]
    for (int a = 0; a < 8; ++a) {
      for (int b = 0; b < 10; ++b) {
        for (int x = 0; x < 30; ++x) {
          for (int y = 0; y < 30; ++y) {
            float this_vote = random_vote(generator);
            Key key(a, b, x, y, z);
            if (this_vote > 0.8) {
              votes[key] += this_vote;  // This is what is slow.
              ++total_accesses;
            }
          }
        }
      }
    }
    if ((i + 1) % 1000 == 0) {
      boost::posix_time::ptime now =
          boost::posix_time::microsec_clock::local_time();
      boost::posix_time::time_duration diff = now - start;
      printf("%d / %d : Milliseconds per access: %f\n",
             i + 1, 200000,
             static_cast<float>(diff.total_milliseconds()) / total_accesses);
    }
  }
}
I find its amortized access time to be slow, and it takes a growing amount of memory (3.1GB after 4000 iterations of the main loop, 6.3GB after 8000 iterations, and 8.6GB after 12000 iterations):
1000 / 200000 : Milliseconds per access: 0.000548
2000 / 200000 : Milliseconds per access: 0.000653
3000 / 200000 : Milliseconds per access: 0.000682
4000 / 200000 : Milliseconds per access: 0.000834
5000 / 200000 : Milliseconds per access: 0.000874
6000 / 200000 : Milliseconds per access: 0.000926
7000 / 200000 : Milliseconds per access: 0.001107
8000 / 200000 : Milliseconds per access: 0.001143
9000 / 200000 : Milliseconds per access: 0.001187
10000 / 200000 : Milliseconds per access: 0.001234
11000 / 200000 : Milliseconds per access: 0.001285
12000 / 200000 : Milliseconds per access: 0.001338
Here is the version using the vector of vectors instead:
#include <cstdio>
#include <random>
#include <vector>
#include "boost/functional/hash.hpp"
#include "boost/date_time/posix_time/posix_time.hpp"

// Key and its std::hash specialization are unused in this version;
// they are kept only for parity with the unordered_map version.
struct Key {
  Key(int a, int b, int x, int y, int z) :
      a_(a), b_(b), x_(x), y_(y), z_(z) {
  }

  bool operator==(const Key& other) const {
    return (a_ == other.a_ &&
            b_ == other.b_ &&
            x_ == other.x_ &&
            y_ == other.y_ &&
            z_ == other.z_);
  }

  int a_, b_, x_, y_, z_;
};

namespace std {
template <>
struct hash<Key> {
  typedef Key argument_type;
  typedef std::size_t result_type;

  result_type operator()(const Key& key) const {
    std::size_t val = 0;
    boost::hash_combine(val, key.a_);
    boost::hash_combine(val, key.b_);
    boost::hash_combine(val, key.x_);
    boost::hash_combine(val, key.y_);
    boost::hash_combine(val, key.z_);
    return val;
  }
};
}  // namespace std.

int main(int argc, char** argv) {
  std::default_random_engine generator;
  // z in [0, 23999]: valid indices into the 24000-element innermost vectors
  // (a [1, 24000] range would read one past the end at z == 24000).
  std::uniform_int_distribution<int> random_z(0, 23999);
  std::uniform_real_distribution<float> random_vote(0.0, 1.0);
  // This makes an 8 x 10 x 30 x 30 x 24000 vector of vectors... of vectors.
  std::vector<std::vector<std::vector<std::vector<std::vector<float> > > > > votes;
  for (size_t a = 0; a < 8; ++a) {
    std::vector<std::vector<std::vector<std::vector<float> > > > a_grid;
    for (size_t b = 0; b < 10; ++b) {
      std::vector<std::vector<std::vector<float> > > b_grid;
      for (size_t y = 0; y < 30; ++y) {
        std::vector<std::vector<float> > y_grid;
        for (size_t x = 0; x < 30; ++x) {
          y_grid.push_back(std::vector<float>(24000, 0));
        }
        b_grid.push_back(y_grid);
      }
      a_grid.push_back(b_grid);
    }
    votes.push_back(a_grid);
  }
  int total_accesses = 0;
  boost::posix_time::ptime start =
      boost::posix_time::microsec_clock::local_time();
  for (int i = 0; i < 200000; ++i) {
    int z = random_z(generator);
    for (int a = 0; a < 8; ++a) {
      for (int b = 0; b < 10; ++b) {
        for (int x = 0; x < 30; ++x) {
          for (int y = 0; y < 30; ++y) {
            float this_vote = random_vote(generator);
            if (this_vote > 0.8) {
              votes[a][b][y][x][z] += this_vote;
              ++total_accesses;
            }
          }
        }
      }
    }
    if ((i + 1) % 1000 == 0) {
      boost::posix_time::ptime now =
          boost::posix_time::microsec_clock::local_time();
      boost::posix_time::time_duration diff = now - start;
      printf("%d / %d : Milliseconds per access: %f\n",
             i + 1, 200000,
             static_cast<float>(diff.total_milliseconds()) / total_accesses);
    }
  }
}
It takes 7.2GB of memory and reports constant, low (relative to unordered_map) access time:
1000 / 200000 : Milliseconds per access: 0.000179
2000 / 200000 : Milliseconds per access: 0.000179
3000 / 200000 : Milliseconds per access: 0.000179
4000 / 200000 : Milliseconds per access: 0.000179
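(Sanity check on that footprint: 8 x 10 x 30 x 30 x 24000 = 1,728,000,000 floats, and 1,728,000,000 x 4 bytes ≈ 6.9 GB, consistent with the measured 7.2 GB once per-vector bookkeeping is added.)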
c++ performance c++11

asked Jun 4 '13 at 23:47 by user25841
If your access pattern is the same as in the provided example, it's better to use a single vector, or even a static array, to store all the data and to calculate the index like this: [z*8*10*30*30 + a*10*30*30 + b*30*30 + x*30 + y]. It looks ugly, but it's the fastest and most direct way in C++. – magras, Aug 1 '16 at 14:41
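A minimal sketch of that suggestion, using the dimensions from the question (the flat_index helper name is illustrative, not from the comment):

#include <cstddef>
#include <vector>

// Extents from the question: 8 x 10 x 30 x 30 x 24000.
const std::size_t A = 8, B = 10, X = 30, Y = 30, Z = 24000;

// Maps (a, b, x, y, z) to a single offset; algebraically the same as the
// z*8*10*30*30 + a*10*30*30 + b*30*30 + x*30 + y formula above.
inline std::size_t flat_index(int a, int b, int x, int y, int z) {
  return (((static_cast<std::size_t>(z) * A + a) * B + b) * X + x) * Y + y;
}

int main() {
  // One contiguous allocation (~6.9 GB here) instead of 72,000 inner vectors.
  std::vector<float> votes(A * B * X * Y * Z, 0.0f);
  votes[flat_index(3, 7, 10, 20, 5000)] += 0.9f;
}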
1 Answer
Firstly, it's somewhat hard to say, because you haven't given the code you are using with vector. Are you keeping it sorted and doing some kind of binary search to find the element, or is this just raw vector access time where you don't care about adding this_vote to a specific key? If so, of course raw vector access with something like push_back will be much faster, due to locality of reference, not requiring the computation of a hash function for each Key, and so on.
Secondly, this is where we grab our trusty profiler to see what exactly is taking up so much time. Compiling with -O3 and running gives this:
% cumulative self self total
time seconds seconds calls ms/call ms/call name
79.19 26.15 26.15 165 158.48 158.48 std::_Hashtable<Key, std::pair<Key const, float>, std::allocator<std::pair<Key const, float> >, std::_Select1st<std::pair<Key const, float> >, std::equal_to<Key>, std::hash<Key>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, true, false, true>::_M_rehash(unsigned int, std::pair<unsigned int, unsigned int> const&)
Squint at it hard enough and it becomes clear that _M_rehash is the culprit, taking almost 80% of the time. Unfortunately, I'm running this on GCC 4.7, which has a known bug that slows down unordered_map quite a lot; it's on the mailing list if you're interested. Further, reserve doesn't fix this problem. If you're using GCC 4.7, unordered_map will be slow.
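For reference, the usual mitigation (which, per the above, does not help on GCC 4.7, but is worth trying elsewhere) is to size the table up front so it never rehashes during the run; the capacity below is an assumed upper bound, not a measured figure:

std::unordered_map<Key, float> votes;
// Pick a load factor and pre-reserve buckets for the largest number of
// distinct keys expected, so no rehash happens mid-benchmark.
votes.max_load_factor(0.8f);
votes.reserve(20000000);  // assumption: at most ~20M distinct keys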
The actual insert is called 14311307 times, yet accounts for only 0.64% of the time, at 0.21s:
Let's switch over to a regular std::map:
4.26 10.10 0.46 14311307 0.00 0.00 std::_Rb_tree_iterator<std::pair<Key const, float> > std::_Rb_tree<Key, std::pair<Key const, float>, std::_Select1st<std::pair<Key const, float> >, std::less<Key>, std::allocator<std::pair<Key const, float> > >::_M_insert_unique_<std::pair<Key const, float> >(std::_Rb_tree_const_iterator<std::pair<Key const, float> >, std::pair<Key const, float>&&)
Now we have the same 14311307 calls to insert, which take up 4.26% of the time at 0.46s. However, it avoids the huge overhead of rehashing the map, so it is actually quite a bit quicker.
The std::map code is exactly the same, except with a specialization of std::less:
#include <map>
#include <tuple>
#include <functional>

namespace std {
template <>
struct less<Key> {
  bool operator()(const Key& k1, const Key& k2) const {
    // Lexicographic comparison over all five fields.
    return std::tie(k1.a_, k1.b_, k1.x_, k1.y_, k1.z_) <
           std::tie(k2.a_, k2.b_, k2.x_, k2.y_, k2.z_);
  }
};
}
and with votes being declared as:

std::map<Key, float> votes;
Since you're already using boost, why not try boost::unordered_map instead?
#include "boost/unordered_map.hpp"
std::size_t hash_value(const Key& key)
{
typedef Key argument_type;
typedef std::size_t result_type;
std::size_t val = 0;
boost::hash_combine(val, key.a_);
boost::hash_combine(val, key.b_);
boost::hash_combine(val, key.x_);
boost::hash_combine(val, key.y_);
boost::hash_combine(val, key.z_);
return val;
}
boost::unordered_map<Key, float, boost::hash<Key>> votes;
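boost::hash<Key> locates hash_value through argument-dependent lookup, which is why it is written as a free function alongside Key rather than as a class specialization.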
You can also save a bit of time by not constructing the Key if it isn't needed, and passing an rvalue instead of an lvalue reference:
float this_vote = random_vote(generator);
if (this_vote > 0.8) {
  votes[Key(a, b, x, y, z)] += this_vote;
  ++total_accesses;
}
If you have an up-to-date version of boost, you might also try using emplace:
if (this_vote > 0.8) {
  // emplace returns an (iterator, inserted) pair; the mapped value starts
  // at 0.0f when the key is new, so the += is correct either way.
  auto val = votes.emplace(Key(a, b, x, y, z), 0.0f);
  val.first->second += this_vote;
  ++total_accesses;
}
This may or may not be slower. I can't test it, but my gut feeling is that it will be slower, because Key is a thin object. If it contained more than a few integers, this would possibly be faster.
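To avoid even the temporary Key, C++11 piecewise construction lets the map build the key in place from the raw arguments; a sketch, assuming your boost version forwards emplace arguments to the std::pair piecewise constructor:

#include <tuple>    // std::forward_as_tuple
#include <utility>  // std::piecewise_construct

if (this_vote > 0.8) {
  // Construct the Key and the float directly inside the node.
  auto val = votes.emplace(std::piecewise_construct,
                           std::forward_as_tuple(a, b, x, y, z),
                           std::forward_as_tuple(0.0f));
  val.first->second += this_vote;
  ++total_accesses;
}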
Basically, the final advice is to profile. Further, you're unlikely to get access times equivalent to a vector from anything else: vector is guaranteed to be contiguous, so it has good locality of reference and likely won't require as many dereferences as any other container.
answered Jun 5 '13 at 2:54 by Yuushi

@Sancho I modified it to perform fewer loops. I'm running on a machine with significantly less memory. – Yuushi, Jun 6 '13 at 0:43