Skip to content

Commit

Permalink
[perf] simd test
Browse files — browse the repository at this point in the history
  • Loading branch information
lamphamsy committed May 25, 2019
1 parent 9a9f7c4 commit 5563714
Showing 1 changed file with 57 additions and 167 deletions.
224 changes: 57 additions & 167 deletions test/simd/test_simd_fnt.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -233,14 +233,13 @@ class SimdTestFnt : public ::testing::Test {
x = simd::load_to_reg(reinterpret_cast<simd::VecType*>(x_buf));
}

void core_op_perf_lambda(
const std::string& text,
const std::function<void(simd::VecType&, const simd::VecType&)>& f)
template <typename TFunc>
void core_op_perf(const std::string& text, const TFunc& f)
{
const size_t len = vec_len * simd::countof<T>();

std::vector<T> buf_x(len);
std::vector<T> buf_y(len);
std::vector<T> buf_x(len, 0);
std::vector<T> buf_y(len, 0);
gen_rand_data(buf_x);
gen_rand_data(buf_y);

Expand All @@ -266,7 +265,7 @@ class SimdTestFnt : public ::testing::Test {
}

template <typename TFunc>
void core_op_perf_template(const std::string& text, const TFunc& f)
void butterfly_perf(const std::string& text, const TFunc& f)
{
const size_t len = vec_len * simd::countof<T>();

Expand All @@ -278,18 +277,28 @@ class SimdTestFnt : public ::testing::Test {
simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data());
simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data());

std::vector<simd::VecType> data_z(this->vec_len);

T coef =
1
+ this->distribution->operator()(quadiron::prng()) % (this->q - 2);
const simd::CtGsCase ct_case = simd::get_case<T>(coef, this->q);
const simd::VecType c = simd::set_one(coef);

uint64_t start = quadiron::hw_timer();
for (unsigned i = 0; i < iters_nb; ++i) {
for (size_t j = 0; j < vec_len; ++j) {
for (size_t j = 0; j < this->vec_len; ++j) {
simd::VecType x = simd::load_to_reg(&data_x[i]);
simd::VecType y = simd::load_to_reg(&data_y[i]);

f(x, y);
f(ct_case, c, x, y);

simd::store_to_mem(&data_x[i], x);
simd::store_to_mem(&data_y[i], y);
}
}
uint64_t end = quadiron::hw_timer();

double avg_cycles_nb =
static_cast<double>(end - start) / static_cast<double>(iters_nb);
std::cout << "Average nb of CPU cycles per operation " << text << ": "
Expand Down Expand Up @@ -448,178 +457,59 @@ TYPED_TEST(SimdTestFnt, PerfModMulSingle) // NOLINT
std::cout << "PerfModMulSingle: " << avg_cycles_nb << "\n";
}

TYPED_TEST(SimdTestFnt, PerfModMulBuf) // NOLINT
TYPED_TEST(SimdTestFnt, PerfSimdBuf) // NOLINT
{
const size_t iters_nb = 1e3;

const size_t len = this->vec_len * simd::countof<TypeParam>();
std::vector<TypeParam> buf_x(len);
std::vector<TypeParam> buf_y(len);
this->gen_rand_data(buf_x);
this->gen_rand_data(buf_y);

simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data());
simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data());
this->core_op_perf("Add", [](simd::VecType& x, const simd::VecType& y) {
x = simd::add<TypeParam>(x, y);
});

uint64_t start = quadiron::hw_timer();
for (unsigned i = 0; i < iters_nb; ++i) {
for (size_t j = 0; j < this->vec_len; ++j) {
simd::VecType x = simd::load_to_reg(&data_x[i]);
simd::VecType y = simd::load_to_reg(&data_y[i]);

x = simd::mod_mul<TypeParam>(x, y);

simd::store_to_mem(&data_x[i], x);
}
}
uint64_t end = quadiron::hw_timer();
double avg_cycles_nb =
static_cast<double>(end - start) / static_cast<double>(iters_nb);
std::cout << "Perf of ModMul on buffer of "
<< len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb
<< " => per operation: " << avg_cycles_nb / this->vec_len << "\n";
}
this->core_op_perf("Sub", [](simd::VecType& x, const simd::VecType& y) {
x = simd::sub<TypeParam>(x, y);
});

TYPED_TEST(SimdTestFnt, PerfModMulBufLambda) // NOLINT
{
this->core_op_perf_lambda(
"[Lambda] ModMul", [](simd::VecType& x, const simd::VecType& y) {
x = simd::mod_mul<TypeParam>(x, y);
});
}
this->core_op_perf("Mul", [](simd::VecType& x, const simd::VecType& y) {
x = simd::mul<TypeParam>(x, y);
});

TYPED_TEST(SimdTestFnt, PerfModMulBufTemplate) // NOLINT
{
this->core_op_perf_template(
"[Template] ModMul", [](simd::VecType& x, const simd::VecType& y) {
x = simd::mod_mul<TypeParam>(x, y);
});
this->core_op_perf("Min", [](simd::VecType& x, const simd::VecType& y) {
x = simd::min<TypeParam>(x, y);
});
}

TYPED_TEST(SimdTestFnt, PerfModAddBuf) // NOLINT
TYPED_TEST(SimdTestFnt, PerfModBuf) // NOLINT
{
const size_t iters_nb = 1e3;

const size_t len = this->vec_len * simd::countof<TypeParam>();
std::vector<TypeParam> buf_x(len);
std::vector<TypeParam> buf_y(len);
this->gen_rand_data(buf_x);
this->gen_rand_data(buf_y);

simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data());
simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data());
this->core_op_perf("ModAdd", [](simd::VecType& x, const simd::VecType& y) {
x = simd::mod_add<TypeParam>(x, y);
});

uint64_t start = quadiron::hw_timer();
for (unsigned i = 0; i < iters_nb; ++i) {
for (size_t j = 0; j < this->vec_len; ++j) {
simd::VecType x = simd::load_to_reg(&data_x[i]);
simd::VecType y = simd::load_to_reg(&data_y[i]);
this->core_op_perf("ModSub", [](simd::VecType& x, const simd::VecType& y) {
x = simd::mod_sub<TypeParam>(x, y);
});

x = simd::mod_add<TypeParam>(x, y);

simd::store_to_mem(&data_x[i], x);
}
}
uint64_t end = quadiron::hw_timer();
double avg_cycles_nb =
static_cast<double>(end - start) / static_cast<double>(iters_nb);
std::cout << "Perf of ModAdd on buffer of "
<< len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb
<< " => per operation: " << avg_cycles_nb / this->vec_len << "\n";
this->core_op_perf("ModMul", [](simd::VecType& x, const simd::VecType& y) {
x = simd::mod_mul<TypeParam>(x, y);
});
}

TYPED_TEST(SimdTestFnt, PerfModSubBuf) // NOLINT
TYPED_TEST(SimdTestFnt, PerfButterfly) // NOLINT
{
const size_t iters_nb = 1e3;

const size_t len = this->vec_len * simd::countof<TypeParam>();
std::vector<TypeParam> buf_x(len);
std::vector<TypeParam> buf_y(len);
this->gen_rand_data(buf_x);
this->gen_rand_data(buf_y);

simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data());
simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data());

std::vector<simd::VecType> data_z(this->vec_len);

uint64_t start = quadiron::hw_timer();
for (unsigned i = 0; i < iters_nb; ++i) {
for (size_t j = 0; j < this->vec_len; ++j) {
simd::VecType x = simd::load_to_reg(&data_x[i]);
simd::VecType y = simd::load_to_reg(&data_y[i]);

x = simd::mod_sub<TypeParam>(x, y);

simd::store_to_mem(&data_x[i], x);
}
}
uint64_t end = quadiron::hw_timer();
double avg_cycles_nb =
static_cast<double>(end - start) / static_cast<double>(iters_nb);
std::cout << "Perf of ModSub on buffer of "
<< len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb
<< " => per operation: " << avg_cycles_nb / this->vec_len << "\n";
}

TYPED_TEST(SimdTestFnt, PerfButterflyCt) // NOLINT
{
const size_t iters_nb = 1e3;

const size_t len = this->vec_len * simd::countof<TypeParam>();
std::vector<TypeParam> buf_x(len);
std::vector<TypeParam> buf_y(len);
this->gen_rand_data(buf_x);
this->gen_rand_data(buf_y);

simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data());
simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data());

std::vector<simd::VecType> data_z(this->vec_len);

TypeParam coef =
1 + this->distribution->operator()(quadiron::prng()) % (this->q - 2);
const simd::CtGsCase ct_case = simd::get_case<TypeParam>(coef, this->q);
simd::VecType c = simd::set_one(coef);

uint64_t start = quadiron::hw_timer();
for (unsigned i = 0; i < iters_nb; ++i) {
for (size_t j = 0; j < this->vec_len; ++j) {
simd::VecType x = simd::load_to_reg(&data_x[i]);
simd::VecType y = simd::load_to_reg(&data_y[i]);

this->butterfly_perf(
"Butterfly_CT",
[](simd::CtGsCase ct_case,
const simd::VecType& c,
simd::VecType& x,
simd::VecType& y) {
simd::butterfly_ct<TypeParam>(ct_case, c, x, y);
});

simd::store_to_mem(&data_x[i], x);
simd::store_to_mem(&data_y[i], y);
}
}
uint64_t end = quadiron::hw_timer();
double avg_cycles_nb =
static_cast<double>(end - start) / static_cast<double>(iters_nb);
std::cout << "Perf of Butterfly_CT on buffer of "
<< len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb
<< " => per operation: " << avg_cycles_nb / this->vec_len << "\n";

start = quadiron::hw_timer();
for (unsigned i = 0; i < iters_nb; ++i) {
for (size_t j = 0; j < this->vec_len; ++j) {
simd::VecType x = simd::load_to_reg(&data_x[i]);
simd::VecType y = simd::load_to_reg(&data_y[i]);

simd::VecType z = simd::mod_mul<TypeParam>(c, y);
y = simd::mod_sub<TypeParam>(x, z);
x = simd::mod_add<TypeParam>(x, z);

simd::store_to_mem(&data_x[i], x);
simd::store_to_mem(&data_y[i], y);
}
}
end = quadiron::hw_timer();
avg_cycles_nb =
static_cast<double>(end - start) / static_cast<double>(iters_nb);
std::cout << "Perf of MANUAL Butterfly_CT on buffer of "
<< len * sizeof(TypeParam) / 1024 << " KB: " << avg_cycles_nb
<< " => per operation: " << avg_cycles_nb / this->vec_len << "\n";
this->butterfly_perf(
"Butterfly_GS",
[](simd::CtGsCase ct_case,
const simd::VecType& c,
simd::VecType& x,
simd::VecType& y) {
simd::butterfly_gs<TypeParam>(ct_case, c, x, y);
});
}

#endif

0 comments on commit 5563714

Please sign in to comment.