Skip to content

Commit

Permalink
[simd][perf]
Browse files Browse the repository at this point in the history
  • Loading branch information
lamphamsy committed May 27, 2019
1 parent 5563714 commit c1bae3d
Showing 1 changed file with 163 additions and 65 deletions.
228 changes: 163 additions & 65 deletions test/simd/test_simd_fnt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ class SimdTestFnt : public ::testing::Test {
return vec[0];
}

void gen_rand_data(std::vector<T>& vec)
template <typename Tx>
void gen_rand_data(std::vector<Tx>& vec)
{
const size_t len = vec.size();

Expand Down Expand Up @@ -234,81 +235,127 @@ class SimdTestFnt : public ::testing::Test {
}

template <typename TFunc>
void core_op_perf(const std::string& text, const TFunc& f)
void core_op_perf_single(const std::string& text, const TFunc& f)
{
const size_t len = vec_len * simd::countof<T>();

std::vector<T> buf_x(len, 0);
std::vector<T> buf_y(len, 0);
gen_rand_data(buf_x);
gen_rand_data(buf_y);
std::cout << text << "\n";
std::cout << "\tVectors nb\t\tAverage nb of CPU cycles\n";
simd::VecType x = this->rand_vec();
simd::VecType y = this->rand_vec();
for (auto vec_len : arr_vec_len) {
const size_t nb = iters_nb * vec_len;

simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data());
simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data());
uint64_t start = quadiron::hw_timer();
for (unsigned i = 0; i < nb; ++i) {
simd::VecType _x = simd::load_to_reg(&x);
simd::VecType _y = simd::load_to_reg(&y);

uint64_t start = quadiron::hw_timer();
for (unsigned i = 0; i < iters_nb; ++i) {
for (size_t j = 0; j < vec_len; ++j) {
simd::VecType x = simd::load_to_reg(&data_x[i]);
simd::VecType y = simd::load_to_reg(&data_y[i]);
f(_x, _y);

f(x, y);

simd::store_to_mem(&data_x[i], x);
simd::store_to_mem(&x, _x);
}
uint64_t end = quadiron::hw_timer();
double avg_cycles_nb =
static_cast<double>(end - start) / static_cast<double>(nb);
std::cout << "\t" << vec_len << "\t\t" << avg_cycles_nb << "\n";
}
uint64_t end = quadiron::hw_timer();
double avg_cycles_nb =
static_cast<double>(end - start) / static_cast<double>(iters_nb);
std::cout << "Average nb of CPU cycles per operation " << text << ": "
<< avg_cycles_nb / vec_len << "\n";
std::cout << "\n";
}

template <typename TFunc>
void butterfly_perf(const std::string& text, const TFunc& f)
void core_op_perf(const std::string& text, const TFunc& f)
{
const size_t len = vec_len * simd::countof<T>();

std::vector<T> buf_x(len);
std::vector<T> buf_y(len);
gen_rand_data(buf_x);
gen_rand_data(buf_y);

simd::VecType* data_x = reinterpret_cast<simd::VecType*>(buf_x.data());
simd::VecType* data_y = reinterpret_cast<simd::VecType*>(buf_y.data());

std::vector<simd::VecType> data_z(this->vec_len);
std::cout << text << "\n";
std::cout << "\tVectors nb\t\tAverage nb of CPU cycles\n";
for (auto vec_len : arr_vec_len) {
const size_t len = vec_len * simd::countof<T>();

std::vector<T> buf_x(len, 0);
std::vector<T> buf_y(len, 0);
gen_rand_data(buf_x);
gen_rand_data(buf_y);

simd::VecType* data_x =
reinterpret_cast<simd::VecType*>(buf_x.data());
simd::VecType* data_y =
reinterpret_cast<simd::VecType*>(buf_y.data());

uint64_t start = quadiron::hw_timer();
for (unsigned i = 0; i < iters_nb; ++i) {
for (size_t j = 0; j < vec_len; ++j) {
simd::VecType x = simd::load_to_reg(&data_x[j]);
simd::VecType y = simd::load_to_reg(&data_y[j]);

f(x, y);

simd::store_to_mem(&data_x[j], x);
}
}
uint64_t end = quadiron::hw_timer();
double avg_cycles_nb = static_cast<double>(end - start)
/ static_cast<double>(iters_nb)
/ static_cast<double>(vec_len);
;

T coef =
1
+ this->distribution->operator()(quadiron::prng()) % (this->q - 2);
const simd::CtGsCase ct_case = simd::get_case<T>(coef, this->q);
const simd::VecType c = simd::set_one(coef);
std::cout << "\t" << vec_len << "\t\t" << avg_cycles_nb << "\n";
}
std::cout << "\n";
}

uint64_t start = quadiron::hw_timer();
for (unsigned i = 0; i < iters_nb; ++i) {
for (size_t j = 0; j < this->vec_len; ++j) {
simd::VecType x = simd::load_to_reg(&data_x[i]);
simd::VecType y = simd::load_to_reg(&data_y[i]);
template <typename TFunc>
void butterfly_perf(const std::string& text, const TFunc& f)
{
std::cout << text << "\n";
std::cout << "\tVectors nb\t\tAverage nb of CPU cycles\n";
for (auto vec_len : arr_vec_len) {
const size_t len = vec_len * simd::countof<T>();

std::vector<T> buf_x(len);
std::vector<T> buf_y(len);
gen_rand_data(buf_x);
gen_rand_data(buf_y);

simd::VecType* data_x =
reinterpret_cast<simd::VecType*>(buf_x.data());
simd::VecType* data_y =
reinterpret_cast<simd::VecType*>(buf_y.data());

std::vector<simd::VecType> data_z(vec_len);

T coef = 1
+ this->distribution->operator()(quadiron::prng())
% (this->q - 2);
const simd::CtGsCase ct_case = simd::get_case<T>(coef, this->q);
const simd::VecType c = simd::set_one(coef);

uint64_t start = quadiron::hw_timer();
for (unsigned i = 0; i < iters_nb; ++i) {
for (size_t j = 0; j < vec_len; ++j) {
simd::VecType x = simd::load_to_reg(&data_x[j]);
simd::VecType y = simd::load_to_reg(&data_y[j]);

f(ct_case, c, x, y);

simd::store_to_mem(&data_x[j], x);
simd::store_to_mem(&data_y[j], y);
}
}
uint64_t end = quadiron::hw_timer();

f(ct_case, c, x, y);
double avg_cycles_nb = static_cast<double>(end - start)
/ static_cast<double>(iters_nb)
/ static_cast<double>(vec_len);
;

simd::store_to_mem(&data_x[i], x);
simd::store_to_mem(&data_y[i], y);
}
std::cout << "\t" << vec_len << "\t\t" << avg_cycles_nb << "\n";
}
uint64_t end = quadiron::hw_timer();

double avg_cycles_nb =
static_cast<double>(end - start) / static_cast<double>(iters_nb);
std::cout << "Average nb of CPU cycles per operation " << text << ": "
<< avg_cycles_nb / vec_len << "\n";
std::cout << "\n";
}

T q;
std::unique_ptr<std::uniform_int_distribution<uint32_t>> distribution;
size_t vec_len = 256;
size_t iters_nb = 1e3;
std::vector<size_t> arr_vec_len =
{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384};
size_t iters_nb = 1e5;
};

using AllTypes = ::testing::Types<uint16_t, uint32_t>;
Expand Down Expand Up @@ -438,22 +485,34 @@ TYPED_TEST(SimdTestFnt, TestButterflyGs) // NOLINT

TYPED_TEST(SimdTestFnt, PerfModMulSingle) // NOLINT
{
const size_t iters_nb = 1e5;
simd::VecType x = this->rand_vec();
simd::VecType y = this->rand_vec();
const size_t len = simd::countof<TypeParam>();
TypeParam x[len];
TypeParam y[len];

for (unsigned i = 0; i < len; ++i) {
x[i] = 1
+ (this->distribution->operator()(quadiron::prng())
% (this->q - 1));
y[i] = 1
+ (this->distribution->operator()(quadiron::prng())
% (this->q - 1));
}

simd::VecType* vec_x = reinterpret_cast<simd::VecType*>(x);
simd::VecType* vec_y = reinterpret_cast<simd::VecType*>(y);

uint64_t start = quadiron::hw_timer();
for (unsigned i = 0; i < iters_nb; ++i) {
simd::VecType _x = simd::load_to_reg(&x);
simd::VecType _y = simd::load_to_reg(&y);
for (unsigned i = 0; i < this->iters_nb; ++i) {
simd::VecType _x = simd::load_to_reg(vec_x);
simd::VecType _y = simd::load_to_reg(vec_y);

_x = simd::mod_mul<TypeParam>(_x, _y);

simd::store_to_mem(&x, _x);
simd::store_to_mem(vec_x, _x);
}
uint64_t end = quadiron::hw_timer();
double avg_cycles_nb =
static_cast<double>(end - start) / static_cast<double>(iters_nb);
static_cast<double>(end - start) / static_cast<double>(this->iters_nb);
std::cout << "PerfModMulSingle: " << avg_cycles_nb << "\n";
}

Expand Down Expand Up @@ -491,6 +550,45 @@ TYPED_TEST(SimdTestFnt, PerfModBuf) // NOLINT
});
}

// Micro-benchmark of a simd::unpack followed by simd::pack round trip.
//
// For every vector count in `arr_vec_len`, fresh random data and metadata
// buffers are generated, the round trip is applied to each vector `iters_nb`
// times, and the mean number of hw_timer ticks per vector is printed as one
// row of a tab-separated table. Nothing is asserted: the test only reports
// timings.
TYPED_TEST(SimdTestFnt, PerfPackUnpack) // NOLINT
{
    std::cout << "Pack & Unpack"
              << "\n";
    std::cout << "\tVectors nb\t\tAverage nb of CPU cycles\n";

    for (auto nb_vecs : this->arr_vec_len) {
        // The data buffer holds countof<TypeParam>() scalars per vector and
        // the metadata buffer holds one entry per vector.
        const size_t nb_elems = nb_vecs * simd::countof<TypeParam>();

        std::vector<TypeParam> elems(nb_elems, 0);
        std::vector<simd::MetaType> metas(nb_vecs, 0);
        // Fill order is kept (data first, then metadata) so the sequence of
        // random draws is unchanged.
        this->gen_rand_data(elems);
        this->gen_rand_data(metas);

        // View the scalar buffer as an array of SIMD vectors.
        simd::VecType* data = reinterpret_cast<simd::VecType*>(elems.data());
        simd::MetaType* meta = metas.data();

        const uint64_t start = quadiron::hw_timer();
        for (unsigned iter = 0; iter < this->iters_nb; ++iter) {
            for (size_t k = 0; k < nb_vecs; ++k) {
                simd::VecType reg = simd::load_to_reg(&data[k]);

                // Filled by unpack, then consumed by pack.
                simd::VecType lo, hi;
                simd::unpack<TypeParam>(meta[k], reg, hi, lo);
                simd::pack<TypeParam>(lo, hi, reg, meta[k]);

                simd::store_to_mem(&data[k], reg);
            }
        }
        const uint64_t end = quadiron::hw_timer();

        // Normalise the elapsed ticks first by the number of repetitions and
        // then by the number of vectors handled in each repetition.
        const double elapsed = static_cast<double>(end - start);
        const double per_iter = elapsed / static_cast<double>(this->iters_nb);
        const double avg_cycles_nb = per_iter / static_cast<double>(nb_vecs);

        std::cout << "\t" << nb_vecs << "\t\t" << avg_cycles_nb << "\n";
    }
    std::cout << "\n";
}

TYPED_TEST(SimdTestFnt, PerfButterfly) // NOLINT
{
this->butterfly_perf(
Expand Down

0 comments on commit c1bae3d

Please sign in to comment.