32void parallelize(
int num_workers, Task_ num_tasks, Run_ run_task_range) {
33#ifndef POWERIT_CUSTOM_PARALLEL
38 POWERIT_CUSTOM_PARALLEL(num_workers, num_tasks, run_task_range);
84 return compute_core(order, [&](std::vector<Data_>& buffer,
const Data_* vec) {
86 for (size_t j = start, end = start + length; j < end; ++j) {
88 buffer[j] = std::inner_product(vec, vec + order, matrix + j * order, static_cast<Data_>(0.0));
93 }
else if (opt.num_threads == 1) {
95 return compute_core(order, [&](std::vector<Data_>& buffer,
const Data_* vec) {
96 std::fill(buffer.begin(), buffer.end(), 0);
97 auto matcopy = matrix;
98 for (
size_t j = 0; j < order; ++j) {
100 for (
size_t k = 0; k < order; ++k, ++matcopy) {
101 buffer[k] += mult * (*matcopy);
110 std::vector<std::vector<Data_> > temp_buffers(opt.num_threads);
111 for (
int i = 0; i < opt.num_threads; ++i) {
112 temp_buffers[i].resize(order);
115 return compute_core(order, [&](std::vector<Data_>& buffer,
const Data_* vec) {
116 parallelize(opt.num_threads, order, [&](
int t,
size_t start,
size_t length) {
117 auto& tmp = temp_buffers[t];
118 std::fill_n(tmp.begin(), length, 0);
120 size_t offset = start;
121 for (size_t j = 0; j < order; ++j, offset += order) {
123 auto matcopy = matrix + offset;
124 for (size_t k = 0; k < length; ++k, ++matcopy) {
125 tmp[k] += mult * (*matcopy);
129 std::copy_n(tmp.begin(), length, buffer.begin() + start);
Result< Data_ > compute(size_t order, const Data_ *matrix, bool row_major, Data_ *vector, Engine_ &engine, const Options &opt)
Definition simple.hpp:60