98 ParallelSparseMatrix(Eigen::Index nrow, Eigen::Index ncol, ValueArray_ x, IndexArray_ i, PointerArray_ p,
bool column_major,
int nthreads) :
99 my_primary_dim(column_major ? ncol : nrow),
100 my_secondary_dim(column_major ? nrow : ncol),
101 my_nthreads(nthreads),
102 my_values(std::move(x)),
103 my_indices(std::move(i)),
104 my_ptrs(std::move(p)),
105 my_column_major(column_major)
117 if (my_column_major) {
118 return my_secondary_dim;
120 return my_primary_dim;
128 if (my_column_major) {
129 return my_primary_dim;
131 return my_secondary_dim;
162 typedef typename std::remove_const<typename std::remove_reference<decltype(std::declval<PointerArray_>()[0])>::type>::type
PointerType;
165 Eigen::Index my_primary_dim, my_secondary_dim;
167 ValueArray_ my_values;
168 IndexArray_ my_indices;
169 PointerArray_ my_ptrs;
170 bool my_column_major;
172 typedef typename std::remove_const<typename std::remove_reference<decltype(std::declval<IndexArray_>()[0])>::type>::type IndexType;
174 std::vector<size_t> my_primary_starts, my_primary_ends;
175 std::vector<IndexType> my_secondary_ends;
176 std::vector<std::vector<PointerType> > my_secondary_nonzero_starts;
187 return my_primary_starts;
197 return my_primary_ends;
209 return my_secondary_nonzero_starts;
214 void fragment_threads() {
215 auto total_nzeros = my_ptrs[my_primary_dim];
216 PointerType per_thread = (total_nzeros / my_nthreads) + (total_nzeros % my_nthreads > 0);
219 my_primary_starts.resize(my_nthreads);
220 my_primary_ends.resize(my_nthreads);
222 Eigen::Index primary_counter = 0;
224 for (
int t = 0; t < my_nthreads; ++t) {
225 my_primary_starts[t] = primary_counter;
226 while (primary_counter < my_primary_dim && my_ptrs[primary_counter + 1] <= sofar) {
229 my_primary_ends[t] = primary_counter;
235 my_secondary_ends.resize(my_nthreads + 1);
236 my_secondary_nonzero_starts.resize(my_nthreads + 1, std::vector<PointerType>(my_primary_dim));
238 std::vector<PointerType> secondary_nonzeros(my_secondary_dim);
240 ++(secondary_nonzeros[my_indices[i]]);
243 IndexType secondary_counter = 0;
247 for (
int t = 0; t < my_nthreads; ++t) {
248 while (secondary_counter < my_secondary_dim && cum_rows <= sofar) {
249 cum_rows += secondary_nonzeros[secondary_counter];
252 my_secondary_ends[t + 1] = secondary_counter;
256 for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
257 auto primary_start = my_ptrs[c], primary_end = my_ptrs[c + 1];
258 my_secondary_nonzero_starts[0][c] = primary_start;
260 auto s = primary_start;
261 for (
int thread = 0; thread < my_nthreads; ++thread) {
262 auto limit = my_secondary_ends[thread + 1];
263 while (s < primary_end && my_indices[s] < limit) {
266 my_secondary_nonzero_starts[thread + 1][c] = s;
273 void indirect_multiply(
const EigenVector_& rhs, std::vector<std::vector<typename EigenVector_::Scalar> >& thread_buffers, EigenVector_& output)
const {
274 if (my_nthreads == 1) {
276 for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
277 auto start = my_ptrs[c];
278 auto end = my_ptrs[c + 1];
279 auto val = rhs.coeff(c);
281 output.coeffRef(my_indices[s]) += my_values[s] * val;
288 auto secondary_start = my_secondary_ends[t];
289 auto secondary_end = my_secondary_ends[t + 1];
290 size_t secondary_len = secondary_end - secondary_start;
295 typename EigenVector_::Scalar* optr;
297 auto& curbuffer = thread_buffers[t - 1];
298 curbuffer.resize(secondary_len);
299 optr = curbuffer.data();
301 optr = output.data() + secondary_start;
303 std::fill_n(optr, secondary_len,
static_cast<typename EigenVector_::Scalar
>(0));
305 const auto& nz_starts = my_secondary_nonzero_starts[t];
306 const auto& nz_ends = my_secondary_nonzero_starts[t + 1];
307 for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
308 auto nz_start = nz_starts[c];
309 auto nz_end = nz_ends[c];
310 auto val = rhs.coeff(c);
312 optr[my_indices[s] - secondary_start] += my_values[s] * val;
317 std::copy_n(optr, secondary_len, output.data() + secondary_start);
324 void direct_multiply(
const EigenVector_& rhs, EigenVector_& output)
const {
325 if (my_nthreads == 1) {
326 for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
327 output.coeffRef(c) = column_dot_product<typename EigenVector_::Scalar>(c, rhs);
333 auto curstart = my_primary_starts[t];
334 auto curend = my_primary_ends[t];
335 for (
size_t c = curstart; c < curend; ++c) {
336 output.coeffRef(c) = column_dot_product<typename EigenVector_::Scalar>(c, rhs);
343 template<
typename Scalar_>
344 Scalar_ column_dot_product(
size_t c,
const EigenVector_& rhs)
const {
345 PointerType primary_start = my_ptrs[c], primary_end = my_ptrs[c + 1];
347 for (
PointerType s = primary_start; s < primary_end; ++s) {
348 dot += my_values[s] * rhs.coeff(my_indices[s]);
359 std::vector<std::vector<typename EigenVector_::Scalar> > thread_buffers;
362 Workspace workspace()
const {
364 if (my_nthreads > 1 && my_column_major) {
365 output.thread_buffers.resize(my_nthreads - 1);
370 struct AdjointWorkspace {
372 std::vector<std::vector<typename EigenVector_::Scalar> > thread_buffers;
375 AdjointWorkspace adjoint_workspace()
const {
376 AdjointWorkspace output;
377 if (my_nthreads > 1 && !my_column_major) {
378 output.thread_buffers.resize(my_nthreads - 1);
384 template<
class Right_>
385 void multiply(
const Right_& rhs, Workspace& work, EigenVector_& output)
const {
386 const auto& realized_rhs = internal::realize_rhs(rhs, work.buffer);
387 if (my_column_major) {
388 indirect_multiply(realized_rhs, work.thread_buffers, output);
390 direct_multiply(realized_rhs, output);
394 template<
class Right_>
395 void adjoint_multiply(
const Right_& rhs, AdjointWorkspace& work, EigenVector_& output)
const {
396 const auto& realized_rhs = internal::realize_rhs(rhs, work.buffer);
397 if (my_column_major) {
398 direct_multiply(realized_rhs, output);
400 indirect_multiply(realized_rhs, work.thread_buffers, output);
405 template<
class EigenMatrix_>
406 EigenMatrix_ realize()
const {
408 EigenMatrix_ output(nr, nc);
411 if (my_column_major) {
412 for (Eigen::Index c = 0; c < nc; ++c) {
413 PointerType col_start = my_ptrs[c], col_end = my_ptrs[c + 1];
414 for (
PointerType s = col_start; s < col_end; ++s) {
415 output.coeffRef(my_indices[s], c) = my_values[s];
419 for (Eigen::Index r = 0; r < nr; ++r) {
420 PointerType row_start = my_ptrs[r], row_end = my_ptrs[r + 1];
421 for (
PointerType s = row_start; s < row_end; ++s) {
422 output.coeffRef(r, my_indices[s]) = my_values[s];