#ifndef IRLBA_MATRIX_SPARSE_HPP
#define IRLBA_MATRIX_SPARSE_HPP

#include <algorithm>
#include <memory>
#include <utility>
#include <vector>

#include "Eigen/Dense"
#include "sanisizer/sanisizer.hpp"

#ifndef IRLBA_CUSTOM_PARALLEL
#include "subpar/subpar.hpp"
#endif
// Core implementation shared by ParallelSparseMatrix and its workspace classes.
template<class ValueArray_, class IndexArray_, class PointerArray_>
class ParallelSparseMatrixCore {
public:
    typedef I<decltype(std::declval<PointerArray_>()[0])> PointerType;
    ParallelSparseMatrixCore(Eigen::Index nrow, Eigen::Index ncol, ValueArray_ x, IndexArray_ i, PointerArray_ p, bool column_major, int num_threads) :
        my_primary_dim(column_major ? ncol : nrow),
        my_secondary_dim(column_major ? nrow : ncol),
        my_num_threads(num_threads),
        my_values(std::move(x)),
        my_indices(std::move(i)),
        my_ptrs(std::move(p)),
        my_column_major(column_major)
    {
        if (num_threads > 1) {
            const auto total_nzeros = my_ptrs[my_primary_dim];
            const PointerType per_thread_floor = total_nzeros / my_num_threads;
            const int per_thread_extra = total_nzeros % my_num_threads;

            const auto nthreads_p1 = sanisizer::sum<std::size_t>(my_num_threads, 1);

            // Assign each thread a contiguous stretch of the primary dimension
            // (columns, for a column-major layout) so that every thread covers
            // roughly the same number of nonzero elements.
            {
                sanisizer::resize(my_primary_boundaries, nthreads_p1);
                Eigen::Index primary_counter = 0;
                PointerType sofar = 0;
                for (int t = 0; t < my_num_threads; ++t) {
                    sofar += per_thread_floor + (t < per_thread_extra);
                    while (primary_counter < my_primary_dim && my_ptrs[primary_counter + 1] <= sofar) {
                        ++primary_counter;
                    }
                    my_primary_boundaries[t + 1] = primary_counter;
                }
            }

            // Do the same along the secondary dimension, based on the number of
            // nonzeros observed at each secondary index.
            {
                auto secondary_nonzeros = sanisizer::create<std::vector<PointerType> >(my_secondary_dim);
                for (PointerType i = 0; i < total_nzeros; ++i) {
                    ++(secondary_nonzeros[my_indices[i]]);
                }

                sanisizer::resize(my_secondary_boundaries, nthreads_p1);
                Eigen::Index secondary_counter = 0;
                PointerType sofar = 0;
                PointerType cum_secondary = 0;
                for (int t = 0; t < my_num_threads; ++t) {
                    sofar += per_thread_floor + (t < per_thread_extra);
                    while (secondary_counter < my_secondary_dim && cum_secondary <= sofar) {
                        cum_secondary += secondary_nonzeros[secondary_counter];
                        ++secondary_counter;
                    }
                    my_secondary_boundaries[t + 1] = secondary_counter;
                }
            }

            // For each primary element, record where each thread's stretch of the
            // secondary dimension starts within that element's run of nonzeros.
            sanisizer::resize(my_secondary_nonzero_boundaries, nthreads_p1);
            for (auto& starts : my_secondary_nonzero_boundaries) {
                sanisizer::resize(starts, my_primary_dim);
            }

            for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
                const auto primary_start = my_ptrs[c], primary_end = my_ptrs[c + 1];
                my_secondary_nonzero_boundaries[0][c] = primary_start;
                auto s = primary_start;
                for (int thread = 0; thread < my_num_threads; ++thread) {
                    const auto limit = my_secondary_boundaries[thread + 1];
                    while (s < primary_end && static_cast<Eigen::Index>(my_indices[s]) < limit) {
                        ++s;
                    }
                    my_secondary_nonzero_boundaries[thread + 1][c] = s;
                }
            }
        }
    }
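To make the load-balancing heuristic above concrete, here is a small self-contained sketch of the same idea applied to the primary dimension, using hypothetical toy data rather than anything from the library: split the total number of nonzeros as evenly as possible across threads, then advance each thread's column boundary until its share of nonzeros is covered.

// Standalone illustration; not part of sparse.hpp.
#include <cstdio>
#include <vector>

int main() {
    // Toy CSC pointer array for a 6-column matrix with 10 nonzeros (hypothetical data).
    std::vector<int> ptrs{ 0, 4, 5, 5, 8, 9, 10 };
    int ncol = 6, num_threads = 3;

    int total_nzeros = ptrs[ncol];
    int per_thread_floor = total_nzeros / num_threads;  // 3
    int per_thread_extra = total_nzeros % num_threads;  // 1, so thread 0 gets one extra

    std::vector<int> boundaries(num_threads + 1);
    int column = 0, sofar = 0;
    for (int t = 0; t < num_threads; ++t) {
        sofar += per_thread_floor + (t < per_thread_extra);
        // Advance the column boundary until this thread's share of nonzeros is covered.
        while (column < ncol && ptrs[column + 1] <= sofar) {
            ++column;
        }
        boundaries[t + 1] = column;
    }

    // Thread t would process columns [boundaries[t], boundaries[t+1]).
    for (int t = 0; t < num_threads; ++t) {
        std::printf("thread %d: columns [%d, %d)\n", t, boundaries[t], boundaries[t + 1]);
    }
    return 0;
}

Because boundaries can only fall between columns, the split is approximate; a very dense column still ends up entirely inside one thread's range.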
private:
    Eigen::Index my_primary_dim, my_secondary_dim;
    int my_num_threads;
    ValueArray_ my_values;
    IndexArray_ my_indices;
    PointerArray_ my_ptrs;
    bool my_column_major;

    // Each thread's stretch of the primary dimension, used by direct_multiply().
    std::vector<Eigen::Index> my_primary_boundaries;

    // Each thread's stretch of the secondary dimension, used by indirect_multiply().
    std::vector<Eigen::Index> my_secondary_boundaries;

    // For each thread and primary element, the first nonzero belonging to that thread's stretch.
    std::vector<std::vector<PointerType> > my_secondary_nonzero_boundaries;
public:
    Eigen::Index rows() const {
        if (my_column_major) {
            return my_secondary_dim;
        } else {
            return my_primary_dim;
        }
    }

    Eigen::Index cols() const {
        if (my_column_major) {
            return my_primary_dim;
        } else {
            return my_secondary_dim;
        }
    }
    const ValueArray_& get_values() const {
        return my_values;
    }

    const IndexArray_& get_indices() const {
        return my_indices;
    }

    const PointerArray_& get_pointers() const {
        return my_ptrs;
    }

    int get_num_threads() const {
        return my_num_threads;
    }

    bool get_column_major() const {
        return my_column_major;
    }

    const std::vector<Eigen::Index>& get_primary_boundaries() const {
        return my_primary_boundaries;
    }

    const std::vector<Eigen::Index>& get_secondary_boundaries() const {
        return my_secondary_boundaries;
    }

    const std::vector<std::vector<PointerType> >& get_secondary_nonzero_boundaries() const {
        return my_secondary_nonzero_boundaries;
    }
    template<typename EigenVector_>
    void indirect_multiply(const EigenVector_& rhs, std::vector<std::vector<typename EigenVector_::Scalar> >& thread_buffers, EigenVector_& output) const {
        if (my_num_threads == 1) {
            output.setZero();
            for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
                auto start = my_ptrs[c];
                auto end = my_ptrs[c + 1];
                auto val = rhs.coeff(c);
                for (PointerType s = start; s < end; ++s) {
                    output.coeffRef(my_indices[s]) += my_values[s] * val;
                }
            }
            return;
        }

        parallelize(my_num_threads, [&](int t) -> void {
            const auto secondary_start = my_secondary_boundaries[t];
            const auto secondary_end = my_secondary_boundaries[t + 1];
            const auto secondary_len = secondary_end - secondary_start;

            // The first thread writes directly into its slice of the output; the
            // other threads accumulate into per-thread buffers and copy their slice
            // into the output at the end.
            typename EigenVector_::Scalar* optr;
            if (t != 0) {
                auto& curbuffer = thread_buffers[t - 1];
                sanisizer::resize(curbuffer, secondary_len);
                optr = curbuffer.data();
            } else {
                optr = output.data() + secondary_start;
            }
            std::fill_n(optr, secondary_len, static_cast<typename EigenVector_::Scalar>(0));

            const auto& nz_starts = my_secondary_nonzero_boundaries[t];
            const auto& nz_ends = my_secondary_nonzero_boundaries[t + 1];
            for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
                const auto nz_start = nz_starts[c];
                const auto nz_end = nz_ends[c];
                const auto val = rhs.coeff(c);
                for (PointerType s = nz_start; s < nz_end; ++s) {
                    optr[my_indices[s] - secondary_start] += my_values[s] * val;
                }
            }

            if (t != 0) {
                std::copy_n(optr, secondary_len, output.data() + secondary_start);
            }
        });
    }
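The parallel branch above works because each thread owns a disjoint stretch of the secondary dimension (output rows, for a column-major matrix): every thread scans all columns, but only accumulates the nonzeros whose indices fall inside its own stretch. The following self-contained sketch, with toy data and a sequential loop standing in for the threads, illustrates that partitioning; the real implementation avoids the per-entry index test by precomputing my_secondary_nonzero_boundaries.

// Standalone illustration; not part of sparse.hpp.
#include <cstdio>
#include <vector>

int main() {
    // Hypothetical 4x3 CSC matrix and input vector.
    std::vector<double> values { 1, 2, 3, 4, 5 };
    std::vector<int>    indices{ 0, 2, 1, 3, 2 };   // row of each nonzero
    std::vector<int>    ptrs   { 0, 2, 4, 5 };      // column offsets
    std::vector<double> rhs    { 1, 1, 1 };
    int nrow = 4, ncol = 3;

    // Pretend two "threads", each owning a contiguous slice of output rows.
    std::vector<int> row_boundaries{ 0, 2, 4 };

    std::vector<double> output(nrow);
    for (int t = 0; t < 2; ++t) {  // simulated sequentially here
        int row_start = row_boundaries[t], row_end = row_boundaries[t + 1];
        // Each thread scans all columns but only touches rows in its own slice,
        // so no two threads ever write to the same output element.
        for (int c = 0; c < ncol; ++c) {
            for (int s = ptrs[c]; s < ptrs[c + 1]; ++s) {
                if (indices[s] >= row_start && indices[s] < row_end) {
                    output[indices[s]] += values[s] * rhs[c];
                }
            }
        }
    }

    for (int r = 0; r < nrow; ++r) {
        std::printf("output[%d] = %g\n", r, output[r]);
    }
    return 0;
}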
    template<typename EigenVector_>
    void direct_multiply(const EigenVector_& rhs, EigenVector_& output) const {
        if (my_num_threads == 1) {
            for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
                output.coeffRef(c) = column_dot_product<typename EigenVector_::Scalar>(c, rhs);
            }
            return;
        }

        parallelize(my_num_threads, [&](int t) -> void {
            const auto curstart = my_primary_boundaries[t];
            const auto curend = my_primary_boundaries[t + 1];
            for (auto c = curstart; c < curend; ++c) {
                output.coeffRef(c) = column_dot_product<typename EigenVector_::Scalar>(c, rhs);
            }
        });
    }
private:
    template<typename Scalar_, class EigenVector_>
    Scalar_ column_dot_product(Eigen::Index p, const EigenVector_& rhs) const {
        PointerType primary_start = my_ptrs[p], primary_end = my_ptrs[p + 1];
        Scalar_ dot = 0;
        for (PointerType s = primary_start; s < primary_end; ++s) {
            dot += my_values[s] * rhs.coeff(my_indices[s]);
        }
        return dot;
    }
};
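direct_multiply() is embarrassingly parallel: when the product runs along the primary dimension (for example, computing the transposed product for a column-major matrix), each output element is an independent dot product between one column and the right-hand side, so the threads can simply split the columns. A minimal standalone sketch with toy data:

// Standalone illustration; not part of sparse.hpp.
#include <cstdio>
#include <vector>

int main() {
    // Hypothetical 4x3 CSC matrix, same toy data as before.
    std::vector<double> values { 1, 2, 3, 4, 5 };
    std::vector<int>    indices{ 0, 2, 1, 3, 2 };
    std::vector<int>    ptrs   { 0, 2, 4, 5 };
    std::vector<double> rhs    { 1, 1, 1, 1 };      // length = number of rows
    int ncol = 3;

    // Each output element is an independent dot product of one column with rhs,
    // so columns can be split across threads without any write conflicts.
    std::vector<double> output(ncol);
    for (int c = 0; c < ncol; ++c) {
        double dot = 0;
        for (int s = ptrs[c]; s < ptrs[c + 1]; ++s) {
            dot += values[s] * rhs[indices[s]];
        }
        output[c] = dot;
    }

    for (int c = 0; c < ncol; ++c) {
        std::printf("output[%d] = %g\n", c, output[c]);
    }
    return 0;
}

This is why direct_multiply() only needs my_primary_boundaries and no per-thread buffers: each thread writes to its own disjoint range of output elements.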
/**
 * Workspace for multiplication of a ParallelSparseMatrix.
 */
template<class EigenVector_, class ValueArray_, class IndexArray_, class PointerArray_>
class ParallelSparseWorkspace : public Workspace<EigenVector_> {
public:
    ParallelSparseWorkspace(const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& core) : my_core(core) {
        if (my_core.get_num_threads() > 1 && my_core.get_column_major()) {
            my_thread_buffers.resize(my_core.get_num_threads() - 1);
        }
    }

private:
    const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& my_core;
    std::vector<std::vector<typename EigenVector_::Scalar> > my_thread_buffers;

public:
    void multiply(const EigenVector_& right, EigenVector_& output) {
        if (my_core.get_column_major()) {
            my_core.indirect_multiply(right, my_thread_buffers, output);
        } else {
            my_core.direct_multiply(right, output);
        }
    }
};
/**
 * Workspace for multiplication of a transposed ParallelSparseMatrix.
 */
template<class EigenVector_, class ValueArray_, class IndexArray_, class PointerArray_>
class ParallelSparseAdjointWorkspace : public AdjointWorkspace<EigenVector_> {
public:
    ParallelSparseAdjointWorkspace(const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& core) : my_core(core) {
        if (my_core.get_num_threads() > 1 && !my_core.get_column_major()) {
            my_thread_buffers.resize(my_core.get_num_threads() - 1);
        }
    }

private:
    const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& my_core;
    std::vector<std::vector<typename EigenVector_::Scalar> > my_thread_buffers;

public:
    void multiply(const EigenVector_& right, EigenVector_& output) {
        if (my_core.get_column_major()) {
            my_core.direct_multiply(right, output);
        } else {
            my_core.indirect_multiply(right, my_thread_buffers, output);
        }
    }
};
/**
 * Workspace for realizing a ParallelSparseMatrix.
 */
template<class EigenMatrix_, class ValueArray_, class IndexArray_, class PointerArray_>
class ParallelSparseRealizeWorkspace : public RealizeWorkspace<EigenMatrix_> {
public:
    ParallelSparseRealizeWorkspace(const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& core) : my_core(core) {}

private:
    const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& my_core;

public:
    const EigenMatrix_& realize(EigenMatrix_& buffer) {
        const auto nr = my_core.rows(), nc = my_core.cols();
        buffer.resize(nr, nc);
        buffer.setZero();

        const auto& ptrs = my_core.get_pointers();
        const auto& indices = my_core.get_indices();
        const auto& values = my_core.get_values();

        typedef I<decltype(std::declval<PointerArray_>()[0])> PointerType;
        if (my_core.get_column_major()) {
            for (Eigen::Index c = 0; c < nc; ++c) {
                PointerType col_start = ptrs[c], col_end = ptrs[c + 1];
                for (PointerType s = col_start; s < col_end; ++s) {
                    buffer.coeffRef(indices[s], c) = values[s];
                }
            }
        } else {
            for (Eigen::Index r = 0; r < nr; ++r) {
                PointerType row_start = ptrs[r], row_end = ptrs[r + 1];
                for (PointerType s = row_start; s < row_end; ++s) {
                    buffer.coeffRef(r, indices[s]) = values[s];
                }
            }
        }

        return buffer;
    }
};
/**
 * Sparse matrix with customizable parallelization.
 */
template<class EigenVector_, class EigenMatrix_, class ValueArray_, class IndexArray_, class PointerArray_>
class ParallelSparseMatrix {
public:
    ParallelSparseMatrix() = default;

    ParallelSparseMatrix(Eigen::Index nrow, Eigen::Index ncol, ValueArray_ x, IndexArray_ i, PointerArray_ p, bool column_major, int num_threads) :
        my_core(nrow, ncol, std::move(x), std::move(i), std::move(p), column_major, num_threads)
    {}

private:
    ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_> my_core;

public:
    Eigen::Index rows() const { return my_core.rows(); }

    Eigen::Index cols() const { return my_core.cols(); }

    const ValueArray_& get_values() const { return my_core.get_values(); }

    const IndexArray_& get_indices() const { return my_core.get_indices(); }

    const PointerArray_& get_pointers() const { return my_core.get_pointers(); }

    typedef I<decltype(std::declval<PointerArray_>()[0])> PointerType;

    const std::vector<Eigen::Index>& get_primary_boundaries() const { return my_core.get_primary_boundaries(); }

    const std::vector<Eigen::Index>& get_secondary_boundaries() const { return my_core.get_secondary_boundaries(); }

    const std::vector<std::vector<PointerType> >& get_secondary_nonzero_boundaries() const { return my_core.get_secondary_nonzero_boundaries(); }

    std::unique_ptr<ParallelSparseWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> > new_known_workspace() const {
        return std::make_unique<ParallelSparseWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> >(my_core);
    }

    std::unique_ptr<ParallelSparseAdjointWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> > new_known_adjoint_workspace() const {
        return std::make_unique<ParallelSparseAdjointWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> >(my_core);
    }

    std::unique_ptr<ParallelSparseRealizeWorkspace<EigenMatrix_, ValueArray_, IndexArray_, PointerArray_> > new_known_realize_workspace() const {
        return std::make_unique<ParallelSparseRealizeWorkspace<EigenMatrix_, ValueArray_, IndexArray_, PointerArray_> >(my_core);
    }
};
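Finally, a minimal usage sketch: constructing a ParallelSparseMatrix from CSC arrays and multiplying through its workspaces. The include path and the template parameter order (vector type, matrix type, then the three array types) are assumptions made for illustration rather than verbatim library usage; consult the class declaration in sparse.hpp for the authoritative form. The data is a hypothetical toy matrix.

// Standalone illustration; not part of sparse.hpp.
#include <cstddef>
#include <utility>
#include <vector>
#include "Eigen/Dense"
#include "irlba/sparse.hpp"  // assumed install path of the header above

int main() {
    // Toy 4x3 matrix in compressed sparse column (column-major) form.
    std::vector<double> values { 1, 2, 3, 4, 5 };
    std::vector<int>    indices{ 0, 2, 1, 3, 2 };      // row index of each nonzero
    std::vector<std::size_t> ptrs{ 0, 2, 4, 5 };       // column offsets

    // Template parameter order is an assumption; see the class definition above.
    irlba::ParallelSparseMatrix<
        Eigen::VectorXd, Eigen::MatrixXd,
        std::vector<double>, std::vector<int>, std::vector<std::size_t>
    > mat(4, 3, std::move(values), std::move(indices), std::move(ptrs),
          /* column_major = */ true, /* num_threads = */ 2);

    Eigen::VectorXd rhs = Eigen::VectorXd::Ones(3);
    Eigen::VectorXd output(4);

    // Multiply through the workspaces, as the IRLBA driver would do internally.
    auto work = mat.new_known_workspace();
    work->multiply(rhs, output);        // output = mat * rhs

    Eigen::VectorXd tout(3);
    auto adjoint = mat.new_known_adjoint_workspace();
    adjoint->multiply(output, tout);    // tout = transpose(mat) * output

    // Densify into an Eigen matrix via the realize workspace.
    auto realizer = mat.new_known_realize_workspace();
    Eigen::MatrixXd buffer;
    const Eigen::MatrixXd& dense = realizer->realize(buffer);
    return dense.rows() == 4 ? 0 : 1;
}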
Cross-references from the generated documentation:

irlba (namespace; compute.hpp:22): implements IRLBA for approximate SVD.
irlba::parallelize(Task_ num_tasks, Run_ run_task) (parallel.hpp:33)
interface.hpp: interfaces for matrix inputs.
sparse.hpp: classes for parallelized multiplication.

Matrix (interface.hpp:142): interface for a matrix to use in compute().
Workspace (interface.hpp:24): workspace class for multiplying a Matrix.
AdjointWorkspace (interface.hpp:61): workspace class for multiplying a transposed Matrix.
RealizeWorkspace (interface.hpp:99): workspace class for realizing a Matrix.

ParallelSparseWorkspace (sparse.hpp:284): workspace for multiplication of a ParallelSparseMatrix.
  void multiply(const EigenVector_& right, EigenVector_& output) (sparse.hpp:305)
ParallelSparseAdjointWorkspace (sparse.hpp:325): workspace for multiplication of a transposed ParallelSparseMatrix.
  void multiply(const EigenVector_& right, EigenVector_& output) (sparse.hpp:346)
ParallelSparseRealizeWorkspace (sparse.hpp:366): workspace for realizing a ParallelSparseMatrix.
  const EigenMatrix_& realize(EigenMatrix_& buffer) (sparse.hpp:382)

ParallelSparseMatrix (sparse.hpp:445): sparse matrix with customizable parallelization.
  ParallelSparseMatrix() (sparse.hpp:451)
  ParallelSparseMatrix(Eigen::Index nrow, Eigen::Index ncol, ValueArray_ x, IndexArray_ i, PointerArray_ p, bool column_major, int num_threads) (sparse.hpp:469)
  Eigen::Index rows() const (sparse.hpp:480)
  Eigen::Index cols() const (sparse.hpp:487)
  const ValueArray_& get_values() const (sparse.hpp:495)
  const IndexArray_& get_indices() const (sparse.hpp:503)
  const PointerArray_& get_pointers() const (sparse.hpp:510)
  typedef I<decltype(std::declval<PointerArray_>()[0])> PointerType (sparse.hpp:517)
  const std::vector<Eigen::Index>& get_primary_boundaries() const (sparse.hpp:526)
  const std::vector<Eigen::Index>& get_secondary_boundaries() const (sparse.hpp:537)
  const std::vector<std::vector<PointerType> >& get_secondary_nonzero_boundaries() const (sparse.hpp:550)
  std::unique_ptr<Workspace<EigenVector_> > new_workspace() const (sparse.hpp:555)
  std::unique_ptr<AdjointWorkspace<EigenVector_> > new_adjoint_workspace() const (sparse.hpp:559)
  std::unique_ptr<RealizeWorkspace<EigenMatrix_> > new_realize_workspace() const (sparse.hpp:563)
  std::unique_ptr<ParallelSparseWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> > new_known_workspace() const (sparse.hpp:571)
  std::unique_ptr<ParallelSparseAdjointWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> > new_known_adjoint_workspace() const (sparse.hpp:578)
  std::unique_ptr<ParallelSparseRealizeWorkspace<EigenMatrix_, ValueArray_, IndexArray_, PointerArray_> > new_known_realize_workspace() const (sparse.hpp:585)