irlba
A C++ library for IRLBA
sparse.hpp
#ifndef IRLBA_MATRIX_SPARSE_HPP
#define IRLBA_MATRIX_SPARSE_HPP

#include <vector>
#include <memory>
#include <cstddef>

#include "../utils.hpp"
#include "../parallel.hpp"
#include "interface.hpp"

#include "Eigen/Dense"
#include "sanisizer/sanisizer.hpp"

#ifndef IRLBA_CUSTOM_PARALLEL
#include "subpar/subpar.hpp"
#endif

namespace irlba {

template<class ValueArray_, class IndexArray_, class PointerArray_ >
class ParallelSparseMatrixCore {
public:
    typedef I<decltype(std::declval<PointerArray_>()[0])> PointerType;

public:
    ParallelSparseMatrixCore(
        Eigen::Index nrow,
        Eigen::Index ncol,
        ValueArray_ x,
        IndexArray_ i,
        PointerArray_ p,
        bool column_major,
        int num_threads
    ) :
        my_primary_dim(column_major ? ncol : nrow),
        my_secondary_dim(column_major ? nrow : ncol),
        my_num_threads(num_threads),
        my_values(std::move(x)),
        my_indices(std::move(i)),
        my_ptrs(std::move(p)),
        my_column_major(column_major)
    {
        if (num_threads > 1) {
            const auto total_nzeros = my_ptrs[my_primary_dim]; // last element; not using back() to avoid an extra requirement on PointerArray.
            const PointerType per_thread_floor = total_nzeros / my_num_threads;
            const int per_thread_extra = total_nzeros % my_num_threads;

            // Note that we do a lot of 't + 1' incrementing, but this is guaranteed to fit in an int because 't + 1 <= my_num_threads'.
            // We just need 'my_num_threads + 1' to fit in a size_t for the various vector allocations.
            const auto nthreads_p1 = sanisizer::sum<std::size_t>(my_num_threads, 1);

            // Splitting primary dimension elements across threads so each thread processes the same number of nonzero elements.
            {
                sanisizer::resize(my_primary_boundaries, nthreads_p1);

                Eigen::Index primary_counter = 0;
                PointerType sofar = 0;
                for (int t = 0; t < my_num_threads; ++t) {
                    sofar += per_thread_floor + (t < per_thread_extra); // first few threads might get an extra element to deal with the remainder.
                    while (primary_counter < my_primary_dim && my_ptrs[primary_counter + 1] <= sofar) {
                        ++primary_counter;
                    }
                    my_primary_boundaries[t + 1] = primary_counter;
                }
            }

            // Splitting secondary dimension elements across threads so each thread processes the same number of nonzero elements.
            {
                auto secondary_nonzeros = sanisizer::create<std::vector<PointerType> >(my_secondary_dim);
                for (PointerType i = 0; i < total_nzeros; ++i) { // not using a range-based for loop, to avoid an extra requirement on IndexArray.
                    ++(secondary_nonzeros[my_indices[i]]);
                }

                sanisizer::resize(my_secondary_boundaries, nthreads_p1);
                Eigen::Index secondary_counter = 0;
                PointerType sofar = 0;
                PointerType cum_secondary = 0;
                for (int t = 0; t < my_num_threads; ++t) {
                    sofar += per_thread_floor + (t < per_thread_extra); // first few threads might get an extra element to deal with the remainder.
                    while (secondary_counter < my_secondary_dim && cum_secondary <= sofar) {
                        cum_secondary += secondary_nonzeros[secondary_counter];
                        ++secondary_counter;
                    }
                    my_secondary_boundaries[t + 1] = secondary_counter;
                }

                sanisizer::resize(my_secondary_nonzero_boundaries, nthreads_p1);
                for (auto& starts : my_secondary_nonzero_boundaries) {
                    sanisizer::resize(starts, my_primary_dim);
                }

                for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
                    const auto primary_start = my_ptrs[c], primary_end = my_ptrs[c + 1];
                    my_secondary_nonzero_boundaries[0][c] = primary_start;
                    auto s = primary_start;
                    for (int thread = 0; thread < my_num_threads; ++thread) {
                        const auto limit = my_secondary_boundaries[thread + 1];
                        while (s < primary_end && static_cast<Eigen::Index>(my_indices[s]) < limit) { // cast is safe as my_indices[s] < my_secondary_dim.
                            ++s;
                        }
                        my_secondary_nonzero_boundaries[thread + 1][c] = s;
                    }
                }
            }
        }
    }
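
    // Illustrative sketch of the balancing above (not from the original source):
    // suppose ptrs = {0, 2, 3, 7, 10} (4 primary elements, 10 nonzeros) and
    // num_threads = 3. Then per_thread_floor = 3 and per_thread_extra = 1, so the
    // cumulative targets ('sofar') are 4, 7 and 10; scanning 'ptrs' against these
    // targets yields my_primary_boundaries = {0, 2, 3, 4}, i.e., thread 0 gets
    // elements [0, 2) with 3 nonzeros, thread 1 gets element 2 with 4 nonzeros,
    // and thread 2 gets element 3 with 3 nonzeros. The secondary boundaries are
    // derived in the same way from the per-index nonzero counts, and
    // my_secondary_nonzero_boundaries[t][c] marks where thread t's share of
    // primary element c's nonzeros begins.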

private:
    Eigen::Index my_primary_dim, my_secondary_dim;
    int my_num_threads;

    ValueArray_ my_values;
    IndexArray_ my_indices;
    PointerArray_ my_ptrs;
    bool my_column_major;

    std::vector<Eigen::Index> my_primary_boundaries;

    // In theory, the element type of IndexArray_ (i.e., the index type) might not be large enough to hold my_secondary_dim.
    // So, while it is safe to cast from the index type to Eigen::Index, it is not safe to go the other way;
    // hence we use Eigen::Index to hold the secondary boundaries, as the last entry is equal to my_secondary_dim.
    std::vector<Eigen::Index> my_secondary_boundaries;

    std::vector<std::vector<PointerType> > my_secondary_nonzero_boundaries;

public:
    Eigen::Index rows() const {
        if (my_column_major) {
            return my_secondary_dim;
        } else {
            return my_primary_dim;
        }
    }

    Eigen::Index cols() const {
        if (my_column_major) {
            return my_primary_dim;
        } else {
            return my_secondary_dim;
        }
    }

    const ValueArray_& get_values() const {
        return my_values;
    }

    const IndexArray_& get_indices() const {
        return my_indices;
    }

    const PointerArray_& get_pointers() const {
        return my_ptrs;
    }

    int get_num_threads() const {
        return my_num_threads;
    }

    bool get_column_major() const {
        return my_column_major;
    }

    const std::vector<Eigen::Index>& get_primary_boundaries() const {
        return my_primary_boundaries;
    }

    const std::vector<Eigen::Index>& get_secondary_boundaries() const {
        return my_secondary_boundaries;
    }

    const std::vector<std::vector<PointerType> >& get_secondary_nonzero_boundaries() const {
        return my_secondary_nonzero_boundaries;
    }

public:
    template<typename EigenVector_>
    void indirect_multiply(const EigenVector_& rhs, std::vector<std::vector<typename EigenVector_::Scalar> >& thread_buffers, EigenVector_& output) const {
        if (my_num_threads == 1) {
            output.setZero();
            for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
                auto start = my_ptrs[c];
                auto end = my_ptrs[c + 1];
                auto val = rhs.coeff(c);
                for (PointerType s = start; s < end; ++s) {
                    output.coeffRef(my_indices[s]) += my_values[s] * val;
                }
            }
            return;
        }

        parallelize(my_num_threads, [&](int t) -> void {
            const auto secondary_start = my_secondary_boundaries[t];
            const auto secondary_end = my_secondary_boundaries[t + 1];
            const auto secondary_len = secondary_end - secondary_start;

            // Using a separate buffer for the other threads to avoid false
            // sharing. On first use, each buffer is allocated within each
            // thread to give malloc a chance of using thread-specific arenas.
            typename EigenVector_::Scalar* optr;
            if (t != 0) {
                auto& curbuffer = thread_buffers[t - 1];
                sanisizer::resize(curbuffer, secondary_len);
                optr = curbuffer.data();
            } else {
                optr = output.data() + secondary_start;
            }
            std::fill_n(optr, secondary_len, static_cast<typename EigenVector_::Scalar>(0));

            const auto& nz_starts = my_secondary_nonzero_boundaries[t];
            const auto& nz_ends = my_secondary_nonzero_boundaries[t + 1];
            for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
                const auto nz_start = nz_starts[c];
                const auto nz_end = nz_ends[c];
                const auto val = rhs.coeff(c);
                for (PointerType s = nz_start; s < nz_end; ++s) {
                    optr[my_indices[s] - secondary_start] += my_values[s] * val;
                }
            }

            if (t != 0) {
                std::copy_n(optr, secondary_len, output.data() + secondary_start);
            }
        });

        return;
    }
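
    // Illustrative note (not from the original source): this "indirect" product
    // accumulates along the secondary dimension, i.e., for column-major storage,
    // each nonzero A(i, c) adds values[s] * rhs[c] to output[i]. The secondary
    // indices are partitioned across threads, so every thread scans all primary
    // elements but only writes to the output entries it owns; the precomputed
    // my_secondary_nonzero_boundaries let each thread jump straight to its slice
    // of each primary element's nonzeros instead of re-searching the index array.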

public:
    template<typename EigenVector_>
    void direct_multiply(const EigenVector_& rhs, EigenVector_& output) const {
        if (my_num_threads == 1) {
            for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
                output.coeffRef(c) = column_dot_product<typename EigenVector_::Scalar>(c, rhs);
            }
            return;
        }

        parallelize(my_num_threads, [&](int t) -> void {
            const auto curstart = my_primary_boundaries[t];
            const auto curend = my_primary_boundaries[t + 1];
            for (auto c = curstart; c < curend; ++c) {
                output.coeffRef(c) = column_dot_product<typename EigenVector_::Scalar>(c, rhs);
            }
        });

        return;
    }
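
    // Illustrative note (not from the original source): the "direct" product
    // applies when the output dimension coincides with the primary (storage)
    // dimension, so that output[c] is just the dot product of primary element c
    // with 'rhs'. Primary elements can then be split across threads with no
    // write contention, which is why the workspaces below pick direct_multiply
    // when the output runs along the primary dimension and indirect_multiply
    // otherwise.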

private:
    template<typename Scalar_, class EigenVector_>
    Scalar_ column_dot_product(Eigen::Index p, const EigenVector_& rhs) const {
        PointerType primary_start = my_ptrs[p], primary_end = my_ptrs[p + 1];
        Scalar_ dot = 0;
        for (PointerType s = primary_start; s < primary_end; ++s) {
            dot += my_values[s] * rhs.coeff(my_indices[s]);
        }
        return dot;
    }
};

/**
 * Workspace for multiplication of a ParallelSparseMatrix.
 */
template<class EigenVector_, class ValueArray_, class IndexArray_, class PointerArray_ >
class ParallelSparseWorkspace final : public Workspace<EigenVector_> {
public:
    ParallelSparseWorkspace(const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& core) :
        my_core(core)
    {
        if (my_core.get_num_threads() > 1 && my_core.get_column_major()) {
            my_thread_buffers.resize(my_core.get_num_threads() - 1);
        }
    }

private:
    const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& my_core;
    std::vector<std::vector<typename EigenVector_::Scalar> > my_thread_buffers;

public:
    void multiply(const EigenVector_& right, EigenVector_& output) {
        if (my_core.get_column_major()) {
            my_core.indirect_multiply(right, my_thread_buffers, output);
        } else {
            my_core.direct_multiply(right, output);
        }
    }
};

/**
 * Workspace for multiplication of a transposed ParallelSparseMatrix.
 */
template<class EigenVector_, class ValueArray_, class IndexArray_, class PointerArray_ >
class ParallelSparseAdjointWorkspace final : public AdjointWorkspace<EigenVector_> {
public:
    ParallelSparseAdjointWorkspace(const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& core) :
        my_core(core)
    {
        if (my_core.get_num_threads() > 1 && !my_core.get_column_major()) {
            my_thread_buffers.resize(my_core.get_num_threads() - 1);
        }
    }

private:
    const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& my_core;
    std::vector<std::vector<typename EigenVector_::Scalar> > my_thread_buffers;

public:
    void multiply(const EigenVector_& right, EigenVector_& output) {
        if (my_core.get_column_major()) {
            my_core.direct_multiply(right, output);
        } else {
            my_core.indirect_multiply(right, my_thread_buffers, output);
        }
    }
};

/**
 * Workspace for realizing a ParallelSparseMatrix.
 */
template<class EigenMatrix_, class ValueArray_, class IndexArray_, class PointerArray_ >
class ParallelSparseRealizeWorkspace final : public RealizeWorkspace<EigenMatrix_> {
public:
    ParallelSparseRealizeWorkspace(const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& core) :
        my_core(core)
    {}

private:
    const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& my_core;

public:
    const EigenMatrix_& realize(EigenMatrix_& buffer) {
        const auto nr = my_core.rows(), nc = my_core.cols();
        buffer.resize(nr, nc);
        buffer.setZero();

        const auto& ptrs = my_core.get_pointers();
        const auto& indices = my_core.get_indices();
        const auto& values = my_core.get_values();

        typedef I<decltype(std::declval<PointerArray_>()[0])> PointerType;
        if (my_core.get_column_major()) {
            for (Eigen::Index c = 0; c < nc; ++c) {
                PointerType col_start = ptrs[c], col_end = ptrs[c + 1];
                for (PointerType s = col_start; s < col_end; ++s) {
                    buffer.coeffRef(indices[s], c) = values[s];
                }
            }
        } else {
            for (Eigen::Index r = 0; r < nr; ++r) {
                PointerType row_start = ptrs[r], row_end = ptrs[r + 1];
                for (PointerType s = row_start; s < row_end; ++s) {
                    buffer.coeffRef(r, indices[s]) = values[s];
                }
            }
        }

        return buffer;
    }
};

/**
 * Sparse matrix with customizable parallelization.
 */
template<
    class EigenVector_,
    class EigenMatrix_,
    class ValueArray_,
    class IndexArray_,
    class PointerArray_
>
class ParallelSparseMatrix final : public Matrix<EigenVector_, EigenMatrix_> {
public:
    ParallelSparseMatrix() = default;

    ParallelSparseMatrix(Eigen::Index nrow, Eigen::Index ncol, ValueArray_ x, IndexArray_ i, PointerArray_ p, bool column_major, int num_threads) :
        my_core(nrow, ncol, std::move(x), std::move(i), std::move(p), column_major, num_threads)
    {}

private:
    ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_> my_core;

public:
    Eigen::Index rows() const {
        return my_core.rows();
    }

    Eigen::Index cols() const {
        return my_core.cols();
    }

    const ValueArray_& get_values() const {
        return my_core.get_values();
    }

    const IndexArray_& get_indices() const {
        return my_core.get_indices();
    }

    const PointerArray_& get_pointers() const {
        return my_core.get_pointers();
    }

    typedef I<decltype(std::declval<PointerArray_>()[0])> PointerType;

    const std::vector<Eigen::Index>& get_primary_boundaries() const {
        return my_core.get_primary_boundaries();
    }

    const std::vector<Eigen::Index>& get_secondary_boundaries() const {
        return my_core.get_secondary_boundaries();
    }

    const std::vector<std::vector<PointerType> >& get_secondary_nonzero_boundaries() const {
        return my_core.get_secondary_nonzero_boundaries();
    }

public:
    std::unique_ptr<Workspace<EigenVector_> > new_workspace() const {
        return new_known_workspace();
    }

    std::unique_ptr<AdjointWorkspace<EigenVector_> > new_adjoint_workspace() const {
        return new_known_adjoint_workspace();
    }

    std::unique_ptr<RealizeWorkspace<EigenMatrix_> > new_realize_workspace() const {
        return new_known_realize_workspace();
    }

public:
    std::unique_ptr<ParallelSparseWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> > new_known_workspace() const {
        return std::make_unique<ParallelSparseWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> >(my_core);
    }

    std::unique_ptr<ParallelSparseAdjointWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> > new_known_adjoint_workspace() const {
        return std::make_unique<ParallelSparseAdjointWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> >(my_core);
    }

    std::unique_ptr<ParallelSparseRealizeWorkspace<EigenMatrix_, ValueArray_, IndexArray_, PointerArray_> > new_known_realize_workspace() const {
        return std::make_unique<ParallelSparseRealizeWorkspace<EigenMatrix_, ValueArray_, IndexArray_, PointerArray_> >(my_core);
    }
};

}

#endif
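
Usage sketch (not part of the header): the snippet below builds a small compressed sparse column matrix and multiplies it through the Workspace interface from interface.hpp. The include path, container types and thread count are illustrative assumptions, not requirements of the library; as a Matrix subclass, the same object can also be passed to compute() for the approximate SVD.

// example.cpp: a minimal, hypothetical usage sketch.
#include "irlba/matrix/sparse.hpp" // include path assumed from this file's location.
#include "Eigen/Dense"

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

int main() {
    // A 3x2 matrix in compressed sparse column form:
    // [ 1 0 ]
    // [ 0 2 ]
    // [ 3 0 ]
    std::vector<double> values{ 1.0, 3.0, 2.0 };
    std::vector<int> indices{ 0, 2, 1 };      // row indices, sorted within each column.
    std::vector<std::size_t> ptrs{ 0, 2, 3 }; // column pointers.

    irlba::ParallelSparseMatrix<
        Eigen::VectorXd,
        Eigen::MatrixXd,
        std::vector<double>,
        std::vector<int>,
        std::vector<std::size_t>
    > mat(3, 2, std::move(values), std::move(indices), std::move(ptrs), /* column_major = */ true, /* num_threads = */ 2);

    Eigen::VectorXd rhs(2);
    rhs << 1.0, 1.0;
    Eigen::VectorXd out(3);

    auto work = mat.new_workspace();
    work->multiply(rhs, out); // out = mat * rhs = (1, 2, 3).
    std::cout << out.transpose() << std::endl;

    Eigen::VectorXd lhs(3);
    lhs << 1.0, 1.0, 1.0;
    Eigen::VectorXd adj_out(2);

    auto adj_work = mat.new_adjoint_workspace();
    adj_work->multiply(lhs, adj_out); // adj_out = mat^T * lhs = (4, 2).
    std::cout << adj_out.transpose() << std::endl;
    return 0;
}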