1#ifndef IRLBA_MATRIX_SPARSE_HPP 
    2#define IRLBA_MATRIX_SPARSE_HPP 
   13#include "sanisizer/sanisizer.hpp" 
   15#ifndef IRLBA_CUSTOM_PARALLEL 
   16#include "subpar/subpar.hpp" 
   29template<
class ValueArray_, 
class IndexArray_, 
class Po
interArray_ >
 
   30class ParallelSparseMatrixCore {
 
   32    typedef I<decltype(std::declval<PointerArray_>()[0])> PointerType;
 
   35    ParallelSparseMatrixCore(
 
   44        my_primary_dim(column_major ? ncol : nrow), 
 
   45        my_secondary_dim(column_major ? nrow : ncol), 
 
   46        my_num_threads(num_threads), 
 
   47        my_values(std::move(x)), 
 
   48        my_indices(std::move(i)), 
 
   49        my_ptrs(std::move(p)),
 
   50        my_column_major(column_major)
 
   52        if (num_threads > 1) {
 
   53            const auto total_nzeros = my_ptrs[my_primary_dim]; 
 
   54            const PointerType per_thread_floor = total_nzeros / my_num_threads;
 
   55            const int per_thread_extra = total_nzeros % my_num_threads;
 
   59            const auto nthreads_p1 = sanisizer::sum<std::size_t>(my_num_threads, 1);
 
   63                sanisizer::resize(my_primary_boundaries, nthreads_p1);
 
   65                Eigen::Index primary_counter = 0;
 
   66                PointerType sofar = 0;
 
   67                for (
int t = 0; t < my_num_threads; ++t) {
 
   68                    sofar += per_thread_floor + (t < per_thread_extra); 
 
   69                    while (primary_counter < my_primary_dim && my_ptrs[primary_counter + 1] <= sofar) {
 
   72                    my_primary_boundaries[t + 1] = primary_counter;
 
   78                auto secondary_nonzeros = sanisizer::create<std::vector<PointerType> >(my_secondary_dim);
 
   79                for (PointerType i = 0; i < total_nzeros; ++i) { 
 
   80                    ++(secondary_nonzeros[my_indices[i]]);
 
   83                sanisizer::resize(my_secondary_boundaries, nthreads_p1);
 
   84                Eigen::Index secondary_counter = 0;
 
   85                PointerType sofar = 0;
 
   86                PointerType cum_secondary = 0;
 
   87                for (
int t = 0; t < my_num_threads; ++t) {
 
   88                    sofar += per_thread_floor + (t < per_thread_extra); 
 
   89                    while (secondary_counter < my_secondary_dim && cum_secondary <= sofar) {
 
   90                        cum_secondary += secondary_nonzeros[secondary_counter];
 
   93                    my_secondary_boundaries[t + 1] = secondary_counter;
 
   96                sanisizer::resize(my_secondary_nonzero_boundaries, nthreads_p1);
 
   97                for (
auto& starts : my_secondary_nonzero_boundaries) {
 
   98                    sanisizer::resize(starts, my_primary_dim);
 
  101                for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
 
  102                    const auto primary_start = my_ptrs[c], primary_end = my_ptrs[c + 1];
 
  103                    my_secondary_nonzero_boundaries[0][c] = primary_start;
 
  104                    auto s = primary_start;
 
  105                    for (
int thread = 0; thread < my_num_threads; ++thread) {
 
  106                        const auto limit = my_secondary_boundaries[thread + 1];
 
  107                        while (s < primary_end && my_indices[s] < limit) {
 
  110                        my_secondary_nonzero_boundaries[thread + 1][c] = s;
 
  118    Eigen::Index my_primary_dim, my_secondary_dim;
 
  121    ValueArray_ my_values;
 
  122    IndexArray_ my_indices;
 
  123    PointerArray_ my_ptrs;
 
  124    bool my_column_major;
 
  126    std::vector<Eigen::Index> my_primary_boundaries;
 
  127    std::vector<Eigen::Index> my_secondary_boundaries;
 
  128    std::vector<std::vector<PointerType> > my_secondary_nonzero_boundaries;
 
  131    Eigen::Index rows()
 const { 
 
  132        if (my_column_major) {
 
  133            return my_secondary_dim;
 
  135            return my_primary_dim;
 
  139    Eigen::Index cols()
 const { 
 
  140        if (my_column_major) {
 
  141            return my_primary_dim;
 
  143            return my_secondary_dim;
 
  147    const ValueArray_& get_values()
 const {
 
  151    const IndexArray_& get_indices()
 const {
 
  155    const PointerArray_& get_pointers()
 const {
 
  159    int get_num_threads()
 const {
 
  160        return my_num_threads;
 
  163    bool get_column_major()
 const {
 
  164        return my_column_major;
 
  167    const std::vector<Eigen::Index>& get_primary_boundaries()
 const {
 
  168        return my_primary_boundaries;
 
  171    const std::vector<Eigen::Index>& get_secondary_boundaries()
 const {
 
  172        return my_secondary_boundaries;
 
  175    const std::vector<std::vector<PointerType> >& get_secondary_nonzero_boundaries()
 const {
 
  176        return my_secondary_nonzero_boundaries;
 
  180    template<
typename EigenVector_>
 
  181    void indirect_multiply(
const EigenVector_& rhs, std::vector<std::vector<typename EigenVector_::Scalar> >& thread_buffers, EigenVector_& output)
 const {
 
  182        if (my_num_threads == 1) {
 
  184            for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
 
  185                auto start = my_ptrs[c];
 
  186                auto end = my_ptrs[c + 1];
 
  187                auto val = rhs.coeff(c);
 
  188                for (PointerType s = start; s < end; ++s) {
 
  189                    output.coeffRef(my_indices[s]) += my_values[s] * val;
 
  196            const auto secondary_start = my_secondary_boundaries[t];
 
  197            const auto secondary_end = my_secondary_boundaries[t + 1];
 
  198            const auto secondary_len = secondary_end - secondary_start;
 
  203            typename EigenVector_::Scalar* optr;
 
  205                auto& curbuffer = thread_buffers[t - 1];
 
  206                sanisizer::resize(curbuffer, secondary_len);
 
  207                optr = curbuffer.data();
 
  209                optr = output.data() + secondary_start;
 
  211            std::fill_n(optr, secondary_len, 
static_cast<typename EigenVector_::Scalar
>(0));
 
  213            const auto& nz_starts = my_secondary_nonzero_boundaries[t];
 
  214            const auto& nz_ends = my_secondary_nonzero_boundaries[t + 1];
 
  215            for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
 
  216                const auto nz_start = nz_starts[c];
 
  217                const auto nz_end = nz_ends[c];
 
  218                const auto val = rhs.coeff(c);
 
  219                for (PointerType s = nz_start; s < nz_end; ++s) {
 
  220                    optr[my_indices[s] - secondary_start] += my_values[s] * val;
 
  225                std::copy_n(optr, secondary_len, output.data() + secondary_start);
 
  233    template<
typename EigenVector_>
 
  234    void direct_multiply(
const EigenVector_& rhs, EigenVector_& output)
 const {
 
  235        if (my_num_threads == 1) {
 
  236            for (Eigen::Index c = 0; c < my_primary_dim; ++c) {
 
  237                output.coeffRef(c) = column_dot_product<typename EigenVector_::Scalar>(c, rhs);
 
  243            const auto curstart = my_primary_boundaries[t];
 
  244            const auto curend = my_primary_boundaries[t + 1];
 
  245            for (
auto c = curstart; c < curend; ++c) {
 
  246                output.coeffRef(c) = column_dot_product<typename EigenVector_::Scalar>(c, rhs);
 
  254    template<
typename Scalar_, 
class EigenVector_>
 
  255    Scalar_ column_dot_product(Eigen::Index p, 
const EigenVector_& rhs)
 const {
 
  256        PointerType primary_start = my_ptrs[p], primary_end = my_ptrs[p + 1];
 
  258        for (PointerType s = primary_start; s < primary_end; ++s) {
 
  259            dot += my_values[s] * rhs.coeff(my_indices[s]);
 
  278template<
class EigenVector_, 
class ValueArray_, 
class IndexArray_, 
class Po
interArray_ >
 
  287        if (my_core.get_num_threads() > 1 && my_core.get_column_major()) {
 
  288            my_thread_buffers.resize(my_core.get_num_threads() - 1);
 
  296    const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& my_core;
 
  297    std::vector<std::vector<typename EigenVector_::Scalar> > my_thread_buffers;
 
  300    void multiply(
const EigenVector_& right, EigenVector_& output) {
 
  301        if (my_core.get_column_major()) {
 
  302            my_core.indirect_multiply(right, my_thread_buffers, output);
 
  304            my_core.direct_multiply(right, output);
 
 
 
  319template<
class EigenVector_, 
class ValueArray_, 
class IndexArray_, 
class Po
interArray_ >
 
  328        if (my_core.get_num_threads() > 1 && !my_core.get_column_major()) {
 
  329            my_thread_buffers.resize(my_core.get_num_threads() - 1);
 
  337    const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& my_core;
 
  338    std::vector<std::vector<typename EigenVector_::Scalar> > my_thread_buffers;
 
  341    void multiply(
const EigenVector_& right, EigenVector_& output) {
 
  342        if (my_core.get_column_major()) {
 
  343            my_core.direct_multiply(right, output);
 
  345            my_core.indirect_multiply(right, my_thread_buffers, output);
 
 
 
  360template<
class EigenMatrix_, 
class ValueArray_, 
class IndexArray_, 
class Po
interArray_ >
 
  374    const ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_>& my_core;
 
  377    const EigenMatrix_& 
realize(EigenMatrix_& buffer) {
 
  378        const auto nr = my_core.rows(), nc = my_core.cols();
 
  379        buffer.resize(nr, nc);
 
  382        const auto& ptrs = my_core.get_pointers();
 
  383        const auto& indices = my_core.get_indices();
 
  384        const auto& values = my_core.get_values();
 
  386        typedef I<decltype(std::declval<PointerArray_>()[0])> PointerType;
 
  387        if (my_core.get_column_major()) {
 
  388            for (Eigen::Index c = 0; c < nc; ++c) {
 
  389                PointerType col_start = ptrs[c], col_end = ptrs[c + 1];
 
  390                for (PointerType s = col_start; s < col_end; ++s) {
 
  391                    buffer.coeffRef(indices[s], c) = values[s];
 
  395            for (Eigen::Index r = 0; r < nr; ++r) {
 
  396                PointerType row_start = ptrs[r], row_end = ptrs[r + 1];
 
  397                for (PointerType s = row_start; s < row_end; ++s) {
 
  398                    buffer.coeffRef(r, indices[s]) = values[s];
 
 
 
  464    ParallelSparseMatrix(Eigen::Index nrow, Eigen::Index ncol, ValueArray_ x, IndexArray_ i, PointerArray_ p, 
bool column_major, 
int num_threads) : 
 
  465        my_core(nrow, ncol, std::move(x), std::move(i), std::move(p), column_major, num_threads)
 
 
  469    ParallelSparseMatrixCore<ValueArray_, IndexArray_, PointerArray_> my_core;
 
  476        return my_core.rows();
 
 
  483        return my_core.cols();
 
 
  491        return my_core.get_values();
 
 
  499        return my_core.get_indices();
 
 
  506        return my_core.get_pointers();
 
 
  512    typedef I<decltype(std::declval<PointerArray_>()[0])> 
PointerType;
 
  522        return my_core.get_primary_boundaries();
 
 
  533        return my_core.get_secondary_boundaries();
 
 
  546        return my_core.get_secondary_nonzero_boundaries();
 
 
  566    std::unique_ptr<ParallelSparseWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> > 
new_known_workspace()
 const {
 
  567        return std::make_unique<ParallelSparseWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> >(my_core);
 
 
  574        return std::make_unique<ParallelSparseAdjointWorkspace<EigenVector_, ValueArray_, IndexArray_, PointerArray_> >(my_core);
 
 
  581        return std::make_unique<ParallelSparseRealizeWorkspace<EigenMatrix_, ValueArray_, IndexArray_, PointerArray_> >(my_core);
 
 
 
Workspace class for multiplying a transposed Matrix.
Definition interface.hpp:61
 
Interface for a matrix to use in compute().
Definition interface.hpp:142
 
Workspace for multiplication of a transposed ParallelSparseMatrix.
Definition sparse.hpp:320
 
void multiply(const EigenVector_ &right, EigenVector_ &output)
Definition sparse.hpp:341
 
Sparse matrix with customizable parallelization.
Definition sparse.hpp:440
 
ParallelSparseMatrix(Eigen::Index nrow, Eigen::Index ncol, ValueArray_ x, IndexArray_ i, PointerArray_ p, bool column_major, int num_threads)
Definition sparse.hpp:464
 
const std::vector< Eigen::Index > & get_primary_boundaries() const
Definition sparse.hpp:521
 
Eigen::Index cols() const
Definition sparse.hpp:482
 
const ValueArray_ & get_values() const
Definition sparse.hpp:490
 
std::unique_ptr< AdjointWorkspace< EigenVector_ > > new_adjoint_workspace() const
Definition sparse.hpp:554
 
Eigen::Index rows() const
Definition sparse.hpp:475
 
I< decltype(std::declval< PointerArray_ >()[0])> PointerType
Definition sparse.hpp:512
 
ParallelSparseMatrix()
Definition sparse.hpp:446
 
std::unique_ptr< RealizeWorkspace< EigenMatrix_ > > new_realize_workspace() const
Definition sparse.hpp:558
 
std::unique_ptr< ParallelSparseWorkspace< EigenVector_, ValueArray_, IndexArray_, PointerArray_ > > new_known_workspace() const
Definition sparse.hpp:566
 
const std::vector< std::vector< PointerType > > & get_secondary_nonzero_boundaries() const
Definition sparse.hpp:545
 
const PointerArray_ & get_pointers() const
Definition sparse.hpp:505
 
std::unique_ptr< ParallelSparseRealizeWorkspace< EigenMatrix_, ValueArray_, IndexArray_, PointerArray_ > > new_known_realize_workspace() const
Definition sparse.hpp:580
 
const IndexArray_ & get_indices() const
Definition sparse.hpp:498
 
std::unique_ptr< Workspace< EigenVector_ > > new_workspace() const
Definition sparse.hpp:550
 
const std::vector< Eigen::Index > & get_secondary_boundaries() const
Definition sparse.hpp:532
 
std::unique_ptr< ParallelSparseAdjointWorkspace< EigenVector_, ValueArray_, IndexArray_, PointerArray_ > > new_known_adjoint_workspace() const
Definition sparse.hpp:573
 
Workspace for realizing a ParallelSparseMatrix.
Definition sparse.hpp:361
 
const EigenMatrix_ & realize(EigenMatrix_ &buffer)
Definition sparse.hpp:377
 
Workspace for multiplication of a ParallelSparseMatrix.
Definition sparse.hpp:279
 
void multiply(const EigenVector_ &right, EigenVector_ &output)
Definition sparse.hpp:300
 
Workspace class for realizing a Matrix.
Definition interface.hpp:99
 
Workspace class for multiplying a Matrix.
Definition interface.hpp:24
 
Interfaces for matrix inputs.
 
Implements IRLBA for approximate SVD.
Definition compute.hpp:22
 
void parallelize(Task_ num_tasks, Run_ run_task)
Definition parallel.hpp:33
 
Classes for parallelized multiplication.