scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
SimplePca.hpp
Go to the documentation of this file.
1#ifndef SCRAN_SIMPLE_PCA_HPP
2#define SCRAN_SIMPLE_PCA_HPP
3
4#include "../utils/macros.hpp"
5
6#include "tatami/tatami.hpp"
7#include "irlba/irlba.hpp"
8#include "Eigen/Dense"
9
10#include <vector>
11#include <cmath>
12
13#include "convert.hpp"
14#include "utils.hpp"
15
22namespace scran {
23
33class SimplePca {
34public:
38 struct Defaults {
42 static constexpr int rank = 10;
43
47 static constexpr bool scale = false;
48
52 static constexpr bool transpose = true;
53
57 static constexpr int num_threads = 1;
58
62 static constexpr bool return_rotation = false;
63
67 static constexpr bool return_center = false;
68
72 static constexpr bool return_scale = false;
73 };
74private:
75 bool scale = Defaults::scale;
76 bool transpose = Defaults::transpose;
77 int rank = Defaults::rank;
78
79 bool return_rotation = Defaults::return_rotation;
80 bool return_center = Defaults::return_center;
81 bool return_scale = Defaults::return_scale;
82
83 int nthreads = Defaults::num_threads;
84
85public:
94 rank = r;
95 return *this;
96 }
97
104 scale = s;
105 return *this;
106 }
107
115 transpose = t;
116 return *this;
117 }
118
125 return_rotation = r;
126 return *this;
127 }
128
135 return_center = r;
136 return *this;
137 }
138
145 return_scale = r;
146 return *this;
147 }
148
154 nthreads = n;
155 return *this;
156 }
157
158private:
159 template<typename Data_, typename Index_>
160 void run_sparse(
161 const tatami::Matrix<Data_, Index_>* mat,
162 const irlba::Irlba& irb,
163 Eigen::MatrixXd& pcs,
164 Eigen::MatrixXd& rotation,
165 Eigen::VectorXd& variance_explained,
166 Eigen::VectorXd& center_v,
167 Eigen::VectorXd& scale_v,
168 double& total_var)
169 const {
170 auto extracted = pca_utils::extract_sparse_for_pca(mat, nthreads); // row-major extraction to create a CSR matrix.
171 pca_utils::SparseMatrix emat(mat->ncol(), mat->nrow(), std::move(extracted.values), std::move(extracted.indices), std::move(extracted.ptrs), nthreads); // CSC with genes in columns.
172
173 size_t ngenes = emat.cols();
174 center_v.resize(ngenes);
175 scale_v.resize(ngenes);
176 pca_utils::compute_mean_and_variance_from_sparse_matrix(emat, center_v, scale_v, nthreads);
177 total_var = pca_utils::process_scale_vector(scale, scale_v);
178
179 irlba::Centered<decltype(emat)> centered(&emat, &center_v);
180 if (scale) {
181 irlba::Scaled<decltype(centered)> scaled(&centered, &scale_v);
182 irb.run(scaled, pcs, rotation, variance_explained);
183 } else {
184 irb.run(centered, pcs, rotation, variance_explained);
185 }
186 }
187
188 template<typename Data_, typename Index_>
189 void run_dense(
190 const tatami::Matrix<Data_, Index_>* mat,
191 const irlba::Irlba& irb,
192 Eigen::MatrixXd& pcs,
193 Eigen::MatrixXd& rotation,
194 Eigen::VectorXd& variance_explained,
195 Eigen::VectorXd& center_v,
196 Eigen::VectorXd& scale_v,
197 double& total_var)
198 const {
199 auto emat = pca_utils::extract_dense_for_pca(mat, nthreads); // get a column-major matrix with genes in columns.
200
201 size_t ngenes = emat.cols();
202 center_v.resize(ngenes);
203 scale_v.resize(ngenes);
204 pca_utils::compute_mean_and_variance_from_dense_matrix(emat, center_v, scale_v, nthreads);
205 total_var = pca_utils::process_scale_vector(scale, scale_v);
206
207 // Applying the centering and scaling now so we can do the PCA without any wrappers.
208 pca_utils::apply_center_and_scale_to_dense_matrix(emat, center_v, scale, scale_v, nthreads);
209 irb.run(emat, pcs, rotation, variance_explained);
210 }
211
212 template<typename Data_, typename Index_>
213 void run_internal(
214 const tatami::Matrix<Data_, Index_>* mat,
215 Eigen::MatrixXd& pcs,
216 Eigen::MatrixXd& rotation,
217 Eigen::VectorXd& variance_explained,
218 Eigen::VectorXd& center_v,
219 Eigen::VectorXd& scale_v,
220 double& total_var)
221 const {
222 irlba::EigenThreadScope t(nthreads);
223 irlba::Irlba irb;
224 irb.set_number(rank);
225 irb.set_cap_number(true);
226
227 if (mat->sparse()) {
228 run_sparse(mat, irb, pcs, rotation, variance_explained, center_v, scale_v, total_var);
229 } else {
230 run_dense(mat, irb, pcs, rotation, variance_explained, center_v, scale_v, total_var);
231 }
232
233 pca_utils::clean_up(mat->ncol(), pcs, variance_explained);
234 if (transpose) {
235 pcs.adjointInPlace();
236 }
237 }
238
239public:
245 struct Results {
252 Eigen::MatrixXd pcs;
253
258 Eigen::VectorXd variance_explained;
259
264 double total_variance = 0;
265
272 Eigen::MatrixXd rotation;
273
279 Eigen::VectorXd center;
280
286 Eigen::VectorXd scale;
287 };
288
300 template<typename T, typename IDX>
301 Results run(const tatami::Matrix<T, IDX>* mat) const {
302 Results output;
303
304 Eigen::MatrixXd rotation;
305 Eigen::VectorXd center, scale;
306 run_internal(mat, output.pcs, rotation, output.variance_explained, center, scale, output.total_variance);
307
308 // Shifting them if we want to keep them.
309 if (return_rotation) {
310 output.rotation = std::move(rotation);
311 }
312 if (return_center) {
313 output.center = std::move(center);
314 }
315 if (return_scale) {
316 output.scale = std::move(scale);
317 }
318
319 return output;
320 }
321
338 template<typename T, typename IDX, typename X>
339 Results run(const tatami::Matrix<T, IDX>* mat, const X* features) const {
340 if (!features) {
341 return run(mat);
342 } else {
343 auto subsetted = pca_utils::subset_matrix_by_features(mat, features);
344 return run(subsetted.get());
345 }
346 }
347};
348
349}
350
351#endif
Perform a simple PCA on a gene-cell matrix.
Definition SimplePca.hpp:33
SimplePca & set_return_rotation(bool r=Defaults::return_rotation)
Definition SimplePca.hpp:124
SimplePca & set_rank(int r=Defaults::rank)
Definition SimplePca.hpp:93
Results run(const tatami::Matrix< T, IDX > *mat, const X *features) const
Definition SimplePca.hpp:339
SimplePca & set_return_center(bool r=Defaults::return_center)
Definition SimplePca.hpp:134
SimplePca & set_num_threads(int n=Defaults::num_threads)
Definition SimplePca.hpp:153
Results run(const tatami::Matrix< T, IDX > *mat) const
Definition SimplePca.hpp:301
SimplePca & set_return_scale(bool r=Defaults::return_scale)
Definition SimplePca.hpp:144
SimplePca & set_transpose(bool t=Defaults::transpose)
Definition SimplePca.hpp:114
SimplePca & set_scale(bool s=Defaults::scale)
Definition SimplePca.hpp:103
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
Default parameter settings.
Definition SimplePca.hpp:38
static constexpr bool transpose
Definition SimplePca.hpp:52
static constexpr bool return_rotation
Definition SimplePca.hpp:62
static constexpr int rank
Definition SimplePca.hpp:42
static constexpr int num_threads
Definition SimplePca.hpp:57
static constexpr bool scale
Definition SimplePca.hpp:47
static constexpr bool return_center
Definition SimplePca.hpp:67
static constexpr bool return_scale
Definition SimplePca.hpp:72
Container for the PCA results.
Definition SimplePca.hpp:245
double total_variance
Definition SimplePca.hpp:264
Eigen::VectorXd variance_explained
Definition SimplePca.hpp:258
Eigen::MatrixXd rotation
Definition SimplePca.hpp:272
Eigen::VectorXd scale
Definition SimplePca.hpp:286
Eigen::MatrixXd pcs
Definition SimplePca.hpp:252
Eigen::VectorXd center
Definition SimplePca.hpp:279