scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
ResidualPca.hpp
Go to the documentation of this file.
1#ifndef SCRAN_RESIDUAL_PCA_HPP
2#define SCRAN_RESIDUAL_PCA_HPP
3
4#include "../utils/macros.hpp"
5
6#include "tatami/tatami.hpp"
7
8#include "irlba/irlba.hpp"
9#include "Eigen/Dense"
10
11#include <vector>
12#include <cmath>
13
14#include "utils.hpp"
15#include "convert.hpp"
16#include "wrappers.hpp"
17#include "blocking.hpp"
18
25namespace scran {
26
47public:
51 struct Defaults {
55 static constexpr int rank = 10;
56
60 static constexpr bool scale = false;
61
65 static constexpr bool transpose = true;
66
70 static constexpr int num_threads = 1;
71
75 static constexpr WeightPolicy block_weight_policy = WeightPolicy::VARIABLE;
76
81
85 static constexpr bool return_rotation = false;
86
90 static constexpr bool return_center = false;
91
95 static constexpr bool return_scale = false;
96 };
97
98private:
99 bool scale = Defaults::scale;
100 bool transpose = Defaults::transpose;
101 int rank = Defaults::rank;
102
103 bool return_rotation = Defaults::return_rotation;
104 bool return_center = Defaults::return_center;
105 bool return_scale = Defaults::return_scale;
106
107 WeightPolicy block_weight_policy = Defaults::block_weight_policy;
109
110 int nthreads = Defaults::num_threads;
111
112public:
121 rank = r;
122 return *this;
123 }
124
131 scale = s;
132 return *this;
133 }
134
142 transpose = t;
143 return *this;
144 }
145
152 return_rotation = r;
153 return *this;
154 }
155
162 return_center = r;
163 return *this;
164 }
165
172 return_scale = r;
173 return *this;
174 }
175
182 block_weight_policy = w;
183 return *this;
184 }
185
196
202 nthreads = n;
203 return *this;
204 }
205
206private:
207 template<bool weight_, typename Data_, typename Index_, typename Block_>
208 void run_sparse(
209 const tatami::Matrix<Data_, Index_>* mat,
210 const Block_* block,
211 const pca_utils::BlockingDetails<weight_>& block_details,
212 const irlba::Irlba& irb,
213 Eigen::MatrixXd& pcs,
214 Eigen::MatrixXd& rotation,
215 Eigen::VectorXd& variance_explained,
216 Eigen::MatrixXd& center_m,
217 Eigen::VectorXd& scale_v,
218 double& total_var)
219 const {
220 auto ngenes = mat->nrow(), ncells = mat->ncol();
221 auto extracted = pca_utils::extract_sparse_for_pca(mat, nthreads); // row-major filling.
222 pca_utils::SparseMatrix emat(ncells, ngenes, std::move(extracted.values), std::move(extracted.indices), std::move(extracted.ptrs), nthreads); // CSC with genes in columns.
223
224 auto nblocks = block_details.num_blocks();
225 center_m.resize(nblocks, ngenes);
226 scale_v.resize(ngenes);
227 pca_utils::compute_mean_and_variance_regress<weight_>(emat, block, block_details, center_m, scale_v, nthreads);
228 total_var = pca_utils::process_scale_vector(scale, scale_v);
229
230 pca_utils::RegressWrapper<decltype(emat), Block_> centered(&emat, block, &center_m);
231 if constexpr(weight_) {
232 if (scale) {
233 irlba::Scaled<decltype(centered)> scaled(&centered, &scale_v);
234 pca_utils::SampleScaledWrapper<decltype(scaled)> weighted(&scaled, &(block_details.expanded_weights));
235 irb.run(weighted, pcs, rotation, variance_explained);
236 } else {
237 pca_utils::SampleScaledWrapper<decltype(centered)> weighted(&centered, &(block_details.expanded_weights));
238 irb.run(weighted, pcs, rotation, variance_explained);
239 }
240
241 // This transposes 'pcs' to be a NDIM * NCELLS matrix.
242 pca_utils::project_sparse_matrix(emat, pcs, rotation, scale, scale_v, nthreads);
243
244 // Subtracting each block's mean from the PCs.
245 Eigen::MatrixXd centering;
246 if (scale) {
247 centering = (center_m * (rotation.array().colwise() / scale_v.array()).matrix()).adjoint();
248 } else {
249 centering = (center_m * rotation).adjoint();
250 }
251 for (size_t i = 0, iend = pcs.cols(); i < iend; ++i) {
252 pcs.col(i) -= centering.col(block[i]);
253 }
254
255 pca_utils::clean_up_projected<true>(pcs, variance_explained);
256 if (!transpose) {
257 pcs.adjointInPlace();
258 }
259
260 } else {
261 if (scale) {
262 irlba::Scaled<decltype(centered)> scaled(&centered, &scale_v);
263 irb.run(scaled, pcs, rotation, variance_explained);
264 } else {
265 irb.run(centered, pcs, rotation, variance_explained);
266 }
267
268 pca_utils::clean_up(mat->ncol(), pcs, variance_explained);
269 if (transpose) {
270 pcs.adjointInPlace();
271 }
272 }
273 }
274
275 template<bool weight_, typename Data_, typename Index_, typename Block_>
276 void run_dense(
277 const tatami::Matrix<Data_, Index_>* mat,
278 const Block_* block,
279 const pca_utils::BlockingDetails<weight_>& block_details,
280 const irlba::Irlba& irb,
281 Eigen::MatrixXd& pcs,
282 Eigen::MatrixXd& rotation,
283 Eigen::VectorXd& variance_explained,
284 Eigen::MatrixXd& center_m,
285 Eigen::VectorXd& scale_v,
286 double& total_var)
287 const {
288 auto emat = pca_utils::extract_dense_for_pca(mat, nthreads); // get a column-major matrix with genes in columns.
289
290 auto ngenes = emat.cols();
291 auto nblocks = block_details.num_blocks();
292 center_m.resize(nblocks, ngenes);
293 scale_v.resize(ngenes);
294 pca_utils::compute_mean_and_variance_regress<weight_>(emat, block, block_details, center_m, scale_v, nthreads);
295 total_var = pca_utils::process_scale_vector(scale, scale_v);
296
297 // Applying the centering and scaling directly so that we can run the PCA with no or fewer layers.
298 tatami::parallelize([&](size_t, size_t start, size_t length) -> void {
299 size_t ncells = emat.rows();
300 double* ptr = emat.data() + static_cast<size_t>(start) * ncells;
301 for (size_t g = start, end = start + length; g < end; ++g, ptr += ncells) {
302 for (size_t c = 0; c < ncells; ++c) {
303 ptr[c] -= center_m.coeff(block[c], g);
304 }
305
306 if (scale) {
307 auto sd = scale_v[g];
308 for (size_t c = 0; c < ncells; ++c) {
309 ptr[c] /= sd; // process_scale_vector should already protect against division by zero.
310 }
311 }
312 }
313 }, ngenes, nthreads);
314
315 if constexpr(weight_) {
316 pca_utils::SampleScaledWrapper<decltype(emat)> weighted(&emat, &(block_details.expanded_weights));
317 irb.run(weighted, pcs, rotation, variance_explained);
318 pcs.noalias() = emat * rotation;
319 pca_utils::clean_up_projected<false>(pcs, variance_explained);
320 } else {
321 irb.run(emat, pcs, rotation, variance_explained);
322 pca_utils::clean_up(pcs.rows(), pcs, variance_explained);
323 }
324
325 if (transpose) {
326 pcs.adjointInPlace();
327 }
328 }
329
330 template<typename Data_, typename Index_, typename Block_>
331 void run_internal(
332 const tatami::Matrix<Data_, Index_>* mat,
333 const Block_* block,
334 Eigen::MatrixXd& pcs,
335 Eigen::MatrixXd& rotation,
336 Eigen::VectorXd& variance_explained,
337 Eigen::MatrixXd& center_m,
338 Eigen::VectorXd& scale_v,
339 double& total_var)
340 const {
341 irlba::EigenThreadScope t(nthreads);
342 irlba::Irlba irb;
343 irb.set_number(rank);
344 irb.set_cap_number(true);
345
346 if (block_weight_policy == WeightPolicy::NONE) {
347 auto bdetails = pca_utils::compute_blocking_details(mat->ncol(), block);
348 if (mat->sparse()) {
349 run_sparse<false>(mat, block, bdetails, irb, pcs, rotation, variance_explained, center_m, scale_v, total_var);
350 } else {
351 run_dense<false>(mat, block, bdetails, irb, pcs, rotation, variance_explained, center_m, scale_v, total_var);
352 }
353
354 } else {
355 auto bdetails = pca_utils::compute_blocking_details(mat->ncol(), block, block_weight_policy, variable_block_weight_parameters);
356 if (mat->sparse()) {
357 run_sparse<true>(mat, block, bdetails, irb, pcs, rotation, variance_explained, center_m, scale_v, total_var);
358 } else {
359 run_dense<true>(mat, block, bdetails, irb, pcs, rotation, variance_explained, center_m, scale_v, total_var);
360 }
361 }
362 }
363
364public:
370 struct Results {
377 Eigen::MatrixXd pcs;
378
383 Eigen::VectorXd variance_explained;
384
389 double total_variance = 0;
390
397 Eigen::MatrixXd rotation;
398
405 Eigen::MatrixXd center;
406
412 Eigen::VectorXd scale;
413 };
414
429 template<typename Data_, typename Index_, typename Block_>
430 Results run(const tatami::Matrix<Data_, Index_>* mat, const Block_* block) const {
431 Results output;
432 Eigen::MatrixXd rotation, center;
433 Eigen::VectorXd scale;
434
435 run_internal(mat, block, output.pcs, rotation, output.variance_explained, center, scale, output.total_variance);
436
437 // Shifting them if we want to keep them.
438 if (return_rotation) {
439 output.rotation = std::move(rotation);
440 }
441 if (return_center) {
442 output.center = center.adjoint();
443 }
444 if (return_scale) {
445 output.scale = std::move(scale);
446 }
447
448 return output;
449 }
450
470 template<typename Data_, typename Index_, typename Block_, typename Subset_>
471 Results run(const tatami::Matrix<Data_, Index_>* mat, const Block_* block, const Subset_* features) const {
472 if (!features) {
473 return run(mat, block);
474 } else {
475 auto subsetted = pca_utils::subset_matrix_by_features(mat, features);
476 return run(subsetted.get(), block);
477 }
478 }
479};
480
481}
482
483#endif
Utilities for handling blocks of cells.
Compute PCA after regressing out an uninteresting factor.
Definition ResidualPca.hpp:46
ResidualPca & set_return_rotation(bool r=Defaults::return_rotation)
Definition ResidualPca.hpp:151
Results run(const tatami::Matrix< Data_, Index_ > *mat, const Block_ *block) const
Definition ResidualPca.hpp:430
ResidualPca & set_transpose(bool t=Defaults::transpose)
Definition ResidualPca.hpp:141
ResidualPca & set_variable_block_weight_parameters(VariableBlockWeightParameters v=Defaults::variable_block_weight_parameters)
Definition ResidualPca.hpp:192
ResidualPca & set_return_scale(bool r=Defaults::return_scale)
Definition ResidualPca.hpp:171
ResidualPca & set_block_weight_policy(WeightPolicy w=Defaults::block_weight_policy)
Definition ResidualPca.hpp:181
Results run(const tatami::Matrix< Data_, Index_ > *mat, const Block_ *block, const Subset_ *features) const
Definition ResidualPca.hpp:471
ResidualPca & set_scale(bool s=Defaults::scale)
Definition ResidualPca.hpp:130
ResidualPca & set_num_threads(int n=Defaults::num_threads)
Definition ResidualPca.hpp:201
ResidualPca & set_rank(int r=Defaults::rank)
Definition ResidualPca.hpp:120
ResidualPca & set_return_center(bool r=Defaults::return_center)
Definition ResidualPca.hpp:161
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
WeightPolicy
Definition blocking.hpp:82
Default parameter settings.
Definition ResidualPca.hpp:51
static constexpr VariableBlockWeightParameters variable_block_weight_parameters
Definition ResidualPca.hpp:80
static constexpr int rank
Definition ResidualPca.hpp:55
static constexpr bool return_rotation
Definition ResidualPca.hpp:85
static constexpr bool transpose
Definition ResidualPca.hpp:65
static constexpr bool return_scale
Definition ResidualPca.hpp:95
static constexpr bool scale
Definition ResidualPca.hpp:60
static constexpr WeightPolicy block_weight_policy
Definition ResidualPca.hpp:75
static constexpr int num_threads
Definition ResidualPca.hpp:70
static constexpr bool return_center
Definition ResidualPca.hpp:90
Container for the PCA results.
Definition ResidualPca.hpp:370
double total_variance
Definition ResidualPca.hpp:389
Eigen::MatrixXd rotation
Definition ResidualPca.hpp:397
Eigen::VectorXd variance_explained
Definition ResidualPca.hpp:383
Eigen::MatrixXd pcs
Definition ResidualPca.hpp:377
Eigen::MatrixXd center
Definition ResidualPca.hpp:405
Eigen::VectorXd scale
Definition ResidualPca.hpp:412
Parameters for variable_block_weight().
Definition blocking.hpp:87