scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
ScoreFeatureSet.hpp
Go to the documentation of this file.
1#ifndef SCRAN_SCORE_FEATURE_SET_HPP
2#define SCRAN_SCORE_FEATURE_SET_HPP
3
4#include "../utils/macros.hpp"
5
6#include <algorithm>
7#include <vector>
8#include "tatami/tatami.hpp"
9
10#include "irlba/irlba.hpp"
11#include "Eigen/Dense"
12
13#include "../dimensionality_reduction/SimplePca.hpp"
14#include "../dimensionality_reduction/ResidualPca.hpp"
15
21namespace scran {
22
42public:
46 struct Defaults {
50 static constexpr WeightPolicy block_weight_policy = WeightPolicy::VARIABLE;
51
55 static constexpr int num_threads = 1;
56
60 static constexpr bool scale = false;
61 };
62
63private:
64 WeightPolicy block_weight_policy = Defaults::block_weight_policy;
65 int nthreads = Defaults::num_threads;
66 bool scale = Defaults::scale;
67
68public:
74 block_weight_policy = b;
75 return *this;
76 }
77
83 nthreads = n;
84 return *this;
85 }
86
92 scale = s;
93 return *this;
94 }
95
96public:
100 struct Results {
105 std::vector<double> scores;
106
112 std::vector<double> weights;
113 };
114
115private:
116 /*
117 * We have the first PC 'P' and the first rotation vector 'R', plus a centering vector 'C'
118 * and scaling vector 'S'. The low-rank approximation is defined as (using R syntax):
119 *
120 * L = outer(R, P) * S + C
121 * = outer(R * S, P) + C
122 *
123 * Remember that we want the column means of the rank-1 approximation, so:
124 *
125 * colMeans(L) = mean(R * S) * P + colMeans(C)
126 *
127 * If scale = false, then S can be dropped from the above expression.
128 */
129 double compute_multiplier(const Eigen::MatrixXd& rotation, const Eigen::VectorXd& scale_v) const {
130 auto first_rot = rotation.col(0);
131
132 double multiplier = 0;
133 if (scale) {
134 for (Eigen::Index i = 0, end = first_rot.size(); i < end; ++i) {
135 multiplier += scale_v.coeff(i) * first_rot.coeff(i);
136 }
137 } else {
138 multiplier = std::accumulate(first_rot.begin(), first_rot.end(), 0.0);
139 }
140
141 // no need to protect against zero rows, as that should already be caught.
142 return multiplier / first_rot.size();
143 }
144
145 void transfer_rotation(const Eigen::MatrixXd& rotation, std::vector<double>& weights) const {
146 auto first_rot = rotation.col(0);
147 weights.insert(weights.end(), first_rot.begin(), first_rot.end());
148 return;
149 }
150
151public:
169 template<typename T, typename IDX, typename X, typename Block>
170 Results run_blocked(const tatami::Matrix<T, IDX>* mat, const X* features, const Block* block) const {
171 std::shared_ptr<const tatami::Matrix<T, IDX> > subsetted = pca_utils::subset_matrix_by_features(mat, features);
172 auto NR = subsetted->nrow();
173 auto NC = subsetted->ncol();
174
175 // Catching edge cases.
176 if (NR == 0) {
177 Results output;
178 output.scores.resize(NC);
179 return output;
180 } else if (NR == 1) {
181 Results output;
182 output.weights.push_back(1);
183 output.scores = subsetted->dense_row()->fetch(0);
184 return output;
185 } else if (NC == 0) {
186 Results output;
187 output.weights.resize(NR);
188 return output;
189 }
190
191 Results output;
192
193 if (block == NULL) {
194 SimplePca runner;
195 runner.set_rank(1);
196 runner.set_scale(scale);
197 runner.set_num_threads(nthreads);
199
200 auto temp = runner.run(subsetted.get());
201 transfer_rotation(temp.rotation, output.weights);
202 double multiplier = compute_multiplier(temp.rotation, temp.scale);
203 double shift = std::accumulate(temp.center.begin(), temp.center.end(), 0.0) / temp.center.size();
204
205 output.scores.resize(temp.pcs.cols());
206 for (Eigen::Index c = 0, end = temp.pcs.cols(); c < end; ++c) {
207 output.scores[c] = temp.pcs.coeff(0, c) * multiplier + shift;
208 }
209
210 } else {
211 ResidualPca runner;
212 runner.set_rank(1);
213 runner.set_scale(scale);
214 runner.set_num_threads(nthreads);
216 runner.set_block_weight_policy(block_weight_policy);
217
218 auto temp = runner.run(subsetted.get(), block);
219 transfer_rotation(temp.rotation, output.weights);
220 double multiplier = compute_multiplier(temp.rotation, temp.scale);
221
222 // Here, we restore the block-specific centers. Don't be tempted into
223 // using MultiBatchPca, as that doesn't yield a rank-1 approximation
224 // that preserves global shifts between blocks.
225 Eigen::VectorXd shift = temp.center.colwise().sum() / temp.center.rows();
226 output.scores.resize(temp.pcs.cols());
227 for (Eigen::Index c = 0, end = temp.pcs.cols(); c < end; ++c) {
228 output.scores[c] = temp.pcs.coeff(0, c) * multiplier + shift.coeff(block[c]);
229 }
230 }
231
232 return output;
233 }
234
249 template<typename T, typename IDX, typename X>
250 Results run(const tatami::Matrix<T, IDX>* mat, const X* features) const {
251 return run_blocked(mat, features, static_cast<unsigned char*>(NULL));
252 }
253};
254
255}
256
257#endif
Compute PCA after regressing out an uninteresting factor.
Definition ResidualPca.hpp:46
ResidualPca & set_return_rotation(bool r=Defaults::return_rotation)
Definition ResidualPca.hpp:151
Results run(const tatami::Matrix< Data_, Index_ > *mat, const Block_ *block) const
Definition ResidualPca.hpp:430
ResidualPca & set_return_scale(bool r=Defaults::return_scale)
Definition ResidualPca.hpp:171
ResidualPca & set_block_weight_policy(WeightPolicy w=Defaults::block_weight_policy)
Definition ResidualPca.hpp:181
ResidualPca & set_scale(bool s=Defaults::scale)
Definition ResidualPca.hpp:130
ResidualPca & set_num_threads(int n=Defaults::num_threads)
Definition ResidualPca.hpp:201
ResidualPca & set_rank(int r=Defaults::rank)
Definition ResidualPca.hpp:120
ResidualPca & set_return_center(bool r=Defaults::return_center)
Definition ResidualPca.hpp:161
Compute per-cell scores for a given feature set.
Definition ScoreFeatureSet.hpp:41
Results run_blocked(const tatami::Matrix< T, IDX > *mat, const X *features, const Block *block) const
Definition ScoreFeatureSet.hpp:170
ScoreFeatureSet & set_num_threads(int n=Defaults::num_threads)
Definition ScoreFeatureSet.hpp:82
ScoreFeatureSet & set_scale(bool s=Defaults::scale)
Definition ScoreFeatureSet.hpp:91
Results run(const tatami::Matrix< T, IDX > *mat, const X *features) const
Definition ScoreFeatureSet.hpp:250
ScoreFeatureSet & set_block_weight_policy(WeightPolicy b=Defaults::block_weight_policy)
Definition ScoreFeatureSet.hpp:73
Perform a simple PCA on a gene-cell matrix.
Definition SimplePca.hpp:33
SimplePca & set_return_rotation(bool r=Defaults::return_rotation)
Definition SimplePca.hpp:124
SimplePca & set_rank(int r=Defaults::rank)
Definition SimplePca.hpp:93
SimplePca & set_return_center(bool r=Defaults::return_center)
Definition SimplePca.hpp:134
SimplePca & set_num_threads(int n=Defaults::num_threads)
Definition SimplePca.hpp:153
Results run(const tatami::Matrix< T, IDX > *mat) const
Definition SimplePca.hpp:301
SimplePca & set_return_scale(bool r=Defaults::return_scale)
Definition SimplePca.hpp:144
SimplePca & set_scale(bool s=Defaults::scale)
Definition SimplePca.hpp:103
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
WeightPolicy
Definition blocking.hpp:82
Default parameters.
Definition ScoreFeatureSet.hpp:46
static constexpr int num_threads
Definition ScoreFeatureSet.hpp:55
static constexpr WeightPolicy block_weight_policy
Definition ScoreFeatureSet.hpp:50
static constexpr bool scale
Definition ScoreFeatureSet.hpp:60
Feature set scoring results.
Definition ScoreFeatureSet.hpp:100
std::vector< double > scores
Definition ScoreFeatureSet.hpp:105
std::vector< double > weights
Definition ScoreFeatureSet.hpp:112