scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
ModelGeneVariances.hpp
Go to the documentation of this file.
1#ifndef SCRAN_MODEL_GENE_VAR_H
2#define SCRAN_MODEL_GENE_VAR_H
3
4#include "../utils/macros.hpp"
5
6#include "tatami/tatami.hpp"
7
8#include "../utils/vector_to_pointers.hpp"
9#include "../utils/blocking.hpp"
10#include "../utils/average_vectors.hpp"
11
12#include "FitVarianceTrend.hpp"
13#include "blocked_variances.hpp"
14
15#include <algorithm>
16#include <vector>
17#include <limits>
18
25namespace scran {
26
37public:
41 struct Defaults {
45 static constexpr WeightPolicy block_weight_policy = WeightPolicy::VARIABLE;
46
51
55 static constexpr bool compute_average = true;
56
60 static constexpr int num_threads = 1;
61 };
62
63private:
64 WeightPolicy block_weight_policy = Defaults::block_weight_policy;
66 int num_threads = Defaults::num_threads;
67
70
73 int minimum_window_count = FitVarianceTrend::Defaults::minimum_window_count;
74
75 bool compute_average = Defaults::compute_average;
76
77public:
84 span = s;
85 return *this;
86 }
87
94 min_mean = m;
95 return *this;
96 }
97
104 use_fixed_width = u;
105 return *this;
106 }
107
114 fixed_width = f;
115 return *this;
116 }
117
124 minimum_window_count = c;
125 return *this;
126 }
127
134 block_weight_policy = w;
135 return *this;
136 }
137
148
155 compute_average = a;
156 return *this;
157 }
158
164 num_threads = n;
165 return *this;
166 }
167
168private:
169 template<bool blocked_, typename Data_, typename Index_, typename Stat_, typename Block_>
170 void compute_dense_row(const tatami::Matrix<Data_, Index_>* mat, std::vector<Stat_*>& means, std::vector<Stat_*>& variances, const Block_* block, const std::vector<Index_>& block_size) const {
171 auto nblocks = block_size.size();
172 auto NR = mat->nrow(), NC = mat->ncol();
173
174 tatami::parallelize([&](size_t, Index_ start, Index_ length) -> void {
175 std::vector<Stat_> tmp_means(nblocks);
176 std::vector<Stat_> tmp_vars(nblocks);
177
178 std::vector<Data_> buffer(NC);
179 auto ext = tatami::consecutive_extractor<true, false>(mat, start, length);
180
181 for (Index_ r = start, end = start + length; r < end; ++r) {
182 auto ptr = ext->fetch(r, buffer.data());
183 feature_selection::blocked_variance_with_mean<blocked_>(ptr, NC, block, nblocks, block_size.data(), tmp_means.data(), tmp_vars.data());
184 for (size_t b = 0; b < nblocks; ++b) {
185 means[b][r] = tmp_means[b];
186 variances[b][r] = tmp_vars[b];
187 }
188 }
189 }, NR, num_threads);
190 }
191
192 template<bool blocked_, typename Data_, typename Index_, typename Stat_, typename Block_>
193 void compute_sparse_row(const tatami::Matrix<Data_, Index_>* mat, std::vector<Stat_*>& means, std::vector<Stat_*>& variances, const Block_* block, const std::vector<Index_>& block_size) const {
194 auto nblocks = block_size.size();
195 auto NR = mat->nrow(), NC = mat->ncol();
196
197 tatami::parallelize([&](size_t, Index_ start, Index_ length) -> void {
198 std::vector<Stat_> tmp_means(nblocks);
199 std::vector<Stat_> tmp_vars(nblocks);
200 std::vector<Index_> tmp_nzero(nblocks);
201
202 std::vector<Data_> vbuffer(NC);
203 std::vector<Index_> ibuffer(NC);
204 tatami::Options opt;
205 opt.sparse_ordered_index = false;
206 auto ext = tatami::consecutive_extractor<true, true>(mat, start, length, opt);
207
208 for (Index_ r = start, end = start + length; r < end; ++r) {
209 auto range = ext->fetch(r, vbuffer.data(), ibuffer.data());
210 feature_selection::blocked_variance_with_mean<blocked_>(range, block, nblocks, block_size.data(), tmp_means.data(), tmp_vars.data(), tmp_nzero.data());
211 for (size_t b = 0; b < nblocks; ++b) {
212 means[b][r] = tmp_means[b];
213 variances[b][r] = tmp_vars[b];
214 }
215 }
216 }, NR, num_threads);
217 }
218
219 template<bool blocked_, typename Data_, typename Index_, typename Stat_, typename Block_>
220 void compute_dense_column(const tatami::Matrix<Data_, Index_>* mat, std::vector<Stat_*>& means, std::vector<Stat_*>& variances, const Block_* block, const std::vector<Index_>& block_size) const {
221 auto nblocks = block_size.size();
222 auto NR = mat->nrow(), NC = mat->ncol();
223
224 tatami::parallelize([&](size_t, Index_ start, Index_ length) -> void {
225 std::vector<Data_> buffer(length);
226 auto ext = tatami::consecutive_extractor<false, false>(mat, 0, NC, start, length);
227
228 // Shifting pointers to account for the new start point.
229 auto mcopy = means;
230 auto vcopy = variances;
231 for (Index_ b = 0; b < nblocks; ++b) {
232 mcopy[b] += start;
233 vcopy[b] += start;
234 }
235
236 std::vector<Index_> counts(nblocks);
237 for (Index_ c = 0; c < NC; ++c) {
238 auto ptr = ext->fetch(c, buffer.data());
239 auto b = feature_selection::get_block<blocked_>(c, block);
240 tatami::stats::variances::compute_running(ptr, length, mcopy[b], vcopy[b], counts[b]);
241 }
242
243 for (size_t b = 0; b < nblocks; ++b) {
244 tatami::stats::variances::finish_running(length, mcopy[b], vcopy[b], counts[b]);
245 }
246 }, NR, num_threads);
247 }
248
249 template<bool blocked_, typename Data_, typename Index_, typename Stat_, typename Block_>
250 void compute_sparse_column(const tatami::Matrix<Data_, Index_>* mat, std::vector<Stat_*>& means, std::vector<Stat_*>& variances, const Block_* block, const std::vector<Index_>& block_size) const {
251 auto nblocks = block_size.size();
252 auto NR = mat->nrow(), NC = mat->ncol();
253 std::vector<std::vector<Index_> > nonzeros(nblocks, std::vector<Index_>(NR));
254
255 tatami::parallelize([&](size_t, Index_ start, Index_ length) -> void {
256 std::vector<Data_> vbuffer(length);
257 std::vector<Index_> ibuffer(length);
258 tatami::Options opt;
259 opt.sparse_ordered_index = false;
260 auto ext = tatami::consecutive_extractor<false, true>(mat, 0, NC, start, length, opt);
261
262 std::vector<Index_> counts(nblocks);
263 for (Index_ c = 0; c < NC; ++c) {
264 auto range = ext->fetch(c, vbuffer.data(), ibuffer.data());;
265 auto b = feature_selection::get_block<blocked_>(c, block);
266 tatami::stats::variances::compute_running(range, means[b], variances[b], nonzeros[b].data(), counts[b]);
267 }
268
269 for (size_t b = 0; b < nblocks; ++b) {
270 tatami::stats::variances::finish_running(length, means[b] + start, variances[b] + start, nonzeros[b].data() + start, counts[b]);
271 }
272 }, NR, num_threads);
273 }
274
275private:
276 template<bool blocked_, typename Data_, typename Index_, typename Stat_, typename Block_>
277 void compute(const tatami::Matrix<Data_, Index_>* mat, std::vector<Stat_*>& means, std::vector<Stat_*>& variances, const Block_* block, const std::vector<Index_>& block_size) const {
278 if (mat->prefer_rows()) {
279 if (mat->sparse()) {
280 compute_sparse_row<blocked_>(mat, means, variances, block, block_size);
281 } else {
282 compute_dense_row<blocked_>(mat, means, variances, block, block_size);
283 }
284
285 } else {
286 // Set everything to zero before computing the running statistics.
287 auto NR = mat->nrow();
288 for (auto& mptr : means) {
289 std::fill(mptr, mptr + NR, 0);
290 }
291 for (auto& vptr : variances) {
292 std::fill(vptr, vptr + NR, 0);
293 }
294
295 if (mat->sparse()) {
296 compute_sparse_column<blocked_>(mat, means, variances, block, block_size);
297 } else {
298 compute_dense_column<blocked_>(mat, means, variances, block, block_size);
299 }
300 }
301 }
302
303public:
318 template<typename Value_, typename Index_, typename Stat_>
319 void run(const tatami::Matrix<Value_, Index_>* mat, Stat_* means, Stat_* variances, Stat_* fitted, Stat_* residuals) const {
320 run_blocked(mat, static_cast<int*>(NULL), std::vector<Stat_*>{means}, std::vector<Stat_*>{variances}, std::vector<Stat_*>{fitted}, std::vector<Stat_*>{residuals});
321 return;
322 }
323
357 template<typename Value_, typename Index_, typename Block_, typename Stat_>
359 const tatami::Matrix<Value_, Index_>* mat,
360 const Block_* block,
361 std::vector<Stat_*> means,
362 std::vector<Stat_*> variances,
363 std::vector<Stat_*> fitted,
364 std::vector<Stat_*> residuals,
365 Stat_* ave_means,
366 Stat_* ave_variances,
367 Stat_* ave_fitted,
368 Stat_* ave_residuals)
369 const {
370 Index_ NR = mat->nrow(), NC = mat->ncol();
371 std::vector<Index_> block_size;
372
373 if (block) {
374 block_size = tabulate_ids(NC, block);
375 compute<true>(mat, means, variances, block, block_size);
376 } else {
377 block_size.push_back(NC); // everything is one big block.
378 compute<false>(mat, means, variances, block, block_size);
379 }
380
381 // Applying the trend fit to each block.
383 fit.set_span(span);
384 fit.set_minimum_mean(min_mean);
385 fit.set_use_fixed_width(use_fixed_width);
386 fit.set_fixed_width(fixed_width);
387 fit.set_minimum_window_count(minimum_window_count);
388
389 for (size_t b = 0; b < block_size.size(); ++b) {
390 if (block_size[b] >= 2) {
391 fit.run(NR, means[b], variances[b], fitted[b], residuals[b]);
392 } else {
393 std::fill(fitted[b], fitted[b] + NR, std::numeric_limits<double>::quiet_NaN());
394 std::fill(residuals[b], residuals[b] + NR, std::numeric_limits<double>::quiet_NaN());
395 }
396 }
397
398 // Computing averages under different policies.
399 if (ave_means || ave_variances || ave_fitted || ave_residuals) {
400 std::vector<double> block_weight = compute_block_weights(block_size, block_weight_policy, variable_block_weight_parameters);
401 if (ave_means) {
402 average_vectors_weighted(NR, means, block_weight.data(), ave_means);
403 }
404 if (ave_variances) {
405 average_vectors_weighted(NR, variances, block_weight.data(), ave_variances);
406 }
407 if (ave_fitted) {
408 average_vectors_weighted(NR, fitted, block_weight.data(), ave_fitted);
409 }
410 if (ave_residuals) {
411 average_vectors_weighted(NR, residuals, block_weight.data(), ave_residuals);
412 }
413 }
414
415 return;
416 }
417
439 template<typename Value_, typename Index_, typename Block_, typename Stat_>
441 const tatami::Matrix<Value_, Index_>* mat,
442 const Block_* block,
443 std::vector<Stat_*> means,
444 std::vector<Stat_*> variances,
445 std::vector<Stat_*> fitted,
446 std::vector<Stat_*> residuals)
447 const {
449 mat,
450 block,
451 std::move(means),
452 std::move(variances),
453 std::move(fitted),
454 std::move(residuals),
455 static_cast<Stat_*>(NULL),
456 static_cast<Stat_*>(NULL),
457 static_cast<Stat_*>(NULL),
458 static_cast<Stat_*>(NULL)
459 );
460 }
461
462public:
469 struct Results {
473 Results() {}
474
475 Results(size_t ngenes) : means(ngenes), variances(ngenes), fitted(ngenes), residuals(ngenes) {}
483 std::vector<double> means;
484
488 std::vector<double> variances;
489
493 std::vector<double> fitted;
494
498 std::vector<double> residuals;
499 };
500
511 template<typename Value_, typename Index_>
512 Results run(const tatami::Matrix<Value_, Index_>* mat) const {
513 Results output(mat->nrow());
514 run(mat, output.means.data(), output.variances.data(), output.fitted.data(), output.residuals.data());
515 return output;
516 }
517
518public:
529 BlockResults() {}
530
531 BlockResults(size_t ngenes, int nblocks, bool compute_average) :
532 per_block(nblocks, Results(ngenes)),
533 average(compute_average ? ngenes : 0) {}
541 std::vector<Results> per_block;
542
547 };
548
549private:
550 template<typename Stat_>
551 static void fill_pointers(
552 int nblocks,
553 BlockResults& output,
554 std::vector<Stat_*>& mean_ptr,
555 std::vector<Stat_*>& var_ptr,
556 std::vector<Stat_*>& fit_ptr,
557 std::vector<Stat_*>& resid_ptr
558 ) {
559 mean_ptr.reserve(nblocks);
560 var_ptr.reserve(nblocks);
561 fit_ptr.reserve(nblocks);
562 resid_ptr.reserve(nblocks);
563
564 for (int b = 0; b < nblocks; ++b) {
565 mean_ptr.push_back(output.per_block[b].means.data());
566 var_ptr.push_back(output.per_block[b].variances.data());
567 fit_ptr.push_back(output.per_block[b].fitted.data());
568 resid_ptr.push_back(output.per_block[b].residuals.data());
569 }
570 }
571
572public:
586 template<typename Value_, typename Index_, typename Block_>
587 BlockResults run_blocked(const tatami::Matrix<Value_, Index_>* mat, const Block_* block) const {
588 int nblocks = (block ? count_ids(mat->ncol(), block) : 1);
589 BlockResults output(mat->nrow(), nblocks, compute_average);
590
591 std::vector<double*> mean_ptr, var_ptr, fit_ptr, resid_ptr;
592 fill_pointers(nblocks, output, mean_ptr, var_ptr, fit_ptr, resid_ptr);
593
594 if (compute_average) {
596 mat,
597 block,
598 std::move(mean_ptr),
599 std::move(var_ptr),
600 std::move(fit_ptr),
601 std::move(resid_ptr),
602 output.average.means.data(),
603 output.average.variances.data(),
604 output.average.fitted.data(),
605 output.average.residuals.data()
606 );
607 } else {
609 mat,
610 block,
611 std::move(mean_ptr),
612 std::move(var_ptr),
613 std::move(fit_ptr),
614 std::move(resid_ptr)
615 );
616 }
617
618 return output;
619 }
620};
621
622}
623
624#endif
Fit a mean-variance trend to log-count data.
Fit a mean-variance trend to log-count data.
Definition FitVarianceTrend.hpp:34
FitVarianceTrend & set_minimum_window_count(int c=Defaults::minimum_window_count)
Definition FitVarianceTrend.hpp:165
void run(size_t n, const double *mean, const double *variance, double *fitted, double *residuals) const
Definition FitVarianceTrend.hpp:195
FitVarianceTrend & set_minimum_mean(double m=Defaults::minimum_mean)
Definition FitVarianceTrend.hpp:97
FitVarianceTrend & set_fixed_width(double f=Defaults::fixed_width)
Definition FitVarianceTrend.hpp:151
FitVarianceTrend & set_span(double s=Defaults::span)
Definition FitVarianceTrend.hpp:85
FitVarianceTrend & set_use_fixed_width(bool u=Defaults::use_fixed_width)
Definition FitVarianceTrend.hpp:137
Compute and model the per-gene variances in log-expression data.
Definition ModelGeneVariances.hpp:36
ModelGeneVariances & set_span(double s=FitVarianceTrend::Defaults::span)
Definition ModelGeneVariances.hpp:83
ModelGeneVariances & set_variable_block_weight_parameters(VariableBlockWeightParameters v=Defaults::variable_block_weight_parameters)
Definition ModelGeneVariances.hpp:144
Results run(const tatami::Matrix< Value_, Index_ > *mat) const
Definition ModelGeneVariances.hpp:512
ModelGeneVariances & set_block_weight_policy(WeightPolicy w=Defaults::block_weight_policy)
Definition ModelGeneVariances.hpp:133
void run_blocked(const tatami::Matrix< Value_, Index_ > *mat, const Block_ *block, std::vector< Stat_ * > means, std::vector< Stat_ * > variances, std::vector< Stat_ * > fitted, std::vector< Stat_ * > residuals) const
Definition ModelGeneVariances.hpp:440
ModelGeneVariances & set_fixed_width(double f=FitVarianceTrend::Defaults::fixed_width)
Definition ModelGeneVariances.hpp:113
ModelGeneVariances & set_use_fixed_width(bool u=FitVarianceTrend::Defaults::use_fixed_width)
Definition ModelGeneVariances.hpp:103
ModelGeneVariances & set_minimum_window_count(int c=FitVarianceTrend::Defaults::minimum_window_count)
Definition ModelGeneVariances.hpp:123
ModelGeneVariances & set_minimum_mean(double m=FitVarianceTrend::Defaults::minimum_mean)
Definition ModelGeneVariances.hpp:93
ModelGeneVariances & set_compute_average(bool a=Defaults::compute_average)
Definition ModelGeneVariances.hpp:154
BlockResults run_blocked(const tatami::Matrix< Value_, Index_ > *mat, const Block_ *block) const
Definition ModelGeneVariances.hpp:587
void run(const tatami::Matrix< Value_, Index_ > *mat, Stat_ *means, Stat_ *variances, Stat_ *fitted, Stat_ *residuals) const
Definition ModelGeneVariances.hpp:319
ModelGeneVariances & set_num_threads(int n=Defaults::num_threads)
Definition ModelGeneVariances.hpp:163
void run_blocked(const tatami::Matrix< Value_, Index_ > *mat, const Block_ *block, std::vector< Stat_ * > means, std::vector< Stat_ * > variances, std::vector< Stat_ * > fitted, std::vector< Stat_ * > residuals, Stat_ *ave_means, Stat_ *ave_variances, Stat_ *ave_fitted, Stat_ *ave_residuals) const
Definition ModelGeneVariances.hpp:358
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
size_t count_ids(size_t length, const Id_ *ids)
Definition blocking.hpp:29
std::vector< Output_ > tabulate_ids(size_t length, const Id_ *ids, bool allow_zeros=false)
Definition blocking.hpp:52
void average_vectors_weighted(size_t n, std::vector< Stat_ * > in, const Weight_ *w, Output_ *out)
Definition average_vectors.hpp:158
std::vector< double > compute_block_weights(const std::vector< Size_ > &sizes, WeightPolicy policy, const VariableBlockWeightParameters &param)
Definition blocking.hpp:148
WeightPolicy
Definition blocking.hpp:82
static constexpr int minimum_window_count
Definition FitVarianceTrend.hpp:73
static constexpr double span
Definition FitVarianceTrend.hpp:58
static constexpr double minimum_mean
Definition FitVarianceTrend.hpp:43
static constexpr bool use_fixed_width
Definition FitVarianceTrend.hpp:63
static constexpr double fixed_width
Definition FitVarianceTrend.hpp:68
Results of variance modelling with blocks.
Definition ModelGeneVariances.hpp:525
std::vector< Results > per_block
Definition ModelGeneVariances.hpp:541
Results average
Definition ModelGeneVariances.hpp:546
Default parameters for variance modelling.
Definition ModelGeneVariances.hpp:41
static constexpr int num_threads
Definition ModelGeneVariances.hpp:60
static constexpr VariableBlockWeightParameters variable_block_weight_parameters
Definition ModelGeneVariances.hpp:50
static constexpr WeightPolicy block_weight_policy
Definition ModelGeneVariances.hpp:45
static constexpr bool compute_average
Definition ModelGeneVariances.hpp:55
Results of variance modelling without blocks.
Definition ModelGeneVariances.hpp:469
std::vector< double > means
Definition ModelGeneVariances.hpp:483
std::vector< double > residuals
Definition ModelGeneVariances.hpp:498
std::vector< double > variances
Definition ModelGeneVariances.hpp:488
std::vector< double > fitted
Definition ModelGeneVariances.hpp:493
Parameters for variable_block_weight().
Definition blocking.hpp:87