scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
AggregateAcrossCells.hpp
Go to the documentation of this file.
1#ifndef SCRAN_AGGREGATE_ACROSS_CELLS_HPP
2#define SCRAN_AGGREGATE_ACROSS_CELLS_HPP
3
4#include "../utils/macros.hpp"
5
6#include <algorithm>
7#include <vector>
8#include "tatami/tatami.hpp"
9
10#include "../utils/blocking.hpp"
11
18namespace scran {
19
29public:
35 template<typename Factor>
36 struct Combinations {
40 Combinations(size_t n) : factors(n) {}
52 std::vector<std::vector<Factor> > factors;
53
59 std::vector<size_t> counts;
60 };
61
78 template<typename Factor, typename Combined>
79 static Combinations<Factor> combine_factors(size_t n, std::vector<const Factor*> factors, Combined* combined) {
80 std::vector<size_t> indices(n);
81 std::iota(indices.begin(), indices.end(), 0);
82
83 std::sort(indices.begin(), indices.end(), [&](size_t left, size_t right) -> bool {
84 for (auto curf : factors) {
85 if (curf[left] < curf[right]) {
86 return true;
87 } else if (curf[left] > curf[right]) {
88 return false;
89 }
90 }
91 return false;
92 });
93
94 Combinations<Factor> output(factors.size());
95 size_t last = 0;
96 Combined counter = 0;
97 if (n) {
98 last = indices[0];
99 combined[last] = counter;
100 output.counts.push_back(1);
101 for (size_t f = 0; f < factors.size(); ++f) {
102 output.factors[f].push_back(factors[f][last]);
103 }
104 }
105
106 for (size_t i = 1; i < n; ++i) {
107 auto current = indices[i];
108 bool diff = false;
109 for (auto curf : factors) {
110 if (curf[last] < curf[current]) {
111 diff = true;
112 break;
113 }
114 }
115
116 if (diff) {
117 for (size_t f = 0; f < factors.size(); ++f) {
118 output.factors[f].push_back(factors[f][current]);
119 }
120 output.counts.push_back(1);
121 ++counter;
122 } else {
123 ++(output.counts.back());
124 }
125
126 combined[current] = counter;
127 last = current;
128 }
129
130 return output;
131 }
132
149 template<typename Combined = int, typename Factor>
150 static std::pair<Combinations<Factor>, std::vector<Combined> > combine_factors(size_t n, std::vector<const Factor*> factors) {
151 std::vector<Combined> combined(n);
152 auto output = combine_factors(n, std::move(factors), combined.data());
153 return std::make_pair(std::move(output), std::move(combined));
154 }
155
156private:
157 template<bool sparse_, typename Index_, typename Contents_, typename Factor_, typename Sum_, typename Detected_>
158 void compute_row(Index_ i, Index_ nc, const Contents_& row, const Factor_* factor, std::vector<Sum_>& tmp_sums, std::vector<Sum_*>& sums, std::vector<Detected_>& tmp_detected, std::vector<Detected_*>& detected) {
159 if (sums.size()) {
160 std::fill(tmp_sums.begin(), tmp_sums.end(), 0);
161
162 if constexpr(sparse_) {
163 for (Index_ j = 0; j < row.number; ++j) {
164 tmp_sums[factor[row.index[j]]] += row.value[j];
165 }
166 } else {
167 for (Index_ j = 0; j < nc; ++j) {
168 tmp_sums[factor[j]] += row[j];
169 }
170 }
171
172 // Computing before transferring for more cache-friendliness.
173 for (Index_ l = 0; l < tmp_sums.size(); ++l) {
174 sums[l][i] = tmp_sums[l];
175 }
176 }
177
178 if (detected.size()) {
179 std::fill(tmp_detected.begin(), tmp_detected.end(), 0);
180
181 if constexpr(sparse_) {
182 for (Index_ j = 0; j < row.number; ++j) {
183 tmp_detected[factor[row.index[j]]] += (row.value[j] > 0);
184 }
185 } else {
186 for (Index_ j = 0; j < nc; ++j) {
187 tmp_detected[factor[j]] += (row[j] > 0);
188 }
189 }
190
191 for (Index_ l = 0; l < tmp_detected.size(); ++l) {
192 detected[l][i] = tmp_detected[l];
193 }
194 }
195 }
196
197 template<bool row_, bool sparse_, typename Data_, typename Index_, typename Factor_, typename Sum_, typename Detected_>
198 void compute(const tatami::Matrix<Data_, Index_>* p, const Factor_* factor, std::vector<Sum_*>& sums, std::vector<Detected_*>& detected) {
199 tatami::Options opt;
200 opt.sparse_ordered_index = false;
201
202 if constexpr(row_) {
203 tatami::parallelize([&](size_t, Index_ s, Index_ l) {
204 auto ext = tatami::consecutive_extractor<row_, sparse_>(p, s, l, opt);
205 std::vector<Sum_> tmp_sums(sums.size());
206 std::vector<Detected_> tmp_detected(detected.size());
207
208 auto NC = p->ncol();
209 std::vector<Data_> vbuffer(NC);
210 typename std::conditional<sparse_, std::vector<Index_>, Index_>::type ibuffer(NC);
211
212 for (Index_ x = s, end = s + l; x < end; ++x) {
213 if constexpr(sparse_) {
214 compute_row<true>(x, NC, ext->fetch(x, vbuffer.data(), ibuffer.data()), factor, tmp_sums, sums, tmp_detected, detected);
215 } else {
216 compute_row<false>(x, NC, ext->fetch(x, vbuffer.data()), factor, tmp_sums, sums, tmp_detected, detected);
217 }
218 }
219 }, p->nrow(), num_threads);
220
221 } else {
222 tatami::parallelize([&](size_t, Index_ s, Index_ l) {
223 auto NC = p->ncol();
224 auto ext = tatami::consecutive_extractor<row_, sparse_>(p, 0, NC, s, l, opt);
225 std::vector<Data_> vbuffer(l);
226 typename std::conditional<sparse_, std::vector<Index_>, Index_>::type ibuffer(l);
227
228 for (Index_ x = 0; x < NC; ++x) {
229 auto current = factor[x];
230
231 if constexpr(sparse_) {
232 auto col = ext->fetch(x, vbuffer.data(), ibuffer.data());
233 if (sums.size()) {
234 auto& cursum = sums[current];
235 for (Index_ i = 0; i < col.number; ++i) {
236 cursum[col.index[i]] += col.value[i];
237 }
238 }
239
240 if (detected.size()) {
241 auto& curdetected = detected[current];
242 for (Index_ i = 0; i < col.number; ++i) {
243 curdetected[col.index[i]] += (col.value[i] > 0);
244 }
245 }
246
247 } else {
248 auto col = ext->fetch(x, vbuffer.data());
249
250 if (sums.size()) {
251 auto cursum = sums[current] + s;
252 for (Index_ i = 0; i < l; ++i) {
253 cursum[i] += col[i];
254 }
255 }
256
257 if (detected.size()) {
258 auto curdetected = detected[current] + s;
259 for (Index_ i = 0; i < l; ++i) {
260 curdetected[i] += (col[i] > 0);
261 }
262 }
263 }
264 }
265 }, p->nrow(), num_threads);
266 }
267 }
268
269public:
292 template<typename Data, typename Index, typename Factor, typename Sum, typename Detected>
293 void run(const tatami::Matrix<Data, Index>* input, const Factor* factor, std::vector<Sum*> sums, std::vector<Detected*> detected) {
294 if (input->prefer_rows()) {
295 if (input->sparse()) {
296 compute<true, true>(input, factor, sums, detected);
297 } else {
298 compute<true, false>(input, factor, sums, detected);
299 }
300 } else {
301 if (input->sparse()) {
302 compute<false, true>(input, factor, sums, detected);
303 } else {
304 compute<false, false>(input, factor, sums, detected);
305 }
306 }
307 }
308
309public:
313 struct Defaults {
317 static constexpr bool compute_sums = true;
318
322 static constexpr bool compute_detected = true;
323
327 static constexpr int num_threads = 1;
328 };
329
336 AggregateAcrossCells& set_compute_sums(bool c = Defaults::compute_sums) {
337 compute_sums = c;
338 return *this;
339 }
340
347 AggregateAcrossCells& set_compute_detected(bool c = Defaults::compute_detected) {
348 compute_detected = c;
349 return *this;
350 }
351
356 AggregateAcrossCells& set_num_threads(int n = Defaults::num_threads) {
357 num_threads = n;
358 return *this;
359 }
360
361private:
362 bool compute_sums = Defaults::compute_sums;
363 bool compute_detected = Defaults::compute_detected;
364 int num_threads = Defaults::num_threads;
365
366public:
372 template <typename Sum, typename Detected>
373 struct Results {
381 std::vector<std::vector<Sum> > sums;
382
390 std::vector<std::vector<Detected> > detected;
391 };
392
410 template<typename Sum = double, typename Detected = int, typename Data, typename Index, typename Factor>
411 Results<Sum, Detected> run(const tatami::Matrix<Data, Index>* input, const Factor* factor) {
412 size_t NC = input->ncol();
413 size_t nlevels = count_ids(NC, factor);
414 size_t ngenes = input->nrow();
415
417 std::vector<Sum*> sumptr;
418 std::vector<Detected*> detptr;
419
420 if (compute_sums) {
421 output.sums.resize(nlevels, std::vector<Sum>(ngenes));
422 sumptr.resize(nlevels);
423 for (size_t l = 0; l < nlevels; ++l) {
424 sumptr[l] = output.sums[l].data();
425 }
426 }
427
428 if (compute_detected) {
429 output.detected.resize(nlevels, std::vector<Detected>(ngenes));
430 detptr.resize(nlevels);
431 for (size_t l = 0; l < nlevels; ++l) {
432 detptr[l] = output.detected[l].data();
433 }
434 }
435
436 run(input, factor, std::move(sumptr), std::move(detptr));
437 return output;
438 }
439};
440
441}
442
443#endif
Aggregate expression values across cells.
Definition AggregateAcrossCells.hpp:28
static Combinations< Factor > combine_factors(size_t n, std::vector< const Factor * > factors, Combined *combined)
Definition AggregateAcrossCells.hpp:79
AggregateAcrossCells & set_compute_sums(bool c=Defaults::compute_sums)
Definition AggregateAcrossCells.hpp:336
Results< Sum, Detected > run(const tatami::Matrix< Data, Index > *input, const Factor *factor)
Definition AggregateAcrossCells.hpp:411
AggregateAcrossCells & set_compute_detected(bool c=Defaults::compute_detected)
Definition AggregateAcrossCells.hpp:347
AggregateAcrossCells & set_num_threads(int n=Defaults::num_threads)
Definition AggregateAcrossCells.hpp:356
void run(const tatami::Matrix< Data, Index > *input, const Factor *factor, std::vector< Sum * > sums, std::vector< Detected * > detected)
Definition AggregateAcrossCells.hpp:293
static std::pair< Combinations< Factor >, std::vector< Combined > > combine_factors(size_t n, std::vector< const Factor * > factors)
Definition AggregateAcrossCells.hpp:150
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
size_t count_ids(size_t length, const Id_ *ids)
Definition blocking.hpp:29
Unique combinations of factors.
Definition AggregateAcrossCells.hpp:36
std::vector< std::vector< Factor > > factors
Definition AggregateAcrossCells.hpp:52
std::vector< size_t > counts
Definition AggregateAcrossCells.hpp:59
Default parameters for aggregation.
Definition AggregateAcrossCells.hpp:313
Container for the aggregation results.
Definition AggregateAcrossCells.hpp:373
std::vector< std::vector< Sum > > sums
Definition AggregateAcrossCells.hpp:381
std::vector< std::vector< Detected > > detected
Definition AggregateAcrossCells.hpp:390