scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
SuggestRnaQcFilters.hpp
Go to the documentation of this file.
1#ifndef SCRAN_SUGGEST_RNA_QC_FILTERS_H
2#define SCRAN_SUGGEST_RNA_QC_FILTERS_H
3
4#include "../utils/macros.hpp"
5
6#include <vector>
7#include <cstdint>
8
10#include "ComputeMedianMad.hpp"
12
19namespace scran {
20
48public:
52 struct Defaults {
56 static constexpr double num_mads = 3;
57 };
58
59private:
60 double detected_num_mads = Defaults::num_mads;
61 double sums_num_mads = Defaults::num_mads;
62 double subset_num_mads = Defaults::num_mads;
63
64public:
72 detected_num_mads = n;
73 return *this;
74 }
75
83 sums_num_mads = n;
84 return *this;
85 }
86
94 subset_num_mads = n;
95 return *this;
96 }
97
105 detected_num_mads= n;
106 subset_num_mads = n;
107 sums_num_mads = n;
108 return *this;
109 }
110
111public:
119 struct Thresholds {
125 std::vector<double> sums;
126
132 std::vector<double> detected;
133
139 std::vector<std::vector<double> > subset_proportions;
140
141 public:
157 template<bool overwrite = true, typename Float, typename Integer, typename Output>
158 void filter(size_t n, const PerCellRnaQcMetrics::Buffers<Float, Integer>& buffers, Output* output) const {
159 if (detected.size() != 1) {
160 throw std::runtime_error("should use filter_blocked() for multiple batches");
161 }
162 filter_<overwrite>(n, buffers, output, [](size_t i) -> size_t { return 0; });
163 }
164
175 template<typename Output = uint8_t>
176 std::vector<Output> filter(const PerCellRnaQcMetrics::Results& metrics) const {
177 std::vector<Output> output(metrics.detected.size());
178 filter(output.size(), metrics.buffers(), output.data());
179 return output;
180 }
181
182 public:
199 template<bool overwrite = true, typename Block, typename Float, typename Integer, typename Output>
200 void filter_blocked(size_t n, const Block* block, const PerCellRnaQcMetrics::Buffers<Float, Integer>& buffers, Output* output) const {
201 if (block) {
202 filter_<overwrite>(n, buffers, output, [&](size_t i) -> Block { return block[i]; });
203 } else {
204 filter<overwrite>(n, buffers, output);
205 }
206 }
207
221 template<typename Output = uint8_t, typename Block>
222 std::vector<Output> filter_blocked(const PerCellRnaQcMetrics::Results& metrics, const Block* block) const {
223 std::vector<Output> output(metrics.detected.size());
224 filter_blocked(output.size(), block, metrics.buffers(), output.data());
225 return output;
226 }
227
228 private:
229 template<bool overwrite, typename Float, typename Integer, typename Output, typename Function>
230 void filter_(size_t n, const PerCellRnaQcMetrics::Buffers<Float, Integer>& buffers, Output* output, Function indexer) const {
231 size_t nsubsets = subset_proportions.size();
232
233 for (size_t i = 0; i < n; ++i) {
234 auto b = indexer(i);
235 if (quality_control::is_less_than(buffers.sums[i], sums[b])) {
236 output[i] = true;
237 continue;
238 }
239
240 if (quality_control::is_less_than<double>(buffers.detected[i], detected[b])) {
241 output[i] = true;
242 continue;
243 }
244
245 bool fail = false;
246 for (size_t s = 0; s < nsubsets; ++s) {
247 if (quality_control::is_greater_than(buffers.subset_proportions[s][i], subset_proportions[s][b])) {
248 fail = true;
249 break;
250 }
251 }
252 if (fail) {
253 output[i] = true;
254 continue;
255 }
256
257 if constexpr(overwrite) {
258 output[i] = false;
259 }
260 }
261
262 return;
263 }
264 };
265
266public:
276 template<typename Float, typename Integer>
278 return run_blocked(n, static_cast<int*>(NULL), buffers);
279 }
280
288 return run(metrics.detected.size(), metrics.buffers());
289 }
290
291public:
304 template<typename Block, typename Float, typename Integer>
305 Thresholds run_blocked(size_t n, const Block* block, const PerCellRnaQcMetrics::Buffers<Float, Integer>& buffers) const {
306 Thresholds output;
307 std::vector<double> workspace(n);
308 std::vector<int> starts;
309 if (block) {
311 }
312
313 // Filtering on the total counts.
314 {
315 ComputeMedianMad meddler;
316 meddler.set_log(true);
317 auto sums_res = meddler.run_blocked(n, block, starts, buffers.sums, workspace.data());
318
320 filter.set_num_mads(sums_num_mads);
321 filter.set_upper(false);
322 auto sums_filt = filter.run(std::move(sums_res));
323
324 output.sums = std::move(sums_filt.lower);
325 }
326
327 // Filtering on the detected features.
328 {
329 ComputeMedianMad meddler;
330 meddler.set_log(true);
331 auto detected_res = meddler.run_blocked(n, block, starts, buffers.detected, workspace.data());
332
334 filter.set_num_mads(sums_num_mads);
335 filter.set_upper(false);
336 auto detected_filt = filter.run(std::move(detected_res));
337
338 output.detected = std::move(detected_filt.lower);
339 }
340
341 // Filtering on the subset proportions (no log).
342 {
343 ComputeMedianMad meddler;
344
346 filter.set_num_mads(subset_num_mads);
347 filter.set_lower(false);
348
349 size_t nsubsets = buffers.subset_proportions.size();
350 for (size_t s = 0; s < nsubsets; ++s) {
351 auto subset_res = meddler.run_blocked(n, block, starts, buffers.subset_proportions[s], workspace.data());
352 auto subset_filt = filter.run(std::move(subset_res));
353 output.subset_proportions.push_back(std::move(subset_filt.upper));
354 }
355 }
356
357 return output;
358 }
359
370 template<typename Block>
371 Thresholds run_blocked(const PerCellRnaQcMetrics::Results& metrics, const Block* block) const {
372 return run_blocked(metrics.detected.size(), block, metrics.buffers());
373 }
374};
375
376}
377
378#endif
Define outlier filters using a MAD-based approach.
Compute the median and MAD from an array of values.
Compute typical per-cell quality control metrics from an RNA count matrix.
Compute the median and MAD from an array of values.
Definition ComputeMedianMad.hpp:30
Results run_blocked(size_t n, const Block *block, std::vector< int > starts, const Input *metrics, Buffer *buffer) const
Definition ComputeMedianMad.hpp:214
static std::vector< int > compute_block_starts(size_t n, const Block *block)
Definition ComputeMedianMad.hpp:129
ComputeMedianMad & set_log(bool l=Defaults::log)
Definition ComputeMedianMad.hpp:60
Create filters to identify low-quality cells from RNA-derived QC metrics.
Definition SuggestRnaQcFilters.hpp:47
Thresholds run(size_t n, const PerCellRnaQcMetrics::Buffers< Float, Integer > &buffers) const
Definition SuggestRnaQcFilters.hpp:277
Thresholds run_blocked(size_t n, const Block *block, const PerCellRnaQcMetrics::Buffers< Float, Integer > &buffers) const
Definition SuggestRnaQcFilters.hpp:305
SuggestRnaQcFilters & set_num_mads(double n=Defaults::num_mads)
Definition SuggestRnaQcFilters.hpp:104
SuggestRnaQcFilters & set_sums_num_mads(double n=Defaults::num_mads)
Definition SuggestRnaQcFilters.hpp:82
Thresholds run_blocked(const PerCellRnaQcMetrics::Results &metrics, const Block *block) const
Definition SuggestRnaQcFilters.hpp:371
SuggestRnaQcFilters & set_subset_num_mads(double n=Defaults::num_mads)
Definition SuggestRnaQcFilters.hpp:93
SuggestRnaQcFilters & set_detected_num_mads(double n=Defaults::num_mads)
Definition SuggestRnaQcFilters.hpp:71
Thresholds run(const PerCellRnaQcMetrics::Results &metrics) const
Definition SuggestRnaQcFilters.hpp:287
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
Define outlier filters from the median and MAD.
Definition ChooseOutlierFilters.hpp:30
ChooseOutlierFilters & set_num_mads(double n=Defaults::num_mads)
Definition ChooseOutlierFilters.hpp:92
ChooseOutlierFilters & set_lower(bool l=Defaults::lower)
Definition ChooseOutlierFilters.hpp:70
Thresholds run(ComputeMedianMad::Results x) const
Definition ChooseOutlierFilters.hpp:273
ChooseOutlierFilters & set_upper(bool u=Defaults::upper)
Definition ChooseOutlierFilters.hpp:81
Buffers for direct storage of the calculated statistics.
Definition PerCellRnaQcMetrics.hpp:70
Integer * detected
Definition PerCellRnaQcMetrics.hpp:79
Float * sums
Definition PerCellRnaQcMetrics.hpp:74
std::vector< Float * > subset_proportions
Definition PerCellRnaQcMetrics.hpp:85
Result store for QC metric calculations.
Definition PerCellRnaQcMetrics.hpp:143
Buffers buffers()
Definition PerCellRnaQcMetrics.hpp:176
std::vector< int > detected
Definition PerCellRnaQcMetrics.hpp:162
Default parameters.
Definition SuggestRnaQcFilters.hpp:52
static constexpr double num_mads
Definition SuggestRnaQcFilters.hpp:56
Thresholds to define outliers on each metric.
Definition SuggestRnaQcFilters.hpp:119
std::vector< Output > filter_blocked(const PerCellRnaQcMetrics::Results &metrics, const Block *block) const
Definition SuggestRnaQcFilters.hpp:222
std::vector< Output > filter(const PerCellRnaQcMetrics::Results &metrics) const
Definition SuggestRnaQcFilters.hpp:176
void filter_blocked(size_t n, const Block *block, const PerCellRnaQcMetrics::Buffers< Float, Integer > &buffers, Output *output) const
Definition SuggestRnaQcFilters.hpp:200
void filter(size_t n, const PerCellRnaQcMetrics::Buffers< Float, Integer > &buffers, Output *output) const
Definition SuggestRnaQcFilters.hpp:158
std::vector< double > sums
Definition SuggestRnaQcFilters.hpp:125
std::vector< double > detected
Definition SuggestRnaQcFilters.hpp:132
std::vector< std::vector< double > > subset_proportions
Definition SuggestRnaQcFilters.hpp:139