scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
SuggestAdtQcFilters.hpp
Go to the documentation of this file.
1#ifndef SCRAN_SUGGEST_ADT_QC_FILTERS_H
2#define SCRAN_SUGGEST_ADT_QC_FILTERS_H
3
4#include "../utils/macros.hpp"
5
6#include <vector>
7#include <cmath>
8
9#include "utils.hpp"
11#include "ComputeMedianMad.hpp"
13
20namespace scran {
21
51public:
55 struct Defaults {
59 static constexpr double num_mads = 3;
60
64 static constexpr double min_detected_drop = 0.1;
65 };
66
67private:
68 double detected_num_mads = Defaults::num_mads;
69 double subset_num_mads = Defaults::num_mads;
70 double min_detected_drop = Defaults::min_detected_drop;
71
72public:
80 detected_num_mads = n;
81 return *this;
82 }
83
91 subset_num_mads = n;
92 return *this;
93 }
94
102 detected_num_mads= n;
103 subset_num_mads = n;
104 return *this;
105 }
106
114 min_detected_drop= m;
115 return *this;
116 }
117
118public:
126 struct Thresholds {
132 std::vector<double> detected;
133
139 std::vector<std::vector<double> > subset_totals;
140
141 public:
158 template<bool overwrite = true, typename Float, typename Integer, typename Output>
159 void filter(size_t n, const PerCellAdtQcMetrics::Buffers<Float, Integer>& buffers, Output* output) const {
160 if (detected.size() != 1) {
161 throw std::runtime_error("should use filter_blocked() for multiple batches");
162 }
163 filter_<overwrite>(n, buffers, output, [](size_t i) -> size_t { return 0; });
164 }
165
177 template<typename Output = uint8_t>
178 std::vector<Output> filter(const PerCellAdtQcMetrics::Results& metrics) const {
179 std::vector<Output> output(metrics.detected.size());
180 filter(output.size(), metrics.buffers(), output.data());
181 return output;
182 }
183
184 public:
201 template<bool overwrite = true, typename Block, typename Float, typename Integer, typename Output>
202 void filter_blocked(size_t n, const Block* block, const PerCellAdtQcMetrics::Buffers<Float, Integer>& buffers, Output* output) const {
203 if (block) {
204 filter_<overwrite>(n, buffers, output, [&](size_t i) -> Block { return block[i]; });
205 } else {
206 filter<overwrite>(n, buffers, output);
207 }
208 }
209
224 template<typename Output = uint8_t, typename Block>
225 std::vector<Output> filter_blocked(const PerCellAdtQcMetrics::Results& metrics, const Block* block) const {
226 std::vector<Output> output(metrics.detected.size());
227 filter_blocked(output.size(), block, metrics.buffers(), output.data());
228 return output;
229 }
230
231 private:
232 template<bool overwrite, typename Float, typename Integer, typename Output, typename Function>
233 void filter_(size_t n, const PerCellAdtQcMetrics::Buffers<Float, Integer>& buffers, Output* output, Function indexer) const {
234 size_t nsubsets = subset_totals.size();
235
236 for (size_t i = 0; i < n; ++i) {
237 auto b = indexer(i);
238
239 if (quality_control::is_less_than<double>(buffers.detected[i], detected[b])) {
240 output[i] = true;
241 continue;
242 }
243
244 bool fail = false;
245 for (size_t s = 0; s < nsubsets; ++s) {
246 if (quality_control::is_greater_than(buffers.subset_totals[s][i], subset_totals[s][b])) {
247 fail = true;
248 break;
249 }
250 }
251 if (fail) {
252 output[i] = true;
253 continue;
254 }
255
256 if constexpr(overwrite) {
257 output[i] = false;
258 }
259 }
260
261 return;
262 }
263 };
264
265public:
275 template<typename Float, typename Integer>
277 return run_blocked(n, static_cast<int*>(NULL), buffers);
278 }
279
287 return run(metrics.detected.size(), metrics.buffers());
288 }
289
290public:
304 template<typename Block, typename Float, typename Integer>
305 Thresholds run_blocked(size_t n, const Block* block, const PerCellAdtQcMetrics::Buffers<Float, Integer>& buffers) const {
306 Thresholds output;
307 std::vector<double> workspace(n);
308 std::vector<int> starts;
309 if (block) {
311 }
312
313 // Filtering on the number of detected features.
314 {
315 ComputeMedianMad meddler;
316 meddler.set_log(true);
317 auto detected_res = meddler.run_blocked(n, block, starts, buffers.detected, workspace.data());
318
320 filter.set_num_mads(detected_num_mads);
321 filter.set_min_diff(-std::log(1 - min_detected_drop));
322 filter.set_upper(false);
323 auto detected_filt = filter.run(std::move(detected_res));
324
325 output.detected = std::move(detected_filt.lower);
326 }
327
328 // Filtering on the IgG subset totals.
329 {
330 ComputeMedianMad meddler;
331 meddler.set_log(true);
332
334 filter.set_num_mads(subset_num_mads);
335 filter.set_lower(false);
336
337 size_t nsubsets = buffers.subset_totals.size();
338 for (size_t s = 0; s < nsubsets; ++s) {
339 auto subset_res = meddler.run_blocked(n, block, starts, buffers.subset_totals[s], workspace.data());
340 auto subset_filt = filter.run(std::move(subset_res));
341 output.subset_totals.push_back(std::move(subset_filt.upper));
342 }
343 }
344
345 return output;
346 }
347
359 template<typename Block>
360 Thresholds run_blocked(const PerCellAdtQcMetrics::Results& metrics, const Block* block) const {
361 return run_blocked(metrics.detected.size(), block, metrics.buffers());
362 }
363};
364
365}
366
367#endif
Define outlier filters using a MAD-based approach.
Compute the median and MAD from an array of values.
Compute per-cell quality control metrics from an ADT count matrix.
Compute the median and MAD from an array of values.
Definition ComputeMedianMad.hpp:30
Results run_blocked(size_t n, const Block *block, std::vector< int > starts, const Input *metrics, Buffer *buffer) const
Definition ComputeMedianMad.hpp:214
static std::vector< int > compute_block_starts(size_t n, const Block *block)
Definition ComputeMedianMad.hpp:129
ComputeMedianMad & set_log(bool l=Defaults::log)
Definition ComputeMedianMad.hpp:60
Create filters to identify low-quality cells from ADT-derived QC metrics.
Definition SuggestAdtQcFilters.hpp:50
Thresholds run_blocked(size_t n, const Block *block, const PerCellAdtQcMetrics::Buffers< Float, Integer > &buffers) const
Definition SuggestAdtQcFilters.hpp:305
SuggestAdtQcFilters & set_detected_num_mads(double n=Defaults::num_mads)
Definition SuggestAdtQcFilters.hpp:79
SuggestAdtQcFilters & set_subset_num_mads(double n=Defaults::num_mads)
Definition SuggestAdtQcFilters.hpp:90
Thresholds run(const PerCellAdtQcMetrics::Results &metrics) const
Definition SuggestAdtQcFilters.hpp:286
SuggestAdtQcFilters & set_num_mads(double n=Defaults::num_mads)
Definition SuggestAdtQcFilters.hpp:101
SuggestAdtQcFilters & set_min_detected_drop(double m=Defaults::min_detected_drop)
Definition SuggestAdtQcFilters.hpp:113
Thresholds run_blocked(const PerCellAdtQcMetrics::Results &metrics, const Block *block) const
Definition SuggestAdtQcFilters.hpp:360
Thresholds run(size_t n, const PerCellAdtQcMetrics::Buffers< Float, Integer > &buffers) const
Definition SuggestAdtQcFilters.hpp:276
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
Define outlier filters from the median and MAD.
Definition ChooseOutlierFilters.hpp:30
ChooseOutlierFilters & set_min_diff(double m=Defaults::min_diff)
Definition ChooseOutlierFilters.hpp:104
ChooseOutlierFilters & set_num_mads(double n=Defaults::num_mads)
Definition ChooseOutlierFilters.hpp:92
ChooseOutlierFilters & set_lower(bool l=Defaults::lower)
Definition ChooseOutlierFilters.hpp:70
Thresholds run(ComputeMedianMad::Results x) const
Definition ChooseOutlierFilters.hpp:273
ChooseOutlierFilters & set_upper(bool u=Defaults::upper)
Definition ChooseOutlierFilters.hpp:81
Buffers for direct storage of the calculated statistics.
Definition PerCellAdtQcMetrics.hpp:69
Integer * detected
Definition PerCellAdtQcMetrics.hpp:78
std::vector< Float * > subset_totals
Definition PerCellAdtQcMetrics.hpp:84
Result store for QC metric calculations.
Definition PerCellAdtQcMetrics.hpp:125
std::vector< int > detected
Definition PerCellAdtQcMetrics.hpp:144
Buffers buffers()
Definition PerCellAdtQcMetrics.hpp:157
Default parameters.
Definition SuggestAdtQcFilters.hpp:55
static constexpr double min_detected_drop
Definition SuggestAdtQcFilters.hpp:64
static constexpr double num_mads
Definition SuggestAdtQcFilters.hpp:59
Thresholds to define outliers on each metric.
Definition SuggestAdtQcFilters.hpp:126
std::vector< double > detected
Definition SuggestAdtQcFilters.hpp:132
void filter(size_t n, const PerCellAdtQcMetrics::Buffers< Float, Integer > &buffers, Output *output) const
Definition SuggestAdtQcFilters.hpp:159
void filter_blocked(size_t n, const Block *block, const PerCellAdtQcMetrics::Buffers< Float, Integer > &buffers, Output *output) const
Definition SuggestAdtQcFilters.hpp:202
std::vector< std::vector< double > > subset_totals
Definition SuggestAdtQcFilters.hpp:139
std::vector< Output > filter(const PerCellAdtQcMetrics::Results &metrics) const
Definition SuggestAdtQcFilters.hpp:178
std::vector< Output > filter_blocked(const PerCellAdtQcMetrics::Results &metrics, const Block *block) const
Definition SuggestAdtQcFilters.hpp:225