scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
ChooseOutlierFilters.hpp
Go to the documentation of this file.
1#ifndef SCRAN_CHOOSE_OUTLIER_FILTERS_HPP
2#define SCRAN_CHOOSE_OUTLIER_FILTERS_HPP
3
4#include "../utils/macros.hpp"
5
6#include <vector>
7#include <limits>
8#include <cmath>
9
10#include "utils.hpp"
11#include "ComputeMedianMad.hpp"
12
19namespace scran {
20
31public:
35 struct Defaults {
39 static constexpr bool lower = true;
40
44 static constexpr bool upper = true;
45
49 static constexpr double num_mads = 3;
50
54 static constexpr double min_diff = 0;
55 };
56
57private:
58 bool lower = Defaults::lower;
59 bool upper = Defaults::upper;
60 double num_mads = Defaults::num_mads;
61 double min_diff = Defaults::min_diff;
62
63public:
71 lower = l;
72 return *this;
73 }
74
82 upper = u;
83 return *this;
84 }
85
93 num_mads = n;
94 return *this;
95 }
96
105 min_diff = m;
106 return *this;
107 }
108
109private:
110 double sanitize(double proposed, bool log) const {
111 if (log) {
112 if (std::isinf(proposed)) {
113 if (proposed < 0) {
114 proposed = 0;
115 }
116 } else {
117 proposed = std::exp(proposed);
118 }
119 }
120 return proposed;
121 }
122
123public:
127 struct Thresholds {
131 Thresholds(size_t nblocks = 0) : lower(nblocks), upper(nblocks) {}
141 std::vector<double> lower;
142
148 std::vector<double> upper;
149
150 public:
164 template<bool overwrite = true, typename Input, typename Output>
165 void filter(size_t n, Input* input, Output* output) const {
166 if (upper.size() > 1 || lower.size() > 1) {
167 throw std::runtime_error("should use filter_blocked() for multiple batches");
168 }
169 filter_<overwrite>(n, input, output, [](size_t i) -> size_t { return 0; });
170 }
171
182 template<typename Output = uint8_t, typename Input>
183 std::vector<Output> filter(size_t n, const Input* input) const {
184 std::vector<Output> output(n);
185 filter(n, input, output.data());
186 return output;
187 }
188
189 public:
204 template<bool overwrite = true, typename Block, typename Input, typename Output>
205 void filter_blocked(size_t n, const Block* block, const Input* input, Output* output) const {
206 if (block) {
207 filter_<overwrite>(n, input, output, [&](size_t i) -> Block { return block[i]; });
208 } else {
209 filter<overwrite>(n, input, output);
210 }
211 }
212
227 template<typename Output = uint8_t, typename Block, typename Input>
228 std::vector<Output> filter_blocked(size_t n, const Block* block, const Input* input) const {
229 std::vector<Output> output(n);
230 filter_blocked(n, block, input, output.data());
231 return output;
232 }
233
234 private:
235 template<bool overwrite, typename Input, typename Output, typename Function>
236 void filter_(size_t n, const Input* input, Output* output, Function indexer) const {
237 for (size_t i = 0; i < n; ++i) {
238 auto b = indexer(i);
239
240 if (!lower.empty()) {
241 if (quality_control::is_less_than(input[i], lower[b])) {
242 output[i] = true;
243 continue;
244 }
245 }
246
247 if (!upper.empty()) {
248 if (quality_control::is_greater_than(input[i], upper[b])) {
249 output[i] = true;
250 continue;
251 }
252 }
253
254 if constexpr(overwrite) {
255 output[i] = false;
256 }
257 }
258
259 return;
260 }
261 };
262
263public:
274 size_t nblocks = x.medians.size();
275
276 for (size_t b = 0; b < nblocks; ++b) {
277 auto med = x.medians[b];
278 auto mad = x.mads[b];
279
280 double& lthresh = x.medians[b];
281 double& uthresh = x.mads[b];
282
283 if (std::isnan(med)) {
284 if (lower) {
285 lthresh = std::numeric_limits<double>::quiet_NaN();
286 }
287 if (upper) {
288 uthresh = std::numeric_limits<double>::quiet_NaN();
289 }
290 } else {
291 auto delta = std::max(min_diff, num_mads * mad);
292 if (lower) {
293 lthresh = sanitize(med - delta, x.log);
294 }
295 if (upper) {
296 uthresh = sanitize(med + delta, x.log);
297 }
298 }
299 }
300
301 Thresholds output;
302 if (lower) {
303 output.lower = std::move(x.medians);
304 }
305 if (upper) {
306 output.upper = std::move(x.mads);
307 }
308 return output;
309 }
310};
311
312}
313
314#endif
Compute the median and MAD from an array of values.
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
Default parameters.
Definition ChooseOutlierFilters.hpp:35
static constexpr double num_mads
Definition ChooseOutlierFilters.hpp:49
static constexpr bool lower
Definition ChooseOutlierFilters.hpp:39
static constexpr bool upper
Definition ChooseOutlierFilters.hpp:44
static constexpr double min_diff
Definition ChooseOutlierFilters.hpp:54
Outlier thresholds for QC filtering.
Definition ChooseOutlierFilters.hpp:127
std::vector< Output > filter_blocked(size_t n, const Block *block, const Input *input) const
Definition ChooseOutlierFilters.hpp:228
std::vector< double > lower
Definition ChooseOutlierFilters.hpp:141
std::vector< double > upper
Definition ChooseOutlierFilters.hpp:148
void filter(size_t n, Input *input, Output *output) const
Definition ChooseOutlierFilters.hpp:165
std::vector< Output > filter(size_t n, const Input *input) const
Definition ChooseOutlierFilters.hpp:183
void filter_blocked(size_t n, const Block *block, const Input *input, Output *output) const
Definition ChooseOutlierFilters.hpp:205
Define outlier filters from the median and MAD.
Definition ChooseOutlierFilters.hpp:30
ChooseOutlierFilters & set_min_diff(double m=Defaults::min_diff)
Definition ChooseOutlierFilters.hpp:104
ChooseOutlierFilters & set_num_mads(double n=Defaults::num_mads)
Definition ChooseOutlierFilters.hpp:92
ChooseOutlierFilters & set_lower(bool l=Defaults::lower)
Definition ChooseOutlierFilters.hpp:70
Thresholds run(ComputeMedianMad::Results x) const
Definition ChooseOutlierFilters.hpp:273
ChooseOutlierFilters & set_upper(bool u=Defaults::upper)
Definition ChooseOutlierFilters.hpp:81
Medians and MADs, possibly for multiple blocks.
Definition ComputeMedianMad.hpp:83
std::vector< double > medians
Definition ComputeMedianMad.hpp:99
std::vector< double > mads
Definition ComputeMedianMad.hpp:107
bool log
Definition ComputeMedianMad.hpp:112