scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
SuggestCrisprQcFilters.hpp
Go to the documentation of this file.
1#ifndef SCRAN_SUGGEST_CRISPR_QC_FILTERS_H
2#define SCRAN_SUGGEST_CRISPR_QC_FILTERS_H
3
4#include "../utils/macros.hpp"
5
6#include <vector>
7#include <cmath>
8
9#include "utils.hpp"
11#include "ComputeMedianMad.hpp"
13
20namespace scran {
21
56public:
60 struct Defaults {
64 static constexpr double num_mads = 3;
65 };
66
67private:
68 double num_mads = Defaults::num_mads;
69
70public:
78 num_mads = n;
79 return *this;
80 }
81
82public:
90 struct Thresholds {
96 std::vector<double> max_count;
97
98 public:
115 template<bool overwrite = true, typename Float, typename Integer, typename Output>
116 void filter(size_t n, const PerCellCrisprQcMetrics::Buffers<Float, Integer>& buffers, Output* output) const {
117 if (max_count.size() != 1) {
118 throw std::runtime_error("should use filter_blocked() for multiple batches");
119 }
120 filter_<overwrite>(n, buffers, output, [](size_t i) -> size_t { return 0; });
121 }
122
134 template<typename Output = uint8_t>
135 std::vector<Output> filter(const PerCellCrisprQcMetrics::Results& metrics) const {
136 std::vector<Output> output(metrics.sums.size());
137 filter(output.size(), metrics.buffers(), output.data());
138 return output;
139 }
140
141 public:
158 template<bool overwrite = true, typename Block, typename Float, typename Integer, typename Output>
159 void filter_blocked(size_t n, const Block* block, const PerCellCrisprQcMetrics::Buffers<Float, Integer>& buffers, Output* output) const {
160 if (block) {
161 filter_<overwrite>(n, buffers, output, [&](size_t i) -> Block { return block[i]; });
162 } else {
163 filter<overwrite>(n, buffers, output);
164 }
165 }
166
181 template<typename Output = uint8_t, typename Block>
182 std::vector<Output> filter_blocked(const PerCellCrisprQcMetrics::Results& metrics, const Block* block) const {
183 std::vector<Output> output(metrics.sums.size());
184 filter_blocked(output.size(), block, metrics.buffers(), output.data());
185 return output;
186 }
187
188 private:
189 template<bool overwrite, typename Float, typename Integer, typename Output, typename Function>
190 void filter_(size_t n, const PerCellCrisprQcMetrics::Buffers<Float, Integer>& buffers, Output* output, Function indexer) const {
191 for (size_t i = 0; i < n; ++i) {
192 auto b = indexer(i);
193 auto candidate = buffers.max_proportion[i] * buffers.sums[i];
194
195 if (quality_control::is_less_than<double>(candidate, max_count[b])) {
196 output[i] = true;
197 continue;
198 }
199
200 if constexpr(overwrite) {
201 output[i] = false;
202 }
203 }
204
205 return;
206 }
207 };
208
209public:
219 template<typename Float, typename Integer>
221 return run_blocked(n, static_cast<int*>(NULL), buffers);
222 }
223
231 return run(metrics.max_proportion.size(), metrics.buffers());
232 }
233
234public:
248 template<typename Block, typename Float, typename Integer>
249 Thresholds run_blocked(size_t n, const Block* block, const PerCellCrisprQcMetrics::Buffers<Float, Integer>& buffers) const {
250 Thresholds output;
251 std::vector<int> starts;
252 std::vector<Block> subblock;
253 if (block) {
255 subblock.reserve(n);
256 }
257
258 // Subsetting to the observations in the top 50% of proportions.
259 std::vector<double> workspace(n);
260 std::vector<double> subvalues;
261 subvalues.reserve(n);
262
263 {
264 ComputeMedianMad meddler;
265 meddler.set_median_only(true);
266 auto prop_res = meddler.run_blocked(n, block, starts, buffers.max_proportion, workspace.data());
267
268 if (block) {
269 for (size_t i = 0; i < n; ++i) {
270 auto p = buffers.max_proportion[i];
271 if (quality_control::is_greater_than_or_equal_to(p, prop_res.medians[block[i]])) {
272 subvalues.push_back(p * buffers.sums[i]);
273 subblock.push_back(block[i]);
274 }
275 }
276 } else {
277 for (size_t i = 0; i < n; ++i) {
278 auto p = buffers.max_proportion[i];
279 if (quality_control::is_greater_than_or_equal_to(p, prop_res.medians[0])) {
280 subvalues.push_back(p * buffers.sums[i]);
281 }
282 }
283 }
284 }
285
286 // Filtering on the max counts.
287 {
288 ComputeMedianMad meddler;
289 meddler.set_log(true);
290
291 const Block* bptr = NULL;
292 starts.clear();
293 if (block) {
294 bptr = subblock.data();
295 starts = ComputeMedianMad::compute_block_starts(subblock.size(), bptr);
296 }
297
298 auto sums_res = meddler.run_blocked(subvalues.size(), bptr, starts, subvalues.data(), workspace.data());
299
301 filter.set_num_mads(num_mads);
302 filter.set_upper(false);
303 auto sums_filt = filter.run(std::move(sums_res));
304
305 output.max_count = std::move(sums_filt.lower);
306 }
307
308 return output;
309 }
310
322 template<typename Block>
323 Thresholds run_blocked(const PerCellCrisprQcMetrics::Results& metrics, const Block* block) const {
324 return run_blocked(metrics.max_proportion.size(), block, metrics.buffers());
325 }
326};
327
328}
329
330#endif
Define outlier filters using a MAD-based approach.
Compute the median and MAD from an array of values.
Compute per-cell quality control metrics from a CRISPR guide count matrix.
Compute the median and MAD from an array of values.
Definition ComputeMedianMad.hpp:30
ComputeMedianMad & set_median_only(bool m=Defaults::median_only)
Definition ComputeMedianMad.hpp:71
Results run_blocked(size_t n, const Block *block, std::vector< int > starts, const Input *metrics, Buffer *buffer) const
Definition ComputeMedianMad.hpp:214
static std::vector< int > compute_block_starts(size_t n, const Block *block)
Definition ComputeMedianMad.hpp:129
ComputeMedianMad & set_log(bool l=Defaults::log)
Definition ComputeMedianMad.hpp:60
Create filters to identify low-quality cells from CRISPR-derived QC metrics.
Definition SuggestCrisprQcFilters.hpp:55
Thresholds run(size_t n, const PerCellCrisprQcMetrics::Buffers< Float, Integer > &buffers) const
Definition SuggestCrisprQcFilters.hpp:220
Thresholds run_blocked(size_t n, const Block *block, const PerCellCrisprQcMetrics::Buffers< Float, Integer > &buffers) const
Definition SuggestCrisprQcFilters.hpp:249
Thresholds run_blocked(const PerCellCrisprQcMetrics::Results &metrics, const Block *block) const
Definition SuggestCrisprQcFilters.hpp:323
SuggestCrisprQcFilters & set_num_mads(double n=Defaults::num_mads)
Definition SuggestCrisprQcFilters.hpp:77
Thresholds run(const PerCellCrisprQcMetrics::Results &metrics) const
Definition SuggestCrisprQcFilters.hpp:230
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
Define outlier filters from the median and MAD.
Definition ChooseOutlierFilters.hpp:30
ChooseOutlierFilters & set_num_mads(double n=Defaults::num_mads)
Definition ChooseOutlierFilters.hpp:92
Thresholds run(ComputeMedianMad::Results x) const
Definition ChooseOutlierFilters.hpp:273
ChooseOutlierFilters & set_upper(bool u=Defaults::upper)
Definition ChooseOutlierFilters.hpp:81
Buffers for direct storage of the calculated statistics.
Definition PerCellCrisprQcMetrics.hpp:73
Float * max_proportion
Definition PerCellCrisprQcMetrics.hpp:87
Float * sums
Definition PerCellCrisprQcMetrics.hpp:77
Result store for QC metric calculations.
Definition PerCellCrisprQcMetrics.hpp:142
Buffers buffers()
Definition PerCellCrisprQcMetrics.hpp:178
std::vector< double > max_proportion
Definition PerCellCrisprQcMetrics.hpp:166
std::vector< double > sums
Definition PerCellCrisprQcMetrics.hpp:156
Default parameters.
Definition SuggestCrisprQcFilters.hpp:60
static constexpr double num_mads
Definition SuggestCrisprQcFilters.hpp:64
Thresholds to define outliers on each metric.
Definition SuggestCrisprQcFilters.hpp:90
void filter(size_t n, const PerCellCrisprQcMetrics::Buffers< Float, Integer > &buffers, Output *output) const
Definition SuggestCrisprQcFilters.hpp:116
std::vector< Output > filter_blocked(const PerCellCrisprQcMetrics::Results &metrics, const Block *block) const
Definition SuggestCrisprQcFilters.hpp:182
void filter_blocked(size_t n, const Block *block, const PerCellCrisprQcMetrics::Buffers< Float, Integer > &buffers, Output *output) const
Definition SuggestCrisprQcFilters.hpp:159
std::vector< double > max_count
Definition SuggestCrisprQcFilters.hpp:96
std::vector< Output > filter(const PerCellCrisprQcMetrics::Results &metrics) const
Definition SuggestCrisprQcFilters.hpp:135