scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
ComputeMedianMad.hpp
Go to the documentation of this file.
1#ifndef SCRAN_COMPUTE_MEDIAN_MAD_H
2#define SCRAN_COMPUTE_MEDIAN_MAD_H
3
4#include "../utils/macros.hpp"
5
6#include <vector>
7#include <limits>
8#include <cmath>
9#include <algorithm>
10#include <cstdint>
11
12#include "utils.hpp"
13
20namespace scran {
21
31public:
35 struct Defaults {
39 static constexpr bool log = false;
40
44 static constexpr bool median_only = false;
45 };
46
47private:
48 bool log = Defaults::log;
49
50 bool median_only = Defaults::median_only;
51
52public:
61 log = l;
62 return *this;
63 }
64
72 median_only = m;
73 return *this;
74 }
75
76public:
83 struct Results {
87 Results(int nblocks=0) : medians(nblocks), mads(nblocks) {}
99 std::vector<double> medians;
100
107 std::vector<double> mads;
108
112 bool log = false;
113 };
114
115public:
128 template<typename Block>
129 static std::vector<int> compute_block_starts(size_t n, const Block* block) {
130 std::vector<int> starts;
131
132 for (size_t i = 0; i < n; ++i) {
133 size_t candidate = block[i] + 1;
134 if (candidate > starts.size()) {
135 starts.resize(candidate);
136 }
137 ++starts[block[i]];
138 }
139
140 int sofar = 0;
141 for (auto& s : starts) {
142 int last = sofar;
143 sofar += s;
144 s = last;
145 }
146
147 return starts;
148 }
149
150public:
162 template<typename Input, typename Buffer>
163 Results run(size_t n, const Input* metrics, Buffer* buffer) const {
164 Results output(1);
165 output.log = log;
166
167 auto copy = buffer;
168 for (size_t i = 0; i < n; ++i) {
169 if (!std::isnan(metrics[i])) {
170 *copy = metrics[i];
171 ++copy;
172 }
173 }
174
175 compute(copy - buffer, buffer, output.medians[0], output.mads[0]);
176 return output;
177 }
178
189 template<typename Input>
190 Results run(size_t n, const Input* metrics) const {
191 std::vector<double> buffer(n);
192 return run(n, metrics, buffer.data());
193 }
194
195public:
213 template<typename Input, typename Block, typename Buffer>
214 Results run_blocked(size_t n, const Block* block, std::vector<int> starts, const Input* metrics, Buffer* buffer) const {
215 if (block) {
216 // Unscrambling into the buffer.
217 auto ends = starts;
218 for (size_t i = 0; i < n; ++i) {
219 if (!std::isnan(metrics[i])) {
220 auto& pos = ends[block[i]];
221 buffer[pos] = metrics[i];
222 ++pos;
223 }
224 }
225
226 // Using the ranges on the buffer.
227 size_t nblocks = starts.size();
228 Results output(nblocks);
229 output.log = log;
230 for (size_t g = 0; g < nblocks; ++g) {
231 compute(ends[g] - starts[g], buffer + starts[g], output.medians[g], output.mads[g]);
232 }
233
234 return output;
235 } else {
236 return run(n, metrics, buffer);
237 }
238 }
239
254 template<typename Block, typename Input>
255 Results run_blocked(size_t n, const Block* block, const Input* metrics) const {
256 std::vector<double> buffer(n);
257 return run_blocked(n, block, compute_block_starts(n, block), metrics, buffer.data());
258 }
259
260private:
261 template<typename Buffer>
262 void compute(size_t n, Buffer* ptr, double& median, double& mad) const {
263 if (!n) {
264 median = std::numeric_limits<double>::quiet_NaN();
265 mad = std::numeric_limits<double>::quiet_NaN();
266 return;
267 }
268
269 if (log) {
270 auto copy = ptr;
271 for (size_t i = 0; i < n; ++i, ++copy) {
272 if (*copy > 0) {
273 *copy = std::log(*copy);
274 } else if (*copy == 0) {
275 *copy = -std::numeric_limits<double>::infinity();
276 } else {
277 throw std::runtime_error("cannot log-transform negative values");
278 }
279 }
280 }
281
282 // First, getting the median.
283 size_t halfway = n / 2;
284 median = compute_median(ptr, halfway, n);
285
286 if (median_only || std::isnan(median)) {
287 // Giving up.
288 mad = std::numeric_limits<double>::quiet_NaN();
289 return;
290 } else if (std::isinf(median)) {
291 // MADs should be no-ops when added/subtracted from infinity. Any
292 // finite value will do here, so might as well keep it simple.
293 mad = 0;
294 return;
295 }
296
297 // Now getting the MAD.
298 auto copy = ptr;
299 for (size_t i = 0; i < n; ++i, ++copy) {
300 *copy = std::abs(*copy - median);
301 }
302 mad = compute_median(ptr, halfway, n);
303 mad *= 1.4826; // for equivalence with the standard deviation under normality.
304 return;
305 }
306
307 static double compute_median (double* ptr, size_t halfway, size_t n) {
308 std::nth_element(ptr, ptr + halfway, ptr + n);
309 double medtmp = *(ptr + halfway);
310
311 if (n % 2 == 0) {
312 std::nth_element(ptr, ptr + halfway - 1, ptr + n);
313 double left = *(ptr + halfway - 1);
314
315 if (std::isinf(left) && std::isinf(medtmp)) {
316 if ((left > 0) != (medtmp > 0)) {
317 return std::numeric_limits<double>::quiet_NaN();
318 } else {
319 return medtmp;
320 }
321 }
322
323 medtmp = (medtmp + left)/2;
324 }
325
326 return medtmp;
327 }
328};
329
330}
331
332#endif
Compute the median and MAD from an array of values.
Definition ComputeMedianMad.hpp:30
Results run(size_t n, const Input *metrics, Buffer *buffer) const
Definition ComputeMedianMad.hpp:163
ComputeMedianMad & set_median_only(bool m=Defaults::median_only)
Definition ComputeMedianMad.hpp:71
Results run_blocked(size_t n, const Block *block, std::vector< int > starts, const Input *metrics, Buffer *buffer) const
Definition ComputeMedianMad.hpp:214
Results run(size_t n, const Input *metrics) const
Definition ComputeMedianMad.hpp:190
Results run_blocked(size_t n, const Block *block, const Input *metrics) const
Definition ComputeMedianMad.hpp:255
static std::vector< int > compute_block_starts(size_t n, const Block *block)
Definition ComputeMedianMad.hpp:129
ComputeMedianMad & set_log(bool l=Defaults::log)
Definition ComputeMedianMad.hpp:60
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
Default parameters.
Definition ComputeMedianMad.hpp:35
static constexpr bool log
Definition ComputeMedianMad.hpp:39
static constexpr bool median_only
Definition ComputeMedianMad.hpp:44
Medians and MADs, possibly for multiple blocks.
Definition ComputeMedianMad.hpp:83
std::vector< double > medians
Definition ComputeMedianMad.hpp:99
std::vector< double > mads
Definition ComputeMedianMad.hpp:107
bool log
Definition ComputeMedianMad.hpp:112