scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
summarize_comparisons.hpp
Go to the documentation of this file.
1#ifndef SCRAN_SUMMARIZE_COMPARISONS_HPP
2#define SCRAN_SUMMARIZE_COMPARISONS_HPP
3
4#include "../utils/macros.hpp"
5
6#include <algorithm>
7#include <numeric>
8#include <vector>
9#include <cmath>
10
17namespace scran {
18
22namespace differential_analysis {
23
28enum summary {
29 MIN,
30 MEAN,
31 MEDIAN,
32 MAX,
33 MIN_RANK,
34 n_summaries
35};
36
40template<class IT>
41double median (IT start, size_t n) {
42 int halfway = n / 2;
43 std::nth_element(start, start + halfway, start + n);
44
45 if (n % 2 == 1) {
46 return start[halfway];
47 }
48
49 double med = start[halfway];
50 std::nth_element(start, start + halfway - 1, start + halfway);
51 return (med + start[halfway - 1])/ 2;
52}
53
54template<typename Stat>
55void summarize_comparisons(int ngroups, const Stat* effects, int group, size_t gene, std::vector<std::vector<Stat*> >& output, std::vector<Stat>& buffer) {
56 auto ebegin = buffer.data();
57 auto elast = ebegin;
58
59 // Ignoring the self comparison and pruning out NaNs.
60 {
61 auto eptr = effects;
62 for (int r = 0; r < ngroups; ++r, ++eptr) {
63 if (r == group || std::isnan(*eptr)) {
64 continue;
65 }
66 *elast = *eptr;
67 ++elast;
68 }
69 }
70
71 int ncomps = elast - ebegin;
72 if (ncomps == 0) {
73 for (size_t i = 0; i < MIN_RANK; ++i) {
74 if (output[i].size()) {
75 output[i][group][gene] = std::numeric_limits<double>::quiet_NaN();
76 }
77 }
78 } else if (ncomps == 1) {
79 for (size_t i = 0; i < MIN_RANK; ++i) {
80 if (output[i].size()) {
81 output[i][group][gene] = *ebegin;
82 }
83 }
84 } else {
85 if (output[MIN].size()) {
86 output[MIN][group][gene] = *std::min_element(ebegin, elast);
87 }
88 if (output[MEAN].size()) {
89 output[MEAN][group][gene] = std::accumulate(ebegin, elast, 0.0) / ncomps;
90 }
91 if (output[MEDIAN].size()) {
92 output[MEDIAN][group][gene] = median(ebegin, ncomps);
93 }
94 if (output[MAX].size()) {
95 output[MAX][group][gene] = *std::max_element(ebegin, elast);
96 }
97 }
98}
99
100template<typename Stat>
101void summarize_comparisons(size_t ngenes, int ngroups, const Stat* effects, std::vector<std::vector<Stat*> >& output, int threads) {
102#ifndef SCRAN_CUSTOM_PARALLEL
103 #pragma omp parallel num_threads(threads)
104 {
105 std::vector<double> effect_buffer(ngroups);
106 #pragma omp for
107 for (size_t gene = 0; gene < ngenes; ++gene) {
108#else
109 SCRAN_CUSTOM_PARALLEL([&](size_t, size_t start, size_t length) -> void {
110 std::vector<double> effect_buffer(ngroups);
111 for (size_t gene = start, end = start + length; gene < end; ++gene) {
112#endif
113
114 auto base = effects + gene * ngroups * ngroups;
115 for (int l = 0; l < ngroups; ++l) {
116 summarize_comparisons(ngroups, base + l * ngroups, l, gene, output, effect_buffer);
117 }
118
119#ifndef SCRAN_CUSTOM_PARALLEL
120 }
121 }
122#else
123 }
124 }, ngenes, threads);
125#endif
126
127 return;
128}
129
130template<typename Stat>
131size_t fill_and_sort_rank_buffer(const Stat* effects, size_t stride, std::vector<std::pair<Stat, int> >& buffer) {
132 auto bIt = buffer.begin();
133 for (size_t i = 0, end = buffer.size(); i < end; ++i, effects += stride) {
134 if (!std::isnan(*effects)) {
135 bIt->first = -*effects; // negative to sort by decreasing value.
136 bIt->second = i;
137 ++bIt;
138 }
139 }
140 std::sort(buffer.begin(), bIt);
141 return bIt - buffer.begin();
142}
143
144template<typename Stat, typename Rank>
145void compute_min_rank_internal(size_t use, const std::vector<std::pair<Stat, int> >& buffer, Rank* output) {
146 Rank counter = 1;
147 for (size_t i = 0; i < use; ++i) {
148 auto& current = output[buffer[i].second];
149 if (counter < current) {
150 current = counter;
151 }
152 ++counter;
153 }
154}
155
156template<typename Stat>
157void compute_min_rank(size_t ngenes, size_t ngroups, int group, const Stat* effects, Stat* output, int threads) {
158 // Assign groups to threads, minus the 'group' itself.
159 std::vector<std::vector<int> > assignments(threads);
160 {
161 size_t per_thread = std::ceil(static_cast<double>(ngroups - 1) / threads);
162 auto cur_thread = assignments.begin();
163 for (size_t counter = 0; counter < ngroups; ++counter) {
164 if (counter == group) {
165 continue;
166 }
167 if (cur_thread->size() >= per_thread) {
168 ++cur_thread;
169 }
170 cur_thread->push_back(counter);
171 }
172 }
173
174 std::vector<std::vector<int> > stores(threads, std::vector<int>(ngenes, ngenes + 1));
175
176#ifndef SCRAN_CUSTOM_PARALLEL
177 #pragma omp parallel num_threads(threads)
178 {
179 std::vector<std::pair<Stat, int> > buffer(ngenes);
180 #pragma omp for
181 for (int t = 0; t < threads; ++t) {
182 for (auto g : assignments[t]) {
183#else
184 SCRAN_CUSTOM_PARALLEL([&](size_t, size_t start, size_t length) -> void {
185 std::vector<std::pair<Stat, int> > buffer(ngenes);
186 for (size_t t = start, end = start + length; t < end; ++t) { // should be a no-op loop, but we do this just in case.
187 for (auto g : assignments[t]) {
188#endif
189
190 auto used = fill_and_sort_rank_buffer(effects + g, ngroups, buffer);
191 compute_min_rank_internal(used, buffer, stores[t].data());
192
193#ifndef SCRAN_CUSTOM_PARALLEL
194 }
195 }
196 }
197#else
198 }
199 }
200 }, threads, threads);
201#endif
202
203 std::fill(output, output + ngenes, ngenes + 1);
204 for (int t = 0; t < threads; ++t) {
205 auto copy = output;
206 for (auto x : stores[t]) {
207 if (x < *copy) {
208 *copy = x;
209 }
210 ++copy;
211 }
212 }
213}
214
215template<typename Stat>
216void compute_min_rank(size_t ngenes, size_t ngroups, const Stat* effects, std::vector<Stat*>& output, int threads) {
217 size_t shift = ngroups * ngroups;
218
219#ifndef SCRAN_CUSTOM_PARALLEL
220 #pragma omp parallel num_threads(threads)
221 {
222 std::vector<std::pair<Stat, int> > buffer(ngenes);
223 #pragma omp for
224 for (size_t g = 0; g < ngroups; ++g) {
225#else
226 SCRAN_CUSTOM_PARALLEL([&](size_t, size_t start, size_t length) -> void {
227 std::vector<std::pair<Stat, int> > buffer(ngenes);
228 for (size_t g = start, end = start + length; g < end; ++g) {
229#endif
230
231 auto target = output[g];
232 std::fill(target, target + ngenes, ngenes + 1);
233 auto base = effects + g * ngroups;
234
235 for (int g2 = 0; g2 < ngroups; ++g2) {
236 if (g == g2) {
237 continue;
238 }
239 auto used = fill_and_sort_rank_buffer(base + g2, shift, buffer);
240 compute_min_rank_internal(used, buffer, target);
241 }
242
243#ifndef SCRAN_CUSTOM_PARALLEL
244 }
245 }
246#else
247 }
248 }, ngroups, threads);
249#endif
250}
251
256}
257
258}
259
260#endif
summary
Definition summarize_comparisons.hpp:28
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18