scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
ScaleByNeighbors.hpp
Go to the documentation of this file.
1#ifndef SCRAN_NEIGHBOR_SCALING_HPP
2#define SCRAN_NEIGHBOR_SCALING_HPP
3
4#include "../utils/macros.hpp"
5
6#include <vector>
7#include <cmath>
8#include <memory>
9#include "knncolle/knncolle.hpp"
10#include "tatami/stats/medians.hpp"
11
18namespace scran {
19
39public:
43 struct Defaults {
47 static constexpr int neighbors = 20;
48
52 static constexpr bool approximate = false;
53
57 static constexpr int num_threads = 1;
58 };
59
67 num_neighbors = n;
68 return *this;
69 }
70
80 approximate = a;
81 return *this;
82 }
83
89 nthreads = n;
90 return *this;
91 }
92
93private:
94 int num_neighbors = Defaults::neighbors;
95 bool approximate = Defaults::approximate;
96 int nthreads = Defaults::num_threads;
97
98public:
109 std::pair<double, double> compute_distance(int ndim, size_t ncells, const double* data) const {
110 std::unique_ptr<knncolle::Base<> > ptr;
111 if (!approximate) {
112 ptr.reset(new knncolle::VpTreeEuclidean<>(ndim, ncells, data));
113 } else {
114 ptr.reset(new knncolle::AnnoyEuclidean<>(ndim, ncells, data));
115 }
116 return compute_distance(ptr.get());
117 }
118
127 template<class Search>
128 std::pair<double, double> compute_distance(const Search* search) const {
129 size_t nobs = search->nobs();
130 std::vector<double> dist(nobs);
131
132 tatami::parallelize([&](size_t, size_t start, size_t length) -> void {
133 for (size_t i = start, end = start + length; i < end; ++i) {
134 auto neighbors = search->find_nearest_neighbors(i, num_neighbors);
135 dist[i] = neighbors.back().second;
136 }
137 }, nobs, nthreads);
138
139 double med = tatami::stats::compute_median<double>(dist.data(), nobs);
140 double rmsd = 0;
141 for (auto d : dist) {
142 rmsd += d * d;
143 }
144 rmsd = std::sqrt(rmsd);
145 return std::make_pair(med, rmsd);
146 }
147
148public:
159 static std::vector<double> compute_scale(const std::vector<std::pair<double, double> >& distances) {
160 std::vector<double> output(distances.size());
161
162 // Use the first entry with a non-zero RMSD as the reference.
163 bool found_ref = false;
164 size_t ref = 0;
165 for (size_t e = 0; e < distances.size(); ++e) {
166 if (distances[e].second) {
167 found_ref = true;
168 ref = e;
169 break;
170 }
171 }
172
173 // If all of them have a zero RMSD, then all scalings are zero, because it doesn't matter.
174 if (found_ref) {
175 const auto& dref = distances[ref];
176 for (size_t e = 0; e < distances.size(); ++e) {
177 output[e] = (e == ref ? 1 : compute_scale(dref, distances[e]));
178 }
179 }
180
181 return output;
182 }
183
200 static double compute_scale(const std::pair<double, double>& ref, const std::pair<double, double>& target) {
201 if (target.first == 0 || ref.first == 0) {
202 if (target.second == 0) {
203 return std::numeric_limits<double>::infinity();
204 } else if (ref.second == 0) {
205 return 0;
206 } else {
207 return ref.second / target.second;
208 }
209 } else {
210 return ref.first / target.first;
211 }
212 }
213
232 template<typename Embed>
233 static void combine_scaled_embeddings(const std::vector<int>& ndims, size_t ncells, const std::vector<Embed>& embeddings, const std::vector<double>& scaling, double* output) {
234 size_t nembed = ndims.size();
235 if (embeddings.size() != nembed || scaling.size() != nembed) {
236 throw std::runtime_error("'ndims', 'embeddings' and 'scale' should have the same length");
237 }
238
239 int ntotal = std::accumulate(ndims.begin(), ndims.end(), 0);
240 size_t offset = 0;
241
242 for (size_t e = 0; e < nembed; ++e) {
243 size_t curdim = ndims[e];
244 auto inptr = embeddings[e];
245 auto outptr = output + offset;
246 auto s = scaling[e];
247
248 if (std::isinf(s)) {
249 // If the scaling factor is infinite, it implies that the current
250 // embedding is all-zero, so we just fill with zeros, and move on.
251 for (size_t c = 0; c < ncells; ++c, inptr += curdim, outptr += ntotal) {
252 std::fill(outptr, outptr + curdim, 0);
253 }
254 } else {
255 for (size_t c = 0; c < ncells; ++c, inptr += curdim, outptr += ntotal) {
256 for (size_t d = 0; d < curdim; ++d) {
257 outptr[d] = inptr[d] * s;
258 }
259 }
260 }
261
262 offset += curdim;
263 }
264 }
265};
266
267}
268
269#endif
Scale multi-modal embeddings to adjust for differences in variance.
Definition ScaleByNeighbors.hpp:38
static std::vector< double > compute_scale(const std::vector< std::pair< double, double > > &distances)
Definition ScaleByNeighbors.hpp:159
static double compute_scale(const std::pair< double, double > &ref, const std::pair< double, double > &target)
Definition ScaleByNeighbors.hpp:200
ScaleByNeighbors & set_num_threads(int n=Defaults::num_threads)
Definition ScaleByNeighbors.hpp:88
std::pair< double, double > compute_distance(int ndim, size_t ncells, const double *data) const
Definition ScaleByNeighbors.hpp:109
std::pair< double, double > compute_distance(const Search *search) const
Definition ScaleByNeighbors.hpp:128
ScaleByNeighbors & set_approximate(int a=Defaults::approximate)
Definition ScaleByNeighbors.hpp:79
static void combine_scaled_embeddings(const std::vector< int > &ndims, size_t ncells, const std::vector< Embed > &embeddings, const std::vector< double > &scaling, double *output)
Definition ScaleByNeighbors.hpp:233
ScaleByNeighbors & set_neighbors(int n=Defaults::neighbors)
Definition ScaleByNeighbors.hpp:66
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
Default parameter settings.
Definition ScaleByNeighbors.hpp:43
static constexpr int num_threads
Definition ScaleByNeighbors.hpp:57
static constexpr bool approximate
Definition ScaleByNeighbors.hpp:52
static constexpr int neighbors
Definition ScaleByNeighbors.hpp:47