scran
C++ library for basic single-cell RNA-seq analyses
Loading...
Searching...
No Matches
BuildSnnGraph.hpp
Go to the documentation of this file.
1#ifndef SCRAN_BUILDSNNGRAPH_HPP
2#define SCRAN_BUILDSNNGRAPH_HPP
3
4#include "../utils/macros.hpp"
5
6#if __has_include("igraph.h")
7#include "igraph.h"
8#include "igraph_utils.hpp"
9#endif
10
11#include <vector>
12#include <algorithm>
13#include <memory>
14
15#include "knncolle/knncolle.hpp"
16
23namespace scran {
24
57public:
61 enum Scheme { RANKED, NUMBER, JACCARD };
62
66 struct Defaults {
70 static constexpr int neighbors = 10;
71
75 static constexpr Scheme weighting_scheme = RANKED;
76
80 static constexpr bool approximate = false;
81
85 static constexpr int num_threads = 1;
86 };
87private:
88 int num_neighbors = Defaults::neighbors;
89 Scheme weight_scheme = Defaults::weighting_scheme;
90 bool approximate = Defaults::approximate;
91 int nthreads = Defaults::num_threads;
92
93public:
100 num_neighbors = k;
101 return *this;
102 }
103
110 approximate = a;
111 return *this;
112 }
113
120 weight_scheme = w;
121 return *this;
122 }
123
129 nthreads = n;
130 return *this;
131 }
132
133public:
139 struct Results {
143 size_t ncells;
144
149#if __has_include("igraph.h")
150 std::vector<igraph_integer_t> edges;
151#else
152 std::vector<int> edges;
153#endif
154
159#if __has_include("igraph.h")
160 std::vector<igraph_real_t> weights;
161#else
162 std::vector<double> weights;
163#endif
164
165#if __has_include("igraph.h")
172 igraph::Graph to_igraph() const {
173 igraph_vector_int_t edge_view;
174 igraph_vector_int_view(&edge_view, edges.data(), edges.size());
175 return igraph::Graph(&edge_view, ncells, /* directed = */ false);
176 }
177#endif
178 };
179
180private:
181 template<class Indices_, class Function_>
182 Results run(const std::vector<Indices_>& indices, Function_ get_index) const {
183 size_t ncells = indices.size();
184
185 // Not parallel-frendly, so we don't construct this with the neighbor search
186 std::vector<std::vector<std::pair<int, int> > > hosts(ncells);
187 for (size_t i = 0; i < ncells; ++i) {
188 hosts[i].push_back(std::make_pair(0, i)); // each point is its own 0-th nearest neighbor
189 const auto& current = indices[i];
190 int counter = 1;
191 for (auto x : current) {
192 hosts[get_index(x)].push_back(std::make_pair(counter, i));
193 ++counter;
194 }
195 }
196
197 // Constructing the shared neighbor graph.
198 std::vector<std::vector<int> > edge_stores(ncells);
199 std::vector<std::vector<double> > weight_stores(ncells);
200
201#ifndef SCRAN_CUSTOM_PARALLEL
202 #pragma omp parallel num_threads(nthreads)
203 {
204#else
205 SCRAN_CUSTOM_PARALLEL([&](size_t, size_t start, size_t length) -> void {
206#endif
207
208 std::vector<int> current_score(ncells);
209 std::vector<int> current_added;
210 current_added.reserve(ncells);
211
212#ifndef SCRAN_CUSTOM_PARALLEL
213 #pragma omp for
214 for (size_t j = 0; j < ncells; ++j) {
215#else
216 for (size_t j = start, end = start + length; j < end; ++j) {
217#endif
218
219 const auto& current_neighbors = indices[j];
220 int nneighbors = current_neighbors.size();
221
222 for (int i = 0; i <= nneighbors; ++i) {
223 // First iteration treats 'j' as the zero-th neighbor.
224 // Remaining iterations go through the neighbors of 'j'.
225 const int cur_neighbor = (i==0 ? j : get_index(current_neighbors[i-1]));
226
227 // Going through all observations 'h' for which 'cur_neighbor'
228 // is a nearest neighbor, a.k.a., 'cur_neighbor' is a shared
229 // neighbor of both 'h' and 'j'.
230 for (const auto& h : hosts[cur_neighbor]) {
231 auto othernode = h.second;
232
233 if (othernode < j) { // avoid duplicates from symmetry in the SNN calculations.
234 int& existing_other = current_score[othernode];
235 if (weight_scheme == RANKED) {
236 // Recording the lowest combined rank per neighbor.
237 int currank = h.first + i;
238 if (existing_other == 0) {
239 existing_other = currank;
240 current_added.push_back(othernode);
241 } else if (existing_other > currank) {
242 existing_other = currank;
243 }
244 } else {
245 // Recording the number of shared neighbors.
246 if (existing_other==0) {
247 current_added.push_back(othernode);
248 }
249 ++existing_other;
250 }
251 }
252 }
253 }
254
255 // Converting to edges.
256 auto& current_edges = edge_stores[j];
257 current_edges.reserve(current_added.size() * 2);
258 auto& current_weights = weight_stores[j];
259 current_weights.reserve(current_added.size() * 2);
260
261 for (auto othernode : current_added) {
262 int& otherscore = current_score[othernode];
263 double finalscore;
264 if (weight_scheme == RANKED) {
265 finalscore = static_cast<double>(nneighbors) - 0.5 * static_cast<double>(otherscore);
266 } else {
267 finalscore = otherscore;
268 if (weight_scheme == JACCARD) {
269 finalscore = finalscore / (2 * (nneighbors + 1) - finalscore);
270 }
271 }
272
273 current_edges.push_back(j);
274 current_edges.push_back(othernode);
275 current_weights.push_back(std::max(finalscore, 1e-6)); // Ensuring that an edge with a positive weight is always reported.
276
277 // Resetting all those added to zero.
278 otherscore = 0;
279 }
280 current_added.clear();
281
282#ifndef SCRAN_CUSTOM_PARALLEL
283 }
284 }
285#else
286 }
287 }, ncells, nthreads);
288#endif
289
290 // Collating the total number of edges.
291 size_t nedges = 0;
292 for (const auto& w : weight_stores) {
293 nedges += w.size();
294 }
295
296 Results output;
297 output.ncells = ncells;
298
299 output.weights.reserve(nedges);
300 for (const auto& w : weight_stores) {
301 output.weights.insert(output.weights.end(), w.begin(), w.end());
302 }
303 weight_stores.clear();
304 weight_stores.shrink_to_fit(); // forcibly release memory so that we have some more space for edges.
305
306 output.edges.reserve(nedges * 2);
307 for (const auto& e : edge_stores) {
308 output.edges.insert(output.edges.end(), e.begin(), e.end());
309 }
310
311 return output;
312 }
313
314public:
324 Results run(size_t ndims, size_t ncells, const double* mat) const {
325 std::unique_ptr<knncolle::Base<> > ptr;
326 if (!approximate) {
327 ptr.reset(new knncolle::VpTreeEuclidean<>(ndims, ncells, mat));
328 } else {
329 ptr.reset(new knncolle::AnnoyEuclidean<>(ndims, ncells, mat));
330 }
331 return run(ptr.get());
332 }
333
341 template<class Algorithm>
342 Results run(const Algorithm* search) const {
343 // Collecting neighbors.
344 size_t ncells = search->nobs();
345 std::vector<std::vector<int> > indices(ncells);
346
347#ifndef SCRAN_CUSTOM_PARALLEL
348 #pragma omp parallel for num_threads(nthreads)
349 for (size_t i = 0; i < ncells; ++i) {
350#else
351 SCRAN_CUSTOM_PARALLEL([&](size_t, size_t start, size_t length) -> void {
352 for (size_t i = start, end = start + length; i < end; ++i) {
353#endif
354 auto neighbors = search->find_nearest_neighbors(i, num_neighbors);
355 auto& current = indices[i];
356 for (auto x : neighbors) {
357 current.push_back(x.first);
358 }
359 }
360#ifdef SCRAN_CUSTOM_PARALLEL
361 }, ncells, nthreads);
362#endif
363
364 return run(indices);
365 }
366
377 template<typename Index_, typename Distance_>
378 Results run(const knncolle::NeighborList<Index_, Distance_>& neighbors) const {
379 return run(neighbors, [](const std::pair<Index_, Distance_>& x) -> Index_ { return x.first; });
380 }
381
390 template<class Indices_>
391 Results run(const std::vector<Indices_>& indices) const {
392 typedef typename std::remove_cv<typename std::remove_reference<decltype(std::declval<Indices_>()[0])>::type>::type Element;
393 return run(indices, [](Element i) -> Element { return i; });
394 }
395};
396
397}
398
399#endif
Build a shared nearest-neighbor graph with cells as nodes.
Definition BuildSnnGraph.hpp:56
Results run(const knncolle::NeighborList< Index_, Distance_ > &neighbors) const
Definition BuildSnnGraph.hpp:378
Results run(const Algorithm *search) const
Definition BuildSnnGraph.hpp:342
BuildSnnGraph & set_approximate(bool a=Defaults::approximate)
Definition BuildSnnGraph.hpp:109
Scheme
Definition BuildSnnGraph.hpp:61
BuildSnnGraph & set_num_threads(int n=Defaults::num_threads)
Definition BuildSnnGraph.hpp:128
BuildSnnGraph & set_neighbors(int k=Defaults::neighbors)
Definition BuildSnnGraph.hpp:99
Results run(size_t ndims, size_t ncells, const double *mat) const
Definition BuildSnnGraph.hpp:324
Results run(const std::vector< Indices_ > &indices) const
Definition BuildSnnGraph.hpp:391
BuildSnnGraph & set_weighting_scheme(Scheme w=Defaults::weighting_scheme)
Definition BuildSnnGraph.hpp:119
Utilities for manipulating igraph data structures.
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
Default parameter settings.
Definition BuildSnnGraph.hpp:66
static constexpr bool approximate
Definition BuildSnnGraph.hpp:80
static constexpr Scheme weighting_scheme
Definition BuildSnnGraph.hpp:75
static constexpr int neighbors
Definition BuildSnnGraph.hpp:70
static constexpr int num_threads
Definition BuildSnnGraph.hpp:85
Results of SNN graph construction.
Definition BuildSnnGraph.hpp:139
std::vector< int > edges
Definition BuildSnnGraph.hpp:152
std::vector< double > weights
Definition BuildSnnGraph.hpp:162
size_t ncells
Definition BuildSnnGraph.hpp:143
Wrapper around the igraph_t class from igraph.
Definition igraph_utils.hpp:180