1#ifndef SCRAN_QUICK_GROUPED_SIZE_FACTORS_HPP
2#define SCRAN_QUICK_GROUPED_SIZE_FACTORS_HPP
4#include "../utils/macros.hpp"
12#include "tatami/tatami.hpp"
13#include "kmeans/Kmeans.hpp"
14#include "kmeans/InitializePCAPartition.hpp"
16#include "../utils/blocking.hpp"
17#include "../dimensionality_reduction/SimplePca.hpp"
32namespace quick_grouped_size_factors {
40 typename Block_ = int,
41 typename SizeFactor_ =
double
85auto cluster(
const tatami::Matrix<Value_, Index_>* mat,
int rank,
size_t clusters,
int num_threads) {
89 auto pc_out = pca_runner.
run(mat);
90 const auto& pcs = pc_out.
pcs;
92 kmeans::Kmeans kmeans_runner;
93 kmeans_runner.set_num_threads(num_threads);
94 kmeans::InitializePCAPartition<Value_, Index_, Index_> init;
95 return kmeans_runner.run(
137 typename OutputFactor_,
142 std::vector<Index_> clusters;
143 Index_ NC = mat->ncol();
144 auto ptr = tatami::wrap_shared_ptr(mat);
151 fun = [](
size_t n) ->
size_t {
152 size_t candidate = std::sqrt(
static_cast<double>(n));
153 return std::min(candidate,
static_cast<size_t>(50));
159 std::vector<std::vector<Index_> > assignments(nblocks);
160 for (Index_ c = 0; c < NC; ++c) {
161 assignments[opt.
block[c]].push_back(c);
165 Index_ last_cluster = 0;
167 for (
size_t b = 0; b < nblocks; ++b) {
168 const auto& inblock = assignments[b];
169 auto subptr = tatami::make_DelayedSubset<1>(ptr, tatami::ArrayView<Index_>(inblock.data(), inblock.size()));
171 std::shared_ptr<tatami::Matrix<Value_, Index_> > normalized;
173 std::vector<SizeFactor_> fac;
174 fac.reserve(inblock.size());
175 for (
auto i : inblock) {
178 normalized = logger.
run(std::move(subptr), std::move(fac));
180 normalized = logger.
run(std::move(subptr));
183 auto res = internal::cluster(normalized.get(), opt.
rank, fun(inblock.size()), opt.
num_threads);
184 auto cIt = res.clusters.begin();
185 for (
auto i : inblock) {
186 clusters[i] = *cIt + last_cluster;
189 last_cluster += *std::max_element(res.clusters.begin(), res.clusters.end()) + 1;
193 std::shared_ptr<const tatami::Matrix<Value_, Index_> > normalized;
196 normalized = logger.
run(std::move(ptr), std::move(fac));
198 normalized = logger.
run(std::move(ptr));
201 auto res = internal::cluster(normalized.get(), opt.
rank, fun(NC), opt.
num_threads);
202 clusters = std::move(res.clusters);
207 group_runner.
run(mat, clusters.data(), output);
225 typename OutputFactor_
227void run(
const tatami::Matrix<Value_, Index_>* mat, OutputFactor_* output) {
246 typename OutputFactor_ = double,
253 std::vector<OutputFactor_> output(mat->ncol());
254 run(mat, output.data(), opt);
270 typename OutputFactor_ = double,
274std::vector<OutputFactor_>
run(
const tatami::Matrix<Value_, Index_>* mat) {
275 std::vector<OutputFactor_> output(mat->ncol());
Compute size factors for groups of cells.
Compute log-normalized expression values.
Compute grouped size factors to handle composition bias.
Definition GroupedSizeFactors.hpp:43
GroupedSizeFactors & set_num_threads(int n=Defaults::num_threads)
Definition GroupedSizeFactors.hpp:126
void run(const tatami::Matrix< T, IDX > *mat, const Group *group, Out *output) const
Definition GroupedSizeFactors.hpp:157
Compute log-normalized expression values.
Definition LogNormCounts.hpp:33
LogNormCounts & set_num_threads(int n=Defaults::num_threads)
Definition LogNormCounts.hpp:186
std::shared_ptr< MAT > run(std::shared_ptr< MAT > mat, V size_factors) const
Definition LogNormCounts.hpp:247
Perform a simple PCA on a gene-cell matrix.
Definition SimplePca.hpp:33
SimplePca & set_rank(int r=Defaults::rank)
Definition SimplePca.hpp:93
SimplePca & set_num_threads(int n=Defaults::num_threads)
Definition SimplePca.hpp:153
Results run(const tatami::Matrix< T, IDX > *mat) const
Definition SimplePca.hpp:301
void run(const tatami::Matrix< Value_, Index_ > *mat, OutputFactor_ *output, const Options< Block_, SizeFactor_ > &opt)
Definition quick_grouped_size_factors.hpp:141
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
size_t count_ids(size_t length, const Id_ *ids)
Definition blocking.hpp:29
Eigen::MatrixXd pcs
Definition SimplePca.hpp:252
Options for run().
Definition quick_grouped_size_factors.hpp:43
const Block_ * block
Definition quick_grouped_size_factors.hpp:61
const SizeFactor_ * initial_factors
Definition quick_grouped_size_factors.hpp:68
int num_threads
Definition quick_grouped_size_factors.hpp:73
std::function< size_t(size_t)> clusters
Definition quick_grouped_size_factors.hpp:53
int rank
Definition quick_grouped_size_factors.hpp:47