1#ifndef SCRAN_CLUSTERED_SIZE_FACTORS_HPP
2#define SCRAN_CLUSTERED_SIZE_FACTORS_HPP
4#include "../utils/macros.hpp"
7#include "SanitizeSizeFactors.hpp"
9#include "../aggregation/AggregateAcrossCells.hpp"
11#include "tatami/tatami.hpp"
13#include "../utils/blocking.hpp"
118 handle_non_finite = n;
// Compute per-cell size factors into a caller-supplied buffer, without an
// explicit reference group.
//
// mat    - input matrix; run_internal() reads mat->nrow() and mat->ncol().
// group  - indexed as group[i] for every column i of mat inside
//          run_internal(), so it must hold one group identifier per column.
// output - written as output[i] for every column i of mat inside
//          run_internal(), so it must have room for mat->ncol() values.
//
// The literal `false` is passed in the reference slot, so Ref is deduced as
// bool and the `std::is_same<Ref, size_t>` branch of run_internal() is not
// compiled in for this overload.
156 template<
typename T,
typename IDX,
typename Group,
typename Out>
157 void run(
const tatami::Matrix<T, IDX>* mat,
const Group* group, Out* output)
const {
158 run_internal(mat, group,
false, output);
// Compute per-cell size factors into a caller-supplied buffer, using an
// explicitly chosen reference.
//
// mat       - input matrix; run_internal() reads mat->nrow() and mat->ncol().
// group     - indexed as group[i] for every column i of mat inside
//             run_internal(), so it must hold one group identifier per column.
// reference - forwarded unchanged as a size_t, which selects the
//             `std::is_same<Ref, size_t>` branch of run_internal().
//             NOTE(review): presumably the index of the group to use as the
//             reference profile - the consuming lines are not visible in this
//             listing; confirm.
// output    - written as output[i] for every column i of mat inside
//             run_internal(), so it must have room for mat->ncol() values.
178 template<
typename T,
typename IDX,
typename Group,
typename Out>
179 void run(
const tatami::Matrix<T, IDX>* mat,
const Group* group,
size_t reference, Out* output)
const {
180 run_internal(mat, group, reference, output);
// Shared implementation behind the pointer-output run() overloads.
//
// Visible steps: aggregate the matrix into one NR-long profile per group,
// run MedianSizeFactors on the aggregated matrix against the profile of group
// `ref`, sanitize the resulting per-group factors, then derive each cell's
// factor from its group's factor and store it in output[i].
//
// NOTE(review): this listing elides several lines, including the definitions
// of `ngroups`, `ref`, `best`, `current` and `cres`; comments about them below
// are assumptions to confirm against the full header.
184 template<
typename T,
typename IDX,
typename Group,
typename Ref,
typename Out>
185 void run_internal(
const tatami::Matrix<T, IDX>* mat,
const Group* group, Ref reference, Out* output)
const {
186 size_t NR = mat->nrow(), NC = mat->ncol();
// One contiguous buffer holding ngroups consecutive slices of NR doubles;
// sums[i] points at the start of slice i.
// NOTE(review): ngroups is defined on an elided line - presumably the number
// of distinct group identifiers; confirm.
193 std::vector<double> combined(ngroups * NR);
195 std::vector<double*> sums(ngroups);
196 for (
size_t i = 0; i < ngroups; ++i) {
197 sums[i] = combined.data() + i * NR;
// Aggregate through the per-group pointers in `sums`. The last argument is an
// empty vector of int* - presumably meaning no secondary per-group output is
// requested; confirm against AggregateAcrossCells::run().
199 AggregateAcrossCells aggregator;
200 aggregator.set_num_threads(num_threads).run(mat, group, std::move(sums), std::vector<int*>());
// Compile-time dispatch: this branch exists only when the caller supplied a
// size_t reference (its body is elided in this listing).
204 if constexpr(std::is_same<Ref, size_t>::value) {
// For each group slice, accumulate the sum of square roots of its values into
// `current` and compare it with `best`.
// NOTE(review): presumably this selects the group with the largest such sum
// as the reference - the assignment is elided; confirm.
215 for (
size_t i = 0; i < ngroups; ++i) {
216 auto start = combined.data() + i * NR;
218 for (
size_t j = 0; j < NR; ++j) {
219 current += std::sqrt(start[j]);
221 if (current > best) {
// Expose `combined` as an NR x ngroups tatami::DenseColumnMatrix via an
// ArrayView, so each column is one group's slice.
230 tatami::ArrayView view(combined.data(), combined.size());
231 tatami::DenseColumnMatrix<T, IDX,
decltype(view)> aggmat(NR, ngroups, std::move(view));
// Median-based factors for the aggregated matrix, using the slice of group
// `ref` as the reference profile. Centering is switched off at this stage.
233 MedianSizeFactors med;
234 med.set_num_threads(num_threads).set_center(
false).set_prior_count(prior_count);
235 auto mres = med.run(&aggmat, combined.data() + ref * NR);
// Zero and non-finite per-group factors use the SANITIZE action when the
// corresponding handle_* flag is set, and the ERROR action otherwise.
240 SanitizeSizeFactors sanitizer;
241 sanitizer.set_handle_zero(handle_zeros ? SanitizeSizeFactors::HandlerAction::SANITIZE : SanitizeSizeFactors::HandlerAction::ERROR);
242 sanitizer.set_handle_non_finite(handle_non_finite ? SanitizeSizeFactors::HandlerAction::SANITIZE : SanitizeSizeFactors::HandlerAction::ERROR);
243 sanitizer.run(mres.factors.size(), mres.factors.data(), cres);
// Per-cell factor = (cell column sum / its group's aggregated column sum)
// multiplied by the group's factor. The guard skips this computation when the
// group's aggregated column sum is zero, which avoids dividing by zero; the
// alternative path is elided in this listing.
246 auto aggcolsums = tatami::column_sums(&aggmat, num_threads);
247 auto colsums = tatami::column_sums(mat, num_threads);
248 for (
size_t i = 0; i < NC; ++i) {
249 auto curgroup = group[i];
250 if (aggcolsums[curgroup]) {
251 auto scale =
static_cast<double>(colsums[i])/
static_cast<double>(aggcolsums[curgroup]);
252 output[i] = scale * mres.factors[curgroup];
// Center the NC per-cell factors in place.
// NOTE(review): presumably conditional on the `center` setting - the guard is
// not visible in this listing; confirm.
261 CenterSizeFactors centerer;
262 centerer.run(NC, output);
271 template<
typename Out>
303 template<
typename Out =
double,
typename T,
typename IDX,
typename Group>
// Convenience overload that returns the size factors in a Results<Out> object
// instead of writing to a caller-supplied pointer; Out defaults to double.
//
// mat       - input matrix, forwarded to the pointer-output run() overload.
// group     - group identifiers, forwarded unchanged.
// reference - explicit reference, forwarded unchanged as a size_t.
//
// The work is delegated to the four-argument run(), which writes into
// output.factors.data().
// NOTE(review): the declaration and sizing of `output`, and the return
// statement, are on lines elided from this listing - presumably `output` is a
// Results<Out> sized to mat->ncol(); confirm.
326 template<
typename Out =
double,
typename T,
typename IDX,
typename Group>
327 Results<Out> run(
const tatami::Matrix<T, IDX>* mat,
const Group* group,
size_t reference)
const {
329 run(mat, group, reference, output.
factors.data());
Center size factors prior to normalization.
Compute grouped size factors to handle composition bias.
Definition GroupedSizeFactors.hpp:43
GroupedSizeFactors & set_handle_non_finite(bool n=Defaults::handle_non_finite)
Definition GroupedSizeFactors.hpp:117
void run(const tatami::Matrix< T, IDX > *mat, const Group *group, size_t reference, Out *output) const
Definition GroupedSizeFactors.hpp:179
Results< Out > run(const tatami::Matrix< T, IDX > *mat, const Group *group, size_t reference) const
Definition GroupedSizeFactors.hpp:327
GroupedSizeFactors & set_center(bool c=Defaults::center)
Definition GroupedSizeFactors.hpp:79
GroupedSizeFactors & set_handle_zeros(bool z=Defaults::handle_zeros)
Definition GroupedSizeFactors.hpp:103
GroupedSizeFactors & set_num_threads(int n=Defaults::num_threads)
Definition GroupedSizeFactors.hpp:126
Results< Out > run(const tatami::Matrix< T, IDX > *mat, const Group *group) const
Definition GroupedSizeFactors.hpp:304
GroupedSizeFactors & set_prior_count(double p=MedianSizeFactors::Defaults::prior_count)
Definition GroupedSizeFactors.hpp:89
void run(const tatami::Matrix< T, IDX > *mat, const Group *group, Out *output) const
Definition GroupedSizeFactors.hpp:157
Functions for single-cell RNA-seq analyses.
Definition AggregateAcrossCells.hpp:18
SizeFactorValidity validate_size_factors(size_t n, const T *size_factors)
Definition SanitizeSizeFactors.hpp:46
size_t count_ids(size_t length, const Id_ *ids)
Definition blocking.hpp:29
Default parameter settings.
Definition GroupedSizeFactors.hpp:48
static constexpr bool handle_non_finite
Definition GroupedSizeFactors.hpp:62
static constexpr int num_threads
Definition GroupedSizeFactors.hpp:67
static constexpr bool handle_zeros
Definition GroupedSizeFactors.hpp:57
static constexpr bool center
Definition GroupedSizeFactors.hpp:52
Result of the size factor calculations.
Definition GroupedSizeFactors.hpp:272
std::vector< Out > factors
Definition GroupedSizeFactors.hpp:285