124 minimum_window_count = c;
134 block_weight_policy = w;
145 variable_block_weight_parameters = v;
169 template<
bool blocked_,
typename Data_,
typename Index_,
typename Stat_,
typename Block_>
170 void compute_dense_row(
const tatami::Matrix<Data_, Index_>* mat, std::vector<Stat_*>& means, std::vector<Stat_*>& variances,
const Block_* block,
const std::vector<Index_>& block_size)
const {
171 auto nblocks = block_size.size();
172 auto NR = mat->nrow(), NC = mat->ncol();
174 tatami::parallelize([&](
size_t, Index_ start, Index_ length) ->
void {
175 std::vector<Stat_> tmp_means(nblocks);
176 std::vector<Stat_> tmp_vars(nblocks);
178 std::vector<Data_> buffer(NC);
179 auto ext = tatami::consecutive_extractor<true, false>(mat, start, length);
181 for (Index_ r = start, end = start + length; r < end; ++r) {
182 auto ptr = ext->fetch(r, buffer.data());
183 feature_selection::blocked_variance_with_mean<blocked_>(ptr, NC, block, nblocks, block_size.data(), tmp_means.data(), tmp_vars.data());
184 for (
size_t b = 0; b < nblocks; ++b) {
185 means[b][r] = tmp_means[b];
186 variances[b][r] = tmp_vars[b];
192 template<
bool blocked_,
typename Data_,
typename Index_,
typename Stat_,
typename Block_>
193 void compute_sparse_row(
const tatami::Matrix<Data_, Index_>* mat, std::vector<Stat_*>& means, std::vector<Stat_*>& variances,
const Block_* block,
const std::vector<Index_>& block_size)
const {
194 auto nblocks = block_size.size();
195 auto NR = mat->nrow(), NC = mat->ncol();
197 tatami::parallelize([&](
size_t, Index_ start, Index_ length) ->
void {
198 std::vector<Stat_> tmp_means(nblocks);
199 std::vector<Stat_> tmp_vars(nblocks);
200 std::vector<Index_> tmp_nzero(nblocks);
202 std::vector<Data_> vbuffer(NC);
203 std::vector<Index_> ibuffer(NC);
205 opt.sparse_ordered_index =
false;
206 auto ext = tatami::consecutive_extractor<true, true>(mat, start, length, opt);
208 for (Index_ r = start, end = start + length; r < end; ++r) {
209 auto range = ext->fetch(r, vbuffer.data(), ibuffer.data());
210 feature_selection::blocked_variance_with_mean<blocked_>(range, block, nblocks, block_size.data(), tmp_means.data(), tmp_vars.data(), tmp_nzero.data());
211 for (
size_t b = 0; b < nblocks; ++b) {
212 means[b][r] = tmp_means[b];
213 variances[b][r] = tmp_vars[b];
219 template<
bool blocked_,
typename Data_,
typename Index_,
typename Stat_,
typename Block_>
220 void compute_dense_column(
const tatami::Matrix<Data_, Index_>* mat, std::vector<Stat_*>& means, std::vector<Stat_*>& variances,
const Block_* block,
const std::vector<Index_>& block_size)
const {
221 auto nblocks = block_size.size();
222 auto NR = mat->nrow(), NC = mat->ncol();
224 tatami::parallelize([&](
size_t, Index_ start, Index_ length) ->
void {
225 std::vector<Data_> buffer(length);
226 auto ext = tatami::consecutive_extractor<false, false>(mat, 0, NC, start, length);
230 auto vcopy = variances;
231 for (Index_ b = 0; b < nblocks; ++b) {
236 std::vector<Index_> counts(nblocks);
237 for (Index_ c = 0; c < NC; ++c) {
238 auto ptr = ext->fetch(c, buffer.data());
239 auto b = feature_selection::get_block<blocked_>(c, block);
240 tatami::stats::variances::compute_running(ptr, length, mcopy[b], vcopy[b], counts[b]);
243 for (
size_t b = 0; b < nblocks; ++b) {
244 tatami::stats::variances::finish_running(length, mcopy[b], vcopy[b], counts[b]);
249 template<
bool blocked_,
typename Data_,
typename Index_,
typename Stat_,
typename Block_>
250 void compute_sparse_column(
const tatami::Matrix<Data_, Index_>* mat, std::vector<Stat_*>& means, std::vector<Stat_*>& variances,
const Block_* block,
const std::vector<Index_>& block_size)
const {
251 auto nblocks = block_size.size();
252 auto NR = mat->nrow(), NC = mat->ncol();
253 std::vector<std::vector<Index_> > nonzeros(nblocks, std::vector<Index_>(NR));
255 tatami::parallelize([&](
size_t, Index_ start, Index_ length) ->
void {
256 std::vector<Data_> vbuffer(length);
257 std::vector<Index_> ibuffer(length);
259 opt.sparse_ordered_index =
false;
260 auto ext = tatami::consecutive_extractor<false, true>(mat, 0, NC, start, length, opt);
262 std::vector<Index_> counts(nblocks);
263 for (Index_ c = 0; c < NC; ++c) {
264 auto range = ext->fetch(c, vbuffer.data(), ibuffer.data());;
265 auto b = feature_selection::get_block<blocked_>(c, block);
266 tatami::stats::variances::compute_running(range, means[b], variances[b], nonzeros[b].data(), counts[b]);
269 for (
size_t b = 0; b < nblocks; ++b) {
270 tatami::stats::variances::finish_running(length, means[b] + start, variances[b] + start, nonzeros[b].data() + start, counts[b]);
276 template<
bool blocked_,
typename Data_,
typename Index_,
typename Stat_,
typename Block_>
277 void compute(
const tatami::Matrix<Data_, Index_>* mat, std::vector<Stat_*>& means, std::vector<Stat_*>& variances,
const Block_* block,
const std::vector<Index_>& block_size)
const {
278 if (mat->prefer_rows()) {
280 compute_sparse_row<blocked_>(mat, means, variances, block, block_size);
282 compute_dense_row<blocked_>(mat, means, variances, block, block_size);
287 auto NR = mat->nrow();
288 for (
auto& mptr : means) {
289 std::fill(mptr, mptr + NR, 0);
291 for (
auto& vptr : variances) {
292 std::fill(vptr, vptr + NR, 0);
296 compute_sparse_column<blocked_>(mat, means, variances, block, block_size);
298 compute_dense_column<blocked_>(mat, means, variances, block, block_size);
318 template<
typename Value_,
typename Index_,
typename Stat_>
319 void run(
const tatami::Matrix<Value_, Index_>* mat, Stat_* means, Stat_* variances, Stat_* fitted, Stat_* residuals)
const {
320 run_blocked(mat,
static_cast<int*
>(NULL), std::vector<Stat_*>{means}, std::vector<Stat_*>{variances}, std::vector<Stat_*>{fitted}, std::vector<Stat_*>{residuals});
357 template<
typename Value_,
typename Index_,
typename Block_,
typename Stat_>
359 const tatami::Matrix<Value_, Index_>* mat,
361 std::vector<Stat_*> means,
362 std::vector<Stat_*> variances,
363 std::vector<Stat_*> fitted,
364 std::vector<Stat_*> residuals,
366 Stat_* ave_variances,
368 Stat_* ave_residuals)
370 Index_ NR = mat->nrow(), NC = mat->ncol();
371 std::vector<Index_> block_size;
375 compute<true>(mat, means, variances, block, block_size);
377 block_size.push_back(NC);
378 compute<false>(mat, means, variances, block, block_size);
389 for (
size_t b = 0; b < block_size.size(); ++b) {
390 if (block_size[b] >= 2) {
391 fit.
run(NR, means[b], variances[b], fitted[b], residuals[b]);
393 std::fill(fitted[b], fitted[b] + NR, std::numeric_limits<double>::quiet_NaN());
394 std::fill(residuals[b], residuals[b] + NR, std::numeric_limits<double>::quiet_NaN());
399 if (ave_means || ave_variances || ave_fitted || ave_residuals) {
400 std::vector<double> block_weight =
compute_block_weights(block_size, block_weight_policy, variable_block_weight_parameters);
439 template<
typename Value_,
typename Index_,
typename Block_,
typename Stat_>
441 const tatami::Matrix<Value_, Index_>* mat,
443 std::vector<Stat_*> means,
444 std::vector<Stat_*> variances,
445 std::vector<Stat_*> fitted,
446 std::vector<Stat_*> residuals)
452 std::move(variances),
454 std::move(residuals),
455 static_cast<Stat_*
>(NULL),
456 static_cast<Stat_*
>(NULL),
457 static_cast<Stat_*
>(NULL),
458 static_cast<Stat_*
>(NULL)
511 template<
typename Value_,
typename Index_>
512 Results run(
const tatami::Matrix<Value_, Index_>* mat)
const {
531 BlockResults(
size_t ngenes,
int nblocks,
bool compute_average) :
533 average(compute_average ? ngenes : 0) {}
550 template<
typename Stat_>
551 static void fill_pointers(
554 std::vector<Stat_*>& mean_ptr,
555 std::vector<Stat_*>& var_ptr,
556 std::vector<Stat_*>& fit_ptr,
557 std::vector<Stat_*>& resid_ptr
559 mean_ptr.reserve(nblocks);
560 var_ptr.reserve(nblocks);
561 fit_ptr.reserve(nblocks);
562 resid_ptr.reserve(nblocks);
564 for (
int b = 0; b < nblocks; ++b) {
565 mean_ptr.push_back(output.
per_block[b].means.data());
566 var_ptr.push_back(output.
per_block[b].variances.data());
567 fit_ptr.push_back(output.
per_block[b].fitted.data());
568 resid_ptr.push_back(output.
per_block[b].residuals.data());
586 template<
typename Value_,
typename Index_,
typename Block_>
588 int nblocks = (block ?
count_ids(mat->ncol(), block) : 1);
589 BlockResults output(mat->nrow(), nblocks, compute_average);
591 std::vector<double*> mean_ptr, var_ptr, fit_ptr, resid_ptr;
592 fill_pointers(nblocks, output, mean_ptr, var_ptr, fit_ptr, resid_ptr);
594 if (compute_average) {
601 std::move(resid_ptr),