53 static constexpr int rank = 10;
58 static constexpr bool scale =
false;
166 block_weight_policy = w;
177 variable_block_weight_parameters = v;
221 template<
typename Data_,
typename Index_,
typename Block_>
222 void run_sparse_simple(
223 const tatami::Matrix<Data_, Index_>* mat,
225 const pca_utils::BlockingDetails<true> block_details,
226 const irlba::Irlba& irb,
227 Eigen::MatrixXd& pcs,
228 Eigen::MatrixXd& rotation,
229 Eigen::VectorXd& variance_explained,
230 Eigen::VectorXd& center_v,
231 Eigen::VectorXd& scale_v,
234 auto extracted = pca_utils::extract_sparse_for_pca(mat, nthreads);
235 pca_utils::SparseMatrix emat(mat->ncol(), mat->nrow(), std::move(extracted.values), std::move(extracted.indices), std::move(extracted.ptrs), nthreads);
237 size_t ngenes = emat.cols();
238 center_v.resize(ngenes);
239 scale_v.resize(ngenes);
241 tatami::parallelize([&](
size_t,
size_t start,
size_t length) ->
void {
242 const auto& values = emat.get_values();
243 const auto& indices = emat.get_indices();
244 const auto& ptrs = emat.get_pointers();
246 const auto& block_size = block_details.block_size;
247 size_t nblocks = block_size.size();
248 std::vector<int> block_count(nblocks);
249 const auto& block_weight = block_details.per_element_weight;
251 for (
size_t r = start, end = start + length; r < end; ++r) {
252 auto offset = ptrs[r];
253 size_t num_entries = ptrs[r+1] - offset;
254 auto value_ptr = values.data() + offset;
255 auto index_ptr = indices.data() + offset;
257 std::fill(block_count.begin(), block_count.end(), 0);
260 double& grand_mean = center_v[r];
262 for (
size_t i = 0; i < num_entries; ++i) {
263 auto b = block[index_ptr[i]];
264 grand_mean += value_ptr[i] * block_weight[b];
267 grand_mean /= block_details.total_block_weight;
273 double& proxyvar = scale_v[r];
275 for (
size_t b = 0; b < nblocks; ++b) {
276 double zero_sum = (block_size[b] - block_count[b]) * grand_mean * grand_mean;
277 proxyvar += zero_sum * block_weight[b];
280 for (
size_t i = 0; i < num_entries; ++i) {
281 double diff = value_ptr[i] - grand_mean;
282 proxyvar += diff * diff * block_weight[block[index_ptr[i]]];
285 proxyvar /= emat.rows() - 1;
287 }, ngenes, nthreads);
289 total_var = pca_utils::process_scale_vector(scale, scale_v);
292 irlba::Centered<
decltype(emat)> centered(&emat, ¢er_v);
294 irlba::Scaled<
decltype(centered)> scaled(¢ered, &scale_v);
295 pca_utils::SampleScaledWrapper<
decltype(scaled)> weighted(&scaled, &(block_details.expanded_weights));
296 irb.run(weighted, pcs, rotation, variance_explained);
298 pca_utils::SampleScaledWrapper<
decltype(centered)> weighted(¢ered, &(block_details.expanded_weights));
299 irb.run(weighted, pcs, rotation, variance_explained);
303 pca_utils::project_sparse_matrix(emat, pcs, rotation, scale, scale_v, nthreads);
305 pca_utils::clean_up_projected<true>(pcs, variance_explained);
307 pcs.adjointInPlace();
311 template<
typename Data_,
typename Index_,
typename Block_>
312 void run_dense_simple(
313 const tatami::Matrix<Data_, Index_>* mat,
315 const pca_utils::BlockingDetails<true>& block_details,
316 const irlba::Irlba& irb,
317 Eigen::MatrixXd& pcs,
318 Eigen::MatrixXd& rotation,
319 Eigen::VectorXd& variance_explained,
320 Eigen::VectorXd& center_v,
321 Eigen::VectorXd& scale_v,
324 auto emat = pca_utils::extract_dense_for_pca(mat, nthreads);
326 size_t ngenes = emat.cols();
327 center_v.resize(ngenes);
328 scale_v.resize(ngenes);
330 tatami::parallelize([&](
size_t,
size_t start,
size_t length) ->
void {
331 size_t nblocks = block_details.num_blocks();
332 std::vector<double> mean_buffer(nblocks);
333 const auto& block_weight = block_details.per_element_weight;
334 size_t ncells = emat.rows();
336 for (
size_t c = start, end = start + length; c < end; ++c) {
337 auto ptr = emat.data() + c * ncells;
339 double& grand_mean = center_v[c];
341 for (
size_t r = 0; r < ncells; ++r) {
342 grand_mean += ptr[r] * block_weight[block[r]];
344 grand_mean /= block_details.total_block_weight;
348 double& proxyvar = scale_v[c];
350 for (
size_t r = 0; r < ncells; ++r) {
351 double diff = ptr[r] - grand_mean;
352 proxyvar += diff * diff * block_weight[block[r]];
355 proxyvar /= emat.rows() - 1;
357 }, ngenes, nthreads);
359 total_var = pca_utils::process_scale_vector(scale, scale_v);
362 pca_utils::apply_center_and_scale_to_dense_matrix(emat, center_v, scale, scale_v, nthreads);
364 pca_utils::SampleScaledWrapper<
decltype(emat)> weighted(&emat, &(block_details.expanded_weights));
365 irb.run(weighted, pcs, rotation, variance_explained);
367 pcs.noalias() = emat * rotation;
368 pca_utils::clean_up_projected<false>(pcs, variance_explained);
370 pcs.adjointInPlace();
375 template<
bool weight_,
typename Matrix_,
typename Block_>
376 void run_residuals_internal(
379 const pca_utils::BlockingDetails<weight_>& block_details,
380 const Eigen::MatrixXd& center_m,
381 const Eigen::VectorXd& scale_v,
382 const irlba::Irlba& irb,
383 Eigen::MatrixXd& pcs,
384 Eigen::MatrixXd& rotation,
385 Eigen::VectorXd& variance_explained)
387 pca_utils::RegressWrapper<Matrix_, Block_> centered(&emat, block, ¢er_m);
389 if constexpr(weight_) {
391 irlba::Scaled<
decltype(centered)> scaled(¢ered, &scale_v);
392 pca_utils::SampleScaledWrapper<
decltype(scaled)> weighted(&scaled, &(block_details.expanded_weights));
393 irb.run(weighted, pcs, rotation, variance_explained);
395 pca_utils::SampleScaledWrapper<
decltype(centered)> weighted(¢ered, &(block_details.expanded_weights));
396 irb.run(weighted, pcs, rotation, variance_explained);
401 irlba::Scaled<
decltype(centered)> scaled(¢ered, &scale_v);
402 irb.run(scaled, pcs, rotation, variance_explained);
404 irb.run(centered, pcs, rotation, variance_explained);
409 template<
bool weight_,
typename Data_,
typename Index_,
typename Block_>
410 void run_sparse_residuals(
411 const tatami::Matrix<Data_, Index_>* mat,
413 const pca_utils::BlockingDetails<weight_>& block_details,
414 const irlba::Irlba& irb,
415 Eigen::MatrixXd& pcs,
416 Eigen::MatrixXd& rotation,
417 Eigen::VectorXd& variance_explained,
418 Eigen::MatrixXd& center_m,
419 Eigen::VectorXd& scale_v,
422 auto extracted = pca_utils::extract_sparse_for_pca(mat, nthreads);
423 pca_utils::SparseMatrix emat(mat->ncol(), mat->nrow(), std::move(extracted.values), std::move(extracted.indices), std::move(extracted.ptrs), nthreads);
425 size_t ngenes = emat.cols();
426 center_m.resize(block_details.num_blocks(), ngenes);
427 scale_v.resize(ngenes);
428 pca_utils::compute_mean_and_variance_regress<weight_>(emat, block, block_details, center_m, scale_v, nthreads);
429 total_var = pca_utils::process_scale_vector(scale, scale_v);
431 run_residuals_internal<weight_>(
444 pca_utils::project_sparse_matrix(emat, pcs, rotation, scale, scale_v, nthreads);
446 pca_utils::clean_up_projected<true>(pcs, variance_explained);
448 pcs.adjointInPlace();
452 template<
bool weight_,
typename Data_,
typename Index_,
typename Block_>
453 void run_dense_residuals(
454 const tatami::Matrix<Data_, Index_>* mat,
456 const pca_utils::BlockingDetails<weight_>& block_details,
457 const irlba::Irlba& irb,
458 Eigen::MatrixXd& pcs,
459 Eigen::MatrixXd& rotation,
460 Eigen::VectorXd& variance_explained,
461 Eigen::MatrixXd& center_m,
462 Eigen::VectorXd& scale_v,
465 auto emat = pca_utils::extract_dense_for_pca(mat, nthreads);
467 size_t ngenes = emat.cols();
468 center_m.resize(block_details.num_blocks(), ngenes);
469 scale_v.resize(ngenes);
470 pca_utils::compute_mean_and_variance_regress<weight_>(emat, block, block_details, center_m, scale_v, nthreads);
471 total_var = pca_utils::process_scale_vector(scale, scale_v);
474 run_residuals_internal<weight_>(
487 pcs.noalias() = emat * (rotation.array().colwise() / scale_v.array()).matrix();
489 pcs.noalias() = emat * rotation;
492 pca_utils::clean_up_projected<false>(pcs, variance_explained);
494 pcs.adjointInPlace();
499 template<
typename Data_,
typename Index_,
typename Block_>
501 const tatami::Matrix<Data_, Index_>* mat,
503 Eigen::MatrixXd& pcs,
504 Eigen::MatrixXd& rotation,
505 Eigen::VectorXd& variance_explained,
506 Eigen::MatrixXd& center_m,
507 Eigen::VectorXd& scale_v,
510 irlba::EigenThreadScope t(nthreads);
512 irb.set_number(rank);
513 irb.set_cap_number(
true);
516 if (block_weight_policy == WeightPolicy::NONE) {
517 auto bdetails = pca_utils::compute_blocking_details(mat->ncol(), block);
519 run_sparse_residuals<false>(mat, block, bdetails, irb, pcs, rotation, variance_explained, center_m, scale_v, total_var);
521 run_dense_residuals<false>(mat, block, bdetails, irb, pcs, rotation, variance_explained, center_m, scale_v, total_var);
525 auto bdetails = pca_utils::compute_blocking_details(mat->ncol(), block, block_weight_policy, variable_block_weight_parameters);
527 run_sparse_residuals<true>(mat, block, bdetails, irb, pcs, rotation, variance_explained, center_m, scale_v, total_var);
529 run_dense_residuals<true>(mat, block, bdetails, irb, pcs, rotation, variance_explained, center_m, scale_v, total_var);
534 if (block_weight_policy == WeightPolicy::NONE) {
535 throw std::runtime_error(
"block weight policy cannot be NONE when 'use_residuals = true', use SimplePca instead");
538 auto bdetails = pca_utils::compute_blocking_details(mat->ncol(), block, block_weight_policy, variable_block_weight_parameters);
540 Eigen::VectorXd center_v;
542 run_sparse_simple(mat, block, bdetails, irb, pcs, rotation, variance_explained, center_v, scale_v, total_var);
544 run_dense_simple(mat, block, bdetails, irb, pcs, rotation, variance_explained, center_v, scale_v, total_var);
548 center_m.resize(1, center_v.size());
549 center_m.row(0) = center_v;
630 template<
typename T,
typename IDX,
typename Batch>
631 Results run(
const tatami::Matrix<T, IDX>* mat,
const Batch* batch)
const {
633 Eigen::MatrixXd rotation, center_m;
634 Eigen::VectorXd scale_v;
639 if (return_rotation) {
640 output.
rotation = std::move(rotation);
643 output.
center = center_m.adjoint();
646 output.
scale = std::move(scale_v);
672 template<
typename T,
typename IDX,
typename Batch,
typename X>
673 Results run(
const tatami::Matrix<T, IDX>* mat,
const Batch* batch,
const X* features)
const {
676 return run(mat, batch);
678 auto subsetted = pca_utils::subset_matrix_by_features(mat, features);
679 return run(subsetted.get(), batch);