import * as vec from "./Vector.js";
import * as gr from "./GRanges.js";
import * as utils from "./utils.js";
import * as cutils from "./clone-utils.js";
import * as generics from "./AllGenerics.js";
/**
* A GroupedGRanges object is a collection of groups of genomic ranges, inspired by the `GRangesList` class from the Bioconductor ecosystem.
* Each group consists of a {@linkplain GRanges} object of arbitrary length, which is most often used to represent a multi-exon gene.
* The GroupedGRanges can be considered a vector of groups, and defines methods for the following generics:
*
* - {@linkcode LENGTH}
* - {@linkcode SLICE}
* - {@linkcode COMBINE}
* - {@linkcode CLONE}
*
* Our implementation re-uses Bioconductor's strategy of storing the groups in a single concatenated GRanges.
* This improves efficiency for large numbers of small GRanges, especially in placeholder objects where all the GRanges are zero-length.
*
* @extends Vector
*/
export class GroupedGRanges extends vec.Vector {
static #computeStarts(lengths) {
let starts = new Int32Array(lengths.length);
let last = 0;
for (var i = 0; i < lengths.length; i++) {
starts[i] = last;
last += lengths[i];
}
return { starts: starts, total: last };
}
#staged_setGroup = null;
/**
* @param {Array|GRanges} ranges - An array of {@linkplain GRanges} objects, where each element represents a group of genomic ranges.
* All objects should have compatible columns in their {@linkplain Vector#elementMetadata elementMetadata}.
*
* Alternatively, a single GRanges containing a concatenation of ranges from all groups.
* In this case, `rangeLengths` must be supplied.
* @param {Object} [options={}] - Optional parameters.
* @param {?(TypedArray|Array)} [options.rangeLengths=null] - Length of the ranges within each group.
* This should be coercible to an Int32Array, contain non-negative values, and have a sum equal to the length of `ranges`.
* Only used if `ranges` is a single {@linkplain GRanges} object, where each group's ranges are assumed to form contiguous intervals along `ranges`.
* @param {?Array} [options.names=null] - Array of strings of length equal to `start`, containing names for each genomic range.
* Alternatively `null`, in which case the ranges are assumed to be unnamed.
* @param {?DataFrame} [options.elementMetadata=null] - A {@linkplain DataFrame} with number of rows equal to the length of `start`, containing arbitrary per-range annotations.
* Alternatively `null`, in which case a zero-column DataFrame is automatically constructed.
* @param {Object} [options.metadata={}] - Object containing arbitrary metadata as key-value pairs.
*/
constructor(ranges, { rangeLengths = null, names = null, elementMetadata = null, metadata = {} } = {}) {
if (arguments.length == 0) {
super();
return;
}
if (ranges.constructor == Array) {
super(ranges.length, { names, elementMetadata, metadata });
rangeLengths = new Int32Array(ranges.length);
for (var i = 0; i < rangeLengths.length; i++) {
if (!(ranges[i] instanceof gr.GRanges)) {
throw new Error("'ranges' must either be a 'GRanges' or an array of 'GRanges'");
}
rangeLengths[i] = generics.LENGTH(ranges[i]);
}
ranges = generics.COMBINE(ranges);
} else {
if (!(ranges instanceof gr.GRanges)) {
throw new Error("'ranges' must either be a 'GRanges' or an array of 'GRanges'");
}
if (rangeLengths == null) {
throw new Error("'rangeLengths' must be specified when 'ranges' is a 'GRanges'");
}
super(rangeLengths.length, { names, elementMetadata, metadata });
rangeLengths = utils.convertToInt32Array(rangeLengths);
utils.checkNonNegative(rangeLengths);
}
this._ranges = ranges;
this._rangeLengths = rangeLengths;
let accumulated = GroupedGRanges.#computeStarts(rangeLengths);
this._rangeStarts = accumulated.starts;
if (accumulated.total !== generics.LENGTH(ranges)) {
throw new Error("sum of 'rangeLengths' must be equal to the length of 'ranges'");
}
}
/**************************************************************************
**************************************************************************
**************************************************************************/
/**
* @return {GRanges} The concatenated set of ranges across all groups.
*/
ranges() {
this.#flush_staged_setGroup();
return this._ranges;
}
/**
* @return {Int32Array} The start indices for each group's ranges along the concatenated set of ranges returned by {@linkcode GroupedGRanges#ranges ranges}.
*/
rangeStarts() {
this.#flush_staged_setGroup();
return this._rangeStarts;
}
/**
* @return {Int32Array} The length of each group's ranges along the concatenated set of ranges returned by {@linkcode GroupedGRanges#ranges ranges}.
*/
rangeLengths() {
this.#flush_staged_setGroup();
return this._rangeLengths;
}
/**
* @param {number} i - Index of the group of interest.
* @param {Object} [options={}] - Optional parameters.
* @param {boolean} [options.allowView=false] - Whether a view can be created in any internal slicing operations.
*
* @return {GRanges} The genomic ranges for group `i`.
*/
group(i, { allowView = false } = {}) {
this.#flush_staged_setGroup();
let s = this._rangeStarts[i];
return generics.SLICE(this._ranges, { start: s, end: s + this._rangeLengths[i] }, { allowView });
}
/**
* @return {number} Number of groups in this object.
*/
numberOfGroups() {
return this._rangeStarts.length;
}
/**************************************************************************
**************************************************************************
**************************************************************************/
/**
* @param {GRanges} ranges - Genomic ranges of length equal to the concatenated set of ranges returned by {@linkcode GroupedGRanges#ranges ranges}.
* @param {Object} [options={}] - Optional parameters.
* @param {boolean} [options.inPlace=false] - Whether to mutate this GroupedGRanges instance in place.
* If `false`, a new instance is returned.
*
* @return {GroupedGRanges} The GroupedGRanges object after modifying the internal ranges.
* If `inPlace = true`, this is a reference to the current instance, otherwise a new instance is created and returned.
*/
setRanges(ranges, { inPlace = false } = {}) {
if (!(ranges instanceof gr.GRanges)) {
throw new Error("'ranges' must be a 'GRanges'");
}
this.#flush_staged_setGroup();
if (generics.LENGTH(ranges) !== generics.LENGTH(this._ranges)) {
throw utils.formatLengthError("'ranges'", "number of ranges");
}
let target = cutils.setterTarget(this, inPlace);
target._ranges = ranges;
return target;
}
/**
* @param {GRanges} ranges - Genomic ranges of length equal to the concatenated set of ranges returned by {@linkcode GroupedGRanges#ranges ranges}.
* @return {GroupedGRanges} A reference to this GroupedGRanges object after modifying the internal ranges.
*/
$setRanges(ranges) {
return this.setRanges(ranges, { inPlace: true });
}
#flush_staged_setGroup() {
let staged = this.#staged_setGroup;
if (staged === null) {
return;
}
staged.sort((a, b) => {
let diff = a[0] - b[0];
return (diff === 0 ? a[1] - b[1] : diff);
});
let counter = 0;
let accumulated = 0;
let last_start = 0;
let more_ranges = [];
let ngroups = this.numberOfGroups();
for (var g = 0; g < ngroups; g++) {
if (counter < staged.length && g == staged[counter][0]) {
let current_start = this._rangeStarts[g];
if (last_start < current_start) {
more_ranges.push(generics.SLICE(this._ranges, { start: last_start, end: current_start }));
}
last_start = current_start + this._rangeLengths[g];
let replacement;
do {
replacement = staged[counter][2];
counter++;
} while (counter < staged.length && g == staged[counter][0]);
more_ranges.push(replacement);
this._rangeLengths[g] = generics.LENGTH(replacement);
}
this._rangeStarts[g] = accumulated;
accumulated += this._rangeLengths[g];
}
let nranges = generics.LENGTH(this._ranges);
if (last_start < nranges) {
more_ranges.push(generics.SLICE(this._ranges, { start: last_start, end: nranges }));
}
try {
this._ranges = generics.COMBINE(more_ranges);
} catch (e) {
throw new Error("failed to combine staged '$setGroup' operations; " + e.message);
}
this.#staged_setGroup = null;
return;
}
/**
* Multiple consecutive calls to `$setGroup` are not executed immediately.
* Rather, the operations are staged and executed in batch once the modified GroupedGRanges is used in other methods.
* This enables efficient setting of individual groups inside a single concatenated {@linkplain GRanges}.
*
* @param {number} i - Index of the group of interest.
* @param {GRanges} ranges - Genomic ranges for group `i`.
* @param {Object} [options={}] - Optional parameters.
* @param {boolean} [options.inPlace=false] - Whether to mutate this GroupedGRanges instance in place.
* If `false`, a new instance is returned.
*
* @return {GroupedGRanges} The GroupedGRanges object after setting group `i`.
* If `inPlace = true`, this is a reference to the current instance, otherwise a new instance is created and returned.
*/
setGroup(i, ranges, { inPlace = false } = {}) {
let target = cutils.setterTarget(this, inPlace);
if (target.#staged_setGroup === null) {
target.#staged_setGroup = [];
} else if (!inPlace) {
target.#staged_setGroup = target.#staged_setGroup.slice();
}
if (!inPlace) {
target._rangeStarts = target._rangeStarts.slice();
target._rangeLengths = target._rangeLengths.slice();
}
let nops = target.#staged_setGroup.length;
target.#staged_setGroup.push([i, nops, ranges]);
return target;
}
/**
* See comments for {@linkcode GroupedGRanges#$setGroup $setGroup}.
*
* @param {number} i - Index of the group of interest.
* @param {GRanges} ranges - Genomic ranges for group `i`.
*
* @return {GroupedGRanges} A reference to this GroupedGRanges object after setting group `i`.
*/
$setGroup(i, ranges) {
return this.setGroup(i, ranges, { inPlace: true });
}
/**************************************************************************
**************************************************************************
**************************************************************************/
/**
* @param {Object} [options={}] - Optional parameters.
* @param {?(Array|Set)} [options.restrictToSeqnames=null] - Array or Set containing the sequence names to use in the index.
* If `null`, all available sequence names are used.
* @param {?(Array|Set)} [options.restrictToStrand=null] - Array or Set containing the strands to use in the index.
* If `null`, all available strands are used.
*
* @return {GroupedGRangesOverlapIndex} A pre-built index for computing overlaps with other {@linkplain GRanges} instances.
*/
buildOverlapIndex({ restrictToSeqnames = null, restrictToStrand = null } = {}) {
this.#flush_staged_setGroup();
return new GroupedGRangesOverlapIndex(
this._ranges.buildOverlapIndex({ restrictToSeqnames, restrictToStrand }),
generics.LENGTH(this._ranges),
this._rangeStarts,
this._rangeLengths
);
}
/**************************************************************************
**************************************************************************
**************************************************************************/
_bioconductor_LENGTH() {
return this._rangeStarts.length;
}
_bioconductor_SLICE(output, i, { allowView = false } = {}) {
super._bioconductor_SLICE(output, i, { allowView });
this.#flush_staged_setGroup();
output._rangeLengths = generics.SLICE(this._rangeLengths, i, { allowView });
let accumulated = GroupedGRanges.#computeStarts(output._rangeLengths);
output._rangeStarts = accumulated.starts;
if (i.constructor == Object) {
// Handle this specially for optimizing allowView = true.
let s = this._rangeStarts[i.start];
output._ranges = generics.SLICE(this._ranges, { start: s, end: s + accumulated.total }, { allowView });
} else {
let keep = new Int32Array(accumulated.total);
let counter = 0;
for (const j of i) {
let start = this._rangeStarts[j];
let end = start + this._rangeLengths[j];
for (var k = start; k < end; k++) {
keep[counter] = k;
counter++;
}
}
output._ranges = generics.SLICE(this._ranges, keep, { allowView });
}
return;
}
_bioconductor_COMBINE(output, objects) {
super._bioconductor_COMBINE(output, objects);
// We need to flush the staged operations in each object.
for (const o of objects) {
o.#flush_staged_setGroup();
}
output._rangeLengths = generics.COMBINE(objects.map(x => x.rangeLengths()));
let accumulated = GroupedGRanges.#computeStarts(output._rangeLengths);
output._rangeStarts = accumulated.starts;
output._ranges = generics.COMBINE(objects.map(x => x._ranges));
return;
}
_bioconductor_CLONE(output, { deepCopy = true }) {
super._bioconductor_CLONE(output, { deepCopy });
output.#staged_setGroup = cutils.cloneField(this.#staged_setGroup, deepCopy);
output._rangeLengths = cutils.cloneField(this._rangeLengths, deepCopy);
output._rangeStarts = cutils.cloneField(this._rangeStarts, deepCopy);
output._ranges = cutils.cloneField(this._ranges, deepCopy);
return;
}
/**************************************************************************
**************************************************************************
**************************************************************************/
/**
* @param {number} [numberOfGroups=0] - Numbe of empty groups to create.
* @return {GroupedGRanges} A GroupedGRanges object of length equal to `numberOfGroups`,
* where each group is of zero length.
*/
static empty(numberOfGroups) {
let runs = new Int32Array(numberOfGroups);
runs.fill(0);
return new GroupedGRanges(gr.GRanges.empty(), { rangeLengths: runs });
}
}
/**
* Pre-built index for overlapping {@linkplain GroupedGRanges} objects.
* This is typically constructed using the {@linkcode GroupedGRanges#buildOverlapIndex GroupedGRanges.buildOverlapIndex} method for a "reference" object,
* and can be applied to different query GroupedGRanges or {@linkplain GRanges} to identify overlaps with the reference.
*
* @hideconstructor
*/
export class GroupedGRangesOverlapIndex {
constructor(index, fullLength, rangeStarts, rangeLengths) {
this._index = index;
this._rangeStarts = rangeStarts;
this._rangeLengths = rangeLengths;
let rev_map = new Int32Array(fullLength);
for (var i = 0; i < rangeStarts.length; i++) {
let start = rangeStarts[i];
let end = start + rangeLengths[i];
for (var s = start; s < end; s++) {
rev_map[s] = i;
}
}
this._reverseMapping = rev_map;
}
/**
* @param {GroupedGRanges|GRanges} query - The query object, containing ranges to be overlapped with those in the reference GroupedGRanges (that was used to construct this GroupedGRangesOverlapIndex object).
* @param {Object} [options={}] - Optional parameters.
* @param {boolean} [options.ignoreStrand=true] - Whether to ignore differences in strandedness between the ranges in `query` and the reference object.
*
* @return {Array} An array of length equal to the number of ranges or groups in `query`,
* where each element is an array containing the indices of the overlapping ranges in the reference {@linkplain GRanges} object.
*/
overlap(query, { ignoreStrand = true } = {}) {
let output = new Array(this._rangeStarts.length);
let rev_map = this._reverseMapping;
if (query instanceof GroupedGRanges) {
let overlaps = this._index.overlap(query._ranges);
for (var i = 0; i < query._rangeStarts.length; i++) {
let start = query._rangeStarts[i];
let end = start + query._rangeLengths[i];
let results = new Set;
for (var s = start; s < end; s++) {
overlaps[s].forEach(x => results.add(rev_map[x]));
}
output[i] = Array.from(results);
}
} else {
let overlaps = this._index.overlap(query);
for (var i = 0; i < overlaps.length; i++) {
let results = new Set;
overlaps[i].forEach(x => results.add(rev_map[x]));
output[i] = Array.from(results);
}
}
return output;
}
}