120 std::vector<Index>
run(
const std::vector<std::vector<std::pair<Index, Float> > >& neighbors, Index* assigned)
const {
121 size_t nobs = neighbors.size();
124 Observation(Float d, Index i,
int c) : distance(d), index(i), covered(c) {}
130 std::vector<Observation> ordered;
131 ordered.reserve(nobs);
132 std::vector<Index> chosen;
133 std::vector<char> covered(nobs);
138 bool fresh = chosen.empty();
140 for (
size_t n = 0; n < nobs; ++n) {
144 const auto& current = neighbors[n];
145 Float dist_to_k = (current.empty() ? std::numeric_limits<Float>::infinity() : current.back().second);
149 for (
auto x : current) {
150 num_covered += covered[x.first];
153 ordered.emplace_back(dist_to_k, n, num_covered);
156 if (ordered.empty()) {
161 std::sort(ordered.begin(), ordered.end(), [](
const Observation& left,
const Observation& right) ->
bool {
162 if (left.covered < right.covered) {
164 }
else if (left.covered == right.covered) {
165 if (left.distance < right.distance) {
167 }
else if (left.distance == right.distance) {
168 return left.index < right.index;
178 bool needs_resort =
false;
181 for (
const auto& o : ordered) {
182 auto candidate = o.index;
183 auto original_num = o.covered;
184 if (covered[candidate]) {
186 }
else if (needs_resort && original_num >= resort_limit) {
190 const auto& current = neighbors[candidate];
192 for (
auto x : current) {
193 updated_num += covered[x.first];
196 if (updated_num == original_num && (!needs_resort || updated_num < resort_limit)) {
197 chosen.push_back(candidate);
201 assigned[candidate] = candidate;
202 for (
const auto& x : current) {
203 if (!covered[x.first]) {
204 assigned[x.first] = candidate;
209 covered[candidate] = 1;
210 for (
const auto& x : current) {
211 if (!covered[x.first]) {
212 covered[x.first] = 1;
218 resort_limit = updated_num;
219 }
else if (resort_limit > updated_num) {
224 resort_limit = updated_num;
230 std::sort(chosen.begin(), chosen.end());
251 std::vector<Index>
run(
int ndim,
size_t nobs,
const Float* data, Index* assigned)
const {
252 std::shared_ptr<knncolle::Base<Index, Float> > ptr;
254 ptr.reset(
new knncolle::AnnoyEuclidean<Index, Float>(ndim, nobs, data));
256 ptr.reset(
new knncolle::VpTreeEuclidean<Index, Float>(ndim, nobs, data));
258 return run(ptr.get(), assigned);
276 std::vector<Index>
run(
const knncolle::Base<Index, Float>* index, Index* assigned)
const {
277 size_t nobs = index->nobs();
278 std::vector<std::vector<std::pair<Index, Float> > > neighbors(nobs);
280#ifndef SCRAN_CUSTOM_PARALLEL
281 #pragma omp parallel for num_threads(nthreads)
282 for (
size_t i = 0; i < nobs; ++i) {
284 SCRAN_CUSTOM_PARALLEL([&](
size_t,
size_t start,
size_t length) ->
void {
285 for (
size_t i = start, end = start + length; i < end; ++i) {
288 neighbors[i] = index->find_nearest_neighbors(i, num_neighbors);
290#ifndef SCRAN_CUSTOM_PARALLEL
297 return run(neighbors, assigned);