// Assigns every observation in `data` to a cluster and updates `centers`,
// returning a Details<Index_> built from the per-cluster sizes, `iter` and `status`.
//
// Parameters:
//   data     - source of observations; queried for its number of observations
//              and dimensions, and for per-range extractors.
//   ncenters - number of centers; also the length of the `sizes` vector.
//   centers  - center coordinates; passed to the search index and to
//              internal::compute_centroids().
//   clusters - per-observation cluster assignment, indexed by observation.
//
// NOTE(review): the bare numbers leading some lines (95, 96, ...) look like line
// numbers from the listing this was extracted from, and gaps between them indicate
// omitted statements. In particular the declarations of `num_diff`, `iter` and
// `status`, any enclosing loop, and the closing braces are not visible here.
95 Details<Index_> run(
const Matrix_& data,
const Cluster_ ncenters, Float_*
const centers, Cluster_*
const clusters)
const {
96 const auto nobs = data.num_observations();
// Edge cases are handed off entirely to internal::process_edge_case().
97 if (internal::is_edge_case(nobs, ncenters)) {
98 return internal::process_edge_case(data, ncenters, centers, clusters);
// One counter per center; filled below with the number of observations assigned to it.
101 auto sizes = sanisizer::create<std::vector<Index_> >(ncenters);
102 const auto ndim = data.num_dimensions();
103 internal::QuickSearch<Float_, Cluster_> index;
// One slot per thread; slot `t` receives that task's count of changed assignments.
105 auto num_diff_threads = sanisizer::create<std::vector<Index_> >(my_options.
num_threads);
// Set all `nobs` assignments to 0 up front; presumably so that the comparison
// against `clusters[obs]` below never reads an uninitialized value.
106 std::fill_n(clusters, nobs, 0);
// Point the search index at the current contents of `centers`.
110 index.reset(ndim, ncenters, centers);
// Split the `nobs` observations across `my_options.num_threads` workers; each
// invocation gets a thread index `t` and the range [start, start + length).
112 parallelize(my_options.
num_threads, nobs, [&](
const int t,
const Index_ start,
const Index_ length) ->
void {
113 auto work = data.new_extractor(start, length);
115 for (Index_ obs = start, end = start + length; obs < end; ++obs) {
116 const auto dptr = work->get_observation();
117 const auto closest = index.find(dptr);
// `previous` is a mutable reference into `clusters`; the write-back of `closest`
// is presumably on an omitted line - TODO confirm.
118 auto& previous = clusters[obs];
// Count observations whose closest center differs from the stored assignment.
119 num_diff += (closest != previous);
122 num_diff_threads[t] = num_diff;
// Scan the per-thread change counts. The loop body is not visible in this
// listing; presumably it sets `updated` when any count is non-zero.
127 bool updated =
false;
128 for (
const auto num_diff : num_diff_threads) {
// Recount the number of observations assigned to each cluster from scratch.
139 std::fill(sizes.begin(), sizes.end(), 0);
140 for (Index_ obs = 0; obs < nobs; ++obs) {
141 ++sizes[clusters[obs]];
// Presumably recomputes `centers` from the current assignments and sizes.
143 internal::compute_centroids(data, ncenters, centers, clusters, sizes);
// `sizes` is moved into the result; `iter` and `status` are defined on omitted lines.
152 return Details<Index_>(std::move(sizes), iter, status);
void parallelize(const int num_workers, const Task_ num_tasks, Run_ run_task_range)
Definition parallelize.hpp:28