// NOTE(review): this block comes from a rendered source listing. The leading
// numbers (92, 93, ...) are the original file's line numbers, and the gaps in
// that numbering (96-97, 103-104, 106, 111-114, 118-124, 126, 130, 132-137)
// are lines missing from this view, including loop headers and all closing
// braces. Comments below describe only what the visible lines show.
/**
 * Reassigns every observation to the center returned by a `QuickSearch`
 * lookup, then recomputes per-cluster sizes and centroids from the new
 * assignments.
 * NOTE(review): looks like the body of an iterative Lloyd-style k-means
 * refinement - confirm against the complete source.
 *
 * @param data Input matrix; queried for its number of observations and
 * dimensions, and for per-range extractors that yield observations.
 * @param ncenters Number of cluster centers; `sizes` gets one entry per center.
 * @param[in, out] centers Center coordinates. Used to (re)build the search
 * index and passed to `compute_centroids()`.
 * Presumably updated in place by `compute_centroids()` - TODO confirm.
 * @param[in, out] clusters Per-observation cluster assignments. Compared
 * against the freshly computed assignments and then overwritten with them,
 * so it must hold at least `data.num_observations()` entries.
 *
 * @return A `Details` object built from the per-cluster sizes, `iter` and
 * `status`. For edge cases, the result of `process_edge_case()` instead.
 */
92 Details<Index_> run(
const Matrix_& data, Cluster_ ncenters, Float_* centers, Cluster_* clusters)
const {
93 Index_ nobs = data.num_observations();
// Edge cases are delegated to a dedicated helper and returned immediately;
// what counts as an edge case is decided by is_edge_case(), not shown here.
94 if (internal::is_edge_case(nobs, ncenters)) {
95 return internal::process_edge_case(data, ncenters, centers, clusters);
// Both values start at 0 and are reported in the returned Details.
// Where they are updated is not visible in this listing.
98 int iter = 0, status = 0;
// One observation counter per center.
99 std::vector<Index_> sizes(ncenters);
// Scratch buffer for the newly computed assignment of each observation, so it
// can be compared against `clusters` before being committed.
100 std::vector<Cluster_> copy(nobs);
101 size_t ndim = data.num_dimensions();
102 internal::QuickSearch<Float_, Cluster_> index;
// Point the search index at the current centers before looking up assignments.
// NOTE(review): the enclosing loop header (original lines 103-104) is missing.
105 index.reset(ndim, ncenters, centers);
// `start` and `length` are not declared in any visible line.
// NOTE(review): presumably they are the range arguments of a callback handed to
// parallelize() on the missing line 106 - confirm.
107 auto work = data.new_extractor(start, length);
108 for (Index_ obs = start, end = start + length; obs < end; ++obs) {
// NOTE(review): get_observation() takes no index, so the extractor presumably
// advances to the next observation on each call - confirm.
109 auto dptr = work->get_observation();
// Record the cluster that the index reports for this observation.
110 copy[obs] = index.find(dptr);
// Detect whether any assignment differs from the previous one.
115 bool updated =
false;
116 for (Index_ obs = 0; obs < nobs; ++obs) {
// NOTE(review): the body of this `if` (original lines 118-124) is missing;
// presumably it sets `updated` and the caller stops when nothing changed.
117 if (copy[obs] != clusters[obs]) {
// Commit the new assignments to the caller's buffer.
125 std::copy(copy.begin(), copy.end(), clusters);
// Rebuild the per-cluster counts from scratch for the new assignments.
127 std::fill(sizes.begin(), sizes.end(), 0);
128 for (Index_ obs = 0; obs < nobs; ++obs) {
129 ++sizes[clusters[obs]];
// Recompute the centroids from the updated assignments and sizes.
131 internal::compute_centroids(data, ncenters, centers, clusters, sizes);
// `sizes` is moved into the result rather than copied.
138 return Details<Index_>(std::move(sizes), iter, status);
void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range)
Definition parallelize.hpp:28