// Reassigns every observation in `data` to the cluster reported by a search index built
// from `centers`, then recounts cluster sizes and recomputes the centers in place.
//
// data     - source of observations; queried for num_observations(), num_dimensions()
//            and a per-range extractor.
// ncenters - number of centers; passed to the index, the edge-case helpers and
//            compute_centroids().
// centers  - caller-owned array of center coordinates; read by index.reset() and handed
//            to compute_centroids() (layout is not visible here - confirm with callers).
// clusters - caller-owned array with one assignment per observation; compared against
//            the fresh assignments and then overwritten with them.
//
// Returns a Details<Index_> built from the per-cluster sizes plus `iter` and `status`,
// or whatever process_edge_case() returns when is_edge_case() is true.
//
// NOTE(review): this excerpt is a lossy listing. The leading integers are the original
// file's line numbers, and several original lines are not visible (closing braces, the
// declarations of `iter` and `status`, the statements that set `updated`, and presumably
// an enclosing iteration loop). Confirm against the real header before relying on this.
95 Details<Index_> run(
const Matrix_& data,
const Cluster_ ncenters, Float_*
const centers, Cluster_*
const clusters)
const {
96 const auto nobs = data.num_observations();
// Delegate entirely to the edge-case handler when is_edge_case() flags this
// nobs/ncenters combination.
97 if (internal::is_edge_case(nobs, ncenters)) {
98 return internal::process_edge_case(data, ncenters, centers, clusters);
// `sizes` is indexed by cluster and `copy` by observation further down; presumably
// sanisizer::create allocates ncenters and nobs entries respectively - confirm.
101 auto sizes = sanisizer::create<std::vector<Index_> >(ncenters);
102 auto copy = sanisizer::create<std::vector<Cluster_> >(nobs);
104 const auto ndim = data.num_dimensions();
105 internal::QuickSearch<Float_, Cluster_> index;
// Presumably rebuilds the search structure from the current contents of `centers`.
109 index.reset(ndim, ncenters, centers);
// parallelize() is given the thread count from my_options, the number of observations
// and a callback taking a (start, length) range. Each invocation writes only to
// copy[start .. start + length).
110 parallelize(my_options.
num_threads, nobs, [&](
const int,
const Index_ start,
const Index_ length) ->
void {
111 auto work = data.new_extractor(start, length);
112 for (Index_ obs = start, end = start + length; obs < end; ++obs) {
// get_observation() takes no index; presumably the extractor advances to the next
// observation of its range on each call - confirm.
113 const auto dptr = work->get_observation();
114 copy[obs] = index.find(dptr);
// Compare the fresh assignments with the existing ones. `updated` starts out false;
// the lines that set it and act on it are not visible in this excerpt.
119 bool updated =
false;
120 for (Index_ obs = 0; obs < nobs; ++obs) {
121 if (copy[obs] != clusters[obs]) {
// Publish the fresh assignments into the caller's `clusters` array.
129 std::copy(copy.begin(), copy.end(), clusters);
// Recount how many observations are assigned to each cluster.
131 std::fill(sizes.begin(), sizes.end(), 0);
132 for (Index_ obs = 0; obs < nobs; ++obs) {
133 ++sizes[clusters[obs]];
// Pass the updated assignments and counts to compute_centroids(), which receives the
// writable `centers` pointer.
135 internal::compute_centroids(data, ncenters, centers, clusters, sizes);
// `sizes` is moved into the result; `iter` and `status` are declared on lines that
// are not shown here.
144 return Details<Index_>(std::move(sizes), iter, status);
void parallelize(const int num_workers, const Task_ num_tasks, Run_ run_task_range)
Definition parallelize.hpp:28