kmeans
A C++ library for k-means
Loading...
Searching...
No Matches
RefineLloyd.hpp
Go to the documentation of this file.
1#ifndef KMEANS_LLOYD_HPP
2#define KMEANS_LLOYD_HPP
3
4#include <vector>
5#include <algorithm>
6
7#include "Refine.hpp"
8#include "Details.hpp"
9#include "QuickSearch.hpp"
10#include "is_edge_case.hpp"
11#include "compute_centroids.hpp"
12#include "parallelize.hpp"
13
20namespace kmeans {
21
31
36 int num_threads = 1;
37};
38
63template<typename Index_, typename Data_, typename Cluster_, typename Float_, typename Matrix_ = Matrix<Index_, Data_> >
64class RefineLloyd final : public Refine<Index_, Data_, Cluster_, Float_, Matrix_> {
65private:
66 RefineLloydOptions my_options;
67
68public:
72 RefineLloyd(RefineLloydOptions options) : my_options(std::move(options)) {}
73
77 RefineLloyd() = default;
78
79public:
85 return my_options;
86 }
87
88public:
92 Details<Index_> run(const Matrix_& data, Cluster_ ncenters, Float_* centers, Cluster_* clusters) const {
93 Index_ nobs = data.num_observations();
94 if (internal::is_edge_case(nobs, ncenters)) {
95 return internal::process_edge_case(data, ncenters, centers, clusters);
96 }
97
98 int iter = 0, status = 0;
99 std::vector<Index_> sizes(ncenters);
100 std::vector<Cluster_> copy(nobs);
101 size_t ndim = data.num_dimensions();
102 internal::QuickSearch<Float_, Cluster_> index;
103
104 for (iter = 1; iter <= my_options.max_iterations; ++iter) {
105 index.reset(ndim, ncenters, centers);
106 parallelize(my_options.num_threads, nobs, [&](int, Index_ start, Index_ length) -> void {
107 auto work = data.new_extractor(start, length);
108 for (Index_ obs = start, end = start + length; obs < end; ++obs) {
109 auto dptr = work->get_observation();
110 copy[obs] = index.find(dptr);
111 }
112 });
113
114 // Checking if it already converged.
115 bool updated = false;
116 for (Index_ obs = 0; obs < nobs; ++obs) {
117 if (copy[obs] != clusters[obs]) {
118 updated = true;
119 break;
120 }
121 }
122 if (!updated) {
123 break;
124 }
125 std::copy(copy.begin(), copy.end(), clusters);
126
127 std::fill(sizes.begin(), sizes.end(), 0);
128 for (Index_ obs = 0; obs < nobs; ++obs) {
129 ++sizes[clusters[obs]];
130 }
131 internal::compute_centroids(data, ncenters, centers, clusters, sizes);
132 }
133
134 if (iter == my_options.max_iterations + 1) {
135 status = 2;
136 }
137
138 return Details<Index_>(std::move(sizes), iter, status);
139 }
143};
144
145}
146
147#endif
Report detailed clustering statistics.
Interface for k-means refinement.
Implements the Lloyd algorithm for k-means clustering.
Definition RefineLloyd.hpp:64
RefineLloyd(RefineLloydOptions options)
Definition RefineLloyd.hpp:72
RefineLloydOptions & get_options()
Definition RefineLloyd.hpp:84
Interface for k-means refinement algorithms.
Definition Refine.hpp:26
virtual Details< Index_ > run(const Matrix_ &data, Cluster_ num_centers, Float_ *centers, Cluster_ *clusters) const =0
Namespace for k-means clustering.
Definition compute_wcss.hpp:12
void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range)
Definition parallelize.hpp:28
Utilities for parallelization.
Additional statistics from the k-means algorithm.
Definition Details.hpp:20
Options for RefineLloyd construction.
Definition RefineLloyd.hpp:25
int num_threads
Definition RefineLloyd.hpp:36
int max_iterations
Definition RefineLloyd.hpp:30