kmeans
A C++ library for k-means
Loading...
Searching...
No Matches
RefineLloyd.hpp
Go to the documentation of this file.
1#ifndef KMEANS_LLOYD_HPP
2#define KMEANS_LLOYD_HPP
3
4#include <vector>
5#include <algorithm>
6
7#include "Refine.hpp"
8#include "Details.hpp"
9#include "QuickSearch.hpp"
10#include "is_edge_case.hpp"
11#include "compute_centroids.hpp"
12#include "parallelize.hpp"
13
20namespace kmeans {
21
31
36 int num_threads = 1;
37};
38
60template<typename Matrix_ = SimpleMatrix<double, int>, typename Cluster_ = int, typename Float_ = double>
61class RefineLloyd : public Refine<Matrix_, Cluster_, Float_> {
62private:
63 RefineLloydOptions my_options;
64
65 typedef typename Matrix_::index_type Index_;
66
67public:
72
76 RefineLloyd() = default;
77
78public:
84 return my_options;
85 }
86
87public:
88 Details<Index_> run(const Matrix_& data, Cluster_ ncenters, Float_* centers, Cluster_* clusters) const {
89 auto nobs = data.num_observations();
90 if (internal::is_edge_case(nobs, ncenters)) {
91 return internal::process_edge_case(data, ncenters, centers, clusters);
92 }
93
94 int iter = 0, status = 0;
95 std::vector<Index_> sizes(ncenters);
96 std::vector<Cluster_> copy(nobs);
97 auto ndim = data.num_dimensions();
98 internal::QuickSearch<Float_, Cluster_, decltype(ndim)> index;
99
100 for (iter = 1; iter <= my_options.max_iterations; ++iter) {
101 index.reset(ndim, ncenters, centers);
102 parallelize(my_options.num_threads, nobs, [&](int, Index_ start, Index_ length) {
103 auto work = data.create_workspace(start, length);
104 for (Index_ obs = start, end = start + length; obs < end; ++obs) {
105 auto dptr = data.get_observation(work);
106 copy[obs] = index.find(dptr);
107 }
108 });
109
110 // Checking if it already converged.
111 bool updated = false;
112 for (Index_ obs = 0; obs < nobs; ++obs) {
113 if (copy[obs] != clusters[obs]) {
114 updated = true;
115 break;
116 }
117 }
118 if (!updated) {
119 break;
120 }
121 std::copy(copy.begin(), copy.end(), clusters);
122
123 std::fill(sizes.begin(), sizes.end(), 0);
124 for (Index_ obs = 0; obs < nobs; ++obs) {
125 ++sizes[clusters[obs]];
126 }
127 internal::compute_centroids(data, ncenters, centers, clusters, sizes);
128 }
129
130 if (iter == my_options.max_iterations + 1) {
131 status = 2;
132 }
133
134 return Details<Index_>(std::move(sizes), iter, status);
135 }
136};
137
138}
139
140#endif
Report detailed clustering statistics.
Interface for k-means refinement.
Implements the variance partitioning method of Su and Dy (2007).
Definition InitializeVariancePartition.hpp:164
Implements the Lloyd algorithm for k-means clustering.
Definition RefineLloyd.hpp:61
Details< Index_ > run(const Matrix_ &data, Cluster_ ncenters, Float_ *centers, Cluster_ *clusters) const
Definition RefineLloyd.hpp:88
RefineLloyd(RefineLloydOptions options)
Definition RefineLloyd.hpp:71
RefineLloydOptions & get_options()
Definition RefineLloyd.hpp:83
Interface for all k-means refinement algorithms.
Definition Refine.hpp:23
Namespace for k-means clustering.
Definition compute_wcss.hpp:12
void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range)
Definition parallelize.hpp:28
Utilities for parallelization.
Options for RefineLloyd construction.
Definition RefineLloyd.hpp:25
int num_threads
Definition RefineLloyd.hpp:36
int max_iterations
Definition RefineLloyd.hpp:30