kmeans
k-means clustering in C++
Loading...
Searching...
No Matches
kmeans.hpp
Go to the documentation of this file.
1#ifndef KMEANS_KMEANS_HPP
2#define KMEANS_KMEANS_HPP
3
4#include <vector>
5
6#include "sanisizer/sanisizer.hpp"
7
8#include "Details.hpp"
9#include "Refine.hpp"
10#include "Initialize.hpp"
11#include "Matrix.hpp"
12#include "SimpleMatrix.hpp"
13
15#include "InitializeRandom.hpp"
17#include "InitializeNone.hpp"
18
20#include "RefineLloyd.hpp"
21#include "RefineMiniBatch.hpp"
22
23#include "compute_wcss.hpp"
25
35namespace kmeans {
36
// compute() -- pointer-output form: chains an initialization step and a refinement step,
// writing results into the caller-supplied `centers` and `clusters` buffers.
//
// NOTE(review): this listing omits original source lines 59, 61 and 62 (the return type/name
// and the `initialize` / `refine` parameters). The cross-reference index for this file gives
// the full signature as:
//   Details<Index_> compute(const Matrix_& data,
//                           const Initialize<Index_, Data_, Cluster_, Float_, Matrix_>& initialize,
//                           const Refine<Index_, Data_, Cluster_, Float_, Matrix_>& refine,
//                           Cluster_ num_centers, Float_* centers, Cluster_* clusters)
//
// Parameters visible here:
//   data        - input matrix, passed unchanged to both steps.
//   num_centers - requested number of centers.
//   centers     - buffer handed to both initialize.run() and refine.run().
//                 Required length is not shown in this block -- TODO confirm with the
//                 Initialize/Refine documentation.
//   clusters    - buffer handed to refine.run() only.
// Returns the object produced by refine.run(), after its `sizes` member has been resized.
58template<typename Index_, typename Data_, typename Cluster_, typename Float_, class Matrix_ = Matrix<Index_, Data_> >
60 const Matrix_& data,
63 Cluster_ num_centers,
64 Float_* centers,
65 Cluster_* clusters)
66{
// Initialization reports its own center count; refinement is then run with that count
// rather than with the requested `num_centers`. Presumably the reported count can be
// smaller than the request -- verify against the Initialize implementations.
67 auto actual_centers = initialize.run(data, num_centers, centers);
68 auto output = refine.run(data, actual_centers, centers, clusters);
// Make `output.sizes` hold exactly `num_centers` entries regardless of what refine.run()
// returned, so callers can index it by any requested cluster.
69 sanisizer::resize(output.sizes, num_centers); // restoring the full size.
70 return output;
71}
72
// compute() -- pointer-output overload with the matrix type fixed to Matrix<Index_, Data_>.
// It only forwards: every argument is passed straight through to the Matrix_-templated
// compute(), with all five template arguments spelled out explicitly.
//
// NOTE(review): original source line 94 (the return type and function name) is missing from
// this listing; the return type is whatever the forwarded compute() call yields.
// NOTE(review): presumably this overload exists so that callers passing the interface types
// do not need to name Matrix_ themselves -- confirm against the project documentation.
93template<typename Index_, typename Data_, typename Cluster_, typename Float_>
95 const Matrix<Index_, Data_>& data,
96 const Initialize<Index_, Data_, Cluster_, Float_, Matrix<Index_, Data_> >& initialize,
97 const Refine<Index_, Data_, Cluster_, Float_, Matrix<Index_, Data_> >& refine,
98 const Cluster_ num_centers,
99 Float_* const centers,
100 Cluster_* const clusters)
101{
// Explicit template arguments select the Matrix_-templated overload with Matrix_ set to
// the interface type.
102 return compute<Index_, Data_, Cluster_, Float_, Matrix<Index_, Data_> >(data, initialize, refine, num_centers, centers, clusters);
103}
104
// Results: owning container for the outputs of a compute() call -- a `clusters` vector and a
// `centers` vector.
//
// NOTE(review): the cross-reference index for this file also lists a member
// `Details<Index_> details` at original line 139; that line is missing from this listing.
108template<typename Index_, typename Cluster_, typename Float_>
109struct Results {
// Sizing constructor: `centers` gets num_dimensions * num_centers elements and `clusters`
// gets num_observations elements, all value-initialized by std::vector.
// The product is computed through sanisizer::product, cast to the vector's size type;
// presumably this guards against overflow -- confirm with the sanisizer documentation.
// Members are initialized in declaration order (clusters, then centers), not in the order
// written below; this is harmless because the two initializers are independent.
113 Results(const std::size_t num_dimensions, const Index_ num_observations, const Cluster_ num_centers) :
114 centers(sanisizer::product<I<decltype(centers.size())> >(num_dimensions, num_centers)),
115 clusters(num_observations)
116 {}
117
// Default construction leaves both vectors empty.
118 Results() = default;
// One Cluster_ entry per observation when built via the sizing constructor.
// Presumably the assigned cluster index of each observation -- TODO confirm.
127 std::vector<Cluster_> clusters;
128
// Flat buffer of num_dimensions * num_centers values. The element layout
// (which index varies fastest) is not established by this block -- TODO confirm.
134 std::vector<Float_> centers;
135
140};
141
// compute() -- allocating form: sizes the output vectors itself, runs the pointer-output
// compute() on them, and returns everything in one object.
//
// NOTE(review): this listing omits original source lines 161, 163, 164 and 167 (the return
// type/name, the `initialize` / `refine` parameters, and the declaration of `output`).
// From the members used below, `output` is presumably a Results<Index_, Cluster_, Float_> --
// confirm against the original header.
//
// Parameters visible here:
//   data        - input matrix; its num_observations() and num_dimensions() size the outputs.
//   num_centers - requested number of centers.
160template<typename Index_, typename Data_, typename Cluster_, typename Float_, class Matrix_ = Matrix<Index_, Data_> >
162 const Matrix_& data,
165 const Cluster_ num_centers)
166{
// One `clusters` entry per observation.
168 sanisizer::resize(output.clusters, data.num_observations());
// `centers` holds num_centers * num_dimensions values; the product is computed via
// sanisizer::product in the vector's own size type.
169 output.centers.resize(sanisizer::product<I<decltype(output.centers.size())> >(num_centers, data.num_dimensions()));
// Delegate to the pointer-output overload, which fills both buffers in place; its return
// value is stored in `details`.
170 output.details = compute(data, initialize, refine, num_centers, output.centers.data(), output.clusters.data());
171 return output;
172}
173
// compute() -- allocating overload with the matrix type fixed to Matrix<Index_, Data_>.
// It only forwards: the four arguments are passed to the Matrix_-templated allocating
// compute(), with all five template arguments spelled out explicitly.
//
// NOTE(review): original source line 191 (the return type and function name) is missing from
// this listing; the return type is whatever the forwarded compute() call yields.
190template<typename Index_, typename Data_, typename Cluster_, typename Float_>
192 const Matrix<Index_, Data_>& data,
193 const Initialize<Index_, Data_, Cluster_, Float_, Matrix<Index_, Data_> >& initialize,
194 const Refine<Index_, Data_, Cluster_, Float_, Matrix<Index_, Data_> >& refine,
195 const Cluster_ num_centers)
196{
// No output pointers are passed, so this resolves to the allocating overload rather than
// the pointer-output one.
197 return compute<Index_, Data_, Cluster_, Float_, Matrix<Index_, Data_> >(data, initialize, refine, num_centers);
198}
199
200}
201
202#endif
Report detailed clustering statistics.
Class for kmeans++ initialization.
Class for no initialization.
Class for random initialization.
k-means initialization with variance partitioning.
Interface for k-means initialization.
Interface for matrix inputs.
Implements the Hartigan-Wong algorithm for k-means clustering.
Implements the Lloyd algorithm for k-means clustering.
Implements the mini-batch algorithm for k-means clustering.
Interface for k-means refinement.
Wrapper for a simple dense matrix.
Interface for k-means initialization algorithms.
Definition Initialize.hpp:29
virtual Cluster_ run(const Matrix_ &data, Cluster_ num_centers, Float_ *centers) const =0
Interface for matrix data.
Definition Matrix.hpp:133
Interface for k-means refinement algorithms.
Definition Refine.hpp:30
virtual Details< Index_ > run(const Matrix_ &data, Cluster_ num_centers, Float_ *centers, Cluster_ *clusters) const =0
Compute within-cluster sum of squares.
Perform k-means clustering.
Definition compute_wcss.hpp:16
Details< Index_ > compute(const Matrix_ &data, const Initialize< Index_, Data_, Cluster_, Float_, Matrix_ > &initialize, const Refine< Index_, Data_, Cluster_, Float_, Matrix_ > &refine, Cluster_ num_centers, Float_ *centers, Cluster_ *clusters)
Definition kmeans.hpp:59
Remove unused k-means centroids.
Additional statistics from the k-means algorithm.
Definition Details.hpp:20
Results of the k-means clustering.
Definition kmeans.hpp:109
std::vector< Float_ > centers
Definition kmeans.hpp:134
Details< Index_ > details
Definition kmeans.hpp:139
std::vector< Cluster_ > clusters
Definition kmeans.hpp:127