kmeans
k-means clustering in C++
Loading...
Searching...
No Matches
kmeans.hpp
Go to the documentation of this file.
1#ifndef KMEANS_KMEANS_HPP
2#define KMEANS_KMEANS_HPP
3
4#include <vector>
5
6#include "sanisizer/sanisizer.hpp"
7
8#include "Details.hpp"
9#include "Refine.hpp"
10#include "Initialize.hpp"
11#include "Matrix.hpp"
12#include "SimpleMatrix.hpp"
13
15#include "InitializeRandom.hpp"
17#include "InitializeNone.hpp"
18
20#include "RefineLloyd.hpp"
21#include "RefineMiniBatch.hpp"
22
23#include "compute_wcss.hpp"
24
34namespace kmeans {
35
// Perform k-means clustering: run the initialization step, then the refinement step.
// `centers` is handed to both steps; `clusters` is handed to refinement only.
// NOTE(review): listing lines 58, 60 and 61 (return type, function name, and the
// `initialize` / `refine` parameters used in the body) are absent from this extract.
57template<typename Index_, typename Data_, typename Cluster_, typename Float_, class Matrix_ = Matrix<Index_, Data_> >
59 const Matrix_& data,
62 Cluster_ num_centers,
63 Float_* centers,
64 Cluster_* clusters)
65{
// Refinement receives the count returned by initialization, not the requested `num_centers`.
66 auto actual_centers = initialize.run(data, num_centers, centers);
67 auto output = refine.run(data, actual_centers, centers, clusters);
68 sanisizer::resize(output.sizes, num_centers); // resize `sizes` to `num_centers` entries, since refinement was run with `actual_centers`.
69 return output;
70}
71
// Overload whose `data`, `initialize` and `refine` parameters are fixed to the
// `Matrix<Index_, Data_>` type, so there is no `Matrix_` template parameter.
// NOTE(review): listing line 93 (return type and function name) is absent from this extract.
92template<typename Index_, typename Data_, typename Cluster_, typename Float_>
94 const Matrix<Index_, Data_>& data,
95 const Initialize<Index_, Data_, Cluster_, Float_, Matrix<Index_, Data_> >& initialize,
96 const Refine<Index_, Data_, Cluster_, Float_, Matrix<Index_, Data_> >& refine,
97 const Cluster_ num_centers,
98 Float_* const centers,
99 Cluster_* const clusters)
100{
// Forward unchanged, spelling out every template argument including `Matrix_`.
101 return compute<Index_, Data_, Cluster_, Float_, Matrix<Index_, Data_> >(data, initialize, refine, num_centers, centers, clusters);
102}
103
// Results of the k-means clustering.
// NOTE(review): listing line 138 (the `details` member) is absent from this extract.
107template<typename Index_, typename Cluster_, typename Float_>
108struct Results {
// Sizing constructor: `centers` gets num_dimensions * num_centers entries (the product is
// computed via sanisizer::product), and `clusters` gets num_observations entries.
// Members are initialized in declaration order (`clusters`, then `centers`), not in the
// order written below; the two initializers do not depend on each other, and
// `centers.size()` appears only inside decltype, so it is never evaluated.
112 Results(const std::size_t num_dimensions, const Index_ num_observations, const Cluster_ num_centers) :
113 centers(sanisizer::product<decltype(I(centers.size()))>(num_dimensions, num_centers)),
114 clusters(num_observations)
115 {}
116
// Default constructor: both vectors start empty.
117 Results() = default;
// Vector of `Cluster_` values; sized to `num_observations` by the sizing constructor.
126 std::vector<Cluster_> clusters;
127
// Vector of `Float_` values; sized to num_dimensions * num_centers by the sizing constructor.
133 std::vector<Float_> centers;
134
139};
140
// Overload that sizes the output vectors itself instead of taking caller-supplied pointers.
// NOTE(review): listing lines 160, 162, 163 and 166 (return type and function name, the
// `initialize` / `refine` parameters, and the declaration of `output`) are absent from this
// extract; `output` is presumably a `Results` object, given the members used below - confirm.
159template<typename Index_, typename Data_, typename Cluster_, typename Float_, class Matrix_ = Matrix<Index_, Data_> >
161 const Matrix_& data,
164 const Cluster_ num_centers)
165{
// Size `clusters` to the number of observations.
167 sanisizer::resize(output.clusters, data.num_observations());
// Size `centers` to num_centers * num_dimensions entries.
168 output.centers.resize(sanisizer::product<decltype(I(output.centers.size()))>(num_centers, data.num_dimensions()));
// Run the pointer-based overload on the freshly sized buffers and keep its return value.
169 output.details = compute(data, initialize, refine, num_centers, output.centers.data(), output.clusters.data());
170 return output;
171}
172
// Overload without output pointers whose `data`, `initialize` and `refine` parameters are
// fixed to the `Matrix<Index_, Data_>` type, so there is no `Matrix_` template parameter.
// NOTE(review): listing line 190 (return type and function name) is absent from this extract.
189template<typename Index_, typename Data_, typename Cluster_, typename Float_>
191 const Matrix<Index_, Data_>& data,
192 const Initialize<Index_, Data_, Cluster_, Float_, Matrix<Index_, Data_> >& initialize,
193 const Refine<Index_, Data_, Cluster_, Float_, Matrix<Index_, Data_> >& refine,
194 const Cluster_ num_centers)
195{
// Forward unchanged, spelling out every template argument including `Matrix_`.
196 return compute<Index_, Data_, Cluster_, Float_, Matrix<Index_, Data_> >(data, initialize, refine, num_centers);
197}
198
199}
200
201#endif
Report detailed clustering statistics.
Class for kmeans++ initialization.
Class for no initialization.
Class for random initialization.
k-means initialization with variance partitioning.
Interface for k-means initialization.
Interface for matrix inputs.
Implements the Hartigan-Wong algorithm for k-means clustering.
Implements the Lloyd algorithm for k-means clustering.
Implements the mini-batch algorithm for k-means clustering.
Interface for k-means refinement.
Wrapper for a simple dense matrix.
Interface for k-means initialization algorithms.
Definition Initialize.hpp:29
virtual Cluster_ run(const Matrix_ &data, Cluster_ num_centers, Float_ *centers) const =0
Interface for matrix data.
Definition Matrix.hpp:133
Interface for k-means refinement algorithms.
Definition Refine.hpp:30
virtual Details< Index_ > run(const Matrix_ &data, Cluster_ num_centers, Float_ *centers, Cluster_ *clusters) const =0
Compute within-cluster sum of squares.
Perform k-means clustering.
Definition compute_wcss.hpp:16
Details< Index_ > compute(const Matrix_ &data, const Initialize< Index_, Data_, Cluster_, Float_, Matrix_ > &initialize, const Refine< Index_, Data_, Cluster_, Float_, Matrix_ > &refine, Cluster_ num_centers, Float_ *centers, Cluster_ *clusters)
Definition kmeans.hpp:58
Additional statistics from the k-means algorithm.
Definition Details.hpp:20
Results of the k-means clustering.
Definition kmeans.hpp:108
std::vector< Float_ > centers
Definition kmeans.hpp:133
Details< Index_ > details
Definition kmeans.hpp:138
std::vector< Cluster_ > clusters
Definition kmeans.hpp:126