1 #ifndef KMEANS_REFINE_MINIBATCH_HPP
2 #define KMEANS_REFINE_MINIBATCH_HPP
13 #include "aarand/aarand.hpp"
17 #include "QuickSearch.hpp"
18 #include "is_edge_case.hpp"
93 template<
typename Matrix_ = SimpleMatrix<
double,
int>,
typename Cluster_ =
int,
typename Float_ =
double>
120 auto nobs =
data.num_observations();
122 return internal::process_edge_case(
data,
ncenters, centers, clusters);
125 int iter = 0, status = 0;
128 typedef decltype(
nobs) Index_;
132 typedef typename std::conditional<std::is_signed<Index_>::value,
int,
unsigned int>::type
SafeCompInt;
137 std::mt19937_64
eng(my_options.
seed);
153 auto work = data.create_workspace(chosen.data() + start, length);
154 for (Index_ s = start, end = start + length; s < end; ++s) {
155 auto ptr = data.get_observation(work);
156 clusters[chosen[s]] = index.find(ptr);
163 const auto c = clusters[
o];
181 auto c = clusters[
o];
212 index.reset(ndim, ncenters, centers);
214 auto work = data.create_workspace(start, length);
215 for (Index_ s = start, end = start + length; s < end; ++s) {
216 auto ptr = data.get_observation(work);
217 clusters[s] = index.find(ptr);
221 std::vector<Index_> cluster_sizes(ncenters);
222 for (Index_ o = 0; o < nobs; ++o) {
223 ++cluster_sizes[clusters[o]];
226 internal::compute_centroids(data, ncenters, centers, clusters, cluster_sizes);
227 return Details<Index_>(std::move(cluster_sizes), iter, status);
Report detailed clustering statistics.
Interface for k-means refinement.
Implements the variance partitioning method of Su and Dy (2007).
Definition InitializeVariancePartition.hpp:164
Implements the mini-batch algorithm for k-means clustering.
Definition RefineMiniBatch.hpp:94
Details< typename Matrix_::index_type > run(const Matrix_ &data, Cluster_ ncenters, Float_ *centers, Cluster_ *clusters) const
Definition RefineMiniBatch.hpp:119
RefineMiniBatchOptions & get_options()
Definition RefineMiniBatch.hpp:111
RefineMiniBatch(RefineMiniBatchOptions options)
Definition RefineMiniBatch.hpp:99
RefineMiniBatch()=default
Interface for all k-means refinement algorithms.
Definition Refine.hpp:23
Namespace for k-means clustering.
Definition compute_wcss.hpp:12
void parallelize(int num_workers, Task_ num_tasks, Run_ run_task_range)
Definition parallelize.hpp:28
Utilities for parallelization.
Options for RefineMiniBatch construction.
Definition RefineMiniBatch.hpp:32
int max_iterations
Definition RefineMiniBatch.hpp:37
double max_change_proportion
Definition RefineMiniBatch.hpp:49
int convergence_history
Definition RefineMiniBatch.hpp:55
uint64_t seed
Definition RefineMiniBatch.hpp:60
int num_threads
Definition RefineMiniBatch.hpp:66
int batch_size
Definition RefineMiniBatch.hpp:43