48 : dimensions_( dimensions )
55 bool EuclideanKmeans::data_validation( std::vector<Point>
const& data )
const 57 for(
size_t i = 0; i < data.size(); ++i ) {
58 auto const& datum = data[i];
59 if( datum.size() != dimensions_ ) {
60 throw std::runtime_error(
69 void EuclideanKmeans::update_centroids(
70 std::vector<Point>
const& data,
76 assert( data.size() == assignments.size() );
78 auto const k = centroids.size();
83 #pragma omp parallel for 84 for(
size_t i = 0; i < k; ++i ) {
87 auto centroid =
Point( dimensions_, 0.0 );
91 for(
size_t j = 0; j < data.size(); ++j ) {
94 if( assignments[j] != i ) {
99 auto const& datum = data[ j ];
100 for(
size_t d = 0; d < dimensions_; ++d ) {
101 centroid[ d ] += datum[ d ];
108 for(
size_t d = 0; d < dimensions_; ++d ) {
109 centroid[ d ] /= count;
114 centroids[ i ] = std::move( centroid );
122 centroids = std::vector<Point>( k,
Point( dimensions_, 0.0 ) );
123 auto counts = std::vector<size_t>( k, 0 );
126 for(
size_t i = 0; i < data.size(); ++i ) {
129 auto const& datum = data[ i ];
130 auto& centroid = centroids[ assignments[i] ];
133 for(
size_t d = 0; d < dimensions_; ++d ) {
134 centroid[ d ] += datum[ d ];
137 ++counts[ assignments[i] ];
141 for(
size_t i = 0; i < k; ++i ) {
142 if( counts[ i ] > 0 ) {
143 auto& centroid = centroids[ i ];
144 for(
size_t d = 0; d < dimensions_; ++d ) {
145 centroid[ d ] /= counts[ i ];
153 double EuclideanKmeans::distance(
Point const& lhs,
Point const& rhs )
const 157 for(
size_t d = 0; d < dimensions_; ++d ) {
158 auto const diff = lhs[d] - rhs[d];
161 return std::sqrt( sum );
EuclideanKmeans(size_t dimensions)
std::vector< std::vector< double > > const & centroids() const
double sum(const Histogram &h)
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
std::vector< size_t > const & assignments() const
std::vector< double > Point
std::shared_ptr< BaseOutputTarget > to_string(std::string &target_string)
Obtain an output target for writing to a string.