Octopus
kmeans_clustering.F90
Go to the documentation of this file.
1!! Copyright (C) 2024. A Buccheri.
2!!
3!! This program is free software; you can redistribute it and/or modify
4!! it under the terms of the GNU General Public License as published by
5!! the Free Software Foundation; either version 2, or (at your option)
6!! any later version.
7!!
8!! This program is distributed in the hope that it will be useful,
9!! but WITHOUT ANY WARRANTY; without even the implied warranty of
10!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11!! GNU General Public License for more details.
12!!
13!! You should have received a copy of the GNU General Public License
14!! along with this program; if not, write to the Free Software
15!! Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16!! 02110-1301, USA.
17
18#include "global.h"
19
21 use, intrinsic :: iso_fortran_env
22 use debug_oct_m
23 use global_oct_m
24 use mesh_oct_m
26 use mpi_oct_m
29 use space_oct_m
30 implicit none
31 private
32
36
37contains
38
39 ! TODO(Alex) Issue #1004. Implement `assign_points_to_centroids` for periodic boundary conditions
40
66 subroutine assign_points_to_centroids_finite_bc(mesh, centroids, ip_to_ic)
67 class(mesh_t), intent(in) :: mesh
68 real(real64), intent(in) :: centroids(:, :)
69 integer, intent(out) :: ip_to_ic(:)
70
71 integer :: ip, ic, icen
72 integer :: n_centroids
73 real(real64) :: min_dist, dist
74 real(real64), allocatable :: point(:)
75
77
78 ! Grid to centroid index map should have size of the grid
79 assert(size(ip_to_ic) == mesh%np)
80
81 n_centroids = size(centroids, 2)
82 safe_allocate(point(1:size(centroids, 1)))
83
84 !$omp parallel do default(shared) private(point, icen, min_dist, dist)
85 do ip = 1, mesh%np
86 ip_to_ic(ip) = 0
87 ! Compute which centroid, grid point `ip` is closest to
88 point = mesh%x(ip, :)
89 icen = 1
90 min_dist = sum((centroids(:, 1) - point(:))**2)
91 do ic = 2, n_centroids
92 dist = sum((centroids(:, ic) - point(:))**2)
93 if (dist < min_dist) then
94 min_dist = dist
95 icen = ic
96 endif
97 enddo
98 ip_to_ic(ip) = icen
99 enddo
100 !$omp end parallel do
101
102 safe_deallocate_a(point)
103
105
107
108
119 subroutine update_centroids(mesh, weight, ip_to_ic, centroids)
120 class(mesh_t), intent(in) :: mesh
121 real(real64), intent(in) :: weight(:)
122 integer, intent(in) :: ip_to_ic(:)
123 real(real64), contiguous, intent(inout) :: centroids(:, :)
124 ! Out: Updated centroid positions
125
126 integer :: n_centroids, ic, ip
127 real(real64) :: one_over_denom
128 real(real64), allocatable :: denominator(:)
129
130 push_sub(update_centroids)
131
132 ! The indexing of weight and grid must be consistent => must belong to the same spatial distribution
133 ! This does not explicit assert this, but if the grid sizes differ, this is a clear indicator of a problem
134 assert(mesh%np == size(weight))
135
136 n_centroids = size(centroids, 2)
137 safe_allocate(denominator(1:n_centroids))
138
139 do ic = 1, n_centroids
140 centroids(:, ic) = 0._real64
141 denominator(ic) = 0._real64
142 enddo
143
144 !$omp parallel do private(ic) reduction(+ : centroids, denominator)
145 do ip = 1, mesh%np
146 ic = ip_to_ic(ip)
147 ! Initially accumulate the numerator in `centroids`
148 centroids(:, ic) = centroids(:, ic) + (mesh%x(ip, :) * weight(ip))
149 denominator(ic) = denominator(ic) + weight(ip)
150 enddo
151 !$omp end parallel do
152
153 ! Gather contributions to numerator and denominator of all centroids, from all domains of the mesh/grid
154 call mesh%allreduce(centroids)
155 call mesh%allreduce(denominator)
156
157 ! If division by zero occurs here it implies that the sum(weight) = 0 for all grid points
158 ! in cluster ic. This can occur if the initial centroid is poorly chosen at a point with no
159 !! associated weight (such as the vacuum of a crystal cell)
160 !$omp parallel do private(one_over_denom) reduction(* : centroids)
161 do ic = 1, n_centroids
162 one_over_denom = 1._real64 / denominator(ic)
163 centroids(:, ic) = centroids(:, ic) * one_over_denom
164 enddo
165 !$omp end parallel do
166
167 safe_deallocate_a(denominator)
168 pop_sub(update_centroids)
169
170 end subroutine update_centroids
171
172
177 subroutine compute_grid_difference(points, updated_points, tol, points_differ)
178 real(real64), intent(in) :: points(:, :)
179 real(real64), intent(in) :: updated_points(:, :)
180 real(real64), intent(in) :: tol
181 logical, intent(out) :: points_differ(:)
182
183 integer :: ip, n_dim
184 real(real64), allocatable :: diff(:)
185
187
188 n_dim = size(points, 1)
189 allocate(diff(n_dim))
190
191 !$omp parallel do default(shared) private(diff)
192 do ip = 1, size(points, 2)
193 diff(:) = abs(updated_points(:, ip) - points(:, ip))
194 points_differ(ip) = any(diff > tol)
195 enddo
196 !$omp end parallel do
197
198 if(debug%info) then
199 call report_differences_in_grids(points, updated_points, tol, points_differ)
200 endif
201
203
204 end subroutine compute_grid_difference
205
206
208 subroutine report_differences_in_grids(points, updated_points, tol, points_differ)
209 real(real64), intent(in) :: points(:, :)
210 real(real64), intent(in) :: updated_points(:, :)
211 real(real64), intent(in) :: tol
212 logical, intent(in) :: points_differ(:)
213
214 integer, allocatable :: indices(:)
215 integer :: i, j, n_unconverged, ndim
216 character(len=50) :: f_string
217 real(real64), allocatable :: diff(:)
218
220
221 indices = pack([(i, i=1,size(points_differ))], points_differ)
222 n_unconverged = size(indices)
223 ndim = size(points, 1)
224 allocate(diff(ndim))
225
226 write(f_string, '(A, I1, A, I1, A, I1, A)') '(', &
227 & ndim, '(F16.10, X), ', &
228 & ndim, '(F16.10, X), ', &
229 & ndim, '(F16.10, X), F16.10)'
230
231 write(message(1), '(a)') "# Current Point , Prior Point , |ri - r_{i-1}| , tol"
232 call messages_info(1)
233 do j = 1, n_unconverged
234 i = indices(j)
235 diff(:) = abs(updated_points(:, i) - points(:, i))
236 write(message(1), f_string) updated_points(:, i), points(:, i), diff, tol
237 call messages_info(1)
238 enddo
239 write(message(1), *) "Summary:", n_unconverged, "of out", size(points, 2), "are not converged"
240 call messages_info(1)
241
243
244 end subroutine report_differences_in_grids
245
246
272 subroutine weighted_kmeans(space, mesh, weight, centroids, n_iter, centroid_tol, discretize)
273 class(space_t), intent(in) :: space
274 class(mesh_t), intent(in) :: mesh
275 real(real64), intent(in) :: weight(:)
276 real(real64), contiguous, intent(inout) :: centroids(:, :)
277 ! Out: Final centroids
278 integer, optional, intent(in) :: n_iter
279 real(real64), optional, intent(in) :: centroid_tol
280 logical, optional, intent(in) :: discretize
281
282 logical :: discretize_centroids
283 integer :: n_iterations, n_centroid, i
284 real(real64) :: tol
285 integer, allocatable :: ip_to_ic(:)
286 real(real64), allocatable :: prior_centroids(:, :)
287 logical, allocatable :: points_differ(:)
288
289 push_sub(weighted_kmeans)
290
291 n_iterations = optional_default(n_iter, 200)
292 tol = optional_default(centroid_tol, 1.e-6_real64)
293 discretize_centroids = optional_default(discretize, .true.)
294
295 ! Should use a positive number of iterations
296 assert(n_iterations >= 1)
297 ! Number of weights inconsistent with number of grid points
298 assert(size(weight) == mesh%np)
299 ! Spatial dimensions of centroids array is inconsistent
300 assert(size(centroids, 1) == space%dim)
301 ! Assignment of points to centroids only implemented for finite BCs
302 assert(.not. space%is_periodic())
303
304 ! Work arrays
305 n_centroid = size(centroids, 2)
306 safe_allocate_source(prior_centroids(space%dim, size(centroids, 2)), centroids)
307 safe_allocate(ip_to_ic(1:mesh%np))
308 safe_allocate(points_differ(1:n_centroid))
309
310 if (debug%info) then
311 write(message(1), '(a)') 'Performing weighted Kmeans clustering '
312 call messages_info(1)
313 endif
314
315 do i = 1, n_iterations
316 if (debug%info) then
317 write(message(1), '(a, I3)') 'Iteration ', i
318 call messages_info(1)
319 endif
320 ! TODO(Alex) Issue #1004. Implement `assign_points_to_centroids` for periodic boundary conditions
321 call assign_points_to_centroids_finite_bc(mesh, centroids, ip_to_ic)
322
323 call update_centroids(mesh, weight, ip_to_ic, centroids)
324 call compute_grid_difference(prior_centroids, centroids, tol, points_differ)
325
326 if (any(points_differ)) then
327 prior_centroids = centroids
328 else
329 if(debug%info) then
330 write(message(1), '(a)') 'All centroid points converged'
331 call messages_info(1)
332 endif
333 ! Break loop
334 exit
335 endif
336
337 enddo
338
339 if (discretize_centroids) then
340 call mesh_discretize_values_to_mesh(mesh, centroids)
341 endif
342
343 safe_deallocate_a(prior_centroids)
344 safe_deallocate_a(ip_to_ic)
345 safe_deallocate_a(points_differ)
346
347 pop_sub(weighted_kmeans)
348
349 end subroutine weighted_kmeans
350
type(debug_t), save, public debug
Definition: debug.F90:154
subroutine report_differences_in_grids(points, updated_points, tol, points_differ)
Report differences returned from compute_grid_difference.
subroutine, public assign_points_to_centroids_finite_bc(mesh, centroids, ip_to_ic)
Assign each grid point to the closest centroid. A centroid and its set of nearest grid points defines...
subroutine, public weighted_kmeans(space, mesh, weight, centroids, n_iter, centroid_tol, discretize)
Weighted K-means clustering.
subroutine, public update_centroids(mesh, weight, ip_to_ic, centroids)
Compute a new set of centroids.
subroutine compute_grid_difference(points, updated_points, tol, points_differ)
Compute the difference in two grids as .
This module defines the meshes, which are used in Octopus.
Definition: mesh.F90:118
subroutine, public mesh_discretize_values_to_mesh(mesh, values)
Assign a set of values to their nearest discrete points on the mesh.
Definition: mesh.F90:410
subroutine, public messages_info(no_lines, iunit, verbose_limit, stress, all_nodes, namespace)
Definition: messages.F90:624
character(len=256), dimension(max_lines), public message
to be output by fatal, warning
Definition: messages.F90:160
This module contains some common usage patterns of MPI routines.
Definition: mpi_lib.F90:115
Describes mesh distribution to nodes.
Definition: mesh.F90:186
int true(void)