using System;
using System.Collections.Generic;
using System.Linq;
using Accord.MachineLearning;
using Accord.Math.Distances;
/// <summary>
/// K-means clustering with a soft upper bound on the number of points per cluster.
/// </summary>
public class KMeansClustering
{
    /// <summary>
    /// Clusters <paramref name="data"/> into <paramref name="k"/> groups with k-means and then
    /// redistributes points so that, where possible, no cluster holds more than
    /// <paramref name="capacity"/> points.
    /// </summary>
    /// <param name="data">Rectangular jagged array; one row per data point.</param>
    /// <param name="k">Number of clusters; must be greater than 0 and not exceed the row count.</param>
    /// <param name="capacity">Preferred maximum number of points per cluster; must be greater than 0.</param>
    /// <returns>
    /// An array parallel to <paramref name="data"/> holding the cluster index assigned to each row.
    /// </returns>
    /// <remarks>
    /// The capacity is best-effort: when every cluster is already full (which is unavoidable when
    /// <c>k * capacity</c> is smaller than the number of rows) the remaining points are placed in
    /// their nearest cluster regardless of the limit.
    /// </remarks>
    /// <exception cref="ArgumentException">
    /// <paramref name="data"/> is null, empty, has fewer rows than <paramref name="k"/>, contains a
    /// null or empty row, or has rows of differing lengths.
    /// </exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="k"/> or <paramref name="capacity"/> is not greater than 0.
    /// </exception>
    public static int[] ClusterWithCapacity(double[][] data, int k, int capacity)
    {
        if (data == null || data.Length == 0)
        {
            throw new ArgumentException("Input data cannot be null or empty.", nameof(data));
        }

        if (k <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(k), "The number of clusters (k) must be greater than 0.");
        }

        if (capacity <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(capacity), "The capacity must be greater than 0.");
        }

        if (data.Length < k)
        {
            throw new ArgumentException("The number of data points must be greater than or equal to the number of clusters.", nameof(data));
        }

        if (data.Any(row => row == null || row.Length == 0))
        {
            throw new ArgumentException("Data rows cannot be null or empty.", nameof(data));
        }

        int firstRowLength = data[0].Length;
        if (data.Any(row => row.Length != firstRowLength))
        {
            throw new ArgumentException("All data rows must have the same length.", nameof(data));
        }

        KMeans kmeans = new KMeans(k);
        KMeansClusterCollection clusters = kmeans.Learn(data);
        int[] initialLabels = clusters.Decide(data);
        double[][] centroids = kmeans.Centroids;

        int[] clusterAssignments = new int[data.Length];
        int[] clusterCounts = new int[k];
        List<int> overflowIndices = new List<int>();

        // Pass 1: keep the k-means label while the target cluster still has room;
        // points that would overflow their cluster are deferred to pass 2.
        for (int i = 0; i < data.Length; i++)
        {
            int clusterIndex = initialLabels[i];
            if (clusterCounts[clusterIndex] < capacity)
            {
                clusterAssignments[i] = clusterIndex;
                clusterCounts[clusterIndex]++;
            }
            else
            {
                overflowIndices.Add(i);
            }
        }

        // Pass 2: move each deferred point to the nearest cluster that still has room.
        foreach (int pointIndex in overflowIndices)
        {
            double[] point = data[pointIndex];
            int nearestCluster = FindNearestCluster(point, centroids, j => clusterCounts[j] < capacity);

            if (nearestCluster != -1)
            {
                clusterAssignments[pointIndex] = nearestCluster;
                clusterCounts[nearestCluster]++;
            }
            else
            {
                // Every cluster is full: fall back to the nearest cluster overall,
                // knowingly exceeding the capacity rather than leaving the point unassigned.
                clusterAssignments[pointIndex] = FindNearestCluster(point, centroids, j => true);
            }
        }

        return clusterAssignments;
    }

    /// <summary>
    /// Returns the index of the centroid closest (Euclidean distance) to <paramref name="point"/>
    /// among the clusters accepted by <paramref name="isCandidate"/>, or -1 when none qualifies.
    /// </summary>
    private static int FindNearestCluster(double[] point, double[][] centroids, Func<int, bool> isCandidate)
    {
        Euclidean metric = new Euclidean();
        int nearestCluster = -1;
        double minDistance = double.MaxValue;

        for (int j = 0; j < centroids.Length; j++)
        {
            if (!isCandidate(j))
            {
                continue;
            }

            double distance = metric.Distance(point, centroids[j]);
            if (distance < minDistance)
            {
                minDistance = distance;
                nearestCluster = j;
            }
        }

        return nearestCluster;
    }

    /// <summary>
    /// Demo entry point: clusters a small two-column sample data set and prints the
    /// resulting assignment for every row.
    /// </summary>
    public static void Main(string[] args)
    {
        double[][] data = new double[][]
        {
            new double[] { 25, 50000 },
            new double[] { 30, 60000 },
            new double[] { 28, 55000 },
            new double[] { 35, 70000 },
            new double[] { 22, 45000 },
            new double[] { 31, 62000 }
        };

        // Example parameters: two clusters, each holding at most three points.
        int k = 2;
        int capacity = 3;

        try
        {
            int[] clusterAssignments = ClusterWithCapacity(data, k, capacity);

            Console.WriteLine("Cluster Assignments:");
            for (int i = 0; i < clusterAssignments.Length; i++)
            {
                Console.WriteLine($"Data Point {i}: Cluster {clusterAssignments[i]}");
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine($"An error occurred: {ex.Message}");
        }
    }
}