using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
using Accord.MachineLearning;
using Accord.Math.Distances;
// NOTE(review): the code below uses xUnit types (Assert, FactAttribute) but no
// `using Xunit;` directive is visible in this file — confirm and add it if missing.
/// <summary>
/// K-means clustering with a per-cluster capacity limit, plus a small
/// reflection-based runner that executes the tests in
/// <c>KMeansClusteringTests</c>.
/// </summary>
public class KMeansClustering
{
    /// <summary>
    /// Clusters <paramref name="data"/> with k-means and then reassigns points so that,
    /// where possible, no cluster holds more than <paramref name="capacity"/> points.
    /// </summary>
    /// <param name="data">Non-empty jagged array of points; every row must be non-null,
    /// non-empty and of the same length.</param>
    /// <param name="k">Number of clusters; must be greater than 0 and not exceed the
    /// number of points.</param>
    /// <param name="capacity">Maximum number of points per cluster; must be greater than 0.</param>
    /// <returns>
    /// An array parallel to <paramref name="data"/> holding the cluster index of each point.
    /// When every cluster is already full, the remaining points are assigned to their nearest
    /// centroid regardless of capacity.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="data"/> is null or empty, has fewer rows than <paramref name="k"/>,
    /// contains a null or empty row, or has rows of differing lengths.
    /// </exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="k"/> or <paramref name="capacity"/> is less than or equal to 0.
    /// </exception>
    public static int[] ClusterWithCapacity(double[][] data, int k, int capacity)
    {
        // ArgumentException (not ArgumentNullException) is kept for null input so that
        // callers matching on the exact exception type keep working.
        if (data == null || data.Length == 0)
        {
            throw new ArgumentException("Input data cannot be null or empty.", nameof(data));
        }

        if (k <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(k), "The number of clusters (k) must be greater than 0.");
        }

        if (capacity <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(capacity), "The capacity must be greater than 0.");
        }

        if (data.Length < k)
        {
            throw new ArgumentException("The number of data points must be greater than or equal to the number of clusters.", nameof(data));
        }

        if (data.Any(row => row == null || row.Length == 0))
        {
            throw new ArgumentException("Data rows cannot be null or empty.", nameof(data));
        }

        int firstRowLength = data[0].Length;
        if (data.Any(row => row.Length != firstRowLength))
        {
            throw new ArgumentException("All data rows must have the same length.", nameof(data));
        }

        // Unconstrained k-means gives the starting labels and the centroids.
        KMeans kmeans = new KMeans(k);
        KMeansClusterCollection clusters = kmeans.Learn(data);
        int[] initialLabels = clusters.Decide(data);
        double[][] centroids = kmeans.Centroids;

        int[] clusterAssignments = new int[data.Length];
        int[] clusterCounts = new int[k];
        List<Tuple<int, double[]>> unassignedData = new List<Tuple<int, double[]>>();

        // First pass: keep the k-means label while the cluster still has room;
        // otherwise defer the point to the second pass.
        for (int i = 0; i < data.Length; i++)
        {
            int clusterIndex = initialLabels[i];
            if (clusterCounts[clusterIndex] < capacity)
            {
                clusterAssignments[i] = clusterIndex;
                clusterCounts[clusterIndex]++;
            }
            else
            {
                unassignedData.Add(new Tuple<int, double[]>(i, data[i]));
            }
        }

        // Second pass: place each deferred point in the nearest cluster that has room.
        foreach (var dataPoint in unassignedData)
        {
            int nearestCluster = FindNearestCluster(dataPoint.Item2, centroids, clusterCounts, capacity, true);
            if (nearestCluster != -1)
            {
                clusterAssignments[dataPoint.Item1] = nearestCluster;
                clusterCounts[nearestCluster]++;
            }
            else
            {
                // Every cluster is full: fall back to the nearest centroid and
                // knowingly exceed the capacity (counts are not updated here).
                nearestCluster = FindNearestCluster(dataPoint.Item2, centroids, clusterCounts, capacity, false);
                clusterAssignments[dataPoint.Item1] = nearestCluster;
            }
        }

        return clusterAssignments;
    }

    /// <summary>
    /// Returns the index of the centroid closest (Euclidean distance) to
    /// <paramref name="point"/>, or -1 when no candidate qualifies.
    /// </summary>
    /// <param name="point">The point to place.</param>
    /// <param name="centroids">Cluster centroids, indexed by cluster.</param>
    /// <param name="clusterCounts">Current number of points in each cluster.</param>
    /// <param name="capacity">Per-cluster capacity limit.</param>
    /// <param name="requireFreeCapacity">When true, clusters whose count has reached
    /// <paramref name="capacity"/> are skipped.</param>
    private static int FindNearestCluster(
        double[] point,
        double[][] centroids,
        int[] clusterCounts,
        int capacity,
        bool requireFreeCapacity)
    {
        Euclidean metric = new Euclidean();
        int nearestCluster = -1;
        double minDistance = double.MaxValue;

        for (int j = 0; j < clusterCounts.Length; j++)
        {
            if (requireFreeCapacity && clusterCounts[j] >= capacity)
            {
                continue;
            }

            double distance = metric.Distance(point, centroids[j]);
            if (distance < minDistance)
            {
                minDistance = distance;
                nearestCluster = j;
            }
        }

        return nearestCluster;
    }

    /// <summary>
    /// Runs every public method of <c>KMeansClusteringTests</c> that carries a
    /// <c>FactAttribute</c>, printing a pass/fail line per test and a final summary.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Type testClassType = typeof(KMeansClusteringTests);
        // A single instance is shared by all test methods.
        object testInstance = Activator.CreateInstance(testClassType);
        MethodInfo[] testMethods = testClassType
            .GetMethods()
            .Where(m => m.GetCustomAttributes(typeof(FactAttribute), false).Any())
            .ToArray();

        int passed = 0, failed = 0;
        Console.WriteLine("Running xUnit tests using reflection...\n");

        foreach (MethodInfo method in testMethods)
        {
            try
            {
                method.Invoke(testInstance, null);
                Console.WriteLine($"✅ {method.Name} PASSED");
                passed++;
            }
            catch (TargetInvocationException ex)
            {
                // Reflection wraps the test's own exception; report the inner message.
                Console.WriteLine($"❌ {method.Name} FAILED: {ex.InnerException?.Message}");
                failed++;
            }
        }

        Console.WriteLine($"\nTest Results: {passed} Passed, {failed} Failed.");
    }
}
// Test cases for KMeansClustering.ClusterWithCapacity, written against xUnit-style
// assertions (Assert.Equal, Assert.True, Assert.Throws).
// NOTE(review): this listing looks truncated — braces, the declarations of `k`,
// `capacity`, the per-cluster counters, and any test attributes are not visible here.
// Verify against the complete file before relying on the notes below.
public class KMeansClusteringTests
// Valid input: the result must have one label per row, and clusters 0 and 1 must
// each hold at most `capacity` points.
public void ClusterWithCapacity_ValidInput_ReturnsCorrectAssignments()
// Six two-column sample rows.
double[][] data = new double[][]
new double[] { 25, 50000 },
new double[] { 30, 60000 },
new double[] { 28, 55000 },
new double[] { 35, 70000 },
new double[] { 22, 45000 },
new double[] { 31, 62000 }
// `k` and `capacity` are used here but their declarations are not visible — TODO confirm values.
int[] assignments = KMeansClustering.ClusterWithCapacity(data, k, capacity);
Assert.Equal(data.Length, assignments.Length);
// Tally how many points landed in cluster 0 and cluster 1.
for (int i = 0; i < assignments.Length; i++)
if (assignments[i] == 0) countCluster0++;
if (assignments[i] == 1) countCluster1++;
Assert.True(countCluster0 <= capacity);
Assert.True(countCluster1 <= capacity);
// Same checks with seven rows and three cluster counters, followed by a second call
// that only requires clusters 0 and 1 to be non-empty.
public void ClusterWithCapacity_ValidInput_ReturnsCorrectAssignments_DifferentKandCapacity()
double[][] data = new double[][]
new double[] { 25, 50000 },
new double[] { 30, 60000 },
new double[] { 28, 55000 },
new double[] { 35, 70000 },
new double[] { 22, 45000 },
new double[] { 31, 62000 },
new double[] { 32, 63000 }
int[] assignments = KMeansClustering.ClusterWithCapacity(data, k, capacity);
Assert.Equal(data.Length, assignments.Length);
for (int i = 0; i < assignments.Length; i++)
if (assignments[i] == 0) countCluster0++;
if (assignments[i] == 1) countCluster1++;
if (assignments[i] == 2) countCluster2++;
// No cluster may exceed the capacity.
Assert.True(countCluster0 <= capacity);
Assert.True(countCluster1 <= capacity);
Assert.True(countCluster2 <= capacity);
// Second scenario on the same data; presumably `k`, `capacity` and the counters are
// reassigned before this call — the reassignments are not visible, TODO confirm.
assignments = KMeansClustering.ClusterWithCapacity(data, k, capacity);
Assert.Equal(data.Length, assignments.Length);
for (int i = 0; i < assignments.Length; i++)
if (assignments[i] == 0) countCluster0++;
if (assignments[i] == 1) countCluster1++;
// Here only non-emptiness of clusters 0 and 1 is asserted.
Assert.True(countCluster0 > 0);
Assert.True(countCluster1 > 0);
// Expects ArgumentException; presumably `data` is null here (declaration not visible).
public void ClusterWithCapacity_NullData_ThrowsArgumentException()
Assert.Throws<ArgumentException>(() => KMeansClustering.ClusterWithCapacity(data, k, capacity));
// Expects ArgumentException for an array with zero rows.
public void ClusterWithCapacity_EmptyData_ThrowsArgumentException()
double[][] data = new double[][] { };
Assert.Throws<ArgumentException>(() => KMeansClustering.ClusterWithCapacity(data, k, capacity));
// Expects ArgumentOutOfRangeException twice; presumably for two different invalid
// values of `k` (the assignments between the calls are not visible).
public void ClusterWithCapacity_InvalidK_ThrowsArgumentOutOfRangeException()
double[][] data = new double[][] { new double[] { 1, 2 } };
Assert.Throws<ArgumentOutOfRangeException>(() => KMeansClustering.ClusterWithCapacity(data, k, capacity));
Assert.Throws<ArgumentOutOfRangeException>(() => KMeansClustering.ClusterWithCapacity(data, k, capacity));
// Expects ArgumentOutOfRangeException twice; presumably for two different invalid
// values of `capacity` (the assignments between the calls are not visible).
public void ClusterWithCapacity_InvalidCapacity_ThrowsArgumentOutOfRangeException()
double[][] data = new double[][] { new double[] { 1, 2 } };
Assert.Throws<ArgumentOutOfRangeException>(() => KMeansClustering.ClusterWithCapacity(data, k, capacity));
Assert.Throws<ArgumentOutOfRangeException>(() => KMeansClustering.ClusterWithCapacity(data, k, capacity));
// Expects ArgumentException when a single row is passed; presumably `k` is greater than 1.
public void ClusterWithCapacity_FewerDataPointsThanClusters_ThrowsArgumentException()
double[][] data = new double[][] { new double[] { 1, 2 } };
Assert.Throws<ArgumentException>(() => KMeansClustering.ClusterWithCapacity(data, k, capacity));
// Expects ArgumentException when one of the rows is null.
public void ClusterWithCapacity_NullDataRow_ThrowsArgumentException()
double[][] data = new double[][] { new double[] { 1, 2 }, null };
Assert.Throws<ArgumentException>(() => KMeansClustering.ClusterWithCapacity(data, k, capacity));
// Expects ArgumentException when one of the rows has zero elements.
public void ClusterWithCapacity_EmptyDataRow_ThrowsArgumentException()
double[][] data = new double[][] { new double[] { 1, 2 }, new double[] {} };
Assert.Throws<ArgumentException>(() => KMeansClustering.ClusterWithCapacity(data, k, capacity));
// Expects ArgumentException when rows differ in length (2 elements vs 1).
public void ClusterWithCapacity_DifferentDataRowLength_ThrowsArgumentException()
double[][] data = new double[][] { new double[] { 1, 2 }, new double[] {1} };
Assert.Throws<ArgumentException>(() => KMeansClustering.ClusterWithCapacity(data, k, capacity));
// Checks which cluster the point at index 4 receives by comparing its distance to two
// centroids computed inside the test.
public void ClusterWithCapacity_AllClustersFull_AssignsToNearest()
// NOTE(review): only two rows are visible, yet data[4] is read below — further rows
// appear to be missing from this listing.
double[][] data = new double[][]
new double[] { 1.1, 1.1 },
new double[] { 2.1, 2.1 },
int[] assignments = KMeansClustering.ClusterWithCapacity(data, k, capacity);
Assert.Equal(data.Length, assignments.Length);
Euclidean distance = new Euclidean();
// Accumulators for the two reference centroids (two coordinates each).
double[] centroid0 = new double[2];
double[] centroid1 = new double[2];
// Iterates over every row except the last.
// NOTE(review): as shown, each row is added to BOTH accumulators; presumably a
// per-cluster condition and a final averaging step are missing — TODO confirm.
for(int i = 0; i < data.Length -1; i++)
centroid0[0] += data[i][0];
centroid0[1] += data[i][1];
centroid1[0] += data[i][0];
centroid1[1] += data[i][1];
// Distances from the point at index 4 to each reference centroid
// (declarations of distToCentroid0/distToCentroid1 are not visible).
distToCentroid0 = distance.Distance(data[4], centroid0);
distToCentroid1 = distance.Distance(data[4], centroid1);
// The point is expected in cluster 0 when centroid 0 is at least as close; presumably
// the second assertion belongs to an `else` branch that is not visible here.
if(distToCentroid0 <= distToCentroid1)
Assert.Equal(0, assignments[4]);
Assert.Equal(1, assignments[4]);