using System.Collections.Generic;
using System.Diagnostics;
/// <summary>
/// One import-tuning entry, keyed by table name and stack.
/// </summary>
public class ImportPerformanceConfig
{
    /// <summary>Name of the table this entry applies to.</summary>
    public string TableName { get; set; }

    /// <summary>Name of the stack this entry applies to.</summary>
    public string Stack { get; set; }

    /// <summary>
    /// Optional degree of parallelism for the save step;
    /// <c>null</c> when no value has been configured.
    /// </summary>
    public int? SaveDataMaxDegreeOfParallelism { get; set; }

    /// <summary>
    /// Optional switch for deadlock-prevention batching;
    /// <c>null</c> when no value has been configured.
    /// </summary>
    public bool? UseDeadlockPreventionBatches { get; set; }
}
/// <summary>
/// Stand-in for a database lookup that supplies import performance settings.
/// </summary>
public class MockDbResponse
{
    /// <summary>
    /// Gets the configured entries. A new list holding a single entry is
    /// built on every access.
    /// </summary>
    public List<ImportPerformanceConfig> ImportPerformanceConfigs
    {
        get
        {
            // NOTE(review): TableName and Stack are not set on this entry, so a
            // lookup that matches on those two properties cannot select it.
            // Confirm whether they should be populated here.
            return new List<ImportPerformanceConfig>
            {
                new ImportPerformanceConfig
                {
                    SaveDataMaxDegreeOfParallelism = 8,
                    UseDeadlockPreventionBatches = true
                }
            };
        }
    }
}
// Degree of parallelism used when the matching config supplies no positive value.
private static int defaultSaveDataMaxDegreeOfParallelism = 8;
// Stack name that a config entry's Stack must equal (case-insensitively) to be selected.
private static string stack = "Prod";
// Builds the in-memory test table: a DataTable named "ABSENCE" with a single
// string column, "PEOPLE_ID", filled by the loops below.
// NOTE(review): no return statement is visible for this method; confirm that
// the table is returned to the caller.
static DataTable GetTestData()
var table = new DataTable();
table.TableName = "ABSENCE";
table.Columns.Add("PEOPLE_ID", typeof(string));
// NOTE(review): no body is visible for this 7-iteration loop. As written it
// would govern the next `for`, which redeclares `i`. Confirm which row it is
// meant to add.
for (var i = 0; i < 7; i ++)
// The three loops below have an upper bound of 0, so they currently add no rows.
// Presumably the bounds are knobs for trying other data distributions; confirm.
for (var i = 0; i < 0; i ++)
table.Rows.Add("Jeremey");
for (var i = 0; i < 0; i ++)
table.Rows.Add("Big Mad Andy");
// When enabled, this loop would add distinct ids: "NAME0", "NAME1", ...
for (var i = 0; i < 0; i ++)
table.Rows.Add("NAME" + i);
/// <summary>
/// Entry point. Loads the test table, looks up the performance config that
/// matches the table name and the current stack, decides whether to split
/// the rows into deadlock-prevention batches, and hands the result to
/// <c>SaveDataProcessor</c>.
/// </summary>
public static void Main()
{
    var mockDbResponse = new MockDbResponse();

    // Take the name from the table itself rather than from its first row, so
    // an empty table does not throw before any work is attempted.
    var table = GetTestData();
    var tableName = table.TableName;
    var importData = table.AsEnumerable().ToList();

    // May be null when no entry matches both table name and stack.
    var config = mockDbResponse.ImportPerformanceConfigs.FirstOrDefault(candidate =>
        String.Equals(candidate.TableName, tableName, StringComparison.OrdinalIgnoreCase)
        && String.Equals(candidate.Stack, stack, StringComparison.OrdinalIgnoreCase));

    var saveDataMaxDegreeOfParallelism = GetSaveDataMaxDegreeOfParallelism(config, defaultSaveDataMaxDegreeOfParallelism);

    // Batching is only used when the config asks for it and more than one row
    // would be sent at a time.
    var useDeadlockPreventionBatches = IsConfigValueTrue(config) && saveDataMaxDegreeOfParallelism > 1;

    // Without batching, everything goes through as one batch.
    var batches = useDeadlockPreventionBatches
        ? CreateBatches(importData, "PEOPLE_ID", saveDataMaxDegreeOfParallelism)
        : new List<List<DataRow>> { importData };

    SaveDataProcessor(batches, saveDataMaxDegreeOfParallelism, importData.Count);
}
// Splits importData into batches so that rows sharing the same identityField
// value are placed in different batches, then tops the batches up with rows
// whose identity value is unique.
//   importData             - rows to distribute.
//   identityField          - name of the string column used to group rows.
//   optimalBatchMultiplier - batches are topped up towards a multiple of this size.
// NOTE(review): several pieces are not visible in this method: the bodies of
// the two empty-result guards, the declarations and increments of batchIndex
// and rowIndex, a timer.Start() call, and the return statement. Confirm them
// against the intended implementation.
static List<List<DataRow>> CreateBatches(List<DataRow> importData, string identityField, int optimalBatchMultiplier)
// NOTE(review): the Stopwatch is created but never started in the visible
// lines, so the elapsed time printed at the end would be zero.
var timer = new Stopwatch();
var batches = new List<List<DataRow>>();
// Group rows by identity value, largest group first.
var rowGroups = importData
.GroupBy(row => row.Field<string>(identityField))
.OrderByDescending(grp => grp.Count())
// Guard for "no rows at all" (body not visible).
if (rowGroups.Count() == 0)
// Groups in which the same identity value occurs more than once.
var multiRowGroups = rowGroups.Where(grp => grp.Count() > 1).ToList();
// Guard for "no identity value repeats" (body not visible).
if (multiRowGroups.Count() == 0)
// Seed one batch per row of the first (largest) multi-row group, so that
// none of its rows share a batch.
foreach (var row in multiRowGroups.First())
batches.Add(new List<DataRow>{ row });
// Spread each remaining group across the existing batches, one row per batch.
// Groups are ordered by descending size, so a later group should never have
// more rows than there are batches.
foreach (var rowGroup in multiRowGroups.Skip(1))
var rowsInGroup = rowGroup.ToList();
for (var i = 0; i < rowsInGroup.Count(); i++)
batches[i].Add(rowsInGroup[i]);
// Rows whose identity value occurs exactly once.
var singleRows = rowGroups.Where(grp => grp.Count() == 1).Select(grp => grp.First()).ToList();
var remainingRows = singleRows.Count();
// Walk the batches and top each one up with single rows.
while (remainingRows != 0 && batchIndex < batches.Count)
// Free slots before this batch reaches the next multiple of optimalBatchMultiplier
// (% binds tighter than -, so this is multiplier - (count % multiplier)).
var availableRowsInBatch = (optimalBatchMultiplier - batches[batchIndex].Count() % optimalBatchMultiplier);
// A value equal to the multiplier means the batch is already an exact multiple,
// so nothing is added to it.
if (availableRowsInBatch != optimalBatchMultiplier)
var rowsToAdd = Math.Min(remainingRows, availableRowsInBatch);
batches[batchIndex].AddRange(singleRows.Skip(rowIndex).Take(rowsToAdd));
remainingRows = singleRows.Count() - rowIndex;
// Any single rows still unplaced are appended to the last batch.
batches.Last().AddRange(singleRows.Skip(rowIndex));
// Report how long batching took, formatted as m:ss.fff.
var timeTaken = timer.Elapsed;
Console.WriteLine("Time taken: " + timeTaken.ToString(@"m\:ss\.fff"));
// Walks every batch in chunks of at most saveDataMaxDegreeOfParallelism rows
// and writes each chunk to the console, then prints summary counts. The
// visible code only prints; it does not persist anything.
//   importBatches                  - batches to process, in order.
//   saveDataMaxDegreeOfParallelism - maximum number of rows per chunk.
//   initialCountForValidation      - row count before batching, printed next to
//                                    the batched total for comparison.
// NOTE(review): the declarations of totalRecords and rowIndex, and the
// statement that advances rowIndex, are not visible here. Confirm them
// against the intended implementation.
static void SaveDataProcessor(List<List<DataRow>> importBatches, int saveDataMaxDegreeOfParallelism, int initialCountForValidation)
foreach(var batch in importBatches)
// Running total of rows across all batches.
totalRecords += batch.Count();
while (rowIndex < batch.Count())
var remainingRows = batch.Count() - rowIndex;
// The last chunk of a batch may be smaller than the parallelism limit.
var rowsToSend = Math.Min(saveDataMaxDegreeOfParallelism, remainingRows);
Console.WriteLine("Sending " + rowsToSend + " rows to save data in parallel");
var rows = batch.Skip(rowIndex).Take(rowsToSend);
// Print the "PEOPLE_ID" value of every row in the chunk.
foreach (var row in rows)
Console.WriteLine(row["PEOPLE_ID"].ToString());
// Summary: the batched total can be compared with the initial count.
Console.WriteLine("Total records in initial entity: " + initialCountForValidation);
Console.WriteLine("Total batches: " + importBatches.Count());
Console.WriteLine("Total records in batches: " + totalRecords);
/// <summary>
/// Resolves the degree of parallelism to use for the save step.
/// </summary>
/// <param name="config">Matching config entry, or <c>null</c> when none was found.</param>
/// <param name="defaultMaxValue">Value to use when the config supplies no positive value.</param>
/// <returns>
/// The configured value when it is at least 1; otherwise <paramref name="defaultMaxValue"/>.
/// </returns>
static int GetSaveDataMaxDegreeOfParallelism(ImportPerformanceConfig? config, int defaultMaxValue)
{
    // config may be null when the lookup found no matching entry; the
    // null-conditional access turns that into "no configured value".
    var configuredValue = config?.SaveDataMaxDegreeOfParallelism;

    // IsPositiveValue is false for null, so .Value is only read when a value exists.
    return IsPositiveValue(configuredValue) ? configuredValue.Value : defaultMaxValue;
}
/// <summary>
/// Tells whether the config explicitly enables deadlock-prevention batches.
/// </summary>
/// <param name="config">Config entry to inspect; may be <c>null</c>.</param>
/// <returns>
/// <c>true</c> only when <paramref name="config"/> is not null and its
/// <c>UseDeadlockPreventionBatches</c> is <c>true</c>; a null config or an
/// unset flag gives <c>false</c>.
/// </returns>
static bool IsConfigValueTrue(ImportPerformanceConfig? config)
{
    if (config is null)
    {
        return false;
    }

    // An unset (null) flag falls back to false.
    return config.UseDeadlockPreventionBatches.GetValueOrDefault();
}
/// <summary>
/// Tells whether an optional integer holds a value of at least 1.
/// </summary>
/// <param name="inputValue">Value to test; <c>null</c> counts as not positive.</param>
/// <returns><c>true</c> when a value is present and greater than zero.</returns>
static bool IsPositiveValue(int? inputValue)
{
    return inputValue.HasValue && inputValue.Value > 0;
}