using CsvHelper.Configuration;
using OfficeOpenXml.Style;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using static OfficeOpenXml.ExcelErrorValue;
static Dictionary<string, string> hSupportedFlags = new Dictionary<string, string>();
static Dictionary<string, object> hSetup = new Dictionary<string, object>();
static Dictionary<string, Tuple<string, string>> hFlags = new Dictionary<string, Tuple<string, string>>
{ "file", Tuple.Create("filePath", "\tPoint to the DAT file to normalize.") },
{ "software", Tuple.Create("ediscoverySoftware", "Loading into Casepoint, NUIX, Law, etc.? Defaults to Casepoint.") },
{ "env", Tuple.Create("scriptEnv", "\tPROD, STAGE, DEV. Defaults to PROD") },
{ "fieldOverride", Tuple.Create("fieldOverride", "If fields are unmapped and the template can't be updated. DATFIELD=SOFTWAREFIELD,DATFIELD=SOFTWAREFIELD") },
{ "bulkImport", Tuple.Create("bulkImport", "Point to a file that contains a collection of DAT files that need to be normalized.") },
{ "noPrompts", Tuple.Create("noPrompts", "Point to a file that contains a collection of DAT files that need to be normalized.") },
{ "datCompare", Tuple.Create("datCompare", "Point to a collection of DAT files that need to be compared.") },
static Dictionary<int, string> errorRecsCollection = new Dictionary<int, string>();
const int batchSize = 500;
readonly Color darkBlue = Color.FromArgb(255, 32, 55, 100);
static DateTime startRunTime = DateTime.Now;
static async Task Main(string[] userArgs)
ExcelPackage.LicenseContext = LicenseContext.Commercial;
await FileReviewAndSetupAsync();
await FieldHeaderRemappingInitializeAsync();
await FileRemappingAsync();
private static async Task DatCompareWorkflowAsync()
if (hSetup.ContainsKey("datCompare"))
await FileReviewAndSetupAsync();
await FieldHeaderRemappingInitializeAsync();
await DataTableComparisonReportAsync();
private static async Task DataTableComparisonReportAsync()
hSetup["readOutputCsv"] = "true";
DataTable originalTable = (DataTable)hSetup["csvDataTable"];
DataTable outputTable = (DataTable)await ReadCsvToDataTableAsync();
var columnMappings = (Dictionary<string, string>)(hSetup["fileHeaderRemap"]);
var comparer = new DataTableComparer(columnMappings);
string report = comparer.GenerateComparisonReport(originalTable, outputTable);
byte[] encodedText = Encoding.UTF8.GetBytes(report);
using (FileStream sourceStream = new FileStream(Path.Combine(hSetup["fileOutpath"].ToString(), hSetup["fileName"].ToString().ToUpper().Replace(".DAT","-compareReport.log")),
FileMode.Append, FileAccess.Write, FileShare.None,
bufferSize: 4096, useAsync: true))
await sourceStream.WriteAsync(encodedText, 0, encodedText.Length);
private static async Task FileRemappingAsync()
var hFieldExamples = new Dictionary<string, List<string>>();
Dictionary<string, string> hTempRemap = new Dictionary<string, string>();
string outputFile = Path.Combine(hSetup["fileOutpath"].ToString(), hSetup["fileName"].ToString());
DataTable dataTable = (DataTable)hSetup["csvDataTable"];
int totalLinesInFile = dataTable.Rows.Count + 1;
Dictionary<string, string> fileHeaderRemap = (Dictionary<string, string>)(hSetup["fileHeaderRemap"]);
var originalDictionary = ((HDR.ColumnNameNormalizer)hSetup["ColumnNameNormalizer"])._fieldTypeDictionary;
Dictionary<string, (string fieldType, string multiValueSep)> fieldTypeDictionary = new Dictionary<string, (string fieldType, string multiValueSep)>(originalDictionary, StringComparer.OrdinalIgnoreCase);
Console.WriteLine("START: PROCESSING LINES");
Stopwatch stopwatch = new Stopwatch();
ConsoleUtility.WriteProgressBar(0, true, $"Lines processed: 0 / {totalLinesInFile} Elapsed Time: {stopwatch.Elapsed.ToString(@"hh\:mm\:ss")} ");
hSetup.TryGetValue("fileDelimiterOutput", out object fileDelimiters);
char fieldSeparator = char.Parse(((IList<string>)fileDelimiters)[0]);
char textSeparator = char.Parse(((IList<string>)fileDelimiters)[1]);
using (var writer = new StreamWriter(outputFile, false, new System.Text.UTF8Encoding(true)))
using (var csv = new CsvWriter(writer, new CsvConfiguration(CultureInfo.InvariantCulture)
Encoding = Encoding.UTF8,
Delimiter = fieldSeparator.ToString(),
Dictionary<string, string> appendExtraFields = (Dictionary<string, string>)hSetup["appendExtraFieldsDATA"];
if (appendExtraFields != null)
foreach (var col in appendExtraFields)
dataTable.Columns.Add((string)col.Key, typeof(string));
dataTable.AsEnumerable().ToList<DataRow>().ForEach(r => r[col.Key] = col.Value.ToString());
foreach (DataRow row in dataTable.Rows)
foreach (DataColumn column in dataTable.Columns)
if (!appendExtraFields.Keys.Where(c => c.Contains(column.ColumnName)).Any())
string fieldName = CompactHeaderFieldToRemoveExtraItem(column.ColumnName);
string fieldValue = row[column].ToString();
string[] fieldNameSplit = (fileHeaderRemap[fieldName]).Split('|');
if (fieldNameSplit.Length == 0)
fieldNameSplit = new[] { fileHeaderRemap[fieldName].ToString() };
foreach (string newField in fieldNameSplit)
string newFieldWoSuffix = Regex.Replace(newField, @"-HEADER-NORM-DUPE-FIELD\d*", "").ToString();
string foundField = newFieldWoSuffix;
string foundFieldType = fieldTypeDictionary[newFieldWoSuffix].fieldType;
string foundFieldMultiValueSep = fieldTypeDictionary[newFieldWoSuffix].multiValueSep;
if (foundFieldType.ToUpper() == "DATE" && new[] { "00/00/0000", "00000000" }.Any(c => fieldValue.Contains(c)))
fieldValue = string.Empty;
string multiValueSep = " ";
bool multiValueSepDatFieldHandle = false;
if (foundFieldMultiValueSep.Any())
if (foundFieldMultiValueSep.ToUpper() == "DATFIELD")
multiValueSep = ' ' + fieldName + @": ";
multiValueSep = foundFieldMultiValueSep;
if (hTempRemap.ContainsKey(newField))
if (hSetup.ContainsKey("dateTimeColumnNames") && ((Dictionary<string, bool>)hSetup["dateTimeColumnNames"]).ContainsKey(column.ColumnName))
string newFieldValue = hTempRemap[newField] + multiValueSep + fieldValue;
hTempRemap[newField] = string.Join(" ", string.Join(" ", newFieldValue.Split(';').Distinct()).Split(' ').Distinct());
string newFieldValue = hTempRemap[newField] + ';' + fieldValue;
hTempRemap[newField] = string.Join(" ", string.Join(" ", newFieldValue.Split(';').Distinct()).Split(' ').Distinct());
if ((!multiValueSepDatFieldHandle) || (!String.IsNullOrEmpty(fieldValue)))
hTempRemap[newField] = fieldValue;
if (newField.EndsWith("IGNORE"))
if (!hFieldExamples.ContainsKey(fieldName))
hFieldExamples[fieldName] = new List<string> { };
hTempRemap[column.ColumnName] = row[column].ToString();
foreach (string columnName in hTempRemap.Keys)
csv.WriteField(columnName, true);
foreach (string value in hTempRemap.Values)
csv.WriteField(string.IsNullOrWhiteSpace(value) ? "" : value, true);
foreach (string value in hTempRemap.Values)
csv.WriteField(string.IsNullOrWhiteSpace(value) ? "" : value, true);
if ((csv.Row % batchSize) == 0)
int precentComplete = (100 * csv.Row) / totalLinesInFile;
ConsoleUtility.WriteProgressBar(precentComplete, true, $"Lines processed: {csv.Row} / {totalLinesInFile} Elapsed Time: {stopwatch.Elapsed.ToString(@"hh\:mm\:ss")} ");
ConsoleUtility.WriteProgressBar((100 * (csv.Row - 1)) / totalLinesInFile, true, $"Lines processed: {csv.Row - 1} / {totalLinesInFile} Elapsed Time: {stopwatch.Elapsed.ToString(@"hh\:mm\:ss")} ");
if (hFieldExamples.Any())
var columnsRemoved = ((List<string>)hSetup["columnsToRemove"]).Select(x => CompactHeaderFieldToRemoveExtraItem(x)).ToList();
for (int i = 0; i <= hFieldExamples.Keys.Count - 1; i++)
var fieldMapping = hFieldExamples.ElementAt(i);
string fileHeaderUnAlteredHeaders = ((Dictionary<string, string>)hSetup["fileHeader"])[fieldMapping.Key];
var hFieldExamplesValues = new List<string> { };
if (columnsRemoved.Any(x => x.Equals(CompactHeaderFieldToRemoveExtraItem(fileHeaderUnAlteredHeaders), StringComparison.OrdinalIgnoreCase)))
hFieldExamplesValues = new List<string> { "EMPTY_FIELD_IGNORE" };
hFieldExamplesValues = ((DataTable)hSetup["csvDataTable"]).AsEnumerable()
.Select(row => row.Field<string>(fileHeaderUnAlteredHeaders))
.Where(value => !string.IsNullOrEmpty(value))
hFieldExamples[fieldMapping.Key] = hFieldExamplesValues;
OutputFieldExamplesToExcel(hFieldExamples, numOfExamples, new List<(string headerCol, string mapField)>(), "Ignore Field e.g.", "continue");
Console.WriteLine("FINISHED: PROCESSING LINES");
foreach (string optFileType in new[] { ".log", ".opt" })
string optFile = Path.Combine(hSetup["folderPath"].ToString(), hSetup["fileNameWithoutExt"] + optFileType);
if (File.Exists(optFile))
Console.WriteLine($"COPYING: {optFile}");
File.Copy(optFile, Path.Combine(hSetup["fileOutpath"].ToString(), hSetup["fileNameWithoutExt"] + optFileType), true);
OutputLoadReadyFileInfo();
private static void OutputLoadReadyFileInfo()
FileInfo pathFileout = new FileInfo(Path.Combine(hSetup["fileOutpath"].ToString(), hSetup["mappingInfoXLSXNameOutName"].ToString()));
int fileOutputTotalNumOfLines = (int)TotalLinesInCSV(Path.Combine(hSetup["fileOutpath"].ToString(), hSetup["fileName"].ToString()));
using (ExcelPackage package = new ExcelPackage(pathFileout))
ExcelWorksheet worksheet = package.Workbook.Worksheets["Load Ready File INFO"];
worksheet.Cells[2, 2].Value = hSetup["folderPath"];
worksheet.Cells[3, 2].Value = hSetup["fileName"];
worksheet.Cells[4, 2].Value = hSetup["fileTotalLines"];
worksheet.Cells[5, 2].Value = hSetup["filePath"];
worksheet.Cells[7, 2].Value = hSetup["fileOutpath"];
worksheet.Cells[8, 2].Value = hSetup["fileName"];
worksheet.Cells[9, 2].Value = fileOutputTotalNumOfLines;
worksheet.Cells[10, 2].Value = hSetup["filePath"];
ConsoleColor lineColor = ConsoleColor.Green;
hSetup["fileAndTargetLineCountsMatch"] = true;
if (((int)hSetup["fileTotalLines"] != fileOutputTotalNumOfLines))
lineColor = ConsoleColor.Red;
hSetup["fileAndTargetLineCountsMatch"] = false;
Console.ForegroundColor = lineColor;
Console.Write("SOURCE LR TOTAL LINES: ");
Console.WriteLine(hSetup["fileTotalLines"]);
Console.ForegroundColor = lineColor;
Console.Write("TARGET LR TOTAL LINES: ");
Console.WriteLine(fileOutputTotalNumOfLines);
worksheet.Column(2).AutoFit();
Console.ForegroundColor = lineColor;
if (package.Workbook.Worksheets["DupeMapped Header"] != null)
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("!!WARNING!! Duplicate Mapped Fields Detected");
Console.WriteLine("Please review the \"DupeMapped Header\" spreadsheet for detailed information on the duplicates in the folowing Output Path.");
package.Workbook.Worksheets["DupeMapped Header"].Select();
Console.ForegroundColor = ConsoleColor.Yellow;
Console.WriteLine("Header Norm Complete.");
Console.WriteLine("Output Path: ");
Console.WriteLine(Path.Combine(hSetup["fileOutpath"].ToString(), hSetup["fileName"].ToString()));
DateTime endRunTime = DateTime.Now;
TimeSpan ts = (endRunTime - startRunTime);
string toolRuntime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
ts.Hours, ts.Minutes, ts.Seconds, ts.Milliseconds / 10);
if (lineColor == ConsoleColor.Red)
Console.ForegroundColor = lineColor;
Console.WriteLine("Troubleshooting:");
Console.WriteLine(" 1. Manually check the line counts for the SOURCE/TARGET.");
Console.WriteLine(" 2. If line counts are incorrect, attempt to reprocess.");
Console.WriteLine(" 3. If line counts are still incorrect, escalate to IRT.");
Console.WriteLine("Total Tool Runtime: {0}", toolRuntime);
Console.WriteLine("Press any key to exit.");
Process.Start(new ProcessStartInfo(pathFileout.FullName) { UseShellExecute = true });
Console.WriteLine("Total Tool Runtime: {0}", toolRuntime);
Console.WriteLine("Press any key to exit.");
Process.Start(new ProcessStartInfo(pathFileout.FullName) { UseShellExecute = true });
private static async Task FieldHeaderRemappingInitializeAsync()
var fieldOverrides = new Dictionary<string, string>();
if (!string.IsNullOrEmpty(hSetup["fieldOverride"].ToString()))
foreach (var item in hSetup["fieldOverride"].ToString().Split(','))
var split = item.Split('=');
fieldOverrides[split[0].Trim()] = split[1].Trim();
Dictionary<string, string> hNewFieldMapping = new Dictionary<string, string>();
var normalizer = new ColumnNameNormalizer(hSetup);
hSetup["ColumnNameNormalizer"] = normalizer;
if (normalizer._appendExtraFieldsDictionary.Any())
var folderPathSplitSelection = Regex.Replace(hSetup["folderPath"].ToString(), @"(^.+\\\d{4}-\d{2}-\d{2}\\\d{6})\\.+$", "$1")
.Split(new char[] { Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar }).AsEnumerable()
Dictionary<string, string> hTemp = new Dictionary<string, string>();
foreach (var item in normalizer._appendExtraFieldsDictionary)
string fieldName = item.Trim().ToString();
case "Producing Party_CF":
hTemp[fieldName] = folderPathSplitSelection.ToList()[0];
DateTime? prodDate = DateTime.TryParse(folderPathSplitSelection.ToList()[1], out DateTime dateTimeValue) ? (DateTime?)dateTimeValue : null;
hTemp[fieldName] = String.IsNullOrWhiteSpace(prodDate.ToString()) ? "" : Convert.ToDateTime(prodDate).ToString("MM/dd/yyyy", CultureInfo.InvariantCulture);
hTemp[fieldName] = folderPathSplitSelection.ToList()[2];
hTemp[fieldName] = hSetup["todayDate"].ToString();
hSetup["appendExtraFieldsDATA"] = hTemp;
List<(string headerCol, string mapField, string fieldType, string multiValueSep)> normalizedColumns = normalizer.NormalizeColumnNames(new List<string>(((IDictionary<string, string>)hSetup["fileHeader"]).Keys));
foreach (string headerCol in ((IDictionary<string, string>)hSetup["fileHeader"]).Keys)
bool containsOverride = fieldOverrides.ContainsKey(headerCol);
hNewFieldMapping[headerCol] = containsOverride ? fieldOverrides[headerCol] : normalizedColumns
.Where(col => col.headerCol.Equals(CompactHeaderFieldToRemoveExtraItem(headerCol)))
.LastOrDefault().mapField;
hSetup["fileHeaderRemap"] = hNewFieldMapping;
CheckDuplicateMappings(hNewFieldMapping);
FileInfo pathFileout = new FileInfo(Path.Combine(hSetup["fileOutpath"].ToString(), hSetup["mappingInfoXLSXNameOutName"].ToString()));
using (ExcelPackage package = new ExcelPackage(pathFileout))
ExcelWorksheet worksheet = package.Workbook.Worksheets["Mapped Header"];
int[] colCounter = { 5, 6, 7 };
foreach (var mapping in ((Dictionary<string, string>)hSetup["fileHeaderRemap"]))
string key = mapping.Key;
string value = mapping.Value;
string fileHeaderUnAlteredHeaders = ((IDictionary<string, string>)hSetup["fileHeader"])[key];
foreach (int col in colCounter)
string rowValue = string.Empty;
rowValue = fileHeaderUnAlteredHeaders;
worksheet.Cells[rowCounter, col].Value = rowValue;
foreach (string valueSplit in value.Split('|'))
int[] extendedColCounters = { 1, 2, 3 };
foreach (int col in extendedColCounters)
string rowValue = string.Empty;
rowValue = fileHeaderUnAlteredHeaders;
worksheet.Cells[rowCounter, col].Value = rowValue;
var range = worksheet.Cells["E1:G" + rowCounter];
package.Workbook.Names.Add("headerCompact", range);
package.Workbook.Names["headerCompact"].NameComment = "HEADER Compact Table Range";
var hitsFound = normalizedColumns
.Where(col => col.mapField.StartsWith("_NO_FIELD_FOUND"))
.Select(c => new { c.headerCol, c.mapField })
.Select(c => (headerCol: c.headerCol, mapField: c.mapField))
.OrderBy(c => Regex.Replace(c.mapField, "[0-9]+", match => match.Value.PadLeft(5,'0')))
if (!hSetup.ContainsKey("csvDataTable") && !((DataTable)hSetup["csvDataTable"] == null))
hSetup["csvDataTable"] = await ReadCsvToDataTableAsync();
const int numOfExamples = 5;
var hFieldExamplesSkip = new Dictionary<string, string>();
var hFieldExamples = new Dictionary<string, List<string>>();
List<string> columnsRemoved = new List<string>();
if (hSetup.ContainsKey("columnsToRemove"))
columnsRemoved = ((List<string>)hSetup["columnsToRemove"]);
foreach (var fieldMapping in hitsFound)
string fileHeaderUnAlteredHeaders = ((IDictionary<string, string>)hSetup["fileHeader"])[fieldMapping.headerCol];
var hFieldExamplesValues = new List<string> { };
if (columnsRemoved != null && columnsRemoved.Any(x => x.Equals(fileHeaderUnAlteredHeaders, StringComparison.OrdinalIgnoreCase)))
hFieldExamplesValues = new List<string> { "EMPTY_FIELD_IGNORE" };
hFieldExamplesValues = ((DataTable)hSetup["csvDataTable"]).AsEnumerable()
.Select(row => row.Field<string>(fileHeaderUnAlteredHeaders))
.Where(value => !string.IsNullOrEmpty(value))
hFieldExamples.Add(fieldMapping.headerCol, hFieldExamplesValues);
hSetup["hFieldExamples"] = hFieldExamples;
var hFieldExamplesValuesCount = hFieldExamples.Where(kvp => kvp.Value.All(value => value == "EMPTY_FIELD_IGNORE")).ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
if (hFieldExamplesValuesCount.Count == hitsFound.Count)
OutputFieldExamplesToExcel(hFieldExamples, numOfExamples, hitsFound, "Unmapped Header", "continue");
hitsFound = hitsFound.Where(item => !hFieldExamplesValuesCount.Any(x => x.Key == item.headerCol)).ToList();
hFieldExamples = hFieldExamples.AsEnumerable().Where(kvp => kvp.Value.All(value => value != "EMPTY_FIELD_IGNORE")).ToDictionary(kvp => kvp.Key, kvp => kvp.Value);
OutputFieldExamplesToExcel(hFieldExamples, numOfExamples, hitsFound, "Unmapped Header", "quit");
private static void CheckDuplicateMappings(Dictionary<string, string> hNewFieldMapping)
Dictionary<string, string> dupeMapped = new Dictionary<string, string>();
List<string> columnsToRemove = new List<string>();
if (hSetup.ContainsKey("columnsToRemove"))
columnsToRemove = ((List<string>)hSetup["columnsToRemove"]).AsEnumerable().Select(item => CompactHeaderFieldToRemoveExtraItem(item)).ToList();
var groupedMappings = hNewFieldMapping.GroupBy(kvp => kvp.Value)
.Where(g => g.Count() > 1)
.ToDictionary(g => g.Key, g => g.Select(kvp => kvp.Key).ToList());
foreach (var kvp in groupedMappings)
string mappedColumnNames = kvp.Key;
List<string> originalColumnNames = kvp.Value.ToList().Except(columnsToRemove)
.Select(item => ((Dictionary<string, string>)hSetup["fileHeader"])[item].ToString())
if (originalColumnNames.Count != 0)
bool columnsEqual = HasDuplicateData((DataTable)hSetup["csvDataTable"], originalColumnNames);
Console.ForegroundColor = ConsoleColor.Red;
string message = $"!!WARNING!! The following columns are mapped to '{mappedColumnNames}' and contains duplicate data: {string.Join(", ", originalColumnNames)}";
Console.WriteLine(message);
var x = hNewFieldMapping.Where(q => q.Value == mappedColumnNames).ToList();
dupeMapped[x[0].Key] = mappedColumnNames;
for (int i = 1; i < x.Count; i++)
hNewFieldMapping[x[i].Key] = $"{mappedColumnNames}-HEADER-NORM-DUPE-FIELD";
dupeMapped[x[i].Key] = $"{mappedColumnNames}-HEADER-NORM-DUPE-FIELD";
hNewFieldMapping[x[i].Key] = $"{mappedColumnNames}-HEADER-NORM-DUPE-FIELD{i}";
dupeMapped[x[i].Key] = $"{mappedColumnNames}-HEADER-NORM-DUPE-FIELD{i}";
else if (columnsEqual == false && originalColumnNames.Count >= 3)
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine($"!!ERROR!! The following columns are mapped to '{mappedColumnNames}' and not contains duplicate data: {string.Join(", ", originalColumnNames)}");
Console.Write("Please reach out to ");
Console.ForegroundColor = ConsoleColor.Cyan;
Console.Write("USER SUPPORT");
Console.Write(" for help mapping these fields.\n");
Console.WriteLine("Press any key to exit");
FileInfo pathFileout = new FileInfo(Path.Combine(hSetup["fileOutpath"].ToString(), hSetup["mappingInfoXLSXNameOutName"].ToString()));
using (ExcelPackage package = new ExcelPackage(pathFileout))
package.Workbook.Worksheets.Add("DupeMapped Header");
package.Workbook.Worksheets.MoveAfter("DupeMapped Header", "Unmapped Header");
package.Workbook.Worksheets.MoveAfter("Load Ready File INFO", "Ignore Field e.g.");
ExcelWorksheet worksheet = package.Workbook.Worksheets["DupeMapped Header"];
worksheet.Cells[1, 1].Value = "ORIGINAL UNALTERED";
worksheet.Cells[1, 2].Value = "MAPPED TO";
worksheet.Cells[1, 3].Value = "ORIGINAL ALTERED";
var headerStyle = package.Workbook.Styles.CreateNamedStyle("HeaderStyle");
headerStyle.Style.Font.Bold = true;
headerStyle.Style.Font.Color.SetColor(Color.White);
headerStyle.Style.Font.Size = 13;
headerStyle.Style.Fill.PatternType = ExcelFillStyle.Solid;
headerStyle.Style.Fill.BackgroundColor.SetColor(System.Drawing.ColorTranslator.FromHtml("#002060"));
headerStyle.Style.Border.Top.Style = ExcelBorderStyle.Thin;
headerStyle.Style.Border.Bottom.Style = ExcelBorderStyle.Thin;
headerStyle.Style.Border.Right.Style = ExcelBorderStyle.Thin;
headerStyle.Style.Border.Left.Style = ExcelBorderStyle.Thin;
headerStyle.Style.HorizontalAlignment = ExcelHorizontalAlignment.Center;
worksheet.Cells["A1:C1"].StyleName = "HeaderStyle";
var rowStyle = package.Workbook.Styles.CreateNamedStyle("RowStyle");
rowStyle.Style.Border.Top.Style = ExcelBorderStyle.Thin;
rowStyle.Style.Border.Bottom.Style = ExcelBorderStyle.Thin;
rowStyle.Style.Border.Right.Style = ExcelBorderStyle.Thin;
rowStyle.Style.Border.Left.Style = ExcelBorderStyle.Thin;
worksheet.Cells["A2:C500"].StyleName = "RowStyle";
worksheet.Column(1).Width = 44;
worksheet.Column(2).Width = 44;
worksheet.Column(3).Width = 44;
worksheet.Row(1).Height = 20;
worksheet.Cells["A1:C500"].AutoFilter = true;
int[] colCounter = { 1, 2, 3 };
foreach (var mapping in dupeMapped)
string key = mapping.Key;
string value = mapping.Value;
string fileHeaderUnAlteredHeaders = ((IDictionary<string, string>)hSetup["fileHeader"])[CompactHeaderFieldToRemoveExtraItem(key)];
foreach (int col in colCounter)
string rowValue = string.Empty;
rowValue = fileHeaderUnAlteredHeaders;
worksheet.Cells[rowCounter, col].Value = rowValue;
hSetup["fileHeaderRemap"] = hNewFieldMapping;
private static bool HasDuplicateData(DataTable dataTable, List<string> originalColumnNames)
if (originalColumnNames.Count == 0) return false;
if (originalColumnNames.Count < 2) return false;
var referenceColumn = originalColumnNames[0];
foreach (var columnName in originalColumnNames.Skip(1))
bool columnsEqual = dataTable.AsEnumerable()
.Select(row => row.IsNull(referenceColumn) ? null : row.Field<string>(referenceColumn))
.SequenceEqual(dataTable.AsEnumerable()
.Select(row => row.IsNull(columnName) ? null : row.Field<string>(columnName)));
private static void OutputFieldExamplesToExcel(Dictionary<string, List<string>> hFieldExamples, int numOfExamples, List<(string headerCol, string mapField)> hitsFound, string worksheetName, string quitOrContinue)
FileInfo pathFileout = new FileInfo(Path.Combine(hSetup["fileOutpath"].ToString(), hSetup["mappingInfoXLSXNameOutName"].ToString()));
using (ExcelPackage package = new ExcelPackage(pathFileout))
ExcelWorksheet worksheet = package.Workbook.Worksheets[worksheetName];
var hFieldExamplesFinal = new Dictionary<string, List<string>>();
if (worksheetName.Equals("Ignore Field e.g."))
rowCounter = (worksheet.Cells[2, 1, worksheet.Dimension.End.Row, 1].Where(cell => !string.IsNullOrWhiteSpace(cell.Text)).Max(cell => cell.Start.Row)) + 1;
foreach (var item in hFieldExamples)
int numOfExamplesFound = value.Count;
int numOfRemainExamples = numOfExamples - numOfExamplesFound;
if (numOfRemainExamples != 0)
if (numOfRemainExamples == numOfExamples)
value.Add("FIELD IS EMPTY");
for (int i = 0; i < numOfRemainExamples; i++)
if (!value.Contains("EMPTY_FIELD_IGNORE"))
value.Add("NO ADDITIONAL EXAMPLES");
hFieldExamplesFinal[key] = value;
foreach (var keyValuePair in hFieldExamplesFinal)
var key = keyValuePair.Key;
string fileHeaderUnAlteredHeaders = ((IDictionary<string, string>)hSetup["fileHeader"])[Regex.Replace(key, @"-HEADER-NORM-DUPE-FIELD\d$", "").ToString().Trim()];
for (int colCounter = 1; colCounter <= 3; colCounter++)
var rowValue = string.Empty;
rowValue = fileHeaderUnAlteredHeaders;
worksheet.Cells[rowCounter, colCounter].Value = rowValue;
worksheet.Cells[rowCounter, colCounter].Value = rowValue;
int FromRow = rowCounter;
foreach (string item in keyValuePair.Value)
worksheet.Cells[rowCounter, 1].Value = fileHeaderUnAlteredHeaders.ToString();
worksheet.Cells[rowCounter, 2].Value = key.ToString();
worksheet.Cells[rowCounter, colCounter].Value = item.ToString();
using (var range = worksheet.Cells[2, 1, rowCounter, 3])
worksheet.Rows.CustomHeight = true;
worksheet.Column(3).AutoFit();
range.Style.VerticalAlignment = ExcelVerticalAlignment.Center;
worksheet.Columns.AutoFit();
worksheet.Column(3).AutoFit();
if (quitOrContinue == "quit")
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("!! WARNING !!");
Console.WriteLine("DAT HEADER MAPPING ISSUE");
Console.WriteLine("Source DAT:");
Console.WriteLine(hSetup["filePath"].ToString());
Console.WriteLine("The DAT fields listed below are not configured for mapping.");
Console.WriteLine("Review the EXCEL document for mapping and example data.");
Console.Write("EXCEL Location: ");
Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine(Path.Combine(hSetup["fileOutpath"].ToString(), hSetup["mappingInfoXLSXNameOutName"].ToString()));
Console.WriteLine("Worksheet: Mapped Header");
Console.WriteLine("Worksheet: Unmapped Header");
Console.Write("Please reach out to ");
Console.ForegroundColor = ConsoleColor.Cyan;
Console.Write("USER SUPPORT");
Console.Write(" for help mapping these new fields.\n");
Console.WriteLine($"FIELD MAPPING DATABASE: {hSetup["dbPath"].ToString()}");
int col1Width = hitsFound.Max(item => item.headerCol.Length);
int col2Width = hitsFound.Max(item => item.mapField.Length);
const int spacebetweencol = 10;
Console.WriteLine("{0,-" + col1Width + "}" + new string(' ', spacebetweencol) + "{1,-" + col2Width + "}", "DAT FIELD", "MAP TO FIELD");
Console.WriteLine("{0,-" + col1Width + "}" + new string(' ', spacebetweencol) + "{1,-" + (col2Width - 1) + "}", new string('-', col1Width), new string('-', col2Width));
foreach (var hit in hitsFound)
Console.WriteLine("{0,-" + col1Width + "}" + new string(' ', spacebetweencol) + "{1,-" + col2Width + "}", hit.headerCol, hit.mapField);
if (String.IsNullOrEmpty(hSetup["noPrompts"].ToString()))
Process.Start(new ProcessStartInfo(pathFileout.FullName) { UseShellExecute = true });
Console.WriteLine("Press any key to exit");
private static async Task FileReviewAndSetupAsync()
hSetup["fileTotalLines"] = TotalLinesInCSV(hSetup["filePath"].ToString());
Encoding encoding = GetFileEncoding(hSetup["filePath"].ToString(), out string firstLine);
hSetup["fileEncoding"] = encoding;
XmlDocument xmlDoc = (XmlDocument)hSetup["scriptConfigXML"];
XmlNodeList inputFieldNodes = xmlDoc.SelectNodes("//datSeparators/input/field/obj");
XmlNodeList inputTextNodes = xmlDoc.SelectNodes("//datSeparators/input/text/obj");
List<string> inputdelimiters = new List<string>
CheckForSeparators(firstLine, inputFieldNodes),
CheckForSeparators(firstLine, inputTextNodes),
hSetup["fileDelimiterInput"] = inputdelimiters;
XmlNodeList outputFieldNodes = xmlDoc.SelectNodes("//datSeparators/output/field/obj");
XmlNodeList outputTextNodes = xmlDoc.SelectNodes("//datSeparators/output/text/obj");
List<string> outputdelimiters = new List<string>();
foreach (XmlNode node in outputFieldNodes)
delim = CharFromValue(node.Attributes["chr"].Value).ToString();
outputdelimiters.Add(delim);
foreach (XmlNode node in outputTextNodes)
delim = CharFromValue(node.Attributes["chr"].Value).ToString();
outputdelimiters.Add(delim);
hSetup["fileDelimiterOutput"] = outputdelimiters;
Dictionary<string, string> fieldOverrides = new Dictionary<string, string>();
if (hSetup.ContainsKey("fieldOverride") && !string.IsNullOrEmpty(hSetup["fieldOverride"].ToString()))
foreach (var pair in (hSetup["fieldOverride"]).ToString().Split(','))
var splitPair = pair.Split('=');
if (splitPair.Length == 2)
fieldOverrides[splitPair[0].Trim()] = splitPair[1].Trim();
Dictionary<string, string> hFileHeader = new Dictionary<string, string>();
int headerNullValues = 0;
char delimiter = Char.Parse(inputdelimiters[0]);
var delimiterReplacement = inputdelimiters[1];
var headers = firstLine.Split(delimiter);
for (int i = 0; i < headers.Length; i++)
string headerCol = headers[i].Replace(delimiterReplacement, "").Trim();
if (string.IsNullOrEmpty(headerCol))
string compactHeaderCol = CompactHeaderFieldToRemoveExtraItem(headerCol);
if (hFileHeader.ContainsKey(compactHeaderCol))
int headerFoundAddNum = hFileHeader.Keys.Count(key => key.StartsWith(compactHeaderCol)) + 1;
hFileHeader[$"{compactHeaderCol}-HEADER-NORM-DUPE-FIELD{headerFoundAddNum}"] = headerCol;
hFileHeader[compactHeaderCol] = headerCol;
if (headerNullValues != 0)
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("!!ERROR: HEADER CONTAINS EMPTY VALUES");
Console.WriteLine("--------------------------------------");
Console.WriteLine($"ERROR: Header contains {headerNullValues} Empty Fields");
Console.WriteLine("\nScript eiting, please fix header\n");
hSetup["fileHeader"] = hFileHeader;
List<string> aHeader = hFileHeader.Keys.Select(key => $"{delimiterReplacement}{key}{delimiterReplacement}").ToList();
hSetup["fileHeaderForImporting"] = string.Join(delimiter.ToString(), aHeader);
hSetup["csvDataTable"] = await ReadCsvToDataTableAsync();
ProcessDateTimeValuesInColumns();
Console.WriteLine("!ERROR! " + Ex.Message);
Console.WriteLine("Press any key to exit.");
private static string CompactHeaderFieldToRemoveExtraItem(string field)
return field.ToUpper().Replace(" ", "").Replace("_", "").ToString();
private static string CheckForSeparators(string firstLine, XmlNodeList sepItems)
foreach (XmlNode el in sepItems)
switch (el.Attributes["charName"].Value)
case "Device Control Four":
delim = CharFromValue(el.Attributes["chr"].Value);
case "Latin Small Letter Thron (UTF8)":
delim = firstLine.Contains(Convert.ToChar(int.Parse(el.Attributes["powershell"].Value))) ? Convert.ToChar(int.Parse(el.Attributes["powershell"].Value)) : '\0';
case "Latin Small Letter Thron (ASCII)":
delim = firstLine.Contains(Convert.ToChar(int.Parse(el.Attributes["powershell"].Value))) ? Convert.ToChar(int.Parse(el.Attributes["powershell"].Value)) : '\0';
if (String.IsNullOrEmpty(delim.ToString()) || delim == '\0')
throw new Exception("Input delimiters could not be identified, Please review the input DAT file");
private static char CharFromValue(string charValue)
int intValue = int.Parse(charValue.Substring(2), System.Globalization.NumberStyles.HexNumber);
return Convert.ToChar(intValue);
private static Encoding GetFileEncoding(string filePath, out string firstLine)
byte[] bom = new byte[4];
using (var file = new FileStream(filePath, FileMode.Open, FileAccess.Read))
if (bom[0] == 0xfe && bom[1] == 0x00 && bom[2] == 0x46 && bom[3] == 0x00)
firstLine = File.ReadLines(filePath).First();
Encoding encoding = Encoding.GetEncoding("Unicode");
firstLine = File.ReadLines(filePath).First();
Encoding encoding = Encoding.GetEncoding("UTF-8");
public static int TotalLinesInCSV(string filePath)
long fileSize = new FileInfo(filePath).Length;
int bufferSize = GetBufferSize(fileSize);
using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize, FileOptions.SequentialScan))
using (var reader = new StreamReader(stream))
var chunkResults = new System.Collections.Concurrent.ConcurrentBag<int>();
Parallel.ForEach(ReadFileInChunks(reader, bufferSize), chunk =>
foreach (var line in chunk)
chunkResults.Add(localLineCount);
lineCount = chunkResults.Sum();
private static int GetBufferSize(long fileSize)
if (fileSize < 1_000_000)
else if (fileSize < 1_000_000_000)
else if (fileSize < 10_000_000_000)
private static IEnumerable<string[]> ReadFileInChunks(StreamReader reader, int bufferSize)
while (!reader.EndOfStream)
var lines = new List<string>();
for (int i = 0; i < bufferSize && !reader.EndOfStream; i++)
string line = reader.ReadLine();
yield return lines.ToArray();
private static async Task<object> ReadCsvToDataTableAsync()
var dataTable = new DataTable();
Encoding readEncoding = (Encoding)hSetup["fileEncoding"];
bool boolMissingFieldFound = false;
hSetup.TryGetValue("fileDelimiterInput", out object fileDelimiters);
char fieldSeparator = char.Parse(((IList<string>)fileDelimiters)[0]);
char textSeparator = char.Parse(((IList<string>)fileDelimiters)[1]);
int totalLinesInFile = (int)hSetup["fileTotalLines"];
string csvPath = hSetup["filePath"].ToString();
if(hSetup["readOutputCsv"].ToString() == "true")
csvPath = Path.Combine(hSetup["fileOutpath"].ToString(), hSetup["fileName"].ToString());
readEncoding = new System.Text.UTF8Encoding(true);
fieldSeparator = char.Parse(((IList<string>)fileDelimiters)[0]);
textSeparator = char.Parse(((IList<string>)fileDelimiters)[1]);
totalLinesInFile = (int)hSetup["fileTotalLines"];
Console.WriteLine("START: READING LINES");
Stopwatch stopwatch = new Stopwatch();
ConsoleUtility.WriteProgressBar(0, true, $"Lines read: 0 / {totalLinesInFile} Elapsed Time: {stopwatch.Elapsed.ToString(@"hh\:mm\:ss")} ");
using (var reader = new StreamReader(csvPath, readEncoding))
using (var csv = new CsvReader(reader, new CsvConfiguration(CultureInfo.InvariantCulture)
Delimiter = fieldSeparator.ToString(),
MissingFieldFound = context =>
boolMissingFieldFound = true;
Console.OutputEncoding = (Encoding)hSetup["fileEncoding"];
string rawRec = (context.Context.Parser.RawRecord);
var malformedData = (context.Context.Parser.Record).AsEnumerable()
.Select((value, index) => new { value, index })
.FirstOrDefault(item => item.value.ToString().Contains(fieldSeparator));
if (malformedData != null)
string fieldName = context.Context.Reader.HeaderRecord[malformedData.index];
Console.ForegroundColor = ConsoleColor.Yellow;
Console.Write($"\r\n!!WARNING!! ");
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine($"Malformed record detected in DAT file at line {context.Context.Parser.RawRow}.\tField: {fieldName}\tMalformed Value: {malformedData.value}\r\nRaw record: {rawRec.Substring(0, 60)} ...");
Console.ForegroundColor = ConsoleColor.Yellow;
Console.Write($"\r\n!!WARNING!! ");
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine($"Malformed record detected in DAT file at line {context.Context.Parser.RawRow}.\tHeader Field Count: {context.Context.Reader.HeaderRecord.Length}\tRow Field Count: {context.Context.Parser.Record.Length}\r\nRaw record: {rawRec.Substring(0, 60)} ...");
LineBreakInQuotedFieldIsBadData = false,
foreach (var header in csv.HeaderRecord)
dataTable.Columns.Add(header.Trim(new char[] { textSeparator, 'þ', (char)65533, '\u00FE' }), typeof(string));
var row = dataTable.NewRow();
foreach (var column in csv.HeaderRecord)
var fieldValue = csv.GetField(typeof(string), column).ToString().Trim(new char[] { textSeparator, 'þ', (char)65533, '\u00FE' });
row[column.ToString().Trim(new char[] { textSeparator, 'þ', (char)65533, '\u00FE' })] = fieldValue.Equals(" ") ? "" : fieldValue;
if ((currentLine % batchSize) == 0)
ConsoleUtility.WriteProgressBar(((100 * currentLine) / totalLinesInFile), true, $"Lines read: {currentLine} / {totalLinesInFile} Elapsed Time: {stopwatch.Elapsed.ToString(@"hh\:mm\:ss")} ");
ConsoleUtility.WriteProgressBar((100 * (dataTable.Rows.Count + 1)) / totalLinesInFile, true, $"Lines read: {(dataTable.Rows.Count + 1)} / {totalLinesInFile} Elapsed Time: {stopwatch.Elapsed.ToString(@"hh\:mm\:ss")} ");
Console.WriteLine("FINISHED: READING LINES");
hSetup["csvDataTable"] = dataTable;
List<string> malformedColName = (dataTable.Columns)
.Select(column => column.ColumnName)
.Where(colName => colName.Contains(fieldSeparator.ToString()) || colName.Contains(textSeparator))
if (malformedColName != null && malformedColName.Any())
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("\n!ERROR!\n");
Console.WriteLine("MALFORMED HEADER FOUND IN DAT FILE:");
Console.WriteLine("Column Name");
Console.WriteLine("------------");
foreach (var colName in malformedColName)
Console.WriteLine(colName);
Console.Write("FIX HEADER IN DAT FILE: ");
Console.WriteLine(hSetup["filePath"].ToString());
Console.WriteLine("Press any key to exit");
if (boolMissingFieldFound)
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("\n!ERROR!\n");
Console.WriteLine("MALFORMED RECORD FOUND IN DAT FILE:");
Console.WriteLine("Press any key to exit");
private static void ProcessDateTimeValuesInColumns()
DataTable dataTable = (DataTable)hSetup["csvDataTable"];
Console.WriteLine("START: CHECKING DATETTIME VALUES IN COLUMNS");
Stopwatch stopwatch = new Stopwatch();
Dictionary<string, bool> dateTimeColNames = new Dictionary<string, bool>();
List<string> knownTimeZones = TimeZoneInfo.GetSystemTimeZones().Select(tz => tz.Id).Distinct().ToList();
knownTimeZones.AddRange(TimeZoneInfo.GetSystemTimeZones().Select(tz => tz.DisplayName).Distinct().ToList());
knownTimeZones.AddRange(TimeZoneInfo.GetSystemTimeZones().Select(tz => tz.StandardName).Distinct().ToList());
knownTimeZones.AddRange(TimeZoneInfo.GetSystemTimeZones().Select(tz => tz.DaylightName).Distinct().ToList());
knownTimeZones.AddRange(new []{ "EST", "PST", "CST", "EDT", "CDT", "PDT", "MST", "GMT", "UTC", "BST", "IST", "CET", "AEST", "AEDT" });
knownTimeZones.Distinct();
ConsoleUtility.WriteProgressBar(0, true, $"Columns checked: 0 / {dataTable.Columns.Count} Elapsed Time: {stopwatch.Elapsed:hh\\:mm\\:ss} ");
var columnStats = dataTable.Columns.Cast<DataColumn>()
ColumnName = col.ColumnName,
MaxLength = dataTable.Rows.Cast<DataRow>()
.Max(row => row[col]?.ToString()?.Length ?? 0),
MinLength = dataTable.Rows.Cast<DataRow>()
.Min(row => row[col]?.ToString()?.Length ?? 0)
foreach (DataColumn column in dataTable.Columns)
bool isDateTimeColumn = false;
var dateTimeResults = dataTable.AsEnumerable()
.Select(row => row.Field<string>(column.ColumnName))
.Where(value => !string.IsNullOrEmpty(value))
.All(value => IsDateTime(value));
if (dateTimeResults == true)
dateTimeColNames.Add(column.ColumnName, isDateTimeColumn);
foreach (DataRow row in dataTable.Rows)
string value = row[column]?.ToString().ToUpper().Trim();
string potentialTimezone = knownTimeZones.FirstOrDefault(
tz => value.IndexOf(tz, StringComparison.OrdinalIgnoreCase) >= 0);
if (potentialTimezone != null)
value = value.ToUpper().Replace(potentialTimezone.ToUpper(), string.Empty).Trim().ToString();
if (string.IsNullOrWhiteSpace(value)) continue;
string[] parts = value.Split(' ');
string timePart = parts[0].Contains(":") ? parts[0] : value;
if (parts.Length == 2 && parts[0].Contains(":"))
if (DateTime.TryParse(timePart, out DateTime time))
value = (time.ToString("hh:mm:ss", CultureInfo.InvariantCulture) + $" {parts.LastOrDefault()}" + $" {potentialTimezone}").Trim();
var splitValue = value.Split(' ');
row[column] = string.Join(" ", splitValue.Distinct()).Trim();
else if (parts.Length == 1 && DateTime.TryParse(value, out DateTime timeOnly) && value.Contains(":"))
value = (timeOnly.ToString("hh:mm:ss tt", CultureInfo.InvariantCulture) + $" {potentialTimezone}").Trim();
var splitValue = value.Split(' ');
row[column] = string.Join(" ", splitValue.Distinct()).Trim();
else if (DateTime.TryParse(value, out DateTime dateTime))
if (dateTime.Year >= 1900 && dateTime.Year <= 2100)
string formattedDate = value.Contains(":") ? dateTime.ToString("MM/dd/yyyy hh:mm:ss tt", CultureInfo.InvariantCulture) : dateTime.ToString("MM/dd/yyyy", CultureInfo.InvariantCulture);
row[column] = formattedDate;
ConsoleUtility.WriteProgressBar(((100 * currentColumn) / dataTable.Columns.Count), true, $"Columns checked: {currentColumn} / {dataTable.Columns.Count} Elapsed Time: {stopwatch.Elapsed:hh\\:mm\\:ss} ");
Console.WriteLine("FINISHED: CHECKING DATETTIME VALUES IN COLUMNS");
hSetup["dateTimeColumnNames"] = dateTimeColNames;
static bool IsDateTime(string value)
if (string.IsNullOrWhiteSpace(value))
string datePattern = @"^[0-2]?[0-9](/|-)[0-3]?[0-9](/|-)[1-2][0-9][0-9][0-9]( [0-2]?[0-9](:|\.)[0-5][0-9](:|\.)[0-5][0-9])?$";
string iso8601Pattern = @"^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?)?$";
bool matchesPattern = Regex.IsMatch(value, datePattern, RegexOptions.IgnoreCase) || Regex.IsMatch(value, iso8601Pattern, RegexOptions.IgnoreCase);
return DateTime.TryParse(value, out _);
return DateTime.TryParse(value, out _);
static void RemoveEmptyColumns()
var dataTable = hSetup["csvDataTable"] as DataTable;
Console.WriteLine("Invalid DataTable in hSetup.");
var columnsToRemove = new List<DataColumn>();
foreach (DataColumn column in dataTable.Columns)
bool isEmpty = dataTable.AsEnumerable().All(row => string.IsNullOrEmpty(row[column].ToString()));
columnsToRemove.Add(column);
if (columnsToRemove.Any())
hSetup["columnsToRemove"] = columnsToRemove.Select(c => c.ColumnName).ToList();
foreach (var column in columnsToRemove)
dataTable.Columns.Remove(column);
Console.WriteLine($"REMOVED {columnsToRemove.Count} EMPTY COLUMN(s).");
if (hSetup.ContainsKey("columnsToRemove"))
WriteRemovedColumnNamesInExcel(hSetup);
Console.WriteLine("NO EMPTY COLUMNS FOUND");
private static void WriteRemovedColumnNamesInExcel(Dictionary<string, object> hSetup)
if (!hSetup.ContainsKey("fileOutpath") || !hSetup.ContainsKey("mappingInfoXLSXNameOutName"))
Console.WriteLine("Output file path or Excel file name is missing in hSetup.");
string fileOutPath = hSetup["fileOutpath"].ToString();
string fileName = hSetup["mappingInfoXLSXNameOutName"].ToString();
FileInfo pathFileout = new FileInfo(Path.Combine(fileOutPath, fileName));
if (!hSetup.ContainsKey("columnsToRemove") || !(hSetup["columnsToRemove"] is IList<string> columnsToRemove))
Console.WriteLine("No columns to remove found in the hSetup.");
ExcelPackage.LicenseContext = LicenseContext.NonCommercial;
using (ExcelPackage package = new ExcelPackage(pathFileout))
ExcelWorksheet worksheet = package.Workbook.Worksheets["Ignore Field e.g."] ?? package.Workbook.Worksheets.Add("Ignore Field e.g.");
if (worksheet.Dimension == null)
worksheet.Cells[1, 1].Value = "ORIGINAL UNALTERED";
worksheet.Cells[1, 2].Value = "ORIGINAL ALTERED";
worksheet.Cells[1, 3].Value = "EXAMPLE DATA";
int rowCounter = worksheet.Dimension?.Rows + 1 ?? 2;
for (int i = 1; i < rowCounter; i++)
if (string.IsNullOrWhiteSpace(worksheet.Cells[i, 1].Value?.ToString()))
foreach (var columnName in columnsToRemove)
worksheet.Cells[rowCounter, 1].Value = columnName;
worksheet.Cells[rowCounter, 2].Value = CompactHeaderFieldToRemoveExtraItem(columnName);
worksheet.Cells[rowCounter, 3].Value = "EMPTY_FIELD_IGNORE";
static void setupUserArgs(string[] userArgs)
bool userArgsExist = false;
bool anyThingPopulated = false;
foreach (string arg in userArgs)
if (!string.IsNullOrEmpty(arg))
anyThingPopulated = true;
if (!userArgsExist && !anyThingPopulated)
Console.ForegroundColor = ConsoleColor.Magenta;
Console.WriteLine("EXAMPLE #1 HAS BEEN SELECTED");
Console.Write("Please Provide DAT File: ");
string filepath = Console.ReadLine()?.Replace("\"", "").Trim();
while (!File.Exists(filepath))
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("!WARNING! DAT FILE NOT FOUND");
Console.Write("Please Provide DAT File: ");
filepath = Console.ReadLine()?.Replace("\"", "").Trim();
hSupportedFlags["filePath"] = filepath;
hSetup["userInputRun"] = "true";
Console.WriteLine("PROCESSING DAT FILE");
startRunTime = DateTime.Now;
foreach (string arg in userArgs)
foreach (var hFlag in hFlags)
if (arg.StartsWith("-" + hFlag.Key))
string tempValue = "true";
if (counter + 1 < userArgs.Length && !arg.Equals("-noPrompts") && !arg.Equals("-bulkImport"))
tempValue = userArgs[counter + 1].Trim();
hSupportedFlags[hFlag.Value.Item1] = tempValue;
else if (anyThingPopulated)
foreach (string arg in userArgs)
if (arg.Contains("\\") || arg.Contains(":"))
hSupportedFlags["filePath"] = arg;
else if(hSupportedFlags.ContainsKey("datCompare"))
foreach (var hFlag in hFlags)
string flag = hFlag.Value.Item1;
string defaultValue = hFlag.Key == "software" ? "casepoint" :
hFlag.Key == "env" ? "prod" :
hSetup[flag] = hSupportedFlags.ContainsKey(flag) ? hSupportedFlags[flag] : defaultValue;
hSetup["folderPath"] = Path.GetDirectoryName(hSetup["filePath"].ToString()) ?? "";
hSetup["fileName"] = Path.GetFileName(hSetup["filePath"].ToString()) ?? "";
hSetup["fileNameWithoutExt"] = Path.GetFileNameWithoutExtension(hSetup["fileName"].ToString()) ?? "";
hSetup["fileOutFolderName"] = "NEW_CP_DAT";
hSetup["fileOutpath"] = Path.Combine(hSetup["folderPath"].ToString(), hSetup["fileOutFolderName"].ToString());
hSetup["todayDate"] = DateTime.Now.ToString("MM/dd/yyyy");
hSetup["readOutputCsv"] = "false";
if (string.IsNullOrEmpty(hSetup["filePath"].ToString()) && string.IsNullOrEmpty(hSetup["bulkImport"].ToString()))
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("\n!ERROR!\n");
Console.WriteLine("PROVIDE A PROPER EXAMPLE\n-filePath and -bulkImport are empty");
if (!string.IsNullOrEmpty(hSetup["filePath"].ToString()) && !string.IsNullOrEmpty(hSetup["datCompare"].ToString()))
Console.ForegroundColor = ConsoleColor.Magenta;
Console.WriteLine("EXAMPLE #4 HAS BEEN SELECTED");
Console.Write("Please Provide Original DAT File: ");
string filepath = Console.ReadLine()?.Replace("\"", "").Trim();
while (!File.Exists(filepath))
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("!WARNING! DAT FILE NOT FOUND");
Console.Write("Please Provide Original DAT File: ");
filepath = Console.ReadLine()?.Replace("\"", "").Trim();
hSupportedFlags["filePath"] = filepath;
hSetup["userInputRun"] = "true";
Console.WriteLine("COMPARING DAT FILE");
startRunTime = DateTime.Now;
_ = DatCompareWorkflowAsync();
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("Press any key to exit");
if (!File.Exists(hSetup["filePath"].ToString()))
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("\n!ERROR!\n");
Console.WriteLine("PROVIDE A PROPER EXAMPLE\n-filePath does not exist: " + hSetup["filePath"].ToString());
private static void LatestScriptVersion()
string scriptRootFolder = AppDomain.CurrentDomain.BaseDirectory;
hSetup["scriptRootFolder"] = Regex.Match(scriptRootFolder, @"^.+\\headerNorm\\", RegexOptions.IgnoreCase).Groups[0].ToString();
hSetup["machineRunningFrom"] = Environment.MachineName;
var directories = Directory.GetDirectories(Path.Combine(hSetup["scriptRootFolder"].ToString(), hSetup["scriptEnv"].ToString()))
.Where(dir => !dir.EndsWith("_backup", StringComparison.OrdinalIgnoreCase))
int latestVersion = directories
.Select(dir => Path.GetFileName(dir))
.Where(name => int.TryParse(name, out _))
.Select(name => int.Parse(name))
.OrderByDescending(version => version)
hSetup["scriptLatestVersionFolder"] = Path.Combine(hSetup["scriptRootFolder"].ToString(), hSetup["scriptEnv"].ToString(), latestVersion.ToString("D5"));
hSetup["scriptLatestVersion"] = latestVersion.ToString("D5");
hSetup["scriptConfig"] = "_config.xml";
hSetup["scriptConfigpath"] = Path.Combine(hSetup["scriptLatestVersionFolder"].ToString(), hSetup["scriptConfig"].ToString());
hSetup["mappingInfoXLSXName"] = "_mappingInfo.xlsx";
hSetup["mappingInfoXLSXNameOutName"] = hSetup["fileNameWithoutExt"].ToString() + "_mappingInfo.xlsx";
hSetup["dbPath"] = Path.Combine(hSetup["scriptLatestVersionFolder"].ToString(), "db.accdb");
XmlDocument xmlDoc = new XmlDocument();
xmlDoc.Load((string)hSetup["scriptConfigpath"]);
hSetup["scriptConfigXML"] = xmlDoc;
Directory.CreateDirectory(hSetup["fileOutpath"].ToString());
File.Copy(Path.Combine(hSetup["scriptLatestVersionFolder"].ToString(), hSetup["mappingInfoXLSXName"].ToString()), Path.Combine(hSetup["fileOutpath"].ToString(), hSetup["mappingInfoXLSXNameOutName"].ToString()), true);
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("!!ERROR: COPYING MAPPING FILE");
Console.WriteLine("--------------------------------------");
Console.WriteLine($"\n {except} \n");
Console.WriteLine("\nAn error occured while copying the file:\n" + except);
Console.WriteLine("Press any key to exit");
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("!!ERROR: SCRIPT NOT FOUND");
Console.WriteLine("--------------------------------------");
Console.WriteLine($"\n {except} \n");
Console.WriteLine("\nScript not found, please attempt to re-run\n");
Console.WriteLine("Press any key to exit");
private static void ShowHeaderNormInfo()
HHHHHHHHH HHHHHHHHH DDDDDDDDDDDDD RRRRRRRRRRRRRRRRR
H:::::::H H:::::::H D::::::::::::DDD R::::::::::::::::R
H:::::::H H:::::::H D:::::::::::::::DD R::::::RRRRRR:::::R
HH::::::H H::::::HH DDD:::::DDDDD:::::D RR:::::R R:::::R
H:::::H H:::::H D:::::D D:::::D R::::R R:::::R
H:::::H H:::::H D:::::D D:::::D R::::R R:::::R
H::::::HHHHH::::::H D:::::D D:::::D R::::RRRRRR:::::R
H:::::::::::::::::H D:::::D D:::::D R:::::::::::::RR
H:::::::::::::::::H D:::::D D:::::D R::::RRRRRR:::::R
H::::::HHHHH::::::H D:::::D D:::::D R::::R R:::::R
H:::::H H:::::H D:::::D D:::::D R::::R R:::::R
H:::::H H:::::H D:::::D D:::::D R::::R R:::::R
HH::::::H H::::::HH DDD:::::DDDDD:::::D RR:::::R R:::::R
H:::::::H H:::::::H D:::::::::::::::DD R::::::R R:::::R
H:::::::H H:::::::H D::::::::::::DDD R::::::R R:::::R
HHHHHHHHH HHHHHHHHH DDDDDDDDDDDDD RRRRRRRR RRRRRRR
Console.ForegroundColor = ConsoleColor.Red;
Console.ForegroundColor = ConsoleColor.Green;
Console.Write("BASIC: ");
Console.WriteLine("HEADER NORMALIZATION INFO");
Console.ForegroundColor = ConsoleColor.Cyan;
Console.Write("EXAMPLE #1 ");
Console.WriteLine("- Basic Runtime for 99% of all users");
Console.WriteLine($"Double click on headerNormTool.bat --> SHIFT+RIGHT CLICK on the File --> Paste into this Window --> Hit Enter");
Console.ForegroundColor = ConsoleColor.Cyan;
Console.Write("EXAMPLE #2 ");
Console.WriteLine("- Basic Runtime if a user doesn't want prompt (bulk running)");
Console.WriteLine($"headerNormTool.bat -file \\UNC\\DATFILE.dat -noPrompts");
Console.ForegroundColor = ConsoleColor.Cyan;
Console.Write("EXAMPLE #3 ");
Console.WriteLine("- Basic Runtime for Field Overide. Quickly remap fields outside of the DAT file.");
Console.WriteLine($"headerNormTool.bat -file \\UNC\\DATFILE.dat -fieldOverride SOURCEPATH=SEC_SOURCE_CF,SOURCESHA=FILE HASHCODE SHA1,etc.");
Console.WriteLine("\nFLAG\t\tINFO");
Console.WriteLine("----\t\t----");
foreach (var flag in hFlags)
Console.WriteLine(String.Format("-{0,0}\t{1,20}", flag.Key, flag.Value.Item2));
private static void ChangeWindowSize()
Console.SetBufferSize(128, Console.BufferHeight);
Console.WindowWidth = 128;
Console.WindowHeight = 50;