using CSharpFunctionalExtensions;
using System.Text.RegularExpressions;
using System.Collections.Generic;
/// <summary>
/// Runs a fixed set of DocNameHelper patterns against a list of sample file names and
/// prints, for every file name, the matches produced by each pattern.
/// </summary>
public static void Main()
{
    // Sample file names paired with a culture tag.
    // Only the name is consumed by the loop below; the culture element is carried along unused.
    (string name, string culture)[] nameTests = {
        ("Sônia de Hollanda Zakaria - Recibo oficial março2021 - PJ.docx", "pt-BR")
        , ("Laudo médico com afastamento por 2 meses 13042021.pdf", "pt-BR")
        , ("Laudo médico com afastamento por 2 meses .pdf", "pt-BR")
        , ("Laudo médico com afastamento por 100 dias.pdf", "pt-BR")
        , ("Prontuário Itau com anotações entre 2007 e 2021.pdf", "pt-BR")
        , ("Recibos comparecimento a Fisioterapia entre 052016 e 122019.pdf", "pt-BR")
        , ("US abdominal˗cisto hepático_5f9a739a-0b2e-4fcc-9228-81035e6e50ea 23122022 v3.docx", "pt-BR")
        , ("Ruy Quiroga_20230605161406.pdf", "pt-BR")
        , ("Aderaldo Vieira Chaves - SOLICITAÇÃO DE EXAME_5f9a739a-0b2e-4fcc-9228-81035e6e50ea.pdf", "pt-BR")
        , ("Nome do paciente - exame laboratorial amplo 3 - 23122022.docx", "pt-BR")
        , ("Nome do paciente - exame laboratorial amplo - 23122022 2.docx", "pt-BR")
        , ("Nome do paciente - exame laboratorial amplo - 122022 2.docx", "pt-BR")
        , ("Nome do paciente - relatório internação de 12 a 20122022 - 2 20__.docx", "pt-BR")
        , ("Nome do paciente - relatório internação de 12 a 20122022 - 2 2017.docx", "pt-BR")
        , ("Nome do paciente - relatório internação de 12 a 20122022 - v2 2017.docx", "pt-BR")
        , ("Nome do paciente - exame laboratorial amplo - 23122022 v2.docx", "pt-BR")
        , ("Nome do paciente - exame laboratorial amplo v3 - 23122022.docx", "pt-BR")
        , ("Nome do paciente - exame laboratorial amplo - 122022 v2.docx", "pt-BR")
        , ("Nome do paciente - relatório internação de 12 a 20122022 - v2 20__.docx", "pt-BR")
        , ("US abdominal-cisto hepático 23122022.docx", "pt-BR"), ("US abdominal˗cisto hepático 23122022.docx", "pt-BR")
        , ("US abdominal–cisto hepático 23122022.docx", "pt-BR")
        , ("US abdominal - cisto hepático 23122022.docx", "pt-BR"), ("US abdominal - cisto hepático v3 23122022.doc", "pt-BR")
        , ("US abdominal 23122022.docx", "pt-BR")
        , ("US abdominal - 23122022.docx", "pt-BR")
        , ("Nome do paciente - exame laboratorial amplo v2 - 23122022.docx", "pt-BR")
        , ("Nome do paciente - v2 23122022.docx", "pt-BR")
        , ("Outro paciente - exame laboratorial amplo para home-care - v2 23122022.docx", "pt-BR")
        , ("Nome do paciente - relatório internação de 12 a 20122022 - v2 23122022.docx", "pt-BR")
        , ("Nome do paciente - exame laboratorial amplo - 202_ v3.docx", "pt-BR")
        , ("Nome do paciente D'Oliveira - RECEITUÁRIO CONTROLE ESPECIAL - Ciprofloxacino 500 mg 2017.pdf", "pt-BR"), ("Fulano Sicrano Beltrano - relatório internação - de 1208 até 20062021 - erosão traqueal - v2 12072021.docx", "pt-BR")
        , ("Paulo Afonso Zavataro - Recibo PF referente ao período de 06 a 11082023 internação no Hospital Copa Star.pdf", "pt-BR")
        , ("Paulo Afonso Zavataro - Recibo PJ referente ao período de 12 a 17082023 internação no Hospital Copa Star - NFSe_00003478_02874440.pdf", "pt-BR")
        , ("Paulo Afonso Zavataro - Relatório e Recibo PF referente ao período de 06 a 11082023 internação no Hospital Copa Star - 77d93944-071c-4f01-8788-838d2a204a3d.pdf", "pt-BR")
        , ("Waldenice de Albuquerque Haidamus - Relatório da internação entre 3005 a 14062013.docx", "pt-BR")
        , ("Miuza de Holanda Fragelli - anotação Outlook_backup_2013_abr._20__13_38_12.pdf", "pt-BR")
        , ("Miuza de Holanda Fragelli - anotação Outlook_backup_2013_apr_20__13_38_12.pdf", "en-US")
        , ("Miuza de Holanda Fragelli - anotação Outlook_backup_2013_abr_20__13_38_12.pdf", "pt-BR")
        , ("CARLOS ANTONIO MENEZES AMARAL BARBOSA - 23.05.pdf", "pt-BR")
        , ("CARLOS ANTONIO MENEZES AMARAL BARBOSA - 05.23.pdf", "en-US")
        , ("CARLOS ANTONIO MENEZES AMARAL BARBOSA - 23.5.pdf", "pt-BR")
        , ("CARLOS ANTONIO MENEZES AMARAL BARBOSA - 5.23.pdf", "en-US")
        , ("CARLOS ANTONIO MENEZES AMARAL BARBOSA - apr2005.pdf", "en-US")
        , ("CARLOS ANTONIO MENEZES AMARAL BARBOSA - abr.2005.pdf", "pt-BR")
        , ("CARLOS ANTONIO MENEZES AMARAL BARBOSA - abr2005.pdf", "pt-BR")
        , ("CARLOS ANTONIO MENEZES AMARAL BARBOSA - march2021.pdf", "en-US")
        , ("CARLOS ANTONIO MENEZES AMARAL BARBOSA - march.2021.pdf", "en-US")
        , ("CARLOS ANTONIO MENEZES AMARAL BARBOSA - março2021.pdf", "pt-BR")
        , ("CARLOS ANTONIO MENEZES AMARAL BARBOSA - março.2021.pdf", "pt-BR")
        , ("Nome do paciente D'Oliveira - RECEITUÁRIO CONTROLE ESPECIAL_5f9a739a-0b2e-4fcc-9228-81035e6e50ea durante internação Copa D'Or de 12 a 15052017 - Ciprofloxacino 500 mg 3 2017.pdf", "pt-BR")
    };

    foreach (var fileName in nameTests)
    {
        Console.WriteLine($"// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^");
        Console.WriteLine($"// Matches for {fileName.name}:");

        // Each label is derived from the constant it describes via nameof, so the printed
        // label can never drift away from the pattern that is actually being run.
        IterateMatches(nameof(DocNameHelper.GrabDatesWithMonthsInLettersOrPureNumbersFromFile), DocNameHelper.GrabDatesWithMonthsInLettersOrPureNumbersFromFile, fileName.name);
        IterateMatches(nameof(DocNameHelper.GrabDateIntervalFromPartOfFile), DocNameHelper.GrabDateIntervalFromPartOfFile, fileName.name);
        IterateMatches(nameof(DocNameHelper.GrabDatesIncorrectFormattedWithInvalidChars), DocNameHelper.GrabDatesIncorrectFormattedWithInvalidChars, fileName.name);
        // Previously printed under the label of the pattern above (copy-paste slip).
        IterateMatches(nameof(DocNameHelper.GrabFullMonthWithTwoOrFourDigitYearWithSomeCharSeparatingThemOrNot), DocNameHelper.GrabFullMonthWithTwoOrFourDigitYearWithSomeCharSeparatingThemOrNot, fileName.name);
        IterateMatches(nameof(DocNameHelper.GrabAbbreviatedMonthWithTwoOrFourDigitYearWithDotSeparatingThemOrNot), DocNameHelper.GrabAbbreviatedMonthWithTwoOrFourDigitYearWithDotSeparatingThemOrNot, fileName.name);
        IterateMatches(nameof(DocNameHelper.GrabDatesOrPureNumbersOrDatesWithCardFromPartOfFile), DocNameHelper.GrabDatesOrPureNumbersOrDatesWithCardFromPartOfFile, fileName.name);
    }
}
/// <summary>
/// Runs <paramref name="pattern"/> against <paramref name="testCase"/> through
/// DocNameHelper.GetMatches and prints the number of matches, labelled with
/// <paramref name="namePattern"/>.
/// </summary>
/// <param name="namePattern">Label printed next to the match count.</param>
/// <param name="pattern">Regular-expression source handed to DocNameHelper.GetMatches.</param>
/// <param name="testCase">Input text the pattern is applied to.</param>
internal static void IterateMatches(string namePattern, string pattern, string testCase)
var matches = DocNameHelper.GetMatches(pattern, testCase);
Console.WriteLine($"// Matches for {namePattern}: {matches.Count}");
// The same match collection is converted twice: once into dictionaries, once into tuple lists.
// ReturnAllResults calls GetAllMatchesWithNamedGroups itself, so that helper's per-match
// console output is produced once for each of the two calls below.
List<Dictionary<string, string>> capturesDict = DocNameHelper.GetAllMatchesWithNamedGroups(matches);
Result<List<List<(string namedGroup, string value)>>> capturesList = DocNameHelper.ReturnAllResults(matches);
// NOTE(review): block delimiters (and possibly further statements) are not visible in this
// view, so the exact nesting of the statements below cannot be confirmed from here.
if (capturesList.IsSuccess)
foreach (var capture in capturesList.Value)
Console.WriteLine("\n $$$$$$$$$$$$$$$$$$ from capturesList: $$$$$$$$$$$$$$$$$$");
Console.WriteLine("\n ############ from capturesDict: ############");
public static class DocNameHelper
// Named groups: relationandinterval (the whole match), temporalrelation (optional " em " or
// " por "), intervalordate and timeframe (optional unit word anos/ano/meses/mês/dias/dia,
// optionally followed by one space or dot).
// intervalordate is either a run of 6-8 digits, 2-3 digits followed by 1-2 underscores, or a
// word-bounded token assembled from an optional day number, optional " de ", an optional
// month-like word, optional " de ", an optional 19/20 prefix and one digit.
// The look-behind requires a non-word character immediately before intervalordate.
internal const string GrabDatesWithMonthsInLettersOrPureNumbersFromFile = @"(?<relationandinterval>(?<temporalrelation> em | por )?(?<=\W)(?<intervalordate>(?:\d{6,8}|\d{2,3}_{1,2})|(?:\b(0?[1-9]|[12][0-9]|3[01])*(?: de )?([JFMASONDjfmasond]([a-z0-9]+)([ç]{0,1})([a-z0-9]+)*)*(?: de )?(19|20)?[0-9]{1}\b))(?<timeframe> anos[ .]{0,1}| ano[ .]{0,1}| meses[ .]{0,1}| mês[ .]{0,1}| dias[ .]{0,1}| dia[ .]{0,1})?)";
// Group "date": a run of 6-8 digits, or 2-3 digits followed by 1-2 underscores.
internal const string GrabDatesOrPureNumbersOrDatesWithCardFromPartOfFile = @"(?<date>\d{6,8}|\d{2,3}_{1,2})";
// Group "date": two digits, one '_' or '.', two digits, one '_' or '.', then four or two digits.
// The negative look-behind rejects a candidate that is immediately preceded by '_' or '.'.
internal const string GrabDatesIncorrectFormattedWithInvalidChars = @"(?<![_.]{1})(?<date>\d{2}[_.]{1}\d{2}[_.]{1}(?:\d{4}|\d{2}))";
// Group "dateUTC" with sub-groups year, month, day, hour, minute, second and an optional
// trailing 'Z' (group "zeroUTCoffse"; the spelling is kept because callers may look it up by name).
// Two layouts are accepted: the separated form yyyy-MM-ddTHH:mm:ss and the compact form
// yyyyMMddHHmmss; both allow an optional fractional-seconds part (a dot followed by digits).
// The fraction is written as \. because this is a verbatim string: the former \\. stood for
// "a literal backslash followed by any character", so fractions were never consumed.
internal const string GrabDatesUTC = @"(?<dateUTC>(?<year>-?(?:[1-9][0-9]*)?[0-9]{4})-(?<month>1[0-2]|0[1-9])-(?<day>3[01]|0[1-9]|[12][0-9])T(?<hour>2[0-3]|[01][0-9]):(?<minute>[0-5][0-9]):(?<second>[0-5][0-9])(\.[0-9]+)?(?<zeroUTCoffse>Z)?|(?<year>(?:[1-9][0-9]*)?[0-9]{4})(?<month>1[0-2]|0[1-9])(?<day>3[01]|0[1-9]|[12][0-9])(?<hour>2[0-3]|[01][0-9])(?<minute>[0-5][0-9])(?<second>[0-5][0-9])(\.[0-9]+)?(?<zeroUTCoffse>Z)?)";
// Group "interval" with sub-groups: starting (shortest run of characters from a fixed letter/space
// set), datebegin (2-8 digits, or 14 digits not followed by a digit), link (shortest run of
// characters from a second fixed letter/space set) and dateend (4-8 or 14 digits, not followed by
// a digit). Look-behinds require a non-word character before the interval and before dateend,
// and reject a dateend that is preceded by a word character plus '-'.
internal const string GrabDateIntervalFromPartOfFile = @"(?<=\W)(?<interval>(?<starting>[ mçinícandouetr]+?)(?<datebegin>\d{2,8}|\d{14}(?!\d))(?<link>[ eatéfimnldzou]+?)(?<=\W)(?<!\w-)(?<dateend>\d{4,8}|\d{14})(?!\d))";
// Three alternatives: date followed by version, version followed by date, or date alone.
// The second and third alternatives require a literal dot afterwards.
// NOTE(review): the first alternative ends with an unescaped '.', which matches any character
// rather than only a dot - confirm whether a literal dot was intended.
internal const string GrabDateAndVersionDotAtEnd = @"(?<date>\d{6,8}|\d{2,3}_{1,2}) {1,2}(?<version>[vVersionão]+[\d@]+[\w@]*|[\d@]+[vVersionão]+[\w@]*).|(?<version>[vVersionão]+[\d@]+[\w@]*|[\d@]+[vVersionão]+[\w@]*) {1,2}(?<date>\d{6,8}|\d{2,3}_{1,2})\.|(?<date>\d{6,8}|\d{2,3}_{1,2})\.";
// Matches "backup_" followed by yyyy_MMM_dd__hh_mm_ss, exposing the groups date, yyyy, MMM, dd,
// hh, mm and ss. MMM is one month-initial letter followed by two letters from a fixed set.
internal const string GrabDatePersonalizedForAnotaçãoOutlookBackup = @"backup_(?<date>(?<yyyy>(?:19|20)[0-9]{2})_(?<MMM>[JFMASONDjfmasond]{1}[aeiouAEIOUnNvVrRbBlLgGtTzZpPyYcC]{2})_(?<dd>0[1-9]|[12][0-9]|3[01])__(?<hh>0?[0-9]|[1][0-9]|[2][0-3])_(?<mm>[0-9]{2})_(?<ss>[0-9]{2}))";
// Same layout as the pattern above, with one extra character between MMM and the following '_'.
// NOTE(review): that character is an unescaped '.', so it matches any character, not only a
// dot - confirm whether a literal dot was intended.
internal const string GrabDatePersonalizedForAnotaçãoOutlookBackupWithDotAfterMMM = @"backup_(?<date>(?<yyyy>(?:19|20)[0-9]{2})_(?<MMM>[JFMASONDjfmasond]{1}[aeiouAEIOUnNvVrRbBlLgGtTzZpPyYcC]{2})._(?<dd>0[1-9]|[12][0-9]|3[01])__(?<hh>0?[0-9]|[1][0-9]|[2][0-3])_(?<mm>[0-9]{2})_(?<ss>[0-9]{2}))";
// Group "date": a lower-case three-letter month abbreviation (Portuguese and English spellings
// are both listed) followed by a two-digit year optionally prefixed with 19 or 20.
// "datewithdot" requires a literal dot between month and year; "datewithoutdot" allows none.
internal const string GrabAbbreviatedMonthWithTwoOrFourDigitYearWithDotSeparatingThemOrNot = @"(?<date>(?<datewithdot>(?<MMM>jan|fev|feb|mar|abr|apr|mai|may|jun|jul|ago|aug|set|sep|out|oct|nov|dez|dec)\.(?<yyyy>(?:19|20){0,1}[0-9]{2}))|(?<datewithoutdot>(?<MMM>jan|fev|feb|mar|abr|apr|mai|may|jun|jul|ago|aug|set|sep|out|oct|nov|dez|dec)(?<yyyy>(?:19|20){0,1}[0-9]{2})))";
// Group "date": a lower-case full month name (Portuguese and English spellings are both listed)
// followed by a two-digit year optionally prefixed with 19 or 20.
// "datewithsomecharseparating" requires exactly one of '-', '.', '_' or space after the month
// name; "datewithoutseparation" allows no separator.
internal const string GrabFullMonthWithTwoOrFourDigitYearWithSomeCharSeparatingThemOrNot = @"(?<date>(?:(?<datewithsomecharseparating>(?<MMM>janeiro|january|fevereiro|february|março|march|abril|april|maio|may|junho|julho|agosto|august|setembro|september|outubro|october|novembro|november|dezembro|december)[-._ ]{1})|(?<datewithoutseparation>(?<MMM>janeiro|january|fevereiro|february|março|march|abril|april|maio|may|junho|julho|agosto|august|setembro|september|outubro|october|novembro|november|dezembro|december)))(?<yyyy>(?:19|20){0,1}[0-9]{2}))";
// Group "version": either whitespace, 'v', optional letters from a fixed set, then one or more
// digits/'@' and optional word characters/'@'; or digits/'@' first, followed by 'v' and the same tail.
internal const string GrabVersionFromFile = @"(?<version>\sv[ersionão]*[\d@]+[\w@]*|[\d@]+v[ersionão]*[\w@]*)";
// Group "fullname" is matched lazily; it is followed either by a dot plus group "extension"
// (characters other than '.') at the end anchor, or directly by the end anchor.
internal const string GrabNameAndExtension = @"(?<fullname>.+?)(\.(?<extension>[^.]*)$|$)";
// Repeated runs of word characters, spaces and the punctuation listed in the character class.
internal const string GrabPartsOfName = @"([\w _<>+;.,']*)*";
// A hyphen with exactly one space on each side.
internal const string GrabDelimiterWithSpaceAround = @"( - )";
// Group "FakeDelimiter": one hyphen-like character (three variants are listed) that is directly
// preceded and directly followed by a word character; "before" and "after" capture the
// surrounding text.
// NOTE(review): inside the "before"/"after" character classes, " -." is a range from space to
// dot rather than three separate characters - confirm that this is intended.
internal const string GrabFakeDelimiterWithoutAnySpaceAround = @"(?<before>[\d\w' -.]*)(?<=\w)(?<FakeDelimiter>[-˗–])(?=\w)(?<after>[\d\w -.]*)";
/// <summary>
/// Matches <paramref name="rgx"/> against <paramref name="part"/> and returns the value of
/// <paramref name="groupName"/> wrapped in a Result.
/// </summary>
/// <param name="rgx">Compiled regular expression to apply.</param>
/// <param name="part">Text to search.</param>
/// <param name="groupName">Name of the capture group whose value is wanted.</param>
/// <param name="errorMsg">Message carried by the failure Result when the regex does not match.</param>
/// <returns>The Result produced by the GetMatch overload that has an out parameter.</returns>
internal static Result<string> ReturnResult(Regex rgx, string part, string groupName, string errorMsg)
{
    // The Match object itself is not needed here; only the Result handed back through the
    // out parameter is returned to the caller.
    _ = GetMatch(rgx, part, groupName, errorMsg, out var result);
    return result;
}
/// <summary>
/// Flattens the groups of every match in <paramref name="matches"/> into a single list of
/// (group name, value) pairs.
/// </summary>
/// <param name="matches">Matches to convert; they are read through GetAllMatchesWithNamedGroups.</param>
/// <param name="errorMsg">Message carried by the failure Result when nothing was captured.</param>
/// <returns>
/// Failure with <paramref name="errorMsg"/> when no capture dictionary was produced; otherwise
/// Success holding all pairs of all matches, in enumeration order.
/// </returns>
internal static Result<List<(string namedGroup, string value)>> ReturnResults(MatchCollection matches, string errorMsg = "No named group was captured")
{
    var captures = GetAllMatchesWithNamedGroups(matches);

    // Fail only when there is nothing to report; an unconditional failure here would make
    // the conversion below unreachable.
    if (captures.Count == 0)
    {
        return Result.Failure<List<(string, string)>>(errorMsg);
    }

    var result = new List<(string namedGroup, string value)>();
    foreach (var capture in captures)
    {
        foreach (var namedGroup in capture)
        {
            result.Add((namedGroup.Key, namedGroup.Value));
        }
    }

    return Result.Success(result);
}
/// <summary>
/// Converts every match in <paramref name="matches"/> into its own list of
/// (group name, value) pairs.
/// </summary>
/// <param name="matches">Matches to convert; they are read through GetAllMatchesWithNamedGroups.</param>
/// <param name="errorMsg">Message carried by the failure Result when nothing was captured.</param>
/// <returns>
/// Failure with <paramref name="errorMsg"/> when no capture dictionary was produced; otherwise
/// Success holding one inner list per match, in enumeration order.
/// </returns>
internal static Result<List<List<(string namedGroup, string value)>>> ReturnAllResults(MatchCollection matches, string errorMsg = "No named group was captured")
{
    var captures = GetAllMatchesWithNamedGroups(matches);

    // Fail only when there is nothing to report; an unconditional failure here would make
    // the conversion below unreachable.
    if (captures.Count == 0)
    {
        return Result.Failure<List<List<(string, string)>>>(errorMsg);
    }

    var results = new List<List<(string namedGroup, string value)>>(captures.Count);
    foreach (var capture in captures)
    {
        // A fresh list per match: a single buffer shared across iterations would carry the
        // pairs of earlier matches over into every later inner list.
        var result = new List<(string namedGroup, string value)>(capture.Count);
        foreach (var namedGroup in capture)
        {
            result.Add((namedGroup.Key, namedGroup.Value));
        }

        results.Add(result);
    }

    return Result.Success(results);
}
/// <summary>
/// Walks the matches in <paramref name="matches"/>, writes each match's ordinal, value and
/// length to the console, and copies group values into string-keyed dictionaries.
/// </summary>
/// <param name="matches">Match collection to enumerate.</param>
/// <remarks>
/// NOTE(review): the declarations of <c>counter</c> and <c>key</c>, the statement that stores
/// each dictionary in <c>captures</c>, the return statement and all block delimiters are not
/// visible in this view. How keys are derived, whether any groups are skipped, and the exact
/// nesting of the statements below must be confirmed against the full file.
/// </remarks>
internal static List<Dictionary<string, string>> GetAllMatchesWithNamedGroups(MatchCollection matches)
var captures = new List<Dictionary<string, string>>();
foreach (Match itemMatch in matches)
// Pre-increment: the counter is advanced before it is printed.
Console.WriteLine($"// \t Match {++counter}:");
Console.WriteLine($"// \t \t Value: {itemMatch.Value}; Length: {itemMatch.Length}");
var itemGroup = new Dictionary<string, string>();
foreach (Group group in itemMatch.Groups)
// Dictionary.Add throws when the key is already present, so keys must be unique per dictionary.
itemGroup.Add(key, group.Value);
/// <summary>
/// Runs <paramref name="rgx"/> against <paramref name="part"/> after its leading whitespace has
/// been replaced by exactly one space, and reports the trimmed value of
/// <paramref name="groupName"/> through <paramref name="result"/>.
/// </summary>
/// <param name="rgx">Compiled regular expression to apply.</param>
/// <param name="part">Text to search.</param>
/// <param name="groupName">Name of the capture group whose value is wanted.</param>
/// <param name="errorMsg">Message carried by the failure Result when the regex does not match.</param>
/// <param name="result">
/// Success with the trimmed group value when the regex matches (an empty string when the group
/// captured nothing); otherwise Failure carrying <paramref name="errorMsg"/>.
/// </param>
/// <returns>The underlying Match, whether or not it succeeded.</returns>
internal static Match GetMatch(Regex rgx, string part, string groupName, string errorMsg, out Result<string> result)
{
    // Use the shared helper for the "one leading space" normalisation instead of repeating
    // the concatenation by hand; the produced input string is the same.
    var match = rgx.Match(part.EnsureOneSingleSpaceAtBegin());

    result = match.Success
        ? Result.Success(match.Groups[groupName].Value.Trim())
        : Result.Failure<string>(errorMsg);

    return match;
}
/// <summary>
/// Returns the first match of <paramref name="pattern"/> in <paramref name="part"/> prefixed
/// with one space.
/// </summary>
/// <param name="pattern">Regular-expression source.</param>
/// <param name="part">Text to search.</param>
/// <returns>The first Match; check <c>Success</c> before using it.</returns>
/// <remarks>
/// Unlike the sibling overloads, this one does not strip existing leading whitespace before
/// adding the space, so match indexes are always offset by exactly one.
/// </remarks>
internal static Match GetMatch(string pattern, string part)
{
    // The static Regex.Match overload reuses the framework's cache of parsed patterns, so
    // repeated calls with the same pattern do not build a new Regex each time.
    return Regex.Match(" " + part, pattern);
}
/// <summary>
/// Builds a Regex from <paramref name="pattern"/> and matches it against
/// <paramref name="part"/> after its leading whitespace has been replaced by exactly one space.
/// </summary>
/// <param name="pattern">Regular-expression source.</param>
/// <param name="part">Text to search.</param>
/// <remarks>
/// NOTE(review): the return statement is not visible in this view, so which Group of the
/// match is handed back cannot be confirmed from here.
/// </remarks>
internal static Group GetMatchMainGroup(string pattern, string part)
var rgx = new Regex(pattern);
var match = rgx.Match(part.EnsureOneSingleSpaceAtBegin());
/// <summary>
/// Looks up the group called <paramref name="groupName"/> in the first match of
/// <paramref name="pattern"/> within <paramref name="part"/>.
/// </summary>
/// <param name="pattern">Regular-expression source.</param>
/// <param name="part">Text to search; its leading whitespace is replaced by exactly one space first.</param>
/// <param name="groupName">Name of the capture group to return.</param>
/// <returns>The requested Group of the first match.</returns>
internal static Group GetMatch(string pattern, string part, string groupName)
    => new Regex(pattern)
        .Match(part.EnsureOneSingleSpaceAtBegin())
        .Groups[groupName];
/// <summary>
/// Returns all matches of <paramref name="pattern"/> in <paramref name="part"/>, using
/// RegexOptions.Multiline.
/// </summary>
/// <param name="pattern">Regular-expression source.</param>
/// <param name="part">Text to search; its leading whitespace is replaced by exactly one space first.</param>
/// <returns>The collection of matches (possibly empty).</returns>
internal static MatchCollection GetMatches(string pattern, string part)
{
    // The static Regex.Matches overload reuses the framework's cache of parsed patterns, so
    // running the same pattern over many inputs does not build a new Regex each time.
    return Regex.Matches(part.EnsureOneSingleSpaceAtBegin(), pattern, RegexOptions.Multiline);
}
/// <summary>
/// Returns all matches of the prepared <paramref name="rgx"/> in <paramref name="part"/>.
/// </summary>
/// <param name="rgx">Compiled regular expression to apply; its own options are used as-is.</param>
/// <param name="part">Text to search; its leading whitespace is replaced by exactly one space first.</param>
/// <returns>The collection of matches (possibly empty).</returns>
internal static MatchCollection GetMatches(Regex rgx, string part)
{
    return rgx.Matches(part.EnsureOneSingleSpaceAtBegin());
}
/// <summary>String extension helpers.</summary>
public static class MyExtensions
{
    const char OneBlankSpace = ' ';

    /// <summary>
    /// Removes all leading whitespace from <paramref name="text"/> and prefixes the remainder
    /// with exactly one space.
    /// </summary>
    public static string EnsureOneSingleSpaceAtBegin(this string text)
    {
        string withoutLeadingWhitespace = text.TrimStart();
        return $"{OneBlankSpace}{withoutLeadingWhitespace}";
    }

    /// <summary>
    /// Tells whether no character of the string lies outside '0'..'9'.
    /// An empty string therefore yields <c>true</c>.
    /// </summary>
    public static bool IsDigit(this string @this)
    {
        return !@this.Any(ch => ch < '0' || ch > '9');
    }
}