using System; using System.Text; using System.Linq; using System.Diagnostics;
public static class Program
{
// Based on http://www.codeproject.com/Articles/13503/Stripping-Accents-from-Latin-Characters-A-Foray-in
// Proper Normalization
/// <summary>
/// Strips accents and other non-Latin-1 characters from <paramref name="s"/> using
/// Unicode compatibility decomposition.
/// </summary>
/// <param name="s">The text to reduce. Must not be null.</param>
/// <returns>
/// The characters of the FormKD-normalized text whose code unit lies in the range 32..255;
/// everything else (control characters, combining marks, characters above U+00FF) is dropped.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
public static string UnicodeToANSI(this string s)
{
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }

    // FormKD splits an accented letter into its base letter followed by combining marks.
    // The combining marks live above U+00FF, so the range filter removes them and leaves
    // the bare base letter behind.
    // The lower bound is 32 (space) so that every control character, including U+001F,
    // is removed, consistent with ReducetoASCII.
    char[] kept = s.Normalize(NormalizationForm.FormKD)
        .Where(x => x >= 32 && x <= 255)
        .ToArray();

    return new string(kept);
}
// ANSI characters 32 to 127 correspond to those in the 7-bit ASCII character set.
/// <summary>
/// Removes control characters and every character above U+00FF from <paramref name="s"/>
/// without normalizing it first.
/// </summary>
/// <remarks>
/// Despite the name, the upper bound is 255, not 127: accented Latin-1 letters such as
/// 'è' (U+00E8) are kept as they are rather than being reduced to 7-bit ASCII.
/// </remarks>
/// <param name="s">The text to filter. Must not be null.</param>
/// <returns>The characters of <paramref name="s"/> whose code unit lies in the range 32..255, in order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
public static string ReducetoASCII(this string s)
{
    if (s == null)
    {
        throw new ArgumentNullException("s");
    }

    StringBuilder sb = new StringBuilder(s.Length);
    foreach (char c in s)
    {
        if (c > 255) // remove chars outside the 8-bit range
        {
            continue;
        }

        if (c < 32) // remove control characters
        {
            continue;
        }

        sb.Append(c);
    }

    return sb.ToString();
}
/// <summary>
/// Demo driver: prints a French and a German sample, then prints each one after
/// <c>ReducetoASCII</c> and after <c>UnicodeToANSI</c>, together with the elapsed
/// stopwatch ticks of that single call.
/// </summary>
public static void Main()
{
    Stopwatch sw = new Stopwatch();
    string french = "A Paris, le cortège parisien s’était élancé à 14 heures.\r\n\tFace à l’affluence, un «itinéraire bis» a été mis en place. D’importants rassemblements ont lieu à Bordeaux, Marseille, Rennes ou Lyon. Suivez la journée avec nos journalistes dans toute la France.";
    string german = "ޘ Trump\t\r\nverwechselt Klägerin Carroll auf Foto mit Ex-Frau – das könnte Folgen haben";

    Console.WriteLine(french);

    // Restart() zeroes the stopwatch and starts it, so every measurement below covers
    // exactly one call instead of reporting a stale or zero reading.
    sw.Restart();
    string ftemp = french.ReducetoASCII();
    sw.Stop();
    Console.WriteLine("Ansi reduced\r\n" + ftemp + " in " + sw.ElapsedTicks);

    sw.Restart();
    ftemp = french.UnicodeToANSI();
    sw.Stop();
    Console.WriteLine("Proper Normalization\r\n" + ftemp + " in " + sw.ElapsedTicks);

    Console.WriteLine();
    Console.WriteLine(german);

    sw.Restart();
    string gtemp = german.ReducetoASCII();
    sw.Stop();
    Console.WriteLine("Ansi reduced\r\n" + gtemp + " in " + sw.ElapsedTicks);

    sw.Restart();
    gtemp = german.UnicodeToANSI();
    sw.Stop();
    Console.WriteLine("Proper Normalization\r\n" + gtemp + " in " + sw.ElapsedTicks);
}