using System;using System.Text;using System.Diagnostics;using System.Collections.Generic;using System.Text.RegularExpressions;
/// <summary>
/// Demonstrates three ways of stripping control / directional-formatting
/// characters from a string: a <see cref="StringBuilder"/> loop, a
/// char-buffer loop backed by a <see cref="HashSet{T}"/>, and a
/// <c>\p{C}</c> regular expression.
/// </summary>
public static class Program
{
    /// <summary>
    /// Directional-formatting code points that <see cref="Char.IsControl(char)"/>
    /// does not report as control characters but that the filters below strip
    /// anyway: U+200E, U+200F, U+202A, U+202B, U+202D, U+202E, U+2066, U+2067.
    /// </summary>
    /// <remarks>
    /// NOTE(review): U+202C, U+2068 and U+2069 are not in this list; confirm
    /// whether that omission is intentional before extending it.
    /// </remarks>
    private static readonly HashSet<char> specialUnicodeCtrlChr = new HashSet<char>(new char[] {'\u200E','\u200F','\u202A','\u202B','\u202D','\u202E', '\u2066', '\u2067'} );

    /// <summary>Matches any character in the Unicode "Other" (C) general category.</summary>
    private const string pattern = @"\p{C}";

    // Multiline only changes how ^ and $ match, so it has no effect on this
    // pattern; it is kept so the regex is constructed with the same options.
    private const RegexOptions options = RegexOptions.Multiline;

    /// <summary>Cached regex that removes every <c>\p{C}</c> character.</summary>
    private static readonly Regex regRemoveAllUni = new Regex(pattern, options);

    /// <summary>
    /// Tells whether <paramref name="c"/> is one of the extra
    /// directional-formatting characters listed in the special-character set.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> when the character is in the set; otherwise <c>false</c>.</returns>
    public static bool isSpecialUnicodeCntrlChr(this Char c)
    {
        // Single source of truth shared with FilterUnicodeControlChars.
        return specialUnicodeCtrlChr.Contains(c);
    }

    /// <summary>
    /// Returns a copy of <paramref name="s"/> without characters for which
    /// <see cref="Char.IsControl(char)"/> is true and without the special
    /// directional-formatting characters.
    /// </summary>
    /// <param name="s">The string to clean.</param>
    /// <returns>The cleaned string (possibly empty).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is <c>null</c>.</exception>
    public static string RemoveUnicodeControlChars(this string s)
    {
        if (s == null)
        {
            throw new ArgumentNullException("s");
        }

        // The result can never be longer than the input, so presize once.
        StringBuilder sb = new StringBuilder(s.Length);
        for (int i = 0; i < s.Length; i++)
        {
            if ( !Char.IsControl(s[i]) && !s[i].isSpecialUnicodeCntrlChr() )
            {
                sb.Append(s[i]);
            }
        }

        return sb.ToString();
    }

    /// <summary>
    /// Same filtering rule as <see cref="RemoveUnicodeControlChars"/>, but the
    /// kept characters are written into a char buffer instead of a
    /// <see cref="StringBuilder"/>.
    /// </summary>
    /// <param name="str">The string to clean.</param>
    /// <returns>The cleaned string (possibly empty).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is <c>null</c>.</exception>
    public static string FilterUnicodeControlChars(this string str)
    {
        if (str == null)
        {
            throw new ArgumentNullException("str");
        }

        char[] buffer = new char[str.Length];
        int index = 0; // number of characters kept so far
        foreach (char ch in str)
        {
            if ( !Char.IsControl(ch) && !specialUnicodeCtrlChr.Contains(ch))
            {
                buffer[index++] = ch;
            }
        }

        // Only the first `index` slots of the buffer hold kept characters.
        return new String(buffer, 0, index);
    }

    /// <summary>Writes the tick count measured by <paramref name="sw"/> to the console.</summary>
    private static void PrintElapsed(Stopwatch sw)
    {
        Console.WriteLine(" in {0} ticks.",sw.ElapsedTicks );
    }

    /// <summary>
    /// Runs each filtering approach on two sample strings and prints the
    /// result together with the elapsed stopwatch ticks.
    /// </summary>
    public static void Main()
    {
        Stopwatch sw = new Stopwatch();
        string cleaned;

        Console.WriteLine("Hungarian\bGrand\t\t\r\vPrix\u202EF1");

        // Time only the filtering call; console I/O stays outside the
        // measured region so it does not dominate the tick count.
        sw.Restart();
        cleaned = "Hungarian\bGrand\t\t\r\vPrix\u202EF1".RemoveUnicodeControlChars();
        sw.Stop();
        Console.Write(cleaned);
        PrintElapsed(sw);

        sw.Restart();
        cleaned = "ŐhᢰHung\u2063arian\u008D\bGrand\t\t\r\vPrix\u202EF1".RemoveUnicodeControlChars();
        sw.Stop();
        Console.WriteLine(cleaned);
        PrintElapsed(sw);

        Console.WriteLine("Using HashSet Filtering");

        sw.Restart();
        cleaned = "Hungarian\bGrand\t\t\r\vPrix\u202EF1".FilterUnicodeControlChars();
        sw.Stop();
        Console.Write(cleaned);
        PrintElapsed(sw);

        sw.Restart();
        cleaned = "ŐhᢰHung\u2063arian\u008D\bGrand\t\t\r\vPrix\u202EF1".FilterUnicodeControlChars();
        sw.Stop();
        Console.WriteLine(cleaned);
        PrintElapsed(sw);

        Console.WriteLine("\n\nUnicode Regex to remove all Control chars");

        sw.Restart();
        cleaned = regRemoveAllUni.Replace("ŐhᢰHung\u2063arian\u008D\bGrand\t\t\r\vPrix\u202EF1", "");
        sw.Stop();
        Console.WriteLine(cleaned);
        PrintElapsed(sw);
    }
}