100
1
using System;
2
using System.Globalization;
3
using System.Text.RegularExpressions;
4
5
public static class Program
6
{
7
8
// Regex source: an alternation of prefixed hexadecimal notations, each a literal prefix
// followed by hex digits. Prefixes covered: 0x, %x, \u, &#x...;, &#...;, \x, \s, U+, X',
// 16#, #x, #16r, &H, 0h, a bare # and a bare %.
// Most alternatives require two or more digits; the &#x...;, &#...; and bare # forms accept
// one to six digits, and #16r accepts two to six.
// The character classes list only lowercase a-f; upper-case digits are matched because
// rgxHexPre is constructed with RegexOptions.IgnoreCase.
// NOTE(review): the &#...; alternative (no 'x') also accepts a-f digits — confirm whether
// that form is meant to capture hex or decimal references.
const string strRegHexPrefixCandidates = @"0x[0-9a-f]{2,}|%x[0-9a-f]{2,}|\\u[0-9a-f]{2,}|&#x([0-9a-f]){1,6};|&#([0-9a-f]){1,6};|\\x[0-9a-f]{2,}|\\s[0-9a-f]{2,}|U\+[0-9a-f]{2,}|X'[0-9a-f]{2,}|16#([0-9a-f]){2,}|#x([0-9a-f]){2,}|#16r([0-9a-f]){2,6}|&H([0-9a-f]){2,}|0h([0-9a-f]){2,}|#([0-9a-f]){1,6}|%[0-9a-f]{2,}";
9
// Regex source with two alternatives: a bare run of two or more hex digits, or such a run
// enclosed between one opening and one closing quotation-mark character taken from the
// bracketed sets (guillemets, typographic single/double quotes, ornamental quotes, ' and ").
// NOTE(review): as written here the runs of doubled quotes look unbalanced for a verbatim
// string (a run with an odd number of '"' would terminate the literal early). This may be
// an encoding/extraction artifact — verify the literal against the original source.
const string strRegGetHexNumber = @"[0-9a-f]{2,}|[«‹»›„‚“‟‘‛”’""""❛❜❝❞〝〞〟"""""'‘][0-9a-f]{2,}[’'""""«‹»›„‚“‟‘‛”’""""❛❜❝❞〝〞〟"]";
10
// Shared, precompiled matcher for the prefixed-hex notations in strRegHexPrefixCandidates.
// IgnoreCase lets the lowercase a-f classes match upper-case digits and prefixes;
// CultureInvariant keeps that case folding independent of the current culture.
// Multiline only alters the meaning of ^ and $, which the pattern does not use.
private static readonly Regex rgxHexPre = new Regex(
    strRegHexPrefixCandidates,
    RegexOptions.Compiled
    | RegexOptions.CultureInvariant
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase);
11
// Shared, precompiled matcher built from strRegGetHexNumber (bare or quote-wrapped hex runs).
// IgnoreCase lets the lowercase a-f classes match upper-case digits; CultureInvariant keeps
// that case folding independent of the current culture.
private static readonly Regex rgxGetHexAgressive = new Regex(
    strRegGetHexNumber,
    RegexOptions.Compiled
    | RegexOptions.CultureInvariant
    | RegexOptions.IgnoreCase);
12
public static void Main()
13
{
14
//string unicodeText = "UTF-16 (hex) 0x0023 (0023)";
15
string unicodeText = @"In XML and XHTML, characters can be expressed as hexadecimal numeric character references using the notation ode;, for instance ’ represents the character U+2019 (the right single quotation mark). If there is no x the number is decimal (thus ’ is the same character).[3]";
16
//string unicodeText = "8E2";
17
//string unicodeText = "this is the end";
18
19
string firstCandidateHexVal = string.Empty;
20
21
//https://en.wikipedia.org/wiki/Hexadecimal --remove possible prefixes
22
foreach (Match p in rgxHexPre.Matches(unicodeText))
23
{
24
if (p.Success) {
Cached Result