using System.Collections.Generic;
/// <summary>
/// String helpers for producing text that is safe to embed in XML.
/// </summary>
public static class XmlExtensions
{
    /// <summary>
    /// Removes (or replaces) every UTF-16 code unit in <paramref name="xmlText"/> that is not a valid
    /// XML character according to <see cref="XmlConvert.IsXmlChar(char)"/>, while keeping well-formed
    /// surrogate pairs intact.
    /// </summary>
    /// <param name="xmlText">The text to sanitize.</param>
    /// <param name="fallback">
    /// Optional replacement emitted once for each invalid code unit. When <c>null</c> (the default),
    /// invalid code units are simply dropped. The replacement itself is not validated.
    /// </param>
    /// <returns>
    /// The sanitized text. When nothing had to be removed, the original <paramref name="xmlText"/>
    /// instance is returned and no builder is allocated.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="xmlText"/> is <c>null</c>.</exception>
    public static string RemoveInvalidXmlCharacters(this string xmlText, char? fallback = null)
    {
        ArgumentNullException.ThrowIfNull(xmlText);

        // Created lazily on the first invalid code unit so that clean input costs no allocation.
        StringBuilder? sb = null;

        for (int i = 0; i < xmlText.Length; i++)
        {
            char current = xmlText[i];

            if (XmlConvert.IsXmlChar(current))
            {
                // Only copy once we know the result differs from the input.
                sb?.Append(current);
            }
            else if (i < xmlText.Length - 1 && XmlConvert.IsXmlSurrogatePair(xmlText[i + 1], current))
            {
                // IsXmlSurrogatePair takes (lowChar, highChar): current is the high half, i + 1 the low half.
                sb?.Append(xmlText, i, 2);

                // Skip the low surrogate; on its own it would be classified as invalid.
                i++;
            }
            else
            {
                // First invalid code unit: seed the builder with everything accepted so far.
                sb ??= new StringBuilder(xmlText.Length).Append(xmlText, 0, i);

                if (fallback is char replacement)
                {
                    sb.Append(replacement);
                }
            }
        }

        return sb?.ToString() ?? xmlText;
    }
}
/// <summary>
/// Runs every sample (input, fallback, expected output) triple through the
/// three-argument Test overload.
/// </summary>
public static void Test()
{
    // Text that must survive sanitizing untouched. It is written once and shared by the input and
    // the expected value so the two can never drift apart.
    const string prefix = "👩🏽•🚒 ";

    // 'H' + combining circumflex (U+0302) + a supplementary-plane ideograph (U+29E3D). Explicit
    // escapes keep the ordinal comparison independent of any Unicode normalization applied to
    // this source file.
    const string suffix = "H\u0302\U00029E3D";

    // Fixed-width \u escapes are used for the control characters: unlike \x they cannot
    // accidentally absorb a following hex digit.
    (string bad, char? fallback, string good)[] xmlWithInvalidCharacters =
    {
        (bad: prefix + "\u0001Hello, \u0001\u001EWorld! " + suffix, fallback: null, good: prefix + "Hello, World! " + suffix),
        (bad: "\u0000\u0001\u0002\u0003", fallback: null, good: ""),
        (bad: "\u0000\u0001\u0002\u0003", fallback: '?', good: "????"),
    };

    foreach (var (bad, fallback, good) in xmlWithInvalidCharacters)
    {
        Test(bad, fallback, good);
    }
}
/// <summary>
/// Verifies one sample: the input must be rejected by <c>XmlConvert.VerifyXmlChars</c>, the
/// sanitized result must be accepted by it, and the result must equal the expected text.
/// </summary>
/// <param name="badXmlText">Text containing at least one invalid XML character.</param>
/// <param name="fallback">Replacement character, or <c>null</c> to drop invalid characters.</param>
/// <param name="expectedFixedXmlText">The exact text expected after sanitizing.</param>
static void Test(string badXmlText, char? fallback, string expectedFixedXmlText)
{
    // Sanity check on the sample itself. Requiring XmlException (rather than any Exception)
    // prevents an unrelated failure from being mistaken for "input is invalid XML".
    Assert.Throws<XmlException>(() => XmlConvert.VerifyXmlChars(badXmlText));

    // Deliberately exercise both call shapes: omitted optional argument and explicit fallback.
    var @fixed = fallback is null
        ? badXmlText.RemoveInvalidXmlCharacters()
        : badXmlText.RemoveInvalidXmlCharacters(fallback);

    Assert.DoesNotThrow(() => XmlConvert.VerifyXmlChars(@fixed));
    Console.WriteLine("Original: {0}\n Fixed: {1}", badXmlText, @fixed);

    // Constraint form reports expected and actual values on failure; a bare boolean does not.
    Assert.That(@fixed, Is.EqualTo(expectedFixedXmlText));
}
// Console entry point (only partially visible here).
// Prints a one-line environment banner: framework description, CLR version, OS version and the
// platform newline sequence rendered as a JSON string so its escape characters are visible.
public static void Main()
Console.WriteLine("Environment version: {0} ({1}, {2}, NewLine: {3}).",
System.Runtime.InteropServices.RuntimeInformation.FrameworkDescription , Environment.Version, Environment.OSVersion,
System.Text.Json.JsonSerializer.Serialize(Environment.NewLine));
// NOTE(review): presumably written from an exception handler wrapped around the test run; the
// surrounding try/catch is not visible in this view, so confirm before relying on this.
Console.WriteLine("Failed with unhandled exception: ");