using System.Collections.Generic;
/// <summary>
/// Helpers that strip (or replace) characters which are not permitted by the XML 1.0
/// <c>Char</c> production, both in raw strings and in LINQ to XML object trees.
/// </summary>
public static class XDocumentExtensions
{
    // With CheckCharacters disabled the reader no longer rejects character entity references
    // such as "&#x1;". Those survive the raw-string pass (the entity text itself is made of
    // valid characters) and are cleaned up afterwards on the loaded tree.
    static readonly XmlReaderSettings noCheckedCharacterParseSettings = new() { CheckCharacters = false, };

    /// <summary>
    /// Parses <paramref name="xml"/> into an <see cref="XDocument"/>, removing invalid XML
    /// characters both before parsing (raw characters) and after parsing (characters that
    /// were introduced through character entity references).
    /// </summary>
    /// <param name="xml">The XML markup to parse.</param>
    /// <param name="fallback">
    /// Optional replacement for each invalid character; when <c>null</c> invalid characters are dropped.
    /// </param>
    /// <returns>The parsed and sanitised document.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="xml"/> is <c>null</c>.</exception>
    public static XDocument ParseAndRemoveInvalidXmlCharacters(string xml, char? fallback = null)
    {
        ArgumentNullException.ThrowIfNull(xml);
        using var reader = new StringReader(xml.RemoveInvalidXmlCharacters(fallback));
        using var xmlReader = XmlReader.Create(reader, noCheckedCharacterParseSettings);
        return XDocument.Load(xmlReader).RemoveInvalidXmlCharacters(fallback);
    }

    /// <summary>
    /// Removes invalid XML characters, in place, from the values of <paramref name="node"/>
    /// and everything below it: text (including CDATA), attribute values and comments.
    /// </summary>
    /// <typeparam name="TXObject">The concrete LINQ to XML type of <paramref name="node"/>.</typeparam>
    /// <param name="node">The node to sanitise. Node types without a case below are returned untouched.</param>
    /// <param name="fallback">
    /// Optional replacement for each invalid character; when <c>null</c> invalid characters are dropped.
    /// The same fallback is applied at every level of the tree.
    /// </param>
    /// <returns>The same <paramref name="node"/> instance, to allow call chaining.</returns>
    public static TXObject RemoveInvalidXmlCharacters<TXObject>(this TXObject node, char? fallback = null) where TXObject : XObject
    {
        switch (node)
        {
            case XText text: // XCData derives from XText, so CDATA sections are covered too.
                text.Value = text.Value.RemoveInvalidXmlCharacters(fallback);
                break;

            case XAttribute attribute:
                attribute.Value = attribute.Value.RemoveInvalidXmlCharacters(fallback);
                break;

            case XComment comment:
                comment.Value = comment.Value.RemoveInvalidXmlCharacters(fallback);
                break;

            // XElement must be matched before XContainer because it also carries attributes.
            case XElement element:
                foreach (var attr in element.Attributes())
                {
                    attr.RemoveInvalidXmlCharacters(fallback);
                }
                foreach (var child in element.Nodes())
                {
                    child.RemoveInvalidXmlCharacters(fallback);
                }
                break;

            // Covers XDocument: walking every top-level node sanitises the root element
            // as well as comments that sit beside it.
            case XContainer container:
                foreach (var child in container.Nodes())
                {
                    child.RemoveInvalidXmlCharacters(fallback);
                }
                break;
        }

        return node;
    }

    /// <summary>
    /// Returns <paramref name="xmlText"/> with every character that is not a valid XML character
    /// (and not part of a valid surrogate pair) removed or replaced.
    /// </summary>
    /// <param name="xmlText">The text to sanitise.</param>
    /// <param name="fallback">
    /// Optional replacement for each invalid character; when <c>null</c> invalid characters are dropped.
    /// The fallback itself is not validated.
    /// </param>
    /// <returns>
    /// The original string instance when nothing had to change, otherwise a new sanitised string.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="xmlText"/> is <c>null</c>.</exception>
    public static string RemoveInvalidXmlCharacters(this string xmlText, char? fallback = null)
    {
        ArgumentNullException.ThrowIfNull(xmlText);

        // Allocated lazily on the first invalid character so clean input costs no allocation.
        StringBuilder? sb = null;

        for (int i = 0; i < xmlText.Length; i++)
        {
            if (XmlConvert.IsXmlChar(xmlText[i]))
            {
                sb?.Append(xmlText[i]);
            }
            else if (i < xmlText.Length - 1 && XmlConvert.IsXmlSurrogatePair(xmlText[i + 1], xmlText[i]))
            {
                // IsXmlSurrogatePair takes (lowChar, highChar); keep both halves and skip the low one.
                sb?.Append(xmlText, i, 2);
                i++;
            }
            else
            {
                if (sb is null)
                {
                    // First invalid character: copy the clean prefix seen so far.
                    sb = new StringBuilder(xmlText.Length);
                    sb.Append(xmlText, 0, i);
                }
                if (fallback.HasValue)
                {
                    sb.Append(fallback.Value);
                }
            }
        }

        return sb?.ToString() ?? xmlText;
    }
}
// Demo driver: feeds the same sample text to the XmlDocument scenario and, together with a
// hand-written XML string, to the XDocument scenario.
public static void Test()
// Sample text mixing characters outside the XML 1.0 Char range (U+0001, U+001E) with valid
// ones: a combining circumflex (U+0302) and U+29E3D, which C# stores as a surrogate pair.
// Each "\x" escape here is followed by a non-hex character, so it ends after two digits.
string message = "\x01Hello, \x01\x1EWorld! H\u0302\U00029E3D";
TestXmlDocument(message);
// Raw markup carrying the same characters unescaped in both an attribute value and element content.
string xmlWithInvalidCharacters = "<greeting attribute=\"Attrbute with escapes: \x01\x1E H\u0302\U00029E3D\">Hello, \x01\x1EWorld! H\u0302\U00029E3D</greeting>";
TestXDocument(message, xmlWithInvalidCharacters);
// Exercises the classic XmlDocument API with `message` as element text and prints the
// serialized markup at each step.
// NOTE(review): no braces or try/catch are visible in this excerpt, so how these statements
// are grouped (and which of them are expected to throw) cannot be told from here — confirm.
static void TestXmlDocument(string message)
Console.WriteLine("Testing XmlDocument:");
// Build <greeting> as the document element, then assign the sample text as its inner text.
XmlDocument xmlDoc = new XmlDocument();
XmlElement root = xmlDoc.CreateElement("greeting");
xmlDoc.AppendChild(root);
root.InnerText = message;
Console.WriteLine(xmlDoc.OuterXml);
// Round-trip: load the serialized markup into a second document and print it again.
var xmlDoc2 = new XmlDocument();
xmlDoc2.LoadXml(xmlDoc.OuterXml);
Console.WriteLine(xmlDoc2.OuterXml);
// The variable name suggests OuterXml carries the invalid characters as hex character
// entities — TODO confirm against actual output.
string xmlWithEscapedHexEntity = xmlDoc.OuterXml;
// Second round-trip, this time replacing xmlDoc itself with a fresh instance.
xmlDoc = new XmlDocument();
xmlDoc.LoadXml(xmlWithEscapedHexEntity);
Console.WriteLine(xmlDoc.OuterXml);
// Exercises the LINQ to XML helpers two ways: sanitising a programmatically built document,
// and parsing raw markup through ParseAndRemoveInvalidXmlCharacters. In both cases the
// result is printed and then re-parsed from its string form.
// NOTE(review): `Assert` is not declared in this excerpt — presumably a test-framework type; confirm.
static void TestXDocument(string message, string xmlWithInvalidCharacters)
Console.WriteLine("\nTesting XDocument: with {0} and\n{1}\n", message, xmlWithInvalidCharacters);
// Case 1: build <greeting>message</greeting> in memory and sanitise the tree in place
// (no fallback argument is passed, so the parameter's default of null applies).
var xdoc = new XDocument(
new XElement("greeting", message)
).RemoveInvalidXmlCharacters();
Console.WriteLine(xdoc.ToString());
// The sanitised document must survive a serialize/parse round-trip.
Assert.DoesNotThrow(() => XDocument.Parse(xdoc.ToString()));
// NOTE(review): `ex` has no visible declaration — presumably this line sits in a catch
// clause whose header is missing from this excerpt; confirm.
Console.WriteLine($"XDocument creation error: {ex}");
// Case 2: parse raw markup that contains invalid characters.
var xDoc = XDocumentExtensions.ParseAndRemoveInvalidXmlCharacters(xmlWithInvalidCharacters);
Console.WriteLine(xDoc.ToString());
Assert.DoesNotThrow(() => XDocument.Parse(xDoc.ToString()));
// NOTE(review): same as above — `ex` presumably comes from a catch clause not shown here.
Console.WriteLine($"XDocument parse failure: {ex}");
// Program entry point: prints a one-line description of the runtime environment.
// NOTE(review): the call that runs the tests and the catch clause around the final message
// are not visible in this excerpt — confirm the surrounding structure.
public static void Main()
// Framework description, CLR version, OS version, and the platform newline serialized as
// JSON so its escape sequence is visible in the output.
Console.WriteLine("Environment version: {0} ({1}, {2}, NewLine: {3}).",
System.Runtime.InteropServices.RuntimeInformation.FrameworkDescription , Environment.Version, Environment.OSVersion,
System.Text.Json.JsonSerializer.Serialize(Environment.NewLine));
// Header for an unhandled failure; presumably followed by the exception details — TODO confirm.
Console.WriteLine("Failed with unhandled exception: ");