using System.Collections;
using System.Collections.Generic;
/// <summary>
/// Static holder for <c>Utf8ByteSequenceLength</c> (referenced elsewhere in this file as
/// <c>StringExtensions.Utf8ByteSequenceLength</c>).
/// </summary>
// NOTE(review): only the class header is visible in this view; presumably the
// ReadOnlySpan<byte> IndexOf extension methods that follow are also members — confirm.
public static class StringExtensions
/// <summary>
/// Searches a span of UTF-8 bytes for a single <see cref="char"/> by delegating to the
/// <c>Rune</c> overload of <c>IndexOf</c>.
/// </summary>
/// <param name="utf8Bytes">The bytes to search. Nothing in the visible lines validates them.</param>
/// <param name="char">The UTF-16 code unit to look for.</param>
/// <returns>On the non-exceptional path, whatever the <c>Rune</c> overload returns for <c>rune</c>.</returns>
// NOTE(review): the statement that creates `rune` and the `try` paired with the catch below
// are not visible in this view. Presumably `rune` is built from @char and the catch covers
// code units that cannot form a Rune on their own (e.g. lone surrogates) — confirm, and
// confirm what the catch branch returns.
public static int IndexOf(this ReadOnlySpan<byte> utf8Bytes, char @char)
catch (ArgumentOutOfRangeException)
return utf8Bytes.IndexOf(rune);
/// <summary>
/// Searches a span of UTF-8 bytes for the UTF-8 encoding of a <c>Rune</c>, stepping through
/// the span one encoded sequence at a time rather than one byte at a time.
/// </summary>
/// <param name="utf8Bytes">The bytes to search.</param>
/// <param name="char">The rune whose UTF-8 encoding is looked for.</param>
// NOTE(review): the return statements are not visible in this view. Presumably the method
// returns the byte offset `i` of the first match and a negative value when nothing matches
// (the tests treat `index >= 0`-style results as "found") — confirm.
public static int IndexOf(this ReadOnlySpan<byte> utf8Bytes, Rune @char)
// Scratch buffer for the encoded rune; MaxBytes is declared outside this view.
Span<byte> charBytes = stackalloc byte[MaxBytes];
var n = @char.EncodeToUtf8(charBytes);
// Keep only the n bytes that EncodeToUtf8 actually wrote.
charBytes = charBytes.Slice(0, n);
// Stop once fewer bytes remain than the needle needs; advance by the length reported for the
// sequence that starts at i.
// NOTE(review): progress depends on Utf8ByteSequenceLength returning a positive value for
// every byte it is given; its return values are not visible here — confirm.
for (int i = 0, thisLength = 1; i <= utf8Bytes.Length - charBytes.Length; i += thisLength)
thisLength = Utf8ByteSequenceLength(utf8Bytes[i]);
// Match only when the sequence at i has the same length as the needle and all of the
// needle's bytes agree with the bytes starting at i.
if (thisLength == charBytes.Length && charBytes.CommonPrefixLength(utf8Bytes.Slice(i)) == charBytes.Length)
/// <summary>
/// Classifies the first byte of a UTF-8 sequence by its high bits. The self-test in this file
/// asserts that the result equals the number of bytes <c>Rune.EncodeToUtf8</c> writes for a
/// rune whose encoding starts with that byte.
/// </summary>
/// <param name="firstByte">The first byte of an encoded sequence.</param>
// NOTE(review): the return statements and the final fall-through branch are not visible in
// this view. Presumably the branches return 4, 3 and 2 respectively and everything else
// returns 1 — confirm, including what is returned for continuation bytes (10xxxxxx).
internal static int Utf8ByteSequenceLength(byte firstByte)
// Top five bits are 11110: the lead-byte pattern of a four-byte sequence.
if ( (firstByte & 0b11111000) == 0b11110000)
// Top four bits are 1110: the lead-byte pattern of a three-byte sequence.
else if ((firstByte & 0b11110000) == 0b11100000)
// Top three bits are 110: the lead-byte pattern of a two-byte sequence.
else if ((firstByte & 0b11100000) == 0b11000000)
/// <summary>
/// Runs the char and Rune test overloads over a fixed set of sample strings and needles.
/// </summary>
// NOTE(review): several string literals below look mis-encoded (mojibake), e.g. the first
// literal resembles the UTF-8 bytes of U+29E3D (spelled as an escape further down) decoded
// with a single-byte code page. Confirm the file's encoding before relying on these cases.
public static void Test()
// char needles: control characters, an ASCII letter, Latin-1 range values, U+FFFF and a
// lone low surrogate (U+DC00).
Test("π©Έ½?hello", '\a');
Test("Ξ±ππΌπΆπ°πͺπ©Έ½?hello", 'o');
Test("Ξ±ππΌπΆπ°πͺπ©Έ½?hello", '\xFF');
Test("Ξ±ππΌπΆπ°πͺπ©Έ½?hello", '\xfe');
Test("Ξ±ππΌπΆπ°πͺπ©Έ½?hello", '\x01');
Test("Ξ±ππΌπΆπ°πͺπ©Έ½?hello", '\xFFFF');
Test("Ξ±ππΌπΆπ°πͺπ©Έ½?hello", '\xDC00');
Test("Ξ±ππΌπΆπ°πͺπ©Έ½?hello", 'Ξ±');
// Rune needles: runes built from the first two UTF-16 code units of a literal, then the
// default Rune.
Test("Ξ±ππΌπΆπ°πͺπ©Έ½?hello", new Rune("πͺ"[0], "πͺ"[1]));
Test("?hello\U00029E3D", new Rune("\U00029E3D"[0], "\U00029E3D"[1]));
Test("?hello\U00029E3D", default(Rune));
/// <summary>
/// Checks the byte-span <c>IndexOf(char)</c> against <c>string.IndexOf</c> for one string and
/// one character.
/// </summary>
/// <param name="s">The text; it is encoded to UTF-8 to produce the span that is searched.</param>
/// <param name="c">The character to look for.</param>
public static void Test(string s, char c)
// First verify that the sequence-length helper agrees with the encoder for every rune in s.
AssertCharacterByteSequenceLengthCorrect(s);
ReadOnlySpan<byte> bytes = Encoding.UTF8.GetBytes(s);
var index = bytes.IndexOf(c);
// NOTE(review): the reference search ignores case while the byte search compares encoded
// bytes exactly; the two could disagree for needles that differ from the text only in case.
// Confirm whether StringComparison.Ordinal was intended.
var charIndex = s.IndexOf(c, StringComparison.OrdinalIgnoreCase);
var charFound = charIndex >= 0;
Console.WriteLine($"For s={s} and c={c}, index={index}, charIndex={charIndex}");
// `found` is declared outside this view; presumably it is derived from `index` — confirm.
Assert.That(charFound == found, $"For s={s} and c={c}, {charFound} != {found}");
var charBytes = Encoding.UTF8.GetBytes(c.ToString());
// Positions are compared only when the needle encodes to a single byte and the text has as
// many bytes as UTF-16 code units, so byte offsets and char offsets line up.
if (charBytes.Length == 1 && bytes.Length == s.Length)
Assert.AreEqual(charIndex, index, $"{charIndex} == {index}");
/// <summary>
/// Checks the byte-span <c>IndexOf(Rune)</c> against <c>string.IndexOf</c> for one string and
/// one rune.
/// </summary>
/// <param name="s">The text; it is encoded to UTF-8 to produce the span that is searched.</param>
/// <param name="c">The rune to look for.</param>
public static void Test(string s, Rune c)
// First verify that the sequence-length helper agrees with the encoder for every rune in s.
AssertCharacterByteSequenceLengthCorrect(s);
ReadOnlySpan<byte> bytes = Encoding.UTF8.GetBytes(s);
var index = bytes.IndexOf(c);
// NOTE(review): the reference search ignores case while the byte search compares encoded
// bytes exactly; the two could disagree for needles that differ from the text only in case.
// Confirm whether StringComparison.Ordinal was intended.
var charIndex = s.IndexOf(c.ToString(), StringComparison.OrdinalIgnoreCase);
var charFound = charIndex >= 0;
Console.WriteLine($"For s={s} and c={c}, index={index}, charIndex={charIndex}");
// `found` is declared outside this view; presumably it is derived from `index` — confirm.
Assert.That(charFound == found, $"For s={s} and c={c}, {charFound} != {found}");
var charBytes = Encoding.UTF8.GetBytes(c.ToString());
// Positions are compared only when the needle encodes to a single byte and the text has as
// many bytes as UTF-16 code units, so byte offsets and char offsets line up.
if (charBytes.Length == 1 && bytes.Length == s.Length)
Assert.AreEqual(charIndex, index, $"{charIndex} == {index}");
/// <summary>
/// Walks every rune of <paramref name="s"/> and asserts that
/// <c>StringExtensions.Utf8ByteSequenceLength</c>, given the first encoded byte, reports the
/// same length that <c>Rune.EncodeToUtf8</c> actually produced.
/// </summary>
/// <param name="s">The text to walk; an exception is thrown if a rune cannot be read from it.</param>
static void AssertCharacterByteSequenceLengthCorrect(string s)
// Scratch buffer that receives the UTF-8 encoding of one rune at a time.
Span<byte> charBytes = stackalloc byte[6];
// No increment clause: the index advances by the rune's UTF-16 length at the end of each pass.
for (int i = 0; i < s.Length;)
if (!Rune.TryGetRuneAt(s, i, out Rune rune))
throw new Exception("String was not well-formed UTF-16.");
var n = rune.EncodeToUtf8(charBytes);
// The helper sees only the first byte and must predict the full encoded length n.
Assert.That(n == StringExtensions.Utf8ByteSequenceLength(charBytes[0]));
i += rune.Utf16SequenceLength;
/// <summary>
/// Program entry point: prints the runtime description, runtime version and OS version.
/// </summary>
// NOTE(review): the call that runs the tests and the try/catch around it are not visible in
// this view; presumably the "Failed with unhandled exception" line sits in a catch block and
// is followed by the exception details — confirm.
public static void Main()
Console.WriteLine("Environment version: {0} ({1}), {2}", System.Runtime.InteropServices.RuntimeInformation.FrameworkDescription, Environment.Version, Environment.OSVersion);
Console.WriteLine("Failed with unhandled exception: ");