
trivial な notes

【C#】ASCII文字に最適化された処理を提供する Ascii クラス(.NET 8~)

.NET 8 から、ASCII文字に最適化された処理を提供する Ascii クラスが追加された。


なお、複数のAPIが、char と byte (UTF8) のそれぞれ、あるいは両方を受けるシグニチャを備えている。そうした場合、ベンチマークでは char を受けるものを扱う。




/// <summary>
/// Benchmarks that compare the <c>System.Text.Ascii</c> APIs (.NET 8+) with the
/// string / Span / Encoding / Utf8 based ways of doing the same work.
/// Each operation has its own benchmark category and its own baseline.
/// </summary>
[HideColumns("Error", "StdDev", "Median", "RatioSD")]
public class AsciiBenchmark
{
    // Composed through interpolation instead of being written as a literal so that it is
    // not the same reference as `ascii`; the Equals benchmarks must not be satisfied by
    // reference equality alone.
    private readonly string target = $"The quick brown fox jumps over the lazy dog{'.'} {1234567890}";
    // `ascii` padded with one space on each side: input for the Trim benchmarks.
    private readonly string targetToBeTrimmed = $" {ascii} ";
    // Same text as `target`, as a constant.
    private const string ascii = "The quick brown fox jumps over the lazy dog. 1234567890";
    // Same text as `target` but with different letter casing in places.
    private const string asciiIgnoreCase = "the quick brown FOX jumps over the lazy DOG. 1234567890";
    // Same text as `target`, as UTF-8 bytes: input for the ToUtf16 benchmarks.
    private readonly byte[] utf8Target = "The quick brown fox jumps over the lazy dog. 1234567890"u8.ToArray();

    /// <summary>Baseline: <c>string.Equals(string)</c>.</summary>
    [Benchmark(Baseline = true), BenchmarkCategory("Equals")]
    public bool Equals() => target.Equals(ascii);

    /// <summary>Same comparison through <c>Ascii.Equals</c>.</summary>
    [Benchmark(Description = "(Ascii)"), BenchmarkCategory("Equals")]
    public bool EqualsAscii() => Ascii.Equals(target, ascii);

    /// <summary>Baseline: <c>string.Equals</c> with <c>StringComparison.OrdinalIgnoreCase</c>.</summary>
    [Benchmark(Baseline = true), BenchmarkCategory("EqualsIgnoreCase")]
    public bool EqualsIgnoreCase() => target.Equals(asciiIgnoreCase, StringComparison.OrdinalIgnoreCase);

    /// <summary>Same comparison through <c>Ascii.EqualsIgnoreCase</c>.</summary>
    [Benchmark(Description = "(Ascii)"), BenchmarkCategory("EqualsIgnoreCase")]
    public bool EqualsIgnoreCaseAscii() => Ascii.EqualsIgnoreCase(target, asciiIgnoreCase);

    /// <summary>Baseline: checks every char with <c>char.IsAscii</c>, stopping at the first failure.</summary>
    /// <returns><c>true</c> when every char of <c>target</c> is ASCII.</returns>
    [Benchmark(Baseline = true), BenchmarkCategory("IsValid")]
    public bool IsValid()
    {
        foreach (var c in target.AsSpan())
        {
            if (char.IsAscii(c) is false)
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>Same check through <c>Ascii.IsValid</c>.</summary>
    [Benchmark(Description = "(Ascii)"), BenchmarkCategory("IsValid")]
    public bool IsValidAscii() => Ascii.IsValid(target);

    /// <summary>Baseline: <c>string.ToLowerInvariant</c>.</summary>
    [Benchmark(Baseline = true), BenchmarkCategory("ToLower")]
    public string ToLower() => target.ToLowerInvariant();

    /// <summary>Span variant: lower-cases straight into the buffer handed out by <c>string.Create</c>.</summary>
    [Benchmark(Description = "(Span)"), BenchmarkCategory("ToLower")]
    public string ToLowerSpan() => string.Create(target.Length, target,
        static (dest, target) => target.AsSpan().ToLowerInvariant(dest));

    /// <summary>Ascii variant: same shape using <c>Ascii.ToLower</c>; its result and out value are ignored.</summary>
    [Benchmark(Description = "(Ascii)"), BenchmarkCategory("ToLower")]
    public string ToLowerAscii() => string.Create(target.Length, target,
        static (dest, target) => Ascii.ToLower(target, dest, out int _));

    /// <summary>Baseline: <c>string.Trim</c>.</summary>
    [Benchmark(Baseline = true), BenchmarkCategory("Trim")]
    public string Trim() => targetToBeTrimmed.Trim();

    /// <summary>Span variant: trims the span, then materializes a string.</summary>
    [Benchmark(Description = "(Span)"), BenchmarkCategory("Trim")]
    public string TrimSpan() => targetToBeTrimmed.AsSpan().Trim().ToString();

    /// <summary>Ascii variant: slices the span with the range returned by <c>Ascii.Trim</c>, then materializes a string.</summary>
    [Benchmark(Description = "(Ascii)"), BenchmarkCategory("Trim")]
    public string TrimAscii() => targetToBeTrimmed.AsSpan()[Ascii.Trim(targetToBeTrimmed)].ToString();

    /// <summary>Baseline: <c>Encoding.UTF8.GetBytes</c>.</summary>
    [Benchmark(Baseline = true), BenchmarkCategory("FromUtf16")]
    public byte[] FromUtf16() => Encoding.UTF8.GetBytes(target);

    /// <summary>Utf8 variant: transcodes with <c>Utf8.FromUtf16</c> into a freshly allocated array.</summary>
    [Benchmark(Description = "(Utf8)"), BenchmarkCategory("FromUtf16")]
    public byte[] FromUtf16Utf8()
    {
        // One byte per char is enough here because `target` contains only ASCII characters.
        var dest = new byte[target.Length];
        Utf8.FromUtf16(target, dest, out int _, out int _);
        return dest;
    }

    /// <summary>Ascii variant: transcodes with <c>Ascii.FromUtf16</c> into a freshly allocated array.</summary>
    [Benchmark(Description = "(Ascii)"), BenchmarkCategory("FromUtf16")]
    public byte[] FromUtf16Ascii()
    {
        var dest = new byte[target.Length];
        Ascii.FromUtf16(target, dest, out int _);
        return dest;
    }

    /// <summary>Baseline: <c>Encoding.UTF8.GetString</c>.</summary>
    [Benchmark(Baseline = true), BenchmarkCategory("ToUtf16")]
    public string ToUtf16() => Encoding.UTF8.GetString(utf8Target);

    /// <summary>Utf8 variant: transcodes with <c>Utf8.ToUtf16</c> into the buffer handed out by <c>string.Create</c>.</summary>
    [Benchmark(Description = "(Utf8)"), BenchmarkCategory("ToUtf16")]
    public string ToUtf16Utf8() => string.Create(utf8Target.Length, utf8Target,
        static (span, target) => Utf8.ToUtf16(target, span, out int _, out int _));

    /// <summary>Ascii variant: same shape using <c>Ascii.ToUtf16</c>.</summary>
    [Benchmark(Description = "(Ascii)"), BenchmarkCategory("ToUtf16")]
    public string ToUtf16Ascii() => string.Create(utf8Target.Length, utf8Target,
        static (span, target) => Ascii.ToUtf16(target, span, out int _));
}

| Method           | Mean      | Ratio | Gen0   | Allocated | Alloc Ratio |
|----------------- |----------:|------:|-------:|----------:|------------:|
| Equals           |  9.567 ns |  1.00 |      - |         - |          NA |
| (Ascii)          | 12.373 ns |  1.29 |      - |         - |          NA |
|                  |           |       |        |           |             |
| EqualsIgnoreCase | 22.450 ns |  1.00 |      - |         - |          NA |
| (Ascii)          | 33.296 ns |  1.48 |      - |         - |          NA |
|                  |           |       |        |           |             |
| IsValid          | 35.909 ns |  1.00 |      - |         - |          NA |
| (Ascii)          |  4.183 ns |  0.12 |      - |         - |          NA |
|                  |           |       |        |           |             |
| ToLower          | 39.016 ns |  1.00 | 0.0433 |     136 B |        1.00 |
| (Span)           | 38.223 ns |  0.98 | 0.0433 |     136 B |        1.00 |
| (Ascii)          | 35.589 ns |  0.91 | 0.0433 |     136 B |        1.00 |
|                  |           |       |        |           |             |
| Trim             | 26.968 ns |  1.00 | 0.0433 |     136 B |        1.00 |
| (Span)           | 35.558 ns |  1.32 | 0.0433 |     136 B |        1.00 |
| (Ascii)          | 35.313 ns |  1.31 | 0.0433 |     136 B |        1.00 |
|                  |           |       |        |           |             |
| FromUtf16        | 43.008 ns |  1.00 | 0.0255 |      80 B |        1.00 |
| (Utf8)           | 26.745 ns |  0.62 | 0.0255 |      80 B |        1.00 |
| (Ascii)          | 22.682 ns |  0.53 | 0.0255 |      80 B |        1.00 |
|                  |           |       |        |           |             |
| ToUtf16          | 43.751 ns |  1.00 | 0.0433 |     136 B |        1.00 |
| (Utf8)           | 33.506 ns |  0.77 | 0.0433 |     136 B |        1.00 |
| (Ascii)          | 22.452 ns |  0.51 | 0.0433 |     136 B |        1.00 |

各項目の最初の行はstringの対応メソッドなどによる方法、(Ascii) となっているのがAsciiクラスによるもの。(Utf8) や (Span) は参考として別の方法。


  • 基本的に効果がある
  • Equals / EqualsIgnoreCase / Trim は今回の例では逆に悪くなった
    • ただし、Trimについては文字列を得なくていい (Spanでよい) なら良くなる


Equals / EqualsIgnoreCase

// Baseline: string.Equals(string).
public bool Equals() => target.Equals(ascii);
// Same comparison through Ascii.Equals.
public bool EqualsAscii() => Ascii.Equals(target, ascii);
// Baseline: string.Equals with StringComparison.OrdinalIgnoreCase.
public bool EqualsIgnoreCase() => target.Equals(asciiIgnoreCase, StringComparison.OrdinalIgnoreCase);
// Same comparison through Ascii.EqualsIgnoreCase.
public bool EqualsIgnoreCaseAscii() => Ascii.EqualsIgnoreCase(target, asciiIgnoreCase);



また、ReadOnlySpan<char> と ReadOnlySpan<byte> (UTF8) を直接比較できるシグニチャが存在する。

// The same ASCII text, once as UTF-16 chars and once as UTF-8 bytes.
const string utf16Text = "The quick brown fox jumps over the lazy dog. 1234567890";
ReadOnlySpan<byte> utf8Bytes = "The quick brown fox jumps over the lazy dog. 1234567890"u8;

// Ascii.Equals compares the char sequence against the byte sequence directly.
bool sameText = Ascii.Equals(utf16Text, utf8Bytes);
Console.WriteLine(sameText);
// True


// Baseline: checks every char with char.IsAscii and stops at the first non-ASCII one.
public bool IsValid()
{
    foreach (var c in target.AsSpan())
    {
        if (char.IsAscii(c) is false)
        {
            return false;
        }
    }

    return true;
}

// Same check through Ascii.IsValid.
public bool IsValidAscii() => Ascii.IsValid(target);



ToLower / ToUpper

// Baseline: string.ToLowerInvariant.
public string ToLower() => target.ToLowerInvariant();
// Span variant: lower-cases straight into the buffer handed out by string.Create.
public string ToLowerSpan() => string.Create(target.Length, target,
    static (dest, target) => target.AsSpan().ToLowerInvariant(dest));
// Ascii variant: same shape using Ascii.ToLower; the out value is discarded.
public string ToLowerAscii() => string.Create(target.Length, target,
    static (dest, target) => Ascii.ToLower(target, dest, out int _));


また、destination を用意せずインプレースに行うToLowerInPlaceもある。

他に、ReadOnlySpan<char> と ReadOnlySpan<byte> の両方を受けるシグニチャもある。(char を受けて byte に書き出す / byte を受けて char に書き出す)



// Baseline: string.Trim.
public string Trim() => targetToBeTrimmed.Trim();
// Span variant: trims the span, then materializes a string.
public string TrimSpan() => targetToBeTrimmed.AsSpan().Trim().ToString();
// Ascii variant: slices the span with the range returned by Ascii.Trim, then materializes a string.
public string TrimAscii() => targetToBeTrimmed.AsSpan()[Ascii.Trim(targetToBeTrimmed)].ToString();



TrimStart, TrimEndもある。ベンチマーク省略。


// Baseline: Encoding.UTF8.GetBytes.
public byte[] FromUtf16() => Encoding.UTF8.GetBytes(target);

// Utf8 variant: transcodes with Utf8.FromUtf16 into a freshly allocated array.
public byte[] FromUtf16Utf8()
{
    // Destination is sized at one byte per char of the input.
    var dest = new byte[target.Length];
    Utf8.FromUtf16(target, dest, out int _, out int _);
    return dest;
}

// Ascii variant: transcodes with Ascii.FromUtf16 into a freshly allocated array.
public byte[] FromUtf16Ascii()
{
    var dest = new byte[target.Length];
    Ascii.FromUtf16(target, dest, out int _);
    return dest;
}

char (UTF16) to byte (UTF8)。



// Baseline: Encoding.UTF8.GetString.
public string ToUtf16() => Encoding.UTF8.GetString(utf8Target);

// Utf8 variant: transcodes with Utf8.ToUtf16 into the buffer handed out by string.Create.
public string ToUtf16Utf8() => string.Create(utf8Target.Length, utf8Target,
    static (span, target) => Utf8.ToUtf16(target, span, out int _, out int _));

// Ascii variant: same shape using Ascii.ToUtf16.
public string ToUtf16Ascii() => string.Create(utf8Target.Length, utf8Target,
    static (span, target) => Ascii.ToUtf16(target, span, out int _));

byte (UTF8) to char (UTF16)。


  1. 冒頭 .NET Blog 記事ではEqualsIgnoreCaseベンチマークを行っていたが、そこでは case-insensitive 処理をベタ書きしたものを比較対象としていた。StringComparison.OrdinalIgnoreCaseは比較対象として不適?(未調査)