add utf8proc back for bench

2025-08-02 14:57:31 +03:00 · 2024-02-08 13:21:36 -08:00
parent 7da82688b8
commit 4ae41579da
8 changed files with 112 additions and 0 deletions
--- a/build.zig
+++ b/build.zig
@ -1082,6 +1082,15 @@ fn addDeps(
    step.linkLibrary(utfcpp_dep.artifact("utfcpp"));
    try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin());
    // utf8proc
    const utf8proc_dep = b.dependency("utf8proc", .{
        .target = target,
        .optimize = optimize,
    });
    step.root_module.addImport("utf8proc", utf8proc_dep.module("utf8proc"));
    step.linkLibrary(utf8proc_dep.artifact("utf8proc"));
    try static_libs.append(utf8proc_dep.artifact("utf8proc").getEmittedBin());
    // Spirv-Cross
    step.linkLibrary(spirv_cross_dep.artifact("spirv_cross"));
    try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin());
--- a/build.zig.zon
+++ b/build.zig.zon
@ -38,6 +38,7 @@
        .opengl = .{ .path = "./pkg/opengl" },
        .pixman = .{ .path = "./pkg/pixman" },
        .simdutf = .{ .path = "./pkg/simdutf" },
        .utf8proc = .{ .path = "./pkg/utf8proc" },
        .utfcpp = .{ .path = "./pkg/utfcpp" },
        .zlib = .{ .path = "./pkg/zlib" },
--- a/pkg/utf8proc/build.zig
+++ b/pkg/utf8proc/build.zig
@ -0,0 +1,37 @@
 const std = @import("std");
 /// Build script for the `utf8proc` package.
 ///
 /// Exposes a Zig module named "utf8proc" rooted at `main.zig` and a static
 /// library named "utf8proc" compiled from the upstream `utf8proc.c`. The
 /// upstream headers (`*.h`) are installed alongside the library artifact.
 ///
 /// Returns an error only if growing the compiler-flag list fails.
 pub fn build(b: *std.Build) !void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // Zig-side wrapper module; consumers import it as "utf8proc".
    const module = b.addModule("utf8proc", .{ .root_source_file = .{ .path = "main.zig" } });

    // Upstream C sources, declared as a dependency in build.zig.zon.
    const upstream = b.dependency("utf8proc", .{});

    const lib = b.addStaticLibrary(.{
        .name = "utf8proc",
        .target = target,
        .optimize = optimize,
    });
    lib.linkLibC();

    // Both the C library and the Zig module need to find utf8proc.h.
    lib.addIncludePath(upstream.path(""));
    module.addIncludePath(upstream.path(""));

    // Register the cleanup immediately after acquisition, before the first
    // fallible append, so the list is released on every exit path.
    var flags = std.ArrayList([]const u8).init(b.allocator);
    defer flags.deinit();
    try flags.append("-DUTF8PROC_EXPORTS");

    lib.addCSourceFiles(.{
        .dependency = upstream,
        .files = &.{"utf8proc.c"},
        .flags = flags.items,
    });

    // Install only the header files from the upstream root.
    lib.installHeadersDirectoryOptions(.{
        .source_dir = upstream.path(""),
        .install_dir = .header,
        .install_subdir = "",
        .include_extensions = &.{".h"},
    });

    b.installArtifact(lib);
 }
--- a/pkg/utf8proc/build.zig.zon
+++ b/pkg/utf8proc/build.zig.zon
@ -0,0 +1,11 @@
 // Package manifest for the utf8proc wrapper package.
 .{
    .name = "utf8proc",
    // Matches the upstream release tag fetched below (v2.8.0).
    .version = "2.8.0",
    .paths = .{""},
    .dependencies = .{
        // Upstream C sources; build.zig looks this up via
        // `b.dependency("utf8proc", .{})`.
        .utf8proc = .{
            .url = "https://github.com/JuliaStrings/utf8proc/archive/refs/tags/v2.8.0.tar.gz",
            // Content hash pinned for the archive at `.url`.
            .hash = "1220056ce228a8c58f1fa66ab778f5c8965e62f720c1d30603c7d534cb7d8a605ad7",
        },
    },
 }
--- a/pkg/utf8proc/c.zig
+++ b/pkg/utf8proc/c.zig
@ -0,0 +1,3 @@
 // Re-export every declaration produced by importing `utf8proc.h`, so the
 // C API is reachable as members of this file (main.zig exposes it as `c`).
 pub usingnamespace @cImport({
    @cInclude("utf8proc.h");
 });
--- a/pkg/utf8proc/main.zig
+++ b/pkg/utf8proc/main.zig
@ -0,0 +1,20 @@
 pub const c = @import("c.zig");
 /// Return the display width of `codepoint`, analogous to
 /// `wcwidth(codepoint)`. Unlike `wcwidth`, non-printable codepoints yield
 /// a width of 0 rather than -1.
 ///
 /// The C result is narrowed to `u8` with `@intCast`, so it is assumed to
 /// always fit in that range.
 pub fn charwidth(codepoint: u21) u8 {
    const width: c_int = c.utf8proc_charwidth(@intCast(codepoint));
    return @intCast(width);
 }
 /// Report whether a grapheme break is permitted between the consecutive
 /// codepoints `cp1` and `cp2`, following the extended grapheme cluster
 /// rules of UAX#29.
 ///
 /// `state` is handed to utf8proc unchanged as its break-state pointer.
 pub fn graphemeBreakStateful(cp1: u21, cp2: u21, state: *i32) bool {
    const break_allowed: bool = c.utf8proc_grapheme_break_stateful(@intCast(cp1), @intCast(cp2), state);
    return break_allowed;
 }
 // Empty anonymous test block; it contains no assertions.
 test {}
--- a/src/bench/codepoint-width.sh
+++ b/src/bench/codepoint-width.sh
@ -27,6 +27,8 @@ hyperfine \
  "./zig-out/bin/bench-codepoint-width --mode=noop${ARGS} </tmp/ghostty_bench_data" \
  -n wcwidth \
  "./zig-out/bin/bench-codepoint-width --mode=wcwidth${ARGS} </tmp/ghostty_bench_data" \
  -n utf8proc \
  "./zig-out/bin/bench-codepoint-width --mode=utf8proc${ARGS} </tmp/ghostty_bench_data" \
  -n ziglyph \
  "./zig-out/bin/bench-codepoint-width --mode=ziglyph${ARGS} </tmp/ghostty_bench_data" \
  -n simd \
--- a/src/bench/codepoint-width.zig
+++ b/src/bench/codepoint-width.zig
@ -45,6 +45,9 @@ const Mode = enum {
    /// libc wcwidth
    wcwidth,
    /// Use utf8proc library to calculate the display width of each codepoint.
    utf8proc,
    /// Use ziglyph library to calculate the display width of each codepoint.
    ziglyph,
@ -76,6 +79,7 @@ pub fn main() !void {
    switch (args.mode) {
        .noop => try benchNoop(reader, buf),
        .wcwidth => try benchWcwidth(reader, buf),
        .utf8proc => try benchUtf8proc(reader, buf),
        .ziglyph => try benchZiglyph(reader, buf),
        .simd => try benchSimd(reader, buf),
    }
@ -124,6 +128,31 @@ noinline fn benchWcwidth(
    }
 }
 /// Benchmark body for `--mode=utf8proc`.
 ///
 /// Repeatedly fills `buf` from `reader`, decodes the bytes one at a time
 /// with `UTF8Decoder`, and asks utf8proc for the width of every completed
 /// codepoint. Returns once `reader.read` reports zero bytes; any read
 /// error is propagated.
 noinline fn benchUtf8proc(
    reader: anytype,
    buf: []u8,
 ) !void {
    const utf8proc = @import("utf8proc");
    var decoder: UTF8Decoder = .{};

    while (true) {
        const len = try reader.read(buf);
        if (len == 0) return;

        // Feeding the decoder byte-by-byte in a plain for loop is the
        // naive scalar approach.
        for (buf[0..len]) |byte| {
            const result = decoder.next(byte);
            // result[1] is the "consumed" flag; the benchmark requires
            // every byte to be consumed.
            assert(result[1]);

            // No complete codepoint yet: keep feeding bytes.
            const cp = result[0] orelse continue;

            // Store the width in the buffer so the call is not
            // compiled away.
            buf[0] = @intCast(utf8proc.charwidth(cp));
        }
    }
 }
 noinline fn benchZiglyph(
    reader: anytype,
    buf: []u8,