add utf8proc back for bench

This commit is contained in:
Mitchell Hashimoto
2024-02-08 13:21:36 -08:00
parent 7da82688b8
commit 4ae41579da
8 changed files with 112 additions and 0 deletions

View File

@ -1082,6 +1082,15 @@ fn addDeps(
step.linkLibrary(utfcpp_dep.artifact("utfcpp")); step.linkLibrary(utfcpp_dep.artifact("utfcpp"));
try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin()); try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin());
// utf8proc
const utf8proc_dep = b.dependency("utf8proc", .{
.target = target,
.optimize = optimize,
});
step.root_module.addImport("utf8proc", utf8proc_dep.module("utf8proc"));
step.linkLibrary(utf8proc_dep.artifact("utf8proc"));
try static_libs.append(utf8proc_dep.artifact("utf8proc").getEmittedBin());
// Spirv-Cross // Spirv-Cross
step.linkLibrary(spirv_cross_dep.artifact("spirv_cross")); step.linkLibrary(spirv_cross_dep.artifact("spirv_cross"));
try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin()); try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin());

View File

@ -38,6 +38,7 @@
.opengl = .{ .path = "./pkg/opengl" }, .opengl = .{ .path = "./pkg/opengl" },
.pixman = .{ .path = "./pkg/pixman" }, .pixman = .{ .path = "./pkg/pixman" },
.simdutf = .{ .path = "./pkg/simdutf" }, .simdutf = .{ .path = "./pkg/simdutf" },
.utf8proc = .{ .path = "./pkg/utf8proc" },
.utfcpp = .{ .path = "./pkg/utfcpp" }, .utfcpp = .{ .path = "./pkg/utfcpp" },
.zlib = .{ .path = "./pkg/zlib" }, .zlib = .{ .path = "./pkg/zlib" },

37
pkg/utf8proc/build.zig Normal file
View File

@ -0,0 +1,37 @@
const std = @import("std");
/// Build script for the utf8proc package.
///
/// Registers a Zig module named "utf8proc" rooted at `main.zig` and builds a
/// static library named "utf8proc" from the `utf8proc.c` source of the
/// upstream "utf8proc" dependency. The upstream root directory is added as an
/// include path for both the library and the module, and its `.h` files are
/// installed alongside the library artifact.
///
/// Returns an error if appending to the C flag list fails.
pub fn build(b: *std.Build) !void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    const module = b.addModule("utf8proc", .{ .root_source_file = .{ .path = "main.zig" } });

    const upstream = b.dependency("utf8proc", .{});
    const lib = b.addStaticLibrary(.{
        .name = "utf8proc",
        .target = target,
        .optimize = optimize,
    });
    lib.linkLibC();
    lib.addIncludePath(upstream.path(""));
    module.addIncludePath(upstream.path(""));

    var flags = std.ArrayList([]const u8).init(b.allocator);
    // Register cleanup before the first fallible operation so the list is
    // released on every exit path, including a failed append.
    defer flags.deinit();
    try flags.append("-DUTF8PROC_EXPORTS");

    lib.addCSourceFiles(.{
        .dependency = upstream,
        .files = &.{"utf8proc.c"},
        .flags = flags.items,
    });

    lib.installHeadersDirectoryOptions(.{
        .source_dir = upstream.path(""),
        .install_dir = .header,
        .install_subdir = "",
        .include_extensions = &.{".h"},
    });

    b.installArtifact(lib);
}

View File

@ -0,0 +1,11 @@
// Package manifest for the local "utf8proc" package.
// NOTE(review): the file name header is missing from this view; this is
// presumably pkg/utf8proc/build.zig.zon — confirm.
.{
.name = "utf8proc",
// Matches the upstream release tag referenced by the URL below.
.version = "2.8.0",
.paths = .{""},
.dependencies = .{
// Upstream C sources; build.zig looks this up via b.dependency("utf8proc", ...).
.utf8proc = .{
.url = "https://github.com/JuliaStrings/utf8proc/archive/refs/tags/v2.8.0.tar.gz",
// Must be updated together with the URL whenever the upstream version changes.
.hash = "1220056ce228a8c58f1fa66ab778f5c8965e62f720c1d30603c7d534cb7d8a605ad7",
},
},
}

3
pkg/utf8proc/c.zig Normal file
View File

@ -0,0 +1,3 @@
// Declarations translated from the utf8proc C header.
const utf8proc_h = @cImport(@cInclude("utf8proc.h"));

// Re-export every translated declaration so importers can reach them
// directly on this file's namespace.
pub usingnamespace utf8proc_h;

20
pkg/utf8proc/main.zig Normal file
View File

@ -0,0 +1,20 @@
pub const c = @import("c.zig");
/// Returns the character width of `codepoint` as reported by utf8proc.
///
/// The result is analogous to `wcwidth(codepoint)`, with one difference:
/// non-printable codepoints yield a width of 0 here, whereas `wcwidth`
/// yields -1 for them.
pub fn charwidth(codepoint: u21) u8 {
    // Widen to the signed 32-bit codepoint type the C API takes.
    const cp: i32 = codepoint;
    const width = c.utf8proc_charwidth(cp);
    return @intCast(width);
}
/// Reports whether a grapheme break is permitted between two consecutive
/// codepoints, following the extended grapheme cluster rules of UAX#29.
///
/// `cp1` is the earlier codepoint and `cp2` the one immediately after it.
/// `state` is handed through to utf8proc, which may update it between calls.
/// NOTE(review): per the utf8proc header documentation the state should be
/// initialized to 0 at the start of a text — confirm callers do so.
pub fn graphemeBreakStateful(cp1: u21, cp2: u21, state: *i32) bool {
    // Widen both codepoints to the signed 32-bit type the C API takes.
    const first: i32 = cp1;
    const second: i32 = cp2;
    const may_break = c.utf8proc_grapheme_break_stateful(first, second, state);
    return may_break;
}
test {}

View File

@ -27,6 +27,8 @@ hyperfine \
"./zig-out/bin/bench-codepoint-width --mode=noop${ARGS} </tmp/ghostty_bench_data" \ "./zig-out/bin/bench-codepoint-width --mode=noop${ARGS} </tmp/ghostty_bench_data" \
-n wcwidth \ -n wcwidth \
"./zig-out/bin/bench-codepoint-width --mode=wcwidth${ARGS} </tmp/ghostty_bench_data" \ "./zig-out/bin/bench-codepoint-width --mode=wcwidth${ARGS} </tmp/ghostty_bench_data" \
-n utf8proc \
"./zig-out/bin/bench-codepoint-width --mode=utf8proc${ARGS} </tmp/ghostty_bench_data" \
-n ziglyph \ -n ziglyph \
"./zig-out/bin/bench-codepoint-width --mode=ziglyph${ARGS} </tmp/ghostty_bench_data" \ "./zig-out/bin/bench-codepoint-width --mode=ziglyph${ARGS} </tmp/ghostty_bench_data" \
-n simd \ -n simd \

View File

@ -45,6 +45,9 @@ const Mode = enum {
/// libc wcwidth /// libc wcwidth
wcwidth, wcwidth,
/// Use utf8proc library to calculate the display width of each codepoint.
utf8proc,
/// Use ziglyph library to calculate the display width of each codepoint. /// Use ziglyph library to calculate the display width of each codepoint.
ziglyph, ziglyph,
@ -76,6 +79,7 @@ pub fn main() !void {
switch (args.mode) { switch (args.mode) {
.noop => try benchNoop(reader, buf), .noop => try benchNoop(reader, buf),
.wcwidth => try benchWcwidth(reader, buf), .wcwidth => try benchWcwidth(reader, buf),
.utf8proc => try benchUtf8proc(reader, buf),
.ziglyph => try benchZiglyph(reader, buf), .ziglyph => try benchZiglyph(reader, buf),
.simd => try benchSimd(reader, buf), .simd => try benchSimd(reader, buf),
} }
@ -124,6 +128,31 @@ noinline fn benchWcwidth(
} }
} }
/// Benchmark mode: pull data from `reader` into `buf`, decode it as UTF-8 one
/// byte at a time, and look up the width of every decoded codepoint through
/// the utf8proc bindings. Returns once `reader` reports zero bytes read;
/// read errors are propagated to the caller.
noinline fn benchUtf8proc(
    reader: anytype,
    buf: []u8,
) !void {
    const utf8proc = @import("utf8proc");
    var decoder: UTF8Decoder = .{};

    while (true) {
        const len = try reader.read(buf);
        if (len == 0) return;

        // Feeding the decoder byte by byte in a plain loop is the naive
        // scalar approach.
        for (buf[0..len]) |byte| {
            // The decoder yields a pair: (optional codepoint, consumed flag).
            const step = decoder.next(byte);
            assert(step[1]);

            const cp = step[0] orelse continue;

            // Store the width back into the buffer so the lookup cannot be
            // optimized away.
            buf[0] = @intCast(utf8proc.charwidth(cp));
        }
    }
}
noinline fn benchZiglyph( noinline fn benchZiglyph(
reader: anytype, reader: anytype,
buf: []u8, buf: []u8,