mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-25 13:16:11 +03:00
Add unicode-test and fix benchmarks
This commit is contained in:
@ -32,6 +32,10 @@ pub fn build(b: *std.Build) !void {
|
||||
const bench = try buildpkg.GhosttyBench.init(b, &deps);
|
||||
if (config.emit_bench) bench.install();
|
||||
|
||||
// Ghostty unicode test exe
|
||||
const unicode_test = try buildpkg.GhosttyUnicodeTest.init(b, &config, &deps);
|
||||
if (config.emit_unicode_test) unicode_test.install();
|
||||
|
||||
// Ghostty dist tarball
|
||||
const dist = try buildpkg.GhosttyDist.init(b, &config);
|
||||
{
|
||||
|
@ -27,6 +27,8 @@ hyperfine \
|
||||
"./zig-out/bin/bench-codepoint-width --mode=noop${ARGS} </tmp/ghostty_bench_data" \
|
||||
-n wcwidth \
|
||||
"./zig-out/bin/bench-codepoint-width --mode=wcwidth${ARGS} </tmp/ghostty_bench_data" \
|
||||
-n zg \
|
||||
"./zig-out/bin/bench-codepoint-width --mode=zg${ARGS} </tmp/ghostty_bench_data" \
|
||||
-n table \
|
||||
"./zig-out/bin/bench-codepoint-width --mode=table${ARGS} </tmp/ghostty_bench_data" \
|
||||
-n simd \
|
||||
|
@ -7,14 +7,14 @@
|
||||
//! This will consume all of the available stdin, so you should run it
|
||||
//! with `head` in a pipe to restrict. For example, to test ASCII input:
|
||||
//!
|
||||
//! bench-stream --mode=gen-ascii | head -c 50M | bench-codepoint-width --mode=ziglyph
|
||||
//! bench-stream --mode=gen-ascii | head -c 50M | bench-codepoint-width --mode=zg
|
||||
//!
|
||||
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const ArenaAllocator = std.heap.ArenaAllocator;
|
||||
const ziglyph = @import("ziglyph");
|
||||
const DisplayWidth = @import("DisplayWidth");
|
||||
const cli = @import("../cli.zig");
|
||||
const simd = @import("../simd/main.zig");
|
||||
const table = @import("../unicode/main.zig").table;
|
||||
@ -46,8 +46,8 @@ const Mode = enum {
|
||||
/// libc wcwidth
|
||||
wcwidth,
|
||||
|
||||
/// Use ziglyph library to calculate the display width of each codepoint.
|
||||
ziglyph,
|
||||
/// Use zg library to calculate the display width of each codepoint.
|
||||
zg,
|
||||
|
||||
/// Our SIMD implementation.
|
||||
simd,
|
||||
@ -64,6 +64,10 @@ pub fn main() !void {
|
||||
// We want to use the c allocator because it is much faster than GPA.
|
||||
const alloc = std.heap.c_allocator;
|
||||
|
||||
// Initialize DisplayWidth for zg. Teardown is deferred to the end of main
// because the zg benchmark mode receives `display_width` further down;
// releasing it right here would hand that mode already-freed tables.
const display_width = try DisplayWidth.init(alloc);
defer display_width.deinit(alloc);
|
||||
|
||||
// Parse our args
|
||||
var args: Args = .{};
|
||||
defer args.deinit();
|
||||
@ -80,7 +84,7 @@ pub fn main() !void {
|
||||
switch (args.mode) {
|
||||
.noop => try benchNoop(reader, buf),
|
||||
.wcwidth => try benchWcwidth(reader, buf),
|
||||
.ziglyph => try benchZiglyph(reader, buf),
|
||||
.zg => try benchZg(display_width, reader, buf),
|
||||
.simd => try benchSimd(reader, buf),
|
||||
.table => try benchTable(reader, buf),
|
||||
}
|
||||
@ -155,7 +159,8 @@ noinline fn benchTable(
|
||||
}
|
||||
}
|
||||
|
||||
noinline fn benchZiglyph(
|
||||
noinline fn benchZg(
|
||||
display_width: DisplayWidth,
|
||||
reader: anytype,
|
||||
buf: []u8,
|
||||
) !void {
|
||||
@ -170,7 +175,7 @@ noinline fn benchZiglyph(
|
||||
const cp_, const consumed = d.next(c);
|
||||
assert(consumed);
|
||||
if (cp_) |cp| {
|
||||
const width = ziglyph.display_width.codePointWidth(cp, .half);
|
||||
const width = DisplayWidth.codePointWidth(display_width, cp);
|
||||
|
||||
// Write the width to the buffer to avoid it being compiled away
|
||||
buf[0] = @intCast(width);
|
||||
|
@ -25,8 +25,6 @@ hyperfine \
|
||||
--warmup 10 \
|
||||
-n noop \
|
||||
"./zig-out/bin/bench-grapheme-break --mode=noop${ARGS} </tmp/ghostty_bench_data" \
|
||||
-n ziglyph \
|
||||
"./zig-out/bin/bench-grapheme-break --mode=ziglyph${ARGS} </tmp/ghostty_bench_data" \
|
||||
-n zg \
|
||||
"./zig-out/bin/bench-grapheme-break --mode=zg${ARGS} </tmp/ghostty_bench_data" \
|
||||
-n table \
|
||||
|
@ -5,14 +5,13 @@
|
||||
//! This will consume all of the available stdin, so you should run it
|
||||
//! with `head` in a pipe to restrict. For example, to test ASCII input:
|
||||
//!
|
||||
//! bench-stream --mode=gen-ascii | head -c 50M | bench-grapheme-break --mode=ziglyph
|
||||
//! bench-stream --mode=gen-ascii | head -c 50M | bench-grapheme-break --mode=zg
|
||||
//!
|
||||
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const ArenaAllocator = std.heap.ArenaAllocator;
|
||||
const ziglyph = @import("ziglyph");
|
||||
const Graphemes = @import("Graphemes");
|
||||
const cli = @import("../cli.zig");
|
||||
const simd = @import("../simd/main.zig");
|
||||
@ -42,9 +41,6 @@ const Mode = enum {
|
||||
/// and establishes a baseline for the other modes.
|
||||
noop,
|
||||
|
||||
/// Use ziglyph library to calculate the display width of each codepoint.
|
||||
ziglyph,
|
||||
|
||||
/// Use zg library to calculate the grapheme break between codepoints.
|
||||
zg,
|
||||
|
||||
@ -79,7 +75,6 @@ pub fn main() !void {
|
||||
// Handle the modes that do not depend on terminal state first.
|
||||
switch (args.mode) {
|
||||
.noop => try benchNoop(reader, buf),
|
||||
.ziglyph => try benchZiglyph(reader, buf),
|
||||
.zg => try benchZg(&graphemes, reader, buf),
|
||||
.table => try benchTable(reader, buf),
|
||||
}
|
||||
@ -152,28 +147,3 @@ noinline fn benchZg(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
noinline fn benchZiglyph(
|
||||
reader: anytype,
|
||||
buf: []u8,
|
||||
) !void {
|
||||
var d: UTF8Decoder = .{};
|
||||
var state: u3 = 0;
|
||||
var cp1: u21 = 0;
|
||||
while (true) {
|
||||
const n = try reader.read(buf);
|
||||
if (n == 0) break;
|
||||
|
||||
// Using stream.next directly with a for loop applies a naive
|
||||
// scalar approach.
|
||||
for (buf[0..n]) |c| {
|
||||
const cp_, const consumed = d.next(c);
|
||||
assert(consumed);
|
||||
if (cp_) |cp2| {
|
||||
const v = ziglyph.graphemeBreak(cp1, @intCast(cp2), &state);
|
||||
buf[0] = @intCast(@intFromBool(v));
|
||||
cp1 = cp2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -50,6 +50,7 @@ patch_rpath: ?[]const u8 = null,
|
||||
flatpak: bool = false,
|
||||
emit_test_exe: bool = false,
|
||||
emit_bench: bool = false,
|
||||
emit_unicode_test: bool = false,
|
||||
emit_helpgen: bool = false,
|
||||
emit_docs: bool = false,
|
||||
emit_webdata: bool = false,
|
||||
@ -276,6 +277,12 @@ pub fn init(b: *std.Build) !Config {
|
||||
"Build and install the benchmark executables.",
|
||||
) orelse false;
|
||||
|
||||
config.emit_unicode_test = b.option(
|
||||
bool,
|
||||
"emit-unicode-test",
|
||||
"Build and install the unicode test executable.",
|
||||
) orelse false;
|
||||
|
||||
config.emit_helpgen = b.option(
|
||||
bool,
|
||||
"emit-helpgen",
|
||||
@ -289,6 +296,7 @@ pub fn init(b: *std.Build) !Config {
|
||||
) orelse emit_docs: {
|
||||
// If we are emitting any other artifacts then we default to false.
|
||||
if (config.emit_bench or
|
||||
config.emit_unicode_test or
|
||||
config.emit_test_exe or
|
||||
config.emit_helpgen) break :emit_docs false;
|
||||
|
||||
@ -337,6 +345,7 @@ pub fn init(b: *std.Build) !Config {
|
||||
target.result.os.tag == .macos and
|
||||
config.app_runtime == .none and
|
||||
(!config.emit_bench and
|
||||
!config.emit_unicode_test and
|
||||
!config.emit_test_exe and
|
||||
!config.emit_helpgen);
|
||||
|
||||
|
47
src/build/GhosttyUnicodeTest.zig
Normal file
47
src/build/GhosttyUnicodeTest.zig
Normal file
@ -0,0 +1,47 @@
|
||||
const UnicodeTest = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const Config = @import("Config.zig");
|
||||
const SharedDeps = @import("SharedDeps.zig");
|
||||
|
||||
/// The unicode test executable.
|
||||
exe: *std.Build.Step.Compile,
|
||||
|
||||
/// The install step for the executable.
|
||||
install_step: *std.Build.Step.InstallArtifact,
|
||||
|
||||
/// Create the `unicode-test` executable, rooted at `src/unicode/main.zig`,
/// along with the step that installs it. The shared dependencies and, when
/// it resolves, the lazy `ziglyph` dependency are attached to the executable.
///
/// This does not hook anything into the build's install step; see `install`.
pub fn init(b: *std.Build, cfg: *const Config, deps: *const SharedDeps) !UnicodeTest {
    // Stripped builds also drop frame pointers and unwind tables.
    const root_module = b.createModule(.{
        .root_source_file = b.path("src/unicode/main.zig"),
        .target = cfg.target,
        .optimize = cfg.optimize,
        .strip = cfg.strip,
        .omit_frame_pointer = cfg.strip,
        .unwind_tables = if (cfg.strip) .none else .sync,
    });
    const unicode_exe = b.addExecutable(.{
        .name = "unicode-test",
        .root_module = root_module,
    });
    const artifact_install = b.addInstallArtifact(unicode_exe, .{});

    // Everything the shared dependency set provides.
    _ = try deps.add(unicode_exe);

    // ziglyph is imported here explicitly, on top of the shared set, and
    // only when the lazy dependency is actually available.
    const ziglyph_dep = b.lazyDependency("ziglyph", .{
        .target = cfg.target,
        .optimize = cfg.optimize,
    });
    if (ziglyph_dep) |resolved| {
        unicode_exe.root_module.addImport("ziglyph", resolved.module("ziglyph"));
    }

    return .{
        .exe = unicode_exe,
        .install_step = artifact_install,
    };
}
|
||||
|
||||
/// Make the build's install step depend on this executable's install step,
/// so the unicode test exe is installed along with the install target.
pub fn install(self: *const UnicodeTest) void {
    const artifact_step = &self.install_step.step;
    artifact_step.owner.getInstallStep().dependOn(artifact_step);
}
|
@ -411,12 +411,6 @@ pub fn add(
|
||||
})) |dep| {
|
||||
step.root_module.addImport("z2d", dep.module("z2d"));
|
||||
}
|
||||
if (b.lazyDependency("ziglyph", .{
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
})) |dep| {
|
||||
step.root_module.addImport("ziglyph", dep.module("ziglyph"));
|
||||
}
|
||||
if (b.lazyDependency("zg", .{
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
|
@ -15,6 +15,7 @@ pub const GhosttyFrameData = @import("GhosttyFrameData.zig");
|
||||
pub const GhosttyLib = @import("GhosttyLib.zig");
|
||||
pub const GhosttyResources = @import("GhosttyResources.zig");
|
||||
pub const GhosttyI18n = @import("GhosttyI18n.zig");
|
||||
pub const GhosttyUnicodeTest = @import("GhosttyUnicodeTest.zig");
|
||||
pub const GhosttyXCFramework = @import("GhosttyXCFramework.zig");
|
||||
pub const GhosttyWebdata = @import("GhosttyWebdata.zig");
|
||||
pub const HelpStrings = @import("HelpStrings.zig");
|
||||
|
@ -149,49 +149,6 @@ fn graphemeBreakClass(
|
||||
return true;
|
||||
}
|
||||
|
||||
// This test will verify the grapheme break implementation. This iterates over billions of codepoints so it is SLOW.
|
||||
// It's not meant to be run in CI, but it's useful for debugging.
|
||||
test "grapheme break check against ziglyph" {
|
||||
const ziglyph = @import("ziglyph");
|
||||
|
||||
// Set the min and max to control the test range.
|
||||
const min = 0;
|
||||
const max = std.math.maxInt(u21) + 1;
|
||||
var success: bool = true;
|
||||
|
||||
var state: BreakState = .{};
|
||||
var zg_state: u3 = 0;
|
||||
for (min..max) |cp1| {
|
||||
if (cp1 == '\r' or cp1 == '\n' or
|
||||
ziglyph.grapheme_break.isControl(@intCast(cp1))) continue;
|
||||
|
||||
for (min..max) |cp2| {
|
||||
if (cp2 == '\r' or cp2 == '\n' or
|
||||
ziglyph.grapheme_break.isControl(@intCast(cp2))) continue;
|
||||
|
||||
const gb = graphemeBreak(@intCast(cp1), @intCast(cp2), &state);
|
||||
const zg_gb = ziglyph.graphemeBreak(@intCast(cp1), @intCast(cp2), &zg_state);
|
||||
if (gb != zg_gb) {
|
||||
success = false;
|
||||
std.log.warn("cp1={x} cp2={x} gb={} state={} zg_gb={} zg_state={}", .{
|
||||
cp1,
|
||||
cp2,
|
||||
gb,
|
||||
state,
|
||||
zg_gb,
|
||||
zg_state,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try std.testing.expect(success);
|
||||
}
|
||||
|
||||
pub const std_options = struct {
|
||||
pub const log_level: std.log.Level = .info;
|
||||
};
|
||||
|
||||
test "grapheme break: emoji modifier" {
|
||||
const testing = std.testing;
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
const std = @import("std");
|
||||
pub const lut = @import("lut.zig");
|
||||
|
||||
const grapheme = @import("grapheme.zig");
|
||||
@ -10,3 +11,113 @@ pub const GraphemeBreakState = grapheme.BreakState;
|
||||
test {
|
||||
@import("std").testing.refAllDecls(@This());
|
||||
}
|
||||
|
||||
/// Build Ghostty with `zig build -Doptimize=ReleaseFast -Demit-unicode-test`.
///
/// Usage: ./zig-out/bin/unicode-test [grapheme|width|all] [zg|ziglyph|all]
///
/// First argument (which check to run; defaults to all):
///   grapheme: verify the grapheme break implementation against the
///     selected reference libraries. This iterates over billions of
///     codepoint pairs so it is SLOW.
///   width: verify that the table codepoint widths match the selected
///     reference libraries.
///
/// Second argument (which reference to compare against; defaults to all):
///   zg: compare grapheme/width against zg
///   ziglyph: compare grapheme/width against ziglyph
///
/// Mismatches are reported as log warnings; they do not fail the process.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();

    const alloc = gpa.allocator();

    const args = try std.process.argsAlloc(alloc);
    defer std.process.argsFree(alloc, args);

    var zg = try props.init(alloc);
    defer zg.deinit(alloc);

    const ziglyph = @import("ziglyph");
    const Graphemes = @import("Graphemes");
    const DisplayWidth = @import("DisplayWidth");

    // A missing argument means "all". The `or` short-circuits, so args[1]
    // and args[2] are only indexed when they are known to exist.
    const test_all = args.len < 2 or std.mem.eql(u8, args[1], "all");
    const compare_all = args.len < 3 or std.mem.eql(u8, args[2], "all");
    const compare_zg = compare_all or std.mem.eql(u8, args[2], "zg");
    const compare_ziglyph = compare_all or std.mem.eql(u8, args[2], "ziglyph");

    // Set the min and max to control the test range.
    const min = 0;
    const max = 0x110000;

    var state: GraphemeBreakState = .{};
    var zg_state: Graphemes.State = .{};
    var ziglyph_state: u3 = 0;

    if (test_all or std.mem.eql(u8, args[1], "grapheme")) {
        std.log.info("============== testing grapheme break ===============", .{});

        for (min..max) |cp1| {
            if (cp1 % 0x100 == 0) std.log.info("progress: cp1={x}", .{cp1});

            if (cp1 == '\r' or cp1 == '\n' or
                Graphemes.gbp(zg.graphemes, @intCast(cp1)) == .Control) continue;

            for (min..max) |cp2| {
                // The filter must look at cp2 here: cp1 already passed the
                // same check above, so testing it again would never skip
                // anything.
                if (cp2 == '\r' or cp2 == '\n' or
                    Graphemes.gbp(zg.graphemes, @intCast(cp2)) == .Control) continue;

                const gb = graphemeBreak(@intCast(cp1), @intCast(cp2), &state);
                if (compare_zg) {
                    const zg_gb = Graphemes.graphemeBreak(@intCast(cp1), @intCast(cp2), &zg.graphemes, &zg_state);
                    if (gb != zg_gb) {
                        std.log.warn("[zg mismatch] cp1={x} cp2={x} gb={} zg_gb={} state={} zg_state={}", .{
                            cp1,
                            cp2,
                            gb,
                            zg_gb,
                            state,
                            zg_state,
                        });
                    }
                }
                if (compare_ziglyph) {
                    const ziglyph_gb = ziglyph.graphemeBreak(@intCast(cp1), @intCast(cp2), &ziglyph_state);
                    if (gb != ziglyph_gb) {
                        std.log.warn("[ziglyph mismatch] cp1={x} cp2={x} gb={} ziglyph_gb={} state={} ziglyph_state={}", .{
                            cp1,
                            cp2,
                            gb,
                            ziglyph_gb,
                            state,
                            ziglyph_state,
                        });
                    }
                }
            }
        }
    }

    if (test_all or std.mem.eql(u8, args[1], "width")) {
        std.log.info("============== testing codepoint width ==============", .{});

        for (min..max) |cp| {
            if (cp % 0x10000 == 0) std.log.info("progress: cp={x}", .{cp});

            const t = table.get(@intCast(cp));
            if (compare_zg) {
                // Reference widths are clamped to 0..2 before comparing
                // against the table's width.
                const zg_width = @min(2, @max(0, DisplayWidth.codePointWidth(zg.display_width, @intCast(cp))));
                if (t.width != zg_width) {
                    std.log.warn("[zg mismatch] cp={x} t={} zg={}", .{ cp, t.width, zg_width });
                }
            }
            if (compare_ziglyph) {
                // Ask ziglyph itself for the width; calling zg a second time
                // would make this comparison a duplicate of the one above.
                const ziglyph_width = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half)));
                if (t.width != ziglyph_width) {
                    std.log.warn("[ziglyph mismatch] cp={x} t={} ziglyph={}", .{ cp, t.width, ziglyph_width });
                }
            }
        }
    }
}
|
||||
|
||||
pub const std_options: std.Options = .{
|
||||
.log_level = .debug,
|
||||
};
|
||||
|
@ -8,7 +8,8 @@ const lut = @import("lut.zig");
|
||||
graphemes: Graphemes,
|
||||
display_width: DisplayWidth,
|
||||
|
||||
fn init(alloc: std.mem.Allocator) !props {
|
||||
// Public only for unicode-test
|
||||
pub fn init(alloc: std.mem.Allocator) !props {
|
||||
const graphemes = try Graphemes.init(alloc);
|
||||
return .{
|
||||
.graphemes = graphemes,
|
||||
@ -16,7 +17,8 @@ fn init(alloc: std.mem.Allocator) !props {
|
||||
};
|
||||
}
|
||||
|
||||
fn deinit(self: *props, alloc: std.mem.Allocator) void {
|
||||
// Public only for unicode-test
|
||||
pub fn deinit(self: *props, alloc: std.mem.Allocator) void {
|
||||
self.graphemes.deinit(alloc);
|
||||
self.display_width.deinit(alloc);
|
||||
}
|
||||
@ -180,22 +182,3 @@ pub fn main() !void {
|
||||
// t.stage3.len,
|
||||
// });
|
||||
}
|
||||
|
||||
// This is not very fast in debug modes, so its commented by default.
|
||||
// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
|
||||
//test "tables match zg" {
|
||||
// const testing = std.testing;
|
||||
//
|
||||
// const display_width = try DisplayWidth.init(std.testing.allocator);
|
||||
// defer display_width.deinit(std.testing.allocator);
|
||||
//
|
||||
// const min = 0xFF + 1; // start outside ascii
|
||||
// for (min..0x110000) |cp| {
|
||||
// const t = table.get(@intCast(cp));
|
||||
// const zg = @min(2, @max(0, DisplayWidth.codePointWidth(display_width, @intCast(cp))));
|
||||
// if (t.width != zg) {
|
||||
// std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg });
|
||||
// try testing.expect(false);
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
|
Reference in New Issue
Block a user