diff --git a/build.zig b/build.zig index 80af88488..f020a0106 100644 --- a/build.zig +++ b/build.zig @@ -32,6 +32,10 @@ pub fn build(b: *std.Build) !void { const bench = try buildpkg.GhosttyBench.init(b, &deps); if (config.emit_bench) bench.install(); + // Ghostty unicode test exe + const unicode_test = try buildpkg.GhosttyUnicodeTest.init(b, &config, &deps); + if (config.emit_unicode_test) unicode_test.install(); + // Ghostty dist tarball const dist = try buildpkg.GhosttyDist.init(b, &config); { diff --git a/src/bench/codepoint-width.sh b/src/bench/codepoint-width.sh index 43304ec2e..d894f641a 100755 --- a/src/bench/codepoint-width.sh +++ b/src/bench/codepoint-width.sh @@ -27,6 +27,8 @@ hyperfine \ "./zig-out/bin/bench-codepoint-width --mode=noop${ARGS} try benchNoop(reader, buf), .wcwidth => try benchWcwidth(reader, buf), - .ziglyph => try benchZiglyph(reader, buf), + .zg => try benchZg(display_width, reader, buf), .simd => try benchSimd(reader, buf), .table => try benchTable(reader, buf), } @@ -155,7 +159,8 @@ noinline fn benchTable( } } -noinline fn benchZiglyph( +noinline fn benchZg( + display_width: DisplayWidth, reader: anytype, buf: []u8, ) !void { @@ -170,7 +175,7 @@ noinline fn benchZiglyph( const cp_, const consumed = d.next(c); assert(consumed); if (cp_) |cp| { - const width = ziglyph.display_width.codePointWidth(cp, .half); + const width = DisplayWidth.codePointWidth(display_width, cp); // Write the width to the buffer to avoid it being compiled away buf[0] = @intCast(width); diff --git a/src/bench/grapheme-break.sh b/src/bench/grapheme-break.sh index 832728951..02a787b2e 100755 --- a/src/bench/grapheme-break.sh +++ b/src/bench/grapheme-break.sh @@ -25,8 +25,6 @@ hyperfine \ --warmup 10 \ -n noop \ "./zig-out/bin/bench-grapheme-break --mode=noop${ARGS} try benchNoop(reader, buf), - .ziglyph => try benchZiglyph(reader, buf), .zg => try benchZg(&graphemes, reader, buf), .table => try benchTable(reader, buf), } @@ -152,28 +147,3 @@ noinline fn benchZg( } 
} } - -noinline fn benchZiglyph( - reader: anytype, - buf: []u8, -) !void { - var d: UTF8Decoder = .{}; - var state: u3 = 0; - var cp1: u21 = 0; - while (true) { - const n = try reader.read(buf); - if (n == 0) break; - - // Using stream.next directly with a for loop applies a naive - // scalar approach. - for (buf[0..n]) |c| { - const cp_, const consumed = d.next(c); - assert(consumed); - if (cp_) |cp2| { - const v = ziglyph.graphemeBreak(cp1, @intCast(cp2), &state); - buf[0] = @intCast(@intFromBool(v)); - cp1 = cp2; - } - } - } -} diff --git a/src/build/Config.zig b/src/build/Config.zig index 5f8780af9..90945a1c0 100644 --- a/src/build/Config.zig +++ b/src/build/Config.zig @@ -50,6 +50,7 @@ patch_rpath: ?[]const u8 = null, flatpak: bool = false, emit_test_exe: bool = false, emit_bench: bool = false, +emit_unicode_test: bool = false, emit_helpgen: bool = false, emit_docs: bool = false, emit_webdata: bool = false, @@ -276,6 +277,12 @@ pub fn init(b: *std.Build) !Config { "Build and install the benchmark executables.", ) orelse false; + config.emit_unicode_test = b.option( + bool, + "emit-unicode-test", + "Build and install the unicode test executable.", + ) orelse false; + config.emit_helpgen = b.option( bool, "emit-helpgen", @@ -289,6 +296,7 @@ pub fn init(b: *std.Build) !Config { ) orelse emit_docs: { // If we are emitting any other artifacts then we default to false. 
if (config.emit_bench or + config.emit_unicode_test or config.emit_test_exe or config.emit_helpgen) break :emit_docs false; @@ -337,6 +345,7 @@ pub fn init(b: *std.Build) !Config { target.result.os.tag == .macos and config.app_runtime == .none and (!config.emit_bench and + !config.emit_unicode_test and !config.emit_test_exe and !config.emit_helpgen); diff --git a/src/build/GhosttyUnicodeTest.zig b/src/build/GhosttyUnicodeTest.zig new file mode 100644 index 000000000..db3575c79 --- /dev/null +++ b/src/build/GhosttyUnicodeTest.zig @@ -0,0 +1,47 @@ +const UnicodeTest = @This(); + +const std = @import("std"); +const Config = @import("Config.zig"); +const SharedDeps = @import("SharedDeps.zig"); + +/// The unicode test executable. +exe: *std.Build.Step.Compile, + +/// The install step for the executable. +install_step: *std.Build.Step.InstallArtifact, + +pub fn init(b: *std.Build, cfg: *const Config, deps: *const SharedDeps) !UnicodeTest { + const exe: *std.Build.Step.Compile = b.addExecutable(.{ + .name = "unicode-test", + .root_module = b.createModule(.{ + .root_source_file = b.path("src/unicode/main.zig"), + .target = cfg.target, + .optimize = cfg.optimize, + .strip = cfg.strip, + .omit_frame_pointer = cfg.strip, + .unwind_tables = if (cfg.strip) .none else .sync, + }), + }); + const install_step = b.addInstallArtifact(exe, .{}); + + // Add the shared dependencies + _ = try deps.add(exe); + + if (b.lazyDependency("ziglyph", .{ + .target = cfg.target, + .optimize = cfg.optimize, + })) |dep| { + exe.root_module.addImport("ziglyph", dep.module("ziglyph")); + } + + return .{ + .exe = exe, + .install_step = install_step, + }; +} + +/// Add the unicode test exe to the install target. 
+pub fn install(self: *const UnicodeTest) void { + const b = self.install_step.step.owner; + b.getInstallStep().dependOn(&self.install_step.step); +} diff --git a/src/build/SharedDeps.zig b/src/build/SharedDeps.zig index 5ec6664f3..909f727d3 100644 --- a/src/build/SharedDeps.zig +++ b/src/build/SharedDeps.zig @@ -411,12 +411,6 @@ pub fn add( })) |dep| { step.root_module.addImport("z2d", dep.module("z2d")); } - if (b.lazyDependency("ziglyph", .{ - .target = target, - .optimize = optimize, - })) |dep| { - step.root_module.addImport("ziglyph", dep.module("ziglyph")); - } if (b.lazyDependency("zg", .{ .target = target, .optimize = optimize, diff --git a/src/build/main.zig b/src/build/main.zig index 3154d395f..aa1e57827 100644 --- a/src/build/main.zig +++ b/src/build/main.zig @@ -15,6 +15,7 @@ pub const GhosttyFrameData = @import("GhosttyFrameData.zig"); pub const GhosttyLib = @import("GhosttyLib.zig"); pub const GhosttyResources = @import("GhosttyResources.zig"); pub const GhosttyI18n = @import("GhosttyI18n.zig"); +pub const GhosttyUnicodeTest = @import("GhosttyUnicodeTest.zig"); pub const GhosttyXCFramework = @import("GhosttyXCFramework.zig"); pub const GhosttyWebdata = @import("GhosttyWebdata.zig"); pub const HelpStrings = @import("HelpStrings.zig"); diff --git a/src/unicode/grapheme.zig b/src/unicode/grapheme.zig index 2ba36161f..66ba27b12 100644 --- a/src/unicode/grapheme.zig +++ b/src/unicode/grapheme.zig @@ -149,49 +149,6 @@ fn graphemeBreakClass( return true; } -// This test will verify the grapheme break implementation. This iterates over billions of codepoints so it is SLOW. -// It's not meant to be run in CI, but it's useful for debugging. -test "grapheme break check against ziglyph" { - const ziglyph = @import("ziglyph"); - - // Set the min and max to control the test range. 
- const min = 0; - const max = std.math.maxInt(u21) + 1; - var success: bool = true; - - var state: BreakState = .{}; - var zg_state: u3 = 0; - for (min..max) |cp1| { - if (cp1 == '\r' or cp1 == '\n' or - ziglyph.grapheme_break.isControl(@intCast(cp1))) continue; - - for (min..max) |cp2| { - if (cp2 == '\r' or cp2 == '\n' or - ziglyph.grapheme_break.isControl(@intCast(cp2))) continue; - - const gb = graphemeBreak(@intCast(cp1), @intCast(cp2), &state); - const zg_gb = ziglyph.graphemeBreak(@intCast(cp1), @intCast(cp2), &zg_state); - if (gb != zg_gb) { - success = false; - std.log.warn("cp1={x} cp2={x} gb={} state={} zg_gb={} zg_state={}", .{ - cp1, - cp2, - gb, - state, - zg_gb, - zg_state, - }); - } - } - } - - try std.testing.expect(success); -} - -pub const std_options = struct { - pub const log_level: std.log.Level = .info; -}; - test "grapheme break: emoji modifier" { const testing = std.testing; diff --git a/src/unicode/main.zig b/src/unicode/main.zig index e8ba05b72..8d297569a 100644 --- a/src/unicode/main.zig +++ b/src/unicode/main.zig @@ -1,3 +1,4 @@ +const std = @import("std"); pub const lut = @import("lut.zig"); const grapheme = @import("grapheme.zig"); @@ -10,3 +11,113 @@ pub const GraphemeBreakState = grapheme.BreakState; test { @import("std").testing.refAllDecls(@This()); } + +/// Build Ghostty with `zig build -Doptimize=ReleaseFast -Demit-unicode-test`. +/// +/// Usage: ./zig-out/bin/unicode-test [grapheme|width|all] [zg|ziglyph|all] +/// +/// grapheme: this will verify the grapheme break implementation. This +/// iterates over billions of codepoints so it is SLOW. 
+///
+/// width: this verifies the table codepoint widths match
+/// zg: compare grapheme/width against zg
+/// ziglyph: compare grapheme/width against ziglyph
+pub fn main() !void {
+    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+    defer _ = gpa.deinit();
+
+    const alloc = gpa.allocator();
+
+    const args = try std.process.argsAlloc(alloc);
+    defer std.process.argsFree(alloc, args);
+
+    var zg = try props.init(alloc);
+    defer zg.deinit(alloc);
+
+    const ziglyph = @import("ziglyph");
+    const Graphemes = @import("Graphemes");
+    const DisplayWidth = @import("DisplayWidth");
+
+    // Missing arguments mean "all"; `or` short-circuits, so args[1] and
+    // args[2] are only indexed when they exist.
+    const testAll = args.len < 2 or std.mem.eql(u8, args[1], "all");
+    const compareAll = args.len < 3 or std.mem.eql(u8, args[2], "all");
+    const compareZg = compareAll or std.mem.eql(u8, args[2], "zg");
+    const compareZiglyph = compareAll or std.mem.eql(u8, args[2], "ziglyph");
+
+    // Set the min and max to control the test range.
+    const min = 0;
+    const max = 0x110000;
+
+    // One break state per implementation; each is passed to every call and never reset.
+    var state: GraphemeBreakState = .{};
+    var zg_state: Graphemes.State = .{};
+    var ziglyph_state: u3 = 0;
+
+    if (testAll or std.mem.eql(u8, args[1], "grapheme")) {
+        std.log.info("============== testing grapheme break ===============", .{});
+
+        for (min..max) |cp1| {
+            if (cp1 % 0x100 == 0) std.log.info("progress: cp1={x}", .{cp1});
+
+            if (cp1 == '\r' or cp1 == '\n' or
+                Graphemes.gbp(zg.graphemes, @intCast(cp1)) == .Control) continue;
+
+            for (min..max) |cp2| {
+                // The skip must look at cp2's own class: cp1 was already
+                // filtered by the outer loop, so testing cp1 here never skips.
+                if (cp2 == '\r' or cp2 == '\n' or
+                    Graphemes.gbp(zg.graphemes, @intCast(cp2)) == .Control) continue;
+
+                const gb = graphemeBreak(@intCast(cp1), @intCast(cp2), &state);
+                if (compareZg) {
+                    const zg_gb = Graphemes.graphemeBreak(@intCast(cp1), @intCast(cp2), &zg.graphemes, &zg_state);
+                    if (gb != zg_gb) {
+                        std.log.warn(
+                            "[zg mismatch] cp1={x} cp2={x} gb={} zg_gb={} state={} zg_state={}",
+                            .{ cp1, cp2, gb, zg_gb, state, zg_state },
+                        );
+                    }
+                }
+                if (compareZiglyph) {
+                    const ziglyph_gb = ziglyph.graphemeBreak(@intCast(cp1), @intCast(cp2), &ziglyph_state);
+                    if (gb != ziglyph_gb) {
+                        std.log.warn(
+                            "[ziglyph mismatch] cp1={x} cp2={x} gb={} ziglyph_gb={} state={} ziglyph_state={}",
+                            .{ cp1, cp2, gb, ziglyph_gb, state, ziglyph_state },
+                        );
+                    }
+                }
+            }
+        }
+    }
+
+    if (testAll or std.mem.eql(u8, args[1], "width")) {
+        std.log.info("============== testing codepoint width ==============", .{});
+
+        for (min..max) |cp| {
+            if (cp % 0x10000 == 0) std.log.info("progress: cp={x}", .{cp});
+
+            const t = table.get(@intCast(cp));
+            if (compareZg) {
+                // Clamp the reference width to 0..2 before comparing with the table.
+                const zg_width = @min(2, @max(0, DisplayWidth.codePointWidth(zg.display_width, @intCast(cp))));
+                if (t.width != zg_width) {
+                    std.log.warn("[zg mismatch] cp={x} t={} zg={}", .{ cp, t.width, zg_width });
+                }
+            }
+            if (compareZiglyph) {
+                // Ask ziglyph itself rather than zg a second time, otherwise
+                // this branch only repeats the zg comparison above.
+                const ziglyph_width = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half)));
+                if (t.width != ziglyph_width) {
+                    std.log.warn("[ziglyph mismatch] cp={x} t={} ziglyph={}", .{ cp, t.width, ziglyph_width });
+                }
+            }
+        }
+    }
+}
+
+pub const std_options: std.Options = .{
+    .log_level = .debug,
+};
diff --git a/src/unicode/props.zig b/src/unicode/props.zig
index c4c147c70..86553a8bf 100644
--- a/src/unicode/props.zig
+++ b/src/unicode/props.zig
@@ -8,7 +8,8 @@ const lut = @import("lut.zig");
 graphemes: Graphemes,
 display_width: DisplayWidth,
 
-fn init(alloc: std.mem.Allocator) !props {
+// Public only for unicode-test
+pub fn init(alloc: std.mem.Allocator) !props {
     const graphemes = try Graphemes.init(alloc);
     return .{
         .graphemes = graphemes,
@@ -16,7 +17,8 @@ fn init(alloc: std.mem.Allocator) !props {
     };
 }
 
-fn deinit(self: *props, alloc: std.mem.Allocator) void {
+// Public only for unicode-test
+pub fn deinit(self: *props, alloc: std.mem.Allocator) void {
     self.graphemes.deinit(alloc);
     self.display_width.deinit(alloc);
 }
@@ -180,22 +182,3 @@ pub fn main() !void {
     //     t.stage3.len,
     // });
 }
-
-// This is not very fast in debug modes, so its commented by default.
-// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES. -//test "tables match zg" { -// const testing = std.testing; -// -// const display_width = try DisplayWidth.init(std.testing.allocator); -// defer display_width.deinit(std.testing.allocator); -// -// const min = 0xFF + 1; // start outside ascii -// for (min..0x110000) |cp| { -// const t = table.get(@intCast(cp)); -// const zg = @min(2, @max(0, DisplayWidth.codePointWidth(display_width, @intCast(cp)))); -// if (t.width != zg) { -// std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg }); -// try testing.expect(false); -// } -// } -//}