diff --git a/build.zig b/build.zig index fd49b7e62..0669f27cc 100644 --- a/build.zig +++ b/build.zig @@ -217,17 +217,6 @@ pub fn build(b: *std.Build) !void { // Add our benchmarks try benchSteps(b, target, config, emit_bench); - { - const exe = b.addExecutable(.{ - .name = "grapheme-verify", - .root_source_file = .{ .path = "src/unicode/grapheme.zig" }, - .target = target, - .optimize = .ReleaseFast, - }); - b.installArtifact(exe); - _ = try addDeps(b, exe, config); - } - // We only build an exe if we have a runtime set. const exe_: ?*std.Build.Step.Compile = if (config.app_runtime != .none) b.addExecutable(.{ .name = "ghostty", @@ -1093,15 +1082,6 @@ fn addDeps( step.linkLibrary(utfcpp_dep.artifact("utfcpp")); try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin()); - // utf8proc - const utf8proc_dep = b.dependency("utf8proc", .{ - .target = target, - .optimize = optimize, - }); - step.root_module.addImport("utf8proc", utf8proc_dep.module("utf8proc")); - step.linkLibrary(utf8proc_dep.artifact("utf8proc")); - try static_libs.append(utf8proc_dep.artifact("utf8proc").getEmittedBin()); - // Spirv-Cross step.linkLibrary(spirv_cross_dep.artifact("spirv_cross")); try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin()); diff --git a/build.zig.zon b/build.zig.zon index 535d51c24..a694562ea 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -39,7 +39,6 @@ .pixman = .{ .path = "./pkg/pixman" }, .simdutf = .{ .path = "./pkg/simdutf" }, .utfcpp = .{ .path = "./pkg/utfcpp" }, - .utf8proc = .{ .path = "./pkg/utf8proc" }, .zlib = .{ .path = "./pkg/zlib" }, // Shader translation diff --git a/src/bench/grapheme-break.sh b/src/bench/grapheme-break.sh index c395c3799..24f475caa 100755 --- a/src/bench/grapheme-break.sh +++ b/src/bench/grapheme-break.sh @@ -27,8 +27,6 @@ hyperfine \ "./zig-out/bin/bench-grapheme-break --mode=noop${ARGS} try benchNoop(reader, buf), .ziglyph => try benchZiglyph(reader, buf), .table => try benchTable(reader, buf), - .utf8proc => try benchUtf8proc(reader, buf), } } @@ -101,7 +98,7 @@ noinline fn benchTable( buf: []u8, ) !void { var d: UTF8Decoder = .{}; - var state: u3 = 0; + var state: unicode.GraphemeBreakState = .{}; var cp1: u21 = 0; while (true) { const n = try reader.read(buf); @@ -145,29 +142,3 @@ noinline fn benchZiglyph( } } } - -noinline fn benchUtf8proc( - reader: anytype, - buf: []u8, -) !void { - const utf8proc = @import("utf8proc"); - var d: UTF8Decoder = .{}; - var state: i32 = 0; - var cp1: u21 = 0; - while (true) { - const n = try reader.read(buf); - if (n == 0) break; - - // Using stream.next directly with a for loop applies a naive - // scalar approach. - for (buf[0..n]) |c| { - const cp_, const consumed = d.next(c); - assert(consumed); - if (cp_) |cp2| { - const v = utf8proc.graphemeBreakStateful(cp1, @intCast(cp2), &state); - buf[0] = @intCast(@intFromBool(v)); - cp1 = cp2; - } - } - } -} diff --git a/src/unicode/grapheme.zig b/src/unicode/grapheme.zig index 19437844c..d4c146e49 100644 --- a/src/unicode/grapheme.zig +++ b/src/unicode/grapheme.zig @@ -18,19 +18,27 @@ const table = props.table; /// line feeds, and carriage returns are expected to be filtered out before /// calling this function. This is because this function is tuned for /// Ghostty. -pub fn graphemeBreak(cp1: u21, cp2: u21, state: *u3) bool { +pub fn graphemeBreak(cp1: u21, cp2: u21, state: *BreakState) bool { const gbc1 = table.get(cp1).grapheme_boundary_class; const gbc2 = table.get(cp2).grapheme_boundary_class; return graphemeBreakClass(gbc1, gbc2, state); } +/// The state that must be maintained between calls to `graphemeBreak`. +pub const BreakState = packed struct(u2) { + extended_pictographic: bool = false, + regional_indicator: bool = false, +}; + fn graphemeBreakClass( gbc1: GraphemeBoundaryClass, gbc2: GraphemeBoundaryClass, - state: *u3, + state: *BreakState, ) bool { // GB11: Emoji Extend* ZWJ x Emoji - if (!hasXpic(state) and gbc1 == .extended_pictographic) setXpic(state); + if (!state.extended_pictographic and gbc1 == .extended_pictographic) { + state.extended_pictographic = true; + } // These two properties are ignored because they're not relevant to // Ghostty -- they're filtered out before checking grapheme boundaries. @@ -67,56 +75,27 @@ fn graphemeBreakClass( // GB12, GB13: RI x RI if (gbc1 == .regional_indicator and gbc2 == .regional_indicator) { - if (hasRegional(state)) { - unsetRegional(state); + if (state.regional_indicator) { + state.regional_indicator = false; return true; } else { - setRegional(state); + state.regional_indicator = true; return false; } } // GB11: Emoji Extend* ZWJ x Emoji - if (hasXpic(state) and + if (state.extended_pictographic and gbc1 == .zwj and gbc2 == .extended_pictographic) { - unsetXpic(state); + state.extended_pictographic = false; return false; } return true; } -const State = packed struct(u2) { - extended_pictographic: bool = false, - regional_indicator: bool = false, -}; - -fn hasXpic(state: *const u3) bool { - return state.* & 1 == 1; -} - -fn setXpic(state: *u3) void { - state.* |= 1; -} - -fn unsetXpic(state: *u3) void { - state.* ^= 1; -} - -fn hasRegional(state: *const u3) bool { - return state.* & 2 == 2; -} - -fn setRegional(state: *u3) void { - state.* |= 2; -} - -fn unsetRegional(state: *u3) void { - state.* ^= 2; -} - /// If you build this file as a binary, we will verify the grapheme break /// implementation. This iterates over billions of codepoints so it is /// SLOW. It's not meant to be run in CI, but it's useful for debugging. @@ -127,7 +106,7 @@ pub fn main() !void { const min = 0; const max = std.math.maxInt(u21) + 1; - var state: u3 = 0; + var state: BreakState = .{}; var zg_state: u3 = 0; for (min..max) |cp1| { if (cp1 % 1000 == 0) std.log.warn("progress cp1={}", .{cp1}); diff --git a/src/unicode/main.zig b/src/unicode/main.zig index 3cc4779ed..e8ba05b72 100644 --- a/src/unicode/main.zig +++ b/src/unicode/main.zig @@ -5,6 +5,7 @@ const props = @import("props.zig"); pub const table = props.table; pub const Properties = props.Properties; pub const graphemeBreak = grapheme.graphemeBreak; +pub const GraphemeBreakState = grapheme.BreakState; test { @import("std").testing.refAllDecls(@This());