mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-25 13:16:11 +03:00
comparing to old version of table (ziglyph based)
This commit is contained in:
@ -3,6 +3,7 @@ const UnicodeTest = @This();
|
||||
const std = @import("std");
|
||||
const Config = @import("Config.zig");
|
||||
const SharedDeps = @import("SharedDeps.zig");
|
||||
const UnicodeTables = @import("UnicodeTables.zig");
|
||||
|
||||
/// The unicode test executable.
|
||||
exe: *std.Build.Step.Compile,
|
||||
@ -27,6 +28,7 @@ pub fn init(b: *std.Build, cfg: *const Config, deps: *const SharedDeps) !Unicode
|
||||
// Add the shared dependencies
|
||||
_ = try deps.add(exe);
|
||||
|
||||
// Add ziglyph just for unicode-test
|
||||
if (b.lazyDependency("ziglyph", .{
|
||||
.target = cfg.target,
|
||||
.optimize = cfg.optimize,
|
||||
@ -34,6 +36,15 @@ pub fn init(b: *std.Build, cfg: *const Config, deps: *const SharedDeps) !Unicode
|
||||
exe.root_module.addImport("ziglyph", dep.module("ziglyph"));
|
||||
}
|
||||
|
||||
// Add the old version of the unicode tables
|
||||
const old_unicode_tables = try UnicodeTables.init(b);
|
||||
old_unicode_tables.run.addArg("old");
|
||||
|
||||
old_unicode_tables.output.addStepDependencies(&exe.step);
|
||||
exe.root_module.addAnonymousImport("old_unicode_tables", .{
|
||||
.root_source_file = old_unicode_tables.output,
|
||||
});
|
||||
|
||||
return .{
|
||||
.exe = exe,
|
||||
.install_step = install_step,
|
||||
|
@ -6,6 +6,9 @@ const Config = @import("Config.zig");
|
||||
/// The exe.
|
||||
exe: *std.Build.Step.Compile,
|
||||
|
||||
/// The run artifact for the exe.
|
||||
run: *std.Build.Step.Run,
|
||||
|
||||
/// The output path for the unicode tables
|
||||
output: std.Build.LazyPath,
|
||||
|
||||
@ -28,9 +31,18 @@ pub fn init(b: *std.Build) !UnicodeTables {
|
||||
exe.root_module.addImport("DisplayWidth", dep.module("DisplayWidth"));
|
||||
}
|
||||
|
||||
// Only used if we're building the old unicode tables
|
||||
if (b.lazyDependency("ziglyph", .{
|
||||
.target = b.graph.host,
|
||||
})) |dep| {
|
||||
exe.root_module.addImport("ziglyph", dep.module("ziglyph"));
|
||||
}
|
||||
|
||||
const run = b.addRunArtifact(exe);
|
||||
|
||||
return .{
|
||||
.exe = exe,
|
||||
.run = run,
|
||||
.output = run.captureStdOut(),
|
||||
};
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ const std = @import("std");
|
||||
const props = @import("props.zig");
|
||||
const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
|
||||
const table = props.table;
|
||||
const oldTable = props.oldTable;
|
||||
|
||||
/// Determines if there is a grapheme break between two codepoints. This
|
||||
/// must be called sequentially maintaining the state between calls.
|
||||
@ -22,6 +23,19 @@ pub fn graphemeBreak(cp1: u21, cp2: u21, state: *BreakState) bool {
|
||||
return value.result;
|
||||
}
|
||||
|
||||
/// Only used for unicode-test.
|
||||
pub fn oldGraphemeBreak(cp1: u21, cp2: u21, state: *BreakState) bool {
|
||||
const value = Precompute.data[
|
||||
(Precompute.Key{
|
||||
.gbc1 = oldTable.get(cp1).grapheme_boundary_class,
|
||||
.gbc2 = oldTable.get(cp2).grapheme_boundary_class,
|
||||
.state = state.*,
|
||||
}).index()
|
||||
];
|
||||
state.* = value.state;
|
||||
return value.result;
|
||||
}
|
||||
|
||||
/// The state that must be maintained between calls to `graphemeBreak`.
|
||||
pub const BreakState = packed struct(u2) {
|
||||
extended_pictographic: bool = false,
|
||||
|
@ -14,14 +14,24 @@ test {
|
||||
|
||||
/// Build Ghostty with `zig build -Doptimize=ReleaseFast -Demit-unicode-test`.
|
||||
///
|
||||
/// Usage: ./zig-out/bin/unicode-test [grapheme|width|all] [zg|ziglyph|all]
|
||||
///
|
||||
/// grapheme: this will verify the grapheme break implementation. This
|
||||
/// iterates over billions of codepoints so it is SLOW.
|
||||
/// Usage: ./zig-out/bin/unicode-test [width|class|break|all] [zg|ziglyph|old|all]
|
||||
///
|
||||
/// width: this verifies the table codepoint widths match
|
||||
/// class: this verifies the table grapheme boundary classes match
|
||||
/// break: this will verify the grapheme break implementation. This
|
||||
/// iterates over billions of codepoints so it is SLOW.
|
||||
///
|
||||
/// zg: compare grapheme/width against zg
|
||||
/// ziglyph: compare grapheme/width against ziglyph
|
||||
/// old: compare grapheme against old implementation
|
||||
///
|
||||
/// Note: To enable `old` comparisons, uncomment sections of these files
|
||||
/// (search for "old"):
|
||||
/// * ./main.zig (this file)
|
||||
/// * ./props.zig
|
||||
/// * ./grapheme.zig
|
||||
/// * src/build/GhosttyUnicodeTest.zig
|
||||
/// * src/build/UnicodeTables.zig
|
||||
pub fn main() !void {
|
||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||
defer _ = gpa.deinit();
|
||||
@ -42,59 +52,12 @@ pub fn main() !void {
|
||||
const compareAll = args.len < 3 or std.mem.eql(u8, args[2], "all");
|
||||
const compareZg = compareAll or std.mem.eql(u8, args[2], "zg");
|
||||
const compareZiglyph = compareAll or std.mem.eql(u8, args[2], "ziglyph");
|
||||
const compareOld = compareAll or std.mem.eql(u8, args[2], "old");
|
||||
|
||||
// Set the min and max to control the test range.
|
||||
const min = 0;
|
||||
const max = 0x110000;
|
||||
|
||||
var state: GraphemeBreakState = .{};
|
||||
var zg_state: Graphemes.State = .{};
|
||||
var ziglyph_state: u3 = 0;
|
||||
|
||||
if (testAll or std.mem.eql(u8, args[1], "grapheme")) {
|
||||
std.log.info("============== testing grapheme break ===============", .{});
|
||||
|
||||
for (min..max) |cp1| {
|
||||
if (cp1 % 0x100 == 0) std.log.info("progress: cp1={x}", .{cp1});
|
||||
|
||||
if (cp1 == '\r' or cp1 == '\n' or
|
||||
Graphemes.gbp(zg.graphemes, @intCast(cp1)) == .Control) continue;
|
||||
|
||||
for (min..max) |cp2| {
|
||||
if (cp2 == '\r' or cp2 == '\n' or
|
||||
Graphemes.gbp(zg.graphemes, @intCast(cp1)) == .Control) continue;
|
||||
|
||||
const gb = graphemeBreak(@intCast(cp1), @intCast(cp2), &state);
|
||||
if (compareZg) {
|
||||
const zg_gb = Graphemes.graphemeBreak(@intCast(cp1), @intCast(cp2), &zg.graphemes, &zg_state);
|
||||
if (gb != zg_gb) {
|
||||
std.log.warn("[zg mismatch] cp1={x} cp2={x} gb={} zg_gb={} state={} zg_state={}", .{
|
||||
cp1,
|
||||
cp2,
|
||||
gb,
|
||||
zg_gb,
|
||||
state,
|
||||
zg_state,
|
||||
});
|
||||
}
|
||||
}
|
||||
if (compareZiglyph) {
|
||||
const ziglyph_gb = ziglyph.graphemeBreak(@intCast(cp1), @intCast(cp2), &ziglyph_state);
|
||||
if (gb != ziglyph_gb) {
|
||||
std.log.warn("[ziglyph mismatch] cp1={x} cp2={x} gb={} ziglyph_gb={} state={} ziglyph_state={}", .{
|
||||
cp1,
|
||||
cp2,
|
||||
gb,
|
||||
ziglyph_gb,
|
||||
state,
|
||||
ziglyph_state,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (testAll or std.mem.eql(u8, args[1], "width")) {
|
||||
std.log.info("============== testing codepoint width ==============", .{});
|
||||
|
||||
@ -116,6 +79,149 @@ pub fn main() !void {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (testAll or std.mem.eql(u8, args[1], "class")) {
|
||||
std.log.info("============== testing grapheme boundary class ======", .{});
|
||||
|
||||
for (min..max) |cp| {
|
||||
if (cp % 0x10000 == 0) std.log.info("progress: cp={x}", .{cp});
|
||||
|
||||
const t = table.get(@intCast(cp));
|
||||
|
||||
if (compareZg) {
|
||||
const gbp = Graphemes.gbp(zg.graphemes, @intCast(cp));
|
||||
const matches = switch (t.grapheme_boundary_class) {
|
||||
.extended_pictographic_base => gbp == .Emoji_Modifier_Base,
|
||||
.emoji_modifier => gbp == .Emoji_Modifier,
|
||||
.extended_pictographic => gbp == .Extended_Pictographic,
|
||||
.L => gbp == .L,
|
||||
.V => gbp == .V,
|
||||
.T => gbp == .T,
|
||||
.LV => gbp == .LV,
|
||||
.LVT => gbp == .LVT,
|
||||
.prepend => gbp == .Prepend,
|
||||
.extend => gbp == .Extend,
|
||||
.zwj => gbp == .ZWJ,
|
||||
.spacing_mark => gbp == .SpacingMark,
|
||||
.regional_indicator => gbp == .Regional_Indicator,
|
||||
.invalid => gbp == .none or gbp == .Control or gbp == .CR or gbp == .LF,
|
||||
};
|
||||
|
||||
if (!matches) {
|
||||
std.log.warn("[zg mismatch] cp={x} t={} zg={}", .{ cp, t.grapheme_boundary_class, gbp });
|
||||
}
|
||||
}
|
||||
|
||||
if (compareZiglyph) {
|
||||
const ziglyph_valid = (ziglyph.emoji.isEmojiModifierBase(@intCast(cp)) or
|
||||
ziglyph.emoji.isEmojiModifier(@intCast(cp)) or
|
||||
ziglyph.emoji.isExtendedPictographic(@intCast(cp)) or
|
||||
ziglyph.grapheme_break.isL(@intCast(cp)) or
|
||||
ziglyph.grapheme_break.isV(@intCast(cp)) or
|
||||
ziglyph.grapheme_break.isT(@intCast(cp)) or
|
||||
ziglyph.grapheme_break.isLv(@intCast(cp)) or
|
||||
ziglyph.grapheme_break.isLvt(@intCast(cp)) or
|
||||
ziglyph.grapheme_break.isPrepend(@intCast(cp)) or
|
||||
ziglyph.grapheme_break.isExtend(@intCast(cp)) or
|
||||
ziglyph.grapheme_break.isZwj(@intCast(cp)) or
|
||||
ziglyph.grapheme_break.isSpacingmark(@intCast(cp)) or
|
||||
ziglyph.grapheme_break.isRegionalIndicator(@intCast(cp)));
|
||||
|
||||
const matches = switch (t.grapheme_boundary_class) {
|
||||
.extended_pictographic_base => ziglyph.emoji.isEmojiModifierBase(@intCast(cp)),
|
||||
.emoji_modifier => ziglyph.emoji.isEmojiModifier(@intCast(cp)),
|
||||
.extended_pictographic => ziglyph.emoji.isExtendedPictographic(@intCast(cp)),
|
||||
.L => ziglyph.grapheme_break.isL(@intCast(cp)),
|
||||
.V => ziglyph.grapheme_break.isV(@intCast(cp)),
|
||||
.T => ziglyph.grapheme_break.isT(@intCast(cp)),
|
||||
.LV => ziglyph.grapheme_break.isLv(@intCast(cp)),
|
||||
.LVT => ziglyph.grapheme_break.isLvt(@intCast(cp)),
|
||||
.prepend => ziglyph.grapheme_break.isPrepend(@intCast(cp)),
|
||||
.extend => ziglyph.grapheme_break.isExtend(@intCast(cp)),
|
||||
.zwj => ziglyph.grapheme_break.isZwj(@intCast(cp)),
|
||||
.spacing_mark => ziglyph.grapheme_break.isSpacingmark(@intCast(cp)),
|
||||
.regional_indicator => ziglyph.grapheme_break.isRegionalIndicator(@intCast(cp)),
|
||||
.invalid => !ziglyph_valid,
|
||||
};
|
||||
|
||||
if (!matches) {
|
||||
std.log.warn("[ziglyph mismatch] cp={x} t={} ziglyph_valid={}", .{ cp, t.grapheme_boundary_class, ziglyph_valid });
|
||||
}
|
||||
}
|
||||
|
||||
if (compareOld) {
|
||||
const oldT = props.oldTable.get(@intCast(cp));
|
||||
if (oldT.grapheme_boundary_class != t.grapheme_boundary_class) {
|
||||
std.log.warn("[old mismatch] cp={x} t={} old={}", .{ cp, t.grapheme_boundary_class, oldT.grapheme_boundary_class });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var state: GraphemeBreakState = .{};
|
||||
var old_state: GraphemeBreakState = .{};
|
||||
var zg_state: Graphemes.State = .{};
|
||||
var ziglyph_state: u3 = 0;
|
||||
|
||||
if (testAll or std.mem.eql(u8, args[1], "break")) {
|
||||
std.log.info("============== testing grapheme break ===============", .{});
|
||||
|
||||
for (min..max) |cp1| {
|
||||
if (cp1 % 0x100 == 0) std.log.info("progress: cp1={x}", .{cp1});
|
||||
|
||||
if (cp1 == '\r' or cp1 == '\n' or
|
||||
Graphemes.gbp(zg.graphemes, @intCast(cp1)) == .Control) continue;
|
||||
|
||||
for (min..max) |cp2| {
|
||||
if (cp2 == '\r' or cp2 == '\n' or
|
||||
Graphemes.gbp(zg.graphemes, @intCast(cp1)) == .Control) continue;
|
||||
|
||||
const gb = graphemeBreak(@intCast(cp1), @intCast(cp2), &state);
|
||||
|
||||
if (compareOld) {
|
||||
const old_gb = grapheme.oldGraphemeBreak(@intCast(cp1), @intCast(cp2), &old_state);
|
||||
if (gb != old_gb) {
|
||||
std.log.warn("[old mismatch] cp1={x} cp2={x} gb={} old_gb={} state={} old_state={}", .{
|
||||
cp1,
|
||||
cp2,
|
||||
gb,
|
||||
old_gb,
|
||||
state,
|
||||
old_state,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (compareZg) {
|
||||
const zg_gb = Graphemes.graphemeBreak(@intCast(cp1), @intCast(cp2), &zg.graphemes, &zg_state);
|
||||
if (gb != zg_gb) {
|
||||
std.log.warn("[zg mismatch] cp1={x} cp2={x} gb={} zg_gb={} state={} zg_state={}", .{
|
||||
cp1,
|
||||
cp2,
|
||||
gb,
|
||||
zg_gb,
|
||||
state,
|
||||
zg_state,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (compareZiglyph) {
|
||||
const ziglyph_gb = ziglyph.graphemeBreak(@intCast(cp1), @intCast(cp2), &ziglyph_state);
|
||||
if (gb != ziglyph_gb) {
|
||||
std.log.warn("[ziglyph mismatch] cp1={x} cp2={x} gb={} ziglyph_gb={} state={} ziglyph_state={}", .{
|
||||
cp1,
|
||||
cp2,
|
||||
gb,
|
||||
ziglyph_gb,
|
||||
state,
|
||||
ziglyph_state,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub const std_options: std.Options = .{
|
||||
|
@ -8,6 +8,9 @@ const lut = @import("lut.zig");
|
||||
graphemes: Graphemes,
|
||||
display_width: DisplayWidth,
|
||||
|
||||
// Whether to use the old implementation based on ziglyph.
|
||||
old: bool = false,
|
||||
|
||||
// Public only for unicode-test
|
||||
pub fn init(alloc: std.mem.Allocator) !props {
|
||||
const graphemes = try Graphemes.init(alloc);
|
||||
@ -36,6 +39,19 @@ pub const table = table: {
|
||||
};
|
||||
};
|
||||
|
||||
/// The old lookup tables for Ghostty. Only used for unicode-test.
|
||||
pub const oldTable = table: {
|
||||
// This is only available after running main() below as part of the Ghostty
|
||||
// build.zig, but due to Zig's lazy analysis we can still reference it here.
|
||||
const generated = @import("old_unicode_tables").Tables(Properties);
|
||||
const Tables = lut.Tables(Properties);
|
||||
break :table Tables{
|
||||
.stage1 = &generated.stage1,
|
||||
.stage2 = &generated.stage2,
|
||||
.stage3 = &generated.stage3,
|
||||
};
|
||||
};
|
||||
|
||||
/// Property set per codepoint that Ghostty cares about.
|
||||
///
|
||||
/// Adding to this lets you find new properties but also potentially makes
|
||||
@ -120,6 +136,36 @@ pub const GraphemeBoundaryClass = enum(u4) {
|
||||
};
|
||||
}
|
||||
|
||||
pub fn initOld(cp: u21) GraphemeBoundaryClass {
|
||||
const ziglyph = @import("ziglyph");
|
||||
|
||||
// We special-case modifier bases because we should not break
|
||||
// if a modifier isn't next to a base.
|
||||
if (ziglyph.emoji.isEmojiModifierBase(cp)) {
|
||||
assert(ziglyph.emoji.isExtendedPictographic(cp));
|
||||
return .extended_pictographic_base;
|
||||
}
|
||||
|
||||
if (ziglyph.emoji.isEmojiModifier(cp)) return .emoji_modifier;
|
||||
if (ziglyph.emoji.isExtendedPictographic(cp)) return .extended_pictographic;
|
||||
if (ziglyph.grapheme_break.isL(cp)) return .L;
|
||||
if (ziglyph.grapheme_break.isV(cp)) return .V;
|
||||
if (ziglyph.grapheme_break.isT(cp)) return .T;
|
||||
if (ziglyph.grapheme_break.isLv(cp)) return .LV;
|
||||
if (ziglyph.grapheme_break.isLvt(cp)) return .LVT;
|
||||
if (ziglyph.grapheme_break.isPrepend(cp)) return .prepend;
|
||||
if (ziglyph.grapheme_break.isExtend(cp)) return .extend;
|
||||
if (ziglyph.grapheme_break.isZwj(cp)) return .zwj;
|
||||
if (ziglyph.grapheme_break.isSpacingmark(cp)) return .spacing_mark;
|
||||
if (ziglyph.grapheme_break.isRegionalIndicator(cp)) return .regional_indicator;
|
||||
|
||||
// This is obviously not INVALID invalid, there is SOME grapheme
|
||||
// boundary class for every codepoint. But we don't care about
|
||||
// anything that doesn't fit into the above categories.
|
||||
|
||||
return .invalid;
|
||||
}
|
||||
|
||||
/// Returns true if this is an extended pictographic type. This
|
||||
/// should be used instead of comparing the enum value directly
|
||||
/// because we classify multiple.
|
||||
@ -145,7 +191,8 @@ pub fn get(ctx: props, cp: u21) !Properties {
|
||||
|
||||
return .{
|
||||
.width = @intCast(@min(2, @max(0, zg_width))),
|
||||
.grapheme_boundary_class = .init(ctx, cp),
|
||||
//.grapheme_boundary_class = .init(ctx, cp),
|
||||
.grapheme_boundary_class = if (ctx.old) .initOld(cp) else .init(ctx, cp),
|
||||
};
|
||||
}
|
||||
}
|
||||
@ -161,9 +208,16 @@ pub fn main() !void {
|
||||
defer arena_state.deinit();
|
||||
const alloc = arena_state.allocator();
|
||||
|
||||
const args = try std.process.argsAlloc(alloc);
|
||||
defer std.process.argsFree(alloc, args);
|
||||
|
||||
var self = try init(alloc);
|
||||
defer self.deinit(alloc);
|
||||
|
||||
if (args.len > 1 and std.mem.eql(u8, args[1], "old")) {
|
||||
self.old = true;
|
||||
}
|
||||
|
||||
const gen: lut.Generator(
|
||||
Properties,
|
||||
props,
|
||||
|
Reference in New Issue
Block a user