mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-14 15:56:13 +03:00
unicode: use packed struct for break state
This commit is contained in:
20
build.zig
20
build.zig
@ -217,17 +217,6 @@ pub fn build(b: *std.Build) !void {
|
|||||||
// Add our benchmarks
|
// Add our benchmarks
|
||||||
try benchSteps(b, target, config, emit_bench);
|
try benchSteps(b, target, config, emit_bench);
|
||||||
|
|
||||||
{
|
|
||||||
const exe = b.addExecutable(.{
|
|
||||||
.name = "grapheme-verify",
|
|
||||||
.root_source_file = .{ .path = "src/unicode/grapheme.zig" },
|
|
||||||
.target = target,
|
|
||||||
.optimize = .ReleaseFast,
|
|
||||||
});
|
|
||||||
b.installArtifact(exe);
|
|
||||||
_ = try addDeps(b, exe, config);
|
|
||||||
}
|
|
||||||
|
|
||||||
// We only build an exe if we have a runtime set.
|
// We only build an exe if we have a runtime set.
|
||||||
const exe_: ?*std.Build.Step.Compile = if (config.app_runtime != .none) b.addExecutable(.{
|
const exe_: ?*std.Build.Step.Compile = if (config.app_runtime != .none) b.addExecutable(.{
|
||||||
.name = "ghostty",
|
.name = "ghostty",
|
||||||
@ -1093,15 +1082,6 @@ fn addDeps(
|
|||||||
step.linkLibrary(utfcpp_dep.artifact("utfcpp"));
|
step.linkLibrary(utfcpp_dep.artifact("utfcpp"));
|
||||||
try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin());
|
try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin());
|
||||||
|
|
||||||
// utf8proc
|
|
||||||
const utf8proc_dep = b.dependency("utf8proc", .{
|
|
||||||
.target = target,
|
|
||||||
.optimize = optimize,
|
|
||||||
});
|
|
||||||
step.root_module.addImport("utf8proc", utf8proc_dep.module("utf8proc"));
|
|
||||||
step.linkLibrary(utf8proc_dep.artifact("utf8proc"));
|
|
||||||
try static_libs.append(utf8proc_dep.artifact("utf8proc").getEmittedBin());
|
|
||||||
|
|
||||||
// Spirv-Cross
|
// Spirv-Cross
|
||||||
step.linkLibrary(spirv_cross_dep.artifact("spirv_cross"));
|
step.linkLibrary(spirv_cross_dep.artifact("spirv_cross"));
|
||||||
try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin());
|
try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin());
|
||||||
|
@ -39,7 +39,6 @@
|
|||||||
.pixman = .{ .path = "./pkg/pixman" },
|
.pixman = .{ .path = "./pkg/pixman" },
|
||||||
.simdutf = .{ .path = "./pkg/simdutf" },
|
.simdutf = .{ .path = "./pkg/simdutf" },
|
||||||
.utfcpp = .{ .path = "./pkg/utfcpp" },
|
.utfcpp = .{ .path = "./pkg/utfcpp" },
|
||||||
.utf8proc = .{ .path = "./pkg/utf8proc" },
|
|
||||||
.zlib = .{ .path = "./pkg/zlib" },
|
.zlib = .{ .path = "./pkg/zlib" },
|
||||||
|
|
||||||
// Shader translation
|
// Shader translation
|
||||||
|
@ -27,8 +27,6 @@ hyperfine \
|
|||||||
"./zig-out/bin/bench-grapheme-break --mode=noop${ARGS} </tmp/ghostty_bench_data" \
|
"./zig-out/bin/bench-grapheme-break --mode=noop${ARGS} </tmp/ghostty_bench_data" \
|
||||||
-n ziglyph \
|
-n ziglyph \
|
||||||
"./zig-out/bin/bench-grapheme-break --mode=ziglyph${ARGS} </tmp/ghostty_bench_data" \
|
"./zig-out/bin/bench-grapheme-break --mode=ziglyph${ARGS} </tmp/ghostty_bench_data" \
|
||||||
-n utf8proc \
|
|
||||||
"./zig-out/bin/bench-grapheme-break --mode=utf8proc${ARGS} </tmp/ghostty_bench_data" \
|
|
||||||
-n table \
|
-n table \
|
||||||
"./zig-out/bin/bench-grapheme-break --mode=table${ARGS} </tmp/ghostty_bench_data"
|
"./zig-out/bin/bench-grapheme-break --mode=table${ARGS} </tmp/ghostty_bench_data"
|
||||||
|
|
||||||
|
@ -46,8 +46,6 @@ const Mode = enum {
|
|||||||
|
|
||||||
/// Ghostty's table-based approach.
|
/// Ghostty's table-based approach.
|
||||||
table,
|
table,
|
||||||
|
|
||||||
utf8proc,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const std_options = struct {
|
pub const std_options = struct {
|
||||||
@ -75,7 +73,6 @@ pub fn main() !void {
|
|||||||
.noop => try benchNoop(reader, buf),
|
.noop => try benchNoop(reader, buf),
|
||||||
.ziglyph => try benchZiglyph(reader, buf),
|
.ziglyph => try benchZiglyph(reader, buf),
|
||||||
.table => try benchTable(reader, buf),
|
.table => try benchTable(reader, buf),
|
||||||
.utf8proc => try benchUtf8proc(reader, buf),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -101,7 +98,7 @@ noinline fn benchTable(
|
|||||||
buf: []u8,
|
buf: []u8,
|
||||||
) !void {
|
) !void {
|
||||||
var d: UTF8Decoder = .{};
|
var d: UTF8Decoder = .{};
|
||||||
var state: u3 = 0;
|
var state: unicode.GraphemeBreakState = .{};
|
||||||
var cp1: u21 = 0;
|
var cp1: u21 = 0;
|
||||||
while (true) {
|
while (true) {
|
||||||
const n = try reader.read(buf);
|
const n = try reader.read(buf);
|
||||||
@ -145,29 +142,3 @@ noinline fn benchZiglyph(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
noinline fn benchUtf8proc(
|
|
||||||
reader: anytype,
|
|
||||||
buf: []u8,
|
|
||||||
) !void {
|
|
||||||
const utf8proc = @import("utf8proc");
|
|
||||||
var d: UTF8Decoder = .{};
|
|
||||||
var state: i32 = 0;
|
|
||||||
var cp1: u21 = 0;
|
|
||||||
while (true) {
|
|
||||||
const n = try reader.read(buf);
|
|
||||||
if (n == 0) break;
|
|
||||||
|
|
||||||
// Using stream.next directly with a for loop applies a naive
|
|
||||||
// scalar approach.
|
|
||||||
for (buf[0..n]) |c| {
|
|
||||||
const cp_, const consumed = d.next(c);
|
|
||||||
assert(consumed);
|
|
||||||
if (cp_) |cp2| {
|
|
||||||
const v = utf8proc.graphemeBreakStateful(cp1, @intCast(cp2), &state);
|
|
||||||
buf[0] = @intCast(@intFromBool(v));
|
|
||||||
cp1 = cp2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -18,19 +18,27 @@ const table = props.table;
|
|||||||
/// line feeds, and carriage returns are expected to be filtered out before
|
/// line feeds, and carriage returns are expected to be filtered out before
|
||||||
/// calling this function. This is because this function is tuned for
|
/// calling this function. This is because this function is tuned for
|
||||||
/// Ghostty.
|
/// Ghostty.
|
||||||
pub fn graphemeBreak(cp1: u21, cp2: u21, state: *u3) bool {
|
pub fn graphemeBreak(cp1: u21, cp2: u21, state: *BreakState) bool {
|
||||||
const gbc1 = table.get(cp1).grapheme_boundary_class;
|
const gbc1 = table.get(cp1).grapheme_boundary_class;
|
||||||
const gbc2 = table.get(cp2).grapheme_boundary_class;
|
const gbc2 = table.get(cp2).grapheme_boundary_class;
|
||||||
return graphemeBreakClass(gbc1, gbc2, state);
|
return graphemeBreakClass(gbc1, gbc2, state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The state that must be maintained between calls to `graphemeBreak`.
|
||||||
|
pub const BreakState = packed struct(u2) {
|
||||||
|
extended_pictographic: bool = false,
|
||||||
|
regional_indicator: bool = false,
|
||||||
|
};
|
||||||
|
|
||||||
fn graphemeBreakClass(
|
fn graphemeBreakClass(
|
||||||
gbc1: GraphemeBoundaryClass,
|
gbc1: GraphemeBoundaryClass,
|
||||||
gbc2: GraphemeBoundaryClass,
|
gbc2: GraphemeBoundaryClass,
|
||||||
state: *u3,
|
state: *BreakState,
|
||||||
) bool {
|
) bool {
|
||||||
// GB11: Emoji Extend* ZWJ x Emoji
|
// GB11: Emoji Extend* ZWJ x Emoji
|
||||||
if (!hasXpic(state) and gbc1 == .extended_pictographic) setXpic(state);
|
if (!state.extended_pictographic and gbc1 == .extended_pictographic) {
|
||||||
|
state.extended_pictographic = true;
|
||||||
|
}
|
||||||
|
|
||||||
// These two properties are ignored because they're not relevant to
|
// These two properties are ignored because they're not relevant to
|
||||||
// Ghostty -- they're filtered out before checking grapheme boundaries.
|
// Ghostty -- they're filtered out before checking grapheme boundaries.
|
||||||
@ -67,56 +75,27 @@ fn graphemeBreakClass(
|
|||||||
|
|
||||||
// GB12, GB13: RI x RI
|
// GB12, GB13: RI x RI
|
||||||
if (gbc1 == .regional_indicator and gbc2 == .regional_indicator) {
|
if (gbc1 == .regional_indicator and gbc2 == .regional_indicator) {
|
||||||
if (hasRegional(state)) {
|
if (state.regional_indicator) {
|
||||||
unsetRegional(state);
|
state.regional_indicator = false;
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
setRegional(state);
|
state.regional_indicator = true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// GB11: Emoji Extend* ZWJ x Emoji
|
// GB11: Emoji Extend* ZWJ x Emoji
|
||||||
if (hasXpic(state) and
|
if (state.extended_pictographic and
|
||||||
gbc1 == .zwj and
|
gbc1 == .zwj and
|
||||||
gbc2 == .extended_pictographic)
|
gbc2 == .extended_pictographic)
|
||||||
{
|
{
|
||||||
unsetXpic(state);
|
state.extended_pictographic = false;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const State = packed struct(u2) {
|
|
||||||
extended_pictographic: bool = false,
|
|
||||||
regional_indicator: bool = false,
|
|
||||||
};
|
|
||||||
|
|
||||||
fn hasXpic(state: *const u3) bool {
|
|
||||||
return state.* & 1 == 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn setXpic(state: *u3) void {
|
|
||||||
state.* |= 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn unsetXpic(state: *u3) void {
|
|
||||||
state.* ^= 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn hasRegional(state: *const u3) bool {
|
|
||||||
return state.* & 2 == 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn setRegional(state: *u3) void {
|
|
||||||
state.* |= 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn unsetRegional(state: *u3) void {
|
|
||||||
state.* ^= 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// If you build this file as a binary, we will verify the grapheme break
|
/// If you build this file as a binary, we will verify the grapheme break
|
||||||
/// implementation. This iterates over billions of codepoints so it is
|
/// implementation. This iterates over billions of codepoints so it is
|
||||||
/// SLOW. It's not meant to be run in CI, but it's useful for debugging.
|
/// SLOW. It's not meant to be run in CI, but it's useful for debugging.
|
||||||
@ -127,7 +106,7 @@ pub fn main() !void {
|
|||||||
const min = 0;
|
const min = 0;
|
||||||
const max = std.math.maxInt(u21) + 1;
|
const max = std.math.maxInt(u21) + 1;
|
||||||
|
|
||||||
var state: u3 = 0;
|
var state: BreakState = .{};
|
||||||
var zg_state: u3 = 0;
|
var zg_state: u3 = 0;
|
||||||
for (min..max) |cp1| {
|
for (min..max) |cp1| {
|
||||||
if (cp1 % 1000 == 0) std.log.warn("progress cp1={}", .{cp1});
|
if (cp1 % 1000 == 0) std.log.warn("progress cp1={}", .{cp1});
|
||||||
|
@ -5,6 +5,7 @@ const props = @import("props.zig");
|
|||||||
pub const table = props.table;
|
pub const table = props.table;
|
||||||
pub const Properties = props.Properties;
|
pub const Properties = props.Properties;
|
||||||
pub const graphemeBreak = grapheme.graphemeBreak;
|
pub const graphemeBreak = grapheme.graphemeBreak;
|
||||||
|
pub const GraphemeBreakState = grapheme.BreakState;
|
||||||
|
|
||||||
test {
|
test {
|
||||||
@import("std").testing.refAllDecls(@This());
|
@import("std").testing.refAllDecls(@This());
|
||||||
|
Reference in New Issue
Block a user