mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-15 00:06:09 +03:00
unicode: precompute grapheme break data
This commit is contained in:
@ -3,14 +3,6 @@ const props = @import("props.zig");
|
||||
const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
|
||||
const table = props.table;
|
||||
|
||||
// The algorithm in this file is based on the Ziglyph and utf8proc algorithm,
|
||||
// only modified to use our own lookup tables.
|
||||
//
|
||||
// I'll note I also tried a fully precomputed table approach where all
|
||||
// combinations of state and boundary classes were precomputed. It was
|
||||
// marginally faster (about 2%) but the table is a few KB and I'm not
|
||||
// sure it's worth it.
|
||||
|
||||
/// Determines if there is a grapheme break between two codepoints. This
|
||||
/// must be called sequentially maintaining the state between calls.
|
||||
///
|
||||
@ -19,9 +11,15 @@ const table = props.table;
|
||||
/// calling this function. This is because this function is tuned for
|
||||
/// Ghostty.
|
||||
pub fn graphemeBreak(cp1: u21, cp2: u21, state: *BreakState) bool {
|
||||
const gbc1 = table.get(cp1).grapheme_boundary_class;
|
||||
const gbc2 = table.get(cp2).grapheme_boundary_class;
|
||||
return graphemeBreakClass(gbc1, gbc2, state);
|
||||
const value = Precompute.data[
|
||||
(Precompute.Key{
|
||||
.gbc1 = table.get(cp1).grapheme_boundary_class,
|
||||
.gbc2 = table.get(cp2).grapheme_boundary_class,
|
||||
.state = state.*,
|
||||
}).index()
|
||||
];
|
||||
state.* = value.state;
|
||||
return value.result;
|
||||
}
|
||||
|
||||
/// The state that must be maintained between calls to `graphemeBreak`.
|
||||
@ -30,6 +28,52 @@ pub const BreakState = packed struct(u2) {
|
||||
regional_indicator: bool = false,
|
||||
};
|
||||
|
||||
/// This is all the structures and data for the precomputed lookup table
|
||||
/// for all possible permutations of state and grapheme boundary classes.
|
||||
/// Precomputation only requires 2^10 keys of 3 bit values so the whole
|
||||
/// table is less than 1KB.
|
||||
const Precompute = struct {
|
||||
const Key = packed struct(u10) {
|
||||
state: BreakState,
|
||||
gbc1: GraphemeBoundaryClass,
|
||||
gbc2: GraphemeBoundaryClass,
|
||||
|
||||
fn index(self: Key) usize {
|
||||
return @intCast(@as(u10, @bitCast(self)));
|
||||
}
|
||||
};
|
||||
|
||||
const Value = packed struct(u3) {
|
||||
result: bool,
|
||||
state: BreakState,
|
||||
};
|
||||
|
||||
const data = precompute: {
|
||||
var result: [std.math.maxInt(u10)]Value = undefined;
|
||||
|
||||
@setEvalBranchQuota(2_000);
|
||||
const info = @typeInfo(GraphemeBoundaryClass).Enum;
|
||||
for (0..std.math.maxInt(u2) + 1) |state_init| {
|
||||
for (info.fields) |field1| {
|
||||
for (info.fields) |field2| {
|
||||
var state: BreakState = @bitCast(@as(u2, @intCast(state_init)));
|
||||
const key: Key = .{
|
||||
.gbc1 = @field(GraphemeBoundaryClass, field1.name),
|
||||
.gbc2 = @field(GraphemeBoundaryClass, field2.name),
|
||||
.state = state,
|
||||
};
|
||||
const v = graphemeBreakClass(key.gbc1, key.gbc2, &state);
|
||||
result[key.index()] = .{ .result = v, .state = state };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break :precompute result;
|
||||
};
|
||||
};
|
||||
|
||||
/// This is the algorithm from utf8proc. We only use this offline for
|
||||
/// precomputing the lookup table.
|
||||
fn graphemeBreakClass(
|
||||
gbc1: GraphemeBoundaryClass,
|
||||
gbc2: GraphemeBoundaryClass,
|
||||
|
@ -60,7 +60,7 @@ pub const Properties = struct {
|
||||
/// Possible grapheme boundary classes. This isn't an exhaustive list:
|
||||
/// we omit control, CR, LF, etc. because in Ghostty's usage that are
|
||||
/// impossible because they're handled by the terminal.
|
||||
pub const GraphemeBoundaryClass = enum {
|
||||
pub const GraphemeBoundaryClass = enum(u4) {
|
||||
invalid,
|
||||
L,
|
||||
V,
|
||||
|
Reference in New Issue
Block a user