mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-15 00:06:09 +03:00
unicode: precompute grapheme break data
This commit is contained in:
@ -3,14 +3,6 @@ const props = @import("props.zig");
|
|||||||
const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
|
const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
|
||||||
const table = props.table;
|
const table = props.table;
|
||||||
|
|
||||||
// The algorithm in this file is based on the Ziglyph and utf8proc algorithm,
|
|
||||||
// only modified to use our own lookup tables.
|
|
||||||
//
|
|
||||||
// I'll note I also tried a fully precomputed table approach where all
|
|
||||||
// combinations of state and boundary classes were precomputed. It was
|
|
||||||
// marginally faster (about 2%) but the table is a few KB and I'm not
|
|
||||||
// sure it's worth it.
|
|
||||||
|
|
||||||
/// Determines if there is a grapheme break between two codepoints. This
|
/// Determines if there is a grapheme break between two codepoints. This
|
||||||
/// must be called sequentially maintaining the state between calls.
|
/// must be called sequentially maintaining the state between calls.
|
||||||
///
|
///
|
||||||
@ -19,9 +11,15 @@ const table = props.table;
|
|||||||
/// calling this function. This is because this function is tuned for
|
/// calling this function. This is because this function is tuned for
|
||||||
/// Ghostty.
|
/// Ghostty.
|
||||||
pub fn graphemeBreak(cp1: u21, cp2: u21, state: *BreakState) bool {
|
pub fn graphemeBreak(cp1: u21, cp2: u21, state: *BreakState) bool {
|
||||||
const gbc1 = table.get(cp1).grapheme_boundary_class;
|
const value = Precompute.data[
|
||||||
const gbc2 = table.get(cp2).grapheme_boundary_class;
|
(Precompute.Key{
|
||||||
return graphemeBreakClass(gbc1, gbc2, state);
|
.gbc1 = table.get(cp1).grapheme_boundary_class,
|
||||||
|
.gbc2 = table.get(cp2).grapheme_boundary_class,
|
||||||
|
.state = state.*,
|
||||||
|
}).index()
|
||||||
|
];
|
||||||
|
state.* = value.state;
|
||||||
|
return value.result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The state that must be maintained between calls to `graphemeBreak`.
|
/// The state that must be maintained between calls to `graphemeBreak`.
|
||||||
@ -30,6 +28,52 @@ pub const BreakState = packed struct(u2) {
|
|||||||
regional_indicator: bool = false,
|
regional_indicator: bool = false,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// This is all the structures and data for the precomputed lookup table
|
||||||
|
/// for all possible permutations of state and grapheme boundary classes.
|
||||||
|
/// Precomputation only requires 2^10 keys of 3 bit values so the whole
|
||||||
|
/// table is less than 1KB.
|
||||||
|
const Precompute = struct {
|
||||||
|
const Key = packed struct(u10) {
|
||||||
|
state: BreakState,
|
||||||
|
gbc1: GraphemeBoundaryClass,
|
||||||
|
gbc2: GraphemeBoundaryClass,
|
||||||
|
|
||||||
|
fn index(self: Key) usize {
|
||||||
|
return @intCast(@as(u10, @bitCast(self)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const Value = packed struct(u3) {
|
||||||
|
result: bool,
|
||||||
|
state: BreakState,
|
||||||
|
};
|
||||||
|
|
||||||
|
const data = precompute: {
|
||||||
|
var result: [std.math.maxInt(u10)]Value = undefined;
|
||||||
|
|
||||||
|
@setEvalBranchQuota(2_000);
|
||||||
|
const info = @typeInfo(GraphemeBoundaryClass).Enum;
|
||||||
|
for (0..std.math.maxInt(u2) + 1) |state_init| {
|
||||||
|
for (info.fields) |field1| {
|
||||||
|
for (info.fields) |field2| {
|
||||||
|
var state: BreakState = @bitCast(@as(u2, @intCast(state_init)));
|
||||||
|
const key: Key = .{
|
||||||
|
.gbc1 = @field(GraphemeBoundaryClass, field1.name),
|
||||||
|
.gbc2 = @field(GraphemeBoundaryClass, field2.name),
|
||||||
|
.state = state,
|
||||||
|
};
|
||||||
|
const v = graphemeBreakClass(key.gbc1, key.gbc2, &state);
|
||||||
|
result[key.index()] = .{ .result = v, .state = state };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
break :precompute result;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
/// This is the algorithm from utf8proc. We only use this offline for
|
||||||
|
/// precomputing the lookup table.
|
||||||
fn graphemeBreakClass(
|
fn graphemeBreakClass(
|
||||||
gbc1: GraphemeBoundaryClass,
|
gbc1: GraphemeBoundaryClass,
|
||||||
gbc2: GraphemeBoundaryClass,
|
gbc2: GraphemeBoundaryClass,
|
||||||
|
@ -60,7 +60,7 @@ pub const Properties = struct {
|
|||||||
/// Possible grapheme boundary classes. This isn't an exhaustive list:
|
/// Possible grapheme boundary classes. This isn't an exhaustive list:
|
||||||
/// we omit control, CR, LF, etc. because in Ghostty's usage that are
|
/// we omit control, CR, LF, etc. because in Ghostty's usage that are
|
||||||
/// impossible because they're handled by the terminal.
|
/// impossible because they're handled by the terminal.
|
||||||
pub const GraphemeBoundaryClass = enum {
|
pub const GraphemeBoundaryClass = enum(u4) {
|
||||||
invalid,
|
invalid,
|
||||||
L,
|
L,
|
||||||
V,
|
V,
|
||||||
|
Reference in New Issue
Block a user