mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-17 09:16:11 +03:00
Merge pull request #1494 from mitchellh/grapheme-break
Optimized grapheme break detection (6x speedup)
This commit is contained in:
33
src/bench/grapheme-break.sh
Executable file
33
src/bench/grapheme-break.sh
Executable file
@ -0,0 +1,33 @@
|
|||||||
|
#!/usr/bin/env bash
#
# This is a trivial helper script to help run the grapheme-break benchmark.
# You probably want to tweak this script depending on what you're
# trying to measure.

# Options:
# - "ascii", uniform random ASCII bytes
# - "utf8", uniform random unicode characters, encoded as utf8
# - "rand", pure random data, will contain many invalid code sequences.
DATA="utf8"
SIZE="25000000"

# Add additional arguments. They are appended after the mode flag, separated
# by a space, so no leading space is needed here.
ARGS=""

# Generate the benchmark input ahead of time so it's not included in the time.
./zig-out/bin/bench-stream --mode="gen-$DATA" | head -c "$SIZE" > /tmp/ghostty_bench_data
#cat ~/Downloads/JAPANESEBIBLE.txt > /tmp/ghostty_bench_data

# Uncomment to instead use the contents of `stream.txt` as input.
# yes $(cat ./stream.txt) | head -c $SIZE > /tmp/ghostty_bench_data

# Each command string is run by hyperfine through a shell, which is what
# performs the stdin redirection from the generated data file.
hyperfine \
  --warmup 10 \
  -n noop \
  "./zig-out/bin/bench-grapheme-break --mode=noop ${ARGS} </tmp/ghostty_bench_data" \
  -n ziglyph \
  "./zig-out/bin/bench-grapheme-break --mode=ziglyph ${ARGS} </tmp/ghostty_bench_data" \
  -n table \
  "./zig-out/bin/bench-grapheme-break --mode=table ${ARGS} </tmp/ghostty_bench_data"
|
||||||
|
|
||||||
|
|
144
src/bench/grapheme-break.zig
Normal file
144
src/bench/grapheme-break.zig
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
//! This benchmark tests the throughput of grapheme break calculation.
|
||||||
|
//! This is a common operation in terminal character printing for terminals
|
||||||
|
//! that support grapheme clustering.
|
||||||
|
//!
|
||||||
|
//! This will consume all of the available stdin, so you should run it
|
||||||
|
//! with `head` in a pipe to restrict. For example, to test ASCII input:
|
||||||
|
//!
|
||||||
|
//! bench-stream --mode=gen-ascii | head -c 50M | bench-grapheme-break --mode=ziglyph
|
||||||
|
//!
|
||||||
|
|
||||||
|
const std = @import("std");
|
||||||
|
const assert = std.debug.assert;
|
||||||
|
const Allocator = std.mem.Allocator;
|
||||||
|
const ArenaAllocator = std.heap.ArenaAllocator;
|
||||||
|
const ziglyph = @import("ziglyph");
|
||||||
|
const cli = @import("../cli.zig");
|
||||||
|
const simd = @import("../simd/main.zig");
|
||||||
|
const unicode = @import("../unicode/main.zig");
|
||||||
|
const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
|
||||||
|
|
||||||
|
/// Command-line options accepted by the grapheme break benchmark.
const Args = struct {
    /// Which benchmark implementation to run.
    mode: Mode = .noop,

    /// Size in bytes of the buffer used for reads. It rarely needs changing;
    /// it is an argument mainly so the value is only known at runtime, which
    /// keeps the compiler from optimizing around a constant size.
    @"buffer-size": usize = 4096,

    /// Set by the CLI parser so that `deinit` can release it.
    _arena: ?ArenaAllocator = null,

    /// Releases the parser's arena (if one was set) and invalidates `self`.
    pub fn deinit(self: *Args) void {
        defer self.* = undefined;
        const arena = self._arena orelse return;
        arena.deinit();
    }
};
|
||||||
|
|
||||||
|
/// The benchmark implementations selectable with `--mode`.
const Mode = enum {
    /// The baseline mode reads the data from the fd into a buffer and runs
    /// it through the UTF-8 decoder without doing any grapheme break check.
    /// This shows the overhead of reading and decoding the input and
    /// establishes a baseline for the other modes.
    noop,

    /// Use the ziglyph library to determine whether there is a grapheme
    /// break between each pair of consecutive codepoints.
    ziglyph,

    /// Ghostty's table-based approach.
    table,
};
|
||||||
|
|
||||||
|
/// Standard library overrides for the benchmark binary: log everything down
/// to debug level.
pub const std_options = struct {
    pub const log_level: std.log.Level = std.log.Level.debug;
};
|
||||||
|
|
||||||
|
/// Benchmark entry point: parses the CLI arguments, allocates the read
/// buffer, and feeds stdin through the benchmark selected by `--mode`.
pub fn main() !void {
    // We want to use the c allocator because it is much faster than GPA.
    const alloc = std.heap.c_allocator;

    // Parse our args
    var args: Args = .{};
    defer args.deinit();
    {
        var iter = try std.process.argsWithAllocator(alloc);
        defer iter.deinit();
        try cli.args.parse(Args, alloc, &args, &iter);
    }

    const reader = std.io.getStdIn().reader();

    // The buffer length comes from the arguments so it is only known at
    // runtime. Pair the allocation with its release right away.
    const buf = try alloc.alloc(u8, args.@"buffer-size");
    defer alloc.free(buf);

    // Run the selected benchmark until stdin is exhausted.
    switch (args.mode) {
        .noop => try benchNoop(reader, buf),
        .ziglyph => try benchZiglyph(reader, buf),
        .table => try benchTable(reader, buf),
    }
}
|
||||||
|
|
||||||
|
/// Baseline benchmark: reads `reader` into `buf` until a read returns zero
/// bytes and pushes every byte through a UTF-8 decoder, discarding the
/// decoder's output.
noinline fn benchNoop(
    reader: anytype,
    buf: []u8,
) !void {
    var decoder: UTF8Decoder = .{};
    while (true) {
        const chunk = buf[0..try reader.read(buf)];
        if (chunk.len == 0) return;

        // Plain byte-at-a-time decoding; the result is intentionally unused.
        for (chunk) |byte| _ = decoder.next(byte);
    }
}
|
||||||
|
|
||||||
|
/// Benchmarks Ghostty's table-based grapheme break check. Reads `reader`
/// into `buf` until a read returns zero bytes, decodes the bytes as UTF-8,
/// and runs `unicode.graphemeBreak` on each pair of consecutive codepoints.
noinline fn benchTable(
    reader: anytype,
    buf: []u8,
) !void {
    var decoder: UTF8Decoder = .{};
    var break_state: unicode.GraphemeBreakState = .{};
    var prev_cp: u21 = 0;
    while (true) {
        const len = try reader.read(buf);
        if (len == 0) return;

        // Decode one byte at a time (a simple scalar loop).
        for (buf[0..len]) |byte| {
            const maybe_cp, const consumed = decoder.next(byte);
            assert(consumed);

            // No complete codepoint yet: keep feeding bytes.
            const cp = maybe_cp orelse continue;

            const is_break = unicode.graphemeBreak(prev_cp, @intCast(cp), &break_state);

            // Store the result into the buffer so the computation has an
            // observable effect.
            buf[0] = @intCast(@intFromBool(is_break));
            prev_cp = cp;
        }
    }
}
|
||||||
|
|
||||||
|
/// Benchmarks ziglyph's grapheme break check. Reads `reader` into `buf`
/// until a read returns zero bytes, decodes the bytes as UTF-8, and runs
/// `ziglyph.graphemeBreak` on each pair of consecutive codepoints.
noinline fn benchZiglyph(
    reader: anytype,
    buf: []u8,
) !void {
    var decoder: UTF8Decoder = .{};
    var zg_state: u3 = 0;
    var prev_cp: u21 = 0;
    while (true) {
        const len = try reader.read(buf);
        if (len == 0) return;

        // Decode one byte at a time (a simple scalar loop).
        for (buf[0..len]) |byte| {
            const maybe_cp, const consumed = decoder.next(byte);
            assert(consumed);

            // No complete codepoint yet: keep feeding bytes.
            const cp = maybe_cp orelse continue;

            const is_break = ziglyph.graphemeBreak(prev_cp, @intCast(cp), &zg_state);

            // Store the result into the buffer so the computation has an
            // observable effect.
            buf[0] = @intCast(@intFromBool(is_break));
            prev_cp = cp;
        }
    }
}
|
@ -141,4 +141,5 @@ pub const ExeEntrypoint = enum {
|
|||||||
bench_parser,
|
bench_parser,
|
||||||
bench_stream,
|
bench_stream,
|
||||||
bench_codepoint_width,
|
bench_codepoint_width,
|
||||||
|
bench_grapheme_break,
|
||||||
};
|
};
|
||||||
|
@ -9,4 +9,5 @@ pub usingnamespace switch (build_config.exe_entrypoint) {
|
|||||||
.bench_parser => @import("bench/parser.zig"),
|
.bench_parser => @import("bench/parser.zig"),
|
||||||
.bench_stream => @import("bench/stream.zig"),
|
.bench_stream => @import("bench/stream.zig"),
|
||||||
.bench_codepoint_width => @import("bench/codepoint-width.zig"),
|
.bench_codepoint_width => @import("bench/codepoint-width.zig"),
|
||||||
|
.bench_grapheme_break => @import("bench/grapheme-break.zig"),
|
||||||
};
|
};
|
||||||
|
@ -6,7 +6,6 @@ const Terminal = @This();
|
|||||||
|
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const builtin = @import("builtin");
|
const builtin = @import("builtin");
|
||||||
const ziglyph = @import("ziglyph");
|
|
||||||
const testing = std.testing;
|
const testing = std.testing;
|
||||||
const assert = std.debug.assert;
|
const assert = std.debug.assert;
|
||||||
const Allocator = std.mem.Allocator;
|
const Allocator = std.mem.Allocator;
|
||||||
@ -786,24 +785,19 @@ pub fn print(self: *Terminal, c: u21) !void {
|
|||||||
if (prev.cell.char == 0) break :grapheme;
|
if (prev.cell.char == 0) break :grapheme;
|
||||||
|
|
||||||
const grapheme_break = brk: {
|
const grapheme_break = brk: {
|
||||||
var state: u3 = 0;
|
var state: unicode.GraphemeBreakState = .{};
|
||||||
var cp1: u21 = @intCast(prev.cell.char);
|
var cp1: u21 = @intCast(prev.cell.char);
|
||||||
if (prev.cell.attrs.grapheme) {
|
if (prev.cell.attrs.grapheme) {
|
||||||
var it = row.codepointIterator(prev.x);
|
var it = row.codepointIterator(prev.x);
|
||||||
while (it.next()) |cp2| {
|
while (it.next()) |cp2| {
|
||||||
// log.debug("cp1={x} cp2={x}", .{ cp1, cp2 });
|
// log.debug("cp1={x} cp2={x}", .{ cp1, cp2 });
|
||||||
assert(!ziglyph.graphemeBreak(
|
assert(!unicode.graphemeBreak(cp1, cp2, &state));
|
||||||
cp1,
|
|
||||||
cp2,
|
|
||||||
&state,
|
|
||||||
));
|
|
||||||
|
|
||||||
cp1 = cp2;
|
cp1 = cp2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// log.debug("cp1={x} cp2={x} end", .{ cp1, c });
|
// log.debug("cp1={x} cp2={x} end", .{ cp1, c });
|
||||||
break :brk ziglyph.graphemeBreak(cp1, c, &state);
|
break :brk unicode.graphemeBreak(cp1, c, &state);
|
||||||
};
|
};
|
||||||
|
|
||||||
// If we can NOT break, this means that "c" is part of a grapheme
|
// If we can NOT break, this means that "c" is part of a grapheme
|
||||||
|
183
src/unicode/grapheme.zig
Normal file
183
src/unicode/grapheme.zig
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
const std = @import("std");
|
||||||
|
const props = @import("props.zig");
|
||||||
|
const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
|
||||||
|
const table = props.table;
|
||||||
|
|
||||||
|
/// Reports whether there is a grapheme break between `cp1` and `cp2`.
/// Calls must be made sequentially over the input, passing the same `state`
/// each time, because the answer for a pair can depend on earlier pairs.
///
/// Control characters are NOT supported. Control characters, line feeds and
/// carriage returns are expected to be filtered out before this function is
/// called, because it is tuned for Ghostty.
///
/// The result is a single lookup in the precomputed table, keyed by the
/// boundary classes of both codepoints plus the current state; `state` is
/// overwritten with the state stored alongside the result.
pub fn graphemeBreak(cp1: u21, cp2: u21, state: *BreakState) bool {
    const key: Precompute.Key = .{
        .gbc1 = table.get(cp1).grapheme_boundary_class,
        .gbc2 = table.get(cp2).grapheme_boundary_class,
        .state = state.*,
    };
    const entry = Precompute.data[key.index()];
    state.* = entry.state;
    return entry.result;
}
|
||||||
|
|
||||||
|
/// State carried from one `graphemeBreak` call to the next. It packs into
/// two bits so that it can form part of the lookup table key.
pub const BreakState = packed struct(u2) {
    /// Set once an extended pictographic codepoint has been seen on the
    /// left-hand side; cleared when a ZWJ joins it to another one (GB11).
    extended_pictographic: bool = false,

    /// Toggled for each regional indicator pair so that they are grouped
    /// two at a time (GB12, GB13).
    regional_indicator: bool = false,
};
|
||||||
|
|
||||||
|
/// This is all the structures and data for the precomputed lookup table
/// for all possible permutations of state and grapheme boundary classes.
/// A key is 10 bits wide (2 bits of state plus two 4-bit boundary classes),
/// so the table has 2^10 slots, each holding a 3 bit value.
const Precompute = struct {
    /// Table key. The packed layout is bit-cast to a `u10` to form the index.
    const Key = packed struct(u10) {
        state: BreakState,
        gbc1: GraphemeBoundaryClass,
        gbc2: GraphemeBoundaryClass,

        /// Returns the table index for this key.
        fn index(self: Key) usize {
            return @intCast(@as(u10, @bitCast(self)));
        }
    };

    /// Table entry: the break decision plus the state to carry forward.
    const Value = packed struct(u3) {
        result: bool,
        state: BreakState,
    };

    /// The lookup table, built at compile time by running
    /// `graphemeBreakClass` for every state and every pair of declared
    /// boundary classes. Slots whose bits do not correspond to a declared
    /// boundary class are left undefined; keys are only ever built from
    /// enum values so those slots are not indexed.
    const data = precompute: {
        // One slot for every possible u10 key, i.e. indices 0..=maxInt(u10).
        var result: [std.math.maxInt(u10) + 1]Value = undefined;

        @setEvalBranchQuota(2_000);
        const info = @typeInfo(GraphemeBoundaryClass).Enum;
        for (0..std.math.maxInt(u2) + 1) |state_init| {
            for (info.fields) |field1| {
                for (info.fields) |field2| {
                    var state: BreakState = @bitCast(@as(u2, @intCast(state_init)));

                    // The key captures the state BEFORE the call; the value
                    // stores the state as updated by the call.
                    const key: Key = .{
                        .gbc1 = @field(GraphemeBoundaryClass, field1.name),
                        .gbc2 = @field(GraphemeBoundaryClass, field2.name),
                        .state = state,
                    };
                    const v = graphemeBreakClass(key.gbc1, key.gbc2, &state);
                    result[key.index()] = .{ .result = v, .state = state };
                }
            }
        }

        break :precompute result;
    };
};
|
||||||
|
|
||||||
|
/// This is the algorithm from utf8proc, expressed over boundary classes
/// instead of codepoints. It is only used offline to precompute the lookup
/// table.
///
/// Returns true when a grapheme break is allowed between a codepoint of
/// class `gbc1` and a following codepoint of class `gbc2`, updating `state`
/// as a side effect.
fn graphemeBreakClass(
    gbc1: GraphemeBoundaryClass,
    gbc2: GraphemeBoundaryClass,
    state: *BreakState,
) bool {
    // GB11 bookkeeping: remember that an emoji sequence has started.
    if (gbc1 == .extended_pictographic) state.extended_pictographic = true;

    // GB3 (CR x LF) and GB4 (Control) are not handled because they are not
    // relevant to Ghostty: those codepoints are filtered out before grapheme
    // boundaries are checked.

    // Hangul syllable sequences.
    switch (gbc1) {
        // GB6: L x (L | V | LV | LVT)
        .L => switch (gbc2) {
            .L, .V, .LV, .LVT => return false,
            else => {},
        },

        // GB7: (LV | V) x (V | T)
        .LV, .V => switch (gbc2) {
            .V, .T => return false,
            else => {},
        },

        // GB8: (LVT | T) x T
        .LVT, .T => if (gbc2 == .T) return false,

        else => {},
    }

    switch (gbc2) {
        // GB9: x (Extend | ZWJ)
        // GB9a: x SpacingMark
        .extend, .zwj, .spacing_mark => return false,
        else => {},
    }

    // GB9b: Prepend x
    if (gbc1 == .prepend) return false;

    // GB12, GB13: regional indicators pair up two at a time. The first RI x RI
    // boundary does not break and arms the flag; the next one breaks and
    // clears it.
    if (gbc1 == .regional_indicator and gbc2 == .regional_indicator) {
        const pair_complete = state.regional_indicator;
        state.regional_indicator = !pair_complete;
        return pair_complete;
    }

    // GB11: Emoji Extend* ZWJ x Emoji
    const joins_emoji = state.extended_pictographic and
        gbc1 == .zwj and
        gbc2 == .extended_pictographic;
    if (joins_emoji) state.extended_pictographic = false;

    // Everything else breaks.
    return !joins_emoji;
}
|
||||||
|
|
||||||
|
/// If you build this file as a binary, it verifies the grapheme break
/// implementation against ziglyph for every pair of values in the chosen
/// range, logging each disagreement. The default range covers every pair of
/// `u21` values, so it is SLOW. It's not meant to be run in CI, but it's
/// useful for debugging.
///
/// Both break states are shared across all pairs rather than being reset
/// for each one.
pub fn main() !void {
    const ziglyph = @import("ziglyph");

    // Set the min and max to control the test range.
    const min = 0;
    const max = std.math.maxInt(u21) + 1;

    var state: BreakState = .{};
    var zg_state: u3 = 0;
    for (min..max) |cp1| {
        if (cp1 % 1000 == 0) std.log.warn("progress cp1={}", .{cp1});

        // CR, LF and control characters are outside graphemeBreak's contract.
        if (cp1 == '\r' or cp1 == '\n') continue;
        const left: u21 = @intCast(cp1);
        if (ziglyph.grapheme_break.isControl(left)) continue;

        for (min..max) |cp2| {
            if (cp2 == '\r' or cp2 == '\n') continue;
            const right: u21 = @intCast(cp2);
            if (ziglyph.grapheme_break.isControl(right)) continue;

            const gb = graphemeBreak(left, right, &state);
            const zg_gb = ziglyph.graphemeBreak(left, right, &zg_state);
            if (gb == zg_gb) continue;

            std.log.warn(
                "cp1={x} cp2={x} gb={} state={} zg_gb={} zg_state={}",
                .{ cp1, cp2, gb, state, zg_gb, zg_state },
            );
        }
    }
}
|
||||||
|
|
||||||
|
/// Standard library overrides used when this file is built as a binary:
/// log at info level and above.
pub const std_options = struct {
    pub const log_level: std.log.Level = std.log.Level.info;
};
|
@ -1,8 +1,11 @@
|
|||||||
pub const lut = @import("lut.zig");
|
pub const lut = @import("lut.zig");
|
||||||
|
|
||||||
|
const grapheme = @import("grapheme.zig");
|
||||||
const props = @import("props.zig");
|
const props = @import("props.zig");
|
||||||
pub const table = props.table;
|
pub const table = props.table;
|
||||||
pub const Properties = props.Properties;
|
pub const Properties = props.Properties;
|
||||||
|
pub const graphemeBreak = grapheme.graphemeBreak;
|
||||||
|
pub const GraphemeBreakState = grapheme.BreakState;
|
||||||
|
|
||||||
test {
|
test {
|
||||||
@import("std").testing.refAllDecls(@This());
|
@import("std").testing.refAllDecls(@This());
|
||||||
|
@ -27,9 +27,13 @@ pub const Properties = struct {
|
|||||||
/// becomes a 2-em dash).
|
/// becomes a 2-em dash).
|
||||||
width: u2 = 0,
|
width: u2 = 0,
|
||||||
|
|
||||||
|
/// Grapheme boundary class.
|
||||||
|
grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
|
||||||
|
|
||||||
// Needed for lut.Generator
|
// Needed for lut.Generator
|
||||||
pub fn eql(a: Properties, b: Properties) bool {
|
pub fn eql(a: Properties, b: Properties) bool {
|
||||||
return a.width == b.width;
|
return a.width == b.width and
|
||||||
|
a.grapheme_boundary_class == b.grapheme_boundary_class;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Needed for lut.Generator
|
// Needed for lut.Generator
|
||||||
@ -41,17 +45,64 @@ pub const Properties = struct {
|
|||||||
) !void {
|
) !void {
|
||||||
_ = layout;
|
_ = layout;
|
||||||
_ = opts;
|
_ = opts;
|
||||||
try std.fmt.format(writer, ".{{ .width= {}, }}", .{
|
try std.fmt.format(writer,
|
||||||
|
\\.{{
|
||||||
|
\\ .width= {},
|
||||||
|
\\ .grapheme_boundary_class= .{s},
|
||||||
|
\\}}
|
||||||
|
, .{
|
||||||
self.width,
|
self.width,
|
||||||
|
@tagName(self.grapheme_boundary_class),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Possible grapheme boundary classes. This isn't an exhaustive list:
/// we omit control, CR, LF, etc. because in Ghostty's usage they are
/// impossible: they're handled by the terminal before any grapheme
/// boundary check.
pub const GraphemeBoundaryClass = enum(u4) {
    invalid,
    L,
    V,
    T,
    LV,
    LVT,
    prepend,
    extend,
    zwj,
    spacing_mark,
    regional_indicator,
    extended_pictographic,

    /// Gets the grapheme boundary class for a codepoint. This is VERY
    /// SLOW. The use case for this is only in generating lookup tables.
    ///
    /// The checks run in order and the first match wins.
    pub fn init(cp: u21) GraphemeBoundaryClass {
        return if (ziglyph.emoji.isExtendedPictographic(cp))
            .extended_pictographic
        else if (ziglyph.emoji.isEmojiModifier(cp))
            // Emoji modifiers are classified as extend.
            .extend
        else if (ziglyph.grapheme_break.isL(cp))
            .L
        else if (ziglyph.grapheme_break.isV(cp))
            .V
        else if (ziglyph.grapheme_break.isT(cp))
            .T
        else if (ziglyph.grapheme_break.isLv(cp))
            .LV
        else if (ziglyph.grapheme_break.isLvt(cp))
            .LVT
        else if (ziglyph.grapheme_break.isPrepend(cp))
            .prepend
        else if (ziglyph.grapheme_break.isExtend(cp))
            .extend
        else if (ziglyph.grapheme_break.isZwj(cp))
            .zwj
        else if (ziglyph.grapheme_break.isSpacingmark(cp))
            .spacing_mark
        else if (ziglyph.grapheme_break.isRegionalIndicator(cp))
            .regional_indicator
        else
            // This is obviously not INVALID invalid, there is SOME grapheme
            // boundary class for every codepoint. But we don't care about
            // anything that doesn't fit into the above categories.
            .invalid;
    }
};
|
||||||
|
|
||||||
/// Computes the properties of a codepoint using ziglyph: its display width
/// clamped to the range 0..=2, and its grapheme boundary class.
pub fn get(cp: u21) Properties {
    const zg_width = ziglyph.display_width.codePointWidth(cp, .half);
    const clamped_width = @min(2, @max(0, zg_width));

    return .{
        .width = @intCast(clamped_width),
        .grapheme_boundary_class = GraphemeBoundaryClass.init(cp),
    };
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user