mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-25 13:16:11 +03:00
185 lines
5.6 KiB
Zig
185 lines
5.6 KiB
Zig
const props = @This();
|
|
const std = @import("std");
|
|
const assert = std.debug.assert;
|
|
const Graphemes = @import("Graphemes");
|
|
const DisplayWidth = @import("DisplayWidth");
|
|
const lut = @import("lut.zig");
|
|
|
|
graphemes: Graphemes,
|
|
display_width: DisplayWidth,
|
|
|
|
// Public only for unicode-test
|
|
pub fn init(alloc: std.mem.Allocator) !props {
|
|
const graphemes = try Graphemes.init(alloc);
|
|
return .{
|
|
.graphemes = graphemes,
|
|
.display_width = try DisplayWidth.initWithGraphemes(alloc, graphemes),
|
|
};
|
|
}
|
|
|
|
// Public only for unicode-test
|
|
pub fn deinit(self: *props, alloc: std.mem.Allocator) void {
|
|
self.graphemes.deinit(alloc);
|
|
self.display_width.deinit(alloc);
|
|
}
|
|
|
|
/// The lookup tables for Ghostty.
|
|
pub const table = table: {
|
|
// This is only available after running main() below as part of the Ghostty
|
|
// build.zig, but due to Zig's lazy analysis we can still reference it here.
|
|
const generated = @import("unicode_tables").Tables(Properties);
|
|
const Tables = lut.Tables(Properties);
|
|
break :table Tables{
|
|
.stage1 = &generated.stage1,
|
|
.stage2 = &generated.stage2,
|
|
.stage3 = &generated.stage3,
|
|
};
|
|
};
|
|
|
|
/// Property set per codepoint that Ghostty cares about.
|
|
///
|
|
/// Adding to this lets you find new properties but also potentially makes
|
|
/// our lookup tables less efficient. Any changes to this should run the
|
|
/// benchmarks in src/bench to verify that we haven't regressed.
|
|
pub const Properties = struct {
|
|
/// Codepoint width. We clamp to [0, 2] since Ghostty handles control
|
|
/// characters and we max out at 2 for wide characters (i.e. 3-em dash
|
|
/// becomes a 2-em dash).
|
|
width: u2 = 0,
|
|
|
|
/// Grapheme boundary class.
|
|
grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
|
|
|
|
// Needed for lut.Generator
|
|
pub fn eql(a: Properties, b: Properties) bool {
|
|
return a.width == b.width and
|
|
a.grapheme_boundary_class == b.grapheme_boundary_class;
|
|
}
|
|
|
|
// Needed for lut.Generator
|
|
pub fn format(
|
|
self: Properties,
|
|
comptime layout: []const u8,
|
|
opts: std.fmt.FormatOptions,
|
|
writer: anytype,
|
|
) !void {
|
|
_ = layout;
|
|
_ = opts;
|
|
try std.fmt.format(writer,
|
|
\\.{{
|
|
\\ .width= {},
|
|
\\ .grapheme_boundary_class= .{s},
|
|
\\}}
|
|
, .{
|
|
self.width,
|
|
@tagName(self.grapheme_boundary_class),
|
|
});
|
|
}
|
|
};
|
|
|
|
/// Possible grapheme boundary classes. This isn't an exhaustive list:
|
|
/// we omit control, CR, LF, etc. because in Ghostty's usage that are
|
|
/// impossible because they're handled by the terminal.
|
|
pub const GraphemeBoundaryClass = enum(u4) {
|
|
invalid,
|
|
L,
|
|
V,
|
|
T,
|
|
LV,
|
|
LVT,
|
|
prepend,
|
|
extend,
|
|
zwj,
|
|
spacing_mark,
|
|
regional_indicator,
|
|
extended_pictographic,
|
|
extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
|
|
emoji_modifier, // \p{Emoji_Modifier}
|
|
|
|
/// Gets the grapheme boundary class for a codepoint. This is VERY
|
|
/// SLOW. The use case for this is only in generating lookup tables.
|
|
pub fn init(ctx: props, cp: u21) GraphemeBoundaryClass {
|
|
return switch (Graphemes.gbp(ctx.graphemes, cp)) {
|
|
.Emoji_Modifier_Base => .extended_pictographic_base,
|
|
.Emoji_Modifier => .emoji_modifier,
|
|
.Extended_Pictographic => .extended_pictographic,
|
|
.L => .L,
|
|
.V => .V,
|
|
.T => .T,
|
|
.LV => .LV,
|
|
.LVT => .LVT,
|
|
.Prepend => .prepend,
|
|
.Extend => .extend,
|
|
.ZWJ => .zwj,
|
|
.SpacingMark => .spacing_mark,
|
|
.Regional_Indicator => .regional_indicator,
|
|
// This is obviously not INVALID invalid, there is SOME grapheme
|
|
// boundary class for every codepoint. But we don't care about
|
|
// anything that doesn't fit into the above categories.
|
|
.none, .Control, .CR, .LF => .invalid,
|
|
};
|
|
}
|
|
|
|
/// Returns true if this is an extended pictographic type. This
|
|
/// should be used instead of comparing the enum value directly
|
|
/// because we classify multiple.
|
|
pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
|
|
return switch (self) {
|
|
.extended_pictographic,
|
|
.extended_pictographic_base,
|
|
=> true,
|
|
|
|
else => false,
|
|
};
|
|
}
|
|
};
|
|
|
|
pub fn get(ctx: props, cp: u21) !Properties {
|
|
if (cp > 0x10FFFF) {
|
|
return .{
|
|
.width = 0,
|
|
.grapheme_boundary_class = .invalid,
|
|
};
|
|
} else {
|
|
const zg_width = DisplayWidth.codePointWidth(ctx.display_width, cp);
|
|
|
|
return .{
|
|
.width = @intCast(@min(2, @max(0, zg_width))),
|
|
.grapheme_boundary_class = .init(ctx, cp),
|
|
};
|
|
}
|
|
}
|
|
|
|
pub fn eql(ctx: props, a: Properties, b: Properties) bool {
|
|
_ = ctx;
|
|
return a.eql(b);
|
|
}
|
|
|
|
/// Runnable binary to generate the lookup tables and output to stdout.
|
|
pub fn main() !void {
|
|
var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
|
|
defer arena_state.deinit();
|
|
const alloc = arena_state.allocator();
|
|
|
|
var self = try init(alloc);
|
|
defer self.deinit(alloc);
|
|
|
|
const gen: lut.Generator(
|
|
Properties,
|
|
props,
|
|
) = .{ .ctx = self };
|
|
|
|
const t = try gen.generate(alloc);
|
|
defer alloc.free(t.stage1);
|
|
defer alloc.free(t.stage2);
|
|
defer alloc.free(t.stage3);
|
|
try t.writeZig(std.io.getStdOut().writer());
|
|
|
|
// Uncomment when manually debugging to see our table sizes.
|
|
// std.log.warn("stage1={} stage2={} stage3={}", .{
|
|
// t.stage1.len,
|
|
// t.stage2.len,
|
|
// t.stage3.len,
|
|
// });
|
|
}
|