unicode: remove unused

This commit is contained in:
Mitchell Hashimoto
2024-02-09 20:23:29 -08:00
parent 5f3574a4bf
commit c47ad97f62
2 changed files with 35 additions and 135 deletions

View File

@ -1,18 +1,34 @@
const std = @import("std");
const props = @import("props.zig");
const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
const table = props.table;
/// Grapheme break
// The algorithm in this file is based on the Ziglyph and utf8proc algorithm,
// only modified to use our own lookup tables.
//
// I'll note I also tried a fully precomputed table approach where all
// combinations of state and boundary classes were precomputed. It was
// marginally faster (about 2%) but the table is a few KB and I'm not
// sure it's worth it.
/// Reports whether a grapheme break falls between `cp1` and `cp2`. Call it
/// on consecutive codepoint pairs in order and hand it the same `state`
/// every time, because the state is carried from one call to the next.
///
/// Control characters, line feeds, and carriage returns are NOT supported.
/// Callers are expected to filter them out before calling, since this
/// function is tuned for Ghostty.
pub fn graphemeBreak(cp1: u21, cp2: u21, state: *u3) bool {
    // Map each codepoint to its boundary class via the lookup table, then
    // let the class-based routine make the decision.
    return graphemeBreakClass(
        table.get(cp1).grapheme_boundary_class,
        table.get(cp2).grapheme_boundary_class,
        state,
    );
}
fn graphemeBreakClass(
gbc1: GraphemeBoundaryClass,
gbc2: GraphemeBoundaryClass,
state: *u3,
) bool {
// GB11: Emoji Extend* ZWJ x Emoji
if (!hasXpic(state) and gbc1 == .extended_pictographic) setXpic(state);
@ -72,107 +88,10 @@ pub fn graphemeBreak(cp1: u21, cp2: u21, state: *u3) bool {
return true;
}
const emoji = @import("ziglyph").emoji;
const gbp = @import("ziglyph").grapheme_break;
/// True when `cp` is a carriage return, a line feed, or a codepoint that
/// ziglyph's grapheme break module reports as Control.
fn isBreaker(cp: u21) bool {
    return switch (cp) {
        '\r', '\n' => true,
        else => gbp.isControl(cp),
    };
}
/// Grapheme break check built on ziglyph's per-codepoint predicates (`gbp`
/// and `emoji`). Returns true when there is a break between `cp1` and
/// `cp2`, and false when one of the rules below joins them.
///
/// `state` holds the extended-pictographic and regional-indicator flags that
/// are read and updated through the `has*`/`set*`/`unset*` helpers, so the
/// same pointer must be passed for consecutive pairs.
///
/// Every rule that matches logs its UAX #29 rule name at warn level before
/// returning. A pair that matches no rule returns true without logging.
pub fn zg_graphemeBreak(
    cp1: u21,
    cp2: u21,
    state: *u3,
) bool {
    // GB11: Emoji Extend* ZWJ x Emoji
    // Record that an extended pictographic has been seen so the ZWJ check
    // at the bottom can tell whether it is inside an emoji sequence.
    if (!hasXpic(state) and emoji.isExtendedPictographic(cp1)) setXpic(state);

    // GB3: CR x LF
    if (cp1 == '\r' and cp2 == '\n') {
        std.log.warn("GB3", .{});
        return false;
    }

    // GB4: Control
    if (isBreaker(cp1)) {
        std.log.warn("GB4", .{});
        return true;
    }

    // GB6: Hangul L x (L|V|LV|LVT)
    if (gbp.isL(cp1)) {
        if (gbp.isL(cp2) or
            gbp.isV(cp2) or
            gbp.isLv(cp2) or
            gbp.isLvt(cp2))
        {
            std.log.warn("GB6", .{});
            return false;
        }
    }

    // GB7: Hangul (LV | V) x (V | T)
    if (gbp.isLv(cp1) or gbp.isV(cp1)) {
        if (gbp.isV(cp2) or
            gbp.isT(cp2))
        {
            std.log.warn("GB7", .{});
            return false;
        }
    }

    // GB8: Hangul (LVT | T) x T
    if (gbp.isLvt(cp1) or gbp.isT(cp1)) {
        if (gbp.isT(cp2)) {
            std.log.warn("GB8", .{});
            return false;
        }
    }

    // GB9: x (Extend | ZWJ)
    // This rule is GB9 in UAX #29; GB9b is the Prepend rule further down,
    // so it is logged under its own name to keep the two distinguishable.
    if (gbp.isExtend(cp2) or gbp.isZwj(cp2)) {
        std.log.warn("GB9", .{});
        return false;
    }

    // GB9a: x Spacing
    if (gbp.isSpacingmark(cp2)) {
        std.log.warn("GB9a", .{});
        return false;
    }

    // GB9b: Prepend x
    if (gbp.isPrepend(cp1) and !isBreaker(cp2)) {
        std.log.warn("GB9b cp1={x} prepend={}", .{ cp1, gbp.isPrepend(cp1) });
        return false;
    }

    // GB12, GB13: RI x RI
    // The regional flag toggles on each RI/RI pair: the first pair joins
    // and sets the flag, the next pair breaks and clears it.
    if (gbp.isRegionalIndicator(cp1) and gbp.isRegionalIndicator(cp2)) {
        if (hasRegional(state)) {
            unsetRegional(state);
            std.log.warn("GB12", .{});
            return true;
        } else {
            std.log.warn("GB13", .{});
            setRegional(state);
            return false;
        }
    }

    // GB11: Emoji Extend* ZWJ x Emoji
    if (hasXpic(state) and
        gbp.isZwj(cp1) and
        emoji.isExtendedPictographic(cp2))
    {
        std.log.warn("GB11", .{});
        unsetXpic(state);
        return false;
    }

    // No rule matched: break.
    return true;
}
// NOTE(review): the break functions visible in this file take a raw `*u3`
// state rather than this struct — confirm whether `State` is still used.
/// Two boolean flags packed into a `u2`. Both default to false.
const State = packed struct(u2) {
// Presumably mirrors the "xpic" flag used for GB11 — confirm.
extended_pictographic: bool = false,
// Presumably mirrors the "regional" flag used for GB12/GB13 — confirm.
regional_indicator: bool = false,
};
fn hasXpic(state: *const u3) bool {
return state.* & 1 == 1;
@ -204,15 +123,19 @@ fn unsetRegional(state: *u3) void {
pub fn main() !void {
const ziglyph = @import("ziglyph");
// Set the min and max to control the test range.
const min = 0;
const max = std.math.maxInt(u21) + 1;
var state: u3 = 0;
var zg_state: u3 = 0;
for (0..std.math.maxInt(u21) + 1) |cp1| {
for (min..max) |cp1| {
if (cp1 % 1000 == 0) std.log.warn("progress cp1={}", .{cp1});
if (cp1 == '\r' or cp1 == '\n' or
ziglyph.grapheme_break.isControl(@intCast(cp1))) continue;
for (0..std.math.maxInt(u21) + 1) |cp2| {
for (min..max) |cp2| {
if (cp2 == '\r' or cp2 == '\n' or
ziglyph.grapheme_break.isControl(@intCast(cp2))) continue;
@ -235,27 +158,3 @@ pub fn main() !void {
/// Options struct declared alongside `main`; the only setting it carries is
/// a log level of `.info`.
// NOTE(review): presumably read by the standard library as the root
// `std_options` override — confirm against the targeted Zig version.
pub const std_options = struct {
pub const log_level: std.log.Level = .info;
};
// test "matches ziglyph specific" {
// const testing = std.testing;
//
// var state: u3 = 0;
// var zg_state: u3 = 0;
//
// const cp1 = 0x20;
// const cp2 = 0x300;
//
// const gb = graphemeBreak(@intCast(cp1), @intCast(cp2), &state);
// const zg_gb = zg_graphemeBreak(@intCast(cp1), @intCast(cp2), &zg_state);
// if (gb != zg_gb) {
// std.log.warn("cp1={x} cp2={x} gb={} state={} zg_gb={} zg_state={}", .{
// cp1,
// cp2,
// gb,
// state,
// zg_gb,
// zg_state,
// });
// try testing.expect(false);
// }
// }

View File

@ -1,9 +1,10 @@
pub const lut = @import("lut.zig");
pub usingnamespace @import("grapheme.zig");
const grapheme = @import("grapheme.zig");
const props = @import("props.zig");
pub const table = props.table;
pub const Properties = props.Properties;
pub const graphemeBreak = grapheme.graphemeBreak;
test {
@import("std").testing.refAllDecls(@This());