From 4ae41579da37dad141697f3f8624aeaa63728b30 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Thu, 8 Feb 2024 13:21:36 -0800 Subject: [PATCH 1/7] add utf8proc back for bench --- build.zig | 9 +++++++++ build.zig.zon | 1 + pkg/utf8proc/build.zig | 37 +++++++++++++++++++++++++++++++++++ pkg/utf8proc/build.zig.zon | 11 +++++++++++ pkg/utf8proc/c.zig | 3 +++ pkg/utf8proc/main.zig | 20 +++++++++++++++++++ src/bench/codepoint-width.sh | 2 ++ src/bench/codepoint-width.zig | 29 +++++++++++++++++++++++++++ 8 files changed, 112 insertions(+) create mode 100644 pkg/utf8proc/build.zig create mode 100644 pkg/utf8proc/build.zig.zon create mode 100644 pkg/utf8proc/c.zig create mode 100644 pkg/utf8proc/main.zig diff --git a/build.zig b/build.zig index 1b234f15e..dfc6b3404 100644 --- a/build.zig +++ b/build.zig @@ -1082,6 +1082,15 @@ fn addDeps( step.linkLibrary(utfcpp_dep.artifact("utfcpp")); try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin()); + // utf8proc + const utf8proc_dep = b.dependency("utf8proc", .{ + .target = target, + .optimize = optimize, + }); + step.root_module.addImport("utf8proc", utf8proc_dep.module("utf8proc")); + step.linkLibrary(utf8proc_dep.artifact("utf8proc")); + try static_libs.append(utf8proc_dep.artifact("utf8proc").getEmittedBin()); + // Spirv-Cross step.linkLibrary(spirv_cross_dep.artifact("spirv_cross")); try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin()); diff --git a/build.zig.zon b/build.zig.zon index a694562ea..c3b958591 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -38,6 +38,7 @@ .opengl = .{ .path = "./pkg/opengl" }, .pixman = .{ .path = "./pkg/pixman" }, .simdutf = .{ .path = "./pkg/simdutf" }, + .utf8proc = .{ .path = "./pkg/utf8proc" }, .utfcpp = .{ .path = "./pkg/utfcpp" }, .zlib = .{ .path = "./pkg/zlib" }, diff --git a/pkg/utf8proc/build.zig b/pkg/utf8proc/build.zig new file mode 100644 index 000000000..a29716983 --- /dev/null +++ b/pkg/utf8proc/build.zig @@ -0,0 +1,37 @@ +const 
std = @import("std"); + +pub fn build(b: *std.Build) !void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + + const module = b.addModule("utf8proc", .{ .root_source_file = .{ .path = "main.zig" } }); + + const upstream = b.dependency("utf8proc", .{}); + const lib = b.addStaticLibrary(.{ + .name = "utf8proc", + .target = target, + .optimize = optimize, + }); + lib.linkLibC(); + + lib.addIncludePath(upstream.path("")); + module.addIncludePath(upstream.path("")); + + var flags = std.ArrayList([]const u8).init(b.allocator); + try flags.append("-DUTF8PROC_EXPORTS"); + defer flags.deinit(); + lib.addCSourceFiles(.{ + .dependency = upstream, + .files = &.{"utf8proc.c"}, + .flags = flags.items, + }); + + lib.installHeadersDirectoryOptions(.{ + .source_dir = upstream.path(""), + .install_dir = .header, + .install_subdir = "", + .include_extensions = &.{".h"}, + }); + + b.installArtifact(lib); +} diff --git a/pkg/utf8proc/build.zig.zon b/pkg/utf8proc/build.zig.zon new file mode 100644 index 000000000..cfb62de55 --- /dev/null +++ b/pkg/utf8proc/build.zig.zon @@ -0,0 +1,11 @@ +.{ + .name = "utf8proc", + .version = "2.8.0", + .paths = .{""}, + .dependencies = .{ + .utf8proc = .{ + .url = "https://github.com/JuliaStrings/utf8proc/archive/refs/tags/v2.8.0.tar.gz", + .hash = "1220056ce228a8c58f1fa66ab778f5c8965e62f720c1d30603c7d534cb7d8a605ad7", + }, + }, +} diff --git a/pkg/utf8proc/c.zig b/pkg/utf8proc/c.zig new file mode 100644 index 000000000..adeb226b0 --- /dev/null +++ b/pkg/utf8proc/c.zig @@ -0,0 +1,3 @@ +pub usingnamespace @cImport({ + @cInclude("utf8proc.h"); +}); diff --git a/pkg/utf8proc/main.zig b/pkg/utf8proc/main.zig new file mode 100644 index 000000000..a351fff4b --- /dev/null +++ b/pkg/utf8proc/main.zig @@ -0,0 +1,20 @@ +pub const c = @import("c.zig"); + +/// Given a codepoint, return a character width analogous to `wcwidth(codepoint)`, +/// except that a width of 0 is returned for non-printable codepoints +/// 
instead of -1 as in `wcwidth`. +pub fn charwidth(codepoint: u21) u8 { + return @intCast(c.utf8proc_charwidth(@intCast(codepoint))); +} + +/// Given a pair of consecutive codepoints, return whether a grapheme break is +/// permitted between them (as defined by the extended grapheme clusters in UAX#29). +pub fn graphemeBreakStateful(cp1: u21, cp2: u21, state: *i32) bool { + return c.utf8proc_grapheme_break_stateful( + @intCast(cp1), + @intCast(cp2), + state, + ); +} + +test {} diff --git a/src/bench/codepoint-width.sh b/src/bench/codepoint-width.sh index d0692bc43..8278370e3 100755 --- a/src/bench/codepoint-width.sh +++ b/src/bench/codepoint-width.sh @@ -27,6 +27,8 @@ hyperfine \ "./zig-out/bin/bench-codepoint-width --mode=noop${ARGS} try benchNoop(reader, buf), .wcwidth => try benchWcwidth(reader, buf), + .utf8proc => try benchUtf8proc(reader, buf), .ziglyph => try benchZiglyph(reader, buf), .simd => try benchSimd(reader, buf), } @@ -124,6 +128,31 @@ noinline fn benchWcwidth( } } +noinline fn benchUtf8proc( + reader: anytype, + buf: []u8, +) !void { + const utf8proc = @import("utf8proc"); + var d: UTF8Decoder = .{}; + while (true) { + const n = try reader.read(buf); + if (n == 0) break; + + // Using stream.next directly with a for loop applies a naive + // scalar approach. 
+ for (buf[0..n]) |c| { + const cp_, const consumed = d.next(c); + assert(consumed); + if (cp_) |cp| { + const width = utf8proc.charwidth(cp); + + // Write the width to the buffer to avoid it being compiled away + buf[0] = @intCast(width); + } + } + } +} + noinline fn benchZiglyph( reader: anytype, buf: []u8, From 9755d0696e59ab69c9969da7f0bc3142950168c8 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Thu, 8 Feb 2024 21:01:11 -0800 Subject: [PATCH 2/7] unicode: generate our own lookup tables --- build.zig | 37 +++++++ src/bench/codepoint-width.sh | 2 + src/bench/codepoint-width.zig | 28 ++++++ src/main_ghostty.zig | 1 + src/unicode/lut.zig | 179 ++++++++++++++++++++++++++++++++++ src/unicode/main.zig | 9 ++ src/unicode/props.zig | 105 ++++++++++++++++++++ 7 files changed, 361 insertions(+) create mode 100644 src/unicode/lut.zig create mode 100644 src/unicode/main.zig create mode 100644 src/unicode/props.zig diff --git a/build.zig b/build.zig index dfc6b3404..ff837b70d 100644 --- a/build.zig +++ b/build.zig @@ -1172,6 +1172,7 @@ fn addDeps( } try addHelp(b, step, config); + try addUnicodeTables(b, step); return static_libs; } @@ -1218,6 +1219,42 @@ fn addHelp( } } +/// Generate unicode fast lookup tables +fn addUnicodeTables( + b: *std.Build, + step_: ?*std.Build.Step.Compile, +) !void { + // Our static state between runs. 
We memoize our output to gen once + const State = struct { + var generated: ?std.Build.LazyPath = null; + }; + + const output = State.generated orelse strings: { + const exe = b.addExecutable(.{ + .name = "unigen", + .root_source_file = .{ .path = "src/unicode/props.zig" }, + .target = b.host, + }); + if (step_ == null) b.installArtifact(exe); + + const ziglyph_dep = b.dependency("ziglyph", .{ + .target = b.host, + }); + exe.root_module.addImport("ziglyph", ziglyph_dep.module("ziglyph")); + + const help_run = b.addRunArtifact(exe); + State.generated = help_run.captureStdOut(); + break :strings State.generated.?; + }; + + if (step_) |step| { + output.addStepDependencies(&step.step); + step.root_module.addAnonymousImport("unicode_tables", .{ + .root_source_file = output, + }); + } +} + /// Generate documentation (manpages, etc.) from help strings fn buildDocumentation( b: *std.Build, diff --git a/src/bench/codepoint-width.sh b/src/bench/codepoint-width.sh index 8278370e3..b6fa4733e 100755 --- a/src/bench/codepoint-width.sh +++ b/src/bench/codepoint-width.sh @@ -31,6 +31,8 @@ hyperfine \ "./zig-out/bin/bench-codepoint-width --mode=utf8proc${ARGS} try benchUtf8proc(reader, buf), .ziglyph => try benchZiglyph(reader, buf), .simd => try benchSimd(reader, buf), + .table => try benchTable(reader, buf), } } @@ -153,6 +157,30 @@ noinline fn benchUtf8proc( } } +noinline fn benchTable( + reader: anytype, + buf: []u8, +) !void { + var d: UTF8Decoder = .{}; + while (true) { + const n = try reader.read(buf); + if (n == 0) break; + + // Using stream.next directly with a for loop applies a naive + // scalar approach. 
+ for (buf[0..n]) |c| { + const cp_, const consumed = d.next(c); + assert(consumed); + if (cp_) |cp| { + const width = table.get(@intCast(cp)).width; + + // Write the width to the buffer to avoid it being compiled away + buf[0] = @intCast(width); + } + } + } +} + noinline fn benchZiglyph( reader: anytype, buf: []u8, diff --git a/src/main_ghostty.zig b/src/main_ghostty.zig index db08449f5..05f6a39ed 100644 --- a/src/main_ghostty.zig +++ b/src/main_ghostty.zig @@ -308,6 +308,7 @@ test { _ = @import("terminal/main.zig"); _ = @import("terminfo/main.zig"); _ = @import("simd/main.zig"); + _ = @import("unicode/main.zig"); // TODO _ = @import("blocking_queue.zig"); diff --git a/src/unicode/lut.zig b/src/unicode/lut.zig new file mode 100644 index 000000000..95c6a3688 --- /dev/null +++ b/src/unicode/lut.zig @@ -0,0 +1,179 @@ +const std = @import("std"); +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; + +// This whole file is based on the algorithm described here: +// https://here-be-braces.com/fast-lookup-of-unicode-properties/ + +/// Creates a type that is able to generate a 3-level lookup table +/// from a Unicode codepoint to a mapping of type Elem. The lookup table +/// generally is expected to be codegen'd and then reloaded, although it +/// can in theory be generated at runtime. +/// +/// Context must have two functions: +/// - `get(Context, u21) Elem`: returns the mapping for a given codepoint +/// - `eql(Context, Elem, Elem) bool`: returns true if two mappings are equal +/// +pub fn Generator( + comptime Elem: type, + comptime Context: type, +) type { + return struct { + const Self = @This(); + + const block_size = 256; + const Block = [block_size]u16; + + /// Mapping of a block to its index in the stage2 array. 
+ const BlockMap = std.HashMap( + Block, + u16, + struct { + pub fn hash(ctx: @This(), k: Block) u64 { + _ = ctx; + var hasher = std.hash.Wyhash.init(0); + std.hash.autoHashStrat(&hasher, k, .DeepRecursive); + return hasher.final(); + } + + pub fn eql(ctx: @This(), a: Block, b: Block) bool { + _ = ctx; + return std.mem.eql(u16, &a, &b); + } + }, + std.hash_map.default_max_load_percentage, + ); + + ctx: Context = undefined, + + /// Generate the lookup tables. The arrays in the return value + /// are owned by the caller and must be freed. + pub fn generate(self: *const Self, alloc: Allocator) !Tables(Elem) { + // Maps block => stage2 index + var blocks_map = BlockMap.init(alloc); + defer blocks_map.deinit(); + + // Our stages + var stage1 = std.ArrayList(u16).init(alloc); + defer stage1.deinit(); + var stage2 = std.ArrayList(u16).init(alloc); + defer stage2.deinit(); + var stage3 = std.ArrayList(Elem).init(alloc); + defer stage3.deinit(); + + var block: Block = undefined; + var block_len: u16 = 0; + for (0..std.math.maxInt(u21) + 1) |cp| { + // Get our block value and find the matching result value + // in our list of possible values in stage3. This way, each + // possible mapping only gets one entry in stage3. + const elem = try self.ctx.get(@as(u21, @intCast(cp))); + const block_idx = block_idx: { + for (stage3.items, 0..) |item, i| { + if (self.ctx.eql(item, elem)) break :block_idx i; + } + + const idx = stage3.items.len; + try stage3.append(elem); + break :block_idx idx; + }; + + // The block stores the mapping to the stage3 index + block[block_len] = std.math.cast(u16, block_idx) orelse return error.BlockTooLarge; + block_len += 1; + + // If we still have space and we're not done with codepoints, + // we keep building up the block. Conversely: we finalize this + // block if we've filled it or are out of codepoints. 
+ if (block_len < block_size and cp != std.math.maxInt(u21)) continue; + if (block_len < block_size) @memset(block[block_len..block_size], 0); + + // Look for the stage2 index for this block. If it doesn't exist + // we add it to stage2 and update the mapping. + const gop = try blocks_map.getOrPut(block); + if (!gop.found_existing) { + gop.value_ptr.* = std.math.cast( + u16, + stage2.items.len, + ) orelse return error.Stage2TooLarge; + for (block[0..block_len]) |entry| try stage2.append(entry); + } + + // Map stage1 => stage2 and reset our block + try stage1.append(gop.value_ptr.*); + block_len = 0; + } + + // All of our lengths must fit in a u16 for this to work + assert(stage1.items.len <= std.math.maxInt(u16)); + assert(stage2.items.len <= std.math.maxInt(u16)); + assert(stage3.items.len <= std.math.maxInt(u16)); + + const stage1_owned = try stage1.toOwnedSlice(); + errdefer alloc.free(stage1_owned); + const stage2_owned = try stage2.toOwnedSlice(); + errdefer alloc.free(stage2_owned); + const stage3_owned = try stage3.toOwnedSlice(); + errdefer alloc.free(stage3_owned); + + return .{ + .stage1 = stage1_owned, + .stage2 = stage2_owned, + .stage3 = stage3_owned, + }; + } + }; +} + +/// Creates a type that given a 3-level lookup table, can be used to +/// look up a mapping for a given codepoint, encode it out to Zig, etc. +pub fn Tables(comptime Elem: type) type { + return struct { + const Self = @This(); + + stage1: []const u16, + stage2: []const u16, + stage3: []const Elem, + + /// Given a codepoint, returns the mapping for that codepoint. + pub fn get(self: *const Self, cp: u21) Elem { + const high = cp >> 8; + const low = cp & 0xFF; + return self.stage3[self.stage2[self.stage1[high] + low]]; + } + + /// Writes the lookup table as Zig to the given writer. The + /// written file exports three constants: stage1, stage2, and + /// stage3. These can be used to rebuild the lookup table in Zig. 
+ pub fn writeZig(self: *const Self, writer: anytype) !void { + try writer.print( + \\//! This file is auto-generated. Do not edit. + \\ + \\pub fn Tables(comptime Elem: type) type {{ + \\ return struct {{ + \\pub const stage1: [{}]u16 = .{{ + , .{self.stage1.len}); + for (self.stage1) |entry| try writer.print("{},", .{entry}); + + try writer.print( + \\ + \\}}; + \\ + \\pub const stage2: [{}]u16 = .{{ + , .{self.stage2.len}); + for (self.stage2) |entry| try writer.print("{},", .{entry}); + try writer.writeAll("};"); + + try writer.print( + \\ + \\pub const stage3: [{}]Elem = .{{ + , .{self.stage3.len}); + for (self.stage3) |entry| try writer.print("{},", .{entry}); + try writer.writeAll( + \\}; + \\ }; + \\} + ); + } + }; +} diff --git a/src/unicode/main.zig b/src/unicode/main.zig new file mode 100644 index 000000000..fa0cb9fc8 --- /dev/null +++ b/src/unicode/main.zig @@ -0,0 +1,9 @@ +pub const lut = @import("lut.zig"); + +const props = @import("props.zig"); +pub const table = props.table; +pub const Properties = props.Properties; + +test { + @import("std").testing.refAllDecls(@This()); +} diff --git a/src/unicode/props.zig b/src/unicode/props.zig new file mode 100644 index 000000000..fe85844a5 --- /dev/null +++ b/src/unicode/props.zig @@ -0,0 +1,105 @@ +const props = @This(); +const std = @import("std"); +const ziglyph = @import("ziglyph"); +const lut = @import("lut.zig"); + +/// The lookup tables for Ghostty. +pub const table = table: { + // This is only available after running main() below as part of the Ghostty + // build.zig, but due to Zig's lazy analysis we can still reference it here. + const generated = @import("unicode_tables").Tables(Properties); + const Tables = lut.Tables(Properties); + break :table Tables{ + .stage1 = &generated.stage1, + .stage2 = &generated.stage2, + .stage3 = &generated.stage3, + }; +}; + +/// Property set per codepoint that Ghostty cares about. 
+/// +/// Adding to this lets you find new properties but also potentially makes +/// our lookup tables less efficient. Any changes to this should run the +/// benchmarks in src/bench to verify that we haven't regressed. +pub const Properties = struct { + /// Codepoint width. We clamp to [0, 2] since Ghostty handles control + /// characters and we max out at 2 for wide characters (e.g. 3-em dash + /// becomes a 2-em dash). + width: u2 = 0, + + // Needed for lut.Generator + pub fn eql(a: Properties, b: Properties) bool { + return a.width == b.width; + } + + // Needed for lut.Generator + pub fn format( + self: Properties, + comptime layout: []const u8, + opts: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = layout; + _ = opts; + try std.fmt.format(writer, ".{{ .width= {}, }}", .{ + self.width, + }); + } +}; + +pub fn get(cp: u21) Properties { + const zg_width = ziglyph.display_width.codePointWidth(cp, .half); + + return .{ + .width = @intCast(@min(2, @max(0, zg_width))), + }; +} + +/// Runnable binary to generate the lookup tables and output to stdout. +pub fn main() !void { + const alloc = std.heap.c_allocator; + + const gen: lut.Generator( + Properties, + struct { + pub fn get(ctx: @This(), cp: u21) !Properties { + _ = ctx; + return props.get(cp); + } + + pub fn eql(ctx: @This(), a: Properties, b: Properties) bool { + _ = ctx; + return a.eql(b); + } + }, + ) = .{}; + + const t = try gen.generate(alloc); + defer alloc.free(t.stage1); + defer alloc.free(t.stage2); + defer alloc.free(t.stage3); + try t.writeZig(std.io.getStdOut().writer()); + + // Uncomment when manually debugging to see our table sizes. + // std.log.warn("stage1={} stage2={} stage3={}", .{ + // t.stage1.len, + // t.stage2.len, + // t.stage3.len, + // }); +} + +// This is not very fast in debug modes, so it's commented out by default. +// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES. 
+// test "tables match ziglyph" { +// const testing = std.testing; +// +// const min = 0xFF + 1; // start outside ascii +// for (min..std.math.maxInt(u21)) |cp| { +// const t = table.get(@intCast(cp)); +// const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half))); +// if (t.width != zg) { +// std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg }); +// try testing.expect(false); +// } +// } +// } From f6e694bf80d67b8f3d87f43f4acaabb0b325a31b Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Thu, 8 Feb 2024 21:10:06 -0800 Subject: [PATCH 3/7] bench: update codepoint-width --- src/bench/codepoint-width.sh | 2 -- src/bench/codepoint-width.zig | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bench/codepoint-width.sh b/src/bench/codepoint-width.sh index b6fa4733e..83d01830b 100755 --- a/src/bench/codepoint-width.sh +++ b/src/bench/codepoint-width.sh @@ -29,8 +29,6 @@ hyperfine \ "./zig-out/bin/bench-codepoint-width --mode=wcwidth${ARGS} Date: Thu, 8 Feb 2024 21:10:37 -0800 Subject: [PATCH 4/7] remove utf8proc --- build.zig | 9 --------- build.zig.zon | 1 - src/bench/codepoint-width.sh | 2 -- src/bench/codepoint-width.zig | 30 +----------------------------- 4 files changed, 1 insertion(+), 41 deletions(-) diff --git a/build.zig b/build.zig index ff837b70d..4e6febe12 100644 --- a/build.zig +++ b/build.zig @@ -1082,15 +1082,6 @@ fn addDeps( step.linkLibrary(utfcpp_dep.artifact("utfcpp")); try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin()); - // utf8proc - const utf8proc_dep = b.dependency("utf8proc", .{ - .target = target, - .optimize = optimize, - }); - step.root_module.addImport("utf8proc", utf8proc_dep.module("utf8proc")); - step.linkLibrary(utf8proc_dep.artifact("utf8proc")); - try static_libs.append(utf8proc_dep.artifact("utf8proc").getEmittedBin()); - // Spirv-Cross step.linkLibrary(spirv_cross_dep.artifact("spirv_cross")); try 
static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin()); diff --git a/build.zig.zon b/build.zig.zon index c3b958591..a694562ea 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -38,7 +38,6 @@ .opengl = .{ .path = "./pkg/opengl" }, .pixman = .{ .path = "./pkg/pixman" }, .simdutf = .{ .path = "./pkg/simdutf" }, - .utf8proc = .{ .path = "./pkg/utf8proc" }, .utfcpp = .{ .path = "./pkg/utfcpp" }, .zlib = .{ .path = "./pkg/zlib" }, diff --git a/src/bench/codepoint-width.sh b/src/bench/codepoint-width.sh index 83d01830b..43304ec2e 100755 --- a/src/bench/codepoint-width.sh +++ b/src/bench/codepoint-width.sh @@ -27,8 +27,6 @@ hyperfine \ "./zig-out/bin/bench-codepoint-width --mode=noop${ARGS} try benchNoop(reader, buf), .wcwidth => try benchWcwidth(reader, buf), - .utf8proc => try benchUtf8proc(reader, buf), .ziglyph => try benchZiglyph(reader, buf), .simd => try benchSimd(reader, buf), .table => try benchTable(reader, buf), @@ -132,31 +129,6 @@ noinline fn benchWcwidth( } } -noinline fn benchUtf8proc( - reader: anytype, - buf: []u8, -) !void { - const utf8proc = @import("utf8proc"); - var d: UTF8Decoder = .{}; - while (true) { - const n = try reader.read(buf); - if (n == 0) break; - - // Using stream.next directly with a for loop applies a naive - // scalar approach. 
- for (buf[0..n]) |c| { - const cp_, const consumed = d.next(c); - assert(consumed); - if (cp_) |cp| { - const width = utf8proc.charwidth(cp); - - // Write the width to the buffer to avoid it being compiled away - buf[0] = @intCast(width); - } - } - } -} - noinline fn benchTable( reader: anytype, buf: []u8, From c838bfc1c1d59e2497c5e613a4599176eb17a3bd Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Thu, 8 Feb 2024 21:17:30 -0800 Subject: [PATCH 5/7] terminal: swap to table implementation --- src/terminal/Terminal.zig | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/terminal/Terminal.zig b/src/terminal/Terminal.zig index 5110a8d2a..f9f55ae83 100644 --- a/src/terminal/Terminal.zig +++ b/src/terminal/Terminal.zig @@ -11,6 +11,7 @@ const testing = std.testing; const assert = std.debug.assert; const Allocator = std.mem.Allocator; const simd = @import("../simd/main.zig"); +const unicode = @import("../unicode/main.zig"); const ansi = @import("ansi.zig"); const modes = @import("modes.zig"); @@ -870,13 +871,7 @@ pub fn print(self: *Terminal, c: u21) !void { // Determine the width of this character so we can handle // non-single-width characters properly. - const width: usize = @intCast(simd.codepointWidth(c)); - - // Old implementation, 3x slower on ASCII, 2x slower on CJK, etc. - // const width: usize = @intCast(@min( - // @max(0, ziglyph.display_width.codePointWidth(c, .half)), - // 2, - // )); + const width: usize = @intCast(unicode.table.get(c).width); // Note: it is possible to have a width of "3" and a width of "-1" // from ziglyph. 
We should look into those cases and handle them From 8d891fb05c05662f2e0a395f749edcccddee35a3 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Thu, 8 Feb 2024 21:28:14 -0800 Subject: [PATCH 6/7] terminal: fast-path ASCII on char width --- src/terminal/Terminal.zig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/terminal/Terminal.zig b/src/terminal/Terminal.zig index f9f55ae83..4d5616003 100644 --- a/src/terminal/Terminal.zig +++ b/src/terminal/Terminal.zig @@ -870,8 +870,10 @@ pub fn print(self: *Terminal, c: u21) !void { } // Determine the width of this character so we can handle - // non-single-width characters properly. - const width: usize = @intCast(unicode.table.get(c).width); + // non-single-width characters properly. We have a fast-path for + // byte-sized characters since they're so common. We can ignore + // control characters because they're always filtered prior. + const width: usize = if (c <= 0xFF) 1 else @intCast(unicode.table.get(c).width); // Note: it is possible to have a width of "3" and a width of "-1" // from ziglyph. We should look into those cases and handle them From a471756ee0b96a034050bcdd911f5d62d0f935fb Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Thu, 8 Feb 2024 21:34:28 -0800 Subject: [PATCH 7/7] build: unigen needs libc --- build.zig | 1 + 1 file changed, 1 insertion(+) diff --git a/build.zig b/build.zig index 4e6febe12..0669f27cc 100644 --- a/build.zig +++ b/build.zig @@ -1226,6 +1226,7 @@ fn addUnicodeTables( .root_source_file = .{ .path = "src/unicode/props.zig" }, .target = b.host, }); + exe.linkLibC(); if (step_ == null) b.installArtifact(exe); const ziglyph_dep = b.dependency("ziglyph", .{