From 4ae41579da37dad141697f3f8624aeaa63728b30 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Thu, 8 Feb 2024 13:21:36 -0800
Subject: [PATCH 1/7] add utf8proc back for bench

---
 build.zig                     |  9 +++++++++
 build.zig.zon                 |  1 +
 pkg/utf8proc/build.zig        | 37 +++++++++++++++++++++++++++++++++++
 pkg/utf8proc/build.zig.zon    | 11 +++++++++++
 pkg/utf8proc/c.zig            |  3 +++
 pkg/utf8proc/main.zig         | 20 +++++++++++++++++++
 src/bench/codepoint-width.sh  |  2 ++
 src/bench/codepoint-width.zig | 29 +++++++++++++++++++++++++++
 8 files changed, 112 insertions(+)
 create mode 100644 pkg/utf8proc/build.zig
 create mode 100644 pkg/utf8proc/build.zig.zon
 create mode 100644 pkg/utf8proc/c.zig
 create mode 100644 pkg/utf8proc/main.zig

diff --git a/build.zig b/build.zig
index 1b234f15e..dfc6b3404 100644
--- a/build.zig
+++ b/build.zig
@@ -1082,6 +1082,15 @@ fn addDeps(
     step.linkLibrary(utfcpp_dep.artifact("utfcpp"));
     try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin());
 
+    // utf8proc
+    const utf8proc_dep = b.dependency("utf8proc", .{
+        .target = target,
+        .optimize = optimize,
+    });
+    step.root_module.addImport("utf8proc", utf8proc_dep.module("utf8proc"));
+    step.linkLibrary(utf8proc_dep.artifact("utf8proc"));
+    try static_libs.append(utf8proc_dep.artifact("utf8proc").getEmittedBin());
+
     // Spirv-Cross
     step.linkLibrary(spirv_cross_dep.artifact("spirv_cross"));
     try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin());
diff --git a/build.zig.zon b/build.zig.zon
index a694562ea..c3b958591 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -38,6 +38,7 @@
         .opengl = .{ .path = "./pkg/opengl" },
         .pixman = .{ .path = "./pkg/pixman" },
         .simdutf = .{ .path = "./pkg/simdutf" },
+        .utf8proc = .{ .path = "./pkg/utf8proc" },
         .utfcpp = .{ .path = "./pkg/utfcpp" },
         .zlib = .{ .path = "./pkg/zlib" },
 
diff --git a/pkg/utf8proc/build.zig b/pkg/utf8proc/build.zig
new file mode 100644
index 000000000..a29716983
--- /dev/null
+++ b/pkg/utf8proc/build.zig
@@ -0,0 +1,37 @@
+const std = @import("std");
+
+/// Builds upstream utf8proc.c as the static library "utf8proc" and registers the
+/// "utf8proc" Zig module (main.zig) with the upstream headers on its include path.
+pub fn build(b: *std.Build) !void {
+    const target = b.standardTargetOptions(.{});
+    const optimize = b.standardOptimizeOption(.{});
+
+    const module = b.addModule("utf8proc", .{ .root_source_file = .{ .path = "main.zig" } });
+
+    const upstream = b.dependency("utf8proc", .{});
+    const lib = b.addStaticLibrary(.{
+        .name = "utf8proc",
+        .target = target,
+        .optimize = optimize,
+    });
+    lib.linkLibC();
+
+    lib.addIncludePath(upstream.path(""));
+    module.addIncludePath(upstream.path(""));
+
+    // The flag list is constant, so pass a literal slice instead of a heap-backed list.
+    lib.addCSourceFiles(.{
+        .dependency = upstream,
+        .files = &.{"utf8proc.c"},
+        .flags = &.{"-DUTF8PROC_EXPORTS"},
+    });
+
+    lib.installHeadersDirectoryOptions(.{
+        .source_dir = upstream.path(""),
+        .install_dir = .header,
+        .install_subdir = "",
+        .include_extensions = &.{".h"},
+    });
+
+    b.installArtifact(lib);
+}
diff --git a/pkg/utf8proc/build.zig.zon b/pkg/utf8proc/build.zig.zon
new file mode 100644
index 000000000..cfb62de55
--- /dev/null
+++ b/pkg/utf8proc/build.zig.zon
@@ -0,0 +1,11 @@
+.{
+    .name = "utf8proc",
+    .version = "2.8.0",
+    .paths = .{""},
+    .dependencies = .{
+        .utf8proc = .{
+            .url = "https://github.com/JuliaStrings/utf8proc/archive/refs/tags/v2.8.0.tar.gz",
+            .hash = "1220056ce228a8c58f1fa66ab778f5c8965e62f720c1d30603c7d534cb7d8a605ad7",
+        },
+    },
+}
diff --git a/pkg/utf8proc/c.zig b/pkg/utf8proc/c.zig
new file mode 100644
index 000000000..adeb226b0
--- /dev/null
+++ b/pkg/utf8proc/c.zig
@@ -0,0 +1,3 @@
+pub usingnamespace @cImport({
+    @cInclude("utf8proc.h");
+});
diff --git a/pkg/utf8proc/main.zig b/pkg/utf8proc/main.zig
new file mode 100644
index 000000000..a351fff4b
--- /dev/null
+++ b/pkg/utf8proc/main.zig
@@ -0,0 +1,20 @@
+pub const c = @import("c.zig");
+
+/// Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
+/// except that a width of 0 is returned for non-printable codepoints
+/// instead of -1 as in `wcwidth`.
+pub fn charwidth(codepoint: u21) u8 {
+    return @intCast(c.utf8proc_charwidth(@intCast(codepoint)));
+}
+
+/// Given a pair of consecutive codepoints, return whether a grapheme break is
+/// permitted between them (as defined by the extended grapheme clusters in UAX#29).
+pub fn graphemeBreakStateful(cp1: u21, cp2: u21, state: *i32) bool {
+    return c.utf8proc_grapheme_break_stateful(
+        @intCast(cp1),
+        @intCast(cp2),
+        state,
+    );
+}
+
+test {}
diff --git a/src/bench/codepoint-width.sh b/src/bench/codepoint-width.sh
index d0692bc43..8278370e3 100755
--- a/src/bench/codepoint-width.sh
+++ b/src/bench/codepoint-width.sh
@@ -27,6 +27,8 @@ hyperfine \
   "./zig-out/bin/bench-codepoint-width --mode=noop${ARGS} </tmp/ghostty_bench_data" \
   -n wcwidth \
   "./zig-out/bin/bench-codepoint-width --mode=wcwidth${ARGS} </tmp/ghostty_bench_data" \
+  -n utf8proc \
+  "./zig-out/bin/bench-codepoint-width --mode=utf8proc${ARGS} </tmp/ghostty_bench_data" \
   -n ziglyph \
   "./zig-out/bin/bench-codepoint-width --mode=ziglyph${ARGS} </tmp/ghostty_bench_data" \
   -n simd \
diff --git a/src/bench/codepoint-width.zig b/src/bench/codepoint-width.zig
index 81b13c90d..ecef13c80 100644
--- a/src/bench/codepoint-width.zig
+++ b/src/bench/codepoint-width.zig
@@ -45,6 +45,9 @@ const Mode = enum {
     /// libc wcwidth
     wcwidth,
 
+    /// Use utf8proc library to calculate the display width of each codepoint.
+    utf8proc,
+
     /// Use ziglyph library to calculate the display width of each codepoint.
     ziglyph,
 
@@ -76,6 +79,7 @@ pub fn main() !void {
     switch (args.mode) {
         .noop => try benchNoop(reader, buf),
         .wcwidth => try benchWcwidth(reader, buf),
+        .utf8proc => try benchUtf8proc(reader, buf),
         .ziglyph => try benchZiglyph(reader, buf),
         .simd => try benchSimd(reader, buf),
     }
@@ -124,6 +128,31 @@ noinline fn benchWcwidth(
     }
 }
 
+noinline fn benchUtf8proc(
+    reader: anytype,
+    buf: []u8,
+) !void {
+    const utf8proc = @import("utf8proc");
+    var d: UTF8Decoder = .{};
+    while (true) {
+        const n = try reader.read(buf);
+        if (n == 0) break;
+
+        // Using stream.next directly with a for loop applies a naive
+        // scalar approach.
+        for (buf[0..n]) |c| {
+            const cp_, const consumed = d.next(c);
+            assert(consumed);
+            if (cp_) |cp| {
+                const width = utf8proc.charwidth(cp);
+
+                // Write the width to the buffer to avoid it being compiled away
+                buf[0] = @intCast(width);
+            }
+        }
+    }
+}
+
 noinline fn benchZiglyph(
     reader: anytype,
     buf: []u8,

From 9755d0696e59ab69c9969da7f0bc3142950168c8 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Thu, 8 Feb 2024 21:01:11 -0800
Subject: [PATCH 2/7] unicode: generate our own lookup tables

---
 build.zig                     |  37 +++++++
 src/bench/codepoint-width.sh  |   2 +
 src/bench/codepoint-width.zig |  28 ++++++
 src/main_ghostty.zig          |   1 +
 src/unicode/lut.zig           | 179 ++++++++++++++++++++++++++++++++++
 src/unicode/main.zig          |   9 ++
 src/unicode/props.zig         | 105 ++++++++++++++++++++
 7 files changed, 361 insertions(+)
 create mode 100644 src/unicode/lut.zig
 create mode 100644 src/unicode/main.zig
 create mode 100644 src/unicode/props.zig

diff --git a/build.zig b/build.zig
index dfc6b3404..ff837b70d 100644
--- a/build.zig
+++ b/build.zig
@@ -1172,6 +1172,7 @@ fn addDeps(
     }
 
     try addHelp(b, step, config);
+    try addUnicodeTables(b, step);
 
     return static_libs;
 }
@@ -1218,6 +1219,42 @@ fn addHelp(
     }
 }
 
+/// Generate unicode fast lookup tables
+fn addUnicodeTables(
+    b: *std.Build,
+    step_: ?*std.Build.Step.Compile,
+) !void {
+    // Our static state between runs. We memoize our output to gen once
+    const State = struct {
+        var generated: ?std.Build.LazyPath = null;
+    };
+
+    const output = State.generated orelse strings: {
+        const exe = b.addExecutable(.{
+            .name = "unigen",
+            .root_source_file = .{ .path = "src/unicode/props.zig" },
+            .target = b.host,
+        });
+        if (step_ == null) b.installArtifact(exe);
+
+        const ziglyph_dep = b.dependency("ziglyph", .{
+            .target = b.host,
+        });
+        exe.root_module.addImport("ziglyph", ziglyph_dep.module("ziglyph"));
+
+        const help_run = b.addRunArtifact(exe);
+        State.generated = help_run.captureStdOut();
+        break :strings State.generated.?;
+    };
+
+    if (step_) |step| {
+        output.addStepDependencies(&step.step);
+        step.root_module.addAnonymousImport("unicode_tables", .{
+            .root_source_file = output,
+        });
+    }
+}
+
 /// Generate documentation (manpages, etc.) from help strings
 fn buildDocumentation(
     b: *std.Build,
diff --git a/src/bench/codepoint-width.sh b/src/bench/codepoint-width.sh
index 8278370e3..b6fa4733e 100755
--- a/src/bench/codepoint-width.sh
+++ b/src/bench/codepoint-width.sh
@@ -31,6 +31,8 @@ hyperfine \
   "./zig-out/bin/bench-codepoint-width --mode=utf8proc${ARGS} </tmp/ghostty_bench_data" \
   -n ziglyph \
   "./zig-out/bin/bench-codepoint-width --mode=ziglyph${ARGS} </tmp/ghostty_bench_data" \
+  -n table \
+  "./zig-out/bin/bench-codepoint-width --mode=table${ARGS} </tmp/ghostty_bench_data" \
   -n simd \
   "./zig-out/bin/bench-codepoint-width --mode=simd${ARGS} </tmp/ghostty_bench_data"
 
diff --git a/src/bench/codepoint-width.zig b/src/bench/codepoint-width.zig
index ecef13c80..1f4deae57 100644
--- a/src/bench/codepoint-width.zig
+++ b/src/bench/codepoint-width.zig
@@ -17,6 +17,7 @@ const ArenaAllocator = std.heap.ArenaAllocator;
 const ziglyph = @import("ziglyph");
 const cli = @import("../cli.zig");
 const simd = @import("../simd/main.zig");
+const table = @import("../unicode/main.zig").table;
 const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
 
 const Args = struct {
@@ -53,6 +54,8 @@ const Mode = enum {
 
     /// Our SIMD implementation.
     simd,
+
+    table,
 };
 
 pub const std_options = struct {
@@ -82,6 +85,7 @@ pub fn main() !void {
         .utf8proc => try benchUtf8proc(reader, buf),
         .ziglyph => try benchZiglyph(reader, buf),
         .simd => try benchSimd(reader, buf),
+        .table => try benchTable(reader, buf),
     }
 }
 
@@ -153,6 +157,30 @@ noinline fn benchUtf8proc(
     }
 }
 
+noinline fn benchTable(
+    reader: anytype,
+    buf: []u8,
+) !void {
+    var d: UTF8Decoder = .{};
+    while (true) {
+        const n = try reader.read(buf);
+        if (n == 0) break;
+
+        // Using stream.next directly with a for loop applies a naive
+        // scalar approach.
+        for (buf[0..n]) |c| {
+            const cp_, const consumed = d.next(c);
+            assert(consumed);
+            if (cp_) |cp| {
+                const width = table.get(@intCast(cp)).width;
+
+                // Write the width to the buffer to avoid it being compiled away
+                buf[0] = @intCast(width);
+            }
+        }
+    }
+}
+
 noinline fn benchZiglyph(
     reader: anytype,
     buf: []u8,
diff --git a/src/main_ghostty.zig b/src/main_ghostty.zig
index db08449f5..05f6a39ed 100644
--- a/src/main_ghostty.zig
+++ b/src/main_ghostty.zig
@@ -308,6 +308,7 @@ test {
     _ = @import("terminal/main.zig");
     _ = @import("terminfo/main.zig");
     _ = @import("simd/main.zig");
+    _ = @import("unicode/main.zig");
 
     // TODO
     _ = @import("blocking_queue.zig");
diff --git a/src/unicode/lut.zig b/src/unicode/lut.zig
new file mode 100644
index 000000000..95c6a3688
--- /dev/null
+++ b/src/unicode/lut.zig
@@ -0,0 +1,179 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+
+// This whole file is based on the algorithm described here:
+// https://here-be-braces.com/fast-lookup-of-unicode-properties/
+
+/// Creates a type that is able to generate a 3-level lookup table
+/// from a Unicode codepoint to a mapping of type Elem. The lookup table
+/// generally is expected to be codegen'd and then reloaded, although it
+/// can in theory be generated at runtime.
+///
+/// Context must have two functions:
+///   - `get(Context, u21) !Elem`: returns the mapping for a given codepoint
+///   - `eql(Context, Elem, Elem) bool`: returns true if two mappings are equal
+///
+pub fn Generator(
+    comptime Elem: type,
+    comptime Context: type,
+) type {
+    return struct {
+        const Self = @This();
+
+        const block_size = 256;
+        const Block = [block_size]u16;
+
+        /// Mapping of a block to its index in the stage2 array.
+        const BlockMap = std.HashMap(
+            Block,
+            u16,
+            struct {
+                pub fn hash(ctx: @This(), k: Block) u64 {
+                    _ = ctx;
+                    var hasher = std.hash.Wyhash.init(0);
+                    std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
+                    return hasher.final();
+                }
+
+                pub fn eql(ctx: @This(), a: Block, b: Block) bool {
+                    _ = ctx;
+                    return std.mem.eql(u16, &a, &b);
+                }
+            },
+            std.hash_map.default_max_load_percentage,
+        );
+
+        ctx: Context = undefined,
+
+        /// Generate the lookup tables. The arrays in the return value
+        /// are owned by the caller and must be freed.
+        pub fn generate(self: *const Self, alloc: Allocator) !Tables(Elem) {
+            // Maps block => stage2 index
+            var blocks_map = BlockMap.init(alloc);
+            defer blocks_map.deinit();
+
+            // Our stages
+            var stage1 = std.ArrayList(u16).init(alloc);
+            defer stage1.deinit();
+            var stage2 = std.ArrayList(u16).init(alloc);
+            defer stage2.deinit();
+            var stage3 = std.ArrayList(Elem).init(alloc);
+            defer stage3.deinit();
+
+            var block: Block = undefined;
+            var block_len: u16 = 0;
+            for (0..std.math.maxInt(u21) + 1) |cp| {
+                // Get our block value and find the matching result value
+                // in our list of possible values in stage3. This way, each
+                // possible mapping only gets one entry in stage3.
+                const elem = try self.ctx.get(@as(u21, @intCast(cp)));
+                const block_idx = block_idx: {
+                    for (stage3.items, 0..) |item, i| {
+                        if (self.ctx.eql(item, elem)) break :block_idx i;
+                    }
+
+                    const idx = stage3.items.len;
+                    try stage3.append(elem);
+                    break :block_idx idx;
+                };
+
+                // The block stores the mapping to the stage3 index
+                block[block_len] = std.math.cast(u16, block_idx) orelse return error.BlockTooLarge;
+                block_len += 1;
+
+                // If we still have space and we're not done with codepoints,
+                // we keep building up the block. Conversely: we finalize this
+                // block if we've filled it or are out of codepoints.
+                if (block_len < block_size and cp != std.math.maxInt(u21)) continue;
+                if (block_len < block_size) @memset(block[block_len..block_size], 0);
+
+                // Look for the stage2 index for this block. If it doesn't exist
+                // we add it to stage2 and update the mapping.
+                const gop = try blocks_map.getOrPut(block);
+                if (!gop.found_existing) {
+                    gop.value_ptr.* = std.math.cast(
+                        u16,
+                        stage2.items.len,
+                    ) orelse return error.Stage2TooLarge;
+                    for (block[0..block_len]) |entry| try stage2.append(entry);
+                }
+
+                // Map stage1 => stage2 and reset our block
+                try stage1.append(gop.value_ptr.*);
+                block_len = 0;
+            }
+
+            // All of our lengths must fit in a u16 for this to work
+            assert(stage1.items.len <= std.math.maxInt(u16));
+            assert(stage2.items.len <= std.math.maxInt(u16));
+            assert(stage3.items.len <= std.math.maxInt(u16));
+
+            const stage1_owned = try stage1.toOwnedSlice();
+            errdefer alloc.free(stage1_owned);
+            const stage2_owned = try stage2.toOwnedSlice();
+            errdefer alloc.free(stage2_owned);
+            const stage3_owned = try stage3.toOwnedSlice();
+            errdefer alloc.free(stage3_owned);
+
+            return .{
+                .stage1 = stage1_owned,
+                .stage2 = stage2_owned,
+                .stage3 = stage3_owned,
+            };
+        }
+    };
+}
+
+/// Creates a type that given a 3-level lookup table, can be used to
+/// look up a mapping for a given codepoint, encode it out to Zig, etc.
+pub fn Tables(comptime Elem: type) type {
+    return struct {
+        const Self = @This();
+
+        stage1: []const u16,
+        stage2: []const u16,
+        stage3: []const Elem,
+
+        /// Given a codepoint, returns the mapping for that codepoint.
+        pub fn get(self: *const Self, cp: u21) Elem {
+            const high = cp >> 8;
+            const low = cp & 0xFF;
+            return self.stage3[self.stage2[self.stage1[high] + low]];
+        }
+
+        /// Writes the lookup table as Zig to the given writer. The
+        /// written file exports three constants: stage1, stage2, and
+        /// stage3. These can be used to rebuild the lookup table in Zig.
+        pub fn writeZig(self: *const Self, writer: anytype) !void {
+            try writer.print(
+                \\//! This file is auto-generated. Do not edit.
+                \\
+                \\pub fn Tables(comptime Elem: type) type {{
+                \\    return struct {{
+                \\pub const stage1: [{}]u16 = .{{
+            , .{self.stage1.len});
+            for (self.stage1) |entry| try writer.print("{},", .{entry});
+
+            try writer.print(
+                \\
+                \\}};
+                \\
+                \\pub const stage2: [{}]u16 = .{{
+            , .{self.stage2.len});
+            for (self.stage2) |entry| try writer.print("{},", .{entry});
+            try writer.writeAll("};");
+
+            try writer.print(
+                \\
+                \\pub const stage3: [{}]Elem = .{{
+            , .{self.stage3.len});
+            for (self.stage3) |entry| try writer.print("{},", .{entry});
+            try writer.writeAll(
+                \\};
+                \\    };
+                \\}
+            );
+        }
+    };
+}
diff --git a/src/unicode/main.zig b/src/unicode/main.zig
new file mode 100644
index 000000000..fa0cb9fc8
--- /dev/null
+++ b/src/unicode/main.zig
@@ -0,0 +1,9 @@
+pub const lut = @import("lut.zig");
+
+const props = @import("props.zig");
+pub const table = props.table;
+pub const Properties = props.Properties;
+
+test {
+    @import("std").testing.refAllDecls(@This());
+}
diff --git a/src/unicode/props.zig b/src/unicode/props.zig
new file mode 100644
index 000000000..fe85844a5
--- /dev/null
+++ b/src/unicode/props.zig
@@ -0,0 +1,105 @@
+const props = @This();
+const std = @import("std");
+const ziglyph = @import("ziglyph");
+const lut = @import("lut.zig");
+
+/// The lookup tables for Ghostty.
+pub const table = table: {
+    // This is only available after running main() below as part of the Ghostty
+    // build.zig, but due to Zig's lazy analysis we can still reference it here.
+    const generated = @import("unicode_tables").Tables(Properties);
+    const Tables = lut.Tables(Properties);
+    break :table Tables{
+        .stage1 = &generated.stage1,
+        .stage2 = &generated.stage2,
+        .stage3 = &generated.stage3,
+    };
+};
+
+/// Property set per codepoint that Ghostty cares about.
+///
+/// Adding to this lets you find new properties but also potentially makes
+/// our lookup tables less efficient. Any changes to this should run the
+/// benchmarks in src/bench to verify that we haven't regressed.
+pub const Properties = struct {
+    /// Codepoint width. We clamp to [0, 2] since Ghostty handles control
+    /// characters and we max out at 2 for wide characters (i.e. 3-em dash
+    /// becomes a 2-em dash).
+    width: u2 = 0,
+
+    // Needed for lut.Generator
+    pub fn eql(a: Properties, b: Properties) bool {
+        return a.width == b.width;
+    }
+
+    // Needed for lut.Generator
+    pub fn format(
+        self: Properties,
+        comptime layout: []const u8,
+        opts: std.fmt.FormatOptions,
+        writer: anytype,
+    ) !void {
+        _ = layout;
+        _ = opts;
+        try std.fmt.format(writer, ".{{ .width= {}, }}", .{
+            self.width,
+        });
+    }
+};
+
+pub fn get(cp: u21) Properties {
+    const zg_width = ziglyph.display_width.codePointWidth(cp, .half);
+
+    return .{
+        .width = @intCast(@min(2, @max(0, zg_width))),
+    };
+}
+
+/// Runnable binary to generate the lookup tables and output to stdout.
+pub fn main() !void {
+    const alloc = std.heap.c_allocator;
+
+    const gen: lut.Generator(
+        Properties,
+        struct {
+            pub fn get(ctx: @This(), cp: u21) !Properties {
+                _ = ctx;
+                return props.get(cp);
+            }
+
+            pub fn eql(ctx: @This(), a: Properties, b: Properties) bool {
+                _ = ctx;
+                return a.eql(b);
+            }
+        },
+    ) = .{};
+
+    const t = try gen.generate(alloc);
+    defer alloc.free(t.stage1);
+    defer alloc.free(t.stage2);
+    defer alloc.free(t.stage3);
+    try t.writeZig(std.io.getStdOut().writer());
+
+    // Uncomment when manually debugging to see our table sizes.
+    // std.log.warn("stage1={} stage2={} stage3={}", .{
+    //     t.stage1.len,
+    //     t.stage2.len,
+    //     t.stage3.len,
+    // });
+}
+
+// This is not very fast in debug modes, so it's commented out by default.
+// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
+// test "tables match ziglyph" {
+//     const testing = std.testing;
+//
+//     const min = 0xFF + 1; // start outside ascii
+//     for (min..std.math.maxInt(u21)) |cp| {
+//         const t = table.get(@intCast(cp));
+//         const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half)));
+//         if (t.width != zg) {
+//             std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg });
+//             try testing.expect(false);
+//         }
+//     }
+// }

From f6e694bf80d67b8f3d87f43f4acaabb0b325a31b Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Thu, 8 Feb 2024 21:10:06 -0800
Subject: [PATCH 3/7] bench: update codepoint-width

---
 src/bench/codepoint-width.sh  | 2 --
 src/bench/codepoint-width.zig | 4 +++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/bench/codepoint-width.sh b/src/bench/codepoint-width.sh
index b6fa4733e..83d01830b 100755
--- a/src/bench/codepoint-width.sh
+++ b/src/bench/codepoint-width.sh
@@ -29,8 +29,6 @@ hyperfine \
   "./zig-out/bin/bench-codepoint-width --mode=wcwidth${ARGS} </tmp/ghostty_bench_data" \
   -n utf8proc \
   "./zig-out/bin/bench-codepoint-width --mode=utf8proc${ARGS} </tmp/ghostty_bench_data" \
-  -n ziglyph \
-  "./zig-out/bin/bench-codepoint-width --mode=ziglyph${ARGS} </tmp/ghostty_bench_data" \
   -n table \
   "./zig-out/bin/bench-codepoint-width --mode=table${ARGS} </tmp/ghostty_bench_data" \
   -n simd \
diff --git a/src/bench/codepoint-width.zig b/src/bench/codepoint-width.zig
index 1f4deae57..433b9b36b 100644
--- a/src/bench/codepoint-width.zig
+++ b/src/bench/codepoint-width.zig
@@ -172,7 +172,9 @@ noinline fn benchTable(
             const cp_, const consumed = d.next(c);
             assert(consumed);
             if (cp_) |cp| {
-                const width = table.get(@intCast(cp)).width;
+                // This is the same trick we do in terminal.zig so we
+                // keep it here.
+                const width = if (cp <= 0xFF) 1 else table.get(@intCast(cp)).width;
 
                 // Write the width to the buffer to avoid it being compiled away
                 buf[0] = @intCast(width);

From 4834b8e925b9ec598d4a34038d7188efc7698603 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Thu, 8 Feb 2024 21:10:37 -0800
Subject: [PATCH 4/7] remove utf8proc

---
 build.zig                     |  9 ---------
 build.zig.zon                 |  1 -
 src/bench/codepoint-width.sh  |  2 --
 src/bench/codepoint-width.zig | 30 +-----------------------------
 4 files changed, 1 insertion(+), 41 deletions(-)

diff --git a/build.zig b/build.zig
index ff837b70d..4e6febe12 100644
--- a/build.zig
+++ b/build.zig
@@ -1082,15 +1082,6 @@ fn addDeps(
     step.linkLibrary(utfcpp_dep.artifact("utfcpp"));
     try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin());
 
-    // utf8proc
-    const utf8proc_dep = b.dependency("utf8proc", .{
-        .target = target,
-        .optimize = optimize,
-    });
-    step.root_module.addImport("utf8proc", utf8proc_dep.module("utf8proc"));
-    step.linkLibrary(utf8proc_dep.artifact("utf8proc"));
-    try static_libs.append(utf8proc_dep.artifact("utf8proc").getEmittedBin());
-
     // Spirv-Cross
     step.linkLibrary(spirv_cross_dep.artifact("spirv_cross"));
     try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin());
diff --git a/build.zig.zon b/build.zig.zon
index c3b958591..a694562ea 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -38,7 +38,6 @@
         .opengl = .{ .path = "./pkg/opengl" },
         .pixman = .{ .path = "./pkg/pixman" },
         .simdutf = .{ .path = "./pkg/simdutf" },
-        .utf8proc = .{ .path = "./pkg/utf8proc" },
         .utfcpp = .{ .path = "./pkg/utfcpp" },
         .zlib = .{ .path = "./pkg/zlib" },
 
diff --git a/src/bench/codepoint-width.sh b/src/bench/codepoint-width.sh
index 83d01830b..43304ec2e 100755
--- a/src/bench/codepoint-width.sh
+++ b/src/bench/codepoint-width.sh
@@ -27,8 +27,6 @@ hyperfine \
   "./zig-out/bin/bench-codepoint-width --mode=noop${ARGS} </tmp/ghostty_bench_data" \
   -n wcwidth \
   "./zig-out/bin/bench-codepoint-width --mode=wcwidth${ARGS} </tmp/ghostty_bench_data" \
-  -n utf8proc \
-  "./zig-out/bin/bench-codepoint-width --mode=utf8proc${ARGS} </tmp/ghostty_bench_data" \
   -n table \
   "./zig-out/bin/bench-codepoint-width --mode=table${ARGS} </tmp/ghostty_bench_data" \
   -n simd \
diff --git a/src/bench/codepoint-width.zig b/src/bench/codepoint-width.zig
index 433b9b36b..c6b11c72d 100644
--- a/src/bench/codepoint-width.zig
+++ b/src/bench/codepoint-width.zig
@@ -46,15 +46,13 @@ const Mode = enum {
     /// libc wcwidth
     wcwidth,
 
-    /// Use utf8proc library to calculate the display width of each codepoint.
-    utf8proc,
-
     /// Use ziglyph library to calculate the display width of each codepoint.
     ziglyph,
 
     /// Our SIMD implementation.
     simd,
 
+    /// Test our lookup table implementation.
     table,
 };
 
@@ -82,7 +80,6 @@ pub fn main() !void {
     switch (args.mode) {
         .noop => try benchNoop(reader, buf),
         .wcwidth => try benchWcwidth(reader, buf),
-        .utf8proc => try benchUtf8proc(reader, buf),
         .ziglyph => try benchZiglyph(reader, buf),
         .simd => try benchSimd(reader, buf),
         .table => try benchTable(reader, buf),
@@ -132,31 +129,6 @@ noinline fn benchWcwidth(
     }
 }
 
-noinline fn benchUtf8proc(
-    reader: anytype,
-    buf: []u8,
-) !void {
-    const utf8proc = @import("utf8proc");
-    var d: UTF8Decoder = .{};
-    while (true) {
-        const n = try reader.read(buf);
-        if (n == 0) break;
-
-        // Using stream.next directly with a for loop applies a naive
-        // scalar approach.
-        for (buf[0..n]) |c| {
-            const cp_, const consumed = d.next(c);
-            assert(consumed);
-            if (cp_) |cp| {
-                const width = utf8proc.charwidth(cp);
-
-                // Write the width to the buffer to avoid it being compiled away
-                buf[0] = @intCast(width);
-            }
-        }
-    }
-}
-
 noinline fn benchTable(
     reader: anytype,
     buf: []u8,

From c838bfc1c1d59e2497c5e613a4599176eb17a3bd Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Thu, 8 Feb 2024 21:17:30 -0800
Subject: [PATCH 5/7] terminal: swap to table implementation

---
 src/terminal/Terminal.zig | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/terminal/Terminal.zig b/src/terminal/Terminal.zig
index 5110a8d2a..f9f55ae83 100644
--- a/src/terminal/Terminal.zig
+++ b/src/terminal/Terminal.zig
@@ -11,6 +11,7 @@ const testing = std.testing;
 const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 const simd = @import("../simd/main.zig");
+const unicode = @import("../unicode/main.zig");
 
 const ansi = @import("ansi.zig");
 const modes = @import("modes.zig");
@@ -870,13 +871,7 @@ pub fn print(self: *Terminal, c: u21) !void {
 
     // Determine the width of this character so we can handle
     // non-single-width characters properly.
-    const width: usize = @intCast(simd.codepointWidth(c));
-
-    // Old implementation, 3x slower on ASCII, 2x slower on CJK, etc.
-    // const width: usize = @intCast(@min(
-    //     @max(0, ziglyph.display_width.codePointWidth(c, .half)),
-    //     2,
-    // ));
+    const width: usize = @intCast(unicode.table.get(c).width);
 
     // Note: it is possible to have a width of "3" and a width of "-1"
     // from ziglyph. We should look into those cases and handle them

From 8d891fb05c05662f2e0a395f749edcccddee35a3 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Thu, 8 Feb 2024 21:28:14 -0800
Subject: [PATCH 6/7] terminal: fast-path ASCII on char width

---
 src/terminal/Terminal.zig | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/terminal/Terminal.zig b/src/terminal/Terminal.zig
index f9f55ae83..4d5616003 100644
--- a/src/terminal/Terminal.zig
+++ b/src/terminal/Terminal.zig
@@ -870,8 +870,10 @@ pub fn print(self: *Terminal, c: u21) !void {
     }
 
     // Determine the width of this character so we can handle
-    // non-single-width characters properly.
-    const width: usize = @intCast(unicode.table.get(c).width);
+    // non-single-width characters properly. We have a fast-path for
+    // byte-sized characters since they're so common. We can ignore
+    // control characters because they're always filtered prior.
+    const width: usize = if (c <= 0xFF) 1 else @intCast(unicode.table.get(c).width);
 
     // Note: it is possible to have a width of "3" and a width of "-1"
     // from ziglyph. We should look into those cases and handle them

From a471756ee0b96a034050bcdd911f5d62d0f935fb Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Thu, 8 Feb 2024 21:34:28 -0800
Subject: [PATCH 7/7] build: unigen needs libc

---
 build.zig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/build.zig b/build.zig
index 4e6febe12..0669f27cc 100644
--- a/build.zig
+++ b/build.zig
@@ -1226,6 +1226,7 @@ fn addUnicodeTables(
             .root_source_file = .{ .path = "src/unicode/props.zig" },
             .target = b.host,
         });
+        exe.linkLibC();
         if (step_ == null) b.installArtifact(exe);
 
         const ziglyph_dep = b.dependency("ziglyph", .{