From 4781a83e4a0b17ba111d42d68c750f8b25f8b48d Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Tue, 7 Nov 2023 13:17:56 -0800
Subject: [PATCH] replace utf8proc with ziglyph

---
 build.zig                      | 13 ------------
 build.zig.zon                  |  5 ++---
 pkg/utf8proc/build.zig         | 39 ----------------------------------
 pkg/utf8proc/build.zig.zon     | 10 ---------
 pkg/utf8proc/c.zig             |  3 ---
 pkg/utf8proc/main.zig          | 20 -----------------
 src/font/shaper/web_canvas.zig |  6 +++---
 src/terminal/Screen.zig        | 10 ++++-----
 src/terminal/Terminal.zig      | 12 +++++------
 9 files changed, 16 insertions(+), 102 deletions(-)
 delete mode 100644 pkg/utf8proc/build.zig
 delete mode 100644 pkg/utf8proc/build.zig.zon
 delete mode 100644 pkg/utf8proc/c.zig
 delete mode 100644 pkg/utf8proc/main.zig

diff --git a/build.zig b/build.zig
index 265ccdc9e..75378d0f8 100644
--- a/build.zig
+++ b/build.zig
@@ -667,10 +667,6 @@ fn addDeps(
         .target = step.target,
         .optimize = step.optimize,
     });
-    const utf8proc_dep = b.dependency("utf8proc", .{
-        .target = step.target,
-        .optimize = step.optimize,
-    });
     const harfbuzz_dep = b.dependency("harfbuzz", .{
         .target = step.target,
         .optimize = step.optimize,
@@ -687,12 +683,8 @@ fn addDeps(
         // We link this package but its a no-op since Tracy
         // never actually WORKS with wasm.
         step.addModule("tracy", tracy_dep.module("tracy"));
-        step.addModule("utf8proc", utf8proc_dep.module("utf8proc"));
         step.addModule("zig-js", js_dep.module("zig-js"));
 
-        // utf8proc
-        step.linkLibrary(utf8proc_dep.artifact("utf8proc"));
-
         return static_libs;
     }
 
@@ -729,7 +721,6 @@ fn addDeps(
     step.addModule("harfbuzz", harfbuzz_dep.module("harfbuzz"));
     step.addModule("xev", libxev_dep.module("xev"));
     step.addModule("pixman", pixman_dep.module("pixman"));
-    step.addModule("utf8proc", utf8proc_dep.module("utf8proc"));
     step.addModule("ziglyph", ziglyph_dep.module("ziglyph"));
 
     // Mac Stuff
@@ -752,10 +743,6 @@ fn addDeps(
         try static_libs.append(tracy_dep.artifact("tracy").getEmittedBin());
     }
 
-    // utf8proc
-    step.linkLibrary(utf8proc_dep.artifact("utf8proc"));
-    try static_libs.append(utf8proc_dep.artifact("utf8proc").getEmittedBin());
-
     // Dynamic link
     if (!static) {
         step.addIncludePath(freetype_dep.path(""));
diff --git a/build.zig.zon b/build.zig.zon
index c0c0d21d2..ed9fec81b 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -21,8 +21,8 @@
             .hash = "1220319b42fbc0116f3f198343256018e9f1da9483cef259201afe4ebab0ce0d8f6a",
         },
         .ziglyph = .{
-            .url = "https://codeberg.org/dude_the_builder/ziglyph/archive/v0.11.1.tar.gz",
-            .hash = "1220dee955839b7f267c1bb21e0ee60888c08f408c30f0722b243cabcc8cce8b7508",
+            .url = "https://codeberg.org/dude_the_builder/ziglyph/archive/v0.11.2.tar.gz",
+            .hash = "1220c45655c6f107ca129a558ace8fb3c57afcd7290694c8c4a2d74df40f8c9a8937",
         },
 
         // C libs
@@ -34,7 +34,6 @@
         .macos = .{ .path = "./pkg/macos" },
         .pixman = .{ .path = "./pkg/pixman" },
         .tracy = .{ .path = "./pkg/tracy" },
-        .utf8proc = .{ .path = "./pkg/utf8proc" },
         .zlib = .{ .path = "./pkg/zlib" },
 
         // System headers
diff --git a/pkg/utf8proc/build.zig b/pkg/utf8proc/build.zig
deleted file mode 100644
index 4f0240d73..000000000
--- a/pkg/utf8proc/build.zig
+++ /dev/null
@@ -1,39 +0,0 @@
-const std = @import("std");
-
-pub fn build(b: *std.Build) !void {
-    const target = b.standardTargetOptions(.{});
-    const optimize = b.standardOptimizeOption(.{});
-
-    _ = b.addModule("utf8proc", .{ .source_file = .{ .path = "main.zig" } });
-
-    const upstream = b.dependency("utf8proc", .{});
-    const lib = b.addStaticLibrary(.{
-        .name = "utf8proc",
-        .target = target,
-        .optimize = optimize,
-    });
-    lib.linkLibC();
-    lib.addIncludePath(upstream.path(""));
-    lib.installHeadersDirectoryOptions(.{
-        .source_dir = upstream.path(""),
-        .install_dir = .header,
-        .install_subdir = "",
-        .include_extensions = &.{".h"},
-    });
-
-    var flags = std.ArrayList([]const u8).init(b.allocator);
-    try flags.append("-DUTF8PROC_EXPORTS");
-    defer flags.deinit();
-    for (srcs) |src| {
-        lib.addCSourceFile(.{
-            .file = upstream.path(src),
-            .flags = flags.items,
-        });
-    }
-
-    b.installArtifact(lib);
-}
-
-const srcs: []const []const u8 = &.{
-    "utf8proc.c",
-};
diff --git a/pkg/utf8proc/build.zig.zon b/pkg/utf8proc/build.zig.zon
deleted file mode 100644
index a4ecb6ab3..000000000
--- a/pkg/utf8proc/build.zig.zon
+++ /dev/null
@@ -1,10 +0,0 @@
-.{
-    .name = "utf8proc",
-    .version = "2.8.0",
-    .dependencies = .{
-        .utf8proc = .{
-            .url = "https://github.com/JuliaStrings/utf8proc/archive/refs/tags/v2.8.0.tar.gz",
-            .hash = "1220056ce228a8c58f1fa66ab778f5c8965e62f720c1d30603c7d534cb7d8a605ad7",
-        },
-    },
-}
diff --git a/pkg/utf8proc/c.zig b/pkg/utf8proc/c.zig
deleted file mode 100644
index adeb226b0..000000000
--- a/pkg/utf8proc/c.zig
+++ /dev/null
@@ -1,3 +0,0 @@
-pub usingnamespace @cImport({
-    @cInclude("utf8proc.h");
-});
diff --git a/pkg/utf8proc/main.zig b/pkg/utf8proc/main.zig
deleted file mode 100644
index a351fff4b..000000000
--- a/pkg/utf8proc/main.zig
+++ /dev/null
@@ -1,20 +0,0 @@
-pub const c = @import("c.zig");
-
-/// Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
-/// except that a width of 0 is returned for non-printable codepoints
-/// instead of -1 as in `wcwidth`.
-pub fn charwidth(codepoint: u21) u8 {
-    return @intCast(c.utf8proc_charwidth(@intCast(codepoint)));
-}
-
-/// Given a pair of consecutive codepoints, return whether a grapheme break is
-/// permitted between them (as defined by the extended grapheme clusters in UAX#29).
-pub fn graphemeBreakStateful(cp1: u21, cp2: u21, state: *i32) bool {
-    return c.utf8proc_grapheme_break_stateful(
-        @intCast(cp1),
-        @intCast(cp2),
-        state,
-    );
-}
-
-test {}
diff --git a/src/font/shaper/web_canvas.zig b/src/font/shaper/web_canvas.zig
index 4e46578e1..912192eb1 100644
--- a/src/font/shaper/web_canvas.zig
+++ b/src/font/shaper/web_canvas.zig
@@ -1,7 +1,7 @@
 const std = @import("std");
 const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
-const utf8proc = @import("utf8proc");
+const ziglyph = @import("ziglyph");
 const font = @import("../main.zig");
 const terminal = @import("../../terminal/main.zig");
 
@@ -113,7 +113,7 @@ pub const Shaper = struct {
         // font ligatures. However, we do support grapheme clustering.
         // This means we can render things like skin tone emoji but
         // we can't render things like single glyph "=>".
-        var break_state: i32 = 0;
+        var break_state: u3 = 0;
         var cp1: u21 = @intCast(codepoints[0]);
 
         var start: usize = 0;
@@ -128,7 +128,7 @@ pub const Shaper = struct {
                 const cp2: u21 = @intCast(codepoints[i]);
                 defer cp1 = cp2;
 
-                break :blk utf8proc.graphemeBreakStateful(
+                break :blk ziglyph.graphemeBreak(
                     cp1,
                     cp2,
                     &break_state,
diff --git a/src/terminal/Screen.zig b/src/terminal/Screen.zig
index 5880d77af..353cfa8a8 100644
--- a/src/terminal/Screen.zig
+++ b/src/terminal/Screen.zig
@@ -54,7 +54,7 @@ const builtin = @import("builtin");
 const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 
-const utf8proc = @import("utf8proc");
+const ziglyph = @import("ziglyph");
 const trace = @import("tracy").trace;
 const ansi = @import("ansi.zig");
 const modes = @import("modes.zig");
@@ -2795,12 +2795,12 @@ pub fn testWriteString(self: *Screen, text: []const u8) !void {
             // If we have a previous cell, we check if we're part of a grapheme.
             if (grapheme.cell) |prev_cell| {
                 const grapheme_break = brk: {
-                    var state: i32 = 0;
+                    var state: u3 = 0;
                     var cp1 = @as(u21, @intCast(prev_cell.char));
                     if (prev_cell.attrs.grapheme) {
                         var it = row.codepointIterator(grapheme.x);
                         while (it.next()) |cp2| {
-                            assert(!utf8proc.graphemeBreakStateful(
+                            assert(!ziglyph.graphemeBreak(
                                 cp1,
                                 cp2,
                                 &state,
@@ -2810,7 +2810,7 @@ pub fn testWriteString(self: *Screen, text: []const u8) !void {
                         }
                     }
 
-                    break :brk utf8proc.graphemeBreakStateful(cp1, c, &state);
+                    break :brk ziglyph.graphemeBreak(cp1, c, &state);
                 };
 
                 if (!grapheme_break) {
@@ -2820,7 +2820,7 @@ pub fn testWriteString(self: *Screen, text: []const u8) !void {
             }
         }
 
-        const width = utf8proc.charwidth(c);
+        const width: usize = @intCast(@max(0, ziglyph.display_width.codePointWidth(c, .half)));
         //log.warn("c={x} width={}", .{ c, width });
 
         // Zero-width are attached as grapheme data.
diff --git a/src/terminal/Terminal.zig b/src/terminal/Terminal.zig
index cf3dbea71..70e0e2a3a 100644
--- a/src/terminal/Terminal.zig
+++ b/src/terminal/Terminal.zig
@@ -6,7 +6,7 @@ const Terminal = @This();
 
 const std = @import("std");
 const builtin = @import("builtin");
-const utf8proc = @import("utf8proc");
+const ziglyph = @import("ziglyph");
 const testing = std.testing;
 const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
@@ -682,12 +682,12 @@ pub fn print(self: *Terminal, c: u21) !void {
         if (prev.cell.char == 0) break :grapheme;
 
         const grapheme_break = brk: {
-            var state: i32 = 0;
-            var cp1 = @as(u21, @intCast(prev.cell.char));
+            var state: u3 = 0;
+            var cp1: u21 = @intCast(prev.cell.char);
             if (prev.cell.attrs.grapheme) {
                 var it = row.codepointIterator(prev.x);
                 while (it.next()) |cp2| {
-                    assert(!utf8proc.graphemeBreakStateful(
+                    assert(!ziglyph.graphemeBreak(
                         cp1,
                         cp2,
                         &state,
@@ -697,7 +697,7 @@ pub fn print(self: *Terminal, c: u21) !void {
                 }
             }
 
-            break :brk utf8proc.graphemeBreakStateful(cp1, c, &state);
+            break :brk ziglyph.graphemeBreak(cp1, c, &state);
         };
 
         // If we can NOT break, this means that "c" is part of a grapheme
@@ -764,7 +764,7 @@ pub fn print(self: *Terminal, c: u21) !void {
 
     // Determine the width of this character so we can handle
     // non-single-width characters properly.
-    const width = utf8proc.charwidth(c);
+    const width: usize = @intCast(@max(0, ziglyph.display_width.codePointWidth(c, .half)));
     assert(width <= 2);
     // log.debug("c={x} width={}", .{ c, width });