From 4e54c5389ec7e40875a2c0d33f305ec54bec4881 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Sun, 24 Sep 2023 11:05:57 -0700
Subject: [PATCH 01/12] font: CodepointMap

---
 src/font/CodepointMap.zig | 48 +++++++++++++++++++++++++++++++++++++++
 src/font/Group.zig        |  1 +
 2 files changed, 49 insertions(+)
 create mode 100644 src/font/CodepointMap.zig

diff --git a/src/font/CodepointMap.zig b/src/font/CodepointMap.zig
new file mode 100644
index 000000000..82ec1462f
--- /dev/null
+++ b/src/font/CodepointMap.zig
@@ -0,0 +1,48 @@
+/// CodepointMap is a map of codepoints to a discovery descriptor of a font
+/// to use for that codepoint. If the descriptor doesn't return any matching
+/// font, the codepoint is rendered using the default font.
+const CodepointMap = @This();
+
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const discovery = @import("discovery.zig");
+
+pub const Entry = struct {
+    /// Unicode codepoint range. Asserts range[0] <= range[1].
+    range: [2]u21,
+
+    /// The discovery descriptor of the font to use for this range.
+    descriptor: discovery.Descriptor,
+};
+
+/// The list of entries. We use a multiarraylist because Descriptors are
+/// quite large and we will very rarely match, so we'd rather pack our
+/// ranges together to make everything more cache friendly for lookups.
+///
+/// Note: we just do a linear search because we expect to always have very
+/// few entries, so the overhead of a binary search is not worth it. This is
+/// possible to defeat with some pathological inputs, but there is no realistic
+/// scenario where this will be a problem except people trying to fuck around.
+list: std.MultiArrayList(Entry) = .{},
+
+/// Add an entry to the map.
+///
+/// For conflicting codepoints, entries added later take priority over
+/// entries added earlier.
+pub fn add(self: *CodepointMap, alloc: Allocator, entry: Entry) !void {
+    assert(entry.range[0] <= entry.range[1]);
+    try self.list.append(alloc, entry);
+}
+
+/// Get a descriptor for a codepoint.
+pub fn get(self: *const CodepointMap, cp: u21) ?discovery.Descriptor {
+    for (self.list.items(.range), 0..) |range, i| {
+        if (range[0] <= cp and cp <= range[1]) {
+            const descs = self.list.items(.descriptor);
+            return descs[i];
+        }
+    }
+
+    return null;
+}
diff --git a/src/font/Group.zig b/src/font/Group.zig
index e6ba73b13..4e9360dbc 100644
--- a/src/font/Group.zig
+++ b/src/font/Group.zig
@@ -32,6 +32,7 @@ const log = std.log.scoped(.font_group);
 // most important memory efficiency we can look for. This is totally opaque
 // to the user so we can change this later.
 const StyleArray = std.EnumArray(Style, std.ArrayListUnmanaged(GroupFace));
+
 /// The allocator for this group
 alloc: Allocator,
 

From 6b640c2d9f5a867199620b592464103bab0ad008 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Sun, 24 Sep 2023 11:17:07 -0700
Subject: [PATCH 02/12] font: discovery descriptor can be hashed

---
 src/font/discovery.zig | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/src/font/discovery.zig b/src/font/discovery.zig
index 7ac5d6fff..b85f411ae 100644
--- a/src/font/discovery.zig
+++ b/src/font/discovery.zig
@@ -56,6 +56,30 @@ pub const Descriptor = struct {
     /// will be preferred, but not guaranteed.
     variations: []const Variation = &.{},
 
+    /// Returns a hash code that can be used to identify this descriptor.
+    /// String fields are hashed by content rather than by pointer address.
+    pub fn hash(self: Descriptor) u64 {
+        const autoHash = std.hash.autoHash;
+        var hasher = std.hash.Wyhash.init(0);
+        std.hash.autoHashStrat(&hasher, self.family, .Deep); // bytes, not ptr
+        std.hash.autoHashStrat(&hasher, self.style, .Deep);
+        autoHash(&hasher, self.codepoint);
+        autoHash(&hasher, self.size);
+        autoHash(&hasher, self.bold);
+        autoHash(&hasher, self.italic);
+        autoHash(&hasher, self.monospace);
+        autoHash(&hasher, self.variations.len);
+        for (self.variations) |variation| {
+            autoHash(&hasher, variation.id);
+
+            // This is not correct, but we don't currently depend on the
+            // hash value being different based on decimal values of variations.
+            autoHash(&hasher, @as(i64, @intFromFloat(variation.value))); // signed: values may be negative
+        }
+
+        return hasher.final();
+    }
+
     /// Convert to Fontconfig pattern to use for lookup. The pattern does
     /// not have defaults filled/substituted (Fontconfig thing) so callers
     /// must still do this.
@@ -350,6 +374,21 @@ pub const CoreText = struct {
     };
 };
 
+test "descriptor hash" {
+    const testing = std.testing;
+
+    const d: Descriptor = .{};
+    try testing.expect(d.hash() != 0);
+}
+
+test "descriptor hash family names" {
+    const testing = std.testing;
+
+    const d1: Descriptor = .{ .family = "A" };
+    const d2: Descriptor = .{ .family = "B" };
+    try testing.expect(d1.hash() != d2.hash());
+}
+
 test "fontconfig" {
     if (options.backend != .fontconfig_freetype) return error.SkipZigTest;
 

From 3915d9ee3a53428b1114ec02dd6aeb613b73c23e Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Sun, 24 Sep 2023 11:22:57 -0700
Subject: [PATCH 03/12] font: add CodepointMap with tests

---
 src/font/CodepointMap.zig | 35 ++++++++++++++++++++++++++++++++++-
 src/font/main.zig         |  1 +
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/src/font/CodepointMap.zig b/src/font/CodepointMap.zig
index 82ec1462f..58a8a7c43 100644
--- a/src/font/CodepointMap.zig
+++ b/src/font/CodepointMap.zig
@@ -26,6 +26,10 @@ pub const Entry = struct {
 /// scenario where this will be a problem except people trying to fuck around.
 list: std.MultiArrayList(Entry) = .{},
 
+pub fn deinit(self: *CodepointMap, alloc: Allocator) void {
+    self.list.deinit(alloc);
+}
+
 /// Add an entry to the map.
 ///
 /// For conflicting codepoints, entries added later take priority over
@@ -37,7 +41,9 @@ pub fn add(self: *CodepointMap, alloc: Allocator, entry: Entry) !void {
 
 /// Get a descriptor for a codepoint.
 pub fn get(self: *const CodepointMap, cp: u21) ?discovery.Descriptor {
-    for (self.list.items(.range), 0..) |range, i| {
+    for (0..self.list.len) |forward_i| {
+        const i = self.list.len - forward_i - 1; // newest entry first
+        const range = self.list.items(.range)[i]; // must match descs[i]
         if (range[0] <= cp and cp <= range[1]) {
             const descs = self.list.items(.descriptor);
             return descs[i];
@@ -46,3 +52,30 @@ pub fn get(self: *const CodepointMap, cp: u21) ?discovery.Descriptor {
 
     return null;
 }
+
+test "codepointmap" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    var m: CodepointMap = .{};
+    defer m.deinit(alloc);
+
+    // Exact range
+    try testing.expect(m.get(1) == null);
+    try m.add(alloc, .{ .range = .{ 1, 1 }, .descriptor = .{ .family = "A" } });
+    {
+        const d = m.get(1).?;
+        try testing.expectEqualStrings("A", d.family.?);
+    }
+
+    // Later entry takes priority
+    try m.add(alloc, .{ .range = .{ 1, 2 }, .descriptor = .{ .family = "B" } });
+    {
+        const d = m.get(1).?;
+        try testing.expectEqualStrings("B", d.family.?);
+    }
+
+    // Non-matching
+    try testing.expect(m.get(0) == null);
+    try testing.expect(m.get(3) == null);
+}
diff --git a/src/font/main.zig b/src/font/main.zig
index 562c31e3a..d660e67de 100644
--- a/src/font/main.zig
+++ b/src/font/main.zig
@@ -5,6 +5,7 @@ const build_config = @import("../build_config.zig");
 pub const Atlas = @import("Atlas.zig");
 pub const discovery = @import("discovery.zig");
 pub const face = @import("face.zig");
+pub const CodepointMap = @import("CodepointMap.zig");
 pub const DeferredFace = @import("DeferredFace.zig");
 pub const Face = face.Face;
 pub const Group = @import("Group.zig");

From 9e2e3acecf8840a278524cd145cf39b629090427 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Sun, 24 Sep 2023 11:50:03 -0700
Subject: [PATCH 04/12] font: add codepoint map and descriptor cache to Group,
 not used yet

---
 src/font/Group.zig | 58 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 50 insertions(+), 8 deletions(-)

diff --git a/src/font/Group.zig b/src/font/Group.zig
index 4e9360dbc..118668bf7 100644
--- a/src/font/Group.zig
+++ b/src/font/Group.zig
@@ -33,6 +33,32 @@ const log = std.log.scoped(.font_group);
 // to the user so we can change this later.
 const StyleArray = std.EnumArray(Style, std.ArrayListUnmanaged(GroupFace));
 
+/// Map of descriptors to faces. This is used with manual codepoint maps
+/// to ensure that we don't load the same font multiple times.
+///
+/// Note that the current implementation will load the same font multiple
+/// times if the font used for a codepoint map is identical to a font used
+/// for a regular style. That's just an inefficient choice made now because
+/// the implementation is simpler and codepoint maps matching a regular
+/// font is a rare case.
+const DescriptorCache = std.HashMapUnmanaged(
+    font.discovery.Descriptor,
+    FontIndex,
+    struct {
+        const KeyType = font.discovery.Descriptor;
+
+        pub fn hash(ctx: @This(), k: KeyType) u64 {
+            _ = ctx;
+            return k.hash();
+        }
+
+        pub fn eql(ctx: @This(), a: KeyType, b: KeyType) bool {
+            return ctx.hash(a) == ctx.hash(b);
+        }
+    },
+    std.hash_map.default_max_load_percentage,
+);
+
 /// The allocator for this group
 alloc: Allocator,
 
@@ -50,6 +76,15 @@ faces: StyleArray,
 /// the codepoint. This can be set after initialization.
 discover: ?*font.Discover = null,
 
+/// A map of codepoints to font requests for codepoint-level overrides.
+/// The memory associated with the map is owned by the caller and is not
+/// modified or freed by Group.
+codepoint_map: ?font.CodepointMap = null,
+
+/// The descriptor cache is used to cache the descriptor to font face
+/// mapping for codepoint maps.
+descriptor_cache: DescriptorCache = .{},
+
 /// Set this to a non-null value to enable sprite glyph drawing. If this
 /// isn't enabled we'll just fall through to trying to use regular fonts
 /// to render sprite glyphs. But more than likely, if this isn't set then
@@ -87,11 +122,15 @@ pub fn init(
 }
 
 pub fn deinit(self: *Group) void {
-    var it = self.faces.iterator();
-    while (it.next()) |entry| {
-        for (entry.value.items) |*item| item.deinit();
-        entry.value.deinit(self.alloc);
+    {
+        var it = self.faces.iterator();
+        while (it.next()) |entry| {
+            for (entry.value.items) |*item| item.deinit();
+            entry.value.deinit(self.alloc);
+        }
     }
+
+    self.descriptor_cache.deinit(self.alloc);
 }
 
 /// Add a face to the list for the given style. This face will be added as
@@ -173,9 +212,12 @@ pub fn setSize(self: *Group, size: font.face.DesiredSize) !void {
 }
 
 /// This represents a specific font in the group.
-pub const FontIndex = packed struct(u8) {
+pub const FontIndex = packed struct(FontIndex.Backing) {
+    const Backing = u8;
+    const backing_bits = @typeInfo(Backing).Int.bits;
+
     /// The number of bits we use for the index.
-    const idx_bits = 8 - @typeInfo(@typeInfo(Style).Enum.tag_type).Int.bits;
+    const idx_bits = backing_bits - @typeInfo(@typeInfo(Style).Enum.tag_type).Int.bits;
     pub const IndexInt = @Type(.{ .Int = .{ .signedness = .unsigned, .bits = idx_bits } });
 
     /// The special-case fonts that we support.
@@ -196,7 +238,7 @@ pub const FontIndex = packed struct(u8) {
     }
 
     /// Convert to int
-    pub fn int(self: FontIndex) u8 {
+    pub fn int(self: FontIndex) Backing {
         return @bitCast(self);
     }
 
@@ -212,7 +254,7 @@ pub const FontIndex = packed struct(u8) {
         // We never want to take up more than a byte since font indexes are
         // everywhere so if we increase the size of this we'll dramatically
         // increase our memory usage.
-        try std.testing.expectEqual(@sizeOf(u8), @sizeOf(FontIndex));
+        try std.testing.expectEqual(@sizeOf(Backing), @sizeOf(FontIndex));
     }
 };
 

From 5c1c090f393de8d5709cefa96ea7ec1434cadad8 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Sun, 24 Sep 2023 15:53:39 -0700
Subject: [PATCH 05/12] font: change FontIndex to a u16 since we can expect
 more fonts with maps

---
 src/font/Group.zig | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/font/Group.zig b/src/font/Group.zig
index 118668bf7..11792603c 100644
--- a/src/font/Group.zig
+++ b/src/font/Group.zig
@@ -213,7 +213,7 @@ pub fn setSize(self: *Group, size: font.face.DesiredSize) !void {
 
 /// This represents a specific font in the group.
 pub const FontIndex = packed struct(FontIndex.Backing) {
-    const Backing = u8;
+    const Backing = u16;
     const backing_bits = @typeInfo(Backing).Int.bits;
 
     /// The number of bits we use for the index.
@@ -255,6 +255,10 @@ pub const FontIndex = packed struct(FontIndex.Backing) {
         // everywhere so if we increase the size of this we'll dramatically
         // increase our memory usage.
         try std.testing.expectEqual(@sizeOf(Backing), @sizeOf(FontIndex));
+
+        // Just so we're aware when this changes. The current maximum number
+        // of fonts for a style is 13 bits or 8192 fonts.
+        try std.testing.expectEqual(13, idx_bits);
     }
 };
 

From abd3e16ebdddf05e62e00ac32b9f40e9aae5f87b Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Sun, 24 Sep 2023 19:44:06 -0700
Subject: [PATCH 06/12] font: Group will use the codepoint map for codepoint
 overrides

---
 src/font/Group.zig | 54 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/src/font/Group.zig b/src/font/Group.zig
index 11792603c..ef27dc02e 100644
--- a/src/font/Group.zig
+++ b/src/font/Group.zig
@@ -43,7 +43,7 @@ const StyleArray = std.EnumArray(Style, std.ArrayListUnmanaged(GroupFace));
 /// font is a rare case.
 const DescriptorCache = std.HashMapUnmanaged(
     font.discovery.Descriptor,
-    FontIndex,
+    ?FontIndex,
     struct {
         const KeyType = font.discovery.Descriptor;
 
@@ -278,6 +278,13 @@ pub fn indexForCodepoint(
     style: Style,
     p: ?Presentation,
 ) ?FontIndex {
+    // Codepoint overrides.
+    if (self.indexForCodepointOverride(cp)) |idx_| {
+        if (idx_) |idx| return idx;
+    } else |err| {
+        log.warn("codepoint override failed codepoint={} err={}", .{ cp, err });
+    }
+
     // If we have sprite drawing enabled, check if our sprite face can
     // handle this.
     if (self.sprite) |sprite| {
@@ -353,6 +360,51 @@ fn indexForCodepointExact(self: Group, cp: u32, style: Style, p: ?Presentation)
     return null;
 }
 
+/// Checks if the codepoint is in the map of codepoint overrides,
+/// finds the override font, and returns it.
+fn indexForCodepointOverride(self: *Group, cp: u32) !?FontIndex {
+    if (comptime font.Discover == void) return null;
+    const map = self.codepoint_map orelse return null;
+
+    // If we have a codepoint too large or isn't in the map, then we
+    // don't have an override.
+    const cp_u21 = std.math.cast(u21, cp) orelse return null;
+    const desc = map.get(cp_u21) orelse return null;
+
+    // Fast path: the descriptor is already loaded.
+    const idx_: ?FontIndex = self.descriptor_cache.get(desc) orelse idx: {
+        // Slow path: we have to find this descriptor and load the font
+        const discover = self.discover orelse return null;
+        var disco_it = try discover.discover(desc);
+        defer disco_it.deinit();
+
+        const face = (try disco_it.next()) orelse {
+            log.warn(
+                "font lookup for codepoint map failed codepoint={} err=FontNotFound",
+                .{cp},
+            );
+
+            // Add null to the cache so we don't do a lookup again later.
+            try self.descriptor_cache.put(self.alloc, desc, null);
+            return null;
+        };
+
+        // Add the font to our list of fonts so we can get an index for it,
+        // and ensure the index is stored in the descriptor cache for next time.
+        const idx = try self.addFace(.regular, .{ .deferred = face });
+        try self.descriptor_cache.put(self.alloc, desc, idx);
+
+        break :idx idx;
+    };
+
+    // The descriptor cache will populate null if the descriptor is not found
+    // to avoid expensive discoveries later.
+    const idx = idx_ orelse return null;
+
+    // We need to verify that this index has the codepoint we want.
+    return if (self.hasCodepoint(idx, cp, null)) idx else null;
+}
+
 /// Check if a specific font index has a specific codepoint. This does not
 /// necessarily force the font to load.
 pub fn hasCodepoint(self: *Group, index: FontIndex, cp: u32, p: ?Presentation) bool {

From bcafbc8abba142000c6598d5b403c378ba771b08 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Sun, 24 Sep 2023 20:28:24 -0700
Subject: [PATCH 07/12] config: add font-codepoint-map

---
 src/config/Config.zig | 196 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 196 insertions(+)

diff --git a/src/config/Config.zig b/src/config/Config.zig
index 3e5836d8b..1327aa4f3 100644
--- a/src/config/Config.zig
+++ b/src/config/Config.zig
@@ -91,6 +91,21 @@ const c = @cImport({
 @"font-variation-italic": RepeatableFontVariation = .{},
 @"font-variation-bold-italic": RepeatableFontVariation = .{},
 
+/// Force one or a range of Unicode codepoints to map to a specific named
+/// font. This is useful if you want to support special symbols or if you
+/// want to use specific glyphs that render better for your specific font.
+///
+/// The syntax is "codepoint=fontname" where "codepoint" is either a
+/// single codepoint or a range. Codepoints must be specified as full
+/// Unicode hex values, such as "U+ABCD". Codepoint ranges are specified
+/// as "U+ABCD-U+DEFF". You can specify multiple ranges for the same font
+/// separated by commas, such as "U+ABCD-U+DEFF,U+1234-U+5678=fontname".
+/// The font name is the same value as you would use for "font-family".
+///
+/// This configuration can be repeated multiple times to specify multiple
+/// codepoint mappings.
+@"font-codepoint-map": RepeatableCodepointMap = .{},
+
 /// Draw fonts with a thicker stroke, if supported. This is only supported
 /// currently on macOS.
 @"font-thicken": bool = false,
@@ -1507,6 +1522,187 @@ pub const Keybinds = struct {
     }
 };
 
+/// See "font-codepoint-map" for documentation.
+pub const RepeatableCodepointMap = struct {
+    const Self = @This();
+
+    map: fontpkg.CodepointMap = .{},
+
+    pub fn parseCLI(self: *Self, alloc: Allocator, input_: ?[]const u8) !void {
+        const input = input_ orelse return error.ValueRequired;
+        const eql_idx = std.mem.indexOf(u8, input, "=") orelse return error.InvalidValue;
+        const whitespace = " \t";
+        const key = std.mem.trim(u8, input[0..eql_idx], whitespace);
+        const value = std.mem.trim(u8, input[eql_idx + 1 ..], whitespace);
+        const valueZ = try alloc.dupeZ(u8, value);
+
+        var p: UnicodeRangeParser = .{ .input = key };
+        while (try p.next()) |range| {
+            try self.map.add(alloc, .{
+                .range = range,
+                .descriptor = .{ .family = valueZ },
+            });
+        }
+    }
+
+    /// Deep copy of the struct. Required by Config.
+    pub fn clone(self: *const Self, alloc: Allocator) !Self {
+        return .{
+            .map = .{ .list = try self.map.list.clone(alloc) },
+        };
+    }
+
+    /// Compare if two of our values are equal. Required by Config.
+    pub fn equal(self: Self, other: Self) bool {
+        const itemsA = self.map.list.slice();
+        const itemsB = other.map.list.slice();
+        if (itemsA.len != itemsB.len) return false;
+        for (0..itemsA.len) |i| {
+            const a = itemsA.get(i);
+            const b = itemsB.get(i);
+            if (!std.meta.eql(a, b)) return false;
+        } else return true;
+    }
+
+    /// Parses the list of Unicode codepoint ranges. Valid syntax:
+    ///
+    ///   "" (empty returns null)
+    ///   U+1234
+    ///   U+1234-5678
+    ///   U+1234,U+5678
+    ///   U+1234-5678,U+5678
+    ///   U+1234,U+5678-9ABC
+    ///
+    /// etc.
+    const UnicodeRangeParser = struct {
+        input: []const u8,
+        i: usize = 0,
+
+        pub fn next(self: *UnicodeRangeParser) !?[2]u21 {
+            // Once we're EOF then we're done without an error.
+            if (self.eof()) return null;
+
+            // One codepoint no matter what
+            const start = try self.parseCodepoint();
+            if (self.eof()) return .{ start, start };
+
+            // Otherwise we expect either a range or a comma
+            switch (self.input[self.i]) {
+                // Comma means we have another codepoint but in a different
+                // range so we return our current codepoint.
+                ',' => {
+                    self.advance();
+                    if (self.eof()) return error.InvalidValue;
+                    return .{ start, start };
+                },
+
+                // Hyphen means we have a range.
+                '-' => {
+                    self.advance();
+                    if (self.eof()) return error.InvalidValue;
+                    const end = try self.parseCodepoint();
+                    if (!self.eof() and self.input[self.i] != ',') return error.InvalidValue;
+                    self.advance();
+                    return .{ start, end };
+                },
+
+                else => return error.InvalidValue,
+            }
+        }
+
+        fn parseCodepoint(self: *UnicodeRangeParser) !u21 {
+            if (self.input[self.i] != 'U') return error.InvalidValue;
+            self.advance();
+            if (self.eof()) return error.InvalidValue;
+            if (self.input[self.i] != '+') return error.InvalidValue;
+            self.advance();
+            if (self.eof()) return error.InvalidValue;
+
+            const start_i = self.i;
+            while (true) {
+                const current = self.input[self.i];
+                const is_hex = (current >= '0' and current <= '9') or
+                    (current >= 'A' and current <= 'F') or
+                    (current >= 'a' and current <= 'f');
+                if (!is_hex) break;
+
+                // Advance but break on EOF
+                self.advance();
+                if (self.eof()) break;
+            }
+
+            // If we didn't consume a single character, we have an error.
+            if (start_i == self.i) return error.InvalidValue;
+
+            return std.fmt.parseInt(u21, self.input[start_i..self.i], 16) catch
+                return error.InvalidValue;
+        }
+
+        fn advance(self: *UnicodeRangeParser) void {
+            self.i += 1;
+        }
+
+        fn eof(self: *const UnicodeRangeParser) bool {
+            return self.i >= self.input.len;
+        }
+    };
+
+    test "parseCLI" {
+        const testing = std.testing;
+        var arena = ArenaAllocator.init(testing.allocator);
+        defer arena.deinit();
+        const alloc = arena.allocator();
+
+        var list: Self = .{};
+        try list.parseCLI(alloc, "U+ABCD=Comic Sans");
+        try list.parseCLI(alloc, "U+0001-U+0005=Verdana");
+        try list.parseCLI(alloc, "U+0006-U+0009,U+ABCD=Courier");
+
+        try testing.expectEqual(@as(usize, 4), list.map.list.len);
+        {
+            const entry = list.map.list.get(0);
+            try testing.expectEqual([2]u21{ 0xABCD, 0xABCD }, entry.range);
+            try testing.expectEqualStrings("Comic Sans", entry.descriptor.family.?);
+        }
+        {
+            const entry = list.map.list.get(1);
+            try testing.expectEqual([2]u21{ 1, 5 }, entry.range);
+            try testing.expectEqualStrings("Verdana", entry.descriptor.family.?);
+        }
+        {
+            const entry = list.map.list.get(2);
+            try testing.expectEqual([2]u21{ 6, 9 }, entry.range);
+            try testing.expectEqualStrings("Courier", entry.descriptor.family.?);
+        }
+        {
+            const entry = list.map.list.get(3);
+            try testing.expectEqual([2]u21{ 0xABCD, 0xABCD }, entry.range);
+            try testing.expectEqualStrings("Courier", entry.descriptor.family.?);
+        }
+    }
+
+    // test "parseCLI with whitespace" {
+    //     const testing = std.testing;
+    //     var arena = ArenaAllocator.init(testing.allocator);
+    //     defer arena.deinit();
+    //     const alloc = arena.allocator();
+    //
+    //     var list: Self = .{};
+    //     try list.parseCLI(alloc, "wght =200");
+    //     try list.parseCLI(alloc, "slnt= -15");
+    //
+    //     try testing.expectEqual(@as(usize, 2), list.list.items.len);
+    //     try testing.expectEqual(fontpkg.face.Variation{
+    //         .id = fontpkg.face.Variation.Id.init("wght"),
+    //         .value = 200,
+    //     }, list.list.items[0]);
+    //     try testing.expectEqual(fontpkg.face.Variation{
+    //         .id = fontpkg.face.Variation.Id.init("slnt"),
+    //         .value = -15,
+    //     }, list.list.items[1]);
+    // }
+};
+
 /// Options for copy on select behavior.
 pub const CopyOnSelect = enum {
     /// Disables copy on select entirely.

From a8877d334c2f23a9b11076a80288c46d617530fc Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Sun, 24 Sep 2023 20:32:35 -0700
Subject: [PATCH 08/12] config: unicode range parser is more lenient about
 whitespace

---
 src/config/Config.zig | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/config/Config.zig b/src/config/Config.zig
index 1327aa4f3..2a422dbdf 100644
--- a/src/config/Config.zig
+++ b/src/config/Config.zig
@@ -1571,7 +1571,7 @@ pub const RepeatableCodepointMap = struct {
-    ///   U+1234-5678
+    ///   U+1234-U+5678
     ///   U+1234,U+5678
-    ///   U+1234-5678,U+5678
-    ///   U+1234,U+5678-9ABC
+    ///   U+1234-U+5678,U+5678
+    ///   U+1234,U+5678-U+9ABC
     ///
     /// etc.
     const UnicodeRangeParser = struct {
@@ -1586,12 +1586,16 @@ pub const RepeatableCodepointMap = struct {
             const start = try self.parseCodepoint();
             if (self.eof()) return .{ start, start };
 
+            // We're allowed to have any whitespace here, including at the end
+            self.consumeWhitespace();
+            if (self.eof()) return .{ start, start };
             // Otherwise we expect either a range or a comma
             switch (self.input[self.i]) {
                 // Comma means we have another codepoint but in a different
                 // range so we return our current codepoint.
                 ',' => {
                     self.advance();
+                    self.consumeWhitespace();
                     if (self.eof()) return error.InvalidValue;
                     return .{ start, start };
                 },
@@ -1599,10 +1603,14 @@ pub const RepeatableCodepointMap = struct {
                 // Hyphen means we have a range.
                 '-' => {
                     self.advance();
+                    self.consumeWhitespace();
                     if (self.eof()) return error.InvalidValue;
                     const end = try self.parseCodepoint();
+                    self.consumeWhitespace();
                     if (!self.eof() and self.input[self.i] != ',') return error.InvalidValue;
                     self.advance();
+                    self.consumeWhitespace();
+                    if (start > end) return error.InvalidValue;
                     return .{ start, end };
                 },
 
@@ -1610,6 +1618,15 @@ pub const RepeatableCodepointMap = struct {
             }
         }
 
+        fn consumeWhitespace(self: *UnicodeRangeParser) void {
+            while (!self.eof()) {
+                switch (self.input[self.i]) {
+                    ' ', '\t' => self.advance(),
+                    else => return,
+                }
+            }
+        }
+
         fn parseCodepoint(self: *UnicodeRangeParser) !u21 {
             if (self.input[self.i] != 'U') return error.InvalidValue;
             self.advance();

From c2cc2b8f0386f56865c2ff68c41dd136e03d6e62 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Sun, 24 Sep 2023 20:35:05 -0700
Subject: [PATCH 09/12] core: hook up the codepoint map to the loaded font
 group

---
 src/Surface.zig       | 5 +++++
 src/config/Config.zig | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/src/Surface.zig b/src/Surface.zig
index 7bbe31745..d64147f0d 100644
--- a/src/Surface.zig
+++ b/src/Surface.zig
@@ -224,6 +224,11 @@ pub fn init(
         var group = try font.Group.init(alloc, font_lib, font_size);
         errdefer group.deinit();
 
+        // If we have codepoint mappings, set those.
+        if (config.@"font-codepoint-map".map.list.len > 0) {
+            group.codepoint_map = config.@"font-codepoint-map".map;
+        }
+
         // Search for fonts
         if (font.Discover != void) discover: {
             const disco = try app.fontDiscover() orelse {
diff --git a/src/config/Config.zig b/src/config/Config.zig
index 2a422dbdf..e28663984 100644
--- a/src/config/Config.zig
+++ b/src/config/Config.zig
@@ -104,6 +104,9 @@ const c = @cImport({
 ///
 /// This configuration can be repeated multiple times to specify multiple
 /// codepoint mappings.
+///
+/// Changing this configuration at runtime will only affect new terminals,
+/// i.e. new windows, tabs, etc.
 @"font-codepoint-map": RepeatableCodepointMap = .{},
 
 /// Draw fonts with a thicker stroke, if supported. This is only supported

From 41170bb4e0af117ffc95f9caf80b1f75d73bc913 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Sun, 24 Sep 2023 20:36:07 -0700
Subject: [PATCH 10/12] config: codepoint override font descriptors should not
 require monospace

---
 src/config/Config.zig | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/config/Config.zig b/src/config/Config.zig
index e28663984..ff277511b 100644
--- a/src/config/Config.zig
+++ b/src/config/Config.zig
@@ -1543,7 +1543,10 @@ pub const RepeatableCodepointMap = struct {
         while (try p.next()) |range| {
             try self.map.add(alloc, .{
                 .range = range,
-                .descriptor = .{ .family = valueZ },
+                .descriptor = .{
+                    .family = valueZ,
+                    .monospace = false, // we allow any font
+                },
             });
         }
     }

From 6ac4cc9671dcc1831161bfcb6bd123e0e849bcb6 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Sun, 24 Sep 2023 20:40:36 -0700
Subject: [PATCH 11/12] font: debug log for override

---
 src/font/Group.zig | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/font/Group.zig b/src/font/Group.zig
index ef27dc02e..62b62fc72 100644
--- a/src/font/Group.zig
+++ b/src/font/Group.zig
@@ -402,7 +402,16 @@ fn indexForCodepointOverride(self: *Group, cp: u32) !?FontIndex {
     const idx = idx_ orelse return null;
 
     // We need to verify that this index has the codepoint we want.
-    return if (self.hasCodepoint(idx, cp, null)) idx else null;
+    if (self.hasCodepoint(idx, cp, null)) {
+        log.debug("codepoint override based on config codepoint={} family={s}", .{
+            cp,
+            desc.family orelse "",
+        });
+
+        return idx;
+    }
+
+    return null;
 }
 
 /// Check if a specific font index has a specific codepoint. This does not

From 7a9a36ecb0f06adfc47001e3fbbe8b8224d19b05 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto <mitchell.hashimoto@gmail.com>
Date: Sun, 24 Sep 2023 20:49:53 -0700
Subject: [PATCH 12/12] config: remove unused test, make parse test more
 intense

---
 src/config/Config.zig | 25 ++-----------------------
 1 file changed, 2 insertions(+), 23 deletions(-)

diff --git a/src/config/Config.zig b/src/config/Config.zig
index ff277511b..736a82747 100644
--- a/src/config/Config.zig
+++ b/src/config/Config.zig
@@ -1678,8 +1678,8 @@ pub const RepeatableCodepointMap = struct {
 
         var list: Self = .{};
         try list.parseCLI(alloc, "U+ABCD=Comic Sans");
-        try list.parseCLI(alloc, "U+0001-U+0005=Verdana");
-        try list.parseCLI(alloc, "U+0006-U+0009,U+ABCD=Courier");
+        try list.parseCLI(alloc, "U+0001 - U+0005=Verdana");
+        try list.parseCLI(alloc, "U+0006-U+0009, U+ABCD=Courier");
 
         try testing.expectEqual(@as(usize, 4), list.map.list.len);
         {
@@ -1703,27 +1703,6 @@ pub const RepeatableCodepointMap = struct {
             try testing.expectEqualStrings("Courier", entry.descriptor.family.?);
         }
     }
-
-    // test "parseCLI with whitespace" {
-    //     const testing = std.testing;
-    //     var arena = ArenaAllocator.init(testing.allocator);
-    //     defer arena.deinit();
-    //     const alloc = arena.allocator();
-    //
-    //     var list: Self = .{};
-    //     try list.parseCLI(alloc, "wght =200");
-    //     try list.parseCLI(alloc, "slnt= -15");
-    //
-    //     try testing.expectEqual(@as(usize, 2), list.list.items.len);
-    //     try testing.expectEqual(fontpkg.face.Variation{
-    //         .id = fontpkg.face.Variation.Id.init("wght"),
-    //         .value = 200,
-    //     }, list.list.items[0]);
-    //     try testing.expectEqual(fontpkg.face.Variation{
-    //         .id = fontpkg.face.Variation.Id.init("slnt"),
-    //         .value = -15,
-    //     }, list.list.items[1]);
-    // }
 };
 
 /// Options for copy on select behavior.