From e385e0f9d0759a036a51fc2a169fbb2ede64f127 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sun, 11 Aug 2024 15:23:05 -0700 Subject: [PATCH] font/shaper: split text runs on common bad ligature pairs Fixes #2081 Many fonts have a bad ligature for "fl", "fi", or "st". We previously maintained a list of such fonts in quirks.zig. However, these are so common that it was suggested we do something more systematic and this commit is that. This commit changes our text run splitting algorithm to always split on `fl`, `fi`, and `st`. This will cause some more runs for well behaved fonts but the combination of those characters is rare enough and our caching algorithm is good enough that it should be minimal overhead. This commit renders our existing quirks fonts obsolete but I kept that logic around so we can add to it if/when we find other quirky font behaviors. --- src/font/shaper/coretext.zig | 21 +++++++++++++++++++++ src/font/shaper/run.zig | 24 ++++++++++++++++++++++++ src/quirks.zig | 29 +++++++++++++---------------- 3 files changed, 58 insertions(+), 16 deletions(-) diff --git a/src/font/shaper/coretext.zig b/src/font/shaper/coretext.zig index 1305a1b4a..6a1e0e7f2 100644 --- a/src/font/shaper/coretext.zig +++ b/src/font/shaper/coretext.zig @@ -698,6 +698,27 @@ test "run iterator" { while (try it.next(alloc)) |_| count += 1; try testing.expectEqual(@as(usize, 3), count); } + + // Bad ligatures + for (&[_][]const u8{ "fl", "fi", "st" }) |bad| { + // Make a screen with some data + var screen = try terminal.Screen.init(alloc, 5, 3, 0); + defer screen.deinit(); + try screen.testWriteString(bad); + + // Get our run iterator + var shaper = &testdata.shaper; + var it = shaper.runIterator( + testdata.grid, + &screen, + screen.pages.pin(.{ .screen = .{ .y = 0 } }).?, + null, + null, + ); + var count: usize = 0; + while (try it.next(alloc)) |_| count += 1; + try testing.expectEqual(@as(usize, 2), count); + } } test "run iterator: empty cells with background set" { diff --git a/src/font/shaper/run.zig b/src/font/shaper/run.zig index 8d53c601b..8ef0f790c 100644 --- a/src/font/shaper/run.zig +++ b/src/font/shaper/run.zig @@ -104,6 +104,30 @@ pub const RunIterator = struct { if (j > self.i) style: { const prev_cell = cells[j - 1]; + // If the prev cell and this cell are both plain + // codepoints then we check if they are commonly "bad" + // ligatures and spit the run if they are. + if (prev_cell.content_tag == .codepoint and + cell.content_tag == .codepoint) + { + const prev_cp = prev_cell.codepoint(); + switch (prev_cp) { + // fl, fi + 'f' => { + const cp = cell.codepoint(); + if (cp == 'l' or cp == 'i') break; + }, + + // st + 's' => { + const cp = cell.codepoint(); + if (cp == 't') break; + }, + + else => {}, + } + } + // If the style is exactly the change then fast path out. if (prev_cell.style_id == cell.style_id) break :style; diff --git a/src/quirks.zig b/src/quirks.zig index 419294f5d..e3288afb6 100644 --- a/src/quirks.zig +++ b/src/quirks.zig @@ -12,21 +12,18 @@ const font = @import("font/main.zig"); /// If true, the default font features should be disabled for the given face. pub fn disableDefaultFontFeatures(face: *const font.Face) bool { - var buf: [64]u8 = undefined; - const name = face.name(&buf) catch |err| switch (err) { - // If the name doesn't fit in buf we know this will be false - // because we have no quirks fonts that are longer than buf! - error.OutOfMemory => return false, - }; + _ = face; - // CodeNewRoman, Menlo and Monaco both have a default ligature of "fi" that - // looks really bad in terminal grids, so we want to disable ligatures - // by default for these faces. - // - // JuliaMono has a default ligature of "st" that looks bad. - return std.mem.eql(u8, name, "CodeNewRoman") or - std.mem.eql(u8, name, "CodeNewRoman Nerd Font") or - std.mem.eql(u8, name, "JuliaMono") or - std.mem.eql(u8, name, "Menlo") or - std.mem.eql(u8, name, "Monaco"); + // This function used to do something, but we integrated the logic + // we checked for directly into our shaping algorithm. It's likely + // there are other broken fonts for other reasons so I'm keeping this + // around so its easy to add more checks in the future. + return false; + + // var buf: [64]u8 = undefined; + // const name = face.name(&buf) catch |err| switch (err) { + // // If the name doesn't fit in buf we know this will be false + // // because we have no quirks fonts that are longer than buf! + // error.OutOfMemory => return false, + // }; }