From e6edf3105e43ccf41f345eee263f49f7faca18f3 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sat, 26 Aug 2023 09:20:26 -0700 Subject: [PATCH] font: grapheme clusters need to find a single font for all codepoints When font shaping grapheme clusters, we erroneously used the font index of a font that only matches the first codepoint in the cell. This led to the combining characters being [usually] unknown and rendering as boxes. For a grapheme, we must find a font face that has a glyph for _all codepoints_ in the grapheme. This also fixes an issue where we now properly render the unicode replacement character if we can't find a font satisfying a codepoint. --- src/font/Group.zig | 14 ++++ src/font/shaper/harfbuzz.zig | 3 +- src/font/shaper/run.zig | 141 +++++++++++++++++++++++++++++------ src/terminal/Screen.zig | 4 + 4 files changed, 138 insertions(+), 24 deletions(-) diff --git a/src/font/Group.zig b/src/font/Group.zig index e14feb33a..ec1e18fbf 100644 --- a/src/font/Group.zig +++ b/src/font/Group.zig @@ -299,6 +299,20 @@ fn indexForCodepointExact(self: Group, cp: u32, style: Style, p: ?Presentation) return null; } +/// Check if a specific font index has a specific codepoint. This does not +/// necessarily force the font to load. +pub fn hasCodepoint(self: *Group, index: FontIndex, cp: u32, p: ?Presentation) bool { + const list = self.faces.getPtr(index.style); + const item = list.items[@intCast(index.idx)]; + return switch (item) { + .deferred => |v| v.hasCodepoint(cp, p), + .loaded => |face| loaded: { + if (p) |desired| if (face.presentation != desired) break :loaded false; + break :loaded face.glyphIndex(cp) != null; + }, + }; +} + /// Returns the presentation for a specific font index. This is useful for /// determining what atlas is needed. pub fn presentationFromIndex(self: *Group, index: FontIndex) !font.Presentation { diff --git a/src/font/shaper/harfbuzz.zig b/src/font/shaper/harfbuzz.zig index 688aed67c..e7afb3d06 100644 --- a/src/font/shaper/harfbuzz.zig +++ b/src/font/shaper/harfbuzz.zig @@ -137,7 +137,7 @@ pub const Shaper = struct { .glyph_index = v.codepoint, }; - //log.warn("i={} info={} pos={} cell={}", .{ i, v, pos[i], self.cell_buf[i] }); + // log.warn("i={} info={} pos={} cell={}", .{ i, v, pos[i], self.cell_buf[i] }); } return self.cell_buf[0..info.len]; @@ -154,6 +154,7 @@ pub const Shaper = struct { } pub fn addCodepoint(self: RunIteratorHook, cp: u32, cluster: u32) !void { + // log.warn("cluster={} cp={x}", .{ cluster, cp }); self.shaper.hb_buf.add(cp, cluster); } diff --git a/src/font/shaper/run.zig b/src/font/shaper/run.zig index c180cf12f..5cdc8fda2 100644 --- a/src/font/shaper/run.zig +++ b/src/font/shaper/run.zig @@ -135,33 +135,61 @@ pub const RunIterator = struct { } } - // Determine the font for this cell. We'll use fallbacks - // manually here to try replacement chars and then a space - // for unknown glyphs. - const font_idx_opt = (try self.group.indexForCodepoint( - alloc, - if (cell.empty() or cell.char == 0) ' ' else cell.char, - style, - presentation, - )) orelse (try self.group.indexForCodepoint( - alloc, - 0xFFFD, - style, - .text, - )) orelse - try self.group.indexForCodepoint(alloc, ' ', style, .text); - const font_idx = font_idx_opt.?; - //log.warn("char={x} idx={}", .{ cell.char, font_idx }); - if (j == self.i) current_font = font_idx; + // We need to find a font that supports this character. If + // there are additional zero-width codepoints (to form a single + // grapheme, i.e. combining characters), we need to find a font + // that supports all of them. + const font_info: struct { + idx: font.Group.FontIndex, + fallback: ?u32 = null, + } = font_info: { + // If we find a font that supports this entire grapheme + // then we use that. + if (try self.indexForCell( + alloc, + j, + cell, + style, + presentation, + )) |idx| break :font_info .{ .idx = idx }; + + // Otherwise we need a fallback character. Prefer the + // official replacement character. + if (try self.group.indexForCodepoint( + alloc, + 0xFFFD, // replacement char + style, + presentation, + )) |idx| break :font_info .{ .idx = idx, .fallback = 0xFFFD }; + + // Fallback to space + if (try self.group.indexForCodepoint( + alloc, + ' ', + style, + presentation, + )) |idx| break :font_info .{ .idx = idx, .fallback = ' ' }; + + // We can't render at all. This is a bug, we should always + // have a font that can render a space. + unreachable; + }; + + //log.warn("char={x} info={}", .{ cell.char, font_info }); + if (j == self.i) current_font = font_info.idx; // If our fonts are not equal, then we're done with our run. - if (font_idx.int() != current_font.int()) break; + if (font_info.idx.int() != current_font.int()) break; - // Continue with our run + // If we're a fallback character, add that and continue; we + // don't want to add the entire grapheme. + if (font_info.fallback) |cp| { + try self.hooks.addCodepoint(cp, @intCast(cluster)); + continue; + } + + // Add all the codepoints for our grapheme try self.hooks.addCodepoint(cell.char, @intCast(cluster)); - - // If this cell is part of a grapheme cluster, add all the grapheme - // data points. if (cell.attrs.grapheme) { var it = self.row.codepointIterator(j); while (it.next()) |cp| { @@ -184,4 +212,71 @@ pub const RunIterator = struct { .font_index = current_font, }; } + + /// Find a font index that supports the grapheme for the given cell, + /// or null if no such font exists. + /// + /// This is used to find a font that supports the entire grapheme. + /// We look for fonts that support each individual codepoint and then + /// find the common font amongst all candidates. + fn indexForCell( + self: *RunIterator, + alloc: Allocator, + j: usize, + cell: terminal.Screen.Cell, + style: font.Style, + presentation: ?font.Presentation, + ) !?font.Group.FontIndex { + // Get the font index for the primary codepoint. + const primary_cp: u32 = if (cell.empty() or cell.char == 0) ' ' else cell.char; + const primary = try self.group.indexForCodepoint( + alloc, + primary_cp, + style, + presentation, + ) orelse return null; + + // Easy, and common: we aren't a multi-codepoint grapheme, so + // we just return whatever index for the cell codepoint. + if (!cell.attrs.grapheme) return primary; + + // If this is a grapheme, we need to find a font that supports + // all of the codepoints in the grapheme. + var it = self.row.codepointIterator(j); + var candidates = try std.ArrayList(font.Group.FontIndex).initCapacity(alloc, it.len() + 1); + defer candidates.deinit(); + candidates.appendAssumeCapacity(primary); + + while (it.next()) |cp| { + // Ignore Emoji ZWJs + if (cp == 0xFE0E or cp == 0xFE0F) continue; + + // Find a font that supports this codepoint. If none support this + // then the whole grapheme can't be rendered so we return null. + const idx = try self.group.indexForCodepoint( + alloc, + cp, + style, + presentation, + ) orelse return null; + candidates.appendAssumeCapacity(idx); + } + + // We need to find a candidate that has ALL of our codepoints + for (candidates.items) |idx| { + if (!self.group.group.hasCodepoint(idx, primary_cp, presentation)) continue; + it.reset(); + while (it.next()) |cp| { + // Ignore Emoji ZWJs + if (cp == 0xFE0E or cp == 0xFE0F) continue; + if (!self.group.group.hasCodepoint(idx, cp, presentation)) break; + } else { + // If the while completed, then we have a candidate that + // supports all of our codepoints. + return idx; + } + } + + return null; + } }; diff --git a/src/terminal/Screen.zig b/src/terminal/Screen.zig index 0ae944a96..45c432308 100644 --- a/src/terminal/Screen.zig +++ b/src/terminal/Screen.zig @@ -623,6 +623,10 @@ pub const CodepointIterator = struct { }, } } + + pub fn reset(self: *CodepointIterator) void { + self.i = 0; + } }; /// RowIndex represents a row within the screen. There are various meanings