font: grapheme clusters need to find a single font for all codepoints

When shaping grapheme clusters, we erroneously used the font index
of a font that only matched the first codepoint in the cell. This led to the
combining characters [usually] being unknown and rendering as boxes.

For a grapheme, we must find a font face that has a glyph for _all codepoints_
in the grapheme.

This also fixes an issue with fallback rendering: we now properly render the
Unicode replacement character if we can't find a font satisfying a codepoint.
This commit is contained in:
Mitchell Hashimoto
2023-08-26 09:20:26 -07:00
parent 39c2332d35
commit e6edf3105e
4 changed files with 138 additions and 24 deletions

View File

@ -299,6 +299,20 @@ fn indexForCodepointExact(self: Group, cp: u32, style: Style, p: ?Presentation)
return null; return null;
} }
/// Reports whether the face stored at `index` provides a glyph for `cp`.
///
/// A deferred face answers through its own `hasCodepoint`, so this does not
/// necessarily force the font to load. For a loaded face, a non-null `p`
/// must equal the face's presentation or the result is false; otherwise the
/// result is whether `glyphIndex(cp)` is non-null.
pub fn hasCodepoint(self: *Group, index: FontIndex, cp: u32, p: ?Presentation) bool {
    const faces = self.faces.getPtr(index.style);
    const entry = faces.items[@intCast(index.idx)];
    return switch (entry) {
        .deferred => |deferred| deferred.hasCodepoint(cp, p),
        .loaded => |face| found: {
            // With no requested presentation only glyph coverage matters.
            const wanted = p orelse break :found face.glyphIndex(cp) != null;

            // Short-circuits: a presentation mismatch never queries the glyph.
            break :found face.presentation == wanted and face.glyphIndex(cp) != null;
        },
    };
}
/// Returns the presentation for a specific font index. This is useful for /// Returns the presentation for a specific font index. This is useful for
/// determining what atlas is needed. /// determining what atlas is needed.
pub fn presentationFromIndex(self: *Group, index: FontIndex) !font.Presentation { pub fn presentationFromIndex(self: *Group, index: FontIndex) !font.Presentation {

View File

@ -137,7 +137,7 @@ pub const Shaper = struct {
.glyph_index = v.codepoint, .glyph_index = v.codepoint,
}; };
//log.warn("i={} info={} pos={} cell={}", .{ i, v, pos[i], self.cell_buf[i] }); // log.warn("i={} info={} pos={} cell={}", .{ i, v, pos[i], self.cell_buf[i] });
} }
return self.cell_buf[0..info.len]; return self.cell_buf[0..info.len];
@ -154,6 +154,7 @@ pub const Shaper = struct {
} }
pub fn addCodepoint(self: RunIteratorHook, cp: u32, cluster: u32) !void { pub fn addCodepoint(self: RunIteratorHook, cp: u32, cluster: u32) !void {
// log.warn("cluster={} cp={x}", .{ cluster, cp });
self.shaper.hb_buf.add(cp, cluster); self.shaper.hb_buf.add(cp, cluster);
} }

View File

@ -135,33 +135,61 @@ pub const RunIterator = struct {
} }
} }
// Determine the font for this cell. We'll use fallbacks // We need to find a font that supports this character. If
// manually here to try replacement chars and then a space // there are additional zero-width codepoints (to form a single
// for unknown glyphs. // grapheme, i.e. combining characters), we need to find a font
const font_idx_opt = (try self.group.indexForCodepoint( // that supports all of them.
alloc, const font_info: struct {
if (cell.empty() or cell.char == 0) ' ' else cell.char, idx: font.Group.FontIndex,
style, fallback: ?u32 = null,
presentation, } = font_info: {
)) orelse (try self.group.indexForCodepoint( // If we find a font that supports this entire grapheme
alloc, // then we use that.
0xFFFD, if (try self.indexForCell(
style, alloc,
.text, j,
)) orelse cell,
try self.group.indexForCodepoint(alloc, ' ', style, .text); style,
const font_idx = font_idx_opt.?; presentation,
//log.warn("char={x} idx={}", .{ cell.char, font_idx }); )) |idx| break :font_info .{ .idx = idx };
if (j == self.i) current_font = font_idx;
// Otherwise we need a fallback character. Prefer the
// official replacement character.
if (try self.group.indexForCodepoint(
alloc,
0xFFFD, // replacement char
style,
presentation,
)) |idx| break :font_info .{ .idx = idx, .fallback = 0xFFFD };
// Fallback to space
if (try self.group.indexForCodepoint(
alloc,
' ',
style,
presentation,
)) |idx| break :font_info .{ .idx = idx, .fallback = ' ' };
// We can't render at all. This is a bug, we should always
// have a font that can render a space.
unreachable;
};
//log.warn("char={x} info={}", .{ cell.char, font_info });
if (j == self.i) current_font = font_info.idx;
// If our fonts are not equal, then we're done with our run. // If our fonts are not equal, then we're done with our run.
if (font_idx.int() != current_font.int()) break; if (font_info.idx.int() != current_font.int()) break;
// Continue with our run // If we're a fallback character, add that and continue; we
// don't want to add the entire grapheme.
if (font_info.fallback) |cp| {
try self.hooks.addCodepoint(cp, @intCast(cluster));
continue;
}
// Add all the codepoints for our grapheme
try self.hooks.addCodepoint(cell.char, @intCast(cluster)); try self.hooks.addCodepoint(cell.char, @intCast(cluster));
// If this cell is part of a grapheme cluster, add all the grapheme
// data points.
if (cell.attrs.grapheme) { if (cell.attrs.grapheme) {
var it = self.row.codepointIterator(j); var it = self.row.codepointIterator(j);
while (it.next()) |cp| { while (it.next()) |cp| {
@ -184,4 +212,71 @@ pub const RunIterator = struct {
.font_index = current_font, .font_index = current_font,
}; };
} }
/// Locate a single font able to draw the whole grapheme held in `cell`.
///
/// Each codepoint of the grapheme is first resolved to a font on its own;
/// those fonts form the candidate list, and the first candidate that has a
/// glyph for all of the codepoints is returned. Returns null when some
/// codepoint has no font at all, or when no candidate covers every codepoint.
fn indexForCell(
    self: *RunIterator,
    alloc: Allocator,
    j: usize,
    cell: terminal.Screen.Cell,
    style: font.Style,
    presentation: ?font.Presentation,
) !?font.Group.FontIndex {
    // An empty cell (or a zero char) is looked up as a space.
    const base_cp: u32 = if (!cell.empty() and cell.char != 0) cell.char else ' ';
    const base_idx = (try self.group.indexForCodepoint(
        alloc,
        base_cp,
        style,
        presentation,
    )) orelse return null;

    // Common case: a lone codepoint, so the font found above is the answer.
    if (!cell.attrs.grapheme) return base_idx;

    // Multi-codepoint grapheme: gather one candidate font per codepoint.
    // Capacity is reserved up front (iterator length plus the base
    // codepoint), so the appends below cannot fail.
    var it = self.row.codepointIterator(j);
    var candidates = try std.ArrayList(font.Group.FontIndex).initCapacity(alloc, it.len() + 1);
    defer candidates.deinit();
    candidates.appendAssumeCapacity(base_idx);
    while (it.next()) |cp| {
        switch (cp) {
            // Variation selectors (U+FE0E, U+FE0F) are not looked up.
            0xFE0E, 0xFE0F => continue,
            else => {},
        }

        // A codepoint that no font supports makes the whole grapheme
        // unrenderable, so give up immediately.
        const extra_idx = (try self.group.indexForCodepoint(
            alloc,
            cp,
            style,
            presentation,
        )) orelse return null;
        candidates.appendAssumeCapacity(extra_idx);
    }

    // Return the first candidate that covers the base codepoint and every
    // additional codepoint of the grapheme.
    search: for (candidates.items) |candidate| {
        if (!self.group.group.hasCodepoint(candidate, base_cp, presentation)) continue;

        // Second pass over the same cell's codepoints.
        it.reset();
        while (it.next()) |cp| {
            switch (cp) {
                // Variation selectors are skipped here as well.
                0xFE0E, 0xFE0F => continue,
                else => {},
            }
            if (!self.group.group.hasCodepoint(candidate, cp, presentation)) continue :search;
        }

        // Every codepoint was found in this candidate.
        return candidate;
    }

    return null;
}
}; };

View File

@ -623,6 +623,10 @@ pub const CodepointIterator = struct {
}, },
} }
} }
/// Rewind the iterator by setting its index `i` back to zero, so the
/// codepoints can be walked again with `next()` (used when a second pass
/// over the same cell is needed).
pub fn reset(self: *CodepointIterator) void {
self.i = 0;
}
}; };
/// RowIndex represents a row within the screen. There are various meanings /// RowIndex represents a row within the screen. There are various meanings