mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-15 00:06:09 +03:00
unicode: emoji modifier requires emoji modifier base preceding to not… (#2954)
… break Fixes #2941 This fixes the rendering of the text below. For those that can't see it, it is the following in UTF-32: `0x22 0x1F3FF 0x22`. ``` "🏿" ``` `0x1F3FF` is the Fitzpatrick modifier for dark skin tone. It has the Unicode property `Emoji_Modifier`. Emoji modifiers are defined in UTS #51 and are only valid based on ED-13: ``` emoji_modifier_sequence := emoji_modifier_base emoji_modifier emoji_modifier_base := \p{Emoji_Modifier_Base} emoji_modifier := \p{Emoji_Modifier} ``` Additional quote from UTS #51: > To have an effect on an emoji, an emoji modifier must immediately follow > that base emoji character. Emoji presentation selectors are neither needed > nor recommended for emoji characters when they are followed by emoji > modifiers, and should not be used in newly generated emoji modifier > sequences; the emoji modifier automatically implies the emoji presentation > style. Our precomputed grapheme break table was mistakingly not following this rule. This commit fixes that by adding a check for that every `Emoji_Modifier` character must be preceded by an `Emoji_Modifier_Base`. This only has a cost during compilation (table generation). The runtime cost is identical; the table size didn't increase since we had leftover bits we could use.
This commit is contained in:
@ -342,7 +342,7 @@ pub fn print(self: *Terminal, c: u21) !void {
|
|||||||
if (c == 0xFE0F or c == 0xFE0E) {
|
if (c == 0xFE0F or c == 0xFE0E) {
|
||||||
// This only applies to emoji
|
// This only applies to emoji
|
||||||
const prev_props = unicode.getProperties(prev.cell.content.codepoint);
|
const prev_props = unicode.getProperties(prev.cell.content.codepoint);
|
||||||
const emoji = prev_props.grapheme_boundary_class == .extended_pictographic;
|
const emoji = prev_props.grapheme_boundary_class.isExtendedPictographic();
|
||||||
if (!emoji) return;
|
if (!emoji) return;
|
||||||
|
|
||||||
switch (c) {
|
switch (c) {
|
||||||
@ -3193,6 +3193,51 @@ test "Terminal: print multicodepoint grapheme, mode 2027" {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test "Terminal: Fitzpatrick skin tone next to non-base" {
|
||||||
|
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
|
||||||
|
defer t.deinit(testing.allocator);
|
||||||
|
|
||||||
|
// Enable grapheme clustering
|
||||||
|
t.modes.set(.grapheme_cluster, true);
|
||||||
|
|
||||||
|
// This is: "🏿" (which may not render correctly in your editor!)
|
||||||
|
try t.print(0x22); // "
|
||||||
|
try t.print(0x1F3FF); // Dark skin tone
|
||||||
|
try t.print(0x22); // "
|
||||||
|
|
||||||
|
// We should have 4 cells taken up. Importantly, the skin tone
|
||||||
|
// should not join with the quotes.
|
||||||
|
try testing.expectEqual(@as(usize, 0), t.screen.cursor.y);
|
||||||
|
try testing.expectEqual(@as(usize, 4), t.screen.cursor.x);
|
||||||
|
|
||||||
|
// Row should be dirty
|
||||||
|
try testing.expect(t.isDirty(.{ .screen = .{ .x = 0, .y = 0 } }));
|
||||||
|
|
||||||
|
// Assert various properties about our screen to verify
|
||||||
|
// we have all expected cells.
|
||||||
|
{
|
||||||
|
const list_cell = t.screen.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
|
||||||
|
const cell = list_cell.cell;
|
||||||
|
try testing.expectEqual(@as(u21, 0x22), cell.content.codepoint);
|
||||||
|
try testing.expect(!cell.hasGrapheme());
|
||||||
|
try testing.expectEqual(Cell.Wide.narrow, cell.wide);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
const list_cell = t.screen.pages.getCell(.{ .screen = .{ .x = 1, .y = 0 } }).?;
|
||||||
|
const cell = list_cell.cell;
|
||||||
|
try testing.expectEqual(@as(u21, 0x1F3FF), cell.content.codepoint);
|
||||||
|
try testing.expect(!cell.hasGrapheme());
|
||||||
|
try testing.expectEqual(Cell.Wide.wide, cell.wide);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
const list_cell = t.screen.pages.getCell(.{ .screen = .{ .x = 3, .y = 0 } }).?;
|
||||||
|
const cell = list_cell.cell;
|
||||||
|
try testing.expectEqual(@as(u21, 0x22), cell.content.codepoint);
|
||||||
|
try testing.expect(!cell.hasGrapheme());
|
||||||
|
try testing.expectEqual(Cell.Wide.narrow, cell.wide);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
test "Terminal: multicodepoint grapheme marks dirty on every codepoint" {
|
test "Terminal: multicodepoint grapheme marks dirty on every codepoint" {
|
||||||
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
|
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
|
||||||
defer t.deinit(testing.allocator);
|
defer t.deinit(testing.allocator);
|
||||||
|
@ -51,7 +51,7 @@ const Precompute = struct {
|
|||||||
const data = precompute: {
|
const data = precompute: {
|
||||||
var result: [std.math.maxInt(u10)]Value = undefined;
|
var result: [std.math.maxInt(u10)]Value = undefined;
|
||||||
|
|
||||||
@setEvalBranchQuota(2_000);
|
@setEvalBranchQuota(3_000);
|
||||||
const info = @typeInfo(GraphemeBoundaryClass).Enum;
|
const info = @typeInfo(GraphemeBoundaryClass).Enum;
|
||||||
for (0..std.math.maxInt(u2) + 1) |state_init| {
|
for (0..std.math.maxInt(u2) + 1) |state_init| {
|
||||||
for (info.fields) |field1| {
|
for (info.fields) |field1| {
|
||||||
@ -80,7 +80,7 @@ fn graphemeBreakClass(
|
|||||||
state: *BreakState,
|
state: *BreakState,
|
||||||
) bool {
|
) bool {
|
||||||
// GB11: Emoji Extend* ZWJ x Emoji
|
// GB11: Emoji Extend* ZWJ x Emoji
|
||||||
if (!state.extended_pictographic and gbc1 == .extended_pictographic) {
|
if (!state.extended_pictographic and gbc1.isExtendedPictographic()) {
|
||||||
state.extended_pictographic = true;
|
state.extended_pictographic = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -131,12 +131,21 @@ fn graphemeBreakClass(
|
|||||||
// GB11: Emoji Extend* ZWJ x Emoji
|
// GB11: Emoji Extend* ZWJ x Emoji
|
||||||
if (state.extended_pictographic and
|
if (state.extended_pictographic and
|
||||||
gbc1 == .zwj and
|
gbc1 == .zwj and
|
||||||
gbc2 == .extended_pictographic)
|
gbc2.isExtendedPictographic())
|
||||||
{
|
{
|
||||||
state.extended_pictographic = false;
|
state.extended_pictographic = false;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UTS #51. This isn't covered by UAX #29 as far as I can tell (but
|
||||||
|
// I'm probably wrong). This is a special case for emoji modifiers
|
||||||
|
// which only do not break if they're next to a base.
|
||||||
|
//
|
||||||
|
// emoji_modifier_sequence := emoji_modifier_base emoji_modifier
|
||||||
|
if (gbc2 == .emoji_modifier and gbc1 == .extended_pictographic_base) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -181,3 +190,19 @@ pub fn main() !void {
|
|||||||
pub const std_options = struct {
|
pub const std_options = struct {
|
||||||
pub const log_level: std.log.Level = .info;
|
pub const log_level: std.log.Level = .info;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
test "grapheme break: emoji modifier" {
|
||||||
|
const testing = std.testing;
|
||||||
|
|
||||||
|
// Emoji and modifier
|
||||||
|
{
|
||||||
|
var state: BreakState = .{};
|
||||||
|
try testing.expect(!graphemeBreak(0x261D, 0x1F3FF, &state));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non-emoji and emoji modifier
|
||||||
|
{
|
||||||
|
var state: BreakState = .{};
|
||||||
|
try testing.expect(graphemeBreak(0x22, 0x1F3FF, &state));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
const props = @This();
|
const props = @This();
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
const assert = std.debug.assert;
|
||||||
const ziglyph = @import("ziglyph");
|
const ziglyph = @import("ziglyph");
|
||||||
const lut = @import("lut.zig");
|
const lut = @import("lut.zig");
|
||||||
|
|
||||||
@ -73,12 +74,21 @@ pub const GraphemeBoundaryClass = enum(u4) {
|
|||||||
spacing_mark,
|
spacing_mark,
|
||||||
regional_indicator,
|
regional_indicator,
|
||||||
extended_pictographic,
|
extended_pictographic,
|
||||||
|
extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
|
||||||
|
emoji_modifier, // \p{Emoji_Modifier}
|
||||||
|
|
||||||
/// Gets the grapheme boundary class for a codepoint. This is VERY
|
/// Gets the grapheme boundary class for a codepoint. This is VERY
|
||||||
/// SLOW. The use case for this is only in generating lookup tables.
|
/// SLOW. The use case for this is only in generating lookup tables.
|
||||||
pub fn init(cp: u21) GraphemeBoundaryClass {
|
pub fn init(cp: u21) GraphemeBoundaryClass {
|
||||||
|
// We special-case modifier bases because we should not break
|
||||||
|
// if a modifier isn't next to a base.
|
||||||
|
if (ziglyph.emoji.isEmojiModifierBase(cp)) {
|
||||||
|
assert(ziglyph.emoji.isExtendedPictographic(cp));
|
||||||
|
return .extended_pictographic_base;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ziglyph.emoji.isEmojiModifier(cp)) return .emoji_modifier;
|
||||||
if (ziglyph.emoji.isExtendedPictographic(cp)) return .extended_pictographic;
|
if (ziglyph.emoji.isExtendedPictographic(cp)) return .extended_pictographic;
|
||||||
if (ziglyph.emoji.isEmojiModifier(cp)) return .extend;
|
|
||||||
if (ziglyph.grapheme_break.isL(cp)) return .L;
|
if (ziglyph.grapheme_break.isL(cp)) return .L;
|
||||||
if (ziglyph.grapheme_break.isV(cp)) return .V;
|
if (ziglyph.grapheme_break.isV(cp)) return .V;
|
||||||
if (ziglyph.grapheme_break.isT(cp)) return .T;
|
if (ziglyph.grapheme_break.isT(cp)) return .T;
|
||||||
@ -95,6 +105,19 @@ pub const GraphemeBoundaryClass = enum(u4) {
|
|||||||
// anything that doesn't fit into the above categories.
|
// anything that doesn't fit into the above categories.
|
||||||
return .invalid;
|
return .invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if this is an extended pictographic type. This
|
||||||
|
/// should be used instead of comparing the enum value directly
|
||||||
|
/// because we classify multiple.
|
||||||
|
pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
|
||||||
|
return switch (self) {
|
||||||
|
.extended_pictographic,
|
||||||
|
.extended_pictographic_base,
|
||||||
|
=> true,
|
||||||
|
|
||||||
|
else => false,
|
||||||
|
};
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn get(cp: u21) Properties {
|
pub fn get(cp: u21) Properties {
|
||||||
|
Reference in New Issue
Block a user