Merge pull request #531 from mitchellh/codepoint-map

Specify font for specific codepoint ranges
This commit is contained in:
Mitchell Hashimoto
2023-09-24 21:06:51 -07:00
committed by GitHub
6 changed files with 440 additions and 8 deletions

View File

@ -224,6 +224,11 @@ pub fn init(
var group = try font.Group.init(alloc, font_lib, font_size);
errdefer group.deinit();
// If we have codepoint mappings, set those.
if (config.@"font-codepoint-map".map.list.len > 0) {
group.codepoint_map = config.@"font-codepoint-map".map;
}
// Search for fonts
if (font.Discover != void) discover: {
const disco = try app.fontDiscover() orelse {

View File

@ -91,6 +91,24 @@ const c = @cImport({
@"font-variation-italic": RepeatableFontVariation = .{},
@"font-variation-bold-italic": RepeatableFontVariation = .{},
/// Force one or a range of Unicode codepoints to map to a specific named
/// font. This is useful if you want to support special symbols or if you
/// want to use specific glyphs that render better for your specific font.
///
/// The syntax is "codepoint=fontname" where "codepoint" is either a
/// single codepoint or a range. Codepoints must be specified as full
/// Unicode hex values, such as "U+ABCD". Codepoint ranges are specified
/// as "U+ABCD-U+DEFF". You can specify multiple ranges for the same font
/// separated by commas, such as "U+ABCD-U+DEFF,U+1234-U+5678=fontname".
/// The font name is the same value as you would use for "font-family".
///
/// This configuration can be repeated multiple times to specify multiple
/// codepoint mappings.
///
/// Changing this configuration at runtime will only affect new terminals,
/// i.e. new windows, tabs, etc.
@"font-codepoint-map": RepeatableCodepointMap = .{},
/// Draw fonts with a thicker stroke, if supported. This is only supported
/// currently on macOS.
@"font-thicken": bool = false,
@ -1507,6 +1525,186 @@ pub const Keybinds = struct {
}
};
/// See "font-codepoint-map" for documentation.
pub const RepeatableCodepointMap = struct {
const Self = @This();
map: fontpkg.CodepointMap = .{},
/// Parse one "codepoints=fontname" value and append an entry to the map
/// for every codepoint range found on the left-hand side.
///
/// Returns error.ValueRequired when no value is given and
/// error.InvalidValue when there is no "=" or the codepoint list is
/// malformed. The font name is duplicated once with `alloc` and that
/// single copy is shared by every range parsed from this value.
pub fn parseCLI(self: *Self, alloc: Allocator, input_: ?[]const u8) !void {
    const raw = input_ orelse return error.ValueRequired;
    const sep = std.mem.indexOfScalar(u8, raw, '=') orelse return error.InvalidValue;

    // Both sides tolerate surrounding spaces and tabs.
    const ranges_str = std.mem.trim(u8, raw[0..sep], " \t");
    const family_str = std.mem.trim(u8, raw[sep + 1 ..], " \t");
    const family = try alloc.dupeZ(u8, family_str);

    var parser: UnicodeRangeParser = .{ .input = ranges_str };
    while (try parser.next()) |range| {
        try self.map.add(alloc, .{
            .range = range,
            .descriptor = .{
                .family = family,
                // Any font is acceptable for an override, not just
                // monospace ones.
                .monospace = false,
            },
        });
    }
}
/// Copy of the struct. Required by Config.
///
/// The entry list is duplicated using `alloc`.
/// NOTE(review): the font family strings referenced by the descriptors
/// appear to be shared with the source rather than duplicated; confirm
/// they outlive the clone.
pub fn clone(self: *const Self, alloc: Allocator) !Self {
    const list_copy = try self.map.list.clone(alloc);
    return .{ .map = .{ .list = list_copy } };
}
/// Compare if two of our values are equal. Required by Config.
///
/// Two maps are equal when they hold the same number of entries and each
/// pair of entries at the same position compares equal via std.meta.eql.
/// NOTE(review): std.meta.eql does not follow pointers, so descriptor
/// strings with identical contents but different addresses presumably
/// compare as different; confirm this is acceptable for callers.
pub fn equal(self: Self, other: Self) bool {
    const lhs = self.map.list.slice();
    const rhs = other.map.list.slice();
    if (lhs.len != rhs.len) return false;

    var i: usize = 0;
    while (i < lhs.len) : (i += 1) {
        if (!std.meta.eql(lhs.get(i), rhs.get(i))) return false;
    }
    return true;
}
/// Parses a comma-separated list of Unicode codepoints and codepoint
/// ranges. Every codepoint, including the end of a range, must carry the
/// "U+" prefix followed by hex digits. Valid syntax:
///
///   "" (empty returns null)
///   U+1234
///   U+1234-U+5678
///   U+1234,U+5678
///   U+1234-U+5678,U+9ABC
///   U+1234,U+5678-U+9ABC
///
/// etc. Spaces and tabs are permitted after a codepoint and around the
/// "," and "-" separators, but not before the very first codepoint.
const UnicodeRangeParser = struct {
    input: []const u8,
    i: usize = 0,

    /// Returns the next range as `{ first, last }` (a single codepoint
    /// yields `{ cp, cp }`), or null once the input is exhausted.
    /// Returns error.InvalidValue on malformed input or when a range is
    /// given with first > last.
    pub fn next(self: *UnicodeRangeParser) !?[2]u21 {
        // Once we're EOF then we're done without an error.
        if (self.eof()) return null;

        // One codepoint no matter what.
        const start = try self.parseCodepoint();

        // Whitespace may follow the codepoint. Skipping it can reach the
        // end of the input, so EOF must be checked after the skip and
        // before the separator is inspected.
        self.consumeWhitespace();
        if (self.eof()) return .{ start, start };

        // Otherwise we expect either a range or a comma.
        switch (self.input[self.i]) {
            // Comma means another codepoint follows in a different range,
            // so the current codepoint is returned on its own. A comma
            // with nothing after it is rejected.
            ',' => {
                self.advance();
                self.consumeWhitespace();
                if (self.eof()) return error.InvalidValue;
                return .{ start, start };
            },

            // Hyphen means we have a range.
            '-' => {
                self.advance();
                self.consumeWhitespace();
                if (self.eof()) return error.InvalidValue;
                const end = try self.parseCodepoint();
                self.consumeWhitespace();

                // After the range we must be at EOF or at a comma that
                // introduces the next item. Unlike the single-codepoint
                // case, a comma at the very end is tolerated here.
                if (!self.eof()) {
                    if (self.input[self.i] != ',') return error.InvalidValue;
                    self.advance();
                    self.consumeWhitespace();
                }

                if (start > end) return error.InvalidValue;
                return .{ start, end };
            },

            else => return error.InvalidValue,
        }
    }

    /// Skips spaces and tabs starting at the current position.
    fn consumeWhitespace(self: *UnicodeRangeParser) void {
        while (!self.eof()) {
            switch (self.input[self.i]) {
                ' ', '\t' => self.advance(),
                else => return,
            }
        }
    }

    /// Parses "U+" followed by one or more hex digits at the current
    /// position. Returns error.InvalidValue if the prefix or digits are
    /// missing, or if the value does not fit in a u21.
    fn parseCodepoint(self: *UnicodeRangeParser) !u21 {
        if (self.eof() or self.input[self.i] != 'U') return error.InvalidValue;
        self.advance();
        if (self.eof() or self.input[self.i] != '+') return error.InvalidValue;
        self.advance();

        const start_i = self.i;
        while (!self.eof()) {
            switch (self.input[self.i]) {
                '0'...'9', 'A'...'F', 'a'...'f' => self.advance(),
                else => break,
            }
        }

        // If we didn't consume a single digit, we have an error.
        if (start_i == self.i) return error.InvalidValue;

        return std.fmt.parseInt(u21, self.input[start_i..self.i], 16) catch
            return error.InvalidValue;
    }

    fn advance(self: *UnicodeRangeParser) void {
        self.i += 1;
    }

    fn eof(self: *const UnicodeRangeParser) bool {
        return self.i >= self.input.len;
    }
};
test "parseCLI" {
    const testing = std.testing;
    var arena = ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var list: Self = .{};
    try list.parseCLI(alloc, "U+ABCD=Comic Sans");
    try list.parseCLI(alloc, "U+0001 - U+0005=Verdana");
    try list.parseCLI(alloc, "U+0006-U+0009, U+ABCD=Courier");

    // Entries are expected in the order they were parsed.
    const Expected = struct { range: [2]u21, family: []const u8 };
    const expected = [_]Expected{
        .{ .range = .{ 0xABCD, 0xABCD }, .family = "Comic Sans" },
        .{ .range = .{ 1, 5 }, .family = "Verdana" },
        .{ .range = .{ 6, 9 }, .family = "Courier" },
        .{ .range = .{ 0xABCD, 0xABCD }, .family = "Courier" },
    };

    try testing.expectEqual(@as(usize, 4), list.map.list.len);
    for (expected, 0..) |want, idx| {
        const entry = list.map.list.get(idx);
        try testing.expectEqual(want.range, entry.range);
        try testing.expectEqualStrings(want.family, entry.descriptor.family.?);
    }
}
};
/// Options for copy on select behavior.
pub const CopyOnSelect = enum {
/// Disables copy on select entirely.

81
src/font/CodepointMap.zig Normal file
View File

@ -0,0 +1,81 @@
/// CodepointMap is a map of codepoints to a discovery descriptor of a font
/// to use for that codepoint. If the descriptor doesn't return any matching
/// font, the codepoint is rendered using the default font.
const CodepointMap = @This();
const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const discovery = @import("discovery.zig");
/// A single override: a codepoint range and the font descriptor to use
/// for codepoints within that range.
pub const Entry = struct {
    /// Unicode codepoint range as { first, last }. Both ends are matched
    /// inclusively by `get`. `add` asserts range[0] <= range[1].
    range: [2]u21,
    /// The discovery descriptor of the font to use for this range.
    descriptor: discovery.Descriptor,
};
/// The list of entries. We use a multiarraylist because Descriptors are
/// quite large and we will very rarely match, so we'd rather pack our
/// ranges together to make everything more cache friendly for lookups.
///
/// Note: we just do a linear search because we expect to always have very
/// few entries, so the overhead of a binary search is not worth it. This is
/// possible to defeat with some pathological inputs, but there is no realistic
/// scenario where this will be a problem except people trying to fuck around.
list: std.MultiArrayList(Entry) = .{},
/// Release the memory held by the entry list.
///
/// `alloc` is forwarded to the list's `deinit`; presumably it must be the
/// same allocator that was passed to `add` (confirm against callers).
pub fn deinit(self: *CodepointMap, alloc: Allocator) void {
self.list.deinit(alloc);
}
/// Append an entry to the map.
///
/// Entries are stored in insertion order. When ranges overlap, entries
/// added later are meant to take priority over entries added earlier.
/// Asserts that the range is well-formed (first <= last).
pub fn add(self: *CodepointMap, alloc: Allocator, entry: Entry) !void {
    const first = entry.range[0];
    const last = entry.range[1];
    assert(first <= last);

    try self.list.append(alloc, entry);
}
/// Get a descriptor for a codepoint.
///
/// Entries are searched from the most recently added to the oldest so
/// that later entries take priority for overlapping ranges. Both ends of
/// a range are inclusive. Returns null if no entry covers `cp`.
pub fn get(self: *const CodepointMap, cp: u21) ?discovery.Descriptor {
    const ranges = self.list.items(.range);

    // Walk backwards. The same index must be used both for the range
    // test and for the descriptor lookup.
    var i = ranges.len;
    while (i > 0) {
        i -= 1;
        const range = ranges[i];
        if (range[0] <= cp and cp <= range[1]) {
            const descs = self.list.items(.descriptor);
            return descs[i];
        }
    }

    return null;
}

test "codepointmap" {
    const testing = std.testing;
    const alloc = testing.allocator;

    var m: CodepointMap = .{};
    defer m.deinit(alloc);

    // Exact range
    try testing.expect(m.get(1) == null);
    try m.add(alloc, .{ .range = .{ 1, 1 }, .descriptor = .{ .family = "A" } });
    {
        const d = m.get(1).?;
        try testing.expectEqualStrings("A", d.family.?);
    }

    // Later entry takes priority
    try m.add(alloc, .{ .range = .{ 1, 2 }, .descriptor = .{ .family = "B" } });
    {
        const d = m.get(1).?;
        try testing.expectEqualStrings("B", d.family.?);
    }

    // Non-matching
    try testing.expect(m.get(0) == null);
    try testing.expect(m.get(3) == null);

    // Regression: the descriptor returned must belong to the range that
    // matched, regardless of where that entry sits in the list.
    try m.add(alloc, .{ .range = .{ 5, 5 }, .descriptor = .{ .family = "C" } });
    {
        const d = m.get(5).?;
        try testing.expectEqualStrings("C", d.family.?);
    }
    {
        const d = m.get(2).?;
        try testing.expectEqualStrings("B", d.family.?);
    }
    {
        const d = m.get(1).?;
        try testing.expectEqualStrings("B", d.family.?);
    }
    try testing.expect(m.get(4) == null);
    try testing.expect(m.get(6) == null);
}

View File

@ -32,6 +32,33 @@ const log = std.log.scoped(.font_group);
// most important memory efficiency we can look for. This is totally opaque
// to the user so we can change this later.
const StyleArray = std.EnumArray(Style, std.ArrayListUnmanaged(GroupFace));
/// Cache from font descriptor to the face index loaded for it (or null
/// when a lookup for that descriptor found nothing). Manual codepoint
/// maps use this so the same font is not loaded more than once.
///
/// Note that the current implementation will still load a font twice if
/// the font used for a codepoint map is identical to a font used for a
/// regular style. That inefficiency is accepted for now because the
/// implementation is simpler and a codepoint map matching a regular font
/// is a rare case.
const DescriptorCache = std.HashMapUnmanaged(
    font.discovery.Descriptor,
    ?FontIndex,
    struct {
        const Key = font.discovery.Descriptor;

        pub fn hash(_: @This(), key: Key) u64 {
            return key.hash();
        }

        /// Keys are considered equal when their hashes are equal, so two
        /// different descriptors whose hashes collide are treated as the
        /// same key.
        pub fn eql(_: @This(), lhs: Key, rhs: Key) bool {
            return lhs.hash() == rhs.hash();
        }
    },
    std.hash_map.default_max_load_percentage,
);
/// The allocator for this group
alloc: Allocator,
@ -49,6 +76,15 @@ faces: StyleArray,
/// the codepoint. This can be set after initialization.
discover: ?*font.Discover = null,
/// A map of codepoints to font requests for codepoint-level overrides.
/// The memory associated with the map is owned by the caller and is not
/// modified or freed by Group.
codepoint_map: ?font.CodepointMap = null,
/// The descriptor cache is used to cache the descriptor to font face
/// mapping for codepoint maps.
descriptor_cache: DescriptorCache = .{},
/// Set this to a non-null value to enable sprite glyph drawing. If this
/// isn't enabled we'll just fall through to trying to use regular fonts
/// to render sprite glyphs. But more than likely, if this isn't set then
@ -86,11 +122,15 @@ pub fn init(
}
pub fn deinit(self: *Group) void {
var it = self.faces.iterator();
while (it.next()) |entry| {
for (entry.value.items) |*item| item.deinit();
entry.value.deinit(self.alloc);
{
var it = self.faces.iterator();
while (it.next()) |entry| {
for (entry.value.items) |*item| item.deinit();
entry.value.deinit(self.alloc);
}
}
self.descriptor_cache.deinit(self.alloc);
}
/// Add a face to the list for the given style. This face will be added as
@ -172,9 +212,12 @@ pub fn setSize(self: *Group, size: font.face.DesiredSize) !void {
}
/// This represents a specific font in the group.
pub const FontIndex = packed struct(u8) {
pub const FontIndex = packed struct(FontIndex.Backing) {
const Backing = u16;
const backing_bits = @typeInfo(Backing).Int.bits;
/// The number of bits we use for the index.
const idx_bits = 8 - @typeInfo(@typeInfo(Style).Enum.tag_type).Int.bits;
const idx_bits = backing_bits - @typeInfo(@typeInfo(Style).Enum.tag_type).Int.bits;
pub const IndexInt = @Type(.{ .Int = .{ .signedness = .unsigned, .bits = idx_bits } });
/// The special-case fonts that we support.
@ -195,7 +238,7 @@ pub const FontIndex = packed struct(u8) {
}
/// Convert to int
pub fn int(self: FontIndex) u8 {
pub fn int(self: FontIndex) Backing {
return @bitCast(self);
}
@ -211,7 +254,11 @@ pub const FontIndex = packed struct(u8) {
// We never want to take up more space than the backing integer since
// font indexes are everywhere so if we increase the size of this we'll
// dramatically increase our memory usage.
try std.testing.expectEqual(@sizeOf(u8), @sizeOf(FontIndex));
try std.testing.expectEqual(@sizeOf(Backing), @sizeOf(FontIndex));
// Just so we're aware when this changes. The current maximum number
// of fonts for a style is 13 bits or 8192 fonts.
try std.testing.expectEqual(13, idx_bits);
}
};
@ -231,6 +278,13 @@ pub fn indexForCodepoint(
style: Style,
p: ?Presentation,
) ?FontIndex {
// Codepoint overrides.
if (self.indexForCodepointOverride(cp)) |idx_| {
if (idx_) |idx| return idx;
} else |err| {
log.warn("codepoint override failed codepoint={} err={}", .{ cp, err });
}
// If we have sprite drawing enabled, check if our sprite face can
// handle this.
if (self.sprite) |sprite| {
@ -306,6 +360,60 @@ fn indexForCodepointExact(self: Group, cp: u32, style: Style, p: ?Presentation)
return null;
}
/// Looks up `cp` in the configured codepoint overrides and, if an
/// override exists, returns the index of the override font.
///
/// Returns null when font discovery is unavailable, no override map or
/// discoverer is set, the codepoint has no override, no font matches the
/// override's descriptor, or the resolved font does not contain the
/// codepoint. Errors from discovery, from adding the face, or from
/// updating the descriptor cache are propagated.
fn indexForCodepointOverride(self: *Group, cp: u32) !?FontIndex {
    if (comptime font.Discover == void) return null;

    const overrides = self.codepoint_map orelse return null;

    // A codepoint that doesn't fit in u21, or that isn't in the map,
    // has no override.
    const narrow_cp = std.math.cast(u21, cp) orelse return null;
    const desc = overrides.get(narrow_cp) orelse return null;

    // Fast path: a previous lookup already resolved this descriptor
    // (possibly to null). Slow path: discover and load the font.
    const resolved: ?FontIndex = if (self.descriptor_cache.get(desc)) |hit| hit else load: {
        const discover = self.discover orelse return null;
        var candidates = try discover.discover(desc);
        defer candidates.deinit();

        if (try candidates.next()) |face| {
            // Register the face to obtain an index and remember that
            // index for the next lookup of this descriptor.
            const new_idx = try self.addFace(.regular, .{ .deferred = face });
            try self.descriptor_cache.put(self.alloc, desc, new_idx);
            break :load new_idx;
        }

        log.warn(
            "font lookup for codepoint map failed codepoint={} err=FontNotFound",
            .{cp},
        );

        // Cache the miss so the discovery is not repeated later.
        try self.descriptor_cache.put(self.alloc, desc, null);
        return null;
    };

    // A cached null means an earlier discovery found nothing.
    const idx = resolved orelse return null;

    // The override only applies if the font actually has the codepoint.
    if (!self.hasCodepoint(idx, cp, null)) return null;

    log.debug("codepoint override based on config codepoint={} family={s}", .{
        cp,
        desc.family orelse "",
    });
    return idx;
}
/// Check if a specific font index has a specific codepoint. This does not
/// necessarily force the font to load.
pub fn hasCodepoint(self: *Group, index: FontIndex, cp: u32, p: ?Presentation) bool {

View File

@ -56,6 +56,30 @@ pub const Descriptor = struct {
/// will be preferred, but not guaranteed.
variations: []const Variation = &.{},
/// Returns a hash code that can be used to identify this descriptor,
/// for example as a cache key.
///
/// The family is hashed by string contents, not by pointer, so two
/// descriptors naming the same family from different allocations
/// produce the same hash. Variation values are hashed after truncation
/// to an integer, so values differing only in their fractional part
/// hash identically.
pub fn hash(self: Descriptor) u64 {
    const autoHash = std.hash.autoHash;
    const autoHashStrat = std.hash.autoHashStrat;
    var hasher = std.hash.Wyhash.init(0);

    // `.Deep` follows the slice and hashes its bytes; the default
    // strategy used by autoHash would hash the pointer address instead.
    // NOTE(review): style is presumably a string like family; for a
    // non-pointer field `.Deep` behaves the same as autoHash.
    autoHashStrat(&hasher, self.family, .Deep);
    autoHashStrat(&hasher, self.style, .Deep);
    autoHash(&hasher, self.codepoint);
    autoHash(&hasher, self.size);
    autoHash(&hasher, self.bold);
    autoHash(&hasher, self.italic);
    autoHash(&hasher, self.monospace);
    autoHash(&hasher, self.variations.len);
    for (self.variations) |variation| {
        autoHash(&hasher, variation.id);

        // This is not correct, but we don't currently depend on the
        // hash value being different based on decimal values of variations.
        // A signed target is used so that negative variation values are
        // representable; converting a negative float to an unsigned
        // integer is illegal behavior.
        autoHash(&hasher, @as(i64, @intFromFloat(variation.value)));
    }

    return hasher.final();
}
/// Convert to Fontconfig pattern to use for lookup. The pattern does
/// not have defaults filled/substituted (Fontconfig thing) so callers
/// must still do this.
@ -350,6 +374,21 @@ pub const CoreText = struct {
};
};
// A default descriptor must still produce a non-zero hash.
test "descriptor hash" {
    const testing = std.testing;

    // `const` because the descriptor is never mutated.
    const d: Descriptor = .{};
    try testing.expect(d.hash() != 0);
}
// Descriptors that differ only in family name must hash differently.
test "descriptor hash familiy names" {
    const testing = std.testing;

    // `const` because neither descriptor is mutated.
    const d1: Descriptor = .{ .family = "A" };
    const d2: Descriptor = .{ .family = "B" };
    try testing.expect(d1.hash() != d2.hash());
}
test "fontconfig" {
if (options.backend != .fontconfig_freetype) return error.SkipZigTest;

View File

@ -5,6 +5,7 @@ const build_config = @import("../build_config.zig");
pub const Atlas = @import("Atlas.zig");
pub const discovery = @import("discovery.zig");
pub const face = @import("face.zig");
pub const CodepointMap = @import("CodepointMap.zig");
pub const DeferredFace = @import("DeferredFace.zig");
pub const Face = face.Face;
pub const Group = @import("Group.zig");