const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const utf8proc = @import("utf8proc");
const font = @import("../main.zig");
const terminal = @import("../../terminal/main.zig");

const log = std.log.scoped(.font_shaper);

/// Shaper that groups the codepoints of a text run into grapheme clusters
/// and emits one cell per cluster. Font ligatures are not supported (see
/// the notes in `shape`).
pub const Shaper = struct {
    const RunBuf = std.MultiArrayList(struct {
        /// The codepoint for this cell. This must be used in conjunction
        /// with cluster to find the total set of codepoints for a given
        /// cell. See cluster for more information.
        codepoint: u32,

        /// Cluster is set to the X value of the cell that this codepoint
        /// is part of. Note that a cell can have multiple codepoints
        /// with zero-width joiners (ZWJ) and such. Note that terminals
        /// do NOT handle full extended grapheme clustering well so it
        /// is possible a single grapheme extends multiple clusters.
        /// For example, skin tone emoji thumbs up may show up as two
        /// clusters: one with thumbs up and the ZWJ, and a second
        /// cluster with the tone block. It is up to the shaper to handle
        /// shaping these together into a single glyph, if it wishes.
        cluster: u32,
    });

    /// The allocator used for run_buf (and for temporary UTF-8 buffers
    /// created while shaping multi-codepoint graphemes).
    alloc: Allocator,

    /// The shared memory used for shaping results.
    cell_buf: []font.shape.Cell,

    /// The shared memory used for storing information about a run.
    run_buf: RunBuf,

    /// Create a shaper. `opts.cell_buf` is the buffer used for storing
    /// shaped results; it is borrowed, not owned, and `deinit` does not
    /// free it. It should be at least the number of columns in the
    /// terminal.
    pub fn init(alloc: Allocator, opts: font.shape.Options) !Shaper {
        // Note: we do not support opts.font_features

        return Shaper{
            .alloc = alloc,
            .cell_buf = opts.cell_buf,
            .run_buf = .{},
        };
    }

    /// Release the run buffer and invalidate the shaper. The cell buffer
    /// passed to `init` is NOT freed here.
    pub fn deinit(self: *Shaper) void {
        self.run_buf.deinit(self.alloc);
        self.* = undefined;
    }

    /// Returns an iterator that returns one text run at a time for the
    /// given terminal row. Note that text runs are only valid one at a time
    /// for a Shaper struct since they share state.
    pub fn runIterator(
        self: *Shaper,
        group: *font.GroupCache,
        row: terminal.Screen.Row,
        selection: ?terminal.Selection,
        cursor_x: ?usize,
    ) font.shape.RunIterator {
        return .{
            .hooks = .{ .shaper = self },
            .group = group,
            .row = row,
            .selection = selection,
            .cursor_x = cursor_x,
        };
    }

    /// Shape the given text run. The text run must be the immediately
    /// previous text run that was iterated since the text run does share
    /// state with the Shaper struct.
    ///
    /// The return value is a slice of `cell_buf` and is only valid until
    /// the next shape call is called.
    ///
    /// If there is not enough space in the cell buffer for the shaped
    /// cells, `error.OutOfMemory` is returned. That error is reused (rather
    /// than introducing a new one) because it is already part of this
    /// function's error set via the allocator. Errors from UTF-8 encoding
    /// and from the face lookup are propagated as-is.
    pub fn shape(self: *Shaper, run: font.shape.TextRun) ![]font.shape.Cell {
        const codepoints = self.run_buf.items(.codepoint);
        const clusters = self.run_buf.items(.cluster);
        assert(codepoints.len == clusters.len);

        switch (codepoints.len) {
            // Special cases: if we have no codepoints (is this possible?)
            // then our result is also an empty cell run.
            0 => return self.cell_buf[0..0],

            // If we have only 1 codepoint, then we assume that it is
            // a single grapheme and just let it through. At this point,
            // we can't have any more information to do anything else.
            1 => {
                // Never write past the end of the caller-provided buffer.
                if (self.cell_buf.len == 0) return error.OutOfMemory;

                self.cell_buf[0] = .{
                    .x = @intCast(clusters[0]),
                    .glyph_index = codepoints[0],
                };

                return self.cell_buf[0..1];
            },

            else => {},
        }

        // We know we have at least two codepoints, so we now go through
        // each and perform grapheme clustering.
        //
        // Note that due to limitations of canvas, we can NOT support
        // font ligatures. However, we do support grapheme clustering.
        // This means we can render things like skin tone emoji but
        // we can't render things like single glyph "=>".
        var break_state: i32 = 0;
        var cp1: u21 = @intCast(codepoints[0]);

        // `start` is the index of the first codepoint of the grapheme
        // currently being accumulated; `cur` is the next free cell slot.
        var start: usize = 0;
        var i: usize = 1;
        var cur: usize = 0;
        while (i <= codepoints.len) : (i += 1) {
            // We loop to codepoints.len so that we can handle the end
            // case. In the end case, we always assume it is a grapheme
            // break. This isn't strictly true but it's how terminals
            // work today.
            const grapheme_break = i == codepoints.len or blk: {
                const cp2: u21 = @intCast(codepoints[i]);
                defer cp1 = cp2;

                break :blk utf8proc.graphemeBreakStateful(
                    cp1,
                    cp2,
                    &break_state,
                );
            };

            // If this is NOT a grapheme break, cp2 is part of a single
            // grapheme cluster and we expect there could be more. We
            // move on to the next codepoint to try again.
            if (!grapheme_break) continue;

            // We are about to emit a cell. Make sure the caller-provided
            // buffer has room for it instead of writing out of bounds.
            // This happens before any allocation below, so nothing leaks.
            if (cur >= self.cell_buf.len) return error.OutOfMemory;

            // This IS a grapheme break, meaning that cp2 is NOT part
            // of cp1. So we need to render the prior grapheme.
            const len = i - start;
            assert(len > 0);
            switch (len) {
                // If we have only a single codepoint then just render it
                // as-is.
                1 => self.cell_buf[cur] = .{
                    .x = @intCast(clusters[start]),
                    .glyph_index = codepoints[start],
                },

                // We must have multiple codepoints (see assert above). In
                // this case we UTF-8 encode the codepoints and send them
                // to the face to reserve a private glyph index.
                else => {
                    // UTF-8 encode the codepoints in this cluster.
                    const cluster = cluster: {
                        const cluster_points = codepoints[start..i];
                        assert(cluster_points.len == len);

                        // First pass: compute the exact encoded length.
                        const buf_len = buf_len: {
                            var acc: usize = 0;
                            for (cluster_points) |cp| {
                                acc += try std.unicode.utf8CodepointSequenceLength(
                                    @intCast(cp),
                                );
                            }

                            break :buf_len acc;
                        };

                        // Second pass: encode into a temporary buffer.
                        const buf = try self.alloc.alloc(u8, buf_len);
                        errdefer self.alloc.free(buf);
                        var buf_i: usize = 0;
                        for (cluster_points) |cp| {
                            buf_i += try std.unicode.utf8Encode(
                                @intCast(cp),
                                buf[buf_i..],
                            );
                        }

                        break :cluster buf;
                    };
                    defer self.alloc.free(cluster);

                    var face = try run.group.group.faceFromIndex(run.font_index);
                    const index = try face.graphemeGlyphIndex(cluster);

                    self.cell_buf[cur] = .{
                        .x = @intCast(clusters[start]),
                        .glyph_index = index,
                    };
                },
            }

            start = i;
            cur += 1;
        }

        return self.cell_buf[0..cur];
    }

    /// The hooks for RunIterator.
    pub const RunIteratorHook = struct {
        shaper: *Shaper,

        /// Clear the shaper's run buffer (keeping its capacity) so a new
        /// run can be accumulated.
        pub fn prepare(self: RunIteratorHook) !void {
            // Reset the buffer for our current run
            self.shaper.run_buf.shrinkRetainingCapacity(0);
        }

        /// Append one codepoint and its cluster value to the shaper's
        /// run buffer. Fails if the buffer cannot grow.
        pub fn addCodepoint(
            self: RunIteratorHook,
            cp: u32,
            cluster: u32,
        ) !void {
            try self.shaper.run_buf.append(self.shaper.alloc, .{
                .codepoint = cp,
                .cluster = cluster,
            });
        }

        /// No-op for this shaper.
        pub fn finalize(self: RunIteratorHook) !void {
            _ = self;
        }
    };
};

/// The wasm-compatible API.
pub const Wasm = struct {
    const wasm = @import("../../os/wasm.zig");
    const alloc = wasm.alloc;

    /// Allocate a new Shaper with a cell buffer of `cap` cells. Returns
    /// null if any allocation or initialization step fails. Free with
    /// `shaper_free`.
    export fn shaper_new(cap: usize) ?*Shaper {
        return shaper_new_(cap) catch null;
    }

    /// Fallible implementation of `shaper_new`. On error, everything
    /// allocated so far is released by the errdefers.
    fn shaper_new_(cap: usize) !*Shaper {
        const cell_buf = try alloc.alloc(font.shape.Cell, cap);
        errdefer alloc.free(cell_buf);

        var shaper = try Shaper.init(alloc, .{ .cell_buf = cell_buf });
        errdefer shaper.deinit();

        const result = try alloc.create(Shaper);
        errdefer alloc.destroy(result);
        result.* = shaper;
        return result;
    }

    /// Free a Shaper created by `shaper_new`, including its cell buffer.
    /// A null pointer is ignored.
    export fn shaper_free(ptr: ?*Shaper) void {
        if (ptr) |v| {
            // The cell buffer must be freed before deinit since deinit
            // sets the whole struct to undefined.
            alloc.free(v.cell_buf);
            v.deinit();
            alloc.destroy(v);
        }
    }

    /// Runs a test to verify shaping works properly. Errors are logged
    /// as warnings rather than returned.
    export fn shaper_test(
        self: *Shaper,
        group: *font.GroupCache,
        str: [*]const u8,
        len: usize,
    ) void {
        shaper_test_(self, group, str[0..len]) catch |err| {
            log.warn("error during shaper test err={}", .{err});
        };
    }

    /// Fallible implementation of `shaper_test`: prints `str` into a
    /// temporary terminal, then shapes and logs every run of every
    /// viewport row. `str` must be valid UTF-8 or an error is returned.
    fn shaper_test_(self: *Shaper, group: *font.GroupCache, str: []const u8) !void {
        // Create a terminal and print all our characters into it.
        var term = try terminal.Terminal.init(alloc, self.cell_buf.len, 80);
        defer term.deinit(alloc);

        // Iterate over unicode codepoints and add to terminal
        {
            const view = try std.unicode.Utf8View.init(str);
            var iter = view.iterator();
            while (iter.nextCodepoint()) |c| {
                try term.print(c);
            }
        }

        // Iterate over the rows and print out all the runs we get.
        var row_iter = term.screen.rowIterator(.viewport);
        var y: usize = 0;
        while (row_iter.next()) |row| {
            defer y += 1;

            var iter = self.runIterator(group, row, null, null);
            while (try iter.next(alloc)) |run| {
                const cells = try self.shape(run);
                log.info("y={} run={d} shape={any} idx={}", .{
                    y,
                    run.cells,
                    cells,
                    run.font_index,
                });
            }
        }
    }
};