ghostty/src/font/shaper/web_canvas.zig
2023-07-18 16:20:30 -07:00

307 lines
11 KiB
Zig

const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const utf8proc = @import("utf8proc");
const font = @import("../main.zig");
const terminal = @import("../../terminal/main.zig");
const log = std.log.scoped(.font_shaper);
/// Shaper that performs grapheme clustering over a run of codepoints and
/// writes one result cell per grapheme into a caller-provided buffer.
/// Font ligatures are not supported by this shaper.
pub const Shaper = struct {
    const RunBuf = std.MultiArrayList(struct {
        /// The codepoint for this cell. This must be used in conjunction
        /// with cluster to find the total set of codepoints for a given
        /// cell. See cluster for more information.
        codepoint: u32,

        /// Cluster is set to the X value of the cell that this codepoint
        /// is part of. Note that a cell can have multiple codepoints
        /// with zero-width joiners (ZWJ) and such. Note that terminals
        /// do NOT handle full extended grapheme clustering well so it
        /// is possible a single grapheme extends multiple clusters.
        /// For example, skin tone emoji thumbs up may show up as two
        /// clusters: one with thumbs up and the ZWJ, and a second
        /// cluster with the tone block. It is up to the shaper to handle
        /// shaping these together into a single glyph, if it wishes.
        cluster: u32,
    });

    /// The allocator used for run_buf and for temporary cluster encoding.
    alloc: Allocator,

    /// The shared memory used for shaping results. Not owned by the
    /// Shaper: deinit does not free it.
    cell_buf: []font.shape.Cell,

    /// The shared memory used for storing information about a run.
    run_buf: RunBuf,

    /// Create a shaper. `opts.cell_buf` is the buffer used for storing
    /// shaped results. This should be at least the number of columns in
    /// the terminal.
    pub fn init(alloc: Allocator, opts: font.shape.Options) !Shaper {
        // Note: we do not support opts.font_features
        return Shaper{
            .alloc = alloc,
            .cell_buf = opts.cell_buf,
            .run_buf = .{},
        };
    }

    /// Free the run buffer and invalidate the shaper. The cell buffer
    /// passed to init is left untouched.
    pub fn deinit(self: *Shaper) void {
        self.run_buf.deinit(self.alloc);
        self.* = undefined;
    }

    /// Returns an iterator that returns one text run at a time for the
    /// given terminal row. Note that text runs are only valid one at a time
    /// for a Shaper struct since they share state.
    pub fn runIterator(
        self: *Shaper,
        group: *font.GroupCache,
        row: terminal.Screen.Row,
        selection: ?terminal.Selection,
        cursor_x: ?usize,
    ) font.shape.RunIterator {
        return .{
            .hooks = .{ .shaper = self },
            .group = group,
            .row = row,
            .selection = selection,
            .cursor_x = cursor_x,
        };
    }

    /// Shape the given text run. The text run must be the immediately
    /// previous text run that was iterated since the text run does share
    /// state with the Shaper struct.
    ///
    /// The return value is a slice of the shared cell buffer and is only
    /// valid until the next shape call is called.
    ///
    /// If there is not enough space in the cell buffer to hold every
    /// shaped grapheme, error.OutOfMemory is returned. Errors from UTF-8
    /// encoding, allocation, and the face lookup are propagated as well.
    pub fn shape(self: *Shaper, run: font.shape.TextRun) ![]font.shape.Cell {
        const codepoints = self.run_buf.items(.codepoint);
        const clusters = self.run_buf.items(.cluster);
        assert(codepoints.len == clusters.len);

        switch (codepoints.len) {
            // Special cases: if we have no codepoints (is this possible?)
            // then our result is also an empty cell run.
            0 => return self.cell_buf[0..0],

            // If we have only 1 codepoint, then we assume that it is
            // a single grapheme and just let it through. At this point,
            // we can't have any more information to do anything else.
            1 => {
                // An empty cell buffer cannot hold even one result.
                if (self.cell_buf.len == 0) return error.OutOfMemory;

                self.cell_buf[0] = .{
                    .x = @intCast(clusters[0]),
                    .glyph_index = codepoints[0],
                };
                return self.cell_buf[0..1];
            },

            else => {},
        }

        // We know we have at least two codepoints, so we now go through
        // each and perform grapheme clustering.
        //
        // Note that due to limitations of canvas, we can NOT support
        // font ligatures. However, we do support grapheme clustering.
        // This means we can render things like skin tone emoji but
        // we can't render things like single glyph "=>".
        var break_state: i32 = 0;
        var cp1: u21 = @intCast(codepoints[0]);

        // `start` is the index of the first codepoint of the grapheme
        // currently being accumulated; `cur` is the next free slot in
        // cell_buf.
        var start: usize = 0;
        var i: usize = 1;
        var cur: usize = 0;
        while (i <= codepoints.len) : (i += 1) {
            // We loop to codepoints.len so that we can handle the end
            // case. In the end case, we always assume it is a grapheme
            // break. This isn't strictly true but it's how terminals
            // work today.
            const grapheme_break = i == codepoints.len or blk: {
                const cp2: u21 = @intCast(codepoints[i]);
                defer cp1 = cp2;

                break :blk utf8proc.graphemeBreakStateful(
                    cp1,
                    cp2,
                    &break_state,
                );
            };

            // If this is NOT a grapheme break, cp2 is part of a single
            // grapheme cluster and we expect there could be more. We
            // move on to the next codepoint to try again.
            if (!grapheme_break) continue;

            // We are about to emit a cell. Clustering can merge several
            // codepoints into one cell, so the required capacity is only
            // known as we go; check it right before each write.
            if (cur >= self.cell_buf.len) return error.OutOfMemory;

            // This IS a grapheme break, meaning that cp2 is NOT part
            // of cp1. So we need to render the prior grapheme.
            const len = i - start;
            assert(len > 0);
            switch (len) {
                // If we have only a single codepoint then just render it
                // as-is.
                1 => self.cell_buf[cur] = .{
                    .x = @intCast(clusters[start]),
                    .glyph_index = codepoints[start],
                },

                // We must have multiple codepoints (see assert above). In
                // this case we UTF-8 encode the codepoints and send them
                // to the face to reserve a private glyph index.
                else => {
                    const cluster = try encodeCluster(
                        self.alloc,
                        codepoints[start..i],
                    );
                    defer self.alloc.free(cluster);

                    var face = try run.group.group.faceFromIndex(run.font_index);
                    const index = try face.graphemeGlyphIndex(cluster);

                    self.cell_buf[cur] = .{
                        .x = @intCast(clusters[start]),
                        .glyph_index = index,
                    };
                },
            }

            start = i;
            cur += 1;
        }

        return self.cell_buf[0..cur];
    }

    /// UTF-8 encode `cluster_points` into a newly allocated buffer. The
    /// caller owns the returned slice and must free it with `alloc`.
    fn encodeCluster(alloc: Allocator, cluster_points: []const u32) ![]u8 {
        // First pass: compute the exact encoded length.
        var buf_len: usize = 0;
        for (cluster_points) |cp| {
            buf_len += try std.unicode.utf8CodepointSequenceLength(
                @intCast(cp),
            );
        }

        const buf = try alloc.alloc(u8, buf_len);
        errdefer alloc.free(buf);

        // Second pass: encode each codepoint back to back.
        var buf_i: usize = 0;
        for (cluster_points) |cp| {
            buf_i += try std.unicode.utf8Encode(
                @intCast(cp),
                buf[buf_i..],
            );
        }

        return buf;
    }

    /// The hooks for RunIterator.
    pub const RunIteratorHook = struct {
        shaper: *Shaper,

        /// Clears the run buffer (keeping its capacity) so a new run
        /// can be accumulated.
        pub fn prepare(self: RunIteratorHook) !void {
            // Reset the buffer for our current run
            self.shaper.run_buf.shrinkRetainingCapacity(0);
        }

        /// Appends a codepoint and its cluster value to the current run.
        pub fn addCodepoint(
            self: RunIteratorHook,
            cp: u32,
            cluster: u32,
        ) !void {
            try self.shaper.run_buf.append(self.shaper.alloc, .{
                .codepoint = cp,
                .cluster = cluster,
            });
        }

        /// No work is needed to finish a run for this shaper.
        pub fn finalize(self: RunIteratorHook) !void {
            _ = self;
        }
    };
};
/// Exported functions that expose the Shaper to a wasm host.
pub const Wasm = struct {
    const wasm = @import("../../os/wasm.zig");
    const alloc = wasm.alloc;

    /// Allocates a Shaper whose cell buffer holds `cap` cells.
    /// Returns null if construction fails.
    export fn shaper_new(cap: usize) ?*Shaper {
        const created = shaper_new_(cap) catch return null;
        return created;
    }

    /// Fallible implementation behind shaper_new. Each resource acquired
    /// here is released again if a later step fails.
    fn shaper_new_(cap: usize) !*Shaper {
        const cells = try alloc.alloc(font.shape.Cell, cap);
        errdefer alloc.free(cells);

        var instance = try Shaper.init(alloc, .{ .cell_buf = cells });
        errdefer instance.deinit();

        const heap_ptr = try alloc.create(Shaper);
        errdefer alloc.destroy(heap_ptr);

        heap_ptr.* = instance;
        return heap_ptr;
    }

    /// Frees the cell buffer, the shaper state, and the Shaper allocation
    /// itself. Passing null is a no-op.
    export fn shaper_free(ptr: ?*Shaper) void {
        const shaper = ptr orelse return;

        // The cell buffer must be released before deinit, which
        // overwrites the struct with undefined.
        alloc.free(shaper.cell_buf);
        shaper.deinit();
        alloc.destroy(shaper);
    }

    /// Runs a test to verify shaping works properly. Any failure is
    /// logged as a warning rather than returned.
    export fn shaper_test(
        self: *Shaper,
        group: *font.GroupCache,
        str: [*]const u8,
        len: usize,
    ) void {
        const input = str[0..len];
        shaper_test_(self, group, input) catch |err| {
            log.warn("error during shaper test err={}", .{err});
        };
    }

    /// Prints `str` into a temporary terminal, then shapes every run of
    /// every viewport row and logs the result.
    fn shaper_test_(self: *Shaper, group: *font.GroupCache, str: []const u8) !void {
        // The terminal is sized from the cell buffer length, with 80 as
        // the other dimension.
        var term = try terminal.Terminal.init(alloc, self.cell_buf.len, 80);
        defer term.deinit(alloc);

        // Decode the input as UTF-8 and feed each codepoint to the terminal.
        const utf8 = try std.unicode.Utf8View.init(str);
        var codepoint_iter = utf8.iterator();
        while (codepoint_iter.nextCodepoint()) |cp| try term.print(cp);

        // Walk the viewport rows, shaping and logging every run we get.
        var rows = term.screen.rowIterator(.viewport);
        var y: usize = 0;
        while (rows.next()) |row| : (y += 1) {
            var runs = self.runIterator(group, row, null, null);
            while (try runs.next(alloc)) |run| {
                const shaped = try self.shape(run);
                log.info("y={} run={d} shape={any} idx={}", .{
                    y,
                    run.cells,
                    shaped,
                    run.font_index,
                });
            }
        }
    }
};