Merge pull request #64 from mitchellh/shaper-abs

wasm: Text shaping for canvas faces
This commit is contained in:
Mitchell Hashimoto
2022-12-12 21:57:40 -08:00
committed by GitHub
12 changed files with 919 additions and 366 deletions

View File

@ -124,10 +124,7 @@ pub fn build(b: *std.build.Builder) !void {
wasm.addOptions("build_options", exe_options);
// Wasm-specific deps
wasm.addPackage(js.pkg);
wasm.addPackage(tracylib.pkg);
wasm.addPackage(utf8proc.pkg);
_ = try utf8proc.link(b, wasm);
try addDeps(b, wasm, true);
const step = b.step("wasm", "Build the wasm library");
step.dependOn(&wasm.step);
@ -139,7 +136,7 @@ pub fn build(b: *std.build.Builder) !void {
const main_test = b.addTest("src/main_wasm.zig");
main_test.setTarget(.{ .cpu_arch = .wasm32, .os_tag = .wasi });
main_test.addOptions("build_options", exe_options);
main_test.addPackage(js.pkg);
try addDeps(b, main_test, true);
test_step.dependOn(&main_test.step);
}
@ -222,6 +219,20 @@ fn addDeps(
step: *std.build.LibExeObjStep,
static: bool,
) !void {
// Wasm we do manually since it is such a different build.
if (step.target.getCpuArch() == .wasm32) {
// We link this package but it's a no-op since Tracy
// never actually WORKS with wasm.
step.addPackage(tracylib.pkg);
step.addPackage(utf8proc.pkg);
step.addPackage(js.pkg);
// utf8proc
_ = try utf8proc.link(b, step);
return;
}
// We always need the Zig packages
if (enable_fontconfig) step.addPackage(fontconfig.pkg);
step.addPackage(freetype.pkg);
@ -240,11 +251,6 @@ fn addDeps(
_ = try macos.link(b, step, .{});
}
// Wasm
if (step.target.getCpuArch() == .wasm32) {
step.addPackage(js.pkg);
}
// We always statically compile glad
step.addIncludePath("vendor/glad/include/");
step.addCSourceFile("vendor/glad/src/gl.c", &.{});

View File

@ -46,6 +46,9 @@ fetch(url.href).then(response =>
atlas_new,
atlas_free,
atlas_debug_canvas,
shaper_new,
shaper_free,
shaper_test,
} = results.instance.exports;
// Give us access to the zjs value for debugging.
globalThis.zjs = zjs;
@ -54,13 +57,19 @@ fetch(url.href).then(response =>
// Initialize our zig-js memory
zjs.memory = memory;
// Helpers
const makeStr = (str) => {
  // Encode the JS string as UTF-8 and copy the bytes into `memory.buffer`
  // at the offset returned by `malloc`, so the string can be handed over
  // as a pointer plus byte length.
  // NOTE(review): nothing here frees the allocation; presumably the
  // caller owns it — confirm.
  const bytes = new TextEncoder().encode(str);
  const len = bytes.byteLength;
  const ptr = malloc(len);
  // The view starts at `ptr` and extends to the end of the buffer.
  const view = new Uint8Array(memory.buffer, ptr);
  view.set(bytes);
  return { ptr, len };
};
// Create our atlas
// const atlas = atlas_new(512, 0 /* greyscale */);
// Create some memory for our string
const font = new TextEncoder().encode("monospace");
const font_ptr = malloc(font.byteLength);
new Uint8Array(memory.buffer, font_ptr).set(font);
const font_name = makeStr("monospace");
// Initialize our deferred face
// const df = deferred_face_new(font_ptr, font.byteLength, 0 /* text */);
@ -73,8 +82,8 @@ fetch(url.href).then(response =>
// Create our group
const group = group_new(72 /* size */);
group_add_face(group, 0 /* regular */, deferred_face_new(font_ptr, font.byteLength, 0 /* text */));
group_add_face(group, 0 /* regular */, deferred_face_new(font_ptr, font.byteLength, 1 /* emoji */));
group_add_face(group, 0 /* regular */, deferred_face_new(font_name.ptr, font_name.len, 0 /* text */));
group_add_face(group, 0 /* regular */, deferred_face_new(font_name.ptr, font_name.len, 1 /* emoji */));
// Create our group cache
const group_cache = group_cache_new(group);
@ -112,5 +121,10 @@ fetch(url.href).then(response =>
document.getElementById("atlas-color-canvas").append(zjs.deleteValue(id));
}
// Let's try shaping
const shaper = shaper_new(120);
const input = makeStr("hello");
shaper_test(shaper, group_cache, input.ptr, input.len);
//face_free(face);
});

View File

@ -286,12 +286,29 @@ pub fn hasCodepoint(self: DeferredFace, cp: u32, p: ?Presentation) bool {
// Canvas always has the codepoint because we have no way of
// really checking and we let the browser handle it.
.web_canvas => {
if (self.wc) |wc| {
.web_canvas => if (self.wc) |wc| {
// Fast-path if we have a specific presentation and we
// don't match, then it is definitely not this face.
if (p) |desired| if (wc.presentation != desired) return false;
}
return true;
// Slow-path: we initialize the font, render it, and check
// if it works and the presentation matches.
var face = Face.initNamed(
wc.alloc,
wc.font_str,
.{ .points = 12 },
wc.presentation,
) catch |err| {
log.warn("failed to init face for codepoint check " ++
"face={s} err={}", .{
wc.font_str,
err,
});
return false;
};
defer face.deinit();
return face.glyphIndex(cp) != null;
},
.freetype => {},

View File

@ -245,11 +245,11 @@ pub fn presentationFromIndex(self: Group, index: FontIndex) !font.Presentation {
/// Return the Face represented by a given FontIndex. Note that special
/// fonts (i.e. box glyphs) do not have a face.
pub fn faceFromIndex(self: Group, index: FontIndex) !Face {
pub fn faceFromIndex(self: Group, index: FontIndex) !*Face {
if (index.special() != null) return error.SpecialHasNoFace;
const deferred = &self.faces.get(index.style).items[@intCast(usize, index.idx)];
try deferred.load(self.lib, self.size);
return deferred.face.?;
return &deferred.face.?;
}
/// Render a glyph by glyph index into the given font atlas and return
@ -556,3 +556,27 @@ test "discover monospace with fontconfig and freetype" {
);
}
}
test "faceFromIndex returns pointer" {
    const testing = std.testing;
    const alloc = testing.allocator;
    const testFont = @import("test.zig").fontRegular;

    var atlas_greyscale = try font.Atlas.init(alloc, 512, .greyscale);
    defer atlas_greyscale.deinit(alloc);

    var lib = try Library.init();
    defer lib.deinit();

    var group = try init(alloc, lib, .{ .points = 12, .xdpi = 96, .ydpi = 96 });
    defer group.deinit();

    // Register a single, already-loaded regular face.
    const loaded = try Face.init(lib, testFont, .{ .points = 12, .xdpi = 96, .ydpi = 96 });
    try group.addFace(alloc, .regular, DeferredFace.initLoaded(loaded));

    // Two lookups of the same index must hand back the same address,
    // i.e. a pointer to the stored face rather than a fresh copy.
    const idx = group.indexForCodepoint('A', .regular, null).?;
    const first = try group.faceFromIndex(idx);
    const second = try group.faceFromIndex(idx);
    try testing.expectEqual(@ptrToInt(first), @ptrToInt(second));
}

View File

@ -9,6 +9,10 @@ const font = @import("../main.zig");
const log = std.log.scoped(.font_face);
pub const Face = struct {
/// See graphemes field for more details.
const grapheme_start: u32 = 0x10FFFF + 1;
const grapheme_end: u32 = std.math.maxInt(u32);
/// The web canvas face makes use of an allocator when interacting
/// with the JS environment.
alloc: Allocator,
@ -28,6 +32,13 @@ pub const Face = struct {
/// The canvas element that we will reuse to render glyphs
canvas: js.Object,
/// The map to store multi-codepoint grapheme clusters that are rendered.
/// We use 1 above the maximum unicode codepoint up to the max 32-bit
/// unsigned integer to store the "glyph index" for graphemes.
grapheme_to_glyph: std.StringHashMapUnmanaged(u32) = .{},
glyph_to_grapheme: std.AutoHashMapUnmanaged(u32, []u8) = .{},
grapheme_next: u32 = grapheme_start,
/// Initialize a web canvas font with a "raw" value. The "raw" value can
/// be any valid value for a CSS "font" property EXCLUDING the size. The
/// size is always added via the `size` parameter.
@ -72,6 +83,12 @@ pub const Face = struct {
pub fn deinit(self: *Face) void {
    // Releases everything the face owns: the font string, both grapheme
    // maps, and the canvas. `self` is invalid afterwards.
    self.alloc.free(self.font_str);

    // Only the table of the forward map is released here; its key bytes
    // are not freed by this function.
    self.grapheme_to_glyph.deinit(self.alloc);

    // The reverse map owns its cluster copies, so free each one before
    // releasing the table itself.
    var clusters = self.glyph_to_grapheme.valueIterator();
    while (clusters.next()) |cluster| self.alloc.free(cluster.*);
    self.glyph_to_grapheme.deinit(self.alloc);

    self.canvas.deinit();
    self.* = undefined;
}
@ -90,12 +107,84 @@ pub const Face = struct {
/// have access to the underlying tables anyways. We let the browser deal
/// with bad codepoints.
pub fn glyphIndex(self: Face, cp: u32) ?u32 {
    // Returns `cp` itself as the glyph index when this face should be
    // used for it, or null when it should not: either the grapheme index
    // is unknown, the glyph could not be rendered, or the rendered
    // presentation does not match this face's presentation.

    // If this is a multi-codepoint grapheme then we only check if
    // we actually know about it.
    if (cp >= grapheme_start) {
        if (!self.glyph_to_grapheme.contains(cp)) return null;
    }

    // Render the glyph to determine if it is colored or not. We
    // have to do this because the browser will always try to render
    // whatever we give it and we have no API to determine color.
    //
    // We don't want to say yes to the wrong presentation because
    // it will go into the wrong Atlas.
    //
    // Codepoints up to 255 skip the render pass and are treated as text.
    const p: font.Presentation = if (cp <= 255) .text else p: {
        break :p self.glyphPresentation(cp) catch {
            // In this case, we assume we are unable to render
            // this glyph and therefore just say we don't support it.
            return null;
        };
    };
    if (p != self.presentation) return null;

    return cp;
}
/// Determine the presentation of a glyph by rendering it and scanning
/// the resulting image data for any color. This is not fast, but the
/// canvas APIs give us no better way to find out.
///
/// Returns `.emoji` as soon as any pixel has a non-zero R, G or B byte,
/// and `.text` otherwise. Errors from the render pass are propagated.
fn glyphPresentation(
    self: Face,
    cp: u32,
) !font.Presentation {
    // Do a throwaway render; it is released when we return.
    var render = try self.renderGlyphInternal(self.alloc, cp);
    defer render.deinit();

    // Walk the bitmap four bytes at a time, looking only at the first
    // three bytes of each group (the fourth is skipped).
    // NOTE(perf): this is an easy candidate for SIMD.
    const pixels = render.bitmap;
    var offset: usize = 0;
    while (offset < pixels.len) : (offset += 4) {
        const has_color = pixels[offset] != 0 or
            pixels[offset + 1] != 0 or
            pixels[offset + 2] != 0;
        if (has_color) return .emoji;
    }

    return .text;
}
/// Returns the glyph index for the given grapheme cluster. The same
/// cluster will always map to the same glyph index. This does not render
/// the grapheme at this time, only reserves the index.
///
/// The bytes of `cluster` are copied, so the caller keeps ownership of
/// the slice it passes in and may reuse or free it after this returns.
///
/// Returns `error.OutOfMemory` if an allocation fails or if the space of
/// grapheme glyph indexes is exhausted.
pub fn graphemeGlyphIndex(self: *Face, cluster: []const u8) error{OutOfMemory}!u32 {
    // If we already have this stored then return it
    const gop = try self.grapheme_to_glyph.getOrPut(self.alloc, cluster);
    if (gop.found_existing) return gop.value_ptr.*;

    // On any failure below, drop the half-initialized entry again. At
    // that point the entry's key is still the caller's `cluster` slice,
    // which is valid for the duration of this call.
    errdefer _ = self.grapheme_to_glyph.remove(cluster);

    // We don't have it stored. Ensure we have space to store. The
    // next will be "0" if we're out of space due to unsigned int wrapping.
    if (self.grapheme_next == 0) return error.OutOfMemory;

    // Copy the cluster so that both maps refer to memory we own.
    const copy = try self.alloc.dupe(u8, cluster);
    errdefer self.alloc.free(copy);

    // Grow space for the reverse mapping
    try self.glyph_to_grapheme.ensureUnusedCapacity(self.alloc, 1);

    // Nothing below can fail. The hash map stores the key slice as given
    // and does not copy its bytes, so re-point the key at our owned copy;
    // otherwise the entry would reference the caller's buffer after this
    // call returns. The copy is shared with the reverse map, which is the
    // one that frees it in deinit.
    gop.key_ptr.* = copy;
    gop.value_ptr.* = self.grapheme_next;
    self.glyph_to_grapheme.putAssumeCapacity(self.grapheme_next, copy);

    // Wrapping add: after the last index is handed out this becomes 0,
    // which the check above treats as "out of space".
    self.grapheme_next +%= 1;

    return gop.value_ptr.*;
}
/// Render a glyph using the glyph index. The rendered glyph is stored
/// in the given texture atlas.
pub fn renderGlyph(
self: Face,
alloc: Allocator,
@ -105,10 +194,174 @@ pub const Face = struct {
) !font.Glyph {
_ = max_height;
// Encode our glyph into UTF-8 so we can build a JS string out of it.
var render = try self.renderGlyphInternal(alloc, glyph_index);
defer render.deinit();
// Convert the format of the bitmap if necessary
const bitmap_formatted: []u8 = switch (atlas.format) {
// Bitmap is already in RGBA
.rgba => render.bitmap,
// Convert down to A8
.greyscale => a8: {
assert(@mod(render.bitmap.len, 4) == 0);
var bitmap_a8 = try alloc.alloc(u8, render.bitmap.len / 4);
errdefer alloc.free(bitmap_a8);
var i: usize = 0;
while (i < bitmap_a8.len) : (i += 1) {
bitmap_a8[i] = render.bitmap[(i * 4) + 3];
}
break :a8 bitmap_a8;
},
else => return error.UnsupportedAtlasFormat,
};
defer if (bitmap_formatted.ptr != render.bitmap.ptr) {
alloc.free(bitmap_formatted);
};
// Put it in our atlas
const region = try atlas.reserve(alloc, render.width, render.height);
if (region.width > 0 and region.height > 0) {
atlas.set(region, bitmap_formatted);
}
return font.Glyph{
.width = render.width,
.height = render.height,
// TODO: this can't be right
.offset_x = 0,
.offset_y = 0,
.atlas_x = region.x,
.atlas_y = region.y,
.advance_x = 0,
};
}
/// Measure reference text on the canvas and store the derived cell
/// metrics in `self.metrics`. Errors from the JS calls are propagated.
fn calcMetrics(self: *Face) !void {
    const ctx = try self.context();
    defer ctx.deinit();

    // The cell width is the measured width of a single "M".
    const cell_width: f32 = width: {
        const m = try ctx.call(js.Object, "measureText", .{js.string("M")});
        defer m.deinit();

        // The bounding box is tighter so it is preferred, but some text
        // (emoji, for example) reports no bounding box; in that case use
        // the full run width.
        const tight = try m.get(f32, "actualBoundingBoxRight");
        break :width if (tight > 0) tight else try m.get(f32, "width");
    };

    // For the height, measure a tall and a low character together and
    // add ascent and descent. This should match the pixel height, but
    // measuring is the more reliable way to get it.
    const tall = try ctx.call(js.Object, "measureText", .{js.string("M_")});
    defer tall.deinit();
    const ascent = try tall.get(f32, "actualBoundingBoxAscent");
    const descent = try tall.get(f32, "actualBoundingBoxDescent");
    const cell_height = ascent + descent;

    // Canvas measurements declare no underline position, so place it 1
    // under the cell height to match what our freetype logic currently
    // does. The strikethrough reuses the same position and thickness.
    const line_position = cell_height - 1;
    const line_thickness: f32 = 1;

    self.metrics = .{
        .cell_width = cell_width,
        .cell_height = cell_height,
        .cell_baseline = descent,
        .underline_position = line_position,
        .underline_thickness = line_thickness,
        .strikethrough_position = line_position,
        .strikethrough_thickness = line_thickness,
    };

    log.debug("metrics font={s} value={}", .{ self.font_str, self.metrics });
}
/// Returns the canvas 2d context, cleared and with this face's font
/// applied. The returned object is released on the error paths here;
/// on success the callers in this file release it with `deinit`.
fn context(self: Face) !js.Object {
    // getContext hands back the same context on every call, so whatever
    // state a previous use left behind has to be reset here.
    const ctx2d = try self.canvas.call(js.Object, "getContext", .{js.string("2d")});
    errdefer ctx2d.deinit();

    // Wipe the whole canvas area.
    const canvas_w = try self.canvas.get(f64, "width");
    const canvas_h = try self.canvas.get(f64, "height");
    try ctx2d.call(void, "clearRect", .{ 0, 0, canvas_w, canvas_h });

    // Build the font value from our size and font string and apply it.
    const wanted = try std.fmt.allocPrint(
        self.alloc,
        "{d}px {s}",
        .{ self.size.points, self.font_str },
    );
    defer self.alloc.free(wanted);
    try ctx2d.set("font", js.string(wanted));

    // Read the property back. If it does not equal what we just wrote,
    // the assignment did not take effect. An invalid font value is an
    // easy mistake to make, so at least surface it in the logs.
    const actual = try ctx2d.getAlloc(js.String, self.alloc, "font");
    defer self.alloc.free(actual);
    if (!std.mem.eql(u8, wanted, actual)) {
        log.warn("canvas font didn't set, fonts may be broken, expected={s} got={s}", .{
            wanted,
            actual,
        });
    }

    return ctx2d;
}
/// An internal (web-canvas-only) format for rendered glyphs
/// since we do render passes in multiple different situations.
///
/// The value owns both `metrics` and `bitmap`; call `deinit` to release them.
const RenderedGlyph = struct {
/// Allocator used to free `bitmap` in `deinit`.
alloc: Allocator,
/// The JS object returned by the canvas `measureText` call for this glyph.
metrics: js.Object,
/// Width of the render. NOTE(review): presumably in pixels — confirm.
width: u32,
/// Height of the render. NOTE(review): presumably in pixels — confirm.
height: u32,
/// The rendered image data. Users in this file read it in groups of four
/// bytes and take the fourth byte of each group when converting to A8.
bitmap: []u8,
/// Releases the JS metrics object and the bitmap, then invalidates `self`.
pub fn deinit(self: *RenderedGlyph) void {
self.metrics.deinit();
self.alloc.free(self.bitmap);
self.* = undefined;
}
};
/// Shared logic for rendering a glyph.
fn renderGlyphInternal(
self: Face,
alloc: Allocator,
glyph_index: u32,
) !RenderedGlyph {
// Encode our glyph to UTF-8 so we can build a JS string out of it.
var utf8: [4]u8 = undefined;
const glyph_str = glyph_str: {
// If we are a normal glyph then we are a single codepoint and
// we just UTF8 encode it as-is.
if (glyph_index < grapheme_start) {
const utf8_len = try std.unicode.utf8Encode(@intCast(u21, glyph_index), &utf8);
const glyph_str = js.string(utf8[0..utf8_len]);
break :glyph_str js.string(utf8[0..utf8_len]);
}
// We are a multi-codepoint glyph so we have to read the glyph
// from the map and it is already utf8 encoded.
const slice = self.glyph_to_grapheme.get(glyph_index) orelse
return error.UnknownGraphemeCluster;
break :glyph_str js.string(slice);
};
// Get our drawing context
const measure_ctx = try self.context();
@ -116,7 +369,7 @@ pub const Face = struct {
// Get the width and height of the render
const metrics = try measure_ctx.call(js.Object, "measureText", .{glyph_str});
defer metrics.deinit();
errdefer metrics.deinit();
const width: u32 = @floatToInt(u32, @ceil(width: {
// We prefer the bounding box since it is tighter but certain
// text such as emoji do not have a bounding box set so we use
@ -222,130 +475,16 @@ pub const Face = struct {
break :bitmap bitmap;
};
defer alloc.free(bitmap);
errdefer alloc.free(bitmap);
// Convert the format of the bitmap if necessary
const bitmap_formatted: []u8 = switch (atlas.format) {
// Bitmap is already in RGBA
.rgba => bitmap,
// Convert down to A8
.greyscale => a8: {
assert(@mod(bitmap.len, 4) == 0);
var bitmap_a8 = try alloc.alloc(u8, bitmap.len / 4);
errdefer alloc.free(bitmap_a8);
var i: usize = 0;
while (i < bitmap_a8.len) : (i += 1) {
bitmap_a8[i] = bitmap[(i * 4) + 3];
}
break :a8 bitmap_a8;
},
else => return error.UnsupportedAtlasFormat,
};
defer if (bitmap_formatted.ptr != bitmap.ptr) alloc.free(bitmap_formatted);
// Put it in our atlas
const region = try atlas.reserve(alloc, width, height);
if (region.width > 0 and region.height > 0) atlas.set(region, bitmap_formatted);
return font.Glyph{
return RenderedGlyph{
.alloc = alloc,
.metrics = metrics,
.width = width,
.height = height,
// TODO: this can't be right
.offset_x = 0,
.offset_y = 0,
.atlas_x = region.x,
.atlas_y = region.y,
.advance_x = 0,
.bitmap = bitmap,
};
}
/// Calculate the metrics associated with a given face.
fn calcMetrics(self: *Face) !void {
const ctx = try self.context();
defer ctx.deinit();
// Cell width is the width of our M text
const cell_width: f32 = cell_width: {
const metrics = try ctx.call(js.Object, "measureText", .{js.string("M")});
defer metrics.deinit();
// We prefer the bounding box since it is tighter but certain
// text such as emoji do not have a bounding box set so we use
// the full run width instead.
const bounding_right = try metrics.get(f32, "actualBoundingBoxRight");
if (bounding_right > 0) break :cell_width bounding_right;
break :cell_width try metrics.get(f32, "width");
};
// To get the cell height we render a high and low character and get
// the total of the ascent and descent. This should equal our
// pixel height but this is a more surefire way to get it.
const height_metrics = try ctx.call(js.Object, "measureText", .{js.string("M_")});
defer height_metrics.deinit();
const asc = try height_metrics.get(f32, "actualBoundingBoxAscent");
const desc = try height_metrics.get(f32, "actualBoundingBoxDescent");
const cell_height = asc + desc;
const cell_baseline = desc;
// There isn't a declared underline position for canvas measurements
// so we just go 1 under the cell height to match freetype logic
// at this time (our freetype logic).
const underline_position = cell_height - 1;
const underline_thickness: f32 = 1;
self.metrics = .{
.cell_width = cell_width,
.cell_height = cell_height,
.cell_baseline = cell_baseline,
.underline_position = underline_position,
.underline_thickness = underline_thickness,
.strikethrough_position = underline_position,
.strikethrough_thickness = underline_thickness,
};
log.debug("metrics font={s} value={}", .{ self.font_str, self.metrics });
}
/// Returns the 2d context configured for drawing
fn context(self: Face) !js.Object {
// This will return the same context on subsequent calls so it
// is important to reset it.
const ctx = try self.canvas.call(js.Object, "getContext", .{js.string("2d")});
errdefer ctx.deinit();
// Clear the canvas
{
const width = try self.canvas.get(f64, "width");
const height = try self.canvas.get(f64, "height");
try ctx.call(void, "clearRect", .{ 0, 0, width, height });
}
// Set our context font
var font_val = try std.fmt.allocPrint(
self.alloc,
"{d}px {s}",
.{ self.size.points, self.font_str },
);
defer self.alloc.free(font_val);
try ctx.set("font", js.string(font_val));
// If the font property didn't change, then the font set didn't work.
// We do this check because it is very easy to put an invalid font
// in and this at least makes it show up in the logs.
const check = try ctx.getAlloc(js.String, self.alloc, "font");
defer self.alloc.free(check);
if (!std.mem.eql(u8, font_val, check)) {
log.warn("canvas font didn't set, fonts may be broken, expected={s} got={s}", .{
font_val,
check,
});
}
return ctx;
}
};
/// The wasm-compatible API.

View File

@ -10,7 +10,8 @@ pub const Face = face.Face;
pub const Group = @import("Group.zig");
pub const GroupCache = @import("GroupCache.zig");
pub const Glyph = @import("Glyph.zig");
pub const Shaper = @import("Shaper.zig");
pub const shape = @import("shape.zig");
pub const Shaper = shape.Shaper;
pub const sprite = @import("sprite.zig");
pub const Sprite = sprite.Sprite;
pub const Descriptor = discovery.Descriptor;
@ -24,6 +25,7 @@ pub usingnamespace if (builtin.target.isWasm()) struct {
pub usingnamespace Group.Wasm;
pub usingnamespace GroupCache.Wasm;
pub usingnamespace face.web_canvas.Wasm;
pub usingnamespace shape.web_canvas.Wasm;
} else struct {};
/// Build options

32
src/font/shape.zig Normal file
View File

@ -0,0 +1,32 @@
const builtin = @import("builtin");
const options = @import("main.zig").options;
const harfbuzz = @import("shaper/harfbuzz.zig");
pub const web_canvas = @import("shaper/web_canvas.zig");
pub usingnamespace @import("shaper/run.zig");
/// Shaper implementation for our compile options.
///
/// Chosen from `options.backend`: the `web_canvas` backend uses
/// `web_canvas.Shaper`, every other backend listed here uses
/// `harfbuzz.Shaper`.
pub const Shaper = switch (options.backend) {
.freetype,
.fontconfig_freetype,
.coretext_freetype,
.coretext,
=> harfbuzz.Shaper,
.web_canvas => web_canvas.Shaper,
};
/// A cell is a single cell within a terminal row that should be rendered
/// for a shaping call. Not all terminal cells may be present; only
/// cells that have a glyph that needs to be rendered are included.
pub const Cell = struct {
/// The column that this cell occupies. Since a set of shaper cells is
/// always on the same line, only the X is stored. It is expected the
/// caller has access to the original screen cell.
x: u16,
/// The glyph index for this cell. The font index to use alongside
/// this cell is available in the text run. This glyph index is only
/// valid for a given GroupCache and FontIndex that was used to create
/// the runs.
glyph_index: u32,
};

View File

@ -1,33 +1,35 @@
//! This struct handles text shaping.
const Shaper = @This();
const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const harfbuzz = @import("harfbuzz");
const trace = @import("tracy").trace;
const font = @import("main.zig");
const Face = @import("main.zig").Face;
const DeferredFace = @import("main.zig").DeferredFace;
const Group = @import("main.zig").Group;
const GroupCache = @import("main.zig").GroupCache;
const Library = @import("main.zig").Library;
const Style = @import("main.zig").Style;
const Presentation = @import("main.zig").Presentation;
const terminal = @import("../terminal/main.zig");
const font = @import("../main.zig");
const Face = font.Face;
const DeferredFace = font.DeferredFace;
const Group = font.Group;
const GroupCache = font.GroupCache;
const Library = font.Library;
const Style = font.Style;
const Presentation = font.Presentation;
const terminal = @import("../../terminal/main.zig");
const log = std.log.scoped(.font_shaper);
/// Shaper that uses Harfbuzz.
pub const Shaper = struct {
/// The buffer used for text shaping. We reuse it across multiple shaping
/// calls to prevent allocations.
hb_buf: harfbuzz.Buffer,
/// The shared memory used for shaping results.
cell_buf: []Cell,
cell_buf: []font.shape.Cell,
/// The cell_buf argument is the buffer to use for storing shaped results.
/// This should be at least the number of columns in the terminal.
pub fn init(cell_buf: []Cell) !Shaper {
pub fn init(alloc: Allocator, cell_buf: []font.shape.Cell) !Shaper {
// Allocator is not used because harfbuzz uses libc
_ = alloc;
return Shaper{
.hb_buf = try harfbuzz.Buffer.create(),
.cell_buf = cell_buf,
@ -41,8 +43,12 @@ pub fn deinit(self: *Shaper) void {
/// Returns an iterator that returns one text run at a time for the
/// given terminal row. Note that text runs are only valid one at a time
/// for a Shaper struct since they share state.
pub fn runIterator(self: *Shaper, group: *GroupCache, row: terminal.Screen.Row) RunIterator {
return .{ .shaper = self, .group = group, .row = row };
pub fn runIterator(
self: *Shaper,
group: *GroupCache,
row: terminal.Screen.Row,
) font.shape.RunIterator {
return .{ .hooks = .{ .shaper = self }, .group = group, .row = row };
}
/// Shape the given text run. The text run must be the immediately previous
@ -52,7 +58,7 @@ pub fn runIterator(self: *Shaper, group: *GroupCache, row: terminal.Screen.Row)
/// The return value is only valid until the next shape call is called.
///
/// If there is not enough space in the cell buffer, an error is returned.
pub fn shape(self: *Shaper, run: TextRun) ![]Cell {
pub fn shape(self: *Shaper, run: font.shape.TextRun) ![]font.shape.Cell {
const tracy = trace(@src());
defer tracy.end();
@ -95,137 +101,25 @@ pub fn shape(self: *Shaper, run: TextRun) ![]Cell {
return self.cell_buf[0..info.len];
}
pub const Cell = struct {
/// The column that this cell occupies. Since a set of shaper cells is
/// always on the same line, only the X is stored. It is expected the
/// caller has access to the original screen cell.
x: u16,
/// The glyph index for this cell. The font index to use alongside
/// this cell is available in the text run.
glyph_index: u32,
};
/// A single text run. A text run is only valid for one Shaper and
/// until the next run is created.
pub const TextRun = struct {
/// The offset in the row where this run started
offset: u16,
/// The total number of cells produced by this run.
cells: u16,
/// The font group that built this run.
group: *GroupCache,
/// The font index to use for the glyphs of this run.
font_index: Group.FontIndex,
};
pub const RunIterator = struct {
/// The hooks for RunIterator.
pub const RunIteratorHook = struct {
shaper: *Shaper,
group: *GroupCache,
row: terminal.Screen.Row,
i: usize = 0,
pub fn next(self: *RunIterator, alloc: Allocator) !?TextRun {
const tracy = trace(@src());
defer tracy.end();
// Trim the right side of a row that might be empty
const max: usize = max: {
var j: usize = self.row.lenCells();
while (j > 0) : (j -= 1) if (!self.row.getCell(j - 1).empty()) break;
break :max j;
};
// We're over at the max
if (self.i >= max) return null;
// Track the font for our current run
var current_font: Group.FontIndex = .{};
pub fn prepare(self: RunIteratorHook) !void {
// Reset the buffer for our current run
self.shaper.hb_buf.reset();
self.shaper.hb_buf.setContentType(.unicode);
// Go through cell by cell and accumulate while we build our run.
var j: usize = self.i;
while (j < max) : (j += 1) {
const cluster = j;
const cell = self.row.getCell(j);
// If we're a spacer, then we ignore it
if (cell.attrs.wide_spacer_tail) continue;
const style: Style = if (cell.attrs.bold)
.bold
else
.regular;
// Determine the presentation format for this glyph.
const presentation: ?Presentation = if (cell.attrs.grapheme) p: {
// We only check the FIRST codepoint because I believe the
// presentation format must be directly adjacent to the codepoint.
var it = self.row.codepointIterator(j);
if (it.next()) |cp| {
if (cp == 0xFE0E) break :p Presentation.text;
if (cp == 0xFE0F) break :p Presentation.emoji;
}
break :p null;
} else null;
// Determine the font for this cell. We'll use fallbacks
// manually here to try replacement chars and then a space
// for unknown glyphs.
const font_idx_opt = (try self.group.indexForCodepoint(
alloc,
if (cell.empty() or cell.char == 0) ' ' else cell.char,
style,
presentation,
)) orelse (try self.group.indexForCodepoint(
alloc,
0xFFFD,
style,
.text,
)) orelse
try self.group.indexForCodepoint(alloc, ' ', style, .text);
const font_idx = font_idx_opt.?;
//log.warn("char={x} idx={}", .{ cell.char, font_idx });
if (j == self.i) current_font = font_idx;
// If our fonts are not equal, then we're done with our run.
if (font_idx.int() != current_font.int()) break;
// Continue with our run
self.shaper.hb_buf.add(cell.char, @intCast(u32, cluster));
// If this cell is part of a grapheme cluster, add all the grapheme
// data points.
if (cell.attrs.grapheme) {
var it = self.row.codepointIterator(j);
while (it.next()) |cp| {
if (cp == 0xFE0E or cp == 0xFE0F) continue;
self.shaper.hb_buf.add(cp, @intCast(u32, cluster));
}
}
pub fn addCodepoint(self: RunIteratorHook, cp: u32, cluster: u32) !void {
self.shaper.hb_buf.add(cp, cluster);
}
// Finalize our buffer
pub fn finalize(self: RunIteratorHook) !void {
self.shaper.hb_buf.guessSegmentProperties();
// Move our cursor. Must defer since we use self.i below.
defer self.i = j;
return TextRun{
.offset = @intCast(u16, self.i),
.cells = @intCast(u16, j - self.i),
.group = self.group,
.font_index = current_font,
};
}
};
};
test "run iterator" {
const testing = std.testing;
@ -619,7 +513,7 @@ const TestShaper = struct {
shaper: Shaper,
cache: *GroupCache,
lib: Library,
cell_buf: []Cell,
cell_buf: []font.shape.Cell,
pub fn deinit(self: *TestShaper) void {
self.shaper.deinit();
@ -632,9 +526,9 @@ const TestShaper = struct {
/// Helper to return a fully initialized shaper.
fn testShaper(alloc: Allocator) !TestShaper {
const testFont = @import("test.zig").fontRegular;
const testEmoji = @import("test.zig").fontEmoji;
const testEmojiText = @import("test.zig").fontEmojiText;
const testFont = @import("../test.zig").fontRegular;
const testEmoji = @import("../test.zig").fontEmoji;
const testEmojiText = @import("../test.zig").fontEmojiText;
var lib = try Library.init();
errdefer lib.deinit();
@ -653,10 +547,10 @@ fn testShaper(alloc: Allocator) !TestShaper {
try cache_ptr.group.addFace(alloc, .regular, DeferredFace.initLoaded(try Face.init(lib, testEmoji, .{ .points = 12 })));
try cache_ptr.group.addFace(alloc, .regular, DeferredFace.initLoaded(try Face.init(lib, testEmojiText, .{ .points = 12 })));
var cell_buf = try alloc.alloc(Cell, 80);
var cell_buf = try alloc.alloc(font.shape.Cell, 80);
errdefer alloc.free(cell_buf);
var shaper = try init(cell_buf);
var shaper = try Shaper.init(alloc, cell_buf);
errdefer shaper.deinit();
return TestShaper{

129
src/font/shaper/run.zig Normal file
View File

@ -0,0 +1,129 @@
const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const font = @import("../main.zig");
const shape = @import("../shape.zig");
const terminal = @import("../../terminal/main.zig");
const trace = @import("tracy").trace;
/// A single text run. A text run is only valid for one Shaper instance and
/// until the next run is created. A text run never goes across multiple
/// rows in a terminal, so it is guaranteed to always be one line.
pub const TextRun = struct {
/// The offset in the row where this run started, as a cell index.
offset: u16,
/// The total number of cells produced by this run, i.e. how many row
/// cells the iterator advanced over while building it.
cells: u16,
/// The font group that built this run.
group: *font.GroupCache,
/// The font index to use for the glyphs of this run. Every cell in the
/// run resolved to this same index.
font_index: font.Group.FontIndex,
};
/// RunIterator is an iterator that yields text runs.
///
/// Each call to `next` scans `row` starting at cell `i` and groups the
/// consecutive cells that resolve to the same font index into a single
/// `TextRun`. The codepoints of the run are handed to `hooks` as they
/// are visited.
pub const RunIterator = struct {
/// Shaper-specific callbacks. `next` calls `prepare`, `addCodepoint` and
/// `finalize` on it.
hooks: font.Shaper.RunIteratorHook,
/// The font group cache used to resolve a font index for each cell.
group: *font.GroupCache,
/// The terminal row being iterated.
row: terminal.Screen.Row,
/// Index of the cell where the next run starts.
i: usize = 0,
/// Returns the next text run of the row, or null once `i` has reached the
/// end of the row's non-empty cells. `alloc` is forwarded to the font
/// lookups on `group`. Errors from the hooks and from the font lookups
/// are propagated.
pub fn next(self: *RunIterator, alloc: Allocator) !?TextRun {
const tracy = trace(@src());
defer tracy.end();
// Trim the right side of a row that might be empty
const max: usize = max: {
var j: usize = self.row.lenCells();
while (j > 0) : (j -= 1) if (!self.row.getCell(j - 1).empty()) break;
break :max j;
};
// We're over at the max
if (self.i >= max) return null;
// Track the font for our current run
var current_font: font.Group.FontIndex = .{};
// Allow the hook to prepare
try self.hooks.prepare();
// Go through cell by cell and accumulate while we build our run.
var j: usize = self.i;
while (j < max) : (j += 1) {
// The cell index doubles as the cluster value passed to the hooks.
const cluster = j;
const cell = self.row.getCell(j);
// If we're a spacer, then we ignore it
if (cell.attrs.wide_spacer_tail) continue;
const style: font.Style = if (cell.attrs.bold)
.bold
else
.regular;
// Determine the presentation format for this glyph.
const presentation: ?font.Presentation = if (cell.attrs.grapheme) p: {
// We only check the FIRST codepoint because I believe the
// presentation format must be directly adjacent to the codepoint.
var it = self.row.codepointIterator(j);
if (it.next()) |cp| {
if (cp == 0xFE0E) break :p font.Presentation.text;
if (cp == 0xFE0F) break :p font.Presentation.emoji;
}
break :p null;
} else null;
// Determine the font for this cell. We'll use fallbacks
// manually here to try replacement chars and then a space
// for unknown glyphs.
const font_idx_opt = (try self.group.indexForCodepoint(
alloc,
if (cell.empty() or cell.char == 0) ' ' else cell.char,
style,
presentation,
)) orelse (try self.group.indexForCodepoint(
alloc,
0xFFFD,
style,
.text,
)) orelse
try self.group.indexForCodepoint(alloc, ' ', style, .text);
// The unwrap asserts that at least the final space fallback found a font.
const font_idx = font_idx_opt.?;
//log.warn("char={x} idx={}", .{ cell.char, font_idx });
// The first cell of the run decides the font for the whole run.
// NOTE(review): if the cell at self.i is a spacer tail it is skipped
// above before this assignment, leaving current_font at its default
// value — confirm that cannot happen.
if (j == self.i) current_font = font_idx;
// If our fonts are not equal, then we're done with our run.
if (font_idx.int() != current_font.int()) break;
// Continue with our run
try self.hooks.addCodepoint(cell.char, @intCast(u32, cluster));
// If this cell is part of a grapheme cluster, add all the grapheme
// data points.
if (cell.attrs.grapheme) {
var it = self.row.codepointIterator(j);
while (it.next()) |cp| {
// The presentation selectors are skipped here; they were
// already used above to choose the presentation.
if (cp == 0xFE0E or cp == 0xFE0F) continue;
try self.hooks.addCodepoint(cp, @intCast(u32, cluster));
}
}
}
// Finalize our buffer
try self.hooks.finalize();
// Move our cursor. Must defer since we use self.i below.
defer self.i = j;
return TextRun{
.offset = @intCast(u16, self.i),
.cells = @intCast(u16, j - self.i),
.group = self.group,
.font_index = current_font,
};
}
};

View File

@ -0,0 +1,296 @@
const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const utf8proc = @import("utf8proc");
const font = @import("../main.zig");
const terminal = @import("../../terminal/main.zig");
const log = std.log.scoped(.font_shaper);
/// A shaper that only performs grapheme clustering: every grapheme cluster
/// of a run becomes exactly one output cell. Font ligatures are not
/// supported (see the note in `shape`).
pub const Shaper = struct {
    const RunBuf = std.MultiArrayList(struct {
        /// The codepoint for this cell. This must be used in conjunction
        /// with cluster to find the total set of codepoints for a given
        /// cell. See cluster for more information.
        codepoint: u32,

        /// Cluster is set to the X value of the cell that this codepoint
        /// is part of. Note that a cell can have multiple codepoints
        /// with zero-width joiners (ZWJ) and such. Note that terminals
        /// do NOT handle full extended grapheme clustering well so it
        /// is possible a single grapheme extends multiple clusters.
        /// For example, skin tone emoji thumbs up may show up as two
        /// clusters: one with thumbs up and the ZWJ, and a second
        /// cluster with the tone block. It is up to the shaper to handle
        /// shaping these together into a single glyph, if it wishes.
        cluster: u32,
    });

    /// The allocator used for run_buf and for temporary UTF-8 buffers
    /// created while shaping multi-codepoint graphemes.
    alloc: Allocator,

    /// The shared memory used for shaping results. This slice is provided
    /// by the caller and is NOT freed by deinit.
    cell_buf: []font.shape.Cell,

    /// The shared memory used for storing information about a run.
    run_buf: RunBuf,

    /// The cell_buf argument is the buffer to use for storing shaped results.
    /// This should be at least the number of columns in the terminal.
    pub fn init(alloc: Allocator, cell_buf: []font.shape.Cell) !Shaper {
        return Shaper{
            .alloc = alloc,
            .cell_buf = cell_buf,
            .run_buf = .{},
        };
    }

    /// Releases the run buffer. The cell buffer passed to init is left
    /// untouched; the caller remains responsible for freeing it.
    pub fn deinit(self: *Shaper) void {
        self.run_buf.deinit(self.alloc);
        self.* = undefined;
    }

    /// Returns an iterator that returns one text run at a time for the
    /// given terminal row. Note that text runs are only valid one at a time
    /// for a Shaper struct since they share state.
    pub fn runIterator(
        self: *Shaper,
        group: *font.GroupCache,
        row: terminal.Screen.Row,
    ) font.shape.RunIterator {
        return .{ .hooks = .{ .shaper = self }, .group = group, .row = row };
    }

    /// Shape the given text run. The text run must be the immediately
    /// previous text run that was iterated since the text run does share
    /// state with the Shaper struct.
    ///
    /// The return value is a prefix of cell_buf and is only valid until
    /// the next shape call is called.
    ///
    /// If there is not enough space in the cell buffer to hold one cell
    /// per grapheme cluster of the run, error.OutOfMemory is returned.
    /// Errors from UTF-8 encoding and from the face lookup are propagated.
    pub fn shape(self: *Shaper, run: font.shape.TextRun) ![]font.shape.Cell {
        const codepoints = self.run_buf.items(.codepoint);
        const clusters = self.run_buf.items(.cluster);
        assert(codepoints.len == clusters.len);

        switch (codepoints.len) {
            // Special cases: if we have no codepoints (is this possible?)
            // then our result is also an empty cell run.
            0 => return self.cell_buf[0..0],

            // If we have only 1 codepoint, then we assume that it is
            // a single grapheme and just let it through. At this point,
            // we can't have any more information to do anything else.
            1 => {
                // Never write past the caller-provided buffer.
                if (self.cell_buf.len == 0) return error.OutOfMemory;

                self.cell_buf[0] = .{
                    .x = @intCast(u16, clusters[0]),
                    .glyph_index = codepoints[0],
                };

                return self.cell_buf[0..1];
            },

            else => {},
        }

        // We know we have at least two codepoints, so we now go through
        // each and perform grapheme clustering.
        //
        // Note that due to limitations of canvas, we can NOT support
        // font ligatures. However, we do support grapheme clustering.
        // This means we can render things like skin tone emoji but
        // we can't render things like single glyph "=>".
        var break_state: i32 = 0;
        var cp1 = @intCast(u21, codepoints[0]);

        var start: usize = 0;
        var i: usize = 1;
        var cur: usize = 0;
        while (i <= codepoints.len) : (i += 1) {
            // We loop to codepoints.len so that we can handle the end
            // case. In the end case, we always assume it is a grapheme
            // break. This isn't strictly true but its how terminals
            // work today.
            const grapheme_break = i == codepoints.len or blk: {
                const cp2 = @intCast(u21, codepoints[i]);
                defer cp1 = cp2;

                break :blk utf8proc.graphemeBreakStateful(
                    cp1,
                    cp2,
                    &break_state,
                );
            };

            // If this is NOT a grapheme break, cp2 is part of a single
            // grapheme cluster and we expect there could be more. We
            // move on to the next codepoint to try again.
            if (!grapheme_break) continue;

            // We are about to emit one more cell. Make sure the result
            // buffer has room for it before doing any further work so we
            // report an error instead of indexing out of bounds.
            if (cur >= self.cell_buf.len) return error.OutOfMemory;

            // This IS a grapheme break, meaning that cp2 is NOT part
            // of cp1. So we need to render the prior grapheme.
            const len = i - start;
            assert(len > 0);
            switch (len) {
                // If we have only a single codepoint then just render it
                // as-is.
                1 => self.cell_buf[cur] = .{
                    .x = @intCast(u16, clusters[start]),
                    .glyph_index = codepoints[start],
                },

                // We must have multiple codepoints (see assert above). In
                // this case we UTF-8 encode the codepoints and send them
                // to the face to reserve a private glyph index.
                else => {
                    // UTF-8 encode the codepoints in this cluster.
                    const cluster = cluster: {
                        const cluster_points = codepoints[start..i];
                        assert(cluster_points.len == len);

                        // First pass: compute the exact encoded length.
                        const buf_len = buf_len: {
                            var acc: usize = 0;
                            for (cluster_points) |cp| {
                                acc += try std.unicode.utf8CodepointSequenceLength(
                                    @intCast(u21, cp),
                                );
                            }

                            break :buf_len acc;
                        };

                        // Second pass: encode into a buffer of that size.
                        const buf = try self.alloc.alloc(u8, buf_len);
                        errdefer self.alloc.free(buf);
                        var buf_i: usize = 0;
                        for (cluster_points) |cp| {
                            buf_i += try std.unicode.utf8Encode(
                                @intCast(u21, cp),
                                buf[buf_i..],
                            );
                        }

                        break :cluster buf;
                    };
                    defer self.alloc.free(cluster);

                    var face = try run.group.group.faceFromIndex(run.font_index);
                    const index = try face.graphemeGlyphIndex(cluster);

                    self.cell_buf[cur] = .{
                        .x = @intCast(u16, clusters[start]),
                        .glyph_index = index,
                    };
                },
            }

            start = i;
            cur += 1;
        }

        return self.cell_buf[0..cur];
    }

    /// The hooks for RunIterator.
    pub const RunIteratorHook = struct {
        shaper: *Shaper,

        /// Called before a run is built: clears the run buffer while
        /// keeping its allocated capacity.
        pub fn prepare(self: RunIteratorHook) !void {
            // Reset the buffer for our current run
            self.shaper.run_buf.shrinkRetainingCapacity(0);
        }

        /// Records one codepoint of the current run together with the
        /// cluster (cell X value) it belongs to.
        pub fn addCodepoint(
            self: RunIteratorHook,
            cp: u32,
            cluster: u32,
        ) !void {
            try self.shaper.run_buf.append(self.shaper.alloc, .{
                .codepoint = cp,
                .cluster = cluster,
            });
        }

        /// Called once the run is complete. Nothing to do for this shaper.
        pub fn finalize(self: RunIteratorHook) !void {
            _ = self;
        }
    };
};
/// Exported C-ABI entry points so a wasm host can create, destroy and
/// exercise a Shaper.
pub const Wasm = struct {
    const wasm = @import("../../os/wasm.zig");
    const alloc = wasm.alloc;

    /// Creates a heap-allocated Shaper whose cell buffer holds `cap` cells.
    /// Returns null if any allocation fails.
    export fn shaper_new(cap: usize) ?*Shaper {
        return shaper_new_(cap) catch null;
    }

    /// Error-returning implementation behind shaper_new.
    fn shaper_new_(cap: usize) !*Shaper {
        const cells = try alloc.alloc(font.shape.Cell, cap);
        errdefer alloc.free(cells);

        var shaper = try Shaper.init(alloc, cells);
        errdefer shaper.deinit();

        // Move the initialized shaper into its heap slot. Nothing can fail
        // after this allocation succeeds.
        const heap_shaper = try alloc.create(Shaper);
        heap_shaper.* = shaper;
        return heap_shaper;
    }

    /// Destroys a Shaper created by shaper_new, including its cell buffer.
    /// A null pointer is ignored.
    export fn shaper_free(ptr: ?*Shaper) void {
        const shaper = ptr orelse return;
        alloc.free(shaper.cell_buf);
        shaper.deinit();
        alloc.destroy(shaper);
    }

    /// Runs a test to verify shaping works properly. Failures are logged
    /// as warnings and never reach the caller.
    export fn shaper_test(
        self: *Shaper,
        group: *font.GroupCache,
        str: [*]const u8,
        len: usize,
    ) void {
        shaper_test_(self, group, str[0..len]) catch |err|
            log.warn("error during shaper test err={}", .{err});
    }

    /// Prints `str` into a scratch terminal, then shapes every run of every
    /// viewport row and logs the result.
    fn shaper_test_(self: *Shaper, group: *font.GroupCache, str: []const u8) !void {
        // Scratch terminal sized from the shaper's cell buffer.
        var term = try terminal.Terminal.init(alloc, self.cell_buf.len, 80);
        defer term.deinit(alloc);

        // Decode the UTF-8 input and print it one codepoint at a time.
        const view = try std.unicode.Utf8View.init(str);
        var codepoints = view.iterator();
        while (codepoints.nextCodepoint()) |cp| try term.print(cp);

        // Shape every run in every row, logging what comes out.
        var rows = term.screen.rowIterator(.viewport);
        var y: usize = 0;
        while (rows.next()) |row| : (y += 1) {
            var runs = self.runIterator(group, row);
            while (try runs.next(alloc)) |run| {
                const cells = try self.shape(run);
                log.info("y={} run={d} shape={any} idx={}", .{
                    y,
                    run.cells,
                    cells,
                    run.font_index,
                });
            }
        }
    }
};

View File

@ -178,9 +178,9 @@ pub fn init(alloc: Allocator, options: renderer.Options) !Metal {
// Create the font shaper. We initially create a shaper that can support
// a width of 160 which is a common width for modern screens to help
// avoid allocations later.
var shape_buf = try alloc.alloc(font.Shaper.Cell, 160);
var shape_buf = try alloc.alloc(font.shape.Cell, 160);
errdefer alloc.free(shape_buf);
var font_shaper = try font.Shaper.init(shape_buf);
var font_shaper = try font.Shaper.init(alloc, shape_buf);
errdefer font_shaper.deinit();
// Initialize our Metal buffers
@ -691,7 +691,7 @@ pub fn setScreenSize(self: *Metal, _: renderer.ScreenSize) !void {
// Update our shaper
// TODO: don't reallocate if it is close enough (but bigger)
var shape_buf = try self.alloc.alloc(font.Shaper.Cell, grid_size.columns * 2);
var shape_buf = try self.alloc.alloc(font.shape.Cell, grid_size.columns * 2);
errdefer self.alloc.free(shape_buf);
self.alloc.free(self.font_shaper.cell_buf);
self.font_shaper.cell_buf = shape_buf;
@ -817,8 +817,8 @@ pub fn updateCell(
selection: ?terminal.Selection,
screen: *terminal.Screen,
cell: terminal.Screen.Cell,
shaper_cell: font.Shaper.Cell,
shaper_run: font.Shaper.TextRun,
shaper_cell: font.shape.Cell,
shaper_run: font.shape.TextRun,
x: usize,
y: usize,
) !bool {

View File

@ -160,9 +160,9 @@ const GPUCellMode = enum(u8) {
pub fn init(alloc: Allocator, options: renderer.Options) !OpenGL {
// Create the initial font shaper
var shape_buf = try alloc.alloc(font.Shaper.Cell, 1);
var shape_buf = try alloc.alloc(font.shape.Cell, 1);
errdefer alloc.free(shape_buf);
var shaper = try font.Shaper.init(shape_buf);
var shaper = try font.Shaper.init(alloc, shape_buf);
errdefer shaper.deinit();
// Create our shader
@ -863,8 +863,8 @@ pub fn updateCell(
selection: ?terminal.Selection,
screen: *terminal.Screen,
cell: terminal.Screen.Cell,
shaper_cell: font.Shaper.Cell,
shaper_run: font.Shaper.TextRun,
shaper_cell: font.shape.Cell,
shaper_run: font.shape.TextRun,
x: usize,
y: usize,
) !bool {
@ -1101,7 +1101,7 @@ pub fn setScreenSize(self: *OpenGL, dim: renderer.ScreenSize) !void {
}
// Update our shaper
var shape_buf = try self.alloc.alloc(font.Shaper.Cell, grid_size.columns * 2);
var shape_buf = try self.alloc.alloc(font.shape.Cell, grid_size.columns * 2);
errdefer self.alloc.free(shape_buf);
self.alloc.free(self.font_shaper.cell_buf);
self.font_shaper.cell_buf = shape_buf;