kitty graphics: performance enhancements

Improve the performance of Kitty graphics by switching to the WUFFS
library for decoding PNG images and for "swizzling" G, GA, and RGB data
to RGBA formats needed by the renderers.

WUFFS claims 2-3x performance benefits over other implementations, as
well as memory-safe operations.

Although not thoroughly benchmarked, performance is on par with Kitty's
graphics decoding.

https://github.com/google/wuffs
This commit is contained in:
Jeffrey C. Ollie
2024-09-01 14:02:33 -05:00
committed by Mitchell Hashimoto
parent b8b89acf5e
commit 6dbb82c259
10 changed files with 315 additions and 92 deletions

View File

@ -1038,6 +1038,7 @@ fn addDeps(
.images = false,
.text_input = false,
});
const wuffs_dep = b.dependency("wuffs", .{});
// Wasm we do manually since it is such a different build.
if (step.rootModuleTarget().cpu.arch == .wasm32) {
@ -1062,6 +1063,21 @@ fn addDeps(
step.addIncludePath(b.path("src/apprt/gtk"));
}
step.addIncludePath(wuffs_dep.path("release/c"));
step.addCSourceFile(
.{
.file = wuffs_dep.path("release/c/wuffs-v0.4.c"),
.flags = f: {
const flags = @import("src/wuffs/defs.zig").build;
var a: [flags.len][]const u8 = undefined;
inline for (flags, 0..) |flag, i| {
a[i] = "-D" ++ flag ++ "=1";
}
break :f &a;
},
},
);
// C++ files
step.linkLibCpp();
step.addIncludePath(b.path("src"));

View File

@ -41,6 +41,10 @@
.simdutf = .{ .path = "./pkg/simdutf" },
.utfcpp = .{ .path = "./pkg/utfcpp" },
.zlib = .{ .path = "./pkg/zlib" },
.wuffs = .{
.url = "https://github.com/google/wuffs/archive/refs/tags/v0.4.0-alpha.8.tar.gz",
.hash = "12200984439edc817fbcbbaff564020e5104a0d04a2d0f53080700827052de700462",
},
// Shader translation
.glslang = .{ .path = "./pkg/glslang" },

View File

@ -2,6 +2,7 @@ const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const gl = @import("opengl");
const wuffs = @import("../../wuffs/main.zig");
/// Represents a single image placement on the grid. A placement is a
/// request to render an instance of an image.
@ -300,9 +301,8 @@ pub const Image = union(enum) {
// RGB needs to be converted to RGBA because Metal textures
// don't support RGB.
.pending_rgb => |*p| {
// Note: this is the slowest possible way to do this...
const data = p.dataSlice(3);
const rgba = try rgbToRgba(alloc, data);
const rgba = try wuffs.swizzle.rgbToRgba(alloc, data);
alloc.free(data);
p.data = rgba.ptr;
self.* = .{ .pending_rgba = p.* };
@ -310,7 +310,7 @@ pub const Image = union(enum) {
.replace_rgb => |*r| {
const data = r.pending.dataSlice(3);
const rgba = try rgbToRgba(alloc, data);
const rgba = try wuffs.swizzle.rgbToRgba(alloc, data);
alloc.free(data);
r.pending.data = rgba.ptr;
self.* = .{ .replace_rgba = r.* };
@ -319,7 +319,7 @@ pub const Image = union(enum) {
// Gray and Gray+Alpha need to be converted to RGBA, too.
.pending_gray => |*p| {
const data = p.dataSlice(1);
const rgba = try grayToRgba(alloc, data);
const rgba = try wuffs.swizzle.gToRgba(alloc, data);
alloc.free(data);
p.data = rgba.ptr;
self.* = .{ .pending_rgba = p.* };
@ -327,7 +327,7 @@ pub const Image = union(enum) {
.replace_gray => |*r| {
const data = r.pending.dataSlice(2);
const rgba = try grayToRgba(alloc, data);
const rgba = try wuffs.swizzle.gToRgba(alloc, data);
alloc.free(data);
r.pending.data = rgba.ptr;
self.* = .{ .replace_rgba = r.* };
@ -335,7 +335,7 @@ pub const Image = union(enum) {
.pending_gray_alpha => |*p| {
const data = p.dataSlice(2);
const rgba = try gaToRgba(alloc, data);
const rgba = try wuffs.swizzle.gaToRgba(alloc, data);
alloc.free(data);
p.data = rgba.ptr;
self.* = .{ .pending_rgba = p.* };
@ -343,7 +343,7 @@ pub const Image = union(enum) {
.replace_gray_alpha => |*r| {
const data = r.pending.dataSlice(2);
const rgba = try gaToRgba(alloc, data);
const rgba = try wuffs.swizzle.gaToRgba(alloc, data);
alloc.free(data);
r.pending.data = rgba.ptr;
self.* = .{ .replace_rgba = r.* };
@ -351,56 +351,6 @@ pub const Image = union(enum) {
}
}
/// Expand 8-bit grayscale pixels into opaque RGBA: every input byte is
/// copied into the R, G and B channels and alpha is set to 255.
/// The returned slice is allocated with `alloc`; the caller owns it.
fn grayToRgba(alloc: Allocator, data: []const u8) ![]u8 {
    const out = try alloc.alloc(u8, data.len * 4);
    // Nothing below can fail, so no cleanup-on-error path is required.
    for (data, 0..) |luma, idx| {
        out[idx * 4 ..][0..4].* = .{ luma, luma, luma, 255 };
    }
    return out;
}
/// Expand interleaved gray+alpha pixels (2 bytes each) into RGBA: the gray
/// byte is copied into R, G and B and the alpha byte is carried over as-is.
/// A trailing partial pixel in `data` is ignored.
/// The returned slice is allocated with `alloc`; the caller owns it.
fn gaToRgba(alloc: Allocator, data: []const u8) ![]u8 {
    const count = data.len / 2;
    const out = try alloc.alloc(u8, count * 4);
    // Nothing below can fail, so no cleanup-on-error path is required.
    for (0..count) |idx| {
        const luma = data[idx * 2];
        const alpha = data[idx * 2 + 1];
        out[idx * 4 ..][0..4].* = .{ luma, luma, luma, alpha };
    }
    return out;
}
/// Expand interleaved RGB pixels (3 bytes each) into opaque RGBA by copying
/// the three color channels and appending an alpha of 255.
/// Trailing bytes that do not form a whole pixel are ignored.
/// The returned slice is allocated with `alloc`; the caller owns it.
fn rgbToRgba(alloc: Allocator, data: []const u8) ![]u8 {
    const count = data.len / 3;
    const out = try alloc.alloc(u8, count * 4);
    // Nothing below can fail, so no cleanup-on-error path is required.
    for (0..count) |idx| {
        const px = data[idx * 3 ..][0..3];
        out[idx * 4 ..][0..4].* = .{ px[0], px[1], px[2], 255 };
    }
    return out;
}
/// Upload the pending image to the GPU and change the state of this
/// image to ready.
pub fn upload(

View File

@ -10,7 +10,7 @@ const command = @import("graphics_command.zig");
const point = @import("../point.zig");
const PageList = @import("../PageList.zig");
const internal_os = @import("../../os/main.zig");
const stb = @import("../../stb/main.zig");
const wuffs = @import("../../wuffs/main.zig");
const log = std.log.scoped(.kitty_gfx);
@ -412,47 +412,27 @@ pub const LoadingImage = struct {
fn decodePng(self: *LoadingImage, alloc: Allocator) !void {
assert(self.image.format == .png);
// Decode PNG
var width: c_int = 0;
var height: c_int = 0;
var bpp: c_int = 0;
const data = stb.stbi_load_from_memory(
self.data.items.ptr,
@intCast(self.data.items.len),
&width,
&height,
&bpp,
0,
) orelse return error.InvalidData;
defer stb.stbi_image_free(data);
const len: usize = @intCast(width * height * bpp);
if (len > max_size) {
log.warn("png image too large size={} max_size={}", .{ len, max_size });
return error.InvalidData;
}
const result = wuffs.png.decode(alloc, self.data.items) catch |err| switch (err) {
error.WuffsError => return error.InvalidData,
else => |e| return e,
};
defer alloc.free(result.data);
// Validate our bpp
if (bpp < 1 or bpp > 4) {
log.warn("png with unsupported bpp={}", .{bpp});
return error.UnsupportedDepth;
if (result.data.len > max_size) {
log.warn("png image too large size={} max_size={}", .{ result.data.len, max_size });
return error.InvalidData;
}
// Replace our data
self.data.deinit(alloc);
self.data = .{};
try self.data.ensureUnusedCapacity(alloc, len);
try self.data.appendSlice(alloc, data[0..len]);
try self.data.ensureUnusedCapacity(alloc, result.data.len);
try self.data.appendSlice(alloc, result.data[0..result.data.len]);
// Store updated image dimensions
self.image.width = @intCast(width);
self.image.height = @intCast(height);
self.image.format = switch (bpp) {
1 => .gray,
2 => .gray_alpha,
3 => .rgb,
4 => .rgba,
else => unreachable, // validated above
};
self.image.width = result.width;
self.image.height = result.height;
self.image.format = .rgba;
}
};
@ -792,6 +772,6 @@ test "image load: png, not compressed, regular file" {
var img = try loading.complete(alloc);
defer img.deinit(alloc);
try testing.expect(img.compression == .none);
try testing.expect(img.format == .rgb);
try testing.expect(img.format == .rgba);
try tmp_dir.dir.access(path, .{});
}

6
src/wuffs/c.zig Normal file
View File

@ -0,0 +1,6 @@
/// Zig view of the single-file WUFFS C library, generated by `@cImport`.
pub const c = @cImport({
// Define every configuration macro listed in `defs.zig` (`cimport`) as "1"
// before the include below so the imported declarations are generated
// with those macros set.
for (@import("defs.zig").cimport) |d| {
@cDefine(d, "1");
}
// WUFFS ships as one C file; it is included here for its declarations.
@cInclude("wuffs-v0.4.c");
});

21
src/wuffs/defs.zig Normal file
View File

@ -0,0 +1,21 @@
//! Define all of the C macros that WUFFS uses to configure itself here so
//! that the settings used to import the C "header" file stay in sync with the
//! settings used to build the C "source" file.
/// Names of the WUFFS configuration macros that are defined (each to `1`)
/// when the C file is imported into Zig. The same names are also part of
/// `build`, so the import and the compiled C source see the same settings.
pub const cimport = [_][]const u8{
"WUFFS_CONFIG__MODULES",
"WUFFS_CONFIG__MODULE__AUX__BASE",
"WUFFS_CONFIG__MODULE__AUX__IMAGE",
"WUFFS_CONFIG__MODULE__BASE",
"WUFFS_CONFIG__MODULE__ADLER32",
"WUFFS_CONFIG__MODULE__CRC32",
"WUFFS_CONFIG__MODULE__DEFLATE",
"WUFFS_CONFIG__MODULE__JPEG",
"WUFFS_CONFIG__MODULE__PNG",
"WUFFS_CONFIG__MODULE__ZLIB",
};
/// Macro names used when compiling the WUFFS C source file.
/// The only difference from `cimport` should be that the "build" list also
/// defines WUFFS_IMPLEMENTATION.
pub const build = [_][]const u8{
"WUFFS_IMPLEMENTATION",
} ++ cimport;

3
src/wuffs/error.zig Normal file
View File

@ -0,0 +1,3 @@
const std = @import("std");
/// Error set shared by the WUFFS wrappers: allocator failures plus a single
/// `WuffsError` that stands for any failure reported by the WUFFS library.
pub const Error = std.mem.Allocator.Error || error{WuffsError};

2
src/wuffs/main.zig Normal file
View File

@ -0,0 +1,2 @@
pub const png = @import("png.zig");
pub const swizzle = @import("swizzle.zig");

139
src/wuffs/png.zig Normal file
View File

@ -0,0 +1,139 @@
const std = @import("std");
const c = @import("c.zig").c;
const Error = @import("error.zig").Error;
const log = std.log.scoped(.wuffs_png);
/// Decode a complete in-memory PNG into 8-bit RGBA (premultiplied alpha)
/// pixels using the WUFFS PNG decoder.
///
/// On success `data` in the result holds
/// `width * height * @sizeOf(wuffs_base__color_u32_argb_premul)` bytes
/// allocated with `alloc`; the caller owns that slice and must free it.
///
/// Errors:
///   - `error.WuffsError`: WUFFS reported a failure (the message is logged)
///     or the image dimensions overflow the addressable size.
///   - `error.OutOfMemory`: an allocation failed.
pub fn decode(alloc: std.mem.Allocator, data: []const u8) Error!struct {
    width: u32,
    height: u32,
    data: []const u8,
} {
    log.info("data is {d} bytes", .{data.len});

    // Work around some weirdness in WUFFS/Zig, there are some structs that
    // are defined as "extern" by the Zig compiler which means that Zig won't
    // allocate them on the stack at compile time. WUFFS has functions for
    // dynamically allocating these structs but they use the C malloc/free. This
    // gets around that by using the Zig allocator to allocate enough memory for
    // the struct and then casts it to the appropriate pointer.
    //
    // NOTE(review): a `u8` allocation only guarantees byte alignment; confirm
    // that this satisfies the alignment the C decoder struct expects.
    const decoder_buf = try alloc.alloc(u8, c.sizeof__wuffs_png__decoder());
    defer alloc.free(decoder_buf);

    const decoder: ?*c.wuffs_png__decoder = @constCast(@ptrCast(decoder_buf));
    try checkStatus(c.wuffs_png__decoder__initialize(
        decoder,
        c.sizeof__wuffs_png__decoder(),
        c.WUFFS_VERSION,
        0,
    ));

    // Present the whole input as an already-filled, closed I/O buffer.
    var source_buffer = std.mem.zeroes(c.wuffs_base__io_buffer);
    source_buffer.data.ptr = @constCast(@ptrCast(data.ptr));
    source_buffer.data.len = data.len;
    source_buffer.meta.wi = data.len;
    source_buffer.meta.ri = 0;
    source_buffer.meta.pos = 0;
    source_buffer.meta.closed = true;

    var image_config = std.mem.zeroes(c.wuffs_base__image_config);
    try checkStatus(c.wuffs_png__decoder__decode_image_config(
        decoder,
        &image_config,
        &source_buffer,
    ));

    const width = c.wuffs_base__pixel_config__width(&image_config.pixcfg);
    const height = c.wuffs_base__pixel_config__height(&image_config.pixcfg);

    // Ask for RGBA output regardless of the PNG's native pixel format.
    c.wuffs_base__pixel_config__set(
        &image_config.pixcfg,
        c.WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL,
        c.WUFFS_BASE__PIXEL_SUBSAMPLING__NONE,
        width,
        height,
    );

    // The dimensions come from untrusted image data, so compute the output
    // size with checked arithmetic instead of trapping (or wrapping in
    // ReleaseFast) on overflow.
    const color = c.wuffs_base__color_u32_argb_premul;
    const pixel_count = std.math.mul(usize, width, height) catch {
        log.err("image dimensions overflow width={d} height={d}", .{ width, height });
        return error.WuffsError;
    };
    const destination_len = std.math.mul(usize, pixel_count, @sizeOf(color)) catch {
        log.err("image dimensions overflow width={d} height={d}", .{ width, height });
        return error.WuffsError;
    };

    const destination = try alloc.alloc(u8, destination_len);
    errdefer alloc.free(destination);

    // temporary buffer for intermediate processing of image
    const work_len = std.math.cast(
        usize,
        c.wuffs_png__decoder__workbuf_len(decoder).max_incl,
    ) orelse return error.OutOfMemory;
    const work_buffer = try alloc.alloc(u8, work_len);
    defer alloc.free(work_buffer);

    const work_slice = c.wuffs_base__make_slice_u8(
        work_buffer.ptr,
        work_buffer.len,
    );

    var pixel_buffer = std.mem.zeroes(c.wuffs_base__pixel_buffer);
    try checkStatus(c.wuffs_base__pixel_buffer__set_from_slice(
        &pixel_buffer,
        &image_config.pixcfg,
        c.wuffs_base__make_slice_u8(destination.ptr, destination.len),
    ));

    var frame_config = std.mem.zeroes(c.wuffs_base__frame_config);
    try checkStatus(c.wuffs_png__decoder__decode_frame_config(
        decoder,
        &frame_config,
        &source_buffer,
    ));

    // `destination` is freshly allocated and therefore uninitialized, so the
    // decoded pixels must replace its contents (SRC) rather than be blended
    // over them (SRC_OVER).
    try checkStatus(c.wuffs_png__decoder__decode_frame(
        decoder,
        &pixel_buffer,
        &source_buffer,
        c.WUFFS_BASE__PIXEL_BLEND__SRC,
        work_slice,
        null,
    ));

    return .{
        .width = width,
        .height = height,
        .data = destination,
    };
}

/// Log the message of a non-OK WUFFS status and convert it to
/// `error.WuffsError`; an OK status is a no-op.
fn checkStatus(status: c.wuffs_base__status) error{WuffsError}!void {
    if (!c.wuffs_base__status__is_ok(&status)) {
        const e = c.wuffs_base__status__message(&status);
        log.err("{s}", .{e});
        return error.WuffsError;
    }
}

102
src/wuffs/swizzle.zig Normal file
View File

@ -0,0 +1,102 @@
const std = @import("std");
const c = @import("c.zig").c;
const Error = @import("error.zig").Error;
const log = std.log.scoped(.wuffs_swizzler);
/// Convert pixels in the WUFFS `Y` format to `RGBA_PREMUL`.
/// The result is allocated with `alloc`; the caller owns it.
pub fn gToRgba(alloc: std.mem.Allocator, src: []const u8) Error![]u8 {
    const from = c.WUFFS_BASE__PIXEL_FORMAT__Y;
    const to = c.WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL;
    return swizzle(alloc, src, from, to);
}
/// Convert pixels in the WUFFS `YA_PREMUL` format to `RGBA_PREMUL`.
/// The result is allocated with `alloc`; the caller owns it.
pub fn gaToRgba(alloc: std.mem.Allocator, src: []const u8) Error![]u8 {
    const from = c.WUFFS_BASE__PIXEL_FORMAT__YA_PREMUL;
    const to = c.WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL;
    return swizzle(alloc, src, from, to);
}
/// Convert pixels in the WUFFS `RGB` format to `RGBA_PREMUL`.
/// The result is allocated with `alloc`; the caller owns it.
pub fn rgbToRgba(alloc: std.mem.Allocator, src: []const u8) Error![]u8 {
    const from = c.WUFFS_BASE__PIXEL_FORMAT__RGB;
    const to = c.WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL;
    return swizzle(alloc, src, from, to);
}
/// Convert interleaved pixels in `src` from `src_pixel_format` to
/// `dst_pixel_format` using the WUFFS pixel swizzler.
///
/// Both formats must be direct, interleaved and a whole number of bytes per
/// pixel, and `src.len` must be a whole number of source pixels; these are
/// asserted rather than reported as errors.
///
/// Returns a newly allocated slice holding one destination pixel per source
/// pixel. It is allocated with `alloc` and owned by the caller.
///
/// Errors:
///   - `error.WuffsError`: the swizzler could not be prepared for this
///     format pair, or it converted fewer pixels than expected.
///   - `error.OutOfMemory`: the destination allocation failed.
fn swizzle(
    alloc: std.mem.Allocator,
    src: []const u8,
    comptime src_pixel_format: u32,
    comptime dst_pixel_format: u32,
) Error![]u8 {
    const src_slice = c.wuffs_base__make_slice_u8(
        @constCast(src.ptr),
        src.len,
    );

    const dst_fmt = c.wuffs_base__make_pixel_format(
        dst_pixel_format,
    );
    std.debug.assert(c.wuffs_base__pixel_format__is_direct(&dst_fmt));
    std.debug.assert(c.wuffs_base__pixel_format__is_interleaved(&dst_fmt));
    std.debug.assert(c.wuffs_base__pixel_format__bits_per_pixel(&dst_fmt) % 8 == 0);
    const dst_size = c.wuffs_base__pixel_format__bits_per_pixel(&dst_fmt) / 8;

    const src_fmt = c.wuffs_base__make_pixel_format(
        src_pixel_format,
    );
    std.debug.assert(c.wuffs_base__pixel_format__is_direct(&src_fmt));
    std.debug.assert(c.wuffs_base__pixel_format__is_interleaved(&src_fmt));
    std.debug.assert(c.wuffs_base__pixel_format__bits_per_pixel(&src_fmt) % 8 == 0);
    const src_size = c.wuffs_base__pixel_format__bits_per_pixel(&src_fmt) / 8;

    std.debug.assert(src.len % src_size == 0);
    const pixel_count = src.len / src_size;

    const dst = try alloc.alloc(u8, pixel_count * dst_size);
    errdefer alloc.free(dst);

    const dst_slice = c.wuffs_base__make_slice_u8(
        dst.ptr,
        dst.len,
    );

    // `prepare` fills in the swizzler, so it is left undefined until then.
    var swizzler: c.wuffs_base__pixel_swizzler = undefined;
    {
        // `dst` is freshly allocated and uninitialized, so the converted
        // pixels must overwrite it (SRC) instead of being blended over its
        // current contents (SRC_OVER).
        const status = c.wuffs_base__pixel_swizzler__prepare(
            &swizzler,
            dst_fmt,
            c.wuffs_base__empty_slice_u8(),
            src_fmt,
            c.wuffs_base__empty_slice_u8(),
            c.WUFFS_BASE__PIXEL_BLEND__SRC,
        );
        if (!c.wuffs_base__status__is_ok(&status)) {
            const e = c.wuffs_base__status__message(&status);
            log.err("{s}", .{e});
            return error.WuffsError;
        }
    }

    {
        // The swizzler reports how many pixels it converted; anything short
        // of the full input would leave part of `dst` uninitialized.
        const swizzled = c.wuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(
            &swizzler,
            dst_slice,
            c.wuffs_base__empty_slice_u8(),
            src_slice,
        );
        if (swizzled != pixel_count) {
            log.err("swizzled {d} of {d} pixels", .{ swizzled, pixel_count });
            return error.WuffsError;
        }
    }

    return dst;
}