From 6dbb82c2592d4c33751b86eb12aade01a4b36e69 Mon Sep 17 00:00:00 2001 From: "Jeffrey C. Ollie" Date: Sun, 1 Sep 2024 14:02:33 -0500 Subject: [PATCH] kitty graphics: performance enhancements Improve the performance of Kitty graphics by switching to the WUFFS library for decoding PNG images and for "swizzling" G, GA, and RGB data to RGBA formats needed by the renderers. WUFFS claims 2-3x performance benefits over other implementations, as well as memory-safe operations. Although not thoroughly benchmarked, performance is on par with Kitty's graphics decoding. https://github.com/google/wuffs --- build.zig | 16 +++ build.zig.zon | 4 + src/renderer/opengl/image.zig | 64 ++---------- src/terminal/kitty/graphics_image.zig | 50 +++------ src/wuffs/c.zig | 6 ++ src/wuffs/defs.zig | 21 ++++ src/wuffs/error.zig | 3 + src/wuffs/main.zig | 2 + src/wuffs/png.zig | 139 ++++++++++++++++++++++++++ src/wuffs/swizzle.zig | 102 +++++++++++++++++++ 10 files changed, 315 insertions(+), 92 deletions(-) create mode 100644 src/wuffs/c.zig create mode 100644 src/wuffs/defs.zig create mode 100644 src/wuffs/error.zig create mode 100644 src/wuffs/main.zig create mode 100644 src/wuffs/png.zig create mode 100644 src/wuffs/swizzle.zig diff --git a/build.zig b/build.zig index 862d50d39..8bf51def8 100644 --- a/build.zig +++ b/build.zig @@ -1038,6 +1038,7 @@ fn addDeps( .images = false, .text_input = false, }); + const wuffs_dep = b.dependency("wuffs", .{}); // Wasm we do manually since it is such a different build. if (step.rootModuleTarget().cpu.arch == .wasm32) { @@ -1062,6 +1063,21 @@ fn addDeps( step.addIncludePath(b.path("src/apprt/gtk")); } + step.addIncludePath(wuffs_dep.path("release/c")); + step.addCSourceFile( + .{ + .file = wuffs_dep.path("release/c/wuffs-v0.4.c"), + .flags = f: { + const flags = @import("src/wuffs/defs.zig").build; + var a: [flags.len][]const u8 = undefined; + inline for (flags, 0..) 
|flag, i| { + a[i] = "-D" ++ flag ++ "=1"; + } + break :f &a; + }, + }, + ); + // C++ files step.linkLibCpp(); step.addIncludePath(b.path("src")); diff --git a/build.zig.zon b/build.zig.zon index 08ed9cd71..90bcc8ecd 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -41,6 +41,10 @@ .simdutf = .{ .path = "./pkg/simdutf" }, .utfcpp = .{ .path = "./pkg/utfcpp" }, .zlib = .{ .path = "./pkg/zlib" }, + .wuffs = .{ + .url = "https://github.com/google/wuffs/archive/refs/tags/v0.4.0-alpha.8.tar.gz", + .hash = "12200984439edc817fbcbbaff564020e5104a0d04a2d0f53080700827052de700462", + }, // Shader translation .glslang = .{ .path = "./pkg/glslang" }, diff --git a/src/renderer/opengl/image.zig b/src/renderer/opengl/image.zig index f9904d8b5..414da81bd 100644 --- a/src/renderer/opengl/image.zig +++ b/src/renderer/opengl/image.zig @@ -2,6 +2,7 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const assert = std.debug.assert; const gl = @import("opengl"); +const wuffs = @import("../../wuffs/main.zig"); /// Represents a single image placement on the grid. A placement is a /// request to render an instance of an image. @@ -300,9 +301,8 @@ pub const Image = union(enum) { // RGB needs to be converted to RGBA because Metal textures // don't support RGB. .pending_rgb => |*p| { - // Note: this is the slowest possible way to do this... const data = p.dataSlice(3); - const rgba = try rgbToRgba(alloc, data); + const rgba = try wuffs.swizzle.rgbToRgba(alloc, data); alloc.free(data); p.data = rgba.ptr; self.* = .{ .pending_rgba = p.* }; @@ -310,7 +310,7 @@ pub const Image = union(enum) { .replace_rgb => |*r| { const data = r.pending.dataSlice(3); - const rgba = try rgbToRgba(alloc, data); + const rgba = try wuffs.swizzle.rgbToRgba(alloc, data); alloc.free(data); r.pending.data = rgba.ptr; self.* = .{ .replace_rgba = r.* }; @@ -319,7 +319,7 @@ pub const Image = union(enum) { // Gray and Gray+Alpha need to be converted to RGBA, too. 
.pending_gray => |*p| { const data = p.dataSlice(1); - const rgba = try grayToRgba(alloc, data); + const rgba = try wuffs.swizzle.gToRgba(alloc, data); alloc.free(data); p.data = rgba.ptr; self.* = .{ .pending_rgba = p.* }; @@ -327,7 +327,7 @@ pub const Image = union(enum) { .replace_gray => |*r| { const data = r.pending.dataSlice(2); - const rgba = try grayToRgba(alloc, data); + const rgba = try wuffs.swizzle.gToRgba(alloc, data); alloc.free(data); r.pending.data = rgba.ptr; self.* = .{ .replace_rgba = r.* }; @@ -335,7 +335,7 @@ pub const Image = union(enum) { .pending_gray_alpha => |*p| { const data = p.dataSlice(2); - const rgba = try gaToRgba(alloc, data); + const rgba = try wuffs.swizzle.gaToRgba(alloc, data); alloc.free(data); p.data = rgba.ptr; self.* = .{ .pending_rgba = p.* }; @@ -343,7 +343,7 @@ pub const Image = union(enum) { .replace_gray_alpha => |*r| { const data = r.pending.dataSlice(2); - const rgba = try gaToRgba(alloc, data); + const rgba = try wuffs.swizzle.gaToRgba(alloc, data); alloc.free(data); r.pending.data = rgba.ptr; self.* = .{ .replace_rgba = r.* }; @@ -351,56 +351,6 @@ pub const Image = union(enum) { } } - fn grayToRgba(alloc: Allocator, data: []const u8) ![]u8 { - const pixels = data.len; - var rgba = try alloc.alloc(u8, pixels * 4); - errdefer alloc.free(rgba); - var i: usize = 0; - while (i < pixels) : (i += 1) { - const rgba_i = i * 4; - rgba[rgba_i] = data[i]; - rgba[rgba_i + 1] = data[i]; - rgba[rgba_i + 2] = data[i]; - rgba[rgba_i + 3] = 255; - } - - return rgba; - } - - fn gaToRgba(alloc: Allocator, data: []const u8) ![]u8 { - const pixels = data.len / 2; - var rgba = try alloc.alloc(u8, pixels * 4); - errdefer alloc.free(rgba); - var i: usize = 0; - while (i < pixels) : (i += 1) { - const data_i = i * 2; - const rgba_i = i * 4; - rgba[rgba_i] = data[data_i]; - rgba[rgba_i + 1] = data[data_i]; - rgba[rgba_i + 2] = data[data_i]; - rgba[rgba_i + 3] = data[data_i + 1]; - } - - return rgba; - } - - fn rgbToRgba(alloc: Allocator, 
data: []const u8) ![]u8 { - const pixels = data.len / 3; - var rgba = try alloc.alloc(u8, pixels * 4); - errdefer alloc.free(rgba); - var i: usize = 0; - while (i < pixels) : (i += 1) { - const data_i = i * 3; - const rgba_i = i * 4; - rgba[rgba_i] = data[data_i]; - rgba[rgba_i + 1] = data[data_i + 1]; - rgba[rgba_i + 2] = data[data_i + 2]; - rgba[rgba_i + 3] = 255; - } - - return rgba; - } - /// Upload the pending image to the GPU and change the state of this /// image to ready. pub fn upload( diff --git a/src/terminal/kitty/graphics_image.zig b/src/terminal/kitty/graphics_image.zig index 2dd12ccc6..3af3874c9 100644 --- a/src/terminal/kitty/graphics_image.zig +++ b/src/terminal/kitty/graphics_image.zig @@ -10,7 +10,7 @@ const command = @import("graphics_command.zig"); const point = @import("../point.zig"); const PageList = @import("../PageList.zig"); const internal_os = @import("../../os/main.zig"); -const stb = @import("../../stb/main.zig"); +const wuffs = @import("../../wuffs/main.zig"); const log = std.log.scoped(.kitty_gfx); @@ -412,47 +412,27 @@ pub const LoadingImage = struct { fn decodePng(self: *LoadingImage, alloc: Allocator) !void { assert(self.image.format == .png); - // Decode PNG - var width: c_int = 0; - var height: c_int = 0; - var bpp: c_int = 0; - const data = stb.stbi_load_from_memory( - self.data.items.ptr, - @intCast(self.data.items.len), - &width, - &height, - &bpp, - 0, - ) orelse return error.InvalidData; - defer stb.stbi_image_free(data); - const len: usize = @intCast(width * height * bpp); - if (len > max_size) { - log.warn("png image too large size={} max_size={}", .{ len, max_size }); - return error.InvalidData; - } + const result = wuffs.png.decode(alloc, self.data.items) catch |err| switch (err) { + error.WuffsError => return error.InvalidData, + else => |e| return e, + }; + defer alloc.free(result.data); - // Validate our bpp - if (bpp < 1 or bpp > 4) { - log.warn("png with unsupported bpp={}", .{bpp}); - return 
error.UnsupportedDepth; + if (result.data.len > max_size) { + log.warn("png image too large size={} max_size={}", .{ result.data.len, max_size }); + return error.InvalidData; } // Replace our data self.data.deinit(alloc); self.data = .{}; - try self.data.ensureUnusedCapacity(alloc, len); - try self.data.appendSlice(alloc, data[0..len]); + try self.data.ensureUnusedCapacity(alloc, result.data.len); + try self.data.appendSlice(alloc, result.data[0..result.data.len]); // Store updated image dimensions - self.image.width = @intCast(width); - self.image.height = @intCast(height); - self.image.format = switch (bpp) { - 1 => .gray, - 2 => .gray_alpha, - 3 => .rgb, - 4 => .rgba, - else => unreachable, // validated above - }; + self.image.width = result.width; + self.image.height = result.height; + self.image.format = .rgba; } }; @@ -792,6 +772,6 @@ test "image load: png, not compressed, regular file" { var img = try loading.complete(alloc); defer img.deinit(alloc); try testing.expect(img.compression == .none); - try testing.expect(img.format == .rgb); + try testing.expect(img.format == .rgba); try tmp_dir.dir.access(path, .{}); } diff --git a/src/wuffs/c.zig b/src/wuffs/c.zig new file mode 100644 index 000000000..ab86dadcb --- /dev/null +++ b/src/wuffs/c.zig @@ -0,0 +1,6 @@ +pub const c = @cImport({ + for (@import("defs.zig").cimport) |d| { + @cDefine(d, "1"); + } + @cInclude("wuffs-v0.4.c"); +}); diff --git a/src/wuffs/defs.zig b/src/wuffs/defs.zig new file mode 100644 index 000000000..dacaa914d --- /dev/null +++ b/src/wuffs/defs.zig @@ -0,0 +1,21 @@ +//! Define all of the C macros that WUFFS uses to configure itself here so +//! that the settings used to import the C "header" file stay in sync with the +//! settings used to build the C "source" file. 
+ +pub const cimport = [_][]const u8{ + "WUFFS_CONFIG__MODULES", + "WUFFS_CONFIG__MODULE__AUX__BASE", + "WUFFS_CONFIG__MODULE__AUX__IMAGE", + "WUFFS_CONFIG__MODULE__BASE", + "WUFFS_CONFIG__MODULE__ADLER32", + "WUFFS_CONFIG__MODULE__CRC32", + "WUFFS_CONFIG__MODULE__DEFLATE", + "WUFFS_CONFIG__MODULE__JPEG", + "WUFFS_CONFIG__MODULE__PNG", + "WUFFS_CONFIG__MODULE__ZLIB", +}; + +// The only difference should be that the "build" defines WUFFS_IMPLEMENTATION +pub const build = [_][]const u8{ + "WUFFS_IMPLEMENTATION", +} ++ cimport; diff --git a/src/wuffs/error.zig b/src/wuffs/error.zig new file mode 100644 index 000000000..609deec9c --- /dev/null +++ b/src/wuffs/error.zig @@ -0,0 +1,3 @@ +const std = @import("std"); + +pub const Error = std.mem.Allocator.Error || error{WuffsError}; diff --git a/src/wuffs/main.zig b/src/wuffs/main.zig new file mode 100644 index 000000000..3f03a4158 --- /dev/null +++ b/src/wuffs/main.zig @@ -0,0 +1,2 @@ +pub const png = @import("png.zig"); +pub const swizzle = @import("swizzle.zig"); diff --git a/src/wuffs/png.zig b/src/wuffs/png.zig new file mode 100644 index 000000000..c827edba5 --- /dev/null +++ b/src/wuffs/png.zig @@ -0,0 +1,139 @@ +const std = @import("std"); + +const c = @import("c.zig").c; +const Error = @import("error.zig").Error; + +const log = std.log.scoped(.wuffs_png); + +pub fn decode(alloc: std.mem.Allocator, data: []const u8) Error!struct { + width: u32, + height: u32, + data: []const u8, +} { + log.info("data is {d} bytes", .{data.len}); + + // Work around some weirdness in WUFFS/Zig, there are some structs that + // are defined as "extern" by the Zig compiler which means that Zig won't + // allocate them on the stack at compile time. WUFFS has functions for + // dynamically allocating these structs but they use the C malloc/free. This + // gets around that by using the Zig allocator to allocate enough memory for + // the struct and then casts it to the appropriate pointer. 
+ + const decoder_buf = try alloc.alloc(u8, c.sizeof__wuffs_png__decoder()); + defer alloc.free(decoder_buf); + + const decoder: ?*c.wuffs_png__decoder = @constCast(@ptrCast(decoder_buf)); + { + const status = c.wuffs_png__decoder__initialize( + decoder, + c.sizeof__wuffs_png__decoder(), + c.WUFFS_VERSION, + 0, + ); + if (!c.wuffs_base__status__is_ok(&status)) { + const e = c.wuffs_base__status__message(&status); + log.err("{s}", .{e}); + return error.WuffsError; + } + } + + var source_buffer = std.mem.zeroes(c.wuffs_base__io_buffer); + source_buffer.data.ptr = @constCast(@ptrCast(data.ptr)); + source_buffer.data.len = data.len; + source_buffer.meta.wi = data.len; + source_buffer.meta.ri = 0; + source_buffer.meta.pos = 0; + source_buffer.meta.closed = true; + + var image_config = std.mem.zeroes(c.wuffs_base__image_config); + { + const status = c.wuffs_png__decoder__decode_image_config( + decoder, + &image_config, + &source_buffer, + ); + if (!c.wuffs_base__status__is_ok(&status)) { + const e = c.wuffs_base__status__message(&status); + log.err("{s}", .{e}); + return error.WuffsError; + } + } + + const width = c.wuffs_base__pixel_config__width(&image_config.pixcfg); + const height = c.wuffs_base__pixel_config__height(&image_config.pixcfg); + + c.wuffs_base__pixel_config__set( + &image_config.pixcfg, + c.WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL, + c.WUFFS_BASE__PIXEL_SUBSAMPLING__NONE, + width, + height, + ); + + const color = c.wuffs_base__color_u32_argb_premul; + + const destination = try alloc.alloc(u8, width * height * @sizeOf(color)); + errdefer alloc.free(destination); + + // temporary buffer for intermediate processing of image + const work_buffer = try alloc.alloc( + u8, + c.wuffs_png__decoder__workbuf_len(decoder).max_incl, + ); + defer alloc.free(work_buffer); + + const work_slice = c.wuffs_base__make_slice_u8( + work_buffer.ptr, + work_buffer.len, + ); + + var pixel_buffer = std.mem.zeroes(c.wuffs_base__pixel_buffer); + { + const status = 
c.wuffs_base__pixel_buffer__set_from_slice( + &pixel_buffer, + &image_config.pixcfg, + c.wuffs_base__make_slice_u8(destination.ptr, destination.len), + ); + if (!c.wuffs_base__status__is_ok(&status)) { + const e = c.wuffs_base__status__message(&status); + log.err("{s}", .{e}); + return error.WuffsError; + } + } + + var frame_config = std.mem.zeroes(c.wuffs_base__frame_config); + { + const status = c.wuffs_png__decoder__decode_frame_config( + decoder, + &frame_config, + &source_buffer, + ); + if (!c.wuffs_base__status__is_ok(&status)) { + const e = c.wuffs_base__status__message(&status); + log.err("{s}", .{e}); + return error.WuffsError; + } + } + + { + const status = c.wuffs_png__decoder__decode_frame( + decoder, + &pixel_buffer, + &source_buffer, + c.WUFFS_BASE__PIXEL_BLEND__SRC_OVER, + work_slice, + null, + ); + if (!c.wuffs_base__status__is_ok(&status)) { + const e = c.wuffs_base__status__message(&status); + log.err("{s}", .{e}); + return error.WuffsError; + } + } + + return .{ + .width = width, + .height = height, + .data = destination, + }; +} diff --git a/src/wuffs/swizzle.zig b/src/wuffs/swizzle.zig new file mode 100644 index 000000000..6f2c4de2d --- /dev/null +++ b/src/wuffs/swizzle.zig @@ -0,0 +1,102 @@ +const std = @import("std"); + +const c = @import("c.zig").c; +const Error = @import("error.zig").Error; + +const log = std.log.scoped(.wuffs_swizzler); + +pub fn gToRgba(alloc: std.mem.Allocator, src: []const u8) Error![]u8 { + return swizzle( + alloc, + src, + c.WUFFS_BASE__PIXEL_FORMAT__Y, + c.WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL, + ); +} + +pub fn gaToRgba(alloc: std.mem.Allocator, src: []const u8) Error![]u8 { + return swizzle( + alloc, + src, + c.WUFFS_BASE__PIXEL_FORMAT__YA_PREMUL, + c.WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL, + ); +} + +pub fn rgbToRgba(alloc: std.mem.Allocator, src: []const u8) Error![]u8 { + return swizzle( + alloc, + src, + c.WUFFS_BASE__PIXEL_FORMAT__RGB, + c.WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL, + ); +} + +fn swizzle( + alloc: 
std.mem.Allocator, + src: []const u8, + comptime src_pixel_format: u32, + comptime dst_pixel_format: u32, +) Error![]u8 { + const src_slice = c.wuffs_base__make_slice_u8( + @constCast(src.ptr), + src.len, + ); + + const dst_fmt = c.wuffs_base__make_pixel_format( + dst_pixel_format, + ); + + std.debug.assert(c.wuffs_base__pixel_format__is_direct(&dst_fmt)); + std.debug.assert(c.wuffs_base__pixel_format__is_interleaved(&dst_fmt)); + std.debug.assert(c.wuffs_base__pixel_format__bits_per_pixel(&dst_fmt) % 8 == 0); + + const dst_size = c.wuffs_base__pixel_format__bits_per_pixel(&dst_fmt) / 8; + + const src_fmt = c.wuffs_base__make_pixel_format( + src_pixel_format, + ); + + std.debug.assert(c.wuffs_base__pixel_format__is_direct(&src_fmt)); + std.debug.assert(c.wuffs_base__pixel_format__is_interleaved(&src_fmt)); + std.debug.assert(c.wuffs_base__pixel_format__bits_per_pixel(&src_fmt) % 8 == 0); + + const src_size = c.wuffs_base__pixel_format__bits_per_pixel(&src_fmt) / 8; + std.debug.assert(src.len % src_size == 0); + + const dst = try alloc.alloc(u8, src.len * dst_size / src_size); + errdefer alloc.free(dst); + + const dst_slice = c.wuffs_base__make_slice_u8( + dst.ptr, + dst.len, + ); + + var swizzler: c.wuffs_base__pixel_swizzler = undefined; + + { + const status = c.wuffs_base__pixel_swizzler__prepare( + &swizzler, + dst_fmt, + c.wuffs_base__empty_slice_u8(), + src_fmt, + c.wuffs_base__empty_slice_u8(), + c.WUFFS_BASE__PIXEL_BLEND__SRC_OVER, + ); + if (!c.wuffs_base__status__is_ok(&status)) { + const e = c.wuffs_base__status__message(&status); + log.err("{s}", .{e}); + return error.WuffsError; + } + } + { + _ = c.wuffs_base__pixel_swizzler__swizzle_interleaved_from_slice( + &swizzler, + dst_slice, + c.wuffs_base__empty_slice_u8(), + src_slice, + ); + } + + return dst; +}