Merge pull request #2178 from jcollie/kitty-graphics-performance

kitty graphics: performance enhancements
This commit is contained in:
Mitchell Hashimoto
2024-09-02 21:19:41 -07:00
committed by GitHub
13 changed files with 362 additions and 150 deletions

View File

@ -1038,6 +1038,10 @@ fn addDeps(
.images = false,
.text_input = false,
});
const wuffs_dep = b.dependency("wuffs", .{
.target = target,
.optimize = optimize,
});
// Wasm we do manually since it is such a different build.
if (step.rootModuleTarget().cpu.arch == .wasm32) {
@ -1123,6 +1127,7 @@ fn addDeps(
step.root_module.addImport("sentry", sentry_dep.module("sentry"));
step.root_module.addImport("ziglyph", ziglyph_dep.module("ziglyph"));
step.root_module.addImport("vaxis", vaxis_dep.module("vaxis"));
step.root_module.addImport("wuffs", wuffs_dep.module("wuffs"));
// Mac Stuff
if (step.rootModuleTarget().isDarwin()) {

View File

@ -40,6 +40,7 @@
.sentry = .{ .path = "./pkg/sentry" },
.simdutf = .{ .path = "./pkg/simdutf" },
.utfcpp = .{ .path = "./pkg/utfcpp" },
.wuffs = .{ .path = "./pkg/wuffs" },
.zlib = .{ .path = "./pkg/zlib" },
// Shader translation

View File

@ -1,3 +1,3 @@
# This file is auto-generated! check build-support/check-zig-cache-hash.sh for
# more details.
"sha256-mIUl5j3JxtydoV7ayy3aNrt/jR8+a68lQw6lQimLZEw="
"sha256-YLopoyRgXV6GYiTiaKt64mH6lWjlKJbi61ck0fO4WvQ="

33
pkg/wuffs/build.zig Normal file
View File

@ -0,0 +1,33 @@
const std = @import("std");
/// Build script entry point for the `wuffs` package.
///
/// Registers a Zig module named "wuffs" rooted at `src/main.zig`, links libc,
/// and compiles the single-file Wuffs C release into it. The C file is built
/// with `-DWUFFS_IMPLEMENTATION` plus one `-D` flag per entry of `defines` in
/// `src/c.zig`.
pub fn build(b: *std.Build) !void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // The "wuffs" dependency provides the C sources under `release/c`.
    const upstream = b.dependency("wuffs", .{});

    const wuffs_module = b.addModule("wuffs", .{
        .root_source_file = b.path("src/main.zig"),
        .target = target,
        .optimize = optimize,
        .link_libc = true,
    });

    // Darwin targets additionally get the Apple SDK paths added.
    if (target.result.isDarwin()) {
        try @import("apple_sdk").addPaths(b, wuffs_module);
    }

    // Assemble the C compiler flags from the shared list of config macros.
    var c_flags = std.ArrayList([]const u8).init(b.allocator);
    defer c_flags.deinit();
    try c_flags.append("-DWUFFS_IMPLEMENTATION");
    inline for (@import("src/c.zig").defines) |define| {
        try c_flags.append("-D" ++ define);
    }

    wuffs_module.addIncludePath(upstream.path("release/c"));
    wuffs_module.addCSourceFile(.{
        .file = upstream.path("release/c/wuffs-v0.4.c"),
        .flags = c_flags.items,
    });
}

17
pkg/wuffs/build.zig.zon Normal file
View File

@ -0,0 +1,17 @@
.{
.name = "wuffs",
.version = "0.0.0",
.dependencies = .{
.wuffs = .{
.url = "https://github.com/google/wuffs/archive/refs/tags/v0.4.0-alpha.8.tar.gz",
.hash = "12200984439edc817fbcbbaff564020e5104a0d04a2d0f53080700827052de700462",
},
.apple_sdk = .{ .path = "../apple-sdk" },
},
.paths = .{
"build.zig",
"build.zig.zon",
"src",
},
}

18
pkg/wuffs/src/c.zig Normal file
View File

@ -0,0 +1,18 @@
/// All the C macros defined so that the header matches the build.
/// Each entry is defined to "1" for the `@cImport` below.
pub const defines: []const []const u8 = &.{
    "WUFFS_CONFIG__MODULES",
    "WUFFS_CONFIG__MODULE__AUX__BASE",
    "WUFFS_CONFIG__MODULE__AUX__IMAGE",
    "WUFFS_CONFIG__MODULE__BASE",
    "WUFFS_CONFIG__MODULE__ADLER32",
    "WUFFS_CONFIG__MODULE__CRC32",
    "WUFFS_CONFIG__MODULE__DEFLATE",
    "WUFFS_CONFIG__MODULE__JPEG",
    "WUFFS_CONFIG__MODULE__PNG",
    "WUFFS_CONFIG__MODULE__ZLIB",
};

/// Translated declarations from `wuffs-v0.4.c`, imported with every macro in
/// `defines` set to "1".
pub const c = @cImport({
    for (defines) |macro| @cDefine(macro, "1");
    @cInclude("wuffs-v0.4.c");
});

3
pkg/wuffs/src/error.zig Normal file
View File

@ -0,0 +1,3 @@
const std = @import("std");
/// Errors returned by this package: `error.WuffsError` plus every error in
/// `std.mem.Allocator.Error`.
pub const Error = error{WuffsError} || std.mem.Allocator.Error;

2
pkg/wuffs/src/main.zig Normal file
View File

@ -0,0 +1,2 @@
/// PNG decoding (`png.zig`).
pub const png = @import("png.zig");
/// Pixel format conversion helpers (`swizzle.zig`).
pub const swizzle = @import("swizzle.zig");

147
pkg/wuffs/src/png.zig Normal file
View File

@ -0,0 +1,147 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const c = @import("c.zig").c;
const Error = @import("error.zig").Error;
const log = std.log.scoped(.wuffs_png);
/// Decode a PNG image.
///
/// `data` is treated as the complete encoded input (the source buffer is
/// marked closed). On success the returned `data` field is a buffer allocated
/// from `alloc` holding the decoded pixels in the
/// `WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL` format; the caller owns it and
/// must free it with `alloc`. `width` and `height` are the dimensions
/// reported by the decoder.
///
/// Returns `error.WuffsError` when Wuffs reports a failure (the status
/// message is logged) and `error.OutOfMemory` when an allocation fails or a
/// required buffer size cannot be represented as a `usize`.
pub fn decode(alloc: Allocator, data: []const u8) Error!struct {
    width: u32,
    height: u32,
    data: []const u8,
} {
    // Work around some weirdness in WUFFS/Zig, there are some structs that
    // are defined as "extern" by the Zig compiler which means that Zig won't
    // allocate them on the stack at compile time. WUFFS has functions for
    // dynamically allocating these structs but they use the C malloc/free. This
    // gets around that by using the Zig allocator to allocate enough memory for
    // the struct and then casts it to the appropriate pointer.
    //
    // NOTE(review): the backing storage is only guaranteed to be byte
    // aligned; confirm the allocators used here return memory aligned
    // suitably for the C decoder struct.
    const decoder_buf = try alloc.alloc(u8, c.sizeof__wuffs_png__decoder());
    defer alloc.free(decoder_buf);

    const decoder: ?*c.wuffs_png__decoder = @ptrCast(decoder_buf);
    {
        const status = c.wuffs_png__decoder__initialize(
            decoder,
            c.sizeof__wuffs_png__decoder(),
            c.WUFFS_VERSION,
            0,
        );
        if (!c.wuffs_base__status__is_ok(&status)) {
            const e = c.wuffs_base__status__message(&status);
            log.warn("decode err={s}", .{e});
            return error.WuffsError;
        }
    }

    // The whole input is already in memory: the write index sits at the end
    // and the buffer is closed so the decoder does not wait for more data.
    var source_buffer: c.wuffs_base__io_buffer = .{
        .data = .{ .ptr = @constCast(@ptrCast(data.ptr)), .len = data.len },
        .meta = .{
            .wi = data.len,
            .ri = 0,
            .pos = 0,
            .closed = true,
        },
    };

    var image_config: c.wuffs_base__image_config = undefined;
    {
        const status = c.wuffs_png__decoder__decode_image_config(
            decoder,
            &image_config,
            &source_buffer,
        );
        if (!c.wuffs_base__status__is_ok(&status)) {
            const e = c.wuffs_base__status__message(&status);
            log.warn("decode err={s}", .{e});
            return error.WuffsError;
        }
    }

    const width = c.wuffs_base__pixel_config__width(&image_config.pixcfg);
    const height = c.wuffs_base__pixel_config__height(&image_config.pixcfg);

    // Request RGBA (premultiplied) output regardless of the source format.
    c.wuffs_base__pixel_config__set(
        &image_config.pixcfg,
        c.WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL,
        c.WUFFS_BASE__PIXEL_SUBSAMPLING__NONE,
        width,
        height,
    );

    // The dimensions come straight from the (untrusted) image header, so
    // compute the byte size with checked arithmetic in `usize` rather than
    // letting a 32-bit multiplication overflow.
    const pixel_count = std.math.mul(usize, width, height) catch
        return error.OutOfMemory;
    const destination_len = std.math.mul(
        usize,
        pixel_count,
        @sizeOf(c.wuffs_base__color_u32_argb_premul),
    ) catch return error.OutOfMemory;

    const destination = try alloc.alloc(u8, destination_len);
    errdefer alloc.free(destination);

    // The frame is decoded with SRC_OVER, which blends onto whatever the
    // destination already holds. Start from transparent black so translucent
    // pixels are not mixed with uninitialized memory.
    @memset(destination, 0);

    // temporary buffer for intermediate processing of image
    const work_buffer = try alloc.alloc(
        u8,

        // The type of this is a u64 on all systems but our allocator
        // uses a usize which is a u32 on 32-bit systems.
        std.math.cast(
            usize,
            c.wuffs_png__decoder__workbuf_len(decoder).max_incl,
        ) orelse return error.OutOfMemory,
    );
    defer alloc.free(work_buffer);

    const work_slice = c.wuffs_base__make_slice_u8(
        work_buffer.ptr,
        work_buffer.len,
    );

    var pixel_buffer: c.wuffs_base__pixel_buffer = undefined;
    {
        const status = c.wuffs_base__pixel_buffer__set_from_slice(
            &pixel_buffer,
            &image_config.pixcfg,
            c.wuffs_base__make_slice_u8(destination.ptr, destination.len),
        );
        if (!c.wuffs_base__status__is_ok(&status)) {
            const e = c.wuffs_base__status__message(&status);
            log.warn("decode err={s}", .{e});
            return error.WuffsError;
        }
    }

    var frame_config: c.wuffs_base__frame_config = undefined;
    {
        const status = c.wuffs_png__decoder__decode_frame_config(
            decoder,
            &frame_config,
            &source_buffer,
        );
        if (!c.wuffs_base__status__is_ok(&status)) {
            const e = c.wuffs_base__status__message(&status);
            log.warn("decode err={s}", .{e});
            return error.WuffsError;
        }
    }

    {
        const status = c.wuffs_png__decoder__decode_frame(
            decoder,
            &pixel_buffer,
            &source_buffer,
            c.WUFFS_BASE__PIXEL_BLEND__SRC_OVER,
            work_slice,
            null,
        );
        if (!c.wuffs_base__status__is_ok(&status)) {
            const e = c.wuffs_base__status__message(&status);
            log.warn("decode err={s}", .{e});
            return error.WuffsError;
        }
    }

    return .{
        .width = width,
        .height = height,
        .data = destination,
    };
}

103
pkg/wuffs/src/swizzle.zig Normal file
View File

@ -0,0 +1,103 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const c = @import("c.zig").c;
const Error = @import("error.zig").Error;
const log = std.log.scoped(.wuffs_swizzler);
/// Convert pixel data in the `WUFFS_BASE__PIXEL_FORMAT__Y` format to
/// `WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL`.
///
/// The result is allocated from `alloc`; the caller owns it.
pub fn gToRgba(alloc: Allocator, src: []const u8) Error![]u8 {
    const from = c.WUFFS_BASE__PIXEL_FORMAT__Y;
    const to = c.WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL;
    return swizzle(alloc, src, from, to);
}
/// Convert pixel data in the `WUFFS_BASE__PIXEL_FORMAT__YA_PREMUL` format to
/// `WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL`.
///
/// The result is allocated from `alloc`; the caller owns it.
pub fn gaToRgba(alloc: Allocator, src: []const u8) Error![]u8 {
    const from = c.WUFFS_BASE__PIXEL_FORMAT__YA_PREMUL;
    const to = c.WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL;
    return swizzle(alloc, src, from, to);
}
/// Convert pixel data in the `WUFFS_BASE__PIXEL_FORMAT__RGB` format to
/// `WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL`.
///
/// The result is allocated from `alloc`; the caller owns it.
pub fn rgbToRgba(alloc: Allocator, src: []const u8) Error![]u8 {
    const from = c.WUFFS_BASE__PIXEL_FORMAT__RGB;
    const to = c.WUFFS_BASE__PIXEL_FORMAT__RGBA_PREMUL;
    return swizzle(alloc, src, from, to);
}
/// Convert the pixels in `src` from `src_pixel_format` to `dst_pixel_format`
/// using the Wuffs pixel swizzler.
///
/// Both formats must be direct, interleaved and a whole number of bytes per
/// pixel, and `src.len` must be a multiple of the source pixel size; these
/// are checked with assertions. The returned buffer is allocated from
/// `alloc` and owned by the caller.
///
/// Returns `error.WuffsError` if the swizzler cannot be prepared for the
/// format pair (the status message is logged) and `error.OutOfMemory` if the
/// destination cannot be allocated.
fn swizzle(
    alloc: Allocator,
    src: []const u8,
    comptime src_pixel_format: u32,
    comptime dst_pixel_format: u32,
) Error![]u8 {
    const src_slice = c.wuffs_base__make_slice_u8(
        @constCast(src.ptr),
        src.len,
    );

    const dst_fmt = c.wuffs_base__make_pixel_format(
        dst_pixel_format,
    );

    assert(c.wuffs_base__pixel_format__is_direct(&dst_fmt));
    assert(c.wuffs_base__pixel_format__is_interleaved(&dst_fmt));
    assert(c.wuffs_base__pixel_format__bits_per_pixel(&dst_fmt) % 8 == 0);

    const dst_size = c.wuffs_base__pixel_format__bits_per_pixel(&dst_fmt) / 8;

    const src_fmt = c.wuffs_base__make_pixel_format(
        src_pixel_format,
    );

    assert(c.wuffs_base__pixel_format__is_direct(&src_fmt));
    assert(c.wuffs_base__pixel_format__is_interleaved(&src_fmt));
    assert(c.wuffs_base__pixel_format__bits_per_pixel(&src_fmt) % 8 == 0);

    const src_size = c.wuffs_base__pixel_format__bits_per_pixel(&src_fmt) / 8;

    assert(src.len % src_size == 0);

    // One destination pixel per source pixel.
    const dst = try alloc.alloc(u8, src.len * dst_size / src_size);
    errdefer alloc.free(dst);

    // The swizzler is prepared with SRC_OVER, which blends onto whatever the
    // destination already holds. Start from zeroed (transparent black) bytes
    // so sources with alpha are not mixed with uninitialized memory.
    @memset(dst, 0);

    const dst_slice = c.wuffs_base__make_slice_u8(
        dst.ptr,
        dst.len,
    );

    var swizzler: c.wuffs_base__pixel_swizzler = undefined;
    {
        const status = c.wuffs_base__pixel_swizzler__prepare(
            &swizzler,
            dst_fmt,
            c.wuffs_base__empty_slice_u8(),
            src_fmt,
            c.wuffs_base__empty_slice_u8(),
            c.WUFFS_BASE__PIXEL_BLEND__SRC_OVER,
        );
        if (!c.wuffs_base__status__is_ok(&status)) {
            const e = c.wuffs_base__status__message(&status);
            log.warn("{s}", .{e});
            return error.WuffsError;
        }
    }

    {
        // The return value is intentionally discarded.
        _ = c.wuffs_base__pixel_swizzler__swizzle_interleaved_from_slice(
            &swizzler,
            dst_slice,
            c.wuffs_base__empty_slice_u8(),
            src_slice,
        );
    }

    return dst;
}

View File

@ -2,6 +2,7 @@ const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const objc = @import("objc");
const wuffs = @import("wuffs");
const mtl = @import("api.zig");
@ -301,9 +302,8 @@ pub const Image = union(enum) {
// RGB needs to be converted to RGBA because Metal textures
// don't support RGB.
.pending_rgb => |*p| {
// Note: this is the slowest possible way to do this...
const data = p.dataSlice(3);
const rgba = try rgbToRgba(alloc, data);
const rgba = try wuffs.swizzle.rgbToRgba(alloc, data);
alloc.free(data);
p.data = rgba.ptr;
self.* = .{ .pending_rgba = p.* };
@ -311,7 +311,7 @@ pub const Image = union(enum) {
.replace_rgb => |*r| {
const data = r.pending.dataSlice(3);
const rgba = try rgbToRgba(alloc, data);
const rgba = try wuffs.swizzle.rgbToRgba(alloc, data);
alloc.free(data);
r.pending.data = rgba.ptr;
self.* = .{ .replace_rgba = r.* };
@ -320,7 +320,7 @@ pub const Image = union(enum) {
// Gray and Gray+Alpha need to be converted to RGBA, too.
.pending_gray => |*p| {
const data = p.dataSlice(1);
const rgba = try grayToRgba(alloc, data);
const rgba = try wuffs.swizzle.gToRgba(alloc, data);
alloc.free(data);
p.data = rgba.ptr;
self.* = .{ .pending_rgba = p.* };
@ -328,7 +328,7 @@ pub const Image = union(enum) {
.replace_gray => |*r| {
// Gray data takes the same argument as `.pending_gray` (1); the previous
// value of 2 is the one used for the gray+alpha variants.
const data = r.pending.dataSlice(1);
const rgba = try grayToRgba(alloc, data);
const rgba = try wuffs.swizzle.gToRgba(alloc, data);
alloc.free(data);
r.pending.data = rgba.ptr;
self.* = .{ .replace_rgba = r.* };
@ -336,7 +336,7 @@ pub const Image = union(enum) {
.pending_gray_alpha => |*p| {
const data = p.dataSlice(2);
const rgba = try gaToRgba(alloc, data);
const rgba = try wuffs.swizzle.gaToRgba(alloc, data);
alloc.free(data);
p.data = rgba.ptr;
self.* = .{ .pending_rgba = p.* };
@ -344,7 +344,7 @@ pub const Image = union(enum) {
.replace_gray_alpha => |*r| {
const data = r.pending.dataSlice(2);
const rgba = try gaToRgba(alloc, data);
const rgba = try wuffs.swizzle.gaToRgba(alloc, data);
alloc.free(data);
r.pending.data = rgba.ptr;
self.* = .{ .replace_rgba = r.* };
@ -352,56 +352,6 @@ pub const Image = union(enum) {
}
}
/// Expand one-byte-per-pixel gray data to four-byte RGBA.
///
/// Each input byte is copied into the red, green and blue channels and the
/// alpha channel is set to 255. The result is allocated from `alloc` and
/// owned by the caller.
fn grayToRgba(alloc: Allocator, data: []const u8) ![]u8 {
    const out = try alloc.alloc(u8, data.len * 4);
    errdefer alloc.free(out);

    for (data, 0..) |luma, idx| {
        out[idx * 4 ..][0..4].* = .{ luma, luma, luma, 255 };
    }

    return out;
}
/// Expand two-byte-per-pixel gray+alpha data to four-byte RGBA.
///
/// The gray byte is copied into the red, green and blue channels and the
/// alpha byte is copied unchanged. A trailing odd byte is ignored. The result
/// is allocated from `alloc` and owned by the caller.
fn gaToRgba(alloc: Allocator, data: []const u8) ![]u8 {
    const count = data.len / 2;
    const out = try alloc.alloc(u8, count * 4);
    errdefer alloc.free(out);

    for (0..count) |n| {
        const gray = data[n * 2];
        const alpha = data[n * 2 + 1];
        out[n * 4 ..][0..4].* = .{ gray, gray, gray, alpha };
    }

    return out;
}
/// Expand three-byte-per-pixel RGB data to four-byte RGBA.
///
/// The three color bytes are copied unchanged and the alpha channel is set
/// to 255. Trailing bytes that do not form a whole pixel are ignored. The
/// result is allocated from `alloc` and owned by the caller.
fn rgbToRgba(alloc: Allocator, data: []const u8) ![]u8 {
    const count = data.len / 3;
    const out = try alloc.alloc(u8, count * 4);
    errdefer alloc.free(out);

    for (0..count) |n| {
        const rgb = data[n * 3 ..][0..3];
        out[n * 4 ..][0..4].* = .{ rgb[0], rgb[1], rgb[2], 255 };
    }

    return out;
}
/// Upload the pending image to the GPU and change the state of this
/// image to ready.
pub fn upload(

View File

@ -2,6 +2,7 @@ const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const gl = @import("opengl");
const wuffs = @import("wuffs");
/// Represents a single image placement on the grid. A placement is a
/// request to render an instance of an image.
@ -300,9 +301,8 @@ pub const Image = union(enum) {
// RGB needs to be converted to RGBA because Metal textures
// don't support RGB.
.pending_rgb => |*p| {
// Note: this is the slowest possible way to do this...
const data = p.dataSlice(3);
const rgba = try rgbToRgba(alloc, data);
const rgba = try wuffs.swizzle.rgbToRgba(alloc, data);
alloc.free(data);
p.data = rgba.ptr;
self.* = .{ .pending_rgba = p.* };
@ -310,7 +310,7 @@ pub const Image = union(enum) {
.replace_rgb => |*r| {
const data = r.pending.dataSlice(3);
const rgba = try rgbToRgba(alloc, data);
const rgba = try wuffs.swizzle.rgbToRgba(alloc, data);
alloc.free(data);
r.pending.data = rgba.ptr;
self.* = .{ .replace_rgba = r.* };
@ -319,7 +319,7 @@ pub const Image = union(enum) {
// Gray and Gray+Alpha need to be converted to RGBA, too.
.pending_gray => |*p| {
const data = p.dataSlice(1);
const rgba = try grayToRgba(alloc, data);
const rgba = try wuffs.swizzle.gToRgba(alloc, data);
alloc.free(data);
p.data = rgba.ptr;
self.* = .{ .pending_rgba = p.* };
@ -327,7 +327,7 @@ pub const Image = union(enum) {
.replace_gray => |*r| {
// Gray data takes the same argument as `.pending_gray` (1); the previous
// value of 2 is the one used for the gray+alpha variants.
const data = r.pending.dataSlice(1);
const rgba = try grayToRgba(alloc, data);
const rgba = try wuffs.swizzle.gToRgba(alloc, data);
alloc.free(data);
r.pending.data = rgba.ptr;
self.* = .{ .replace_rgba = r.* };
@ -335,7 +335,7 @@ pub const Image = union(enum) {
.pending_gray_alpha => |*p| {
const data = p.dataSlice(2);
const rgba = try gaToRgba(alloc, data);
const rgba = try wuffs.swizzle.gaToRgba(alloc, data);
alloc.free(data);
p.data = rgba.ptr;
self.* = .{ .pending_rgba = p.* };
@ -343,7 +343,7 @@ pub const Image = union(enum) {
.replace_gray_alpha => |*r| {
const data = r.pending.dataSlice(2);
const rgba = try gaToRgba(alloc, data);
const rgba = try wuffs.swizzle.gaToRgba(alloc, data);
alloc.free(data);
r.pending.data = rgba.ptr;
self.* = .{ .replace_rgba = r.* };
@ -351,56 +351,6 @@ pub const Image = union(enum) {
}
}
/// Expand one-byte-per-pixel gray data to four-byte RGBA.
///
/// Each input byte is copied into the red, green and blue channels and the
/// alpha channel is set to 255. The result is allocated from `alloc` and
/// owned by the caller.
fn grayToRgba(alloc: Allocator, data: []const u8) ![]u8 {
    const out = try alloc.alloc(u8, data.len * 4);
    errdefer alloc.free(out);

    for (data, 0..) |luma, idx| {
        out[idx * 4 ..][0..4].* = .{ luma, luma, luma, 255 };
    }

    return out;
}
/// Expand two-byte-per-pixel gray+alpha data to four-byte RGBA.
///
/// The gray byte is copied into the red, green and blue channels and the
/// alpha byte is copied unchanged. A trailing odd byte is ignored. The result
/// is allocated from `alloc` and owned by the caller.
fn gaToRgba(alloc: Allocator, data: []const u8) ![]u8 {
    const count = data.len / 2;
    const out = try alloc.alloc(u8, count * 4);
    errdefer alloc.free(out);

    for (0..count) |n| {
        const gray = data[n * 2];
        const alpha = data[n * 2 + 1];
        out[n * 4 ..][0..4].* = .{ gray, gray, gray, alpha };
    }

    return out;
}
/// Expand three-byte-per-pixel RGB data to four-byte RGBA.
///
/// The three color bytes are copied unchanged and the alpha channel is set
/// to 255. Trailing bytes that do not form a whole pixel are ignored. The
/// result is allocated from `alloc` and owned by the caller.
fn rgbToRgba(alloc: Allocator, data: []const u8) ![]u8 {
    const count = data.len / 3;
    const out = try alloc.alloc(u8, count * 4);
    errdefer alloc.free(out);

    for (0..count) |n| {
        const rgb = data[n * 3 ..][0..3];
        out[n * 4 ..][0..4].* = .{ rgb[0], rgb[1], rgb[2], 255 };
    }

    return out;
}
/// Upload the pending image to the GPU and change the state of this
/// image to ready.
pub fn upload(

View File

@ -10,7 +10,7 @@ const command = @import("graphics_command.zig");
const point = @import("../point.zig");
const PageList = @import("../PageList.zig");
const internal_os = @import("../../os/main.zig");
const stb = @import("../../stb/main.zig");
const wuffs = @import("wuffs");
const log = std.log.scoped(.kitty_gfx);
@ -412,47 +412,30 @@ pub const LoadingImage = struct {
fn decodePng(self: *LoadingImage, alloc: Allocator) !void {
assert(self.image.format == .png);
// Decode PNG
var width: c_int = 0;
var height: c_int = 0;
var bpp: c_int = 0;
const data = stb.stbi_load_from_memory(
self.data.items.ptr,
@intCast(self.data.items.len),
&width,
&height,
&bpp,
0,
) orelse return error.InvalidData;
defer stb.stbi_image_free(data);
const len: usize = @intCast(width * height * bpp);
if (len > max_size) {
log.warn("png image too large size={} max_size={}", .{ len, max_size });
return error.InvalidData;
}
const result = wuffs.png.decode(
alloc,
self.data.items,
) catch |err| switch (err) {
error.WuffsError => return error.InvalidData,
error.OutOfMemory => return error.OutOfMemory,
};
defer alloc.free(result.data);
// Validate our bpp
if (bpp < 1 or bpp > 4) {
log.warn("png with unsupported bpp={}", .{bpp});
return error.UnsupportedDepth;
if (result.data.len > max_size) {
log.warn("png image too large size={} max_size={}", .{ result.data.len, max_size });
return error.InvalidData;
}
// Replace our data
self.data.deinit(alloc);
self.data = .{};
try self.data.ensureUnusedCapacity(alloc, len);
try self.data.appendSlice(alloc, data[0..len]);
try self.data.ensureUnusedCapacity(alloc, result.data.len);
try self.data.appendSlice(alloc, result.data[0..result.data.len]);
// Store updated image dimensions
self.image.width = @intCast(width);
self.image.height = @intCast(height);
self.image.format = switch (bpp) {
1 => .gray,
2 => .gray_alpha,
3 => .rgb,
4 => .rgba,
else => unreachable, // validated above
};
self.image.width = result.width;
self.image.height = result.height;
self.image.format = .rgba;
}
};
@ -792,6 +775,6 @@ test "image load: png, not compressed, regular file" {
var img = try loading.complete(alloc);
defer img.deinit(alloc);
try testing.expect(img.compression == .none);
try testing.expect(img.format == .rgb);
try testing.expect(img.format == .rgba);
try tmp_dir.dir.access(path, .{});
}