diff --git a/build.zig b/build.zig index ce295bbdf..78a8e72ad 100644 --- a/build.zig +++ b/build.zig @@ -204,6 +204,28 @@ pub fn build(b: *std.Build) !void { // Add our benchmarks try benchSteps(b, target, optimize, config, emit_bench); + // TODO: temporary simd tester binary + { + const simd_exe = b.addExecutable(.{ + .name = "simd", + .root_source_file = .{ .path = "src/simd/main.zig" }, + .target = target, + .optimize = optimize, + }); + + { + const simd_install = b.addInstallArtifact(simd_exe, .{}); + const step = b.step("simd", "Build the simd test exe"); + step.dependOn(&simd_install.step); + } + + { + const simd_run = b.addRunArtifact(simd_exe); + const step = b.step("simd-run", "Run the app"); + step.dependOn(&simd_run.step); + } + } + // We only build an exe if we have a runtime set. const exe_: ?*std.Build.Step.Compile = if (config.app_runtime != .none) b.addExecutable(.{ .name = "ghostty", diff --git a/nix/devShell.nix b/nix/devShell.nix index 782498b57..8515b86fb 100644 --- a/nix/devShell.nix +++ b/nix/devShell.nix @@ -12,6 +12,7 @@ parallel, pkg-config, python3, + qemu, scdoc, tracy, valgrind, @@ -110,6 +111,10 @@ in # by default so we have to include this. bashInteractive + # Used for testing SIMD codegen. This is Linux only because the macOS + # build only has the qemu-system files. + qemu + gdb valgrind wraptest diff --git a/src/simd/isa.zig b/src/simd/isa.zig new file mode 100644 index 000000000..fd1bf6a65 --- /dev/null +++ b/src/simd/isa.zig @@ -0,0 +1,112 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +/// Possible instruction set architectures for SIMD operations. These are +/// coarse grained and are targeted specifically so we can detect exactly +/// what is available to us in Ghostty. +pub const ISA = enum { + scalar, + neon, + avx2, + + /// Detect the available ISA at runtime. This will use comptime information + /// as well to minimize the number of runtime checks. + pub fn detect() ISA { + return switch (builtin.cpu.arch) { + // Neon is mandatory on aarch64. No runtime checks necessary. + .aarch64 => .neon, + .x86_64 => detectX86(), + else => .scalar, + }; + } + + fn detectX86() ISA { + // NOTE: this is just some boilerplate to detect AVX2. We + // can probably support earlier forms of SIMD such as plain + // SSE, and we can definitely take advtange of later forms. This + // is just some boilerplate to ONLY detect AVX2 right now. + + // If we support less than 7 for the maximum leaf level then we + // don't support any AVX instructions. + var leaf = X86.cpuid(0, 0); + if (leaf.eax < 7) return .scalar; + + // If we don't have xsave or avx, then we don't support anything. + leaf = X86.cpuid(1, 0); + const has_xsave = hasBit(leaf.ecx, 27); + const has_avx = hasBit(leaf.ecx, 28); + if (!has_xsave or !has_avx) return .scalar; + + // We require AVX save state in order to use AVX instructions. + const xcr0_eax = X86.getXCR0(); // requires xsave+avx + const has_avx_save = hasMask(xcr0_eax, X86.XCR0_XMM | X86.XCR0_YMM); + if (!has_avx_save) return .scalar; + + // Check for AVX2. + leaf = X86.cpuid(7, 0); + const has_avx2 = hasBit(leaf.ebx, 5); + if (has_avx2) return .avx2; + + return .scalar; + } +}; + +/// Constants and functions related to x86 and x86_64. Reference for this +/// can be found in the Intel Architectures Software Developer's Manual, +/// mostly around the cpuid instruction. +const X86 = struct { + const XCR0_XMM = 0x02; + const XCR0_YMM = 0x04; + const XCR0_MASKREG = 0x20; + const XCR0_ZMM0_15 = 0x40; + const XCR0_ZMM16_31 = 0x80; + + const CpuidLeaf = packed struct { + eax: u32, + ebx: u32, + ecx: u32, + edx: u32, + }; + + /// Wrapper around x86 and x86_64 `cpuid` in order to gather processor + /// and feature information. This is explicitly and specifically only + /// for x86 and x86_64. + fn cpuid(leaf_id: u32, subid: u32) CpuidLeaf { + var eax: u32 = undefined; + var ebx: u32 = undefined; + var ecx: u32 = undefined; + var edx: u32 = undefined; + + asm volatile ("cpuid" + : [_] "={eax}" (eax), + [_] "={ebx}" (ebx), + [_] "={ecx}" (ecx), + [_] "={edx}" (edx), + : [_] "{eax}" (leaf_id), + [_] "{ecx}" (subid), + ); + + return .{ .eax = eax, .ebx = ebx, .ecx = ecx, .edx = edx }; + } + + // Read control register 0 (XCR0). Used to detect features such as AVX. + fn getXCR0() u32 { + return asm volatile ( + \\ xor %%ecx, %%ecx + \\ xgetbv + : [_] "={eax}" (-> u32), + : + : "edx", "ecx" + ); + } +}; + +/// Check if a bit is set at the given offset +inline fn hasBit(input: u32, offset: u5) bool { + return (input >> offset) & 1 != 0; +} + +/// Checks if a mask exactly matches the input +inline fn hasMask(input: u32, mask: u32) bool { + return (input & mask) == mask; +} diff --git a/src/simd/main.zig b/src/simd/main.zig new file mode 100644 index 000000000..d87d306ed --- /dev/null +++ b/src/simd/main.zig @@ -0,0 +1,8 @@ +const std = @import("std"); + +const isa = @import("isa.zig"); +pub usingnamespace isa; + +pub fn main() !void { + std.log.warn("ISA={}", .{isa.ISA.detect()}); +}