simd: basic ISA detection, boilerplate

This commit is contained in:
Mitchell Hashimoto
2024-01-27 22:20:20 -08:00
parent 4362eeaedc
commit 31d5785105
4 changed files with 147 additions and 0 deletions

View File

@ -204,6 +204,28 @@ pub fn build(b: *std.Build) !void {
// Add our benchmarks // Add our benchmarks
try benchSteps(b, target, optimize, config, emit_bench); try benchSteps(b, target, optimize, config, emit_bench);
// TODO: temporary simd tester binary
{
const simd_exe = b.addExecutable(.{
.name = "simd",
.root_source_file = .{ .path = "src/simd/main.zig" },
.target = target,
.optimize = optimize,
});
{
const simd_install = b.addInstallArtifact(simd_exe, .{});
const step = b.step("simd", "Build the simd test exe");
step.dependOn(&simd_install.step);
}
{
const simd_run = b.addRunArtifact(simd_exe);
const step = b.step("simd-run", "Run the app");
step.dependOn(&simd_run.step);
}
}
// We only build an exe if we have a runtime set. // We only build an exe if we have a runtime set.
const exe_: ?*std.Build.Step.Compile = if (config.app_runtime != .none) b.addExecutable(.{ const exe_: ?*std.Build.Step.Compile = if (config.app_runtime != .none) b.addExecutable(.{
.name = "ghostty", .name = "ghostty",

View File

@ -12,6 +12,7 @@
parallel, parallel,
pkg-config, pkg-config,
python3, python3,
qemu,
scdoc, scdoc,
tracy, tracy,
valgrind, valgrind,
@ -110,6 +111,10 @@ in
# by default so we have to include this. # by default so we have to include this.
bashInteractive bashInteractive
# Used for testing SIMD codegen. This is Linux only because the macOS
# build only has the qemu-system files.
qemu
gdb gdb
valgrind valgrind
wraptest wraptest

112
src/simd/isa.zig Normal file
View File

@ -0,0 +1,112 @@
const std = @import("std");
const builtin = @import("builtin");
/// The SIMD instruction set families Ghostty distinguishes between. The
/// list is deliberately coarse: it only names the families we need to
/// tell apart when choosing an implementation.
pub const ISA = enum {
    scalar,
    neon,
    avx2,

    /// Determine which ISA is usable on the running CPU. The target
    /// architecture is known at comptime, so a runtime probe is only
    /// performed for x86_64; every other architecture resolves statically.
    pub fn detect() ISA {
        switch (builtin.cpu.arch) {
            // aarch64 mandates Neon, so there is nothing to probe.
            .aarch64 => return .neon,
            .x86_64 => return detectX86(),
            else => return .scalar,
        }
    }

    /// Probe `cpuid` and XCR0 for AVX2 support.
    ///
    /// This is intentionally minimal boilerplate: AVX2 is the only thing
    /// recognized. Older SIMD levels (such as plain SSE) and newer ones
    /// could be detected here later; for now anything that is not AVX2 is
    /// reported as `.scalar`.
    fn detectX86() ISA {
        // Leaf 0 reports the maximum supported leaf in eax. Leaf 7 is
        // queried below, so it must be available.
        const max_leaf = X86.cpuid(0, 0).eax;
        if (max_leaf < 7) return .scalar;

        // Leaf 1, ecx: bit 27 is OSXSAVE (the OS has enabled xsave, which
        // makes `xgetbv` usable) and bit 28 is AVX. Both are required.
        const features = X86.cpuid(1, 0);
        const has_osxsave = hasBit(features.ecx, 27);
        const has_avx = hasBit(features.ecx, 28);
        if (!(has_osxsave and has_avx)) return .scalar;

        // The XMM and YMM state bits must both be set in XCR0 before AVX
        // instructions may be used. Reading XCR0 is only valid after the
        // checks above have passed.
        const xcr0 = X86.getXCR0();
        if (!hasMask(xcr0, X86.XCR0_XMM | X86.XCR0_YMM)) return .scalar;

        // Leaf 7 (subleaf 0), ebx bit 5 is the AVX2 feature flag.
        const extended = X86.cpuid(7, 0);
        return if (hasBit(extended.ebx, 5)) .avx2 else .scalar;
    }
};
/// Helpers that are specific to x86 and x86_64: XCR0 bit masks plus thin
/// wrappers over the `cpuid` and `xgetbv` instructions. The Intel
/// Architectures Software Developer's Manual (mostly the cpuid material)
/// is the reference for the values used here.
const X86 = struct {
    // Bit masks tested against the value returned by `getXCR0`.
    const XCR0_XMM = 1 << 1;
    const XCR0_YMM = 1 << 2;
    const XCR0_MASKREG = 1 << 5;
    const XCR0_ZMM0_15 = 1 << 6;
    const XCR0_ZMM16_31 = 1 << 7;

    /// The four registers written by a single `cpuid` invocation.
    const CpuidLeaf = packed struct {
        eax: u32,
        ebx: u32,
        ecx: u32,
        edx: u32,
    };

    /// Execute `cpuid` with `leaf_id` in eax and `subid` in ecx and return
    /// the resulting eax/ebx/ecx/edx values. Only meaningful on x86 and
    /// x86_64.
    fn cpuid(leaf_id: u32, subid: u32) CpuidLeaf {
        // Each local is bound to one output register of the instruction.
        var a: u32 = undefined;
        var b: u32 = undefined;
        var c: u32 = undefined;
        var d: u32 = undefined;
        asm volatile ("cpuid"
            : [_] "={eax}" (a),
              [_] "={ebx}" (b),
              [_] "={ecx}" (c),
              [_] "={edx}" (d),
            : [_] "{eax}" (leaf_id),
              [_] "{ecx}" (subid),
        );
        return CpuidLeaf{ .eax = a, .ebx = b, .ecx = c, .edx = d };
    }

    /// Read control register 0 (XCR0), which is used to detect features
    /// such as AVX. ecx is zeroed before `xgetbv` runs; the value left in
    /// eax is returned, while edx is only listed as a clobber and its
    /// contents are discarded.
    fn getXCR0() u32 {
        const xcr0_eax: u32 = asm volatile (
            \\ xor %%ecx, %%ecx
            \\ xgetbv
            : [_] "={eax}" (-> u32),
            :
            : "edx", "ecx"
        );
        return xcr0_eax;
    }
};
/// Report whether the bit at position `offset` of `input` is set, where
/// offset 0 is the least significant bit.
inline fn hasBit(input: u32, offset: u5) bool {
    const bit = @as(u32, 1) << offset;
    return (input & bit) != 0;
}
/// Report whether every bit set in `mask` is also set in `input`. Bits of
/// `input` outside of `mask` are ignored.
inline fn hasMask(input: u32, mask: u32) bool {
    const missing = mask & ~input;
    return missing == 0;
}

8
src/simd/main.zig Normal file
View File

@ -0,0 +1,8 @@
const std = @import("std");
const isa = @import("isa.zig");
pub usingnamespace isa;
/// Entry point of the SIMD tester binary: detects the ISA and logs it.
pub fn main() !void {
    const detected = isa.ISA.detect();
    std.log.warn("ISA={}", .{detected});
}