diff --git a/build.zig b/build.zig
index 282760689..d10936a3e 100644
--- a/build.zig
+++ b/build.zig
@@ -947,6 +947,10 @@ fn addDeps(
         .target = target,
         .optimize = optimize,
     });
+    const highway_dep = b.dependency("highway", .{
+        .target = target,
+        .optimize = optimize,
+    });
     const libpng_dep = b.dependency("libpng", .{
         .target = target,
         .optimize = optimize,
@@ -1003,6 +1007,8 @@ fn addDeps(
     step.linkLibCpp();
     step.addIncludePath(.{ .path = "src/simd" });
     step.addCSourceFiles(.{ .files = &.{"src/simd/simdutf_c.cpp"} });
+    step.addIncludePath(.{ .path = "src/terminal/simdvt" });
+    step.addCSourceFiles(.{ .files = &.{"src/terminal/simdvt/example.cpp"} });
 
     // If we're building a lib we have some different deps
     const lib = step.kind == .lib;
@@ -1054,6 +1060,10 @@ fn addDeps(
     step.linkLibrary(glslang_dep.artifact("glslang"));
     try static_libs.append(glslang_dep.artifact("glslang").getEmittedBin());
 
+    // Highway
+    step.linkLibrary(highway_dep.artifact("highway"));
+    try static_libs.append(highway_dep.artifact("highway").getEmittedBin());
+
     // Spirv-Cross
     step.linkLibrary(spirv_cross_dep.artifact("spirv_cross"));
     try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin());
diff --git a/build.zig.zon b/build.zig.zon
index d42fa0fc1..ce295cc28 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -31,6 +31,7 @@
         .fontconfig = .{ .path = "./pkg/fontconfig" },
         .freetype = .{ .path = "./pkg/freetype" },
         .harfbuzz = .{ .path = "./pkg/harfbuzz" },
+        .highway = .{ .path = "./pkg/highway" },
         .libpng = .{ .path = "./pkg/libpng" },
         .macos = .{ .path = "./pkg/macos" },
         .oniguruma = .{ .path = "./pkg/oniguruma" },
diff --git a/pkg/highway/build.zig b/pkg/highway/build.zig
new file mode 100644
index 000000000..47625a572
--- /dev/null
+++ b/pkg/highway/build.zig
@@ -0,0 +1,113 @@
+const std = @import("std");
+
+/// Builds Google Highway as a static library named "highway" and exposes a
+/// "highway" module rooted at main.zig.
+pub fn build(b: *std.Build) !void {
+    const target = b.standardTargetOptions(.{});
+    const optimize = b.standardOptimizeOption(.{});
+
+    const upstream = b.dependency("highway", .{});
+
+    const module = b.addModule("highway", .{
+        .root_source_file = .{ .path = "main.zig" },
+        .target = target,
+        .optimize = optimize,
+    });
+
+    const lib = b.addStaticLibrary(.{
+        .name = "highway",
+        .target = target,
+        .optimize = optimize,
+    });
+    lib.linkLibCpp();
+    lib.addIncludePath(upstream.path(""));
+    module.addIncludePath(upstream.path(""));
+
+    if (target.result.isDarwin()) {
+        const apple_sdk = @import("apple_sdk");
+        try apple_sdk.addPaths(b, &lib.root_module);
+        try apple_sdk.addPaths(b, module);
+    }
+
+    var flags = std.ArrayList([]const u8).init(b.allocator);
+    defer flags.deinit();
+    try flags.appendSlice(&.{
+        // Avoid changing binaries based on the current time and date.
+        "-Wno-builtin-macro-redefined",
+        "-D__DATE__=\"redacted\"",
+        "-D__TIMESTAMP__=\"redacted\"",
+        "-D__TIME__=\"redacted\"",
+
+        // Optimizations
+        "-fmerge-all-constants",
+
+        // Warnings
+        "-Wall",
+        "-Wextra",
+
+        // These are not included in Wall nor Wextra:
+        "-Wconversion",
+        "-Wsign-conversion",
+        "-Wvla",
+        "-Wnon-virtual-dtor",
+
+        "-Wfloat-overflow-conversion",
+        "-Wfloat-zero-conversion",
+        "-Wfor-loop-analysis",
+        "-Wgnu-redeclared-enum",
+        "-Winfinite-recursion",
+        "-Wself-assign",
+        "-Wstring-conversion",
+        "-Wtautological-overlap-compare",
+        "-Wthread-safety-analysis",
+        "-Wundefined-func-template",
+
+        "-fno-cxx-exceptions",
+        "-fno-slp-vectorize",
+        "-fno-vectorize",
+    });
+    if (target.result.os.tag != .windows) {
+        try flags.appendSlice(&.{
+            "-fmath-errno",
+            "-fno-exceptions",
+        });
+    }
+
+    lib.addCSourceFiles(.{
+        .dependency = upstream,
+        .flags = flags.items,
+        .files = &.{
+            "hwy/aligned_allocator.cc",
+            "hwy/nanobenchmark.cc",
+            "hwy/per_target.cc",
+            "hwy/print.cc",
+            "hwy/targets.cc",
+            "hwy/timer.cc",
+        },
+    });
+    lib.installHeadersDirectoryOptions(.{
+        .source_dir = upstream.path("hwy"),
+        .install_dir = .header,
+        .install_subdir = "hwy",
+        .include_extensions = &.{".h"},
+    });
+
+    b.installArtifact(lib);
+
+    // "test" step: builds main.zig as a test binary linked against the library and runs it.
+    {
+        const test_exe = b.addTest(.{
+            .name = "test",
+            .root_source_file = .{ .path = "main.zig" },
+            .target = target,
+            .optimize = optimize,
+        });
+        test_exe.linkLibrary(lib);
+
+        var it = module.import_table.iterator();
+        while (it.next()) |entry| test_exe.root_module.addImport(entry.key_ptr.*, entry.value_ptr.*);
+        const tests_run = b.addRunArtifact(test_exe);
+        const test_step = b.step("test", "Run tests");
+        test_step.dependOn(&tests_run.step);
+    }
+}
diff --git a/pkg/highway/build.zig.zon b/pkg/highway/build.zig.zon
new file mode 100644
index 000000000..b0af8c3c1
--- /dev/null
+++ b/pkg/highway/build.zig.zon
@@ -0,0 +1,13 @@
+.{
+    .name = "highway",
+    .version = "1.0.7",
+    .paths = .{""},
+    .dependencies = .{
+        .highway = .{
+            .url = "https://github.com/google/highway/archive/refs/tags/1.0.7.tar.gz",
+            .hash = "122060ea43a9403ad53b4a33e19416c0e9949fb3e175035791bd2b7462091079d5a2",
+        },
+
+        .apple_sdk = .{ .path = "../apple-sdk" },
+    },
+}
diff --git a/pkg/highway/main.zig b/pkg/highway/main.zig
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/terminal/simdvt/example.cpp b/src/terminal/simdvt/example.cpp
new file mode 100644
index 000000000..894a0e451
--- /dev/null
+++ b/src/terminal/simdvt/example.cpp
@@ -0,0 +1,84 @@
+// Generates code for every target that this compiler can support.
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "example.cpp" // this file
+#include <hwy/foreach_target.h> // must come before highway.h
+#include <hwy/highway.h>
+
+namespace project {
+namespace HWY_NAMESPACE { // required: unique per target
+
+// Can skip hn:: prefixes if already inside hwy::HWY_NAMESPACE.
+namespace hn = hwy::HWY_NAMESPACE;
+
+using T = float;
+
+// Computes x_array[i] = mul_array[i] * x_array[i] + add_array[i] in place for
+// every i in [0, size). `size` may be any value (it does not have to be a
+// multiple of the vector length) and the arrays need no vector alignment.
+//
+// Alternative to per-function HWY_ATTR: see HWY_BEFORE_NAMESPACE
+HWY_ATTR void MulAddLoop(const T* HWY_RESTRICT mul_array,
+                const T* HWY_RESTRICT add_array,
+                const size_t size, T* HWY_RESTRICT x_array) {
+  const hn::ScalableTag<T> d;
+  const size_t lanes = hn::Lanes(d);
+
+  // Whole vectors. Only iterate while a full vector still fits so that we
+  // never load or store past the end of the arrays. LoadU/StoreU are used
+  // because callers pass plain arrays that are not vector-aligned.
+  size_t i = 0;
+  for (; i + lanes <= size; i += lanes) {
+    const auto mul = hn::LoadU(d, mul_array + i);
+    const auto add = hn::LoadU(d, add_array + i);
+    auto x = hn::LoadU(d, x_array + i);
+    x = hn::MulAdd(mul, x, add);
+    hn::StoreU(x, d, x_array + i);
+  }
+
+  // Remainder: handle the last (size % lanes) elements one lane at a time.
+  const hn::CappedTag<T, 1> d1;
+  for (; i < size; ++i) {
+    const auto mul = hn::LoadU(d1, mul_array + i);
+    const auto add = hn::LoadU(d1, add_array + i);
+    auto x = hn::LoadU(d1, x_array + i);
+    x = hn::MulAdd(mul, x, add);
+    hn::StoreU(x, d1, x_array + i);
+  }
+}
+
+} // namespace HWY_NAMESPACE
+} // namespace project
+
+// The table of pointers to the various implementations in HWY_NAMESPACE must
+// be compiled only once (foreach_target #includes this file multiple times).
+// HWY_ONCE is true for only one of these 'compilation passes'.
+#if HWY_ONCE
+
+namespace project {
+
+// This macro declares a static array used for dynamic dispatch.
+HWY_EXPORT(MulAddLoop);
+
+// Forwards to the per-target MulAddLoop chosen by dynamic dispatch.
+void CallMulAddLoop(const float* HWY_RESTRICT mul_array,
+                const float* HWY_RESTRICT add_array,
+                const size_t size, float* HWY_RESTRICT x_array) {
+  // This must reside outside of HWY_NAMESPACE because it references (calls the
+  // appropriate one from) the per-target implementations there.
+  // For static dispatch, use HWY_STATIC_DISPATCH.
+  return HWY_DYNAMIC_DISPATCH(MulAddLoop)(mul_array, add_array, size, x_array);
+}
+
+} // namespace project
+
+// Exported with C linkage: runs the mul-add kernel over five elements and
+// returns the first result (1 * 0 + 2).
+extern "C" float example() {
+  float mul_array[] {1, 2, 3, 4, 5};
+  float add_array[] {2, 3, 4, 5, 6};
+  float x_array[] {0, 0, 0, 0, 0};
+  project::CallMulAddLoop(mul_array, add_array, 5, x_array);
+  return x_array[0];
+}
+
+#endif // HWY_ONCE