Summary of this patch (text before the first "diff --git" header is not part of any hunk):
  * build.zig: add "src" as an include path and compile src/simd/index_of.cpp.
  * src/simd/index_of.cpp (new): per-target IndexOf built on the HWY_* macros, dispatched
    at runtime via HWY_DYNAMIC_DISPATCH and exported with C linkage as ghostty_simd_index_of.
  * src/simd/index_of.zig: add the Hwy wrapper, which maps a result equal to input.len to null,
    and run the existing testIndexOf suite against it.
  * src/terminal/simdvt/example.cpp: rename namespace project -> ghostty and replace the
    per-function HWY_ATTR with HWY_BEFORE_NAMESPACE()/HWY_AFTER_NAMESPACE().
  * src/terminal/stream.zig: drop the non-Neon byte-at-a-time fallback in nextSlice and call
    simd.index_of.Hwy.indexOf instead of simd.index_of.Neon.indexOf.

diff --git a/build.zig b/build.zig
index d10936a3e..9fd1802ed 100644
--- a/build.zig
+++ b/build.zig
@@ -1005,10 +1005,12 @@ fn addDeps(
 
     // C++ files
     step.linkLibCpp();
+    step.addIncludePath(.{ .path = "src" });
     step.addIncludePath(.{ .path = "src/simd" });
     step.addCSourceFiles(.{ .files = &.{"src/simd/simdutf_c.cpp"} });
     step.addIncludePath(.{ .path = "src/terminal/simdvt" });
     step.addCSourceFiles(.{ .files = &.{"src/terminal/simdvt/example.cpp"} });
+    step.addCSourceFiles(.{ .files = &.{"src/simd/index_of.cpp"} });
 
     // If we're building a lib we have some different deps
     const lib = step.kind == .lib;
diff --git a/src/simd/index_of.cpp b/src/simd/index_of.cpp
new file mode 100644
index 000000000..38846adc1
--- /dev/null
+++ b/src/simd/index_of.cpp
@@ -0,0 +1,104 @@
+// Generates code for every target that this compiler can support.
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "simd/index_of.cpp"  // this file
+#include <hwy/foreach_target.h>  // must come before highway.h
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace ghostty {
+namespace HWY_NAMESPACE {
+
+namespace hn = hwy::HWY_NAMESPACE;
+
+// Return the index of the first occurrence of `needle` in `input` or
+// `count` if not found.
+template <class D, typename T = hn::TFromD<D>>
+size_t IndexOfImpl(D d, T needle, const T* HWY_RESTRICT input, size_t count) {
+  // Note: due to the simplicity of this operation and the general complexity
+  // of SIMD, I'm going to overly comment this function to help explain the
+  // implementation for future maintainers.
+
+  // The number of lanes in the vector type.
+  const size_t N = hn::Lanes(d);
+
+  // Create a vector with all lanes set to `needle` so we can do a lane-wise
+  // comparison with the input.
+  const hn::Vec<D> needle_vec = Set(d, needle);
+
+  // Compare N elements at a time.
+  size_t i = 0;
+  for (; i + N <= count; i += N) {
+    // Load the N elements from our input into a vector.
+    const hn::Vec<D> input_vec = hn::LoadU(d, input + i);
+
+    // Compare the input vector with the needle vector. This produces
+    // a mask in which a lane is true if the corresponding lane in
+    // `input_vec` is equal to the corresponding lane in `needle_vec`.
+    const hn::Mask<D> eq_mask = hn::Eq(needle_vec, input_vec);
+
+    // Find the index within the vector where the first true value is.
+    const intptr_t pos = hn::FindFirstTrue(d, eq_mask);
+
+    // If we found a match, return the index into the input.
+    if (pos >= 0) return i + static_cast<size_t>(pos);
+  }
+
+  // Since we compare N elements at a time, we may have some elements left
+  // if count modulo N != 0. We need to scan the remaining elements. To
+  // be simple, we search one element at a time.
+  if (i != count) {
+    // Create a new vector with only one relevant lane.
+    const hn::CappedTag<T, 1> d1;
+    using D1 = decltype(d1);
+
+    // Get an equally sized needle vector with only one lane.
+    const hn::Vec<D1> needle1 = Set(d1, GetLane(needle_vec));
+
+    // Go through the remaining elements and do similar logic to
+    // the previous loop to find any matches.
+    for (; i < count; ++i) {
+      const hn::Vec<D1> input_vec = hn::LoadU(d1, input + i);
+      const hn::Mask<D1> eq_mask = hn::Eq(needle1, input_vec);
+      if (hn::AllTrue(d1, eq_mask)) return i;
+    }
+  }
+
+  return count;
+}
+
+size_t IndexOf(const uint8_t needle,
+               const uint8_t* HWY_RESTRICT input,
+               size_t count) {
+  const hn::ScalableTag<uint8_t> d;
+  return IndexOfImpl(d, needle, input, count);
+}
+
+}  // namespace HWY_NAMESPACE
+}  // namespace ghostty
+HWY_AFTER_NAMESPACE();
+
+// HWY_ONCE is true for only one of the target passes
+#if HWY_ONCE
+
+namespace ghostty {
+
+// This macro declares a static array used for dynamic dispatch.
+HWY_EXPORT(IndexOf);
+
+size_t IndexOf(const uint8_t needle,
+               const uint8_t* HWY_RESTRICT input,
+               size_t count) {
+  return HWY_DYNAMIC_DISPATCH(IndexOf)(needle, input, count);
+}
+
+}  // namespace ghostty
+
+extern "C" {
+
+size_t ghostty_simd_index_of(const uint8_t needle, const uint8_t* HWY_RESTRICT input, size_t count) {
+  return ghostty::IndexOf(needle, input, count);
+}
+
+}
+
+#endif  // HWY_ONCE
diff --git a/src/simd/index_of.zig b/src/simd/index_of.zig
index 4b4affe56..5647d6bc4 100644
--- a/src/simd/index_of.zig
+++ b/src/simd/index_of.zig
@@ -99,8 +99,22 @@ fn testIndexOf(func: *const IndexOf) !void {
     , ' ').?);
 }
 
+pub const Hwy = struct {
+    extern "c" fn ghostty_simd_index_of(
+        needle: u8,
+        input: [*]const u8,
+        count: usize,
+    ) usize;
+
+    pub fn indexOf(input: []const u8, needle: u8) ?usize {
+        const result = ghostty_simd_index_of(needle, input.ptr, input.len);
+        return if (result == input.len) null else result;
+    }
+};
+
 test "indexOf" {
     const v = isa.detect();
     var it = v.iterator();
     while (it.next()) |isa_v| try testIndexOf(indexOfFunc(isa_v));
+    try testIndexOf(&Hwy.indexOf);
 }
diff --git a/src/terminal/simdvt/example.cpp b/src/terminal/simdvt/example.cpp
index 894a0e451..e51cb69a8 100644
--- a/src/terminal/simdvt/example.cpp
+++ b/src/terminal/simdvt/example.cpp
@@ -4,7 +4,8 @@
 #include <hwy/foreach_target.h>  // must come before highway.h
 #include <hwy/highway.h>
 
-namespace project {
+HWY_BEFORE_NAMESPACE();
+namespace ghostty {
 namespace HWY_NAMESPACE {  // required: unique per target
 
 // Can skip hn:: prefixes if already inside hwy::HWY_NAMESPACE.
@@ -13,7 +14,7 @@ namespace hn = hwy::HWY_NAMESPACE;
 using T = float;
 
 // Alternative to per-function HWY_ATTR: see HWY_BEFORE_NAMESPACE
-HWY_ATTR void MulAddLoop(const T* HWY_RESTRICT mul_array,
+void MulAddLoop(const T* HWY_RESTRICT mul_array,
                 const T* HWY_RESTRICT add_array,
                 const size_t size, T* HWY_RESTRICT x_array) {
   const hn::ScalableTag<T> d;
@@ -27,14 +28,15 @@ HWY_ATTR void MulAddLoop(const T* HWY_RESTRICT mul_array,
 }
 
 }  // namespace HWY_NAMESPACE
-}  // namespace project
+}  // namespace ghostty
+HWY_AFTER_NAMESPACE();
 
 // The table of pointers to the various implementations in HWY_NAMESPACE must
 // be compiled only once (foreach_target #includes this file multiple times).
 // HWY_ONCE is true for only one of these 'compilation passes'.
 #if HWY_ONCE
 
-namespace project {
+namespace ghostty {
 
 // This macro declares a static array used for dynamic dispatch.
 HWY_EXPORT(MulAddLoop);
@@ -48,13 +50,13 @@ void CallMulAddLoop(const float* HWY_RESTRICT mul_array,
   return HWY_DYNAMIC_DISPATCH(MulAddLoop)(mul_array, add_array, size, x_array);
 }
 
-}  // namespace project
+}  // namespace ghostty
 
 extern "C" float example() {
   float mul_array[] {1, 2, 3, 4, 5};
   float add_array[] {2, 3, 4, 5, 6};
   float x_array[] {0, 0, 0, 0, 0};
-  project::CallMulAddLoop(mul_array, add_array, 5, x_array);
+  ghostty::CallMulAddLoop(mul_array, add_array, 5, x_array);
   return x_array[0];
 }
 
diff --git a/src/terminal/stream.zig b/src/terminal/stream.zig
index c562918f0..7ac46ab5a 100644
--- a/src/terminal/stream.zig
+++ b/src/terminal/stream.zig
@@ -53,13 +53,6 @@ pub fn Stream(comptime Handler: type) type {
 
         /// Process a string of characters.
         pub fn nextSlice(self: *Self, c: []const u8) !void {
-            // TODO: we only have a direct Neon implementation of the fast
-            // path right now, just for testing.
-            if (comptime !simd.isa.possible(.neon)) {
-                for (c) |single| try self.next(single);
-                return;
-            }
-
             // If we're not in the ground state then we process until we are.
             var offset: usize = 0;
             if (self.parser.state != .ground) {
@@ -76,7 +69,7 @@ pub fn Stream(comptime Handler: type) type {
             while (self.parser.state == .ground and offset < c.len) {
                 // Find the next ESC character to trigger a control sequence.
                 //const idx = std.mem.indexOfScalar(u8, c[offset..], 0x1B) orelse {
-                const idx = simd.index_of.Neon.indexOf(c[offset..], 0x1B) orelse {
+                const idx = simd.index_of.Hwy.indexOf(c[offset..], 0x1B) orelse {
                     // No ESC character, remainder is all UTF-8.
                     try self.nextAssumeUtf8(c[offset..]);
                     return;