mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-15 00:06:09 +03:00
terminal: use highway-based indexOf to support all targets
This commit is contained in:
@ -1005,10 +1005,12 @@ fn addDeps(
|
|||||||
|
|
||||||
// C++ files
|
// C++ files
|
||||||
step.linkLibCpp();
|
step.linkLibCpp();
|
||||||
|
step.addIncludePath(.{ .path = "src" });
|
||||||
step.addIncludePath(.{ .path = "src/simd" });
|
step.addIncludePath(.{ .path = "src/simd" });
|
||||||
step.addCSourceFiles(.{ .files = &.{"src/simd/simdutf_c.cpp"} });
|
step.addCSourceFiles(.{ .files = &.{"src/simd/simdutf_c.cpp"} });
|
||||||
step.addIncludePath(.{ .path = "src/terminal/simdvt" });
|
step.addIncludePath(.{ .path = "src/terminal/simdvt" });
|
||||||
step.addCSourceFiles(.{ .files = &.{"src/terminal/simdvt/example.cpp"} });
|
step.addCSourceFiles(.{ .files = &.{"src/terminal/simdvt/example.cpp"} });
|
||||||
|
step.addCSourceFiles(.{ .files = &.{"src/simd/index_of.cpp"} });
|
||||||
|
|
||||||
// If we're building a lib we have some different deps
|
// If we're building a lib we have some different deps
|
||||||
const lib = step.kind == .lib;
|
const lib = step.kind == .lib;
|
||||||
|
104
src/simd/index_of.cpp
Normal file
104
src/simd/index_of.cpp
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
// Generates code for every target that this compiler can support.
|
||||||
|
#undef HWY_TARGET_INCLUDE
|
||||||
|
#define HWY_TARGET_INCLUDE "simd/index_of.cpp" // this file
|
||||||
|
#include <hwy/foreach_target.h> // must come before highway.h
|
||||||
|
#include <hwy/highway.h>
|
||||||
|
|
||||||
|
HWY_BEFORE_NAMESPACE();
|
||||||
|
namespace ghostty {
|
||||||
|
namespace HWY_NAMESPACE {
|
||||||
|
|
||||||
|
namespace hn = hwy::HWY_NAMESPACE;
|
||||||
|
|
||||||
|
// Return the index of the first occurrence of `needle` in `input` or
|
||||||
|
// `count` if not found.
|
||||||
|
template <class D, typename T = hn::TFromD<D>>
|
||||||
|
size_t IndexOfImpl(D d, T needle, const T* HWY_RESTRICT input, size_t count) {
|
||||||
|
// Note: due to the simplicity of this operation and the general complexity
|
||||||
|
// of SIMD, I'm going to overly comment this function to help explain the
|
||||||
|
// implementation for future maintainers.
|
||||||
|
|
||||||
|
// The number of lanes in the vector type.
|
||||||
|
const size_t N = hn::Lanes(d);
|
||||||
|
|
||||||
|
// Create a vector with all lanes set to `needle` so we can do a lane-wise
|
||||||
|
// comparison with the input.
|
||||||
|
const hn::Vec<D> needle_vec = Set(d, needle);
|
||||||
|
|
||||||
|
// Compare N elements at a time.
|
||||||
|
size_t i = 0;
|
||||||
|
for (; i + N <= count; i += N) {
|
||||||
|
// Load the N elements from our input into a vector.
|
||||||
|
const hn::Vec<D> input_vec = hn::LoadU(d, input + i);
|
||||||
|
|
||||||
|
// Compare the input vector with the needle vector. This produces
|
||||||
|
// a vector where each lane is 0xFF if the corresponding lane in
|
||||||
|
// `input_vec` is equal to the corresponding lane in `needle_vec`.
|
||||||
|
const hn::Mask<D> eq_mask = hn::Eq(needle_vec, input_vec);
|
||||||
|
|
||||||
|
// Find the index within the vector where the first true value is.
|
||||||
|
const intptr_t pos = hn::FindFirstTrue(d, eq_mask);
|
||||||
|
|
||||||
|
// If we found a match, return the index into the input.
|
||||||
|
if (pos >= 0) return i + static_cast<size_t>(pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Since we compare N elements at a time, we may have some elements left
|
||||||
|
// if count modulo N != 0. We need to scan the remaining elements. To
|
||||||
|
// be simple, we search one element at a time.
|
||||||
|
if (i != count) {
|
||||||
|
// Create a new vector with only one relevant lane.
|
||||||
|
const hn::CappedTag<T, 1> d1;
|
||||||
|
using D1 = decltype(d1);
|
||||||
|
|
||||||
|
// Get an equally sized needle vector with only one lane.
|
||||||
|
const hn::Vec<D1> needle1 = Set(d1, GetLane(needle_vec));
|
||||||
|
|
||||||
|
// Go through the remaining elements and do similar logic to
|
||||||
|
// the previous loop to find any matches.
|
||||||
|
for (; i < count; ++i) {
|
||||||
|
const hn::Vec<D1> input_vec = hn::LoadU(d1, input + i);
|
||||||
|
const hn::Mask<D1> eq_mask = hn::Eq(needle1, input_vec);
|
||||||
|
if (hn::AllTrue(d1, eq_mask)) return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t IndexOf(const uint8_t needle,
|
||||||
|
const uint8_t* HWY_RESTRICT input,
|
||||||
|
size_t count) {
|
||||||
|
const hn::ScalableTag<uint8_t> d;
|
||||||
|
return IndexOfImpl(d, needle, input, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace HWY_NAMESPACE
|
||||||
|
} // namespace ghostty
|
||||||
|
HWY_AFTER_NAMESPACE();
|
||||||
|
|
||||||
|
// HWY_ONCE is true for only one of the target passes
|
||||||
|
#if HWY_ONCE
|
||||||
|
|
||||||
|
namespace ghostty {
|
||||||
|
|
||||||
|
// This macro declares a static array used for dynamic dispatch.
|
||||||
|
HWY_EXPORT(IndexOf);
|
||||||
|
|
||||||
|
size_t IndexOf(const uint8_t needle,
|
||||||
|
const uint8_t* HWY_RESTRICT input,
|
||||||
|
size_t count) {
|
||||||
|
return HWY_DYNAMIC_DISPATCH(IndexOf)(needle, input, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace ghostty
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
|
||||||
|
size_t ghostty_simd_index_of(const uint8_t needle, const uint8_t* HWY_RESTRICT input, size_t count) {
|
||||||
|
return ghostty::IndexOf(needle, input, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // HWY_ONCE
|
@ -99,8 +99,22 @@ fn testIndexOf(func: *const IndexOf) !void {
|
|||||||
, ' ').?);
|
, ' ').?);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Byte search backed by the C function `ghostty_simd_index_of`.
pub const Hwy = struct {
    /// C entry point. The wrapper below treats a return value equal to
    /// `count` as "not found".
    extern "c" fn ghostty_simd_index_of(
        needle: u8,
        input: [*]const u8,
        count: usize,
    ) usize;

    /// Returns the index of the first `needle` in `input`, or null when
    /// the C function reports `input.len` (its "no match" sentinel).
    pub fn indexOf(input: []const u8, needle: u8) ?usize {
        const idx = ghostty_simd_index_of(needle, input.ptr, input.len);
        if (idx == input.len) return null;
        return idx;
    }
};
|
||||||
|
|
||||||
test "indexOf" {
|
test "indexOf" {
|
||||||
const v = isa.detect();
|
const v = isa.detect();
|
||||||
var it = v.iterator();
|
var it = v.iterator();
|
||||||
while (it.next()) |isa_v| try testIndexOf(indexOfFunc(isa_v));
|
while (it.next()) |isa_v| try testIndexOf(indexOfFunc(isa_v));
|
||||||
|
try testIndexOf(&Hwy.indexOf);
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,8 @@
|
|||||||
#include <hwy/foreach_target.h> // must come before highway.h
|
#include <hwy/foreach_target.h> // must come before highway.h
|
||||||
#include <hwy/highway.h>
|
#include <hwy/highway.h>
|
||||||
|
|
||||||
namespace project {
|
HWY_BEFORE_NAMESPACE();
|
||||||
|
namespace ghostty {
|
||||||
namespace HWY_NAMESPACE { // required: unique per target
|
namespace HWY_NAMESPACE { // required: unique per target
|
||||||
|
|
||||||
// Can skip hn:: prefixes if already inside hwy::HWY_NAMESPACE.
|
// Can skip hn:: prefixes if already inside hwy::HWY_NAMESPACE.
|
||||||
@ -13,7 +14,7 @@ namespace hn = hwy::HWY_NAMESPACE;
|
|||||||
using T = float;
|
using T = float;
|
||||||
|
|
||||||
// Alternative to per-function HWY_ATTR: see HWY_BEFORE_NAMESPACE
|
// Alternative to per-function HWY_ATTR: see HWY_BEFORE_NAMESPACE
|
||||||
HWY_ATTR void MulAddLoop(const T* HWY_RESTRICT mul_array,
|
void MulAddLoop(const T* HWY_RESTRICT mul_array,
|
||||||
const T* HWY_RESTRICT add_array,
|
const T* HWY_RESTRICT add_array,
|
||||||
const size_t size, T* HWY_RESTRICT x_array) {
|
const size_t size, T* HWY_RESTRICT x_array) {
|
||||||
const hn::ScalableTag<T> d;
|
const hn::ScalableTag<T> d;
|
||||||
@ -27,14 +28,15 @@ HWY_ATTR void MulAddLoop(const T* HWY_RESTRICT mul_array,
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace HWY_NAMESPACE
|
} // namespace HWY_NAMESPACE
|
||||||
} // namespace project
|
} // namespace ghostty
|
||||||
|
HWY_AFTER_NAMESPACE();
|
||||||
|
|
||||||
// The table of pointers to the various implementations in HWY_NAMESPACE must
|
// The table of pointers to the various implementations in HWY_NAMESPACE must
|
||||||
// be compiled only once (foreach_target #includes this file multiple times).
|
// be compiled only once (foreach_target #includes this file multiple times).
|
||||||
// HWY_ONCE is true for only one of these 'compilation passes'.
|
// HWY_ONCE is true for only one of these 'compilation passes'.
|
||||||
#if HWY_ONCE
|
#if HWY_ONCE
|
||||||
|
|
||||||
namespace project {
|
namespace ghostty {
|
||||||
|
|
||||||
// This macro declares a static array used for dynamic dispatch.
|
// This macro declares a static array used for dynamic dispatch.
|
||||||
HWY_EXPORT(MulAddLoop);
|
HWY_EXPORT(MulAddLoop);
|
||||||
@ -48,13 +50,13 @@ void CallMulAddLoop(const float* HWY_RESTRICT mul_array,
|
|||||||
return HWY_DYNAMIC_DISPATCH(MulAddLoop)(mul_array, add_array, size, x_array);
|
return HWY_DYNAMIC_DISPATCH(MulAddLoop)(mul_array, add_array, size, x_array);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace project
|
} // namespace ghostty
|
||||||
|
|
||||||
extern "C" float example() {
|
extern "C" float example() {
|
||||||
float mul_array[] {1, 2, 3, 4, 5};
|
float mul_array[] {1, 2, 3, 4, 5};
|
||||||
float add_array[] {2, 3, 4, 5, 6};
|
float add_array[] {2, 3, 4, 5, 6};
|
||||||
float x_array[] {0, 0, 0, 0, 0};
|
float x_array[] {0, 0, 0, 0, 0};
|
||||||
project::CallMulAddLoop(mul_array, add_array, 5, x_array);
|
ghostty::CallMulAddLoop(mul_array, add_array, 5, x_array);
|
||||||
return x_array[0];
|
return x_array[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,13 +53,6 @@ pub fn Stream(comptime Handler: type) type {
|
|||||||
|
|
||||||
/// Process a string of characters.
|
/// Process a string of characters.
|
||||||
pub fn nextSlice(self: *Self, c: []const u8) !void {
|
pub fn nextSlice(self: *Self, c: []const u8) !void {
|
||||||
// TODO: we only have a direct Neon implementation of the fast
|
|
||||||
// path right now, just for testing.
|
|
||||||
if (comptime !simd.isa.possible(.neon)) {
|
|
||||||
for (c) |single| try self.next(single);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we're not in the ground state then we process until we are.
|
// If we're not in the ground state then we process until we are.
|
||||||
var offset: usize = 0;
|
var offset: usize = 0;
|
||||||
if (self.parser.state != .ground) {
|
if (self.parser.state != .ground) {
|
||||||
@ -76,7 +69,7 @@ pub fn Stream(comptime Handler: type) type {
|
|||||||
while (self.parser.state == .ground and offset < c.len) {
|
while (self.parser.state == .ground and offset < c.len) {
|
||||||
// Find the next ESC character to trigger a control sequence.
|
// Find the next ESC character to trigger a control sequence.
|
||||||
//const idx = std.mem.indexOfScalar(u8, c[offset..], 0x1B) orelse {
|
//const idx = std.mem.indexOfScalar(u8, c[offset..], 0x1B) orelse {
|
||||||
const idx = simd.index_of.Neon.indexOf(c[offset..], 0x1B) orelse {
|
const idx = simd.index_of.Hwy.indexOf(c[offset..], 0x1B) orelse {
|
||||||
// No ESC character, remainder is all UTF-8.
|
// No ESC character, remainder is all UTF-8.
|
||||||
try self.nextAssumeUtf8(c[offset..]);
|
try self.nextAssumeUtf8(c[offset..]);
|
||||||
return;
|
return;
|
||||||
|
Reference in New Issue
Block a user