simd: remove old attempts

Mitchell Hashimoto
2024-02-04 11:19:19 -08:00
parent 449d3b49a4
commit 12885a445a
14 changed files with 13 additions and 5708 deletions

@@ -1007,9 +1007,6 @@ fn addDeps(
step.linkLibCpp();
step.addIncludePath(.{ .path = "src" });
step.addIncludePath(.{ .path = "src/simd" });
step.addCSourceFiles(.{ .files = &.{"src/simd/simdutf_c.cpp"} });
step.addIncludePath(.{ .path = "src/terminal/simdvt" });
step.addCSourceFiles(.{ .files = &.{"src/terminal/simdvt/example.cpp"} });
step.addCSourceFiles(.{ .files = &.{
"src/simd/index_of.cpp",
"src/simd/vt.cpp",

@@ -1,395 +0,0 @@
// https://developer.arm.com/architectures/instruction-sets/intrinsics
// https://llvm.org/docs/LangRef.html#inline-assembler-expressions
const std = @import("std");
const assert = std.debug.assert;
pub inline fn vaddlvq_u8(v: @Vector(16, u8)) u16 {
const result = asm (
\\ uaddlv %[ret:h], %[v].16b
: [ret] "=w" (-> @Vector(8, u16)),
: [v] "w" (v),
);
return result[0];
}
pub inline fn vaddvq_u8(v: @Vector(16, u8)) u8 {
const result = asm (
\\ addv %[ret:b], %[v].16b
: [ret] "=w" (-> @Vector(16, u8)),
: [v] "w" (v),
);
return result[0];
}
pub inline fn vaddv_u8(v: @Vector(8, u8)) u8 {
const result = asm (
\\ addv %[ret:b], %[v].8b
: [ret] "=w" (-> @Vector(8, u8)),
: [v] "w" (v),
);
return result[0];
}
pub inline fn vandq_u8(a: @Vector(16, u8), b: @Vector(16, u8)) @Vector(16, u8) {
return asm (
\\ and %[ret].16b, %[a].16b, %[b].16b
: [ret] "=w" (-> @Vector(16, u8)),
: [a] "w" (a),
[b] "w" (b),
);
}
pub inline fn vandq_u16(a: @Vector(8, u16), b: @Vector(8, u16)) @Vector(8, u16) {
return asm (
\\ and %[ret].16b, %[a].16b, %[b].16b
: [ret] "=w" (-> @Vector(8, u16)),
: [a] "w" (a),
[b] "w" (b),
);
}
pub inline fn vandq_u32(a: @Vector(4, u32), b: @Vector(4, u32)) @Vector(4, u32) {
return asm (
\\ and %[ret].16b, %[a].16b, %[b].16b
: [ret] "=w" (-> @Vector(4, u32)),
: [a] "w" (a),
[b] "w" (b),
);
}
pub inline fn vbicq_u16(a: @Vector(8, u16), b: @Vector(8, u16)) @Vector(8, u16) {
return asm (
\\ bic %[ret].16b, %[a].16b, %[b].16b
: [ret] "=w" (-> @Vector(8, u16)),
: [a] "w" (a),
[b] "w" (b),
);
}
pub inline fn vbslq_u32(
a: @Vector(4, u32),
b: @Vector(4, u32),
c: @Vector(4, u32),
) @Vector(4, u32) {
return asm (
\\ mov %[ret].16b, %[a].16b
\\ bsl %[ret].16b, %[b].16b, %[c].16b
: [ret] "=&w" (-> @Vector(4, u32)),
: [a] "w" (a),
[b] "w" (b),
[c] "w" (c),
);
}
pub inline fn vceqq_u8(a: @Vector(16, u8), b: @Vector(16, u8)) @Vector(16, u8) {
return asm (
\\ cmeq %[ret].16b, %[a].16b, %[b].16b
: [ret] "=w" (-> @Vector(16, u8)),
: [a] "w" (a),
[b] "w" (b),
);
}
pub inline fn vcgeq_u8(a: @Vector(16, u8), b: @Vector(16, u8)) @Vector(16, u8) {
return asm (
\\ cmhs %[ret].16b, %[a].16b, %[b].16b
: [ret] "=w" (-> @Vector(16, u8)),
: [a] "w" (a),
[b] "w" (b),
);
}
pub inline fn vcgtq_s8(a: @Vector(16, i8), b: @Vector(16, i8)) @Vector(16, u8) {
return asm (
\\ cmgt %[ret].16b, %[a].16b, %[b].16b
: [ret] "=w" (-> @Vector(16, u8)),
: [a] "w" (a),
[b] "w" (b),
);
}
pub inline fn vcgtq_u8(a: @Vector(16, u8), b: @Vector(16, u8)) @Vector(16, u8) {
return asm (
\\ cmhi %[ret].16b, %[a].16b, %[b].16b
: [ret] "=w" (-> @Vector(16, u8)),
: [a] "w" (a),
[b] "w" (b),
);
}
pub inline fn vcltq_s8(a: @Vector(16, i8), b: @Vector(16, i8)) @Vector(16, u8) {
return asm (
\\ cmgt %[ret].16b, %[b].16b, %[a].16b
: [ret] "=w" (-> @Vector(16, u8)),
: [a] "w" (a),
[b] "w" (b),
);
}
pub inline fn vcnt_u8(v: @Vector(8, u8)) @Vector(8, u8) {
return asm (
\\ cnt %[ret].8b, %[v].8b
: [ret] "=w" (-> @Vector(8, u8)),
: [v] "w" (v),
);
}
pub inline fn vcreate_u8(v: u64) @Vector(8, u8) {
return asm (
\\ ins %[ret].D[0], %[value]
: [ret] "=w" (-> @Vector(8, u8)),
: [value] "r" (v),
);
}
pub inline fn vdupq_n_s8(v: i8) @Vector(16, i8) {
return asm (
\\ dup %[ret].16b, %[value:w]
: [ret] "=w" (-> @Vector(16, i8)),
: [value] "r" (v),
);
}
pub inline fn vdupq_n_u8(v: u8) @Vector(16, u8) {
return asm (
\\ dup %[ret].16b, %[value:w]
: [ret] "=w" (-> @Vector(16, u8)),
: [value] "r" (v),
);
}
pub inline fn veorq_u8(a: @Vector(16, u8), b: @Vector(16, u8)) @Vector(16, u8) {
return asm (
\\ eor %[ret].16b, %[a].16b, %[b].16b
: [ret] "=w" (-> @Vector(16, u8)),
: [a] "w" (a),
[b] "w" (b),
);
}
pub inline fn vextq_u8(a: @Vector(16, u8), b: @Vector(16, u8), n: u8) @Vector(16, u8) {
assert(n <= 16);
return asm (
\\ ext %[ret].16b, %[a].16b, %[b].16b, %[n]
: [ret] "=w" (-> @Vector(16, u8)),
: [a] "w" (a),
[b] "w" (b),
[n] "I" (n),
);
}
pub inline fn vget_lane_u64(v: @Vector(1, u64)) u64 {
return asm (
\\ umov %[ret], %[v].d[0]
: [ret] "=r" (-> u64),
: [v] "w" (v),
);
}
pub inline fn vgetq_lane_u16(v: @Vector(8, u16), n: u3) u16 {
return asm (
\\ umov %[ret:w], %[v].h[%[n]]
: [ret] "=r" (-> u16),
: [v] "w" (v),
[n] "I" (n),
);
}
pub inline fn vgetq_lane_u64(v: @Vector(2, u64), n: u1) u64 {
return asm (
\\ umov %[ret], %[v].d[%[n]]
: [ret] "=r" (-> u64),
: [v] "w" (v),
[n] "I" (n),
);
}
pub inline fn vld1q_u8(v: []const u8) @Vector(16, u8) {
return asm (
\\ ld1 { %[ret].16b }, [%[value]]
: [ret] "=w" (-> @Vector(16, u8)),
: [value] "r" (v.ptr),
);
}
pub inline fn vmaxvq_u8(v: @Vector(16, u8)) u8 {
const result = asm (
\\ umaxv %[ret:b], %[v].16b
: [ret] "=w" (-> @Vector(16, u8)),
: [v] "w" (v),
);
return result[0];
}
pub inline fn vmovq_n_u32(v: u32) @Vector(4, u32) {
return asm (
\\ dup %[ret].4s, %[value:w]
: [ret] "=w" (-> @Vector(4, u32)),
: [value] "r" (v),
);
}
pub inline fn vmovq_n_u16(v: u16) @Vector(8, u16) {
return asm (
\\ dup %[ret].8h, %[value:w]
: [ret] "=w" (-> @Vector(8, u16)),
: [value] "r" (v),
);
}
pub inline fn vorrq_u8(a: @Vector(16, u8), b: @Vector(16, u8)) @Vector(16, u8) {
return asm (
\\ orr %[ret].16b, %[a].16b, %[b].16b
: [ret] "=w" (-> @Vector(16, u8)),
: [a] "w" (a),
[b] "w" (b),
);
}
pub inline fn vpaddq_u8(a: @Vector(16, u8), b: @Vector(16, u8)) @Vector(16, u8) {
return asm (
\\ addp %[ret].16b, %[a].16b, %[b].16b
: [ret] "=w" (-> @Vector(16, u8)),
: [a] "w" (a),
[b] "w" (b),
);
}
pub inline fn vqtbl1q_u8(t: @Vector(16, u8), idx: @Vector(16, u8)) @Vector(16, u8) {
return asm (
\\ tbl %[ret].16b, { %[t].16b }, %[idx].16b
: [ret] "=w" (-> @Vector(16, u8)),
: [idx] "w" (idx),
[t] "w" (t),
);
}
pub inline fn vqtbl2q_u8(t: [2]@Vector(16, u8), idx: @Vector(16, u8)) @Vector(16, u8) {
return asm (
\\ tbl %[ret].16b, { %[t0].16b, %[t1].16b }, %[idx].16b
: [ret] "=w" (-> @Vector(16, u8)),
: [idx] "w" (idx),
[t0] "w" (t[0]),
[t1] "w" (t[1]),
);
}
pub inline fn vshrn_n_u16(a: @Vector(8, u16), n: u4) @Vector(8, u8) {
assert(n <= 8);
return asm (
\\ shrn %[ret].8b, %[a].8h, %[n]
: [ret] "=w" (-> @Vector(8, u8)),
: [a] "w" (a),
[n] "I" (n),
);
}
pub inline fn vshrq_n_u8(a: @Vector(16, u8), n: u8) @Vector(16, u8) {
assert(n <= 8);
return asm (
\\ ushr %[ret].16b, %[a].16b, %[n]
: [ret] "=w" (-> @Vector(16, u8)),
: [a] "w" (a),
[n] "I" (n),
);
}
pub inline fn vshrq_n_u32(a: @Vector(4, u32), n: u8) @Vector(4, u32) {
assert(n <= 32);
return asm (
\\ ushr %[ret].4s, %[a].4s, %[n]
: [ret] "=w" (-> @Vector(4, u32)),
: [a] "w" (a),
[n] "I" (n),
);
}
pub inline fn vsraq_n_u8(a: @Vector(16, u8), b: @Vector(16, u8), n: u8) @Vector(16, u8) {
assert(n <= 8);
return asm (
\\ mov %[ret].16b, %[a].16b
\\ usra %[ret].16b, %[b].16b, %[n]
: [ret] "=&w" (-> @Vector(16, u8)),
: [a] "w" (a),
[b] "w" (b),
[n] "I" (n),
);
}
pub inline fn vsraq_n_u16(a: @Vector(8, u16), b: @Vector(8, u16), n: u4) @Vector(8, u16) {
assert(n <= 16);
// note: usra accumulates into its destination register; I couldn't find a
// safe way to express that constraint to the compiler without the extra mov.
return asm (
\\ mov %[ret].8h, %[a].8h
\\ usra %[ret].8h, %[b].8h, %[n]
: [ret] "=&w" (-> @Vector(8, u16)),
: [a] "w" (a),
[b] "w" (b),
[n] "I" (n),
);
}
pub inline fn vsraq_n_u32(a: @Vector(4, u32), b: @Vector(4, u32), n: u8) @Vector(4, u32) {
assert(n <= 32);
return asm (
\\ mov %[ret].4s, %[a].4s
\\ usra %[ret].4s, %[b].4s, %[n]
: [ret] "=&w" (-> @Vector(4, u32)),
: [a] "w" (a),
[b] "w" (b),
[n] "I" (n),
);
}
pub inline fn vst1q_u8(out: [*]u8, a: @Vector(16, u8)) void {
asm volatile (
\\ st1 { %[a].16b }, [%[out]]
:
: [out] "r" (out),
[a] "w" (a),
);
}
pub inline fn vst1q_u32(out: [*]u32, a: @Vector(4, u32)) void {
asm volatile (
\\ st1 { %[a].4s }, [%[out]]
:
: [out] "r" (out),
[a] "w" (a),
);
}
pub inline fn vst2q_u16(out: [*]u16, vs: [2]@Vector(8, u16)) void {
asm volatile (
\\ st2 { %[v1].8h - %[v2].8h }, [%[out]]
:
: [out] "r" (out),
[v1] "w" (vs[0]),
[v2] "w" (vs[1]),
);
}
pub inline fn rbit(comptime T: type, v: T) T {
assert(T == u32 or T == u64);
return asm (
\\ rbit %[ret], %[v]
: [ret] "=r" (-> T),
: [v] "r" (v),
);
}
pub inline fn clz(comptime T: type, v: T) T {
assert(T == u32 or T == u64);
return asm (
\\ clz %[ret], %[v]
: [ret] "=r" (-> T),
: [v] "r" (v),
);
}
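For comparison with Zig's @Vector lowering mentioned in index_of.zig, a few of the wrappers above have straightforward portable equivalents. This is an illustrative sketch only (hypothetical helpers, assuming a recent Zig with single-argument @splat), not code from the repository:

const std = @import("std");

// vdupq_n_u8: broadcast a byte across a 16-byte vector.
fn dupq(n: u8) @Vector(16, u8) {
    return @splat(n);
}

// vceqq_u8: 0xFF where the lanes are equal, 0x00 otherwise.
fn ceqq(a: @Vector(16, u8), b: @Vector(16, u8)) @Vector(16, u8) {
    const ones: @Vector(16, u8) = @splat(0xFF);
    const zeros: @Vector(16, u8) = @splat(0);
    return @select(u8, a == b, ones, zeros);
}

// vmaxvq_u8: horizontal max across the vector.
fn maxvq(v: @Vector(16, u8)) u8 {
    return @reduce(.Max, v);
}

test "portable equivalents" {
    const v = dupq(3);
    try std.testing.expectEqual(@as(u8, 3), maxvq(v));
    try std.testing.expectEqual(@as(u8, 0xFF), ceqq(v, dupq(3))[0]);
}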

@@ -1,120 +1,27 @@
const std = @import("std");
const builtin = @import("builtin");
const isa = @import("isa.zig");
const aarch64 = @import("aarch64.zig");
// Note this is a reimplementation of std.mem.indexOfScalar. The Zig stdlib
// version is already SIMD-optimized but not using runtime ISA detection. This
// expands the stdlib version to use runtime ISA detection. This also, at the
// time of writing this comment, reimplements it using manual assembly. This is
// so I can compare to Zig's @Vector lowering.
extern "c" fn ghostty_simd_index_of(
needle: u8,
input: [*]const u8,
count: usize,
) usize;
pub const IndexOf = fn ([]const u8, u8) ?usize;
/// Returns the indexOf function for the given ISA.
pub fn indexOfFunc(v: isa.ISA) *const IndexOf {
return isa.funcMap(IndexOf, v, .{
.{ .avx2, Scalar.indexOf }, // todo
.{ .neon, Neon.indexOf },
.{ .scalar, Scalar.indexOf },
});
pub fn indexOf(input: []const u8, needle: u8) ?usize {
const result = ghostty_simd_index_of(needle, input.ptr, input.len);
return if (result == input.len) null else result;
}
pub const Scalar = struct {
pub fn indexOf(input: []const u8, needle: u8) ?usize {
return std.mem.indexOfScalar(u8, input, needle);
}
};
pub const Neon = struct {
/// indexOf implementation using ARM NEON instructions.
pub fn indexOf(input: []const u8, needle: u8) ?usize {
// This function is commented in a lot of detail. SIMD is complicated and
// nonintuitive, so I want to make sure I understand what's going on now and
// can still follow this code when I come back to it in the future.
// Load our needle into a vector register. This duplicates the needle 16
// times, once for each byte in the 128-bit vector register.
const needle_vec = aarch64.vdupq_n_u8(needle);
// note(mitchellh): benchmark to see if we should align to 16 bytes here
// Iterate 16 bytes at a time, which is the max size of a vector register.
var i: usize = 0;
while (i + 16 <= input.len) : (i += 16) {
const input_vec = aarch64.vld1q_u8(input[i..]);
if (indexOfVec(input_vec, needle_vec)) |index| {
return i + index;
}
}
// Handle the remaining bytes
if (i < input.len) {
while (i < input.len) : (i += 1) {
if (input[i] == needle) return i;
}
}
return null;
}
pub fn indexOfVec(input_vec: @Vector(16, u8), needle_vec: @Vector(16, u8)) ?usize {
// Compare the input vector to the needle vector. This will set
// all bits to "1" in the output vector for each matching byte.
const match_vec = aarch64.vceqq_u8(input_vec, needle_vec);
// This is a neat trick in order to efficiently find the index of
// the first matching byte. Details for this can be found here:
// https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
const shift_vec = aarch64.vshrn_n_u16(@bitCast(match_vec), 4);
const shift_u64 = aarch64.vget_lane_u64(@bitCast(shift_vec));
if (shift_u64 == 0) {
// This means no matches were found.
return null;
}
// A match was found! Reverse the bits and count the leading zeros to get
// the index of the first matching byte. The bit reversal turns clz into a
// count of trailing zeros, and the shift by 2 (divide by 4) is because the
// narrowing shift above keeps 4 mask bits per input byte.
const reversed = aarch64.rbit(u64, shift_u64);
const index = aarch64.clz(u64, reversed) >> 2;
return index;
}
};
/// Generic test function so we can test against multiple implementations.
fn testIndexOf(func: *const IndexOf) !void {
test "indexOf" {
const testing = std.testing;
try testing.expect(func("hello", ' ') == null);
try testing.expectEqual(@as(usize, 2), func("hi lo", ' ').?);
try testing.expectEqual(@as(usize, 5), func(
try testing.expect(indexOf("hello", ' ') == null);
try testing.expectEqual(@as(usize, 2), indexOf("hi lo", ' ').?);
try testing.expectEqual(@as(usize, 5), indexOf(
\\XXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
\\XXXXXXXXXXXX XXXXXXXXXXX XXXXXXXXXXXXXXX
, ' ').?);
try testing.expectEqual(@as(usize, 53), func(
try testing.expectEqual(@as(usize, 53), indexOf(
\\XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
\\XXXXXXXXXXXX XXXXXXXXXXX XXXXXXXXXXXXXXX
, ' ').?);
}
pub const Hwy = struct {
extern "c" fn ghostty_simd_index_of(
needle: u8,
input: [*]const u8,
count: usize,
) usize;
pub fn indexOf(input: []const u8, needle: u8) ?usize {
const result = ghostty_simd_index_of(needle, input.ptr, input.len);
return if (result == input.len) null else result;
}
};
test "indexOf" {
const v = isa.detect();
var it = v.iterator();
while (it.next()) |isa_v| try testIndexOf(indexOfFunc(isa_v));
try testIndexOf(&Hwy.indexOf);
}
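The rbit/clz step in Neon.indexOfVec above has a simple scalar analogue: the narrowed 64-bit mask carries 4 bits per input byte, so the index of the first match is the count of trailing zero bits divided by 4. A hypothetical helper illustrating just that recovery step (not from the repository):

const std = @import("std");

// mask is the u64 produced by cmeq + shrn #4 (4 mask bits per input byte).
fn firstMatch(mask: u64) ?usize {
    if (mask == 0) return null;
    // rbit + clz on aarch64 is simply a count of trailing zeros.
    return @as(usize, @ctz(mask)) >> 2;
}

test "firstMatch" {
    try std.testing.expect(firstMatch(0) == null);
    // Byte 2 matched: bits 8..11 of the mask are set.
    try std.testing.expectEqual(@as(usize, 2), firstMatch(0xF00).?);
}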

@@ -1,204 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const assert = std.debug.assert;
const x86_64 = @import("x86_64.zig");
/// Raw comptime entry of a possible ISA. The arch list is the set of
/// architectures the ISA can even exist on (e.g. neon is only possible on
/// aarch64), but the actual ISA may still be unavailable at runtime.
const Entry = struct {
name: [:0]const u8,
arch: []const std.Target.Cpu.Arch = &.{},
};
const entries: []const Entry = &.{
.{ .name = "scalar" },
.{ .name = "neon", .arch = &.{.aarch64} },
.{ .name = "avx2", .arch = &.{ .x86, .x86_64 } },
};
/// Enum of possible ISAs for our SIMD operations. Note that these are
/// coarse-grained because they match possible implementations rather than
/// a fine-grained packed struct of available CPU features.
pub const ISA = isa: {
const EnumField = std.builtin.Type.EnumField;
var fields: [entries.len]EnumField = undefined;
for (entries, 0..) |entry, i| {
fields[i] = .{ .name = entry.name, .value = i };
}
break :isa @Type(.{ .Enum = .{
.tag_type = std.math.IntFittingRange(0, entries.len - 1),
.fields = &fields,
.decls = &.{},
.is_exhaustive = true,
} });
};
/// A set of ISAs.
pub const Set = std.EnumSet(ISA);
/// Check if the given ISA is possible on the current target. This is
/// available at comptime to help prevent invalid architectures from
/// being used.
pub fn possible(comptime isa: ISA) bool {
inline for (entries) |entry| {
if (std.mem.eql(u8, entry.name, @tagName(isa))) {
for (entry.arch) |arch| {
if (arch == builtin.cpu.arch) return true;
}
// If the entry has no arch restrictions then it's always possible.
return entry.arch.len == 0;
}
}
unreachable;
}
/// Detect all possible ISAs at runtime.
pub fn detect() Set {
var set: Set = .{};
set.insert(.scalar);
switch (builtin.cpu.arch) {
// Neon is mandatory on aarch64. No runtime checks necessary.
.aarch64 => set.insert(.neon),
.x86_64 => detectX86(&set),
else => {},
}
return set;
}
/// Returns the preferred ISA to use from those available in the given set.
pub fn preferred(set: Set) ISA {
const order: []const ISA = &.{ .avx2, .neon, .scalar };
// We should have all of our ISAs present in order
comptime {
for (@typeInfo(ISA).Enum.fields) |field| {
const v = @field(ISA, field.name);
assert(std.mem.indexOfScalar(ISA, order, v) != null);
}
}
inline for (order) |isa| {
if (comptime possible(isa)) {
if (set.contains(isa)) return isa;
}
}
return .scalar;
}
fn detectX86(set: *Set) void {
// NOTE: this is boilerplate to detect AVX2 and ONLY AVX2 right now. We
// can probably support earlier forms of SIMD such as plain SSE, and we
// can definitely take advantage of later forms.
// If we support less than 7 for the maximum leaf level then we
// don't support any AVX instructions.
var leaf = x86_64.cpuid(0, 0);
if (leaf.eax < 7) return;
// If we don't have xsave or avx, then we don't support anything.
leaf = x86_64.cpuid(1, 0);
const has_xsave = hasBit(leaf.ecx, 27);
const has_avx = hasBit(leaf.ecx, 28);
if (!has_xsave or !has_avx) return;
// We require AVX save state in order to use AVX instructions.
const xcr0_eax = x86_64.getXCR0(); // requires xsave+avx
const has_avx_save = hasMask(xcr0_eax, x86_64.XCR0_XMM | x86_64.XCR0_YMM);
if (!has_avx_save) return;
// Check for AVX2.
leaf = x86_64.cpuid(7, 0);
const has_avx2 = hasBit(leaf.ebx, 5);
if (has_avx2) set.insert(.avx2);
}
/// Check if a bit is set at the given offset
inline fn hasBit(input: u32, offset: u5) bool {
return (input >> offset) & 1 != 0;
}
/// Checks if a mask exactly matches the input
inline fn hasMask(input: u32, mask: u32) bool {
return (input & mask) == mask;
}
/// This is a helper to provide a runtime lookup map for the ISA to
/// the proper function implementation. Func is the function type,
/// and map is an array of tuples of the form (ISA, Struct) where
/// Struct has a decl named `name` that is a Func.
///
/// The slightly awkward parameters are to ensure that functions
/// are only analyzed for possible ISAs for the target.
///
/// This will ensure that impossible ISAs for the build target are
/// not included so they're not analyzed. For example, a NEON implementation
/// will not be included on x86_64.
pub fn funcMap(
comptime Func: type,
v: ISA,
comptime map: anytype,
) *const Func {
switch (v) {
inline else => |tag| {
// If this tag isn't possible, compile no code for this case.
if (comptime !possible(tag)) unreachable;
// Find the entry for this tag and return the function.
inline for (map) |entry| {
if (entry[0] == tag) {
// If we return &entry[1] directly the compiler crashes:
// https://github.com/ziglang/zig/issues/18754
const func = entry[1];
return &func;
}
} else unreachable;
},
}
}
test "detect" {
const testing = std.testing;
const set = detect();
try testing.expect(set.contains(.scalar));
switch (builtin.cpu.arch) {
.aarch64 => {
// Neon is always available on aarch64
try testing.expect(set.contains(.neon));
try testing.expect(!set.contains(.avx2));
},
else => {},
}
}
test "preferred" {
_ = preferred(detect());
}
test "possible" {
const testing = std.testing;
try testing.expect(possible(.scalar)); // always possible
// hardcode some other common realities
switch (builtin.cpu.arch) {
.aarch64 => {
try testing.expect(possible(.neon));
try testing.expect(!possible(.avx2));
},
.x86, .x86_64 => {
try testing.expect(!possible(.neon));
try testing.expect(possible(.avx2));
},
else => {},
}
}
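A typical call site for the removed dispatch machinery would combine detect, preferred, and one of the per-module *Func helpers. An illustrative sketch (hypothetical function, using declarations from the removed files):

const isa = @import("isa.zig");
const index_of = @import("index_of.zig");

pub fn firstEsc(haystack: []const u8) ?usize {
    // Detect what the CPU supports, pick the best ISA, then fetch the
    // matching implementation once and reuse it for the call.
    const set = isa.detect();
    const best = isa.preferred(set);
    const indexOf = index_of.indexOfFunc(best);
    return indexOf(haystack, 0x1B);
}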

@@ -1,24 +1,8 @@
const std = @import("std");
pub const isa = @import("isa.zig");
pub const aarch64 = @import("aarch64.zig");
pub const utf8_count = @import("utf8_count.zig");
pub const utf8_decode = @import("utf8_decode.zig");
pub const utf8_validate = @import("utf8_validate.zig");
pub const index_of = @import("index_of.zig");
pub const vt = @import("vt.zig");
// TODO: temporary, only for zig build simd to inspect disasm easily
// pub fn main() !void {
// //std.log.warn("ISA={}", .{isa.ISA.detect()});
// const input = "1234567\x1b1234567\x1b";
// //const input = "1234567812345678";
// std.log.warn("result={any}", .{index_of.indexOf(input, 0x1B)});
// std.log.warn("result={any}", .{utf8.utf8Validate(input)});
// }
test {
@import("std").testing.refAllDecls(@This());
}

@@ -1,11 +0,0 @@
#include "simdutf.cpp"
// This is just the C API we need from Zig. This is manually maintained
// because the surface area is so small.
extern "C" {
size_t simdutf_convert_utf8_to_utf32(const char *src, size_t len, char32_t *dst) {
return simdutf::convert_utf8_to_utf32(src, len, dst);
}
}

@@ -1,72 +0,0 @@
const std = @import("std");
const isa = @import("isa.zig");
const aarch64 = @import("aarch64.zig");
/// Count the number of UTF-8 codepoints in the given string. The string
/// is assumed to be valid UTF-8. Invalid UTF-8 will result in undefined
/// (and probably incorrect) behaviour.
pub const Count = fn ([]const u8) usize;
/// Returns the count function for the given ISA.
pub fn countFunc(v: isa.ISA) *const Count {
return isa.funcMap(Count, v, .{
.{ .avx2, Scalar.count }, // todo
.{ .neon, Neon.count },
.{ .scalar, Scalar.count },
});
}
pub const Scalar = struct {
pub fn count(input: []const u8) usize {
return std.unicode.utf8CountCodepoints(input) catch unreachable;
}
};
/// Arm NEON implementation of the count function.
pub const Neon = struct {
pub fn count(input: []const u8) usize {
var result: usize = 0;
var i: usize = 0;
while (i + 16 <= input.len) : (i += 16) {
const input_vec = aarch64.vld1q_u8(input[i..]);
result += @intCast(process(input_vec));
}
if (i < input.len) result += Scalar.count(input[i..]);
return result;
}
pub fn process(v: @Vector(16, u8)) u8 {
// Find all the bytes whose signed value is greater than -65 (0b10111111),
// i.e. everything except 0b10xxxxxx continuation bytes; these are the
// leading bytes of UTF-8 codepoints (including ASCII). This sets the
// resulting vector to 0xFF for all leading bytes and 0x00 for all others.
const mask = aarch64.vcgtq_s8(@bitCast(v), aarch64.vdupq_n_s8(-65));
// Shift to turn 0xFF to 0x01.
const mask_shift = aarch64.vshrq_n_u8(mask, 7);
// Sum across the vector
const sum = aarch64.vaddvq_u8(mask_shift);
// std.log.warn("mask={}", .{mask});
// std.log.warn("mask_shift={}", .{mask_shift});
// std.log.warn("sum={}", .{sum});
return sum;
}
};
/// Generic test function so we can test against multiple implementations.
/// This is initially copied from the Zig stdlib but may be expanded.
fn testCount(func: *const Count) !void {
const testing = std.testing;
try testing.expectEqual(@as(usize, 16), func("hello friends!!!"));
try testing.expectEqual(@as(usize, 10), func("abcdefghij"));
try testing.expectEqual(@as(usize, 10), func("äåéëþüúíóö"));
try testing.expectEqual(@as(usize, 5), func("こんにちは"));
}
test "count" {
const v = isa.detect();
var it = v.iterator();
while (it.next()) |isa_v| try testCount(countFunc(isa_v));
}
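Neon.process above relies on the fact that UTF-8 leading bytes (and ASCII) are exactly the bytes whose signed value is greater than -65, i.e. everything except 0b10xxxxxx continuation bytes. A scalar sketch of the same counting rule (hypothetical helper, not from the repository):

const std = @import("std");

// Count codepoints in (assumed valid) UTF-8 by counting every byte that is
// not a continuation byte.
fn countScalar(input: []const u8) usize {
    var n: usize = 0;
    for (input) |b| {
        // Same predicate as vcgtq_s8(v, vdupq_n_s8(-65)).
        if (@as(i8, @bitCast(b)) > -65) n += 1;
    }
    return n;
}

test "countScalar" {
    try std.testing.expectEqual(@as(usize, 10), countScalar("äåéëþüúíóö"));
    try std.testing.expectEqual(@as(usize, 5), countScalar("こんにちは"));
}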

@@ -1,93 +0,0 @@
const std = @import("std");
const assert = std.debug.assert;
const isa = @import("isa.zig");
const aarch64 = @import("aarch64.zig");
const utf_tables = @import("utf_tables.zig");
/// Decode UTF-8 codepoints to UTF-32. Returns the slice of `out` holding the
/// decoded codepoints. The output buffer must be large enough to hold them;
/// the worst case is one codepoint per input byte.
///
/// This also assumes the UTF-8 is valid. If it may not be, you should
/// validate first.
pub const Decode = fn ([]u32, []const u8) []const u32;
/// Returns the function for the given ISA.
pub fn decodeFunc(v: isa.ISA) *const Decode {
return isa.funcMap(Decode, v, .{
.{ .scalar, Simdutf.decode },
.{ .neon, Simdutf.decode },
.{ .avx2, Simdutf.decode }, // todo
});
}
pub const Stdlib = struct {
pub fn decode(out: []u32, in: []const u8) []const u32 {
const view = std.unicode.Utf8View.initUnchecked(in);
var it = view.iterator();
var i: usize = 0;
while (it.nextCodepoint()) |cp| {
out[i] = cp;
i += 1;
}
return out[0..i];
}
};
/// Uses the simdutf project
pub const Simdutf = struct {
pub fn decode(out: []u32, in: []const u8) []const u32 {
const len = simdutf_convert_utf8_to_utf32(
in.ptr,
in.len,
out.ptr,
);
return out[0..len];
}
extern "c" fn simdutf_convert_utf8_to_utf32(
[*]const u8,
usize,
[*]u32,
) usize;
};
/// Generic test function so we can test against multiple implementations.
fn testDecode(func: *const Decode) !void {
const testing = std.testing;
// This is pitifully small, but it's enough to test the basic logic.
// simdutf is extremely well tested, so we don't need to test the
// edge cases so much.
const inputs: []const []const u8 = &.{
"hello friends!!!",
"hello friends!!!",
"abc",
"abc\xdf\xbf",
"Ж",
"ЖЖ",
"брэд-ЛГТМ",
"☺☻☹",
"a\u{fffdb}",
"\xf4\x8f\xbf\xbf",
};
inline for (inputs) |input_raw| {
const input = if (input_raw.len >= 64) input_raw else input_raw ++ ("hello" ** 15);
assert(input.len >= 64);
var buf: [1024]u32 = undefined;
var buf2: [1024]u32 = undefined;
const scalar = Stdlib.decode(&buf, input);
const actual = func(&buf2, input);
try testing.expectEqualSlices(u32, scalar, actual);
}
}
test "decode" {
const v = isa.detect();
var it = v.iterator();
while (it.next()) |isa_v| try testDecode(decodeFunc(isa_v));
}
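A minimal usage sketch for the decode path above, using the Stdlib fallback so no C dependency is needed (hypothetical function; the output buffer is sized for the worst case of one codepoint per input byte):

const std = @import("std");
const utf8_decode = @import("utf8_decode.zig");

pub fn decodeExample() void {
    const in = "hello, 世界";
    // Worst case: one decoded codepoint (u32) per input byte.
    var out: [in.len]u32 = undefined;
    const cps = utf8_decode.Stdlib.decode(&out, in);
    std.debug.print("decoded {d} codepoints\n", .{cps.len});
}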

@@ -1,301 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const assert = std.debug.assert;
const isa = @import("isa.zig");
const aarch64 = @import("aarch64.zig");
// All of the work in this file is based heavily on the work of
// Daniel Lemire and John Keiser. Their original work can be found here:
// - https://arxiv.org/pdf/2010.03090.pdf
// - https://simdutf.github.io/simdutf/ (MIT License)
pub const Validate = fn ([]const u8) bool;
pub fn validateFunc(v: isa.ISA) *const Validate {
return isa.funcMap(Validate, v, .{
.{ .avx2, Scalar.validate }, // todo
.{ .neon, Neon.validate },
.{ .scalar, Scalar.validate },
});
}
pub const Scalar = struct {
pub fn validate(input: []const u8) bool {
return std.unicode.utf8ValidateSlice(input);
}
};
pub const Neon = struct {
/// The previous input in a vector. This is required because checking the
/// validity of a UTF-8 byte sometimes needs the previous state, e.g. when
/// the first byte of a chunk is a continuation byte.
prev_input: @Vector(16, u8),
/// The current error status. Once an error is set, it is never unset.
prev_error: @Vector(16, u8),
/// The current incomplete status. This is non-zero if the last chunk
/// requires more bytes to be valid UTF-8.
prev_incomplete: @Vector(16, u8),
pub fn init() Neon {
return .{
.prev_input = aarch64.vdupq_n_u8(0),
.prev_error = aarch64.vdupq_n_u8(0),
.prev_incomplete = aarch64.vdupq_n_u8(0),
};
}
pub fn validate(input: []const u8) bool {
var neon = Neon.init();
neon.feed(input);
neon.finalize();
return !neon.hasErrors();
}
/// Validate a chunk of UTF-8 data. This function is designed to be
/// called multiple times with successive chunks of data. When the
/// data is complete, you must call `finalize` to check for any
/// remaining errors.
pub fn feed(self: *Neon, input: []const u8) void {
// Break up our input into 16 byte chunks, and process each chunk
// separately. The size of a Neon register is 16 bytes.
var i: usize = 0;
while (i + 16 <= input.len) : (i += 16) {
const input_vec = aarch64.vld1q_u8(input[i..]);
self.process(input_vec);
}
// If we have any data remaining, we pad it with zeroes since that
// is valid UTF-8, and then treat it like a normal block.
if (i < input.len) {
const remaining = input.len - i;
assert(remaining < 16);
var buf: [16]u8 = undefined;
@memcpy(buf[0..remaining], input[i..]);
@memset(buf[remaining..], 0);
const input_vec = aarch64.vld1q_u8(&buf);
self.process(input_vec);
}
}
/// Call to finalize the validation (EOF is reached).
pub fn finalize(self: *Neon) void {
// It's possible for our last chunk to end expecting more
// continuation bytes.
self.prev_error = aarch64.vorrq_u8(self.prev_error, self.prev_incomplete);
}
/// Returns true if there are any errors.
pub fn hasErrors(self: *Neon) bool {
return aarch64.vmaxvq_u8(self.prev_error) != 0;
}
/// Process a single vector of input.
///
/// This function generally isn't called directly, but it is very useful
/// if you want to compose this validation with other SIMD operations
/// and already have your data in a SIMD register.
pub fn process(self: *Neon, input_vec: @Vector(16, u8)) void {
// If all we have is ASCII, then we can skip the rest.
if (aarch64.vmaxvq_u8(input_vec) < 0b10000000) {
self.prev_error = aarch64.vorrq_u8(self.prev_error, self.prev_incomplete);
return;
}
const prev1 = aarch64.vextq_u8(self.prev_input, input_vec, 15);
const prev1_shr4 = aarch64.vshrq_n_u8(prev1, 4);
const prev1_lownibs = aarch64.vandq_u8(prev1, aarch64.vdupq_n_u8(0x0F));
const input_highnibs = aarch64.vshrq_n_u8(input_vec, 4);
const byte_1_high = aarch64.vqtbl1q_u8(byte1HighTable(), prev1_shr4);
const byte_2_low = aarch64.vqtbl1q_u8(byte2LowTable(), prev1_lownibs);
const byte_2_high = aarch64.vqtbl1q_u8(byte2HighTable(), input_highnibs);
const special_cases = aarch64.vandq_u8(
byte_1_high,
aarch64.vandq_u8(byte_2_low, byte_2_high),
);
const prev2 = aarch64.vextq_u8(self.prev_input, input_vec, 14);
const prev3 = aarch64.vextq_u8(self.prev_input, input_vec, 13);
const is_third_byte = aarch64.vcgeq_u8(prev2, aarch64.vdupq_n_u8(0xE0));
const is_fourth_byte = aarch64.vcgeq_u8(prev3, aarch64.vdupq_n_u8(0xF0));
const must23 = aarch64.veorq_u8(is_third_byte, is_fourth_byte);
const must23_80 = aarch64.vandq_u8(must23, aarch64.vdupq_n_u8(0x80));
const multibyte_len = aarch64.veorq_u8(must23_80, special_cases);
self.prev_error = aarch64.vorrq_u8(self.prev_error, multibyte_len);
self.prev_input = input_vec;
self.prev_incomplete = aarch64.vcgtq_u8(input_vec, incomplete: {
var bytes: [16]u8 = .{255} ** 16;
bytes[15] = 0b11000000 - 1;
bytes[14] = 0b11100000 - 1;
bytes[13] = 0b11110000 - 1;
break :incomplete aarch64.vld1q_u8(&bytes);
});
// Debug all the vector registers:
// std.log.warn("input={}", .{input_vec});
// std.log.warn("prev_input={}", .{self.prev_input});
// std.log.warn("prev1={}", .{prev1});
// std.log.warn("prev1_shr4={}", .{prev1_shr4});
// std.log.warn("prev1_lownibs={}", .{prev1_lownibs});
// std.log.warn("input_highnibs={}", .{input_highnibs});
// std.log.warn("byte_1_high={}", .{byte_1_high});
// std.log.warn("byte_2_low={}", .{byte_2_low});
// std.log.warn("byte_2_high={}", .{byte_2_high});
// std.log.warn("special_cases={}", .{special_cases});
// std.log.warn("prev2={}", .{prev2});
// std.log.warn("prev3={}", .{prev3});
// std.log.warn("is_third_byte={}", .{is_third_byte});
// std.log.warn("is_fourth_byte={}", .{is_fourth_byte});
// std.log.warn("must23={}", .{must23});
// std.log.warn("must23_80={}", .{must23_80});
// std.log.warn("multibyte_len={}", .{multibyte_len});
// std.log.warn("error={}", .{self.prev_error});
// std.log.warn("incomplete={}", .{self.prev_incomplete});
}
inline fn byte1HighTable() @Vector(16, u8) {
// zig fmt: off
return aarch64.vld1q_u8(&.{
// 0_______ ________ <ASCII in byte 1>
TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
// 10______ ________ <continuation in byte 1>
TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
// 1100____ ________ <two byte lead in byte 1>
TOO_SHORT | OVERLONG_2,
// 1101____ ________ <two byte lead in byte 1>
TOO_SHORT,
// 1110____ ________ <three byte lead in byte 1>
TOO_SHORT | OVERLONG_3 | SURROGATE,
// 1111____ ________ <four+ byte lead in byte 1>
TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4
});
// zig fmt: on
}
inline fn byte2LowTable() @Vector(16, u8) {
// zig fmt: off
return aarch64.vld1q_u8(&.{
// ____0000 ________
CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
// ____0001 ________
CARRY | OVERLONG_2,
// ____001_ ________
CARRY,
CARRY,
// ____0100 ________
CARRY | TOO_LARGE,
// ____0101 ________
CARRY | TOO_LARGE | TOO_LARGE_1000,
// ____011_ ________
CARRY | TOO_LARGE | TOO_LARGE_1000,
CARRY | TOO_LARGE | TOO_LARGE_1000,
// ____1___ ________
CARRY | TOO_LARGE | TOO_LARGE_1000,
CARRY | TOO_LARGE | TOO_LARGE_1000,
CARRY | TOO_LARGE | TOO_LARGE_1000,
CARRY | TOO_LARGE | TOO_LARGE_1000,
CARRY | TOO_LARGE | TOO_LARGE_1000,
// ____1101 ________
CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
CARRY | TOO_LARGE | TOO_LARGE_1000,
CARRY | TOO_LARGE | TOO_LARGE_1000
});
// zig fmt: on
}
inline fn byte2HighTable() @Vector(16, u8) {
// zig fmt: off
return aarch64.vld1q_u8(&.{
// ________ 0_______ <ASCII in byte 2>
TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
// ________ 1000____
TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
// ________ 1001____
TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
// ________ 101_____
TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
// ________ 11______
TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
});
// zig fmt: on
}
};
// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
// Bit 1 = Too Long (ASCII followed by continuation)
// Bit 2 = Overlong 3-byte
// Bit 4 = Surrogate
// Bit 5 = Overlong 2-byte
// Bit 7 = Two Continuations
const TOO_SHORT: u8 = 1 << 0; // 11______ 0_______
// 11______ 11______
const TOO_LONG: u8 = 1 << 1; // 0_______ 10______
const OVERLONG_3: u8 = 1 << 2; // 11100000 100_____
const SURROGATE: u8 = 1 << 4; // 11101101 101_____
const OVERLONG_2: u8 = 1 << 5; // 1100000_ 10______
const TWO_CONTS: u8 = 1 << 7; // 10______ 10______
const TOO_LARGE: u8 = 1 << 3; // 11110100 1001____
// 11110100 101_____
// 11110101 1001____
// 11110101 101_____
// 1111011_ 1001____
// 1111011_ 101_____
// 11111___ 1001____
// 11111___ 101_____
const TOO_LARGE_1000: u8 = 1 << 6;
// 11110101 1000____
// 1111011_ 1000____
// 11111___ 1000____
const OVERLONG_4: u8 = 1 << 6; // 11110000 1000____
const CARRY: u8 = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
/// Generic test function so we can test against multiple implementations.
/// This is initially copied from the Zig stdlib but may be expanded.
fn testValidate(func: *const Validate) !void {
const testing = std.testing;
try testing.expect(func("hello friends!!!"));
try testing.expect(func("abc"));
try testing.expect(func("abc\xdf\xbf"));
try testing.expect(func(""));
try testing.expect(func("a"));
try testing.expect(func("abc"));
try testing.expect(func("Ж"));
try testing.expect(func("ЖЖ"));
try testing.expect(func("брэд-ЛГТМ"));
try testing.expect(func("☺☻☹"));
try testing.expect(func("a\u{fffdb}"));
try testing.expect(func("\xf4\x8f\xbf\xbf"));
try testing.expect(func("abc\xdf\xbf"));
try testing.expect(!func("abc\xc0"));
try testing.expect(!func("abc\xc0abc"));
try testing.expect(!func("aa\xe2"));
try testing.expect(!func("\x42\xfa"));
try testing.expect(!func("\x42\xfa\x43"));
try testing.expect(!func("abc\xc0"));
try testing.expect(!func("abc\xc0abc"));
try testing.expect(!func("\xf4\x90\x80\x80"));
try testing.expect(!func("\xf7\xbf\xbf\xbf"));
try testing.expect(!func("\xfb\xbf\xbf\xbf\xbf"));
try testing.expect(!func("\xc0\x80"));
try testing.expect(!func("\xed\xa0\x80"));
try testing.expect(!func("\xed\xbf\xbf"));
}
test "validate" {
const v = isa.detect();
var it = v.iterator();
while (it.next()) |isa_v| try testValidate(validateFunc(isa_v));
}
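For streaming input, the removed Neon validator is fed in chunks and finalized at end of input. An illustrative sketch of that flow (hypothetical function; intermediate chunks are assumed to be multiples of 16 bytes because feed zero-pads any trailing partial block):

const utf8_validate = @import("utf8_validate.zig");

pub fn validateChunks(chunks: []const []const u8) bool {
    var v = utf8_validate.Neon.init();
    // Feed successive chunks, then fold in any pending "incomplete" state.
    for (chunks) |chunk| v.feed(chunk);
    v.finalize();
    return !v.hasErrors();
}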

File diff suppressed because it is too large

@@ -1,44 +0,0 @@
pub const XCR0_XMM = 0x02;
pub const XCR0_YMM = 0x04;
pub const XCR0_MASKREG = 0x20;
pub const XCR0_ZMM0_15 = 0x40;
pub const XCR0_ZMM16_31 = 0x80;
pub const CpuidLeaf = packed struct {
eax: u32,
ebx: u32,
ecx: u32,
edx: u32,
};
/// Wrapper around x86 and x86_64 `cpuid` in order to gather processor
/// and feature information. This is explicitly and specifically only
/// for x86 and x86_64.
pub fn cpuid(leaf_id: u32, subid: u32) CpuidLeaf {
var eax: u32 = undefined;
var ebx: u32 = undefined;
var ecx: u32 = undefined;
var edx: u32 = undefined;
asm volatile ("cpuid"
: [_] "={eax}" (eax),
[_] "={ebx}" (ebx),
[_] "={ecx}" (ecx),
[_] "={edx}" (edx),
: [_] "{eax}" (leaf_id),
[_] "{ecx}" (subid),
);
return .{ .eax = eax, .ebx = ebx, .ecx = ecx, .edx = edx };
}
// Read the extended control register 0 (XCR0). Used to detect features such as AVX.
pub fn getXCR0() u32 {
return asm volatile (
\\ xor %%ecx, %%ecx
\\ xgetbv
: [_] "={eax}" (-> u32),
:
: "edx", "ecx"
);
}
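The cpuid/getXCR0 pair above is what isa.zig's detectX86 builds on; a condensed sketch of the same AVX2 check (hypothetical function, bit positions as in the removed detectX86):

const x86_64 = @import("x86_64.zig");

// AVX2 is usable only if the CPU reports it and the OS saves XMM/YMM state.
pub fn hasAvx2() bool {
    if (x86_64.cpuid(0, 0).eax < 7) return false;

    const leaf1 = x86_64.cpuid(1, 0);
    const has_xsave = (leaf1.ecx >> 27) & 1 != 0;
    const has_avx = (leaf1.ecx >> 28) & 1 != 0;
    if (!has_xsave or !has_avx) return false;

    const mask: u32 = x86_64.XCR0_XMM | x86_64.XCR0_YMM;
    if (x86_64.getXCR0() & mask != mask) return false;

    return (x86_64.cpuid(7, 0).ebx >> 5) & 1 != 0;
}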

@@ -43,11 +43,6 @@ pub const EraseLine = csi.EraseLine;
pub const TabClear = csi.TabClear;
pub const Attribute = sgr.Attribute;
// TODO: we only have a hardcoded Neon implementation for now
pub usingnamespace if (builtin.target.cpu.arch == .aarch64) struct {
pub const simdvt = @import("simdvt.zig");
} else struct {};
/// If we're targeting wasm then we export some wasm APIs.
pub usingnamespace if (builtin.target.isWasm()) struct {
pub usingnamespace @import("wasm.zig");

@@ -1,63 +0,0 @@
// Generates code for every target that this compiler can support.
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "example.cpp" // this file
#include <hwy/foreach_target.h> // must come before highway.h
#include <hwy/highway.h>
HWY_BEFORE_NAMESPACE();
namespace ghostty {
namespace HWY_NAMESPACE { // required: unique per target
// Can skip hn:: prefixes if already inside hwy::HWY_NAMESPACE.
namespace hn = hwy::HWY_NAMESPACE;
using T = float;
// Alternative to per-function HWY_ATTR: see HWY_BEFORE_NAMESPACE
void MulAddLoop(const T* HWY_RESTRICT mul_array,
const T* HWY_RESTRICT add_array,
const size_t size, T* HWY_RESTRICT x_array) {
const hn::ScalableTag<T> d;
for (size_t i = 0; i < size; i += hn::Lanes(d)) {
const auto mul = hn::Load(d, mul_array + i);
const auto add = hn::Load(d, add_array + i);
auto x = hn::Load(d, x_array + i);
x = hn::MulAdd(mul, x, add);
hn::Store(x, d, x_array + i);
}
}
} // namespace HWY_NAMESPACE
} // namespace ghostty
HWY_AFTER_NAMESPACE();
// The table of pointers to the various implementations in HWY_NAMESPACE must
// be compiled only once (foreach_target #includes this file multiple times).
// HWY_ONCE is true for only one of these 'compilation passes'.
#if HWY_ONCE
namespace ghostty {
// This macro declares a static array used for dynamic dispatch.
HWY_EXPORT(MulAddLoop);
void CallMulAddLoop(const float* HWY_RESTRICT mul_array,
const float* HWY_RESTRICT add_array,
const size_t size, float* HWY_RESTRICT x_array) {
// This must reside outside of HWY_NAMESPACE because it references (calls the
// appropriate one from) the per-target implementations there.
// For static dispatch, use HWY_STATIC_DISPATCH.
return HWY_DYNAMIC_DISPATCH(MulAddLoop)(mul_array, add_array, size, x_array);
}
} // namespace ghostty
extern "C" float example() {
float mul_array[] {1, 2, 3, 4, 5};
float add_array[] {2, 3, 4, 5, 6};
float x_array[] {0, 0, 0, 0, 0};
ghostty::CallMulAddLoop(mul_array, add_array, 5, x_array);
return x_array[0];
}
#endif // HWY_ONCE

@@ -1,82 +0,0 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
const terminal = @import("../main.zig");
const ScalarStream = terminal.Stream;
const simd = @import("../../simd/main.zig");
const aarch64 = simd.aarch64;
pub fn Stream(comptime Handler: type) type {
return struct {
const Self = @This();
handler: Handler,
pub fn init(h: Handler) Self {
return .{ .handler = h };
}
pub fn feed(self: *Self, input: []const u8) void {
// TODO: I want to do the UTF-8 decoding as we stream the input,
// but I don't want to deal with UTF-8 decode in SIMD right now.
// So for now we just go back over the input and decode using
// a scalar loop. Ugh.
// We search for ESC (0x1B) very frequently, since this is what triggers
// the start of a terminal escape sequence of any kind, so put this into
// a register immediately.
const esc_vec = aarch64.vdupq_n_u8(0x1B);
// Iterate 16 bytes at a time, which is the max size of a vector register.
var i: usize = 0;
while (i + 16 <= input.len) : (i += 16) {
// Load the next 16 bytes into a vector register.
const input_vec = aarch64.vld1q_u8(input[i..]);
// Check for ESC to determine if we should go to the next state.
if (simd.index_of.Neon.indexOfVec(input_vec, esc_vec)) |index| {
_ = index;
@panic("TODO");
}
// No ESC found, decode UTF-8.
// TODO(mitchellh): I don't have a UTF-8 decoder in SIMD yet, so
// for now we just use a scalar loop. This is slow.
const view = std.unicode.Utf8View.initUnchecked(input[i .. i + 16]);
var it = view.iterator();
while (it.nextCodepoint()) |cp| {
self.handler.print(cp);
}
}
// Handle the remaining bytes
if (i < input.len) {
@panic("input must be a multiple of 16 bytes for now");
}
}
};
}
test "ascii" {
const testing = std.testing;
var arena = ArenaAllocator.init(testing.allocator);
defer arena.deinit();
const alloc = arena.allocator();
const H = struct {
const Self = @This();
alloc: Allocator,
buf: std.ArrayListUnmanaged(u21) = .{},
pub fn print(self: *Self, c: u21) void {
self.buf.append(self.alloc, c) catch unreachable;
}
};
const str = "hello" ** 16;
var s = Stream(H).init(.{ .alloc = alloc });
s.feed(str);
try testing.expectEqual(str.len, s.handler.buf.items.len);
}