Merge pull request #1472 from mitchellh/simd

SIMD Beginnings
Authored by Mitchell Hashimoto on 2024-02-05 21:46:32 -08:00; committed by GitHub.
32 changed files with 38120 additions and 46 deletions

.clang-format Normal file (182 changed lines)

@ -0,0 +1,182 @@
---
Language: Cpp
# BasedOnStyle: Chromium
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveMacros: false
AlignConsecutiveAssignments: false
AlignConsecutiveBitFields: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Left
AlignOperands: Align
AlignTrailingComments: true
AllowAllArgumentsOnNextLine: true
AllowAllConstructorInitializersOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortEnumsOnASingleLine: true
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortLambdasOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Never
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: true
BinPackParameters: false
BraceWrapping:
AfterCaseLabel: false
AfterClass: false
AfterControlStatement: Never
AfterEnum: false
AfterFunction: false
AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false
AfterExternBlock: false
BeforeCatch: false
BeforeElse: false
BeforeLambdaBody: false
BeforeWhile: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: "^ IWYU pragma:"
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DeriveLineEnding: true
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Preserve
IncludeCategories:
- Regex: '^<ext/.*\.h>'
Priority: 2
SortPriority: 0
- Regex: '^<.*\.h>'
Priority: 1
SortPriority: 0
- Regex: "^<.*"
Priority: 2
SortPriority: 0
- Regex: ".*"
Priority: 3
SortPriority: 0
IncludeIsMainRegex: "([-_](test|unittest))?$"
IncludeIsMainSourceRegex: ""
IndentCaseLabels: true
IndentCaseBlocks: false
IndentGotoLabels: true
IndentPPDirectives: None
IndentExternBlock: AfterExternBlock
IndentWidth: 2
IndentWrappedFunctionNames: false
InsertTrailingCommas: None
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ""
MacroBlockEnd: ""
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCBreakBeforeNestedBlockParam: true
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
RawStringFormats:
- Language: Cpp
Delimiters:
- cc
- CC
- cpp
- Cpp
- CPP
- "c++"
- "C++"
CanonicalDelimiter: ""
BasedOnStyle: google
- Language: TextProto
Delimiters:
- pb
- PB
- proto
- PROTO
EnclosingFunctions:
- EqualsProto
- EquivToProto
- PARSE_PARTIAL_TEXT_PROTO
- PARSE_TEST_PROTO
- PARSE_TEXT_PROTO
- ParseTextOrDie
- ParseTextProtoOrDie
- ParseTestProto
- ParsePartialTestProto
CanonicalDelimiter: ""
BasedOnStyle: google
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: Auto
StatementMacros:
- Q_UNUSED
- QT_REQUIRE_VERSION
TabWidth: 8
UseCRLF: false
UseTab: Never
WhitespaceSensitiveMacros:
- STRINGIZE
- PP_STRINGIZE
- BOOST_PP_STRINGIZE
---

.gitattributes vendored (1 changed line)

@ -1,4 +1,5 @@
vendor/** linguist-vendored
website/** linguist-documentation
pkg/cimgui/vendor/** linguist-vendored
pkg/simdutf/vendor/** linguist-vendored
src/terminal/res/** linguist-vendored


@ -202,7 +202,7 @@ pub fn build(b: *std.Build) !void {
if (emit_helpgen) try addHelp(b, null, config);
// Add our benchmarks
-try benchSteps(b, target, optimize, config, emit_bench);
try benchSteps(b, target, config, emit_bench);
// We only build an exe if we have a runtime set.
const exe_: ?*std.Build.Step.Compile = if (config.app_runtime != .none) b.addExecutable(.{
@ -925,6 +925,18 @@ fn addDeps(
.target = target,
.optimize = optimize,
});
const highway_dep = b.dependency("highway", .{
.target = target,
.optimize = optimize,
});
const simdutf_dep = b.dependency("simdutf", .{
.target = target,
.optimize = optimize,
});
const utfcpp_dep = b.dependency("utfcpp", .{
.target = target,
.optimize = optimize,
});
const libpng_dep = b.dependency("libpng", .{
.target = target,
.optimize = optimize,
@ -977,6 +989,14 @@ fn addDeps(
step.addIncludePath(.{ .path = "src/stb" });
step.addCSourceFiles(.{ .files = &.{"src/stb/stb.c"} });
// C++ files
step.linkLibCpp();
step.addIncludePath(.{ .path = "src" });
step.addCSourceFiles(.{ .files = &.{
"src/simd/index_of.cpp",
"src/simd/vt.cpp",
} });
// If we're building a lib we have some different deps
const lib = step.kind == .lib;
@ -1027,6 +1047,18 @@ fn addDeps(
step.linkLibrary(glslang_dep.artifact("glslang"));
try static_libs.append(glslang_dep.artifact("glslang").getEmittedBin());
// Highway
step.linkLibrary(highway_dep.artifact("highway"));
try static_libs.append(highway_dep.artifact("highway").getEmittedBin());
// simdutf
step.linkLibrary(simdutf_dep.artifact("simdutf"));
try static_libs.append(simdutf_dep.artifact("simdutf").getEmittedBin());
// utfcpp
step.linkLibrary(utfcpp_dep.artifact("utfcpp"));
try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin());
// Spirv-Cross
step.linkLibrary(spirv_cross_dep.artifact("spirv_cross"));
try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin());
@ -1231,7 +1263,6 @@ fn buildDocumentation(
fn benchSteps(
b: *std.Build,
target: std.Build.ResolvedTarget,
-optimize: std.builtin.OptimizeMode,
config: BuildConfig,
install: bool,
) !void {
@ -1259,8 +1290,11 @@ fn benchSteps(
.name = bin_name,
.root_source_file = .{ .path = "src/main.zig" },
.target = target,
-.optimize = optimize,
// We always want our benchmarks to be in release mode.
.optimize = .ReleaseFast,
});
c_exe.linkLibC();
if (install) b.installArtifact(c_exe);
_ = try addDeps(b, c_exe, config: {
var copy = config;


@ -31,11 +31,14 @@
.fontconfig = .{ .path = "./pkg/fontconfig" },
.freetype = .{ .path = "./pkg/freetype" },
.harfbuzz = .{ .path = "./pkg/harfbuzz" },
.highway = .{ .path = "./pkg/highway" },
.libpng = .{ .path = "./pkg/libpng" },
.macos = .{ .path = "./pkg/macos" },
.oniguruma = .{ .path = "./pkg/oniguruma" },
.opengl = .{ .path = "./pkg/opengl" },
.pixman = .{ .path = "./pkg/pixman" },
.simdutf = .{ .path = "./pkg/simdutf" },
.utfcpp = .{ .path = "./pkg/utfcpp" },
.zlib = .{ .path = "./pkg/zlib" },
// Shader translation


@ -12,6 +12,7 @@
parallel,
pkg-config,
python3,
qemu,
scdoc,
tracy,
valgrind,
@ -110,6 +111,10 @@ in
# by default so we have to include this.
bashInteractive
# Used for testing SIMD codegen. This is Linux only because the macOS
# build only has the qemu-system files.
qemu
gdb
valgrind
wraptest


@ -1,3 +1,3 @@
# This file is auto-generated! check build-support/check-zig-cache-hash.sh for
# more details.
-"sha256-YXSgZynCPCwahbV4cQx05IrtzOaUxG75715dc+j+8/c="
"sha256-5QLmMiZFiWFTtEKCPn3ruXo2vkCVU870mPbKmmKqLvs="

pkg/highway/bridge.cpp Normal file (9 changed lines)

@ -0,0 +1,9 @@
#include <hwy/targets.h>
#include <stdint.h>
extern "C" {
int64_t hwy_supported_targets() {
return HWY_SUPPORTED_TARGETS;
}
}

pkg/highway/build.zig Normal file (111 changed lines)

@ -0,0 +1,111 @@
const std = @import("std");
pub fn build(b: *std.Build) !void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
const upstream = b.dependency("highway", .{});
const module = b.addModule("highway", .{
.root_source_file = .{ .path = "main.zig" },
.target = target,
.optimize = optimize,
});
const lib = b.addStaticLibrary(.{
.name = "highway",
.target = target,
.optimize = optimize,
});
lib.linkLibCpp();
lib.addIncludePath(upstream.path(""));
module.addIncludePath(upstream.path(""));
if (target.result.isDarwin()) {
const apple_sdk = @import("apple_sdk");
try apple_sdk.addPaths(b, &lib.root_module);
try apple_sdk.addPaths(b, module);
}
var flags = std.ArrayList([]const u8).init(b.allocator);
defer flags.deinit();
try flags.appendSlice(&.{
// Avoid changing binaries based on the current time and date.
"-Wno-builtin-macro-redefined",
"-D__DATE__=\"redacted\"",
"-D__TIMESTAMP__=\"redacted\"",
"-D__TIME__=\"redacted\"",
// Optimizations
"-fmerge-all-constants",
// Warnings
"-Wall",
"-Wextra",
// These are not included in Wall nor Wextra:
"-Wconversion",
"-Wsign-conversion",
"-Wvla",
"-Wnon-virtual-dtor",
"-Wfloat-overflow-conversion",
"-Wfloat-zero-conversion",
"-Wfor-loop-analysis",
"-Wgnu-redeclared-enum",
"-Winfinite-recursion",
"-Wself-assign",
"-Wstring-conversion",
"-Wtautological-overlap-compare",
"-Wthread-safety-analysis",
"-Wundefined-func-template",
"-fno-cxx-exceptions",
"-fno-slp-vectorize",
"-fno-vectorize",
});
if (target.result.os.tag != .windows) {
try flags.appendSlice(&.{
"-fmath-errno",
"-fno-exceptions",
});
}
lib.addCSourceFiles(.{ .flags = flags.items, .files = &.{"bridge.cpp"} });
lib.addCSourceFiles(.{
.dependency = upstream,
.flags = flags.items,
.files = &.{
"hwy/aligned_allocator.cc",
"hwy/nanobenchmark.cc",
"hwy/per_target.cc",
"hwy/print.cc",
"hwy/targets.cc",
"hwy/timer.cc",
},
});
lib.installHeadersDirectoryOptions(.{
.source_dir = upstream.path("hwy"),
.install_dir = .header,
.install_subdir = "hwy",
.include_extensions = &.{".h"},
});
b.installArtifact(lib);
{
const test_exe = b.addTest(.{
.name = "test",
.root_source_file = .{ .path = "main.zig" },
.target = target,
.optimize = optimize,
});
test_exe.linkLibrary(lib);
var it = module.import_table.iterator();
while (it.next()) |entry| test_exe.root_module.addImport(entry.key_ptr.*, entry.value_ptr.*);
const tests_run = b.addRunArtifact(test_exe);
const test_step = b.step("test", "Run tests");
test_step.dependOn(&tests_run.step);
}
}

pkg/highway/build.zig.zon Normal file (13 changed lines)

@ -0,0 +1,13 @@
.{
.name = "highway",
.version = "1.0.7",
.paths = .{""},
.dependencies = .{
.highway = .{
.url = "https://github.com/google/highway/archive/refs/tags/1.0.7.tar.gz",
.hash = "122060ea43a9403ad53b4a33e19416c0e9949fb3e175035791bd2b7462091079d5a2",
},
.apple_sdk = .{ .path = "../apple-sdk" },
},
}

pkg/highway/main.zig Normal file (57 changed lines)

@ -0,0 +1,57 @@
extern "c" fn hwy_supported_targets() i64;
pub const Targets = packed struct(i64) {
// x86_64
_reserved: u4 = 0,
avx3_spr: bool = false,
_reserved_5: u1 = 0,
avx3_zen4: bool = false,
avx3_dl: bool = false,
avx3: bool = false,
avx2: bool = false,
_reserved_10: u1 = 0,
sse4: bool = false,
ssse3: bool = false,
_reserved_13: u1 = 0, // SSE3 reserved
sse2: bool = false,
_reserved_15_23: u9 = 0,
// aarch64
sve2_128: bool = false,
sve_256: bool = false,
sve2: bool = false,
sve: bool = false,
neon: bool = false,
neon_without_aes: bool = false,
_reserved_30_36: u6 = 0,
// risc-v
rvv: bool = false,
_reserved_38_46: u9 = 0,
// IBM Power
ppc10: bool = false,
ppc9: bool = false,
ppc8: bool = false,
z15: bool = false,
z14: bool = false,
_reserved_52_57: u6 = 0,
// WebAssembly
wasm_emu256: bool = false,
wasm: bool = false,
_reserved_60_61: u2 = 0,
// Emulation
emu128: bool = false,
scalar: bool = false,
_reserved_63: u1 = 0,
};
pub fn supported_targets() Targets {
return @bitCast(hwy_supported_targets());
}
test {
_ = supported_targets();
}
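
As an illustrative sketch (not part of this commit), the Targets struct above can be queried at runtime to see which SIMD instruction sets Highway detected on the host. This assumes the module is imported under the name "highway", as registered by addModule in pkg/highway/build.zig:

const std = @import("std");
const highway = @import("highway"); // assumed module name from pkg/highway/build.zig

pub fn main() void {
    // Each field is a single flag decoded from HWY_SUPPORTED_TARGETS.
    const targets = highway.supported_targets();
    std.debug.print("avx2={} neon={} scalar={}\n", .{
        targets.avx2,
        targets.neon,
        targets.scalar,
    });
}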

pkg/simdutf/build.zig Normal file (54 changed lines)

@ -0,0 +1,54 @@
const std = @import("std");
pub fn build(b: *std.Build) !void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
const lib = b.addStaticLibrary(.{
.name = "simdutf",
.target = target,
.optimize = optimize,
});
lib.linkLibCpp();
lib.addIncludePath(.{ .path = "vendor" });
if (target.result.isDarwin()) {
const apple_sdk = @import("apple_sdk");
try apple_sdk.addPaths(b, &lib.root_module);
}
var flags = std.ArrayList([]const u8).init(b.allocator);
defer flags.deinit();
try flags.appendSlice(&.{});
lib.addCSourceFiles(.{
.flags = flags.items,
.files = &.{
"vendor/simdutf.cpp",
},
});
lib.installHeadersDirectoryOptions(.{
.source_dir = .{ .path = "vendor" },
.install_dir = .header,
.install_subdir = "",
.include_extensions = &.{".h"},
});
b.installArtifact(lib);
// {
// const test_exe = b.addTest(.{
// .name = "test",
// .root_source_file = .{ .path = "main.zig" },
// .target = target,
// .optimize = optimize,
// });
// test_exe.linkLibrary(lib);
//
// var it = module.import_table.iterator();
// while (it.next()) |entry| test_exe.root_module.addImport(entry.key_ptr.*, entry.value_ptr.*);
// const tests_run = b.addRunArtifact(test_exe);
// const test_step = b.step("test", "Run tests");
// test_step.dependOn(&tests_run.step);
// }
}


@ -0,0 +1,8 @@
.{
.name = "simdutf",
.version = "4.0.9",
.paths = .{""},
.dependencies = .{
.apple_sdk = .{ .path = "../apple-sdk" },
},
}

pkg/simdutf/vendor/simdutf.cpp vendored Normal file (33245 changed lines)

File diff suppressed because it is too large.

pkg/simdutf/vendor/simdutf.h vendored Normal file (3437 changed lines)

File diff suppressed because it is too large.

pkg/utfcpp/build.zig Normal file (54 changed lines)

@ -0,0 +1,54 @@
const std = @import("std");
pub fn build(b: *std.Build) !void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
const upstream = b.dependency("utfcpp", .{});
const lib = b.addStaticLibrary(.{
.name = "utfcpp",
.target = target,
.optimize = optimize,
});
lib.linkLibCpp();
lib.addIncludePath(upstream.path(""));
if (target.result.isDarwin()) {
const apple_sdk = @import("apple_sdk");
try apple_sdk.addPaths(b, &lib.root_module);
}
var flags = std.ArrayList([]const u8).init(b.allocator);
defer flags.deinit();
try flags.appendSlice(&.{});
lib.addCSourceFiles(.{
.flags = flags.items,
.files = &.{"empty.cc"},
});
lib.installHeadersDirectoryOptions(.{
.source_dir = upstream.path("source"),
.install_dir = .header,
.install_subdir = "",
.include_extensions = &.{".h"},
});
b.installArtifact(lib);
// {
// const test_exe = b.addTest(.{
// .name = "test",
// .root_source_file = .{ .path = "main.zig" },
// .target = target,
// .optimize = optimize,
// });
// test_exe.linkLibrary(lib);
//
// var it = module.import_table.iterator();
// while (it.next()) |entry| test_exe.root_module.addImport(entry.key_ptr.*, entry.value_ptr.*);
// const tests_run = b.addRunArtifact(test_exe);
// const test_step = b.step("test", "Run tests");
// test_step.dependOn(&tests_run.step);
// }
}

pkg/utfcpp/build.zig.zon Normal file (13 changed lines)

@ -0,0 +1,13 @@
.{
.name = "utfcpp",
.version = "4.0.5",
.paths = .{""},
.dependencies = .{
.utfcpp = .{
.url = "https://github.com/nemtrif/utfcpp/archive/refs/tags/v4.0.5.tar.gz",
.hash = "1220d4d18426ca72fc2b7e56ce47273149815501d0d2395c2a98c726b31ba931e641",
},
.apple_sdk = .{ .path = "../apple-sdk" },
},
}

pkg/utfcpp/empty.cc Normal file (2 changed lines)

@ -0,0 +1,2 @@
// Needed for Zig build to be happy
void ghostty_utfcpp_stub() {}

src/bench/stream.sh Executable file (20 changed lines)

@ -0,0 +1,20 @@
#!/usr/bin/env bash
#
# This is a trivial helper script to help run the stream benchmark.
# You probably want to tweak this script depending on what you're
# trying to measure.
DATA="ascii"
SIZE="25M"
# Uncomment to test with an active terminal state.
#ARGS=" --terminal"
hyperfine \
--warmup 10 \
-n memcpy \
"./zig-out/bin/bench-stream --mode=gen-${DATA} | head -c ${SIZE} | ./zig-out/bin/bench-stream --mode=noop${ARGS}" \
-n scalar \
"./zig-out/bin/bench-stream --mode=gen-${DATA} | head -c ${SIZE} | ./zig-out/bin/bench-stream --mode=scalar${ARGS}" \
-n simd \
"./zig-out/bin/bench-stream --mode=gen-${DATA} | head -c ${SIZE} | ./zig-out/bin/bench-stream --mode=simd${ARGS}"

src/bench/stream.zig Normal file (215 changed lines)

@ -0,0 +1,215 @@
//! This benchmark tests the throughput of the VT stream. It has a few
//! modes in order to test different methods of stream processing. It
//! provides a "noop" mode to give us the `memcpy` speed.
//!
//! This will consume all of the available stdin, so you should run it
//! with `head` in a pipe to restrict. For example, to test ASCII input:
//!
//! bench-stream --mode=gen-ascii | head -c 50M | bench-stream --mode=simd
//!
const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const ArenaAllocator = std.heap.ArenaAllocator;
const ziglyph = @import("ziglyph");
const cli = @import("../cli.zig");
const terminal = @import("../terminal/main.zig");
const Args = struct {
mode: Mode = .noop,
/// Process input with a real terminal. This will be MUCH slower than
/// the other modes because it has to maintain terminal state but will
/// help get more realistic numbers.
terminal: bool = false,
@"terminal-rows": usize = 80,
@"terminal-cols": usize = 120,
/// The size for read buffers. Doesn't usually need to be changed. The
/// main point is to make this runtime known so we can avoid compiler
/// optimizations.
@"buffer-size": usize = 4096,
/// This is set by the CLI parser for deinit.
_arena: ?ArenaAllocator = null,
pub fn deinit(self: *Args) void {
if (self._arena) |arena| arena.deinit();
self.* = undefined;
}
};
const Mode = enum {
// Do nothing, just read from stdin into a stack-allocated buffer.
// This is used to benchmark our base-case: it gives us our maximum
// throughput on a basic read.
noop,
// These benchmark the throughput of the terminal stream parsing
// with and without SIMD. The "simd" option will use whatever is best
// for the running platform.
//
// Note that these run through the full VT parser but do not apply
// the operations to terminal state, so there is no terminal state
// overhead.
scalar,
simd,
// Generate an infinite stream of random printable ASCII characters.
@"gen-ascii",
// Generate an infinite stream of repeated UTF-8 characters. We don't
// currently do random generation because trivial implementations are
// too slow and I'm a simple man.
@"gen-utf8",
};
pub const std_options = struct {
pub const log_level: std.log.Level = .debug;
};
pub fn main() !void {
// We want to use the c allocator because it is much faster than GPA.
const alloc = std.heap.c_allocator;
// Parse our args
var args: Args = .{};
defer args.deinit();
{
var iter = try std.process.argsWithAllocator(alloc);
defer iter.deinit();
try cli.args.parse(Args, alloc, &args, &iter);
}
const reader = std.io.getStdIn().reader();
const writer = std.io.getStdOut().writer();
const buf = try alloc.alloc(u8, args.@"buffer-size");
// Handle the modes that do not depend on terminal state first.
switch (args.mode) {
.@"gen-ascii" => try genAscii(writer),
.@"gen-utf8" => try genUtf8(writer),
.noop => try benchNoop(reader, buf),
// Handle the ones that depend on terminal state next
inline .scalar,
.simd,
=> |tag| {
if (args.terminal) {
const TerminalStream = terminal.Stream(*TerminalHandler);
var t = try terminal.Terminal.init(
alloc,
args.@"terminal-cols",
args.@"terminal-rows",
);
var handler: TerminalHandler = .{ .t = &t };
var stream: TerminalStream = .{ .handler = &handler };
switch (tag) {
.scalar => try benchScalar(reader, &stream, buf),
.simd => try benchSimd(reader, &stream, buf),
else => @compileError("missing case"),
}
} else {
var stream: terminal.Stream(NoopHandler) = .{ .handler = .{} };
switch (tag) {
.scalar => try benchScalar(reader, &stream, buf),
.simd => try benchSimd(reader, &stream, buf),
else => @compileError("missing case"),
}
}
},
}
}
/// Generates an infinite stream of random printable ASCII characters.
/// This has no control characters in it at all.
fn genAscii(writer: anytype) !void {
const alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()_+-=[]{}|;':\\\",./<>?`~";
try genData(writer, alphabet);
}
/// Generates an infinite stream of bytes from the given alphabet.
fn genData(writer: anytype, alphabet: []const u8) !void {
var prng = std.rand.DefaultPrng.init(0x12345678);
const rnd = prng.random();
while (true) {
var buf: [1024]u8 = undefined;
for (&buf) |*c| {
const idx = rnd.uintLessThanBiased(usize, alphabet.len);
c.* = alphabet[idx];
}
writer.writeAll(&buf) catch |err| switch (err) {
error.BrokenPipe => return, // stdout closed
else => return err,
};
}
}
fn genUtf8(writer: anytype) !void {
while (true) {
writer.writeAll(random_utf8) catch |err| switch (err) {
error.BrokenPipe => return, // stdout closed
else => return err,
};
}
}
noinline fn benchNoop(reader: anytype, buf: []u8) !void {
var total: usize = 0;
while (true) {
const n = try reader.readAll(buf);
if (n == 0) break;
total += n;
}
std.log.info("total bytes len={}", .{total});
}
noinline fn benchScalar(
reader: anytype,
stream: anytype,
buf: []u8,
) !void {
while (true) {
const n = try reader.read(buf);
if (n == 0) break;
// Using stream.next directly with a for loop applies a naive
// scalar approach.
for (buf[0..n]) |c| try stream.next(c);
}
}
noinline fn benchSimd(
reader: anytype,
stream: anytype,
buf: []u8,
) !void {
while (true) {
const n = try reader.read(buf);
if (n == 0) break;
try stream.nextSlice(buf[0..n]);
}
}
const NoopHandler = struct {
pub fn print(self: NoopHandler, cp: u21) !void {
_ = self;
_ = cp;
}
};
const TerminalHandler = struct {
t: *terminal.Terminal,
pub fn print(self: *TerminalHandler, cp: u21) !void {
try self.t.print(cp);
}
};
/// Offline-generated random UTF-8 bytes, because generating them at runtime
/// was too slow for our benchmarks. We should replace this if we can come
/// up with something that doesn't bottleneck our benchmark.
const random_utf8 = "⨴⭬∎⯀Ⳟ⳨⍈♍⒄⣹⇚ⱎ⯡⯴↩ⵆ⼳ⶦ⑑⦥➍Ⲡ⽉❞⹀⢧€⣁ⶐ⸲⣷⏝⣶⫿▝⨽⬃ↁ↵⯙ⶵ╡∾⭡′⫼↼┫⮡ↅ⍞‡▱⺁⿒⽛⎭☜Ⱝ⣘✬⢟⁴⟹⪝ℌ❓␆╣┳⽑⴩⺄✽ⳗ␮ⵍ⦵ⱍ⭑⛒ⅉ⛠➌₯ⵔⷋ⹶❷ⱳ⣖⭐⮋ₒ⥚ⷃ╶⌈⸣❥⑎⦿⪶₮╋⅌ⳬⴛ⥚♇╬❜⺷⡬⏠⧥┺⃻❼⏲↍Ⓙ⽕╶⾉⺪⁑⎕⅕⼧⊀ⲡ⊺⪭⟾Ⅵ⍌⛄⠻⃽⣻₮ⰹⴺ⪂⃾∖⊹⤔⵫⦒⽳⫄⍮↷⣌⩐⨼⯂⵺◺⍙⭺⟂⎯ⱼ⴬⫺⹦∌⡉ⳅ⛲⡏⃘⺃⵬ⴜ⾩⭦ⷭ⨟☌⍃⧪⮧ⓛ⃄♮ⲓ∘⣝⤐⎭ⷺⰫⶔ☎⾨⾐≦␢⋔⢟ⶐ⏁⚄⦡⾞✊⾾⫿⴩⪨⮰ⓙ⌽⭲⫬⒈⊻⸣⌳⋡ⱄⲛ⓬➼⌧⟮⹖♞ℚⷱ⭥⚣⏳⟾❠☏⦻⑽−∪ⅆ☁⿑⦣⵽Ⱳ⺧⺊Ⓞ⫽⦀⃐⚽⎌⥰⚪⢌⛗⸋⛂⾽Ⰳ⍧⛗◁❠↺≍‸ⴣ⭰‾⡸⩛⭷ⵒ⵼⚉❚⨳⑫⹾⷟∇┬⚌⨙╘ℹ⢱⏴∸⴨⾀⌟⡄⺣⦦ⱏ⼚​⿇├⌮⸿⯔₮—⥟╖◡⻵ⶕ┧⒞⏖⏧⟀❲➚‏➳Ⰼ┸⬖⸓⁃⹚⫣┭↜〈☶≍☨╟⿹ⳙ⺽⸡⵵⛞⚟⯓⥟┞⿄⮖⃫⭒⠤ⓣ⬱⃅⓼ⱒ⥖✜⛘⠶ⰽ⿉⾣➌⣋⚨⒯◱⢃◔ⱕ⫡⓱⅌Ⱨ⧵⯾┰⁠ⱌ⼳♠⨽⪢⸳⠹⩡Ⓨ⡪⭞⼰⡧ⓖ⤘⽶⵶ⴺ ⨨▅⏟⊕ⴡⴰ␌⚯⦀⫭⨔⬯⨢ⱽ⟓⥫⑤⊘⟧❐▜⵸℅⋣⚏⇭⽁⪂ⲡ⯊⦥⭳⠾⹫⠮℞⒡Ⰼ⦈⭅≉⋆☈▓⺑⡻▷Ⱑ⋖⬜┃ⵍ←⣢ↁ☚⟴⦡⨍⼡◝⯤❓◢⌡⏿⭲✏⎑⧊⼤⪠⋂⚜┯▤⑘⟾⬬Ⓜ⨸⥪ⱘ⳷⷟⒖⋐⡈⏌∠⏁⓳Ⲟ⦽⢯┏Ⲹ⍰ⅹ⚏⍐⟍⣩␖⛂∜❆⤗⒨⓽";


@ -139,4 +139,5 @@ pub const ExeEntrypoint = enum {
mdgen_ghostty_1,
mdgen_ghostty_5,
bench_parser,
bench_stream,
};


@ -7,4 +7,5 @@ pub usingnamespace switch (build_config.exe_entrypoint) {
.mdgen_ghostty_1 => @import("build/mdgen/main_ghostty_1.zig"),
.mdgen_ghostty_5 => @import("build/mdgen/main_ghostty_5.zig"),
.bench_parser => @import("bench/parser.zig"),
.bench_stream => @import("bench/stream.zig"),
};


@ -307,6 +307,7 @@ test {
_ = @import("inspector/main.zig");
_ = @import("terminal/main.zig");
_ = @import("terminfo/main.zig");
_ = @import("simd/main.zig");
// TODO
_ = @import("blocking_queue.zig");

src/simd/index_of.cpp Normal file (53 changed lines)

@ -0,0 +1,53 @@
// Generates code for every target that this compiler can support.
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "simd/index_of.cpp" // this file
#include <hwy/foreach_target.h> // must come before highway.h
#include <hwy/highway.h>
#include <simd/index_of.h>
#include <optional>
HWY_BEFORE_NAMESPACE();
namespace ghostty {
namespace HWY_NAMESPACE {
namespace hn = hwy::HWY_NAMESPACE;
size_t IndexOf(const uint8_t needle,
const uint8_t* HWY_RESTRICT input,
size_t count) {
const hn::ScalableTag<uint8_t> d;
return IndexOfImpl(d, needle, input, count);
}
} // namespace HWY_NAMESPACE
} // namespace ghostty
HWY_AFTER_NAMESPACE();
// HWY_ONCE is true for only one of the target passes
#if HWY_ONCE
namespace ghostty {
// This macro declares a static array used for dynamic dispatch.
HWY_EXPORT(IndexOf);
size_t IndexOf(const uint8_t needle,
const uint8_t* HWY_RESTRICT input,
size_t count) {
return HWY_DYNAMIC_DISPATCH(IndexOf)(needle, input, count);
}
} // namespace ghostty
extern "C" {
size_t ghostty_simd_index_of(const uint8_t needle,
const uint8_t* HWY_RESTRICT input,
size_t count) {
return ghostty::IndexOf(needle, input, count);
}
}
#endif // HWY_ONCE

src/simd/index_of.h Normal file (96 changed lines)

@ -0,0 +1,96 @@
#if defined(GHOSTTY_SIMD_INDEX_OF_H_) == defined(HWY_TARGET_TOGGLE)
#ifdef GHOSTTY_SIMD_INDEX_OF_H_
#undef GHOSTTY_SIMD_INDEX_OF_H_
#else
#define GHOSTTY_SIMD_INDEX_OF_H_
#endif
#include <hwy/highway.h>
#include <optional>
HWY_BEFORE_NAMESPACE();
namespace ghostty {
namespace HWY_NAMESPACE {
namespace hn = hwy::HWY_NAMESPACE;
// Return the index of the first occurrence of `needle` in `input`, where
// the input and needle are already loaded into vectors.
template <class D, typename T = hn::TFromD<D>>
std::optional<size_t> IndexOfChunk(D d,
hn::Vec<D> needle_vec,
hn::Vec<D> input_vec) {
// Compare the input vector with the needle vector. This produces
// a vector where each lane is 0xFF if the corresponding lane in
// `input_vec` is equal to the corresponding lane in `needle_vec`.
const hn::Mask<D> eq_mask = hn::Eq(needle_vec, input_vec);
// Find the index within the vector where the first true value is.
const intptr_t pos = hn::FindFirstTrue(d, eq_mask);
// If we found a match, return the index into the input.
if (pos >= 0) {
return std::optional<size_t>(static_cast<size_t>(pos));
} else {
return std::nullopt;
}
}
// Return the index of the first occurrence of `needle` in `input` or
// `count` if not found.
template <class D, typename T = hn::TFromD<D>>
size_t IndexOfImpl(D d, T needle, const T* HWY_RESTRICT input, size_t count) {
// Note: due to the simplicity of this operation and the general complexity
// of SIMD, I'm going to overly comment this function to help explain the
// implementation for future maintainers.
// The number of lanes in the vector type.
const size_t N = hn::Lanes(d);
// Create a vector with all lanes set to `needle` so we can do a lane-wise
// comparison with the input.
const hn::Vec<D> needle_vec = Set(d, needle);
// Compare N elements at a time.
size_t i = 0;
for (; i + N <= count; i += N) {
// Load the N elements from our input into a vector and check the chunk.
const hn::Vec<D> input_vec = hn::LoadU(d, input + i);
if (auto pos = IndexOfChunk(d, needle_vec, input_vec)) {
return i + pos.value();
}
}
// Since we compare N elements at a time, we may have some elements left
// if count modulo N != 0. We need to scan the remaining elements. To
// be simple, we search one element at a time.
if (i != count) {
// Create a new vector with only one relevant lane.
const hn::CappedTag<T, 1> d1;
using D1 = decltype(d1);
// Get an equally sized needle vector with only one lane.
const hn::Vec<D1> needle1 = Set(d1, GetLane(needle_vec));
// Go through the remaining elements and do similar logic to
// the previous loop to find any matches.
for (; i < count; ++i) {
const hn::Vec<D1> input_vec = hn::LoadU(d1, input + i);
const hn::Mask<D1> eq_mask = hn::Eq(needle1, input_vec);
if (hn::AllTrue(d1, eq_mask))
return i;
}
}
return count;
}
size_t IndexOf(const uint8_t needle,
const uint8_t* HWY_RESTRICT input,
size_t count);
} // namespace HWY_NAMESPACE
} // namespace ghostty
HWY_AFTER_NAMESPACE();
#endif // GHOSTTY_SIMD_INDEX_OF_H_

src/simd/index_of.zig Normal file (27 changed lines)

@ -0,0 +1,27 @@
const std = @import("std");
const builtin = @import("builtin");
extern "c" fn ghostty_simd_index_of(
needle: u8,
input: [*]const u8,
count: usize,
) usize;
pub fn indexOf(input: []const u8, needle: u8) ?usize {
const result = ghostty_simd_index_of(needle, input.ptr, input.len);
return if (result == input.len) null else result;
}
test "indexOf" {
const testing = std.testing;
try testing.expect(indexOf("hello", ' ') == null);
try testing.expectEqual(@as(usize, 2), indexOf("hi lo", ' ').?);
try testing.expectEqual(@as(usize, 5), indexOf(
\\XXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
\\XXXXXXXXXXXX XXXXXXXXXXX XXXXXXXXXXXXXXX
, ' ').?);
try testing.expectEqual(@as(usize, 53), indexOf(
\\XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
\\XXXXXXXXXXXX XXXXXXXXXXX XXXXXXXXXXXXXXX
, ' ').?);
}
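
For context, a hedged sketch of the kind of call site this wrapper suits: splitting a chunk of terminal input at the first ESC (0x1B) byte so everything before it can be handled as plain data. The helper name splitAtEsc is hypothetical and not part of this diff:

const index_of = @import("index_of.zig");

// Hypothetical helper: split a buffer at the first ESC byte, if any.
fn splitAtEsc(buf: []const u8) struct { plain: []const u8, rest: []const u8 } {
    const esc: u8 = 0x1B;
    if (index_of.indexOf(buf, esc)) |idx| {
        return .{ .plain = buf[0..idx], .rest = buf[idx..] };
    }
    return .{ .plain = buf, .rest = buf[buf.len..] };
}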

src/simd/main.zig Normal file (8 changed lines)

@ -0,0 +1,8 @@
const std = @import("std");
pub const index_of = @import("index_of.zig");
pub const vt = @import("vt.zig");
test {
@import("std").testing.refAllDecls(@This());
}

src/simd/vt.cpp Normal file (169 changed lines)

@ -0,0 +1,169 @@
// Generates code for every target that this compiler can support.
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "simd/vt.cpp" // this file
#include <hwy/foreach_target.h> // must come before highway.h
#include <hwy/highway.h>
#include <simdutf.h>
#include <utf8.h>
#include <vector>
#include <simd/index_of.h>
#include <simd/vt.h>
HWY_BEFORE_NAMESPACE();
namespace ghostty {
namespace HWY_NAMESPACE {
namespace hn = hwy::HWY_NAMESPACE;
using T = uint8_t;
// Decode the UTF-8 text in input into output. Returns the number of decoded
// characters. This function assumes output is large enough.
//
// This function handles malformed UTF-8 sequences by inserting a
// replacement character (U+FFFD) and continuing to decode. This function
// will consume the entire input no matter what.
size_t DecodeUTF8(const uint8_t* HWY_RESTRICT input,
size_t count,
char32_t* output) {
// Its possible for our input to be empty since DecodeUTF8UntilControlSeq
// doesn't check for this.
if (count == 0) {
return 0;
}
// Assume no errors for fast path.
const size_t decoded = simdutf::convert_utf8_to_utf32(
reinterpret_cast<const char*>(input), count, output);
if (decoded > 0) {
return decoded;
}
// Errors in the UTF input, take a slow path and do a decode with
// replacement (with U+FFFD). Note that simdutf doesn't have a
// decode with replacement API:
// https://github.com/simdutf/simdutf/issues/147
//
// Because of this, we use a separate library with heap allocation
// that is much, much slower (the allocation is slower, the algorithm
// is slower, etc.) This is just so we have something that works.
// I want to replace this.
std::vector<char> replacement_result;
utf8::replace_invalid(input, input + count,
std::back_inserter(replacement_result), 0xFFFD);
return DecodeUTF8(reinterpret_cast<const uint8_t*>(replacement_result.data()),
replacement_result.size(), output);
}
/// Decode the UTF-8 text in input into output until an escape
/// character is found. This returns the number of bytes consumed
/// from input and writes the number of decoded characters into
/// output_count.
///
/// This may return a value less than count even with no escape
/// character if the input ends with an incomplete UTF-8 sequence.
/// The caller should check the next byte manually to determine
/// if it is incomplete.
template <class D>
size_t DecodeUTF8UntilControlSeqImpl(D d,
const T* HWY_RESTRICT input,
size_t count,
char32_t* output,
size_t* output_count) {
const size_t N = hn::Lanes(d);
// Create a vector containing ESC since that denotes a control sequence.
const hn::Vec<D> esc_vec = Set(d, 0x1B);
// Compare N elements at a time.
size_t i = 0;
for (; i + N <= count; i += N) {
// Load the N elements from our input into a vector.
const hn::Vec<D> input_vec = hn::LoadU(d, input + i);
// If we don't have any escapes we keep going. We want to accumulate
// the largest possible valid UTF-8 sequence before decoding.
// TODO(mitchellh): benchmark this vs decoding every time
const auto esc_idx = IndexOfChunk(d, esc_vec, input_vec);
if (!esc_idx) {
continue;
}
// We have an ESC char, decode up to this point. We start by assuming
// a valid UTF-8 sequence and slow-path into error handling if we find
// an invalid sequence.
*output_count = DecodeUTF8(input, i + esc_idx.value(), output);
return i + esc_idx.value();
}
// If we have leftover input then we decode it one byte at a time (slow!)
// using pretty much the same logic as above.
if (i != count) {
const hn::CappedTag<T, 1> d1;
using D1 = decltype(d1);
const hn::Vec<D1> esc1 = Set(d1, GetLane(esc_vec));
for (; i < count; ++i) {
const hn::Vec<D1> input_vec = hn::LoadU(d1, input + i);
const auto esc_idx = IndexOfChunk(d1, esc1, input_vec);
if (!esc_idx) {
continue;
}
*output_count = DecodeUTF8(input, i + esc_idx.value(), output);
return i + esc_idx.value();
}
}
// If we reached this point, its possible for our input to have an
// incomplete sequence because we're consuming the full input. We need
// to trim any incomplete sequences from the end of the input.
const size_t trimmed_len =
simdutf::trim_partial_utf8(reinterpret_cast<const char*>(input), i);
*output_count = DecodeUTF8(input, trimmed_len, output);
return trimmed_len;
}
size_t DecodeUTF8UntilControlSeq(const uint8_t* HWY_RESTRICT input,
size_t count,
char32_t* output,
size_t* output_count) {
const hn::ScalableTag<uint8_t> d;
return DecodeUTF8UntilControlSeqImpl(d, input, count, output, output_count);
}
} // namespace HWY_NAMESPACE
} // namespace ghostty
HWY_AFTER_NAMESPACE();
// HWY_ONCE is true for only one of the target passes
#if HWY_ONCE
namespace ghostty {
HWY_EXPORT(DecodeUTF8UntilControlSeq);
size_t DecodeUTF8UntilControlSeq(const uint8_t* HWY_RESTRICT input,
size_t count,
char32_t* output,
size_t* output_count) {
return HWY_DYNAMIC_DISPATCH(DecodeUTF8UntilControlSeq)(input, count, output,
output_count);
}
} // namespace ghostty
extern "C" {
size_t ghostty_simd_decode_utf8_until_control_seq(const uint8_t* HWY_RESTRICT
input,
size_t count,
char32_t* output,
size_t* output_count) {
return ghostty::DecodeUTF8UntilControlSeq(input, count, output, output_count);
}
} // extern "C"
#endif // HWY_ONCE

src/simd/vt.h Normal file (30 changed lines)

@ -0,0 +1,30 @@
#if defined(GHOSTTY_SIMD_VT_H_) == defined(HWY_TARGET_TOGGLE)
#ifdef GHOSTTY_SIMD_VT_H_
#undef GHOSTTY_SIMD_VT_H_
#else
#define GHOSTTY_SIMD_VT_H_
#endif
#include <hwy/highway.h>
HWY_BEFORE_NAMESPACE();
namespace ghostty {
namespace HWY_NAMESPACE {
namespace hn = hwy::HWY_NAMESPACE;
} // namespace HWY_NAMESPACE
} // namespace ghostty
HWY_AFTER_NAMESPACE();
#if HWY_ONCE
namespace ghostty {
typedef void (*PrintFunc)(const char32_t* chars, size_t count);
} // namespace ghostty
#endif // HWY_ONCE
#endif // GHOSTTY_SIMD_VT_H_

src/simd/vt.zig Normal file (121 changed lines)

@ -0,0 +1,121 @@
const std = @import("std");
// vt.cpp
extern "c" fn ghostty_simd_decode_utf8_until_control_seq(
input: [*]const u8,
count: usize,
output: [*]u32,
output_count: *usize,
) usize;
const DecodeResult = struct {
consumed: usize,
decoded: usize,
};
pub fn utf8DecodeUntilControlSeq(
input: []const u8,
output: []u32,
) DecodeResult {
var decoded: usize = 0;
const consumed = ghostty_simd_decode_utf8_until_control_seq(
input.ptr,
input.len,
output.ptr,
&decoded,
);
return .{ .consumed = consumed, .decoded = decoded };
}
test "decode no escape" {
const testing = std.testing;
var output: [1024]u32 = undefined;
// TODO: many more test cases
{
const str = "hello" ** 128;
try testing.expectEqual(DecodeResult{
.consumed = str.len,
.decoded = str.len,
}, utf8DecodeUntilControlSeq(str, &output));
}
}
test "decode ASCII to escape" {
const testing = std.testing;
var output: [1024]u32 = undefined;
// TODO: many more test cases
{
const prefix = "hello" ** 64;
const str = prefix ++ "\x1b" ++ ("world" ** 64);
try testing.expectEqual(DecodeResult{
.consumed = prefix.len,
.decoded = prefix.len,
}, utf8DecodeUntilControlSeq(str, &output));
}
}
test "decode immediate esc sequence" {
const testing = std.testing;
var output: [64]u32 = undefined;
const str = "\x1b[?5s";
try testing.expectEqual(DecodeResult{
.consumed = 0,
.decoded = 0,
}, utf8DecodeUntilControlSeq(str, &output));
}
test "decode incomplete UTF-8" {
const testing = std.testing;
var output: [64]u32 = undefined;
// 2-byte
{
const str = "hello\xc2";
try testing.expectEqual(DecodeResult{
.consumed = 5,
.decoded = 5,
}, utf8DecodeUntilControlSeq(str, &output));
}
// 3-byte
{
const str = "hello\xe0\x00";
try testing.expectEqual(DecodeResult{
.consumed = 5,
.decoded = 5,
}, utf8DecodeUntilControlSeq(str, &output));
}
// 4-byte
{
const str = "hello\xf0\x90";
try testing.expectEqual(DecodeResult{
.consumed = 5,
.decoded = 5,
}, utf8DecodeUntilControlSeq(str, &output));
}
}
test "decode invalid UTF-8" {
const testing = std.testing;
var output: [64]u32 = undefined;
// Invalid leading 1s
{
const str = "hello\xc2\x00";
try testing.expectEqual(DecodeResult{
.consumed = 7,
.decoded = 7,
}, utf8DecodeUntilControlSeq(str, &output));
}
try testing.expectEqual(@as(u32, 0xFFFD), output[5]);
}
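
A hedged sketch of the calling pattern implied by the consumed/decoded contract above (the real consumer is Stream.nextSliceCapped in src/terminal/stream.zig). The decodeStep helper is hypothetical and written as if it lived alongside the wrapper in this file:

// Hypothetical helper: one decode step and the two follow-up cases
// the caller is responsible for.
fn decodeStep(input: []const u8, cp_buf: []u32) usize {
    const res = utf8DecodeUntilControlSeq(input, cp_buf);
    // cp_buf[0..res.decoded] now holds the decoded codepoints.
    if (res.consumed < input.len) {
        if (input[res.consumed] == 0x1B) {
            // An escape sequence starts here; hand these bytes to the
            // scalar VT parser instead of decoding them as UTF-8.
        } else {
            // The tail is an incomplete UTF-8 sequence; save
            // input[res.consumed..] and prepend it to the next chunk.
        }
    }
    return res.consumed;
}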

src/terminal/simdvt.zig Normal file (5 changed lines)

@ -0,0 +1,5 @@
pub usingnamespace @import("simdvt/parser.zig");
test {
@import("std").testing.refAllDecls(@This());
}


@ -1,5 +1,7 @@
const std = @import("std");
const assert = std.debug.assert;
const testing = std.testing;
const simd = @import("../simd/main.zig");
const Parser = @import("Parser.zig");
const ansi = @import("ansi.zig");
const charsets = @import("charsets.zig");
@ -40,18 +42,140 @@ pub fn Stream(comptime Handler: type) type {
parser: Parser = .{},
utf8decoder: UTF8Decoder = .{},
/// Keep track of any partial UTF-8 sequences that we need to
/// process in the next call to nextAssumeUtf8.
partial_utf8: [4]u8 = undefined,
partial_utf8_len: u3 = 0,
pub fn deinit(self: *Self) void {
self.parser.deinit();
}
/// Process a string of characters.
-pub fn nextSlice(self: *Self, c: []const u8) !void {
-for (c) |single| try self.next(single);
pub fn nextSlice(self: *Self, input: []const u8) !void {
// This is the maximum number of codepoints we can decode
// at one time for this function call. This is somewhat arbitrary
// so if someone can demonstrate a better number then we can switch.
var cp_buf: [4096]u32 = undefined;
// Split the input into chunks that fit into cp_buf.
var i: usize = 0;
while (true) {
const len = @min(cp_buf.len, input.len - i);
try self.nextSliceCapped(input[i .. i + len], &cp_buf);
i += len;
if (i >= input.len) break;
}
}
-/// Process the next character and call any callbacks if necessary.
fn nextSliceCapped(self: *Self, input: []const u8, cp_buf: []u32) !void {
assert(input.len <= cp_buf.len);
var offset: usize = 0;
// If we have a partial UTF-8 sequence then we process manually.
if (self.partial_utf8_len > 0) {
offset += try self.completePartialUtf8(input);
} else if (self.parser.state != .ground) {
// If we're not in the ground state then we process until
// we are. This can happen if the last chunk of input put us
// in the middle of a control sequence.
for (input[offset..]) |single| {
try self.nextNonUtf8(single);
offset += 1;
if (self.parser.state == .ground) break;
}
}
// If we're in the ground state then we can use SIMD to process
// input until we see an ESC (0x1B), since all other characters
// up to that point are just UTF-8.
while (self.parser.state == .ground and offset < input.len) {
const res = simd.vt.utf8DecodeUntilControlSeq(input[offset..], cp_buf);
for (cp_buf[0..res.decoded]) |cp| {
if (cp < 0xF) {
try self.execute(@intCast(cp));
} else {
try self.print(@intCast(cp));
}
}
// Consume the bytes we just processed.
offset += res.consumed;
if (offset >= input.len) return;
// If our offset is NOT an escape then we must have a
// partial UTF-8 sequence. In that case, we save it and
// return.
if (input[offset] != 0x1B) {
const rem = input[offset..];
assert(rem.len <= self.partial_utf8.len);
@memcpy(self.partial_utf8[0..rem.len], rem);
self.partial_utf8_len = @intCast(rem.len);
return;
}
// Process our control sequence.
for (input[offset..]) |single| {
try self.nextNonUtf8(single);
offset += 1;
if (self.parser.state == .ground) break;
}
}
}
// Complete a partial UTF-8 sequence from a prior input chunk.
// This processes the UTF-8 sequence and then returns the number
// of bytes consumed from the input.
fn completePartialUtf8(self: *Self, input: []const u8) !usize {
assert(self.partial_utf8_len > 0);
assert(self.parser.state == .ground);
// This cannot fail because the nature of partial utf8
// existing means we successfully processed it last time.
const len = std.unicode.utf8ByteSequenceLength(self.partial_utf8[0]) catch
unreachable;
// This is the length we need in the input in addition to
// our partial_utf8 to complete the sequence.
const input_len = len - self.partial_utf8_len;
// If we STILL don't have enough bytes, then we copy and continue.
// This is a really bizarre and stupid program thats running to
// send us incomplete UTF-8 sequences over multiple write() calls.
if (input_len > input.len) {
@memcpy(
self.partial_utf8[self.partial_utf8_len .. self.partial_utf8_len + input.len],
input,
);
self.partial_utf8_len += @intCast(input.len);
return input.len;
}
// Process the complete UTF-8 sequence.
@memcpy(
self.partial_utf8[self.partial_utf8_len .. self.partial_utf8_len + input_len],
input[0..input_len],
);
const cp = cp: {
if (std.unicode.utf8Decode(self.partial_utf8[0..len])) |cp| {
break :cp cp;
} else |err| {
log.warn("invalid UTF-8, ignoring err={}", .{err});
break :cp 0xFFFD; // replacement character
}
};
self.partial_utf8_len = 0;
try self.print(cp);
return input_len;
}
/// Like nextSlice but takes one byte and is necessarilly a scalar
/// operation that can't use SIMD. Prefer nextSlice if you can and
/// try to get multiple bytes at once.
pub fn next(self: *Self, c: u8) !void {
-// log.debug("char: {c}", .{c});
// The scalar path can be responsible for decoding UTF-8.
if (self.parser.state == .ground and c != 0x1B) {
var consumed = false;
while (!consumed) {
@ -65,9 +189,20 @@ pub fn Stream(comptime Handler: type) type {
}
}
}
return;
}
try self.nextNonUtf8(c);
}
/// Process the next character and call any callbacks if necessary.
///
/// This assumes that we're not in the UTF-8 decoding state. If
/// we may be in the UTF-8 decoding state call nextSlice or next.
fn nextNonUtf8(self: *Self, c: u8) !void {
assert(self.parser.state != .ground or c == 0x1B);
const actions = self.parser.next(c);
for (actions) |action_opt| {
const action = action_opt orelse continue;
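
A hedged sketch (not from this commit) of how the partial-UTF-8 handling above is expected to behave at a chunk boundary: a codepoint split across two nextSlice calls should still come through as a single print. The handler mirrors the print-only handlers used in src/bench/stream.zig:

test "nextSlice with a UTF-8 sequence split across calls (sketch)" {
    const H = struct {
        last: u21 = 0,
        pub fn print(self: *@This(), cp: u21) !void {
            self.last = cp;
        }
    };
    var handler: H = .{};
    var stream: Stream(*H) = .{ .handler = &handler };
    // "é" is 0xC3 0xA9 in UTF-8; feed it one byte per call.
    try stream.nextSlice("\xc3");
    try stream.nextSlice("\xa9");
    try std.testing.expectEqual(@as(u21, 0x00E9), handler.last);
}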


@ -1617,44 +1617,8 @@ const ReadThread = struct {
log.err("error processing terminal data: {}", .{err});
}
} else {
-// Process the terminal data. This is an extremely hot part of the
-// terminal emulator, so we do some abstraction leakage to avoid
-// function calls and unnecessary logic.
-//
-// The ground state is the only state that we can see and print/execute
-// ASCII, so we only execute this hot path if we're already in the ground
-// state.
-//
-// Empirically, this alone improved throughput of large text output by ~20%.
-var i: usize = 0;
-const end = buf.len;
-if (ev.terminal_stream.parser.state == .ground) {
-for (buf[i..end]) |ch| {
-switch (terminal.parse_table.table[ch][@intFromEnum(terminal.Parser.State.ground)].action) {
-// Print, call directly.
-.print => ev.terminal_stream.handler.print(@intCast(ch)) catch |err|
-log.err("error processing terminal data: {}", .{err}),
-// C0 execute, let our stream handle this one but otherwise
-// continue since we're guaranteed to be back in ground.
-.execute => ev.terminal_stream.execute(ch) catch |err|
-log.err("error processing terminal data: {}", .{err}),
-// Otherwise, break out and go the slow path until we're
-// back in ground. There is a slight optimization here where
-// could try to find the next transition to ground but when
-// I implemented that it didn't materially change performance.
-else => break,
-}
-i += 1;
-}
-}
-if (i < end) {
-ev.terminal_stream.nextSlice(buf[i..end]) catch |err|
-log.err("error processing terminal data: {}", .{err});
-}
ev.terminal_stream.nextSlice(buf) catch |err|
log.err("error processing terminal data: {}", .{err});
}
// If our stream handling caused messages to be sent to the writer