diff --git a/build.zig b/build.zig
index 0389d81d1..9c7c12aed 100644
--- a/build.zig
+++ b/build.zig
@@ -686,6 +686,10 @@ fn addDeps(
         .target = step.target,
         .optimize = step.optimize,
     });
+    const oniguruma_dep = b.dependency("oniguruma", .{
+        .target = step.target,
+        .optimize = step.optimize,
+    });
     const opengl_dep = b.dependency("opengl", .{});
     const pixman_dep = b.dependency("pixman", .{
         .target = step.target,
@@ -749,6 +753,7 @@ fn addDeps(
         "fontconfig",
         fontconfig_dep.module("fontconfig"),
     );
+    step.addModule("oniguruma", oniguruma_dep.module("oniguruma"));
     step.addModule("freetype", freetype_dep.module("freetype"));
     step.addModule("glslang", glslang_dep.module("glslang"));
     step.addModule("spirv_cross", spirv_cross_dep.module("spirv_cross"));
@@ -793,6 +798,7 @@ fn addDeps(
         step.linkSystemLibrary2("freetype2", dynamic_link_opts);
         step.linkSystemLibrary2("harfbuzz", dynamic_link_opts);
         step.linkSystemLibrary2("libpng", dynamic_link_opts);
+        step.linkSystemLibrary2("oniguruma", dynamic_link_opts);
         step.linkSystemLibrary2("pixman-1", dynamic_link_opts);
         step.linkSystemLibrary2("zlib", dynamic_link_opts);
 
@@ -803,6 +809,9 @@ fn addDeps(
 
     // Other dependencies, we may dynamically link
     if (static) {
+        step.linkLibrary(oniguruma_dep.artifact("oniguruma"));
+        try static_libs.append(oniguruma_dep.artifact("oniguruma").getEmittedBin());
+
         step.linkLibrary(zlib_dep.artifact("z"));
         try static_libs.append(zlib_dep.artifact("z").getEmittedBin());
 
diff --git a/build.zig.zon b/build.zig.zon
index f6a675cb2..fc1ecb2f9 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -32,6 +32,7 @@
         .harfbuzz = .{ .path = "./pkg/harfbuzz" },
         .libpng = .{ .path = "./pkg/libpng" },
         .macos = .{ .path = "./pkg/macos" },
+        .oniguruma = .{ .path = "./pkg/oniguruma" },
         .opengl = .{ .path = "./pkg/opengl" },
         .pixman = .{ .path = "./pkg/pixman" },
         .tracy = .{ .path = "./pkg/tracy" },
diff --git a/nix/devshell.nix b/nix/devshell.nix
index 5804cceb5..247302828 100644
--- a/nix/devshell.nix
+++ b/nix/devshell.nix
@@ -6,6 +6,7 @@
 , ncurses
 , nodejs
 , nodePackages
+, oniguruma
 , parallel
 , pkg-config
 , python3
@@ -52,6 +53,7 @@ let
     freetype
     harfbuzz
     libpng
+    oniguruma
     pixman
     zlib
 
@@ -111,6 +113,7 @@ in mkShell rec {
     freetype
     harfbuzz
     libpng
+    oniguruma
     pixman
     zlib
 
diff --git a/nix/zig_cache_hash.nix b/nix/zig_cache_hash.nix
index ecebd55be..3163c1ac9 100644
--- a/nix/zig_cache_hash.nix
+++ b/nix/zig_cache_hash.nix
@@ -1,3 +1,3 @@
 # This file is auto-generated! check build-support/check-zig-cache-hash.sh for
 # more details.
-"sha256-KMv4j1/UxsrRPkr+Fy9BjtzwFZquiLfJCLMP28oVZiU="
+"sha256-bQ81e0vJBSLkVJ3a3nLraVc/JeG77xGckuARmu81DqI="
diff --git a/pkg/oniguruma/build.zig b/pkg/oniguruma/build.zig
new file mode 100644
index 000000000..70a1e9312
--- /dev/null
+++ b/pkg/oniguruma/build.zig
@@ -0,0 +1,130 @@
+const std = @import("std");
+const NativeTargetInfo = std.zig.system.NativeTargetInfo;
+
+pub fn build(b: *std.Build) !void {
+    const target = b.standardTargetOptions(.{});
+    const optimize = b.standardOptimizeOption(.{});
+
+    _ = b.addModule("oniguruma", .{ .source_file = .{ .path = "main.zig" } });
+
+    const upstream = b.dependency("oniguruma", .{});
+    const lib = try buildOniguruma(b, upstream, target, optimize);
+    b.installArtifact(lib);
+
+    {
+        const test_exe = b.addTest(.{
+            .name = "test",
+            .root_source_file = .{ .path = "main.zig" },
+            .target = target,
+            .optimize = optimize,
+        });
+        test_exe.linkLibrary(lib);
+        const tests_run = b.addRunArtifact(test_exe);
+        const test_step = b.step("test", "Run tests");
+        test_step.dependOn(&tests_run.step);
+
+        // Install the test binary so it can be run directly when debugging tests
+        b.installArtifact(test_exe);
+    }
+}
+
+fn buildOniguruma(
+    b: *std.Build,
+    upstream: *std.Build.Dependency,
+    target: std.zig.CrossTarget,
+    optimize: std.builtin.OptimizeMode,
+) !*std.Build.Step.Compile {
+    const lib = b.addStaticLibrary(.{
+        .name = "oniguruma",
+        .target = target,
+        .optimize = optimize,
+    });
+    const t = lib.target_info.target;
+    lib.linkLibC();
+    lib.addIncludePath(upstream.path("src"));
+
+    lib.addConfigHeader(b.addConfigHeader(.{
+        .style = .{ .cmake = upstream.path("src/config.h.cmake.in") },
+    }, .{
+        .PACKAGE = "oniguruma",
+        .PACKAGE_VERSION = "6.9.9",
+        .VERSION = "6.9.9",
+        .HAVE_STDINT_H = true,
+        .HAVE_SYS_TIMES_H = true,
+        .HAVE_SYS_TIME_H = true,
+        .HAVE_SYS_TYPES_H = true,
+        .HAVE_UNISTD_H = true,
+        .HAVE_INTTYPES_H = true,
+        .SIZEOF_INT = t.c_type_byte_size(.int),
+        .SIZEOF_LONG = t.c_type_byte_size(.long),
+        .SIZEOF_LONG_LONG = t.c_type_byte_size(.longlong),
+        .SIZEOF_VOIDP = t.ptrBitWidth() / t.c_type_bit_size(.char),
+    }));
+
+    var flags = std.ArrayList([]const u8).init(b.allocator);
+    defer flags.deinit();
+    try flags.appendSlice(&.{});
+    lib.addCSourceFiles(.{
+        .dependency = upstream,
+        .flags = flags.items,
+        .files = &.{
+            "src/regerror.c",
+            "src/regparse.c",
+            "src/regext.c",
+            "src/regcomp.c",
+            "src/regexec.c",
+            "src/reggnu.c",
+            "src/regenc.c",
+            "src/regsyntax.c",
+            "src/regtrav.c",
+            "src/regversion.c",
+            "src/st.c",
+            "src/onig_init.c",
+            "src/unicode.c",
+            "src/ascii.c",
+            "src/utf8.c",
+            "src/utf16_be.c",
+            "src/utf16_le.c",
+            "src/utf32_be.c",
+            "src/utf32_le.c",
+            "src/euc_jp.c",
+            "src/sjis.c",
+            "src/iso8859_1.c",
+            "src/iso8859_2.c",
+            "src/iso8859_3.c",
+            "src/iso8859_4.c",
+            "src/iso8859_5.c",
+            "src/iso8859_6.c",
+            "src/iso8859_7.c",
+            "src/iso8859_8.c",
+            "src/iso8859_9.c",
+            "src/iso8859_10.c",
+            "src/iso8859_11.c",
+            "src/iso8859_13.c",
+            "src/iso8859_14.c",
+            "src/iso8859_15.c",
+            "src/iso8859_16.c",
+            "src/euc_tw.c",
+            "src/euc_kr.c",
+            "src/big5.c",
+            "src/gb18030.c",
+            "src/koi8_r.c",
+            "src/cp1251.c",
+            "src/euc_jp_prop.c",
+            "src/sjis_prop.c",
+            "src/unicode_unfold_key.c",
+            "src/unicode_fold1_key.c",
+            "src/unicode_fold2_key.c",
+            "src/unicode_fold3_key.c",
+        },
+    });
+
+    lib.installHeadersDirectoryOptions(.{
+        .source_dir = upstream.path("src"),
+        .install_dir = .header,
+        .install_subdir = "",
+        .include_extensions = &.{".h"},
+    });
+
+    return lib;
+}
diff --git a/pkg/oniguruma/build.zig.zon b/pkg/oniguruma/build.zig.zon
new file mode 100644
index 000000000..8e08a0ad2
--- /dev/null
+++ b/pkg/oniguruma/build.zig.zon
@@ -0,0 +1,11 @@
+.{
+    .name = "oniguruma",
+    .version = "6.9.9",
+    .paths = .{""},
+    .dependencies = .{
+        .oniguruma = .{
+            .url = "https://github.com/kkos/oniguruma/archive/refs/tags/v6.9.9.tar.gz",
+            .hash = "1220c15e72eadd0d9085a8af134904d9a0f5dfcbed5f606ad60edc60ebeccd9706bb",
+        },
+    },
+}
diff --git a/pkg/oniguruma/c.zig b/pkg/oniguruma/c.zig
new file mode 100644
index 000000000..1788a6289
--- /dev/null
+++ b/pkg/oniguruma/c.zig
@@ -0,0 +1,3 @@
+pub usingnamespace @cImport({
+    @cInclude("oniguruma.h");
+});
diff --git a/pkg/oniguruma/errors.zig b/pkg/oniguruma/errors.zig
new file mode 100644
index 000000000..f13eda95a
--- /dev/null
+++ b/pkg/oniguruma/errors.zig
@@ -0,0 +1,203 @@
+const c = @import("c.zig");
+const Encoding = @import("types.zig").Encoding;
+
+/// Maximum error message length.
+pub const MAX_ERROR_LEN = c.ONIG_MAX_ERROR_MESSAGE_LEN;
+
+/// Convert an Oniguruma error to an error.
+pub fn convertError(code: c_int) !c_int {
+    if (code >= 0) return code;
+    inline for (error_code_map) |m| {
+        if (m[1] == code) return m[0];
+    }
+
+    return Error.Unknown;
+}
+
+/// Convert an error code to a string. buf must be at least
+/// MAX_ERROR_LEN bytes long; this is asserted because only the buffer
+/// pointer (not its length) is handed to Oniguruma.
+pub fn errorString(buf: []u8, code: c_int) ![]u8 {
+    @import("std").debug.assert(buf.len >= MAX_ERROR_LEN);
+    const len = c.onig_error_code_to_str(buf.ptr, code);
+    return buf[0..@intCast(len)];
+}
+
+/// The Oniguruma error info type, matching the C type exactly.
+pub const ErrorInfo = extern struct {
+    encoding: *Encoding,
+    par: [*]u8,
+    par_end: [*]u8,
+};
+
+/// All possible Oniguruma errors.
+pub const Error = error{
+    Mismatch,
+    NoSupportConfig,
+    Abort,
+    Memory,
+    TypeBug,
+    ParserBug,
+    StackBug,
+    UndefinedBytecode,
+    UnexpectedBytecode,
+    MatchStackLimitOver,
+    ParseDepthLimitOver,
+    RetryLimitInMatchOver,
+    RetryLimitInSearchOver,
+    SubexpCallLimitInSearchOver,
+    DefaultEncodingIsNotSet,
+    SpecifiedEncodingCantConvertToWideChar,
+    FailToInitialize,
+    InvalidArgument,
+    EndPatternAtLeftBrace,
+    EndPatternAtLeftBracket,
+    EmptyCharClass,
+    PrematureEndOfCharClass,
+    EndPatternAtEscape,
+    EndPatternAtMeta,
+    EndPatternAtControl,
+    MetaCodeSyntax,
+    ControlCodeSyntax,
+    CharClassValueAtEndOfRange,
+    CharClassValueAtStartOfRange,
+    UnmatchedRangeSpecifierInCharClass,
+    TargetOfRepeatOperatorNotSpecified,
+    TargetOfRepeatOperatorInvalid,
+    NestedRepeatOperator,
+    UnmatchedCloseParenthesis,
+    EndPatternWithUnmatchedParenthesis,
+    EndPatternInGroup,
+    UndefinedGroupOption,
+    InvalidGroupOption,
+    InvalidPosixBracketType,
+    InvalidLookBehindPattern,
+    InvalidRepeatRangePattern,
+    TooBigNumber,
+    TooBigNumberForRepeatRange,
+    UpperSmallerThanLowerInRepeatRange,
+    EmptyRangeInCharClass,
+    MismatchCodeLengthInClassRange,
+    TooManyMultiByteRanges,
+    TooShortMultiByteString,
+    TooBigBackrefNumber,
+    InvalidBackref,
+    NumberedBackrefOrCallNotAllowed,
+    TooManyCaptures,
+    TooLongWideCharValue,
+    UndefinedOperator,
+    EmptyGroupName,
+    InvalidGroupName,
+    InvalidCharInGroupName,
+    UndefinedNameReference,
+    UndefinedGroupReference,
+    MultiplexDefinedName,
+    MultiplexDefinitionNameCall,
+    NeverEndingRecursion,
+    GroupNumberOverForCaptureHistory,
+    InvalidCharPropertyName,
+    InvalidIfElseSyntax,
+    InvalidAbsentGroupPattern,
+    InvalidAbsentGroupGeneratorPattern,
+    InvalidCalloutPattern,
+    InvalidCalloutName,
+    UndefinedCalloutName,
+    InvalidCalloutBody,
+    InvalidCalloutTagName,
+    InvalidCalloutArg,
+    InvalidCodePointValue,
+    InvalidWideCharValue,
+    TooBigWideCharValue,
+    NotSupportedEncodingCombination,
+    InvalidCombinationOfOptions,
+    TooManyUserDefinedObjects,
+    TooLongPropertyName,
+    VeryInefficientPattern,
+    LibraryIsNotInitialized,
+    Unknown,
+};
+
+const error_code_map: []const struct { Error, c_int } = &.{
+    .{ Error.Mismatch, c.ONIG_MISMATCH },
+    .{ Error.NoSupportConfig, c.ONIG_NO_SUPPORT_CONFIG },
+    .{ Error.Abort, c.ONIG_ABORT },
+    .{ Error.Memory, c.ONIGERR_MEMORY },
+    .{ Error.TypeBug, c.ONIGERR_TYPE_BUG },
+    .{ Error.ParserBug, c.ONIGERR_PARSER_BUG },
+    .{ Error.StackBug, c.ONIGERR_STACK_BUG },
+    .{ Error.UndefinedBytecode, c.ONIGERR_UNDEFINED_BYTECODE },
+    .{ Error.UnexpectedBytecode, c.ONIGERR_UNEXPECTED_BYTECODE },
+    .{ Error.MatchStackLimitOver, c.ONIGERR_MATCH_STACK_LIMIT_OVER },
+    .{ Error.ParseDepthLimitOver, c.ONIGERR_PARSE_DEPTH_LIMIT_OVER },
+    .{ Error.RetryLimitInMatchOver, c.ONIGERR_RETRY_LIMIT_IN_MATCH_OVER },
+    .{ Error.RetryLimitInSearchOver, c.ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER },
+    .{ Error.SubexpCallLimitInSearchOver, c.ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER },
+    .{ Error.DefaultEncodingIsNotSet, c.ONIGERR_DEFAULT_ENCODING_IS_NOT_SET },
+    .{ Error.SpecifiedEncodingCantConvertToWideChar, c.ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR },
+    .{ Error.FailToInitialize, c.ONIGERR_FAIL_TO_INITIALIZE },
+    .{ Error.InvalidArgument, c.ONIGERR_INVALID_ARGUMENT },
+    .{ Error.EndPatternAtLeftBrace, c.ONIGERR_END_PATTERN_AT_LEFT_BRACE },
+    .{ Error.EndPatternAtLeftBracket, c.ONIGERR_END_PATTERN_AT_LEFT_BRACKET },
+    .{ Error.EmptyCharClass, c.ONIGERR_EMPTY_CHAR_CLASS },
+    .{ Error.PrematureEndOfCharClass, c.ONIGERR_PREMATURE_END_OF_CHAR_CLASS },
+    .{ Error.EndPatternAtEscape, c.ONIGERR_END_PATTERN_AT_ESCAPE },
+    .{ Error.EndPatternAtMeta, c.ONIGERR_END_PATTERN_AT_META },
+    .{ Error.EndPatternAtControl, c.ONIGERR_END_PATTERN_AT_CONTROL },
+    .{ Error.MetaCodeSyntax, c.ONIGERR_META_CODE_SYNTAX },
+    .{ Error.ControlCodeSyntax, c.ONIGERR_CONTROL_CODE_SYNTAX },
+    .{ Error.CharClassValueAtEndOfRange, c.ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE },
+    .{ Error.CharClassValueAtStartOfRange, c.ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE },
+    .{ Error.UnmatchedRangeSpecifierInCharClass, c.ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS },
+    .{ Error.TargetOfRepeatOperatorNotSpecified, c.ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED },
+    .{ Error.TargetOfRepeatOperatorInvalid, c.ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID },
+    .{ Error.NestedRepeatOperator, c.ONIGERR_NESTED_REPEAT_OPERATOR },
+    .{ Error.UnmatchedCloseParenthesis, c.ONIGERR_UNMATCHED_CLOSE_PARENTHESIS },
+    .{ Error.EndPatternWithUnmatchedParenthesis, c.ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS },
+    .{ Error.EndPatternInGroup, c.ONIGERR_END_PATTERN_IN_GROUP },
+    .{ Error.UndefinedGroupOption, c.ONIGERR_UNDEFINED_GROUP_OPTION },
+    .{ Error.InvalidGroupOption, c.ONIGERR_INVALID_GROUP_OPTION },
+    .{ Error.InvalidPosixBracketType, c.ONIGERR_INVALID_POSIX_BRACKET_TYPE },
+    .{ Error.InvalidLookBehindPattern, c.ONIGERR_INVALID_LOOK_BEHIND_PATTERN },
+    .{ Error.InvalidRepeatRangePattern, c.ONIGERR_INVALID_REPEAT_RANGE_PATTERN },
+    .{ Error.TooBigNumber, c.ONIGERR_TOO_BIG_NUMBER },
+    .{ Error.TooBigNumberForRepeatRange, c.ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE },
+    .{ Error.UpperSmallerThanLowerInRepeatRange, c.ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE },
+    .{ Error.EmptyRangeInCharClass, c.ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS },
+    .{ Error.MismatchCodeLengthInClassRange, c.ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE },
+    .{ Error.TooManyMultiByteRanges, c.ONIGERR_TOO_MANY_MULTI_BYTE_RANGES },
+    .{ Error.TooShortMultiByteString, c.ONIGERR_TOO_SHORT_MULTI_BYTE_STRING },
+    .{ Error.TooBigBackrefNumber, c.ONIGERR_TOO_BIG_BACKREF_NUMBER },
+    .{ Error.InvalidBackref, c.ONIGERR_INVALID_BACKREF },
+    .{ Error.NumberedBackrefOrCallNotAllowed, c.ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED },
+    .{ Error.TooManyCaptures, c.ONIGERR_TOO_MANY_CAPTURES },
+    .{ Error.TooLongWideCharValue, c.ONIGERR_TOO_LONG_WIDE_CHAR_VALUE },
+    .{ Error.UndefinedOperator, c.ONIGERR_UNDEFINED_OPERATOR },
+    .{ Error.EmptyGroupName, c.ONIGERR_EMPTY_GROUP_NAME },
+    .{ Error.InvalidGroupName, c.ONIGERR_INVALID_GROUP_NAME },
+    .{ Error.InvalidCharInGroupName, c.ONIGERR_INVALID_CHAR_IN_GROUP_NAME },
+    .{ Error.UndefinedNameReference, c.ONIGERR_UNDEFINED_NAME_REFERENCE },
+    .{ Error.UndefinedGroupReference, c.ONIGERR_UNDEFINED_GROUP_REFERENCE },
+    .{ Error.MultiplexDefinedName, c.ONIGERR_MULTIPLEX_DEFINED_NAME },
+    .{ Error.MultiplexDefinitionNameCall, c.ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL },
+    .{ Error.NeverEndingRecursion, c.ONIGERR_NEVER_ENDING_RECURSION },
+    .{ Error.GroupNumberOverForCaptureHistory, c.ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY },
+    .{ Error.InvalidCharPropertyName, c.ONIGERR_INVALID_CHAR_PROPERTY_NAME },
+    .{ Error.InvalidIfElseSyntax, c.ONIGERR_INVALID_IF_ELSE_SYNTAX },
+    .{ Error.InvalidAbsentGroupPattern, c.ONIGERR_INVALID_ABSENT_GROUP_PATTERN },
+    .{ Error.InvalidAbsentGroupGeneratorPattern, c.ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN },
+    .{ Error.InvalidCalloutPattern, c.ONIGERR_INVALID_CALLOUT_PATTERN },
+    .{ Error.InvalidCalloutName, c.ONIGERR_INVALID_CALLOUT_NAME },
+    .{ Error.UndefinedCalloutName, c.ONIGERR_UNDEFINED_CALLOUT_NAME },
+    .{ Error.InvalidCalloutBody, c.ONIGERR_INVALID_CALLOUT_BODY },
+    .{ Error.InvalidCalloutTagName, c.ONIGERR_INVALID_CALLOUT_TAG_NAME },
+    .{ Error.InvalidCalloutArg, c.ONIGERR_INVALID_CALLOUT_ARG },
+    .{ Error.InvalidCodePointValue, c.ONIGERR_INVALID_CODE_POINT_VALUE },
+    .{ Error.InvalidWideCharValue, c.ONIGERR_INVALID_WIDE_CHAR_VALUE },
+    .{ Error.TooBigWideCharValue, c.ONIGERR_TOO_BIG_WIDE_CHAR_VALUE },
+    .{ Error.NotSupportedEncodingCombination, c.ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION },
+    .{ Error.InvalidCombinationOfOptions, c.ONIGERR_INVALID_COMBINATION_OF_OPTIONS },
+    .{ Error.TooManyUserDefinedObjects, c.ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS },
+    .{ Error.TooLongPropertyName, c.ONIGERR_TOO_LONG_PROPERTY_NAME },
+    .{ Error.VeryInefficientPattern, c.ONIGERR_VERY_INEFFICIENT_PATTERN },
+    .{ Error.LibraryIsNotInitialized, c.ONIGERR_LIBRARY_IS_NOT_INITIALIZED },
+};
diff --git a/pkg/oniguruma/init.zig b/pkg/oniguruma/init.zig
new file mode 100644
index 000000000..bf7420e40
--- /dev/null
+++ b/pkg/oniguruma/init.zig
@@ -0,0 +1,16 @@
+const c = @import("c.zig");
+const Encoding = @import("types.zig").Encoding;
+const errors = @import("errors.zig");
+
+/// Call once per process to initialize Oniguruma. This should be given
+/// the encodings that the program will use.
+pub fn init(encs: []const *Encoding) !void {
+    _ = try errors.convertError(c.onig_initialize(
+        @constCast(@ptrCast(@alignCast(encs.ptr))),
+        @intCast(encs.len),
+    ));
+}
+
+pub fn deinit() void {
+    _ = c.onig_end();
+}
diff --git a/pkg/oniguruma/main.zig b/pkg/oniguruma/main.zig
new file mode 100644
index 000000000..b4eb3053b
--- /dev/null
+++ b/pkg/oniguruma/main.zig
@@ -0,0 +1,11 @@
+pub usingnamespace @import("init.zig");
+pub usingnamespace @import("errors.zig");
+pub usingnamespace @import("regex.zig");
+pub usingnamespace @import("region.zig");
+pub usingnamespace @import("types.zig");
+pub const c = @import("c.zig");
+pub const testing = @import("testing.zig");
+
+test {
+    @import("std").testing.refAllDecls(@This());
+}
diff --git a/pkg/oniguruma/regex.zig b/pkg/oniguruma/regex.zig
new file mode 100644
index 000000000..db9689ee2
--- /dev/null
+++ b/pkg/oniguruma/regex.zig
@@ -0,0 +1,90 @@
+const std = @import("std");
+const c = @import("c.zig");
+const types = @import("types.zig");
+const errors = @import("errors.zig");
+const testEnsureInit = @import("testing.zig").ensureInit;
+const Region = @import("region.zig").Region;
+const Error = errors.Error;
+const ErrorInfo = errors.ErrorInfo;
+const Encoding = types.Encoding;
+const Option = types.Option;
+const Syntax = types.Syntax;
+
+pub const Regex = struct {
+    value: c.OnigRegex,
+
+    pub fn init(
+        pattern: []const u8,
+        options: Option,
+        enc: *Encoding,
+        syntax: *Syntax,
+        err: ?*ErrorInfo,
+    ) !Regex {
+        var self: Regex = undefined;
+        _ = try errors.convertError(c.onig_new(
+            &self.value,
+            pattern.ptr,
+            pattern.ptr + pattern.len,
+            options.int(),
+            @ptrCast(@alignCast(enc)),
+            @ptrCast(@alignCast(syntax)),
+            @ptrCast(err),
+        ));
+        return self;
+    }
+
+    pub fn deinit(self: *Regex) void {
+        c.onig_free(self.value);
+    }
+
+    /// Search an entire string for matches. This always returns a region
+    /// which may heap allocate (C allocator); the caller must call deinit
+    /// on the returned region. If the search returns an error (including
+    /// Error.Mismatch) the region is released before returning.
+    pub fn search(
+        self: *Regex,
+        str: []const u8,
+        options: Option,
+    ) !Region {
+        var region: Region = .{};
+        errdefer region.deinit();
+        _ = try self.searchAdvanced(str, 0, str.len, &region, options);
+        return region;
+    }
+
+    /// onig_search directly
+    pub fn searchAdvanced(
+        self: *Regex,
+        str: []const u8,
+        start: usize,
+        end: usize,
+        region: *Region,
+        options: Option,
+    ) !usize {
+        const pos = try errors.convertError(c.onig_search(
+            self.value,
+            str.ptr,
+            str.ptr + str.len,
+            str.ptr + start,
+            str.ptr + end,
+            @ptrCast(region),
+            options.int(),
+        ));
+
+        return @intCast(pos);
+    }
+};
+
+test {
+    const testing = std.testing;
+
+    try testEnsureInit();
+    var re = try Regex.init("foo", .{}, Encoding.utf8, Syntax.default, null);
+    defer re.deinit();
+
+    var reg = try re.search("hello foo bar", .{});
+    defer reg.deinit();
+    try testing.expectEqual(@as(usize, 1), reg.count());
+
+    try testing.expectError(Error.Mismatch, re.search("hello", .{}));
+}
diff --git a/pkg/oniguruma/region.zig b/pkg/oniguruma/region.zig
new file mode 100644
index 000000000..1b99b55b7
--- /dev/null
+++ b/pkg/oniguruma/region.zig
@@ -0,0 +1,51 @@
+const std = @import("std");
+const c = @import("c.zig");
+
+pub const Region = extern struct {
+    allocated: c_int = 0,
+    num_regs: c_int = 0,
+    beg: ?[*]c_int = null,
+    end: ?[*]c_int = null,
+    history_root: ?*c.OnigCaptureTreeNode = null, // TODO: convert to Zig
+
+    pub fn deinit(self: *Region) void {
+        // We never free ourself because allocation of Region in the Zig
+        // bindings is handled by the Zig program.
+        c.onig_region_free(@ptrCast(self), 0);
+    }
+
+    /// Count the number of matches
+    pub fn count(self: *const Region) usize {
+        return @intCast(self.num_regs);
+    }
+
+    /// Iterate over the matched ranges.
+    pub fn iterator(self: *const Region) Iterator {
+        return .{ .region = self };
+    }
+
+    pub fn starts(self: *const Region) []const c_int {
+        if (self.num_regs == 0) return &.{};
+        return self.beg.?[0..@intCast(self.num_regs)];
+    }
+
+    pub fn ends(self: *const Region) []const c_int {
+        if (self.num_regs == 0) return &.{};
+        return self.end.?[0..@intCast(self.num_regs)];
+    }
+
+    pub const Iterator = struct {
+        region: *const Region,
+        i: usize = 0,
+
+        /// The next range
+        pub fn next(self: *Iterator) ?[2]usize {
+            if (self.i >= self.region.num_regs) return null;
+            defer self.i += 1;
+            return .{
+                @intCast(self.region.beg.?[self.i]),
+                @intCast(self.region.end.?[self.i]),
+            };
+        }
+    };
+};
diff --git a/pkg/oniguruma/testing.zig b/pkg/oniguruma/testing.zig
new file mode 100644
index 000000000..234b5e759
--- /dev/null
+++ b/pkg/oniguruma/testing.zig
@@ -0,0 +1,15 @@
+const init = @import("init.zig");
+const Encoding = @import("types.zig").Encoding;
+
+var initialized: bool = false;
+
+/// Call this function before any other tests in this package to ensure that
+/// the oni library is initialized. This should only be used for tests
+/// and only when you're sure this is the ONLY way that oni is being
+/// initialized.
+///
+/// This always only initializes the encodings the tests use.
+pub fn ensureInit() !void {
+    if (!initialized) try init.init(&.{Encoding.utf8});
+    initialized = true;
+}
diff --git a/pkg/oniguruma/types.zig b/pkg/oniguruma/types.zig
new file mode 100644
index 000000000..3eafc0e90
--- /dev/null
+++ b/pkg/oniguruma/types.zig
@@ -0,0 +1,96 @@
+const std = @import("std");
+const c = @import("c.zig");
+
+pub const Encoding = opaque {
+    pub const ascii: *Encoding = @ptrCast(c.ONIG_ENCODING_ASCII);
+    pub const iso_8859_1: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_1);
+    pub const iso_8859_2: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_2);
+    pub const iso_8859_3: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_3);
+    pub const iso_8859_4: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_4);
+    pub const iso_8859_5: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_5);
+    pub const iso_8859_6: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_6);
+    pub const iso_8859_7: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_7);
+    pub const iso_8859_8: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_8);
+    pub const iso_8859_9: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_9);
+    pub const iso_8859_10: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_10);
+    pub const iso_8859_11: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_11);
+    pub const iso_8859_13: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_13);
+    pub const iso_8859_14: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_14);
+    pub const iso_8859_15: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_15);
+    pub const iso_8859_16: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_16);
+    pub const utf8: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF8);
+    pub const utf16_be: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF16_BE);
+    pub const utf16_le: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF16_LE);
+    pub const utf32_be: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF32_BE);
+    pub const utf32_le: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF32_LE);
+    pub const euc_jp: *Encoding = @ptrCast(c.ONIG_ENCODING_EUC_JP);
+    pub const euc_tw: *Encoding = @ptrCast(c.ONIG_ENCODING_EUC_TW);
+    pub const euc_kr: *Encoding = @ptrCast(c.ONIG_ENCODING_EUC_KR);
+    pub const euc_cn: *Encoding = @ptrCast(c.ONIG_ENCODING_EUC_CN);
+    pub const sjis: *Encoding = @ptrCast(c.ONIG_ENCODING_SJIS);
+    pub const koi8: *Encoding = @ptrCast(c.ONIG_ENCODING_KOI8);
+    pub const koi8_r: *Encoding = @ptrCast(c.ONIG_ENCODING_KOI8_R);
+    pub const cp1251: *Encoding = @ptrCast(c.ONIG_ENCODING_CP1251);
+    pub const big5: *Encoding = @ptrCast(c.ONIG_ENCODING_BIG5);
+    pub const gb18030: *Encoding = @ptrCast(c.ONIG_ENCODING_GB18030);
+};
+
+pub const Syntax = opaque {
+    pub const default: *Syntax = @ptrCast(c.ONIG_SYNTAX_ONIGURUMA);
+    pub const asis: *Syntax = @ptrCast(c.ONIG_SYNTAX_ASIS);
+    pub const posix_basic: *Syntax = @ptrCast(c.ONIG_SYNTAX_POSIX_BASIC);
+    pub const posix_extended: *Syntax = @ptrCast(c.ONIG_SYNTAX_POSIX_EXTENDED);
+    pub const emacs: *Syntax = @ptrCast(c.ONIG_SYNTAX_EMACS);
+    pub const grep: *Syntax = @ptrCast(c.ONIG_SYNTAX_GREP);
+    pub const gnu_regex: *Syntax = @ptrCast(c.ONIG_SYNTAX_GNU_REGEX);
+    pub const java: *Syntax = @ptrCast(c.ONIG_SYNTAX_JAVA);
+    pub const perl: *Syntax = @ptrCast(c.ONIG_SYNTAX_PERL);
+    pub const perl_ng: *Syntax = @ptrCast(c.ONIG_SYNTAX_PERL_NG);
+    pub const ruby: *Syntax = @ptrCast(c.ONIG_SYNTAX_RUBY);
+    pub const oniguruma: *Syntax = @ptrCast(c.ONIG_SYNTAX_ONIGURUMA);
+};
+
+pub const Option = packed struct(c_uint) {
+    ignorecase: bool = false,
+    extend: bool = false,
+    multiline: bool = false,
+    singleline: bool = false,
+    find_longest: bool = false,
+    find_not_empty: bool = false,
+    negate_singleline: bool = false,
+    dont_capture_group: bool = false,
+    capture_group: bool = false,
+    // search time
+    notbol: bool = false,
+    noteol: bool = false,
+    posix_region: bool = false,
+    check_validity_of_string: bool = false,
+    // compile time
+    ignorecase_is_ascii: bool = false,
+    word_is_ascii: bool = false,
+    digit_is_ascii: bool = false,
+    space_is_ascii: bool = false,
+    posix_is_ascii: bool = false,
+    text_segment_extended_grapheme_cluster: bool = false,
+    text_segment_word: bool = false,
+    // search time
+    not_begin_string: bool = false,
+    not_end_string: bool = false,
+    not_begin_position: bool = false,
+    callback_each_match: bool = false,
+    match_whole_string: bool = false,
+
+    _padding: u7 = 0,
+
+    pub fn int(self: Option) c_uint {
+        return @bitCast(self);
+    }
+
+    test "order" {
+        const testing = std.testing;
+        const opt: Option = .{ .extend = true };
+        try testing.expectEqual(c.ONIG_OPTION_EXTEND, opt.int());
+    }
+};
+
+test {}
diff --git a/src/Surface.zig b/src/Surface.zig
index 698f218f5..43556d53c 100644
--- a/src/Surface.zig
+++ b/src/Surface.zig
@@ -20,6 +20,7 @@ const builtin = @import("builtin");
 const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 const ArenaAllocator = std.heap.ArenaAllocator;
+const oni = @import("oniguruma");
 const ziglyph = @import("ziglyph");
 const main = @import("main.zig");
 const renderer = @import("renderer.zig");
@@ -137,6 +138,13 @@ const Mouse = struct {
 
     /// True if the mouse is hidden
     hidden: bool = false,
+
+    /// True if the mouse position is currently over a link.
+    over_link: bool = false,
+
+    /// The last x/y in the cursor position for links. We use this to
+    /// only process link hover events when the mouse actually moves cells.
+    link_point: ?terminal.point.Viewport = null,
 };
 
 /// The configuration that a surface has, this is copied from the main
@@ -165,12 +173,41 @@ const DerivedConfig = struct {
     window_padding_y: u32,
     window_padding_balance: bool,
     title: ?[:0]const u8,
+    links: []const Link,
+
+    const Link = struct {
+        regex: oni.Regex,
+        action: input.Link.Action,
+    };
 
     pub fn init(alloc_gpa: Allocator, config: *const configpkg.Config) !DerivedConfig {
         var arena = ArenaAllocator.init(alloc_gpa);
         errdefer arena.deinit();
         const alloc = arena.allocator();
 
+        // Build all of our links
+        const links = links: {
+            var links = std.ArrayList(Link).init(alloc);
+            defer links.deinit();
+            // If a later step in this block fails, release the regexes that
+            // were already appended; each one needs its own deinit call.
+            errdefer for (links.items) |*l| l.regex.deinit();
+            for (config.link.links.items) |link| {
+                var regex = try link.oniRegex();
+                errdefer regex.deinit();
+                try links.append(.{
+                    .regex = regex,
+                    .action = link.action,
+                });
+            }
+
+            break :links try links.toOwnedSlice();
+        };
+        errdefer {
+            for (links) |*link| link.regex.deinit();
+            alloc.free(links);
+        }
+
         return .{
             .original_font_size = config.@"font-size",
             .keybind = try config.keybind.clone(alloc),
@@ -192,6 +229,7 @@ const DerivedConfig = struct {
             .window_padding_y = config.@"window-padding-y",
             .window_padding_balance = config.@"window-padding-balance",
             .title = config.title,
+            .links = links,
 
             // Assignments happen sequentially so we have to do this last
             // so that the memory is captured from allocs above.
@@ -1197,6 +1235,18 @@ pub fn keyCallback(
         self.hideMouse();
     }
 
+    // If our mouse modifiers change, we run a cursor position event.
+    // This handles the scenario where URL highlighting should be
+    // toggled for example.
+    if (!self.mouse.mods.equal(event.mods)) mouse_mods: {
+        // We set this to null to force link reprocessing since
+        // mod changes can affect link highlighting.
+        self.mouse.link_point = null;
+        self.mouse.mods = event.mods;
+        const pos = self.rt_surface.getCursorPos() catch break :mouse_mods;
+        self.cursorPosCallback(pos) catch |err| log.warn("error in cursor pos callback err={}", .{err});
+    }
+
     // When we are in the middle of a mouse event and we press shift,
     // we change the mouse to a text shape so that selection appears
     // possible.
@@ -1842,6 +1892,18 @@ pub fn mouseButtonCallback(
         }
     }
 
+    // Handle link clicking. We want to do this before we do mouse
+    // reporting or any other mouse handling because a successfully
+    // clicked link will swallow the event.
+    if (button == .left and action == .release and self.mouse.over_link) {
+        const pos = try self.rt_surface.getCursorPos();
+        if (self.processLinks(pos)) |processed| {
+            if (processed) return;
+        } else |err| {
+            log.warn("error processing links err={}", .{err});
+        }
+    }
+
     // Report mouse events if enabled
     {
         self.renderer_state.mutex.lock();
@@ -1970,6 +2032,65 @@ pub fn mouseButtonCallback(
     }
 }
 
+/// Returns the link at the given cursor position, if any.
+fn linkAtPos(
+    self: *Surface,
+    pos: apprt.CursorPos,
+) !?struct {
+    DerivedConfig.Link,
+    terminal.Selection,
+} {
+    // If we have no configured links we can save a lot of work
+    if (self.config.links.len == 0) return null;
+
+    // Convert our cursor position to a screen point.
+    const mouse_pt = mouse_pt: {
+        const viewport_point = self.posToViewport(pos.x, pos.y);
+        break :mouse_pt viewport_point.toScreen(&self.io.terminal.screen);
+    };
+
+    // Get the line we're hovering over.
+    const line = self.io.terminal.screen.getLine(mouse_pt) orelse
+        return null;
+    const strmap = try line.stringMap(self.alloc);
+    defer strmap.deinit(self.alloc);
+
+    // Go through each link and see if we clicked it
+    for (self.config.links) |link| {
+        var it = strmap.searchIterator(link.regex);
+        while (true) {
+            var match = (try it.next()) orelse break;
+            defer match.deinit();
+            const sel = match.selection();
+            if (!sel.contains(mouse_pt)) continue;
+            return .{ link, sel };
+        }
+    }
+
+    return null;
+}
+
+/// Attempt to invoke the action of any link that is under the
+/// given position.
+///
+/// Requires the renderer state mutex is held.
+fn processLinks(self: *Surface, pos: apprt.CursorPos) !bool {
+    const link, const sel = try self.linkAtPos(pos) orelse return false;
+    switch (link.action) {
+        .open => {
+            const str = try self.io.terminal.screen.selectionString(
+                self.alloc,
+                sel,
+                false,
+            );
+            defer self.alloc.free(str);
+            try internal_os.open(self.alloc, str);
+        },
+    }
+
+    return true;
+}
+
 pub fn cursorPosCallback(
     self: *Surface,
     pos: apprt.CursorPos,
@@ -1980,18 +2101,29 @@ pub fn cursorPosCallback(
     // Always show the mouse again if it is hidden
     if (self.mouse.hidden) self.showMouse();
 
+    // The mouse position in the viewport
+    const pos_vp = self.posToViewport(pos.x, pos.y);
+
+    // We always reset the over link status because it will be reprocessed
+    // below. But we need the old value to know if we need to undo mouse
+    // shape changes.
+    const over_link = self.mouse.over_link;
+    self.mouse.over_link = false;
+
     // We are reading/writing state for the remainder
     self.renderer_state.mutex.lock();
     defer self.renderer_state.mutex.unlock();
 
+    // Update our mouse state. We set this to null initially because we only
+    // want to set it when we're not selecting or doing any other mouse
+    // event.
+    self.renderer_state.mouse.point = null;
+
     // If we have an inspector, we need to always record position information
     if (self.inspector) |insp| {
         insp.mouse.last_xpos = pos.x;
         insp.mouse.last_ypos = pos.y;
-
-        const point = self.posToViewport(pos.x, pos.y);
-        insp.mouse.last_point = point.toScreen(&self.io.terminal.screen);
-
+        insp.mouse.last_point = pos_vp.toScreen(&self.io.terminal.screen);
         try self.queueRender();
     }
 
@@ -1999,7 +2131,6 @@ pub fn cursorPosCallback(
     if (self.io.terminal.flags.mouse_event != .none) report: {
         // Shift overrides mouse "grabbing" in the window, taken from Kitty.
         if (self.mouse.mods.shift and
-            self.mouse.click_state[@intFromEnum(input.MouseButton.left)] == .press and
             !self.mouseShiftCapture(false)) break :report;
 
         // We use the first mouse button we find pressed in order to report
@@ -2011,41 +2142,73 @@ pub fn cursorPosCallback(
 
         try self.mouseReport(button, .motion, self.mouse.mods, pos);
 
+        // If we were previously over a link, we need to queue a
+        // render to undo the link state.
+        if (over_link) try self.queueRender();
+
         // If we're doing mouse motion tracking, we do not support text
         // selection.
         return;
     }
 
-    // If the cursor isn't clicked currently, it doesn't matter
-    if (self.mouse.click_state[@intFromEnum(input.MouseButton.left)] != .press) return;
+    // Handle cursor position for text selection
+    if (self.mouse.click_state[@intFromEnum(input.MouseButton.left)] == .press) {
+        // All roads lead to requiring a re-render at this point.
+        try self.queueRender();
 
-    // All roads lead to requiring a re-render at this point.
-    try self.queueRender();
+        // If our y is negative, we're above the window. In this case, we scroll
+        // up. The amount we scroll up is dependent on how negative we are.
+        // Note: one day, we can change this from distance to time based if we want.
+        //log.warn("CURSOR POS: {} {}", .{ pos, self.screen_size });
+        const max_y: f32 = @floatFromInt(self.screen_size.height);
+        if (pos.y < 0 or pos.y > max_y) {
+            const delta: isize = if (pos.y < 0) -1 else 1;
+            try self.io.terminal.scrollViewport(.{ .delta = delta });
 
-    // If our y is negative, we're above the window. In this case, we scroll
-    // up. The amount we scroll up is dependent on how negative we are.
-    // Note: one day, we can change this from distance to time based if we want.
-    //log.warn("CURSOR POS: {} {}", .{ pos, self.screen_size });
-    const max_y: f32 = @floatFromInt(self.screen_size.height);
-    if (pos.y < 0 or pos.y > max_y) {
-        const delta: isize = if (pos.y < 0) -1 else 1;
-        try self.io.terminal.scrollViewport(.{ .delta = delta });
+            // TODO: We want a timer or something to repeat while we're still
+            // at this cursor position. Right now, the user has to jiggle their
+            // mouse in order to scroll.
+        }
 
-        // TODO: We want a timer or something to repeat while we're still
-        // at this cursor position. Right now, the user has to jiggle their
-        // mouse in order to scroll.
+        // Convert to points
+        const screen_point = pos_vp.toScreen(&self.io.terminal.screen);
+
+        // Handle dragging depending on click count
+        switch (self.mouse.left_click_count) {
+            1 => self.dragLeftClickSingle(screen_point, pos.x),
+            2 => self.dragLeftClickDouble(screen_point),
+            3 => self.dragLeftClickTriple(screen_point),
+            else => unreachable,
+        }
+
+        return;
     }
 
-    // Convert to points
-    const viewport_point = self.posToViewport(pos.x, pos.y);
-    const screen_point = viewport_point.toScreen(&self.io.terminal.screen);
+    // Handle link hovering
+    if (self.mouse.link_point) |last_vp| {
+        // If our last link viewport point is unchanged, then don't process
+        // links. This avoids constantly reprocessing regular expressions
+        // for every pixel change.
+        if (last_vp.eql(pos_vp)) {
+            // We have to restore old values that are always cleared
+            if (over_link) {
+                self.mouse.over_link = over_link;
+                self.renderer_state.mouse.point = pos_vp;
+            }
 
-    // Handle dragging depending on click count
-    switch (self.mouse.left_click_count) {
-        1 => self.dragLeftClickSingle(screen_point, pos.x),
-        2 => self.dragLeftClickDouble(screen_point),
-        3 => self.dragLeftClickTriple(screen_point),
-        else => unreachable,
+            return;
+        }
+    }
+    self.mouse.link_point = pos_vp;
+
+    if (try self.linkAtPos(pos)) |_| {
+        self.renderer_state.mouse.point = pos_vp;
+        self.mouse.over_link = true;
+        try self.rt_surface.setMouseShape(.pointer);
+        try self.queueRender();
+    } else if (over_link) {
+        try self.rt_surface.setMouseShape(self.io.terminal.mouse_shape);
+        try self.queueRender();
     }
 }
 
diff --git a/src/apprt/gtk/Surface.zig b/src/apprt/gtk/Surface.zig
index 5f0ce48f2..66ccb2047 100644
--- a/src/apprt/gtk/Surface.zig
+++ b/src/apprt/gtk/Surface.zig
@@ -912,7 +912,6 @@ fn keyEvent(
     ud: ?*anyopaque,
 ) bool {
     const self = userdataSelf(ud.?);
-    const mods = translateMods(gtk_mods);
     const keyval_unicode = c.gdk_keyval_to_unicode(keyval);
     const event = c.gtk_event_controller_get_current_event(@ptrCast(ec_key));
 
@@ -986,6 +985,57 @@ fn keyEvent(
         if (entry.native == keycode) break :keycode entry.key;
     } else .invalid;
 
+    // Get our modifiers. We have to translate modifier-only presses here
+    // to state in the mods manually because GTK only does it AFTER the press
+    // event.
+ const mods = mods: { + var mods = translateMods(gtk_mods); + switch (physical_key) { + .left_shift => { + mods.shift = action == .press; + if (mods.shift) mods.sides.shift = .left; + }, + + .right_shift => { + mods.shift = action == .press; + if (mods.shift) mods.sides.shift = .right; + }, + + .left_control => { + mods.ctrl = action == .press; + if (mods.ctrl) mods.sides.ctrl = .left; + }, + + .right_control => { + mods.ctrl = action == .press; + if (mods.ctrl) mods.sides.ctrl = .right; + }, + + .left_alt => { + mods.alt = action == .press; + if (mods.alt) mods.sides.alt = .left; + }, + + .right_alt => { + mods.alt = action == .press; + if (mods.alt) mods.sides.alt = .right; + }, + + .left_super => { + mods.super = action == .press; + if (mods.super) mods.sides.super = .left; + }, + + .right_super => { + mods.super = action == .press; + if (mods.super) mods.sides.super = .right; + }, + + else => {}, + } + break :mods mods; + }; + // Get our consumed modifiers const consumed_mods: input.Mods = consumed: { const raw = c.gdk_key_event_get_consumed_modifiers(event); diff --git a/src/config.zig b/src/config.zig index e639f9b84..cd449fb38 100644 --- a/src/config.zig +++ b/src/config.zig @@ -3,6 +3,7 @@ const builtin = @import("builtin"); pub usingnamespace @import("config/key.zig"); pub const Config = @import("config/Config.zig"); pub const string = @import("config/string.zig"); +pub const url = @import("config/url.zig"); // Field types pub const CopyOnSelect = Config.CopyOnSelect; diff --git a/src/config/Config.zig b/src/config/Config.zig index a28bb2d2d..f49d21d82 100644 --- a/src/config/Config.zig +++ b/src/config/Config.zig @@ -14,6 +14,7 @@ const terminal = @import("../terminal/main.zig"); const internal_os = @import("../os/main.zig"); const cli = @import("../cli.zig"); +const url = @import("url.zig"); const Key = @import("key.zig").Key; const KeyValue = @import("key.zig").Value; const ErrorList = @import("ErrorList.zig"); @@ -329,6 +330,28 @@ command: ?[]const u8 
= null, /// indicate that it is a login shell, depending on the OS). @"command-arg": RepeatableString = .{}, +/// Match a regular expression against the terminal text and associate +/// clicking it with an action. This can be used to match URLs, file paths, +/// etc. Actions can be opening using the system opener (i.e. "open" or +/// "xdg-open") or executing any arbitrary binding action. +/// +/// Links that are configured earlier take precedence over links that +/// are configured later. +/// +/// A default link that matches a URL and opens it in the system opener +/// always exists. This can be disabled using "link-url". +/// +/// TODO: This can't currently be set! +link: RepeatableLink = .{}, + +/// Enable URL matching. URLs are matched on hover and open using the +/// default system application for the linked URL. +/// +/// The URL matcher is always lowest priority of any configured links +/// (see "link"). If you want to customize URL matching, use "link" +/// and disable this. +@"link-url": bool = true, + /// Start new windows in fullscreen. This setting applies to new /// windows and does not apply to tabs, splits, etc. However, this /// setting will apply to all new windows, not just the first one. @@ -1189,6 +1212,13 @@ pub fn default(alloc_gpa: Allocator) Allocator.Error!Config { ); } + // Add our default link for URL detection + try result.link.links.append(alloc, .{ + .regex = url.regex, + .action = .{ .open = {} }, + .highlight = .{ .hover = {} }, + }); + return result; } @@ -1527,6 +1557,10 @@ pub fn finalize(self: *Config) !void { // Minimmum window size if (self.@"window-width" > 0) self.@"window-width" = @max(10, self.@"window-width"); if (self.@"window-height" > 0) self.@"window-height" = @max(4, self.@"window-height"); + + // If URLs are disabled, cut off the first link. The first link is + // always the URL matcher. 
+ if (!self.@"link-url") self.link.links.items = self.link.links.items[1..]; } /// Callback for src/cli/args.zig to allow us to handle special cases @@ -2508,6 +2542,34 @@ pub const FontStyle = union(enum) { } }; +/// See "link" for documentation. +pub const RepeatableLink = struct { + const Self = @This(); + + links: std.ArrayListUnmanaged(inputpkg.Link) = .{}, + + pub fn parseCLI(self: *Self, alloc: Allocator, input_: ?[]const u8) !void { + _ = self; + _ = alloc; + _ = input_; + return error.NotImplemented; + } + + /// Deep copy of the struct. Required by Config. + pub fn clone(self: *const Self, alloc: Allocator) !Self { + _ = self; + _ = alloc; + return .{}; + } + + /// Compare if two of our value are requal. Required by Config. + pub fn equal(self: Self, other: Self) bool { + _ = self; + _ = other; + return true; + } +}; + /// Options for copy on select behavior. pub const CopyOnSelect = enum { /// Disables copy on select entirely. diff --git a/src/config/url.zig b/src/config/url.zig new file mode 100644 index 000000000..4cbfacdd4 --- /dev/null +++ b/src/config/url.zig @@ -0,0 +1,26 @@ +const std = @import("std"); +const oni = @import("oniguruma"); + +/// Default URL regex. This is used to detect URLs in terminal output. +/// This is here in the config package because one day the matchers will be +/// configurable and this will be a default. +/// +/// This is taken from the Alacritty project. +pub const regex = "(ipfs:|ipns:|magnet:|mailto:|gemini://|gopher://|https://|http://|news:|file:|git://|ssh:|ftp://)[^\u{0000}-\u{001F}\u{007F}-\u{009F}<>\x22\\s{-}\\^⟨⟩\x60]+"; + +test "url regex" { + try oni.testing.ensureInit(); + var re = try oni.Regex.init(regex, .{}, oni.Encoding.utf8, oni.Syntax.default, null); + defer re.deinit(); + + // The URL cases to test that our regex matches. Feel free to add to this + // as we find bugs or just want more coverage. 
+ const cases: []const []const u8 = &.{ + "https://example.com", + }; + + for (cases) |case| { + var reg = try re.search(case, .{}); + defer reg.deinit(); + } +} diff --git a/src/input.zig b/src/input.zig index f3afce97d..14140a524 100644 --- a/src/input.zig +++ b/src/input.zig @@ -7,6 +7,7 @@ pub const function_keys = @import("input/function_keys.zig"); pub const keycodes = @import("input/keycodes.zig"); pub const kitty = @import("input/kitty.zig"); pub const Binding = @import("input/Binding.zig"); +pub const Link = @import("input/Link.zig"); pub const KeyEncoder = @import("input/KeyEncoder.zig"); pub const InspectorMode = Binding.Action.InspectorMode; pub const SplitDirection = Binding.Action.SplitDirection; diff --git a/src/input/Link.zig b/src/input/Link.zig new file mode 100644 index 000000000..4fb9eadcf --- /dev/null +++ b/src/input/Link.zig @@ -0,0 +1,44 @@ +//! A link is a clickable element that can be used to trigger some action. +//! A link is NOT just a URL that opens in a browser. A link is any generic +//! regular expression match over terminal text that can trigger various +//! action types. +const Link = @This(); + +const oni = @import("oniguruma"); + +/// The regular expression that will be used to match the link. Ownership +/// of this memory is up to the caller. The link will never free this memory. +regex: []const u8, + +/// The action that will be triggered when the link is clicked. +action: Action, + +/// The situations in which the link will be highlighted. A link is only +/// clickable by the mouse when it is highlighted, so this also controls +/// when the link is clickable. +highlight: Highlight, + +pub const Action = union(enum) { + /// Open the full matched value using the default open program. + /// For example, on macOS this is "open" and on Linux this is "xdg-open". + open: void, +}; + +pub const Highlight = union(enum) { + /// Always highlight the link. 
+ always: void, + + /// Only highlight the link when the mouse is hovering over it. + hover: void, +}; + +/// Returns a new oni.Regex that can be used to match the link. +pub fn oniRegex(self: *const Link) !oni.Regex { + return try oni.Regex.init( + self.regex, + .{}, + oni.Encoding.utf8, + oni.Syntax.default, + null, + ); +} diff --git a/src/main.zig b/src/main.zig index 6d8ac9ad0..91167e721 100644 --- a/src/main.zig +++ b/src/main.zig @@ -6,6 +6,7 @@ const options = @import("build_options"); const glfw = @import("glfw"); const glslang = @import("glslang"); const macos = @import("macos"); +const oni = @import("oniguruma"); const tracy = @import("tracy"); const cli = @import("cli.zig"); const internal_os = @import("os/main.zig"); @@ -277,6 +278,9 @@ pub const GlobalState = struct { // Initialize glslang for shader compilation try glslang.init(); + // Initialize oniguruma for regex + try oni.init(&.{oni.Encoding.utf8}); + // Find our resources directory once for the app so every launch // hereafter can use this cached value. self.resources_dir = try internal_os.resourcesDir(self.alloc); diff --git a/src/os/main.zig b/src/os/main.zig index 7fdcb2d8b..1782601e0 100644 --- a/src/os/main.zig +++ b/src/os/main.zig @@ -9,6 +9,7 @@ pub usingnamespace @import("homedir.zig"); pub usingnamespace @import("locale.zig"); pub usingnamespace @import("macos_version.zig"); pub usingnamespace @import("mouse.zig"); +pub usingnamespace @import("open.zig"); pub usingnamespace @import("pipe.zig"); pub usingnamespace @import("resourcesdir.zig"); pub const TempDir = @import("TempDir.zig"); diff --git a/src/os/open.zig b/src/os/open.zig new file mode 100644 index 000000000..14e21111f --- /dev/null +++ b/src/os/open.zig @@ -0,0 +1,16 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const Allocator = std.mem.Allocator; + +/// Open a URL in the default handling application. 
+pub fn open(alloc: Allocator, url: []const u8) !void { + const argv = switch (builtin.os.tag) { + .linux => &.{ "xdg-open", url }, + .macos => &.{ "open", url }, + .windows => &.{ "rundll32", "url.dll,FileProtocolHandler", url }, + else => @compileError("unsupported OS"), + }; + + var exe = std.process.Child.init(argv, alloc); + try exe.spawn(); +} diff --git a/src/renderer/Metal.zig b/src/renderer/Metal.zig index 5c11595c4..205f7eafc 100644 --- a/src/renderer/Metal.zig +++ b/src/renderer/Metal.zig @@ -18,6 +18,7 @@ const terminal = @import("../terminal/main.zig"); const renderer = @import("../renderer.zig"); const math = @import("../math.zig"); const Surface = @import("../Surface.zig"); +const link = @import("link.zig"); const shadertoy = @import("shadertoy.zig"); const assert = std.debug.assert; const Allocator = std.mem.Allocator; @@ -153,6 +154,7 @@ pub const DerivedConfig = struct { invert_selection_fg_bg: bool, custom_shaders: std.ArrayListUnmanaged([]const u8), custom_shader_animation: bool, + links: link.Set, pub fn init( alloc_gpa: Allocator, @@ -174,6 +176,12 @@ pub const DerivedConfig = struct { font_styles.set(.italic, config.@"font-style-italic" != .false); font_styles.set(.bold_italic, config.@"font-style-bold-italic" != .false); + // Our link configs + const links = try link.Set.fromConfig( + alloc, + config.link.links.items, + ); + return .{ .background_opacity = @max(0, @min(1, config.@"background-opacity")), .font_thicken = config.@"font-thicken", @@ -208,12 +216,15 @@ pub const DerivedConfig = struct { .custom_shaders = custom_shaders, .custom_shader_animation = config.@"custom-shader-animation", + .links = links, .arena = arena, }; } pub fn deinit(self: *DerivedConfig) void { + const alloc = self.arena.allocator(); + self.links.deinit(alloc); self.arena.deinit(); } }; @@ -555,6 +566,7 @@ pub fn updateFrame( bg: terminal.color.RGB, selection: ?terminal.Selection, screen: terminal.Screen, + mouse: renderer.State.Mouse, preedit: 
?renderer.State.Preedit, cursor_style: ?renderer.CursorStyle, }; @@ -622,6 +634,7 @@ pub fn updateFrame( .bg = self.background_color, .selection = selection, .screen = screen_copy, + .mouse = state.mouse, .preedit = if (cursor_style != null) state.preedit else null, .cursor_style = cursor_style, }; @@ -632,6 +645,7 @@ pub fn updateFrame( try self.rebuildCells( critical.selection, &critical.screen, + critical.mouse, critical.preedit, critical.cursor_style, ); @@ -1354,6 +1368,7 @@ fn rebuildCells( self: *Metal, term_selection: ?terminal.Selection, screen: *terminal.Screen, + mouse: renderer.State.Mouse, preedit: ?renderer.State.Preedit, cursor_style_: ?renderer.CursorStyle, ) !void { @@ -1371,6 +1386,18 @@ fn rebuildCells( (screen.rows * screen.cols * 2) + 1, ); + // Create an arena for all our temporary allocations while rebuilding + var arena = ArenaAllocator.init(self.alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); + + // Create our match set for the links. + var link_match_set = try self.config.links.matchSet( + arena_alloc, + screen, + mouse.point orelse .{}, + ); + // Determine our x/y range for preedit. We don't want to render anything // here because we will render the preedit separately. const preedit_range: ?struct { @@ -1475,10 +1502,27 @@ fn rebuildCells( } } + // It this cell is within our hint range then we need to + // underline it. + const cell: terminal.Screen.Cell = cell: { + var cell = row.getCell(shaper_cell.x); + + // If our links contain this cell then we want to + // underline it. 
+ if (link_match_set.orderedContains(.{ + .x = shaper_cell.x, + .y = y, + })) { + cell.attrs.underline = .single; + } + + break :cell cell; + }; + if (self.updateCell( term_selection, screen, - row.getCell(shaper_cell.x), + cell, shaper_cell, run, shaper_cell.x, diff --git a/src/renderer/OpenGL.zig b/src/renderer/OpenGL.zig index 5d83cf701..eda31218e 100644 --- a/src/renderer/OpenGL.zig +++ b/src/renderer/OpenGL.zig @@ -8,6 +8,7 @@ const assert = std.debug.assert; const testing = std.testing; const Allocator = std.mem.Allocator; const ArenaAllocator = std.heap.ArenaAllocator; +const link = @import("link.zig"); const shadertoy = @import("shadertoy.zig"); const apprt = @import("../apprt.zig"); const configpkg = @import("../config.zig"); @@ -225,6 +226,7 @@ pub const DerivedConfig = struct { invert_selection_fg_bg: bool, custom_shaders: std.ArrayListUnmanaged([]const u8), custom_shader_animation: bool, + links: link.Set, pub fn init( alloc_gpa: Allocator, @@ -246,6 +248,12 @@ pub const DerivedConfig = struct { font_styles.set(.italic, config.@"font-style-italic" != .false); font_styles.set(.bold_italic, config.@"font-style-bold-italic" != .false); + // Our link configs + const links = try link.Set.fromConfig( + alloc, + config.link.links.items, + ); + return .{ .background_opacity = @max(0, @min(1, config.@"background-opacity")), .font_thicken = config.@"font-thicken", @@ -280,12 +288,15 @@ pub const DerivedConfig = struct { .custom_shaders = custom_shaders, .custom_shader_animation = config.@"custom-shader-animation", + .links = links, .arena = arena, }; } pub fn deinit(self: *DerivedConfig) void { + const alloc = self.arena.allocator(); + self.links.deinit(alloc); self.arena.deinit(); } }; @@ -598,6 +609,7 @@ pub fn updateFrame( gl_bg: terminal.color.RGB, selection: ?terminal.Selection, screen: terminal.Screen, + mouse: renderer.State.Mouse, preedit: ?renderer.State.Preedit, cursor_style: ?renderer.CursorStyle, }; @@ -665,6 +677,7 @@ pub fn updateFrame( .gl_bg = 
self.background_color, .selection = selection, .screen = screen_copy, + .mouse = state.mouse, .preedit = if (cursor_style != null) state.preedit else null, .cursor_style = cursor_style, }; @@ -683,6 +696,7 @@ pub fn updateFrame( try self.rebuildCells( critical.selection, &critical.screen, + critical.mouse, critical.preedit, critical.cursor_style, ); @@ -855,6 +869,7 @@ pub fn rebuildCells( self: *OpenGL, term_selection: ?terminal.Selection, screen: *terminal.Screen, + mouse: renderer.State.Mouse, preedit: ?renderer.State.Preedit, cursor_style_: ?renderer.CursorStyle, ) !void { @@ -877,9 +892,21 @@ pub fn rebuildCells( (screen.rows * screen.cols * 2) + 1, ); + // Create an arena for all our temporary allocations while rebuilding + var arena = ArenaAllocator.init(self.alloc); + defer arena.deinit(); + const arena_alloc = arena.allocator(); + // We've written no data to the GPU, refresh it all self.gl_cells_written = 0; + // Create our match set for the links. + var link_match_set = try self.config.links.matchSet( + arena_alloc, + screen, + mouse.point orelse .{}, + ); + // Determine our x/y range for preedit. We don't want to render anything // here because we will render the preedit separately. const preedit_range: ?struct { @@ -975,10 +1002,27 @@ pub fn rebuildCells( } } + // It this cell is within our hint range then we need to + // underline it. + const cell: terminal.Screen.Cell = cell: { + var cell = row.getCell(shaper_cell.x); + + // If our links contain this cell then we want to + // underline it. 
+ if (link_match_set.orderedContains(.{ + .x = shaper_cell.x, + .y = y, + })) { + cell.attrs.underline = .single; + } + + break :cell cell; + }; + if (self.updateCell( term_selection, screen, - row.getCell(shaper_cell.x), + cell, shaper_cell, run, shaper_cell.x, diff --git a/src/renderer/State.zig b/src/renderer/State.zig index 57b54365d..bde0f0f52 100644 --- a/src/renderer/State.zig +++ b/src/renderer/State.zig @@ -25,6 +25,17 @@ inspector: ?*Inspector = null, /// a future exercise. preedit: ?Preedit = null, +/// Mouse state. This only contains state relevant to what renderers +/// need about the mouse. +mouse: Mouse = .{}, + +pub const Mouse = struct { + /// The point on the viewport where the mouse currently is. We use + /// viewport points to avoid the complexity of mapping the mouse to + /// the renderer state. + point: ?terminal.point.Viewport = null, +}; + /// The pre-edit state. See Surface.preeditCallback for more information. pub const Preedit = struct { /// The codepoints to render as preedit text. We allow up to 16 codepoints diff --git a/src/renderer/link.zig b/src/renderer/link.zig new file mode 100644 index 000000000..0537aef6d --- /dev/null +++ b/src/renderer/link.zig @@ -0,0 +1,257 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const oni = @import("oniguruma"); +const configpkg = @import("../config.zig"); +const inputpkg = @import("../input.zig"); +const terminal = @import("../terminal/main.zig"); +const point = terminal.point; +const Screen = terminal.Screen; + +const log = std.log.scoped(.renderer_link); + +/// The link configuration needed for renderers. +pub const Link = struct { + /// The regular expression to match the link against. + regex: oni.Regex, + + /// The situations in which the link should be highlighted. + highlight: inputpkg.Link.Highlight, + + pub fn deinit(self: *Link) void { + self.regex.deinit(); + } +}; + +/// A set of links. 
This provides a higher level API for renderers +/// to match against a viewport and determine if cells are part of +/// a link. +pub const Set = struct { + links: []Link, + + /// Returns the slice of links from the configuration. + pub fn fromConfig( + alloc: Allocator, + config: []const inputpkg.Link, + ) !Set { + var links = std.ArrayList(Link).init(alloc); + defer links.deinit(); + + for (config) |link| { + var regex = try link.oniRegex(); + errdefer regex.deinit(); + try links.append(.{ + .regex = regex, + .highlight = link.highlight, + }); + } + + return .{ .links = try links.toOwnedSlice() }; + } + + pub fn deinit(self: *Set, alloc: Allocator) void { + for (self.links) |*link| link.deinit(); + alloc.free(self.links); + } + + /// Returns the matchset for the viewport state. The matchset is the + /// full set of matching links for the visible viewport. A link + /// only matches if it is also in the correct state (i.e. hovered + /// if necessary). + /// + /// This is not a particularly efficient operation. This should be + /// called sparingly. + pub fn matchSet( + self: *const Set, + alloc: Allocator, + screen: *Screen, + mouse_vp_pt: point.Viewport, + ) !MatchSet { + // Convert the viewport point to a screen point. + const mouse_pt = mouse_vp_pt.toScreen(screen); + + // This contains our list of matches. The matches are stored + // as selections which contain the start and end points of + // the match. There is no way to map these back to the link + // configuration right now because we don't need to. + var matches = std.ArrayList(terminal.Selection).init(alloc); + defer matches.deinit(); + + // Iterate over all the visible lines. + var lineIter = screen.lineIterator(.viewport); + while (lineIter.next()) |line| { + const strmap = line.stringMap(alloc) catch |err| { + log.warn( + "failed to build string map for link checking err={}", + .{err}, + ); + continue; + }; + defer strmap.deinit(alloc); + + // Go through each link and see if we have any matches. 
+ for (self.links) |link| { + // If this is a hover link and our mouse point isn't within + // this line at all, we can skip it. + if (link.highlight == .hover) { + if (!line.selection().contains(mouse_pt)) continue; + } + + var it = strmap.searchIterator(link.regex); + while (true) { + const match_ = it.next() catch |err| { + log.warn("failed to search for link err={}", .{err}); + break; + }; + var match = match_ orelse break; + defer match.deinit(); + const sel = match.selection(); + + // If this is a highlight link then we only want to + // include matches that include our hover point. + if (link.highlight == .hover and + !sel.contains(mouse_pt)) + { + continue; + } + + try matches.append(sel); + } + } + } + + return .{ .matches = try matches.toOwnedSlice() }; + } +}; + +/// MatchSet is the result of matching links against a screen. This contains +/// all the matching links and operations on them such as whether a specific +/// cell is part of a matched link. +pub const MatchSet = struct { + /// The matches. + /// + /// Important: this must be in left-to-right top-to-bottom order. + matches: []const terminal.Selection, + i: usize = 0, + + pub fn deinit(self: *MatchSet, alloc: Allocator) void { + alloc.free(self.matches); + } + + /// Checks if the matchset contains the given pt. The points must be + /// given in left-to-right top-to-bottom order. This is a stateful + /// operation and giving a point out of order can cause invalid + /// results. + pub fn orderedContains( + self: *MatchSet, + pt: point.ScreenPoint, + ) bool { + // If we're beyond the end of our possible matches, we're done. + if (self.i >= self.matches.len) return false; + + // If our selection ends before the point, then no point will ever + // again match this selection so we move on to the next one. 
+ while (self.matches[self.i].end.before(pt)) { + self.i += 1; + if (self.i >= self.matches.len) return false; + } + + return self.matches[self.i].contains(pt); + } +}; + +test "matchset" { + const testing = std.testing; + const alloc = testing.allocator; + + // Initialize our screen + var s = try Screen.init(alloc, 5, 5, 0); + defer s.deinit(); + const str = "1ABCD2EFGH\n3IJKL"; + try s.testWriteString(str); + + // Get a set + var set = try Set.fromConfig(alloc, &.{ + .{ + .regex = "AB", + .action = .{ .open = {} }, + .highlight = .{ .always = {} }, + }, + + .{ + .regex = "EF", + .action = .{ .open = {} }, + .highlight = .{ .always = {} }, + }, + }); + defer set.deinit(alloc); + + // Get our matches + var match = try set.matchSet(alloc, &s, .{}); + defer match.deinit(alloc); + try testing.expectEqual(@as(usize, 2), match.matches.len); + + // Test our matches + try testing.expect(!match.orderedContains(.{ .x = 0, .y = 0 })); + try testing.expect(match.orderedContains(.{ .x = 1, .y = 0 })); + try testing.expect(match.orderedContains(.{ .x = 2, .y = 0 })); + try testing.expect(!match.orderedContains(.{ .x = 3, .y = 0 })); + try testing.expect(match.orderedContains(.{ .x = 1, .y = 1 })); + try testing.expect(!match.orderedContains(.{ .x = 1, .y = 2 })); +} + +test "matchset hover links" { + const testing = std.testing; + const alloc = testing.allocator; + + // Initialize our screen + var s = try Screen.init(alloc, 5, 5, 0); + defer s.deinit(); + const str = "1ABCD2EFGH\n3IJKL"; + try s.testWriteString(str); + + // Get a set + var set = try Set.fromConfig(alloc, &.{ + .{ + .regex = "AB", + .action = .{ .open = {} }, + .highlight = .{ .hover = {} }, + }, + + .{ + .regex = "EF", + .action = .{ .open = {} }, + .highlight = .{ .always = {} }, + }, + }); + defer set.deinit(alloc); + + // Not hovering over the first link + { + var match = try set.matchSet(alloc, &s, .{}); + defer match.deinit(alloc); + try testing.expectEqual(@as(usize, 1), match.matches.len); + + // Test 
our matches + try testing.expect(!match.orderedContains(.{ .x = 0, .y = 0 })); + try testing.expect(!match.orderedContains(.{ .x = 1, .y = 0 })); + try testing.expect(!match.orderedContains(.{ .x = 2, .y = 0 })); + try testing.expect(!match.orderedContains(.{ .x = 3, .y = 0 })); + try testing.expect(match.orderedContains(.{ .x = 1, .y = 1 })); + try testing.expect(!match.orderedContains(.{ .x = 1, .y = 2 })); + } + + // Hovering over the first link + { + var match = try set.matchSet(alloc, &s, .{ .x = 1, .y = 0 }); + defer match.deinit(alloc); + try testing.expectEqual(@as(usize, 2), match.matches.len); + + // Test our matches + try testing.expect(!match.orderedContains(.{ .x = 0, .y = 0 })); + try testing.expect(match.orderedContains(.{ .x = 1, .y = 0 })); + try testing.expect(match.orderedContains(.{ .x = 2, .y = 0 })); + try testing.expect(!match.orderedContains(.{ .x = 3, .y = 0 })); + try testing.expect(match.orderedContains(.{ .x = 1, .y = 1 })); + try testing.expect(!match.orderedContains(.{ .x = 1, .y = 2 })); + } +} diff --git a/src/terminal/Screen.zig b/src/terminal/Screen.zig index 63295a97a..08af2b9f2 100644 --- a/src/terminal/Screen.zig +++ b/src/terminal/Screen.zig @@ -13,6 +13,9 @@ //! affect this area. //! * Viewport - The area that is currently visible to the user. This //! can be thought of as the current window into the screen. +//! * Row - A single visible row in the screen. +//! * Line - A single line of text. This may map to multiple rows if +//! the row is soft-wrapped. //! //! The internal storage of the screen is stored in a circular buffer //! 
with roughly the following format: @@ -64,6 +67,7 @@ const kitty = @import("kitty.zig"); const point = @import("point.zig"); const CircBuf = @import("../circ_buf.zig").CircBuf; const Selection = @import("Selection.zig"); +const StringMap = @import("StringMap.zig"); const fastmem = @import("../fastmem.zig"); const charsets = @import("charsets.zig"); @@ -900,6 +904,72 @@ pub const GraphemeData = union(enum) { } }; +/// A line represents a line of text, potentially across soft-wrapped +/// boundaries. This differs from row, which is a single physical row within +/// the terminal screen. +pub const Line = struct { + screen: *Screen, + tag: RowIndexTag, + start: usize, + len: usize, + + /// Return the string for this line. + pub fn string(self: *const Line, alloc: Allocator) ![:0]const u8 { + return try self.screen.selectionString(alloc, self.selection(), true); + } + + /// Receive the string for this line along with the byte-to-point mapping. + pub fn stringMap(self: *const Line, alloc: Allocator) !StringMap { + return try self.screen.selectionStringMap(alloc, self.selection()); + } + + /// Return a selection that covers the entire line. + pub fn selection(self: *const Line) Selection { + // Get the start and end screen point. + const start_idx = self.tag.index(self.start).toScreen(self.screen).screen; + const end_idx = self.tag.index(self.start + (self.len - 1)).toScreen(self.screen).screen; + + // Convert the start and end screen points into a selection across + // the entire rows. We then use selectionString because it handles + // unwrapping, graphemes, etc. + return .{ + .start = .{ .y = start_idx, .x = 0 }, + .end = .{ .y = end_idx, .x = self.screen.cols - 1 }, + }; + } +}; + +/// Iterator over textual lines within the terminal. This will unwrap +/// wrapped lines and consider them a single line. 
+pub const LineIterator = struct { + row_it: RowIterator, + + pub fn next(self: *LineIterator) ?Line { + const start = self.row_it.value; + + // Get our current row + var row = self.row_it.next() orelse return null; + var len: usize = 1; + + // While the row is wrapped we keep iterating over the rows + // and incrementing the length. + while (row.isWrapped()) { + // Note: this orelse shouldn't happen. A wrapped row should + // always have a next row. However, this isn't the place where + // we want to assert that. + row = self.row_it.next() orelse break; + len += 1; + } + + return .{ + .screen = self.row_it.screen, + .tag = self.row_it.tag, + .start = start, + .len = len, + }; + } +}; + // Initialize to header and not a cell so that we can check header.init // to know if the remainder of the row has been initialized or not. const StorageBuf = CircBuf(StorageCell, .{ .header = .{} }); @@ -1097,6 +1167,50 @@ pub fn rowIterator(self: *Screen, tag: RowIndexTag) RowIterator { }; } +/// Returns an iterator that iterates over the lines of the screen. A line +/// is a single line of text which may wrap across multiple rows. A row +/// is a single physical row of the terminal. +pub fn lineIterator(self: *Screen, tag: RowIndexTag) LineIterator { + return .{ .row_it = self.rowIterator(tag) }; +} + +/// Returns the line that contains the given point. This may be null if the +/// point is outside the screen. +pub fn getLine(self: *Screen, pt: point.ScreenPoint) ?Line { + // If our y is outside of our written area, we have no line. + if (pt.y >= RowIndexTag.screen.maxLen(self)) return null; + if (pt.x >= self.cols) return null; + + // Find the starting y. We go back and as soon as we find a row that + // isn't wrapped, we know the NEXT line is the one we want. 
+ const start_y: usize = if (pt.y == 0) 0 else start_y: { + for (1..pt.y) |y| { + const bot_y = pt.y - y; + const row = self.getRow(.{ .screen = bot_y }); + if (!row.isWrapped()) break :start_y bot_y + 1; + } + + break :start_y 0; + }; + + // Find the end y, which is the first row that isn't wrapped. + const end_y = end_y: { + for (pt.y..self.rowsWritten()) |y| { + const row = self.getRow(.{ .screen = y }); + if (!row.isWrapped()) break :end_y y; + } + + break :end_y self.rowsWritten() - 1; + }; + + return .{ + .screen = self, + .tag = .screen, + .start = start_y, + .len = (end_y - start_y) + 1, + }; +} + /// Returns the row at the given index. This row is writable, although /// only the active area should probably be written to. pub fn getRow(self: *Screen, index: RowIndex) Row { @@ -2076,62 +2190,83 @@ pub fn selectionString( // Get the slices for the string const slices = self.selectionSlices(sel); - // We can now know how much space we'll need to store the string. We loop - // over and UTF8-encode and calculate the exact size required. We will be - // off here by at most "newlines" values in the worst case that every - // single line is soft-wrapped. - const chars = chars: { - var count: usize = 0; + // Use an ArrayList so that we can grow the array as we go. We + // build an initial capacity of just our rows in our selection times + // columns. It can be more or less based on graphemes, newlines, etc. + var strbuilder = try std.ArrayList(u8).initCapacity(alloc, slices.rows * self.cols); + defer strbuilder.deinit(); - // We need to keep track of our x/y so that we can get graphemes. - var y: usize = slices.sel.start.y; - var x: usize = 0; - var row: Row = undefined; + // Get our string result. + try self.selectionSliceString(slices, &strbuilder, null); - const arr = [_][]StorageCell{ slices.top, slices.bot }; - for (arr) |slice| { - for (slice, 0..) 
|cell, i| { - // detect row headers - if (@mod(i, self.cols + 1) == 0) { - // We use each row header as an opportunity to "count" - // a new row, and therefore count a possible newline. - count += 1; + // Remove any trailing spaces on lines. We could do optimize this by + // doing this in the loop above but this isn't very hot path code and + // this is simple. + if (trim) { + var it = std.mem.tokenize(u8, strbuilder.items, "\n"); - // Increase our row count and get our next row - y += 1; - x = 0; - row = self.getRow(.{ .screen = y - 1 }); - continue; - } - - var buf: [4]u8 = undefined; - const char = if (cell.cell.char > 0) cell.cell.char else ' '; - count += try std.unicode.utf8Encode(@intCast(char), &buf); - - // We need to also count any grapheme chars - var it = row.codepointIterator(x); - while (it.next()) |cp| { - count += try std.unicode.utf8Encode(cp, &buf); - } - - x += 1; - } + // Reset our items. We retain our capacity. Because we're only + // removing bytes, we know that the trimmed string must be no longer + // than the original string so we copy directly back into our + // allocated memory. + strbuilder.clearRetainingCapacity(); + while (it.next()) |line| { + const trimmed = std.mem.trimRight(u8, line, " \t"); + const i = strbuilder.items.len; + strbuilder.items.len += trimmed.len; + std.mem.copyForwards(u8, strbuilder.items[i..], trimmed); + strbuilder.appendAssumeCapacity('\n'); } - break :chars count; - }; - const buf = try alloc.alloc(u8, chars + 1); - errdefer alloc.free(buf); - - // Special case the empty case - if (chars == 0) { - buf[0] = 0; - return buf[0..0 :0]; + // Remove our trailing newline again + if (strbuilder.items.len > 0) strbuilder.items.len -= 1; } + // Get our final string + const string = try strbuilder.toOwnedSliceSentinel(0); + errdefer alloc.free(string); + + return string; +} + +/// Returns the row text associated with a selection along with the +/// mapping of each individual byte in the string to the point in the screen. 
+fn selectionStringMap( + self: *Screen, + alloc: Allocator, + sel: Selection, +) !StringMap { + // Get the slices for the string + const slices = self.selectionSlices(sel); + + // Use an ArrayList so that we can grow the array as we go. We + // build an initial capacity of just our rows in our selection times + // columns. It can be more or less based on graphemes, newlines, etc. + var strbuilder = try std.ArrayList(u8).initCapacity(alloc, slices.rows * self.cols); + defer strbuilder.deinit(); + var mapbuilder = try std.ArrayList(point.ScreenPoint).initCapacity(alloc, strbuilder.capacity); + defer mapbuilder.deinit(); + + // Get our results + try self.selectionSliceString(slices, &strbuilder, &mapbuilder); + + // Get our final string + const string = try strbuilder.toOwnedSliceSentinel(0); + errdefer alloc.free(string); + const map = try mapbuilder.toOwnedSlice(); + errdefer alloc.free(map); + return .{ .string = string, .map = map }; +} + +/// Takes a SelectionSlices value and builds the string and mapping for it. +fn selectionSliceString( + self: *Screen, + slices: SelectionSlices, + strbuilder: *std.ArrayList(u8), + mapbuilder: ?*std.ArrayList(point.ScreenPoint), +) !void { // Connect the text from the two slices const arr = [_][]StorageCell{ slices.top, slices.bot }; - var buf_i: usize = 0; var row_count: usize = 0; for (arr) |slice| { const row_start: usize = row_count; @@ -2151,6 +2286,13 @@ pub fn selectionString( // the first row. var skip: usize = if (row_count == 0) slices.top_offset else 0; + // If we have runtime safety we need to initialize the row + // so that the proper union tag is set. In release modes we + // don't need to do this because we zero the memory. 
+ if (std.debug.runtime_safety) { + _ = self.getRow(.{ .screen = slices.sel.start.y + row_i }); + } + const row: Row = .{ .screen = self, .storage = slice[start_idx..end_idx] }; var it = row.cellIterator(); var x: usize = 0; @@ -2166,56 +2308,61 @@ pub fn selectionString( if (cell.attrs.wide_spacer_head or cell.attrs.wide_spacer_tail) continue; + var buf: [4]u8 = undefined; const char = if (cell.char > 0) cell.char else ' '; - buf_i += try std.unicode.utf8Encode(@intCast(char), buf[buf_i..]); + { + const encode_len = try std.unicode.utf8Encode(@intCast(char), &buf); + try strbuilder.appendSlice(buf[0..encode_len]); + if (mapbuilder) |b| { + for (0..encode_len) |_| try b.append(.{ + .x = x, + .y = slices.sel.start.y + row_i, + }); + } + } var cp_it = row.codepointIterator(x); while (cp_it.next()) |cp| { - buf_i += try std.unicode.utf8Encode(cp, buf[buf_i..]); + const encode_len = try std.unicode.utf8Encode(cp, &buf); + try strbuilder.appendSlice(buf[0..encode_len]); + if (mapbuilder) |b| { + for (0..encode_len) |_| try b.append(.{ + .x = x, + .y = slices.sel.start.y + row_i, + }); + } } } // If this row is not soft-wrapped, add a newline if (!row.header().flags.wrap) { - buf[buf_i] = '\n'; - buf_i += 1; + try strbuilder.append('\n'); + if (mapbuilder) |b| { + try b.append(.{ + .x = self.cols - 1, + .y = slices.sel.start.y + row_i, + }); + } } } } // Remove our trailing newline, its never correct. - if (buf_i > 0 and buf[buf_i - 1] == '\n') buf_i -= 1; - - // Remove any trailing spaces on lines. We could do optimize this by - // doing this in the loop above but this isn't very hot path code and - // this is simple. 
- if (trim) { - var it = std.mem.tokenize(u8, buf[0..buf_i], "\n"); - buf_i = 0; - while (it.next()) |line| { - const trimmed = std.mem.trimRight(u8, line, " \t"); - std.mem.copy(u8, buf[buf_i..], trimmed); - buf_i += trimmed.len; - buf[buf_i] = '\n'; - buf_i += 1; - } - - // Remove our trailing newline again - if (buf_i > 0) buf_i -= 1; + if (strbuilder.items.len > 0 and + strbuilder.items[strbuilder.items.len - 1] == '\n') + { + strbuilder.items.len -= 1; + if (mapbuilder) |b| b.items.len -= 1; } - // Add null termination - buf[buf_i] = 0; - - // Realloc so our free length is exactly correct - const result = try alloc.realloc(buf, buf_i + 1); - return result[0..buf_i :0]; + if (std.debug.runtime_safety) { + if (mapbuilder) |b| { + assert(strbuilder.items.len == b.items.len); + } + } } -/// Returns the slices that make up the selection, in order. There are at most -/// two parts to handle the ring buffer. If the selection fits in one contiguous -/// slice, then the second slice will have a length of zero. -fn selectionSlices(self: *Screen, sel_raw: Selection) struct { +const SelectionSlices = struct { rows: usize, // The selection that the slices below represent. This may not @@ -2228,7 +2375,12 @@ fn selectionSlices(self: *Screen, sel_raw: Selection) struct { top_offset: usize, top: []StorageCell, bot: []StorageCell, -} { +}; + +/// Returns the slices that make up the selection, in order. There are at most +/// two parts to handle the ring buffer. If the selection fits in one contiguous +/// slice, then the second slice will have a length of zero. 
+fn selectionSlices(self: *Screen, sel_raw: Selection) SelectionSlices { // Note: this function is tested via selectionString // If the selection starts beyond the end of the screen, then we return empty @@ -3404,6 +3556,91 @@ test "Screen: write long emoji" { try testing.expectEqual(@as(usize, 5), s.cursor.x); } +test "Screen: lineIterator" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try init(alloc, 5, 5, 0); + defer s.deinit(); + + // Sanity check that our test helpers work + const str = "1ABCD\n2EFGH"; + try s.testWriteString(str); + + // Test the line iterator + var iter = s.lineIterator(.viewport); + { + const line = iter.next().?; + const actual = try line.string(alloc); + defer alloc.free(actual); + try testing.expectEqualStrings("1ABCD", actual); + } + { + const line = iter.next().?; + const actual = try line.string(alloc); + defer alloc.free(actual); + try testing.expectEqualStrings("2EFGH", actual); + } + try testing.expect(iter.next() == null); +} + +test "Screen: lineIterator soft wrap" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try init(alloc, 5, 5, 0); + defer s.deinit(); + + // Sanity check that our test helpers work + const str = "1ABCD2EFGH\n3ABCD"; + try s.testWriteString(str); + + // Test the line iterator + var iter = s.lineIterator(.viewport); + { + const line = iter.next().?; + const actual = try line.string(alloc); + defer alloc.free(actual); + try testing.expectEqualStrings("1ABCD2EFGH", actual); + } + { + const line = iter.next().?; + const actual = try line.string(alloc); + defer alloc.free(actual); + try testing.expectEqualStrings("3ABCD", actual); + } + try testing.expect(iter.next() == null); +} + +test "Screen: getLine soft wrap" { + const testing = std.testing; + const alloc = testing.allocator; + + var s = try init(alloc, 5, 5, 0); + defer s.deinit(); + + // Sanity check that our test helpers work + const str = "1ABCD2EFGH\n3ABCD"; + try s.testWriteString(str); 
+ + // Test the line iterator + { + const line = s.getLine(.{ .x = 2, .y = 1 }).?; + const actual = try line.string(alloc); + defer alloc.free(actual); + try testing.expectEqualStrings("1ABCD2EFGH", actual); + } + { + const line = s.getLine(.{ .x = 2, .y = 2 }).?; + const actual = try line.string(alloc); + defer alloc.free(actual); + try testing.expectEqualStrings("3ABCD", actual); + } + + try testing.expect(s.getLine(.{ .x = 2, .y = 3 }) == null); + try testing.expect(s.getLine(.{ .x = 7, .y = 1 }) == null); +} + test "Screen: scrolling" { const testing = std.testing; const alloc = testing.allocator; diff --git a/src/terminal/StringMap.zig b/src/terminal/StringMap.zig new file mode 100644 index 000000000..588013d9d --- /dev/null +++ b/src/terminal/StringMap.zig @@ -0,0 +1,124 @@ +/// A string along with the mapping of each individual byte in the string +/// to the point in the screen. +const StringMap = @This(); + +const std = @import("std"); +const oni = @import("oniguruma"); +const point = @import("point.zig"); +const Selection = @import("Selection.zig"); +const Screen = @import("Screen.zig"); +const Allocator = std.mem.Allocator; + +string: [:0]const u8, +map: []point.ScreenPoint, + +pub fn deinit(self: StringMap, alloc: Allocator) void { + alloc.free(self.string); + alloc.free(self.map); +} + +/// Returns an iterator that yields the next match of the given regex. +pub fn searchIterator( + self: StringMap, + regex: oni.Regex, +) SearchIterator { + return .{ .map = self, .regex = regex }; +} + +/// Iterates over the regular expression matches of the string. +pub const SearchIterator = struct { + map: StringMap, + regex: oni.Regex, + offset: usize = 0, + + /// Returns the next regular expression match or null if there are + /// no more matches. 
+ pub fn next(self: *SearchIterator) !?Match { + if (self.offset >= self.map.string.len) return null; + + var region = self.regex.search( + self.map.string[self.offset..], + .{}, + ) catch |err| switch (err) { + error.Mismatch => { + self.offset = self.map.string.len; + return null; + }, + + else => return err, + }; + errdefer region.deinit(); + + // Increment our offset by the number of bytes in the match. + // We defer this so that we can return the match before + // modifying the offset. + const end_idx: usize = @intCast(region.ends()[0]); + defer self.offset += end_idx; + + return .{ + .map = self.map, + .offset = self.offset, + .region = region, + }; + } +}; + +/// A single regular expression match. +pub const Match = struct { + map: StringMap, + offset: usize, + region: oni.Region, + + pub fn deinit(self: *Match) void { + self.region.deinit(); + } + + /// Returns the selection containing the full match. + pub fn selection(self: Match) Selection { + const start_idx: usize = @intCast(self.region.starts()[0]); + const end_idx: usize = @intCast(self.region.ends()[0] - 1); + const start_pt = self.map.map[self.offset + start_idx]; + const end_pt = self.map.map[self.offset + end_idx]; + return .{ .start = start_pt, .end = end_pt }; + } +}; + +test "searchIterator" { + const testing = std.testing; + const alloc = testing.allocator; + + // Initialize our regex + try oni.testing.ensureInit(); + var re = try oni.Regex.init( + "[A-B]{2}", + .{}, + oni.Encoding.utf8, + oni.Syntax.default, + null, + ); + defer re.deinit(); + + // Initialize our screen + var s = try Screen.init(alloc, 5, 5, 0); + defer s.deinit(); + const str = "1ABCD2EFGH\n3IJKL"; + try s.testWriteString(str); + const line = s.getLine(.{ .x = 2, .y = 1 }).?; + const map = try line.stringMap(alloc); + defer map.deinit(alloc); + + // Get our iterator + var it = map.searchIterator(re); + { + var match = (try it.next()).?; + defer match.deinit(); + + const sel = match.selection(); + try 
testing.expectEqual(Selection{ + .start = .{ .x = 1, .y = 0 }, + .end = .{ .x = 2, .y = 0 }, + }, sel); + } + + try testing.expect(try it.next() == null); +} diff --git a/src/terminal/main.zig b/src/terminal/main.zig index a752d64eb..486e3526e 100644 --- a/src/terminal/main.zig +++ b/src/terminal/main.zig @@ -26,6 +26,7 @@ pub const Terminal = @import("Terminal.zig"); pub const Parser = @import("Parser.zig"); pub const Selection = @import("Selection.zig"); pub const Screen = @import("Screen.zig"); +pub const StringMap = @import("StringMap.zig"); pub const Stream = stream.Stream; pub const Cursor = Screen.Cursor; pub const CursorStyleReq = ansi.CursorStyle;