diff --git a/pkg/oniguruma/errors.zig b/pkg/oniguruma/errors.zig
index b9edf2d36..f13eda95a 100644
--- a/pkg/oniguruma/errors.zig
+++ b/pkg/oniguruma/errors.zig
@@ -7,9 +7,11 @@ pub const MAX_ERROR_LEN = c.ONIG_MAX_ERROR_MESSAGE_LEN;
-/// Convert an Oniguruma error to an error.
+/// Pass codes >= 0 through; map a negative code to its `Error` (`Error.Unknown` if unlisted).
 pub fn convertError(code: c_int) !c_int {
     if (code >= 0) return code;
-    switch (code) {
-        else => return error.OnigurumaError,
+    inline for (error_code_map) |m| {
+        if (m[1] == code) return m[0];
     }
+
+    return Error.Unknown;
 }
 
 /// Convert an error code to a string. buf must be at least
@@ -25,3 +27,176 @@ pub const ErrorInfo = extern struct {
     par: [*]u8,
     par_end: [*]u8,
 };
+
+/// All possible Oniguruma errors.
+pub const Error = error{
+    Mismatch,
+    NoSupportConfig,
+    Abort,
+    Memory,
+    TypeBug,
+    ParserBug,
+    StackBug,
+    UndefinedBytecode,
+    UnexpectedBytecode,
+    MatchStackLimitOver,
+    ParseDepthLimitOver,
+    RetryLimitInMatchOver,
+    RetryLimitInSearchOver,
+    SubexpCallLimitInSearchOver,
+    DefaultEncodingIsNotSet,
+    SpecifiedEncodingCantConvertToWideChar,
+    FailToInitialize,
+    InvalidArgument,
+    EndPatternAtLeftBrace,
+    EndPatternAtLeftBracket,
+    EmptyCharClass,
+    PrematureEndOfCharClass,
+    EndPatternAtEscape,
+    EndPatternAtMeta,
+    EndPatternAtControl,
+    MetaCodeSyntax,
+    ControlCodeSyntax,
+    CharClassValueAtEndOfRange,
+    CharClassValueAtStartOfRange,
+    UnmatchedRangeSpecifierInCharClass,
+    TargetOfRepeatOperatorNotSpecified,
+    TargetOfRepeatOperatorInvalid,
+    NestedRepeatOperator,
+    UnmatchedCloseParenthesis,
+    EndPatternWithUnmatchedParenthesis,
+    EndPatternInGroup,
+    UndefinedGroupOption,
+    InvalidGroupOption,
+    InvalidPosixBracketType,
+    InvalidLookBehindPattern,
+    InvalidRepeatRangePattern,
+    TooBigNumber,
+    TooBigNumberForRepeatRange,
+    UpperSmallerThanLowerInRepeatRange,
+    EmptyRangeInCharClass,
+    MismatchCodeLengthInClassRange,
+    TooManyMultiByteRanges,
+    TooShortMultiByteString,
+    TooBigBackrefNumber,
+    InvalidBackref,
+    NumberedBackrefOrCallNotAllowed,
+    TooManyCaptures,
+    TooLongWideCharValue,
+    UndefinedOperator,
+    EmptyGroupName,
+    InvalidGroupName,
+    InvalidCharInGroupName,
+    UndefinedNameReference,
+    UndefinedGroupReference,
+    MultiplexDefinedName,
+    MultiplexDefinitionNameCall,
+    NeverEndingRecursion,
+    GroupNumberOverForCaptureHistory,
+    InvalidCharPropertyName,
+    InvalidIfElseSyntax,
+    InvalidAbsentGroupPattern,
+    InvalidAbsentGroupGeneratorPattern,
+    InvalidCalloutPattern,
+    InvalidCalloutName,
+    UndefinedCalloutName,
+    InvalidCalloutBody,
+    InvalidCalloutTagName,
+    InvalidCalloutArg,
+    InvalidCodePointValue,
+    InvalidWideCharValue,
+    TooBigWideCharValue,
+    NotSupportedEncodingCombination,
+    InvalidCombinationOfOptions,
+    TooManyUserDefinedObjects,
+    TooLongPropertyName,
+    VeryInefficientPattern,
+    LibraryIsNotInitialized,
+    Unknown,
+};
+
+/// Pairs each `Error` (except `Unknown`) with its Oniguruma C code; scanned by `convertError`.
+const error_code_map: []const struct { Error, c_int } = &.{
+    .{ Error.Mismatch, c.ONIG_MISMATCH },
+    .{ Error.NoSupportConfig, c.ONIG_NO_SUPPORT_CONFIG },
+    .{ Error.Abort, c.ONIG_ABORT },
+    .{ Error.Memory, c.ONIGERR_MEMORY },
+    .{ Error.TypeBug, c.ONIGERR_TYPE_BUG },
+    .{ Error.ParserBug, c.ONIGERR_PARSER_BUG },
+    .{ Error.StackBug, c.ONIGERR_STACK_BUG },
+    .{ Error.UndefinedBytecode, c.ONIGERR_UNDEFINED_BYTECODE },
+    .{ Error.UnexpectedBytecode, c.ONIGERR_UNEXPECTED_BYTECODE },
+    .{ Error.MatchStackLimitOver, c.ONIGERR_MATCH_STACK_LIMIT_OVER },
+    .{ Error.ParseDepthLimitOver, c.ONIGERR_PARSE_DEPTH_LIMIT_OVER },
+    .{ Error.RetryLimitInMatchOver, c.ONIGERR_RETRY_LIMIT_IN_MATCH_OVER },
+    .{ Error.RetryLimitInSearchOver, c.ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER },
+    .{ Error.SubexpCallLimitInSearchOver, c.ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER },
+    .{ Error.DefaultEncodingIsNotSet, c.ONIGERR_DEFAULT_ENCODING_IS_NOT_SET },
+    .{ Error.SpecifiedEncodingCantConvertToWideChar, c.ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR },
+    .{ Error.FailToInitialize, c.ONIGERR_FAIL_TO_INITIALIZE },
+    .{ Error.InvalidArgument, c.ONIGERR_INVALID_ARGUMENT },
+    .{ Error.EndPatternAtLeftBrace, c.ONIGERR_END_PATTERN_AT_LEFT_BRACE },
+    .{ Error.EndPatternAtLeftBracket, c.ONIGERR_END_PATTERN_AT_LEFT_BRACKET },
+    .{ Error.EmptyCharClass, c.ONIGERR_EMPTY_CHAR_CLASS },
+    .{ Error.PrematureEndOfCharClass, c.ONIGERR_PREMATURE_END_OF_CHAR_CLASS },
+    .{ Error.EndPatternAtEscape, c.ONIGERR_END_PATTERN_AT_ESCAPE },
+    .{ Error.EndPatternAtMeta, c.ONIGERR_END_PATTERN_AT_META },
+    .{ Error.EndPatternAtControl, c.ONIGERR_END_PATTERN_AT_CONTROL },
+    .{ Error.MetaCodeSyntax, c.ONIGERR_META_CODE_SYNTAX },
+    .{ Error.ControlCodeSyntax, c.ONIGERR_CONTROL_CODE_SYNTAX },
+    .{ Error.CharClassValueAtEndOfRange, c.ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE },
+    .{ Error.CharClassValueAtStartOfRange, c.ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE },
+    .{ Error.UnmatchedRangeSpecifierInCharClass, c.ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS },
+    .{ Error.TargetOfRepeatOperatorNotSpecified, c.ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED },
+    .{ Error.TargetOfRepeatOperatorInvalid, c.ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID },
+    .{ Error.NestedRepeatOperator, c.ONIGERR_NESTED_REPEAT_OPERATOR },
+    .{ Error.UnmatchedCloseParenthesis, c.ONIGERR_UNMATCHED_CLOSE_PARENTHESIS },
+    .{ Error.EndPatternWithUnmatchedParenthesis, c.ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS },
+    .{ Error.EndPatternInGroup, c.ONIGERR_END_PATTERN_IN_GROUP },
+    .{ Error.UndefinedGroupOption, c.ONIGERR_UNDEFINED_GROUP_OPTION },
+    .{ Error.InvalidGroupOption, c.ONIGERR_INVALID_GROUP_OPTION },
+    .{ Error.InvalidPosixBracketType, c.ONIGERR_INVALID_POSIX_BRACKET_TYPE },
+    .{ Error.InvalidLookBehindPattern, c.ONIGERR_INVALID_LOOK_BEHIND_PATTERN },
+    .{ Error.InvalidRepeatRangePattern, c.ONIGERR_INVALID_REPEAT_RANGE_PATTERN },
+    .{ Error.TooBigNumber, c.ONIGERR_TOO_BIG_NUMBER },
+    .{ Error.TooBigNumberForRepeatRange, c.ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE },
+    .{ Error.UpperSmallerThanLowerInRepeatRange, c.ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE },
+    .{ Error.EmptyRangeInCharClass, c.ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS },
+    .{ Error.MismatchCodeLengthInClassRange, c.ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE },
+    .{ Error.TooManyMultiByteRanges, c.ONIGERR_TOO_MANY_MULTI_BYTE_RANGES },
+    .{ Error.TooShortMultiByteString, c.ONIGERR_TOO_SHORT_MULTI_BYTE_STRING },
+    .{ Error.TooBigBackrefNumber, c.ONIGERR_TOO_BIG_BACKREF_NUMBER },
+    .{ Error.InvalidBackref, c.ONIGERR_INVALID_BACKREF },
+    .{ Error.NumberedBackrefOrCallNotAllowed, c.ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED },
+    .{ Error.TooManyCaptures, c.ONIGERR_TOO_MANY_CAPTURES },
+    .{ Error.TooLongWideCharValue, c.ONIGERR_TOO_LONG_WIDE_CHAR_VALUE },
+    .{ Error.UndefinedOperator, c.ONIGERR_UNDEFINED_OPERATOR },
+    .{ Error.EmptyGroupName, c.ONIGERR_EMPTY_GROUP_NAME },
+    .{ Error.InvalidGroupName, c.ONIGERR_INVALID_GROUP_NAME },
+    .{ Error.InvalidCharInGroupName, c.ONIGERR_INVALID_CHAR_IN_GROUP_NAME },
+    .{ Error.UndefinedNameReference, c.ONIGERR_UNDEFINED_NAME_REFERENCE },
+    .{ Error.UndefinedGroupReference, c.ONIGERR_UNDEFINED_GROUP_REFERENCE },
+    .{ Error.MultiplexDefinedName, c.ONIGERR_MULTIPLEX_DEFINED_NAME },
+    .{ Error.MultiplexDefinitionNameCall, c.ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL },
+    .{ Error.NeverEndingRecursion, c.ONIGERR_NEVER_ENDING_RECURSION },
+    .{ Error.GroupNumberOverForCaptureHistory, c.ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY },
+    .{ Error.InvalidCharPropertyName, c.ONIGERR_INVALID_CHAR_PROPERTY_NAME },
+    .{ Error.InvalidIfElseSyntax, c.ONIGERR_INVALID_IF_ELSE_SYNTAX },
+    .{ Error.InvalidAbsentGroupPattern, c.ONIGERR_INVALID_ABSENT_GROUP_PATTERN },
+    .{ Error.InvalidAbsentGroupGeneratorPattern, c.ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN },
+    .{ Error.InvalidCalloutPattern, c.ONIGERR_INVALID_CALLOUT_PATTERN },
+    .{ Error.InvalidCalloutName, c.ONIGERR_INVALID_CALLOUT_NAME },
+    .{ Error.UndefinedCalloutName, c.ONIGERR_UNDEFINED_CALLOUT_NAME },
+    .{ Error.InvalidCalloutBody, c.ONIGERR_INVALID_CALLOUT_BODY },
+    .{ Error.InvalidCalloutTagName, c.ONIGERR_INVALID_CALLOUT_TAG_NAME },
+    .{ Error.InvalidCalloutArg, c.ONIGERR_INVALID_CALLOUT_ARG },
+    .{ Error.InvalidCodePointValue, c.ONIGERR_INVALID_CODE_POINT_VALUE },
+    .{ Error.InvalidWideCharValue, c.ONIGERR_INVALID_WIDE_CHAR_VALUE },
+    .{ Error.TooBigWideCharValue, c.ONIGERR_TOO_BIG_WIDE_CHAR_VALUE },
+    .{ Error.NotSupportedEncodingCombination, c.ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION },
+    .{ Error.InvalidCombinationOfOptions, c.ONIGERR_INVALID_COMBINATION_OF_OPTIONS },
+    .{ Error.TooManyUserDefinedObjects, c.ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS },
+    .{ Error.TooLongPropertyName, c.ONIGERR_TOO_LONG_PROPERTY_NAME },
+    .{ Error.VeryInefficientPattern, c.ONIGERR_VERY_INEFFICIENT_PATTERN },
+    .{ Error.LibraryIsNotInitialized, c.ONIGERR_LIBRARY_IS_NOT_INITIALIZED },
+};
diff --git a/pkg/oniguruma/init.zig b/pkg/oniguruma/init.zig
index 3e40887c0..bf7420e40 100644
--- a/pkg/oniguruma/init.zig
+++ b/pkg/oniguruma/init.zig
@@ -10,3 +10,8 @@ pub fn init(encs: []const *Encoding) !void {
         @intCast(encs.len),
     ));
 }
+
+/// Call `onig_end`; its return code is discarded.
+pub fn deinit() void {
+    _ = c.onig_end();
+}
diff --git a/pkg/oniguruma/regex.zig b/pkg/oniguruma/regex.zig
index 3e4e563d5..db9689ee2 100644
--- a/pkg/oniguruma/regex.zig
+++ b/pkg/oniguruma/regex.zig
@@ -4,6 +4,7 @@ const types = @import("types.zig");
 const errors = @import("errors.zig");
 const testEnsureInit = @import("testing.zig").ensureInit;
 const Region = @import("region.zig").Region;
+const Error = errors.Error;
 const ErrorInfo = errors.ErrorInfo;
 const Encoding = types.Encoding;
 const Option = types.Option;
@@ -36,17 +37,22 @@ pub const Regex = struct {
         c.onig_free(self.value);
     }
 
-    /// onig_search shorthand to search an entire string.
+    /// Search an entire string for matches. This always returns a region
+    /// which may heap allocate (C allocator).
+    /// On success the caller owns the region and must call `deinit` on it.
     pub fn search(
         self: *Regex,
         str: []const u8,
-        region: *Region,
         options: Option,
-    ) !usize {
-        return try self.searchAdvanced(str, 0, str.len, region, options);
+    ) !Region {
+        var region: Region = .{};
+        // The region may have been allocated before the search failed (e.g. Mismatch); don't leak it.
+        errdefer region.deinit();
+        _ = try self.searchAdvanced(str, 0, str.len, &region, options);
+        return region;
     }
 
-    /// onig_search
+    /// onig_search directly
     pub fn searchAdvanced(
         self: *Regex,
         str: []const u8,
@@ -76,8 +82,9 @@ test {
     var re = try Regex.init("foo", .{}, Encoding.utf8, Syntax.default, null);
     defer re.deinit();
 
-    var region: Region = .{};
-    defer region.deinit();
-    const pos = try re.search("hello foo bar", &region, .{});
-    try testing.expectEqual(@as(usize, 6), pos);
+    var reg = try re.search("hello foo bar", .{});
+    defer reg.deinit();
+    try testing.expectEqual(@as(usize, 1), reg.count());
+
+    try testing.expectError(Error.Mismatch, re.search("hello", .{}));
 }
diff --git a/pkg/oniguruma/region.zig b/pkg/oniguruma/region.zig
index b8c61fc4f..1b99b55b7 100644
--- a/pkg/oniguruma/region.zig
+++ b/pkg/oniguruma/region.zig
@@ -13,4 +13,44 @@ pub const Region = extern struct {
         // bindings is handled by the Zig program.
         c.onig_region_free(@ptrCast(self), 0);
     }
+
+    /// Number of registers (`num_regs`) held by this region.
+    pub fn count(self: *const Region) usize {
+        return @intCast(self.num_regs);
+    }
+
+    /// Iterate over the matched ranges.
+    pub fn iterator(self: *const Region) Iterator {
+        return .{ .region = self };
+    }
+
+    /// The first `num_regs` entries of `beg`; empty when `num_regs` is 0.
+    pub fn starts(self: *const Region) []const c_int {
+        if (self.num_regs == 0) return &.{};
+        return self.beg.?[0..@intCast(self.num_regs)];
+    }
+
+    /// The first `num_regs` entries of `end`; empty when `num_regs` is 0.
+    pub fn ends(self: *const Region) []const c_int {
+        if (self.num_regs == 0) return &.{};
+        return self.end.?[0..@intCast(self.num_regs)];
+    }
+
+    /// Walks the region's registers in index order, yielding `{ beg, end }` pairs.
+    pub const Iterator = struct {
+        region: *const Region,
+        i: usize = 0,
+
+        /// The next range as `{ beg, end }`, or null once `i` reaches `num_regs`.
+        /// NOTE(review): the `@intCast`s to usize assume non-negative offsets; confirm that
+        /// unmatched groups (Oniguruma's ONIG_REGION_NOTPOS, if applicable) cannot reach here.
+        pub fn next(self: *Iterator) ?[2]usize {
+            if (self.i >= self.region.num_regs) return null;
+            defer self.i += 1;
+            return .{
+                @intCast(self.region.beg.?[self.i]),
+                @intCast(self.region.end.?[self.i]),
+            };
+        }
+    };
 };