From 4d3c7212d81caea5655edc0515edda3e59d95e43 Mon Sep 17 00:00:00 2001
From: Mitchell Hashimoto
Date: Sat, 25 Nov 2023 09:00:43 -0800
Subject: [PATCH] pkg/oniguruma: init regex object

---
Review notes (this section is ignored by `git am`):
- testing.zig: ensureInit now sets `initialized` after a successful init.
  The flag was never assigned before, so the early return was dead code.
- regex.zig, types.zig: doc comments added; no API changes.
- The `index` blob hashes below predate these amendments.

 pkg/oniguruma/encoding.zig |   5 --
 pkg/oniguruma/errors.zig   |   8 +++
 pkg/oniguruma/init.zig     |   2 +-
 pkg/oniguruma/main.zig     |   3 +-
 pkg/oniguruma/regex.zig    |  50 ++++++++++++++++++
 pkg/oniguruma/testing.zig  |   5 +-
 pkg/oniguruma/types.zig    | 104 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 169 insertions(+), 8 deletions(-)
 delete mode 100644 pkg/oniguruma/encoding.zig
 create mode 100644 pkg/oniguruma/regex.zig
 create mode 100644 pkg/oniguruma/types.zig

diff --git a/pkg/oniguruma/encoding.zig b/pkg/oniguruma/encoding.zig
deleted file mode 100644
index 3407b8286..000000000
--- a/pkg/oniguruma/encoding.zig
+++ /dev/null
@@ -1,5 +0,0 @@
-const c = @import("c.zig");
-
-pub const Encoding = opaque {
-    pub const utf8: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF8);
-};
diff --git a/pkg/oniguruma/errors.zig b/pkg/oniguruma/errors.zig
index 25bf1d859..f5ec71279 100644
--- a/pkg/oniguruma/errors.zig
+++ b/pkg/oniguruma/errors.zig
@@ -1,4 +1,5 @@
 const c = @import("c.zig");
+const Encoding = @import("types.zig").Encoding;
 
 /// Maximum error message length.
 pub const MAX_ERROR_LEN = c.ONIG_MAX_ERROR_MESSAGE_LEN;
@@ -17,3 +18,10 @@ pub fn errorString(buf: []u8, code: c_int) ![]u8 {
     const len = c.onig_error_code_to_str(buf.ptr, code);
     return buf[0..@intCast(len)];
 }
+
+/// The Oniguruma error info type, matching the C type exactly.
+pub const ErrorInfo = extern struct {
+    encoding: *Encoding,
+    par: [*]u8,
+    par_end: [*]u8,
+};
diff --git a/pkg/oniguruma/init.zig b/pkg/oniguruma/init.zig
index edd19f80d..531f9e0c7 100644
--- a/pkg/oniguruma/init.zig
+++ b/pkg/oniguruma/init.zig
@@ -1,5 +1,5 @@
 const c = @import("c.zig");
-const Encoding = @import("encoding.zig").Encoding;
+const Encoding = @import("types.zig").Encoding;
 const errors = @import("errors.zig");
 
 /// Call once per process to initialize Oniguruma. This should be given
diff --git a/pkg/oniguruma/main.zig b/pkg/oniguruma/main.zig
index 19ced50bc..113f23e5c 100644
--- a/pkg/oniguruma/main.zig
+++ b/pkg/oniguruma/main.zig
@@ -1,7 +1,8 @@
 pub usingnamespace @import("init.zig");
 pub usingnamespace @import("errors.zig");
+pub usingnamespace @import("regex.zig");
+pub usingnamespace @import("types.zig");
 pub const c = @import("c.zig");
-pub const Encoding = @import("encoding.zig").Encoding;
 
 test {
     @import("std").testing.refAllDecls(@This());
diff --git a/pkg/oniguruma/regex.zig b/pkg/oniguruma/regex.zig
new file mode 100644
index 000000000..490129220
--- /dev/null
+++ b/pkg/oniguruma/regex.zig
@@ -0,0 +1,50 @@
+const std = @import("std");
+const c = @import("c.zig");
+const types = @import("types.zig");
+const errors = @import("errors.zig");
+const testEnsureInit = @import("testing.zig").ensureInit;
+const ErrorInfo = errors.ErrorInfo;
+const Encoding = types.Encoding;
+const Option = types.Option;
+const Syntax = types.Syntax;
+
+/// A compiled regular expression; wraps the C `OnigRegex` handle.
+pub const Regex = struct {
+    value: c.OnigRegex,
+
+    /// Compile `pattern` via `onig_new`. A non-null `err` is handed to
+    /// Oniguruma as its error-info out-parameter. Release the result with
+    /// `deinit`.
+    pub fn init(
+        pattern: []const u8,
+        options: Option,
+        enc: *Encoding,
+        syntax: *Syntax,
+        err: ?*ErrorInfo,
+    ) !Regex {
+        // onig_new receives `&self.value` as its output slot; if convertError
+        // reports an error, `try` returns before `self` is read.
+        var self: Regex = undefined;
+        try errors.convertError(c.onig_new(
+            &self.value,
+            pattern.ptr,
+            pattern.ptr + pattern.len,
+            options.int(),
+            @ptrCast(@alignCast(enc)),
+            @ptrCast(@alignCast(syntax)),
+            @ptrCast(err),
+        ));
+        return self;
+    }
+
+    /// Free the underlying regex with `onig_free`.
+    pub fn deinit(self: *Regex) void {
+        c.onig_free(self.value);
+    }
+};
+
+test {
+    try testEnsureInit();
+    var re = try Regex.init("foo", .{}, Encoding.utf8, Syntax.default, null);
+    defer re.deinit();
+}
diff --git a/pkg/oniguruma/testing.zig b/pkg/oniguruma/testing.zig
index 126ef49fa..234b5e759 100644
--- a/pkg/oniguruma/testing.zig
+++ b/pkg/oniguruma/testing.zig
@@ -1,4 +1,5 @@
 const init = @import("init.zig");
+const Encoding = @import("types.zig").Encoding;
 
 var initialized: bool = false;
 
@@ -10,5 +11,7 @@ var initialized: bool = false;
 /// This always only initializes the encodings the tests use.
 pub fn ensureInit() !void {
     if (initialized) return;
-    try init.init();
+    try init.init(&.{Encoding.utf8});
+    // Record success so later calls return early instead of re-initializing.
+    initialized = true;
 }
diff --git a/pkg/oniguruma/types.zig b/pkg/oniguruma/types.zig
new file mode 100644
index 000000000..3eafc0e90
--- /dev/null
+++ b/pkg/oniguruma/types.zig
@@ -0,0 +1,104 @@
+const std = @import("std");
+const c = @import("c.zig");
+
+/// Opaque handle to an Oniguruma encoding. Each constant aliases the
+/// `ONIG_ENCODING_*` value of the same name from the C header.
+pub const Encoding = opaque {
+    pub const ascii: *Encoding = @ptrCast(c.ONIG_ENCODING_ASCII);
+    pub const iso_8859_1: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_1);
+    pub const iso_8859_2: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_2);
+    pub const iso_8859_3: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_3);
+    pub const iso_8859_4: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_4);
+    pub const iso_8859_5: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_5);
+    pub const iso_8859_6: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_6);
+    pub const iso_8859_7: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_7);
+    pub const iso_8859_8: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_8);
+    pub const iso_8859_9: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_9);
+    pub const iso_8859_10: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_10);
+    pub const iso_8859_11: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_11);
+    pub const iso_8859_13: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_13);
+    pub const iso_8859_14: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_14);
+    pub const iso_8859_15: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_15);
+    pub const iso_8859_16: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_16);
+    pub const utf8: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF8);
+    pub const utf16_be: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF16_BE);
+    pub const utf16_le: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF16_LE);
+    pub const utf32_be: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF32_BE);
+    pub const utf32_le: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF32_LE);
+    pub const euc_jp: *Encoding = @ptrCast(c.ONIG_ENCODING_EUC_JP);
+    pub const euc_tw: *Encoding = @ptrCast(c.ONIG_ENCODING_EUC_TW);
+    pub const euc_kr: *Encoding = @ptrCast(c.ONIG_ENCODING_EUC_KR);
+    pub const euc_cn: *Encoding = @ptrCast(c.ONIG_ENCODING_EUC_CN);
+    pub const sjis: *Encoding = @ptrCast(c.ONIG_ENCODING_SJIS);
+    pub const koi8: *Encoding = @ptrCast(c.ONIG_ENCODING_KOI8);
+    pub const koi8_r: *Encoding = @ptrCast(c.ONIG_ENCODING_KOI8_R);
+    pub const cp1251: *Encoding = @ptrCast(c.ONIG_ENCODING_CP1251);
+    pub const big5: *Encoding = @ptrCast(c.ONIG_ENCODING_BIG5);
+    pub const gb18030: *Encoding = @ptrCast(c.ONIG_ENCODING_GB18030);
+};
+
+/// Opaque handle to an Oniguruma syntax definition (`ONIG_SYNTAX_*`).
+pub const Syntax = opaque {
+    /// Same value as `oniguruma`.
+    pub const default: *Syntax = @ptrCast(c.ONIG_SYNTAX_ONIGURUMA);
+    pub const asis: *Syntax = @ptrCast(c.ONIG_SYNTAX_ASIS);
+    pub const posix_basic: *Syntax = @ptrCast(c.ONIG_SYNTAX_POSIX_BASIC);
+    pub const posix_extended: *Syntax = @ptrCast(c.ONIG_SYNTAX_POSIX_EXTENDED);
+    pub const emacs: *Syntax = @ptrCast(c.ONIG_SYNTAX_EMACS);
+    pub const grep: *Syntax = @ptrCast(c.ONIG_SYNTAX_GREP);
+    pub const gnu_regex: *Syntax = @ptrCast(c.ONIG_SYNTAX_GNU_REGEX);
+    pub const java: *Syntax = @ptrCast(c.ONIG_SYNTAX_JAVA);
+    pub const perl: *Syntax = @ptrCast(c.ONIG_SYNTAX_PERL);
+    pub const perl_ng: *Syntax = @ptrCast(c.ONIG_SYNTAX_PERL_NG);
+    pub const ruby: *Syntax = @ptrCast(c.ONIG_SYNTAX_RUBY);
+    pub const oniguruma: *Syntax = @ptrCast(c.ONIG_SYNTAX_ONIGURUMA);
+};
+
+/// Option flags for `onig_new`. Field order matters: `int` bit-casts the
+/// struct to `c_uint`, with the first field in the least significant bit.
+pub const Option = packed struct(c_uint) {
+    ignorecase: bool = false,
+    extend: bool = false,
+    multiline: bool = false,
+    singleline: bool = false,
+    find_longest: bool = false,
+    find_not_empty: bool = false,
+    negate_singleline: bool = false,
+    dont_capture_group: bool = false,
+    capture_group: bool = false,
+    // search time
+    notbol: bool = false,
+    noteol: bool = false,
+    posix_region: bool = false,
+    check_validity_of_string: bool = false,
+    // compile time
+    ignorecase_is_ascii: bool = false,
+    word_is_ascii: bool = false,
+    digit_is_ascii: bool = false,
+    space_is_ascii: bool = false,
+    posix_is_ascii: bool = false,
+    text_segment_extended_grapheme_cluster: bool = false,
+    text_segment_word: bool = false,
+    // search time
+    not_begin_string: bool = false,
+    not_end_string: bool = false,
+    not_begin_position: bool = false,
+    callback_each_match: bool = false,
+    match_whole_string: bool = false,
+
+    // 25 flag bits + 7 padding bits fill a 32-bit c_uint.
+    _padding: u7 = 0,
+
+    /// Reinterpret the flags as the C option mask.
+    pub fn int(self: Option) c_uint {
+        return @bitCast(self);
+    }
+
+    test "order" {
+        const testing = std.testing;
+        const opt: Option = .{ .extend = true };
+        try testing.expectEqual(c.ONIG_OPTION_EXTEND, opt.int());
+    }
+};
+
+test {}