pkg/oniguruma: init regex object

This commit is contained in:
Mitchell Hashimoto
2023-11-25 09:00:43 -08:00
parent c0a06ab523
commit 4d3c7212d8
7 changed files with 152 additions and 8 deletions

View File

@ -1,5 +0,0 @@
const c = @import("c.zig");
/// Opaque handle type for an Oniguruma encoding; values are only ever
/// used through `*Encoding` pointers.
pub const Encoding = opaque {
/// `c.ONIG_ENCODING_UTF8` pointer-cast to `*Encoding`.
pub const utf8: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF8);
};

View File

@ -1,4 +1,5 @@
const c = @import("c.zig");
const Encoding = @import("types.zig").Encoding;
/// Maximum error message length.
pub const MAX_ERROR_LEN = c.ONIG_MAX_ERROR_MESSAGE_LEN;
@ -17,3 +18,10 @@ pub fn errorString(buf: []u8, code: c_int) ![]u8 {
const len = c.onig_error_code_to_str(buf.ptr, code);
return buf[0..@intCast(len)];
}
/// The Oniguruma error info type, matching the C type exactly.
///
/// Declared `extern` so that field order and layout follow the C ABI.
/// Do not reorder or resize the fields.
pub const ErrorInfo = extern struct {
encoding: *Encoding,
// NOTE(review): `par` / `par_end` presumably delimit a byte range the
// library reports alongside an error — confirm against the C header.
// NOTE(review): both are non-optional pointers; if the C library can
// leave them NULL, `?[*]u8` would be the accurate type — confirm.
par: [*]u8,
par_end: [*]u8,
};

View File

@ -1,5 +1,5 @@
const c = @import("c.zig");
const Encoding = @import("encoding.zig").Encoding;
const Encoding = @import("types.zig").Encoding;
const errors = @import("errors.zig");
/// Call once per process to initialize Oniguruma. This should be given

View File

@ -1,7 +1,8 @@
pub usingnamespace @import("init.zig");
pub usingnamespace @import("errors.zig");
pub usingnamespace @import("regex.zig");
pub usingnamespace @import("types.zig");
pub const c = @import("c.zig");
pub const Encoding = @import("encoding.zig").Encoding;
test {
@import("std").testing.refAllDecls(@This());

43
pkg/oniguruma/regex.zig Normal file
View File

@ -0,0 +1,43 @@
const std = @import("std");
const c = @import("c.zig");
const types = @import("types.zig");
const errors = @import("errors.zig");
const testEnsureInit = @import("testing.zig").ensureInit;
const ErrorInfo = errors.ErrorInfo;
const Encoding = types.Encoding;
const Option = types.Option;
const Syntax = types.Syntax;
/// Thin wrapper around a compiled Oniguruma regex handle.
pub const Regex = struct {
    /// Raw handle produced by `c.onig_new` and released by `c.onig_free`.
    value: c.OnigRegex,

    /// Compile `pattern` into a new `Regex`.
    ///
    /// `options`, `enc` and `syntax` are forwarded to `c.onig_new`. `err`
    /// is passed through as the library's error-info argument and may be
    /// null. The status code returned by `c.onig_new` is run through
    /// `errors.convertError`, and any error it yields is propagated.
    pub fn init(
        pattern: []const u8,
        options: Option,
        enc: *Encoding,
        syntax: *Syntax,
        err: ?*ErrorInfo,
    ) !Regex {
        // The C API takes the pattern as a [start, end) pointer pair.
        const pattern_start = pattern.ptr;
        const pattern_end = pattern_start + pattern.len;

        // Written by onig_new; only read after the status check passes.
        var handle: c.OnigRegex = undefined;
        const status = c.onig_new(
            &handle,
            pattern_start,
            pattern_end,
            options.int(),
            @ptrCast(@alignCast(enc)),
            @ptrCast(@alignCast(syntax)),
            @ptrCast(err),
        );
        try errors.convertError(status);

        return Regex{ .value = handle };
    }

    /// Release the underlying handle via `c.onig_free`.
    pub fn deinit(self: *Regex) void {
        c.onig_free(self.value);
    }
};
test {
    // The library must be initialized before any regex is created.
    try testEnsureInit();

    // Compile a trivial pattern with default options, then free it.
    var regex = try Regex.init(
        "foo",
        .{},
        Encoding.utf8,
        Syntax.default,
        null,
    );
    defer regex.deinit();
}

View File

@ -1,4 +1,5 @@
const init = @import("init.zig");
const Encoding = @import("types.zig").Encoding;
var initialized: bool = false;
@ -10,5 +11,5 @@ var initialized: bool = false;
/// This always only initializes the encodings the tests use.
pub fn ensureInit() !void {
// Skip initialization when the module-level `initialized` flag is set.
// NOTE(review): nothing visible in this function sets `initialized` to
// true after a successful init — confirm whether that is intended.
if (initialized) return;
// NOTE(review): the two calls below look like the removed and the added
// line of the same diff hunk shown together (the hunk header lists five
// lines on each side) — confirm that only the second call, which passes
// the UTF-8 encoding, is in the current file.
try init.init();
try init.init(&.{Encoding.utf8});
}

96
pkg/oniguruma/types.zig Normal file
View File

@ -0,0 +1,96 @@
const std = @import("std");
const c = @import("c.zig");
/// Opaque handle type for an Oniguruma encoding; values are only ever
/// used through `*Encoding` pointers.
///
/// Each declaration below is the matching `c.ONIG_ENCODING_*` value
/// pointer-cast to `*Encoding`.
pub const Encoding = opaque {
pub const ascii: *Encoding = @ptrCast(c.ONIG_ENCODING_ASCII);
// ISO-8859-* entries. There is no `iso_8859_12` declaration here.
pub const iso_8859_1: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_1);
pub const iso_8859_2: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_2);
pub const iso_8859_3: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_3);
pub const iso_8859_4: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_4);
pub const iso_8859_5: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_5);
pub const iso_8859_6: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_6);
pub const iso_8859_7: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_7);
pub const iso_8859_8: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_8);
pub const iso_8859_9: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_9);
pub const iso_8859_10: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_10);
pub const iso_8859_11: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_11);
pub const iso_8859_13: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_13);
pub const iso_8859_14: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_14);
pub const iso_8859_15: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_15);
pub const iso_8859_16: *Encoding = @ptrCast(c.ONIG_ENCODING_ISO_8859_16);
// UTF-8 / UTF-16 / UTF-32 entries.
pub const utf8: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF8);
pub const utf16_be: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF16_BE);
pub const utf16_le: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF16_LE);
pub const utf32_be: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF32_BE);
pub const utf32_le: *Encoding = @ptrCast(c.ONIG_ENCODING_UTF32_LE);
// Remaining entries exposed by the C header.
pub const euc_jp: *Encoding = @ptrCast(c.ONIG_ENCODING_EUC_JP);
pub const euc_tw: *Encoding = @ptrCast(c.ONIG_ENCODING_EUC_TW);
pub const euc_kr: *Encoding = @ptrCast(c.ONIG_ENCODING_EUC_KR);
pub const euc_cn: *Encoding = @ptrCast(c.ONIG_ENCODING_EUC_CN);
pub const sjis: *Encoding = @ptrCast(c.ONIG_ENCODING_SJIS);
pub const koi8: *Encoding = @ptrCast(c.ONIG_ENCODING_KOI8);
pub const koi8_r: *Encoding = @ptrCast(c.ONIG_ENCODING_KOI8_R);
pub const cp1251: *Encoding = @ptrCast(c.ONIG_ENCODING_CP1251);
pub const big5: *Encoding = @ptrCast(c.ONIG_ENCODING_BIG5);
pub const gb18030: *Encoding = @ptrCast(c.ONIG_ENCODING_GB18030);
};
/// Opaque handle type for an Oniguruma syntax definition; values are only
/// ever used through `*Syntax` pointers.
///
/// Each declaration below is the matching `c.ONIG_SYNTAX_*` value
/// pointer-cast to `*Syntax`.
pub const Syntax = opaque {
// `default` is bound to the same C value as `oniguruma` below
// (`c.ONIG_SYNTAX_ONIGURUMA`).
pub const default: *Syntax = @ptrCast(c.ONIG_SYNTAX_ONIGURUMA);
pub const asis: *Syntax = @ptrCast(c.ONIG_SYNTAX_ASIS);
pub const posix_basic: *Syntax = @ptrCast(c.ONIG_SYNTAX_POSIX_BASIC);
pub const posix_extended: *Syntax = @ptrCast(c.ONIG_SYNTAX_POSIX_EXTENDED);
pub const emacs: *Syntax = @ptrCast(c.ONIG_SYNTAX_EMACS);
pub const grep: *Syntax = @ptrCast(c.ONIG_SYNTAX_GREP);
pub const gnu_regex: *Syntax = @ptrCast(c.ONIG_SYNTAX_GNU_REGEX);
pub const java: *Syntax = @ptrCast(c.ONIG_SYNTAX_JAVA);
pub const perl: *Syntax = @ptrCast(c.ONIG_SYNTAX_PERL);
pub const perl_ng: *Syntax = @ptrCast(c.ONIG_SYNTAX_PERL_NG);
pub const ruby: *Syntax = @ptrCast(c.ONIG_SYNTAX_RUBY);
pub const oniguruma: *Syntax = @ptrCast(c.ONIG_SYNTAX_ONIGURUMA);
};
/// Regex option flags laid out as a bit set backed by `c_uint`.
///
/// Field order is significant: in a packed struct the first field occupies
/// the least-significant bit, so each flag's position here determines its
/// integer value. The 25 flags plus 7 padding bits total 32 bits, which
/// must equal the bit width of `c_uint` for this declaration to compile.
/// Do not reorder, insert or remove fields without checking the
/// `ONIG_OPTION_*` values in the C header.
pub const Option = packed struct(c_uint) {
// NOTE(review): this first group carries no label in the original;
// presumably compile-time options — confirm against the C header.
ignorecase: bool = false,
extend: bool = false,
multiline: bool = false,
singleline: bool = false,
find_longest: bool = false,
find_not_empty: bool = false,
negate_singleline: bool = false,
dont_capture_group: bool = false,
capture_group: bool = false,
// Options labelled "search time".
notbol: bool = false,
noteol: bool = false,
posix_region: bool = false,
check_validity_of_string: bool = false,
// Options labelled "compile time".
ignorecase_is_ascii: bool = false,
word_is_ascii: bool = false,
digit_is_ascii: bool = false,
space_is_ascii: bool = false,
posix_is_ascii: bool = false,
text_segment_extended_grapheme_cluster: bool = false,
text_segment_word: bool = false,
// Options labelled "search time" (second group).
not_begin_string: bool = false,
not_end_string: bool = false,
not_begin_position: bool = false,
callback_each_match: bool = false,
match_whole_string: bool = false,
// Unused high bits that fill the backing integer; defaults to zero.
_padding: u7 = 0,
/// Reinterpret the flag set as its raw `c_uint` bit pattern.
pub fn int(self: Option) c_uint {
return @bitCast(self);
}
// Spot-checks the layout: setting only `extend` must produce exactly
// `c.ONIG_OPTION_EXTEND`. Only this one flag is verified here.
test "order" {
const testing = std.testing;
const opt: Option = .{ .extend = true };
try testing.expectEqual(c.ONIG_OPTION_EXTEND, opt.int());
}
};
test {}