Handle two edge cases in the URL regex: a trailing comma and a URL enclosed in square brackets

This commit is contained in:
Vincent Prigent
2024-11-22 22:57:45 +13:00
parent a733ea381b
commit 96bda270a2

View File

@ -24,7 +24,7 @@ const oni = @import("oniguruma");
/// handling them well requires a non-regex approach. /// handling them well requires a non-regex approach.
pub const regex = pub const regex =
"(?:" ++ url_schemes ++ "(?:" ++ url_schemes ++
\\)(?:[\w\-.~:/?#\[\]@!$&*+,;=%]+(?:\(\w*\))?)+(?<!\.) \\)(?:[\w\-.~:/?#@!$&*+,;=%]+(?:[\(\[]\w*[\)\]])?)+(?<![,.])
; ;
const url_schemes = const url_schemes =
\\https?://|mailto:|ftp://|file:|ssh:|git://|ssh://|tel:|magnet:|ipfs://|ipns://|gemini://|gopher://|news: \\https?://|mailto:|ftp://|file:|ssh:|git://|ssh://|tel:|magnet:|ipfs://|ipns://|gemini://|gopher://|news:
@ -70,6 +70,10 @@ test "url regex" {
.input = "Link period https://example.com. More text.", .input = "Link period https://example.com. More text.",
.expect = "https://example.com", .expect = "https://example.com",
}, },
.{
.input = "Link trailing comma https://example.com, more text.",
.expect = "https://example.com",
},
.{ .{
.input = "Link in double quotes \"https://example.com\" and more", .input = "Link in double quotes \"https://example.com\" and more",
.expect = "https://example.com", .expect = "https://example.com",
@ -112,6 +116,11 @@ test "url regex" {
.input = "square brackets https://example.com/[foo] and more", .input = "square brackets https://example.com/[foo] and more",
.expect = "https://example.com/[foo]", .expect = "https://example.com/[foo]",
}, },
// square bracket following url
.{
.input = "[13]:TooManyStatements: TempFile#assign_temp_file_to_entity has approx 7 statements [https://example.com/docs/Too-Many-Statements.md]",
.expect = "https://example.com/docs/Too-Many-Statements.md",
},
// remaining URL schemes tests // remaining URL schemes tests
.{ .{
.input = "match ftp://example.com ftp links", .input = "match ftp://example.com ftp links",