From 4475f5b9c5e6955c6034b2792e194d8122b479c0 Mon Sep 17 00:00:00 2001 From: Erlend Lind Madsen Date: Fri, 2 Feb 2024 00:41:31 +0100 Subject: [PATCH] url/Link: simplify regex, remove 'find_longest' param and add new tests --- src/config/url.zig | 37 ++++++++++++++++++++++++++++++------- src/input/Link.zig | 2 +- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/config/url.zig b/src/config/url.zig index 0a04ad3f8..4feb343b0 100644 --- a/src/config/url.zig +++ b/src/config/url.zig @@ -5,8 +5,6 @@ const oni = @import("oniguruma"); /// This is here in the config package because one day the matchers will be /// configurable and this will be a default. /// -/// This is adapted from a regex used in the Alacritty project. -/// /// This regex is liberal in what it accepts after the scheme, with exceptions /// for URLs ending with . or ). Although such URLs are perfectly valid, it is /// common for text to contain URLs surrounded by parentheses (such as in @@ -24,9 +22,8 @@ const oni = @import("oniguruma"); /// /// There are many complicated cases where these heuristics break down, but /// handling them well requires a non-regex approach. -pub const regex = "(?:" ++ url_scheme ++ ")(?:[^" ++ url_exclude ++ "]*[^" ++ url_exclude ++ ").]|[^" ++ url_exclude ++ "(]*\\([^" ++ url_exclude ++ ")]*\\))"; -const url_scheme = "ipfs:|ipns:|magnet:|mailto:|gemini://|gopher://|https://|http://|news:|file:|git://|ssh:|ftp://"; -const url_exclude = "\u{0000}-\u{001F}\u{007F}-\u{009F}<>\x22\x27\\s{-}\\^⟨⟩\x60"; +pub const regex = "(?:" ++ url_scheme ++ ")(?:[\\w./+:@%?=&]+(?:\\(\\w*\\))?)+(?