Add Locale Fallback and URL Regex Alias Enhancements

This commit is contained in:
plyght
2024-12-25 10:24:18 -05:00
parent b2f2df8192
commit 5517107cd9
2 changed files with 37 additions and 97 deletions

View File

@ -4,8 +4,8 @@ const oni = @import("oniguruma");
/// Default URL regex.
///
/// Sources:
/// 1. [Oniguruma GitHub](https://github.com/kkos/oniguruma)
/// 2. [Zig stdlib docs](https://ziglang.org/documentation/master/std/)
/// 1. [Oniguruma GitHub](https://github.com/kkos/oniguruma)
/// 2. [Zig stdlib docs](https://ziglang.org/documentation/master/std/)
///
/// Explanation (analysis):
/// - Matches a set of schemes (URL_SCHEMES).
@ -40,6 +40,9 @@ const URL_SCHEMES =
"|gopher://" ++
"|news:";
/// Alias so that external code can refer to `url.regex`.
/// This is a plain public re-export of `URL_REGEX`; it introduces no new
/// pattern, only a second name for the same value.
pub const regex = URL_REGEX;
// Simple regex test to ensure detection of URLs works as expected.
test "url regex" {
const testing = std.testing;

View File

@ -1,8 +1,3 @@
//! This file provides functions to ensure the OS locale is correctly set.
//! On Darwin (macOS), if `LANG` is unset or empty, we attempt to pull the locale
//! settings from the system preferences. Otherwise, we rely on the environment
//! variables. If everything fails, we fall back to `en_US.UTF-8`.
const std = @import("std");
const builtin = @import("builtin");
const objc = @import("objc");
@ -11,134 +6,82 @@ const internal_os = @import("main.zig");
const assert = std.debug.assert;
const log = std.log.scoped(.os);
//
// External definitions referencing libc symbols.
//
// References:
// - POSIX setlocale: https://pubs.opengroup.org/onlinepubs/9699919799/functions/setlocale.html
// - newlocale/freelocale: https://pubs.opengroup.org/onlinepubs/9699919799/functions/newlocale.html
//
// NOTE(review): `LC_ALL` and `LC_ALL_MASK` each appear twice below with
// identical text. This looks like the removed/added sides of a
// whitespace-only diff line rather than two real declarations (a duplicate
// `const` would not compile) — confirm against the actual file.
//
// Both values are hard-coded instead of being imported from the C headers.
// NOTE(review): presumably they match the <locale.h> of the libc being
// linked; the numeric value of LC_ALL is platform-specific — verify for each
// supported target.
const LC_ALL: c_int = 6; // from C <locale.h>
const LC_ALL_MASK: c_int = 0x7fffffff; // from C <locale.h>
const LC_ALL: c_int = 6; // from C <locale.h>
const LC_ALL_MASK: c_int = 0x7fffffff; // from C <locale.h>
// Nullable opaque pointer used as the argument/return type of
// newlocale/freelocale below.
const locale_t = ?*anyopaque;
// libc entry points, declared by hand. `setlocale` returns null on failure,
// which is what the `if (setlocale(...)) |v|` checks in ensureLocale rely on.
extern "c" fn setlocale(category: c_int, locale: ?[*]const u8) ?[*:0]u8;
extern "c" fn newlocale(category: c_int, locale: ?[*]const u8, base: locale_t) locale_t;
extern "c" fn freelocale(v: locale_t) void;
//
// Public API
//
/// Ensures that the locale is set correctly. If `LANG` is unset or empty
/// on Darwin (macOS), attempts to query the system locale via Cocoa.
/// Otherwise, tries to use the existing environment variable values, and
/// if that fails, falls back to `en_US.UTF-8`.
///
/// # Parameters
/// - `alloc`: A valid allocator for temporary allocations.
///
/// # Returns
/// An error if reading or modifying environment variables fails.
///
/// # References
/// - Zig standard library environment handling: https://ziglang.org/documentation/master/std/#std;mem
/// - Darwin/macOS locale logic (analysis): It's common for macOS apps to
/// lack a `LANG` variable when launched from Finder, so we use the Cocoa
/// API to derive one.
///
/// # Analysis
/// This function tries several fallbacks to ensure the user is not left
/// in a broken or unsupported locale scenario.
/// Otherwise, uses environment variables. If everything fails, falls back to
/// `en_US.UTF-8`.
pub fn ensureLocale(alloc: std.mem.Allocator) !void {
// Overview of the visible flow:
//   1. read LANG; on Darwin, if it is missing or empty, call setLangFromCocoa()
//   2. setlocale(LC_ALL, "") — return on success
//   3. if LANG was non-empty, clear/unset it and retry; accept any result
//      other than "C"
//   4. otherwise force "en_US.UTF-8" and export it through LANG
//
// NOTE(review): several statements in this body appear twice with only the
// capture/variable name changed. They look like the removed and added sides
// of diff lines shown together; each pair is flagged below. Confirm against
// the real file before relying on this text as compilable Zig.

// Requires libc because setlocale is declared as an extern "c" symbol.
assert(builtin.link_libc);
// Attempt to read `LANG` from the environment.
// Reference: Zig environment variable handling: https://ziglang.org/documentation/master/std/#std;os
const maybe_lang = try internal_os.getenv(alloc, "LANG");
// NOTE(review): the next two `defer` lines release the same value; if both
// were live, `deinit` would run twice on scope exit. Presumably only one of
// them exists in the actual file.
defer if (maybe_lang) |lang_buffer| lang_buffer.deinit(alloc);
defer if (maybe_lang) |lang| lang.deinit(alloc);
// On macOS, if `LANG` is unset or empty, we attempt to set it via Cocoa.
// Reference: Apple docs for NSLocale:
// https://developer.apple.com/documentation/foundation/nslocale
if (comptime builtin.target.isDarwin()) {
if (maybe_lang == null or maybe_lang.?.value.len == 0) {
setLangFromCocoa();
}
}
// Attempt to set locale from environment variables.
// If successful, we're done.
// NOTE(review): two `if (setlocale(LC_ALL, ""))` openers follow (captures
// `setloc_result` and `loc`); they appear to be old/new variants of one check.
if (setlocale(LC_ALL, "")) |setloc_result| {
log.info("Locale set from environment: {s}", .{setloc_result});
if (setlocale(LC_ALL, "")) |loc| {
log.info("Locale set from environment: {s}", .{loc});
return;
}
// The call to setlocale failed, likely due to an invalid LANG value.
// We try unsetting `LANG` altogether and re-attempting.
if (maybe_lang) |old_lang| {
if (old_lang.value.len > 0) {
// Clear/unset LANG to force the system default locale.
// LANG is first set to the empty string and then unset; both results are
// discarded here.
_ = internal_os.setenv("LANG", "");
_ = internal_os.unsetenv("LANG");
// NOTE(review): `unsetenv("LANG")` is called a second time below, this time
// with its return code checked. A failure is only logged; execution continues.
const rc_unset = internal_os.unsetenv("LANG");
if (rc_unset < 0) {
log.err("Failed to unset LANG.", .{});
// Could return an error if desired:
// return error.CannotUnsetLang;
}
// NOTE(review): the retry below is also present twice (`setloc_result` / `loc`).
if (setlocale(LC_ALL, "")) |setloc_result| {
log.info("Locale set after unsetting LANG: {s}", .{setloc_result});
// Some systems fall back to "C" if the specified locale doesn't exist.
// If that's the case, we prefer not to rely on "C" and instead will
// later force "en_US.UTF-8".
if (!std.mem.eql(u8, std.mem.sliceTo(setloc_result, 0), "C")) {
// Retry
if (setlocale(LC_ALL, "")) |loc| {
log.info("Locale set after unsetting LANG: {s}", .{loc});
if (!std.mem.eql(u8, std.mem.sliceTo(loc, 0), "C")) {
return;
}
}
}
}
// If we get here, everything has failed, so fallback to en_US.UTF-8.
// Final fallback
log.warn("All attempts to set a locale have failed. Falling back to en_US.UTF-8.", .{});
// NOTE(review): two variants of the fallback follow. The first discards the
// setenv result and returns; the second checks the return code and logs on
// failure. Presumably only one is in the real file.
if (setlocale(LC_ALL, "en_US.UTF-8")) |fallback_setloc| {
_ = internal_os.setenv("LANG", "en_US.UTF-8");
log.info("Locale forced to en_US.UTF-8: {s}", .{fallback_setloc});
return;
if (setlocale(LC_ALL, "en_US.UTF-8")) |fallback_loc| {
// Export the forced locale through LANG as well.
const rc_env = internal_os.setenv("LANG", "en_US.UTF-8");
if (rc_env < 0) {
log.err("Failed to set LANG to en_US.UTF-8.", .{});
// Could return an error or just continue
}
log.info("Locale forced to en_US.UTF-8: {s}", .{fallback_loc});
} else {
// Even the fallback has failed, which is quite unusual.
// No error is returned on this path; the failure is only logged.
log.err("setlocale failed even with en_US.UTF-8 fallback. Proceeding with uncertain results.", .{});
// Even fallback failed
log.err("setlocale('en_US.UTF-8') failed. Proceeding with uncertain results.", .{});
}
}
//
// Internal Helpers
//
/// Sets the LANG environment variable on Darwin/macOS based on the system
/// preferences selected locale settings.
///
/// # Analysis
/// If the Cocoa calls or the class lookups fail, a warning is logged and
/// the function returns without modifying any environment variables.
fn setLangFromCocoa() void {
// Builds a "<language>_<country>.UTF-8" string from NSLocale's current locale
// and stores it in the LANG environment variable. Every failure path visible
// here logs and returns (or falls through) without raising an error.

// Autorelease pool scoped to this call.
const pool = objc.AutoreleasePool.init();
defer pool.deinit();
// Attempt to obtain references to Foundation classes.
// Reference: Apple Objective-C runtime: https://developer.apple.com/documentation/objectivec
const NSLocale = objc.getClass("NSLocale") orelse {
log.err("NSLocale class not found. Locale may be incorrect.", .{});
return;
};
// msgSend allows sending a message to the class instance:
// - `currentLocale` returns the current user locale.
// Reference: https://developer.apple.com/documentation/foundation/nslocale/1642833-currentlocale
const locale_obj = NSLocale.msgSend(objc.Object, objc.sel("currentLocale"), .{});
const lang_obj = locale_obj.getProperty(objc.Object, "languageCode");
const country_obj = locale_obj.getProperty(objc.Object, "countryCode");
// Retrieve the `UTF8String` property from the Objective-C strings.
// If these calls fail, they will return null pointers, which we can
// detect by zero-length slices in Zig.
// NOTE(review): the statement above is not backed by the visible code — both
// results are typed as non-optional `[*:0]const u8` and no null or length
// check is visible before they are passed to `sliceTo`. Confirm how a missing
// language/country code is actually handled.
const c_lang_ptr = lang_obj.getProperty([*:0]const u8, "UTF8String");
const c_country_ptr = country_obj.getProperty([*:0]const u8, "UTF8String");
// NOTE(review): the next line is a diff hunk header, not Zig source. The
// definition of `z_lang` (used in the format call below) is not visible in
// this view; presumably it mirrors `z_country` — confirm.
@ -146,21 +89,15 @@ fn setLangFromCocoa() void {
const z_country = std.mem.sliceTo(c_country_ptr, 0);
// Fixed 128-byte stack buffer; bufPrintZ returns an error if the formatted
// string does not fit, which is handled by the `catch` below.
var buf: [128]u8 = undefined;
// Attempt to format a string like "en_US.UTF-8" into a buffer.
// NOTE(review): the format call appears twice (multi-line, then single-line)
// with different log messages — likely the old/new sides of one diff change.
const env_value = std.fmt.bufPrintZ(
&buf,
"{s}_{s}.UTF-8",
.{ z_lang, z_country }
) catch |err| {
log.err("Error constructing locale string from system preferences. err={}", .{err});
const env_value = std.fmt.bufPrintZ(&buf, "{s}_{s}.UTF-8", .{ z_lang, z_country }) catch |err| {
log.err("Error constructing locale string. err={}", .{err});
return;
};
log.info("Detected system locale: {s}", .{env_value});
// Finally, set `LANG` using our internal OS helper.
// If setenv fails, it returns a negative integer.
// NOTE(review): the check below is also shown in two forms (inline comparison
// and via `rc`); a failure is logged and not propagated.
if (internal_os.setenv("LANG", env_value) < 0) {
const rc = internal_os.setenv("LANG", env_value);
if (rc < 0) {
log.err("Error setting the LANG environment variable to '{s}'.", .{env_value});
}
}