Add Locale Fallback and URL Regex Alias Enhancements

This commit is contained in:
plyght
2024-12-25 10:24:18 -05:00
parent b2f2df8192
commit 5517107cd9
2 changed files with 37 additions and 97 deletions

View File

@ -40,6 +40,9 @@ const URL_SCHEMES =
"|gopher://" ++ "|gopher://" ++
"|news:"; "|news:";
/// Alias so that external code can refer to `url.regex`.
pub const regex = URL_REGEX;
// Simple regex test to ensure detection of URLs works as expected. // Simple regex test to ensure detection of URLs works as expected.
test "url regex" { test "url regex" {
const testing = std.testing; const testing = std.testing;

View File

@ -1,8 +1,3 @@
//! This file provides functions to ensure the OS locale is correctly set.
//! On Darwin (macOS), if `LANG` is unset or empty, we attempt to pull the locale
//! settings from the system preferences. Otherwise, we rely on the environment
//! variables. If everything fails, we fall back to `en_US.UTF-8`.
const std = @import("std"); const std = @import("std");
const builtin = @import("builtin"); const builtin = @import("builtin");
const objc = @import("objc"); const objc = @import("objc");
@ -11,134 +6,82 @@ const internal_os = @import("main.zig");
const assert = std.debug.assert; const assert = std.debug.assert;
const log = std.log.scoped(.os); const log = std.log.scoped(.os);
// Minimal hand-written bindings for the libc locale API, declared here
// instead of going through @cImport.

/// Category value passed to `setlocale` to select every locale category.
/// The numeric value is libc-specific: glibc and musl define `LC_ALL` as 6,
/// while Darwin's <locale.h> defines it as 0 (6 is `LC_MESSAGES` there), so
/// a fixed 6 would only set the message category on macOS.
/// NOTE(review): other BSD-derived libcs also use 0 — confirm before
/// targeting them.
const LC_ALL: c_int = if (builtin.target.os.tag.isDarwin()) 0 else 6;

/// Category mask for `newlocale` selecting all categories.
/// NOTE(review): value carried over unchanged; verify against each target's
/// <locale.h> before relying on it.
const LC_ALL_MASK: c_int = 0x7fffffff;

/// Opaque handle returned by `newlocale`; null signals failure.
const locale_t = ?*anyopaque;

extern "c" fn setlocale(category: c_int, locale: ?[*]const u8) ?[*:0]u8;
extern "c" fn newlocale(category: c_int, locale: ?[*]const u8, base: locale_t) locale_t;
extern "c" fn freelocale(v: locale_t) void;
//
// Public API
//
/// Ensures the process locale is usable, trying progressively blunter
/// fallbacks until one succeeds.
///
/// Order of attempts:
///   1. On Darwin, when `LANG` is unset or empty, call `setLangFromCocoa()`.
///   2. `setlocale(LC_ALL, "")`, taking the locale from the environment.
///   3. Unset `LANG` and retry; a resulting "C" locale is not accepted.
///   4. Force `en_US.UTF-8` and export it as `LANG`.
///
/// `alloc` is used only for the temporary value returned by
/// `internal_os.getenv`, which is released before this function returns.
///
/// Returns an error only when `internal_os.getenv` fails. Failures of
/// `setenv`, `unsetenv` and `setlocale` are logged and do not abort.
pub fn ensureLocale(alloc: std.mem.Allocator) !void {
    assert(builtin.link_libc);

    const maybe_lang = try internal_os.getenv(alloc, "LANG");
    defer if (maybe_lang) |lang| lang.deinit(alloc);

    // Whether the environment we were started with carried a non-empty LANG.
    const had_lang = if (maybe_lang) |lang| lang.value.len > 0 else false;

    // `maybe_lang` is a snapshot taken before the Cocoa lookup, so it cannot
    // tell us whether LANG exists afterwards. Remember that the lookup ran so
    // the retry below also covers a LANG that Cocoa exported.
    const cocoa_attempted = blk: {
        if (comptime builtin.target.isDarwin()) {
            if (!had_lang) {
                setLangFromCocoa();
                break :blk true;
            }
        }
        break :blk false;
    };

    if (setlocale(LC_ALL, "")) |loc| {
        log.info("Locale set from environment: {s}", .{loc});
        return;
    }

    // setlocale rejected the environment, most likely because LANG names a
    // locale that is not installed. Drop LANG and let libc pick from the
    // remaining variables.
    if (had_lang or cocoa_attempted) {
        if (internal_os.unsetenv("LANG") < 0) {
            // Best effort: the retry is still attempted.
            log.err("Failed to unset LANG.", .{});
        }

        if (setlocale(LC_ALL, "")) |loc| {
            log.info("Locale set after unsetting LANG: {s}", .{loc});

            // A plain "C" locale is treated as a failure so that we end up
            // on the UTF-8 fallback below instead.
            if (!std.mem.eql(u8, std.mem.sliceTo(loc, 0), "C")) return;
        }
    }

    // Final fallback.
    log.warn("All attempts to set a locale have failed. Falling back to en_US.UTF-8.", .{});
    if (setlocale(LC_ALL, "en_US.UTF-8")) |fallback_loc| {
        // Export LANG as well, now that the process locale is en_US.UTF-8.
        if (internal_os.setenv("LANG", "en_US.UTF-8") < 0) {
            log.err("Failed to set LANG to en_US.UTF-8.", .{});
        }
        log.info("Locale forced to en_US.UTF-8: {s}", .{fallback_loc});
    } else {
        log.err("setlocale('en_US.UTF-8') failed. Proceeding with uncertain results.", .{});
    }
}
//
// Internal Helpers
//
/// Sets the LANG environment variable on Darwin/macOS based on the system
/// preferences selected locale settings.
///
/// # Analysis
/// If the Cocoa calls or the class lookups fail, a warning is logged and
/// the function returns without modifying any environment variables.
fn setLangFromCocoa() void { fn setLangFromCocoa() void {
const pool = objc.AutoreleasePool.init(); const pool = objc.AutoreleasePool.init();
defer pool.deinit(); defer pool.deinit();
// Attempt to obtain references to Foundation classes.
// Reference: Apple Objective-C runtime: https://developer.apple.com/documentation/objectivec
const NSLocale = objc.getClass("NSLocale") orelse { const NSLocale = objc.getClass("NSLocale") orelse {
log.err("NSLocale class not found. Locale may be incorrect.", .{}); log.err("NSLocale class not found. Locale may be incorrect.", .{});
return; return;
}; };
// msgSend allows sending a message to the class instance:
// - `currentLocale` returns the current user locale.
// Reference: https://developer.apple.com/documentation/foundation/nslocale/1642833-currentlocale
const locale_obj = NSLocale.msgSend(objc.Object, objc.sel("currentLocale"), .{}); const locale_obj = NSLocale.msgSend(objc.Object, objc.sel("currentLocale"), .{});
const lang_obj = locale_obj.getProperty(objc.Object, "languageCode"); const lang_obj = locale_obj.getProperty(objc.Object, "languageCode");
const country_obj = locale_obj.getProperty(objc.Object, "countryCode"); const country_obj = locale_obj.getProperty(objc.Object, "countryCode");
// Retrieve the `UTF8String` property from the Objective-C strings.
// If these calls fail, they will return null pointers, which we can
// detect by zero-length slices in Zig.
const c_lang_ptr = lang_obj.getProperty([*:0]const u8, "UTF8String"); const c_lang_ptr = lang_obj.getProperty([*:0]const u8, "UTF8String");
const c_country_ptr = country_obj.getProperty([*:0]const u8, "UTF8String"); const c_country_ptr = country_obj.getProperty([*:0]const u8, "UTF8String");
@ -146,21 +89,15 @@ fn setLangFromCocoa() void {
const z_country = std.mem.sliceTo(c_country_ptr, 0); const z_country = std.mem.sliceTo(c_country_ptr, 0);
var buf: [128]u8 = undefined; var buf: [128]u8 = undefined;
// Attempt to format a string like "en_US.UTF-8" into a buffer. const env_value = std.fmt.bufPrintZ(&buf, "{s}_{s}.UTF-8", .{ z_lang, z_country }) catch |err| {
const env_value = std.fmt.bufPrintZ( log.err("Error constructing locale string. err={}", .{err});
&buf,
"{s}_{s}.UTF-8",
.{ z_lang, z_country }
) catch |err| {
log.err("Error constructing locale string from system preferences. err={}", .{err});
return; return;
}; };
log.info("Detected system locale: {s}", .{env_value}); log.info("Detected system locale: {s}", .{env_value});
// Finally, set `LANG` using our internal OS helper. const rc = internal_os.setenv("LANG", env_value);
// If setenv fails, it returns a negative integer. if (rc < 0) {
if (internal_os.setenv("LANG", env_value) < 0) {
log.err("Error setting the LANG environment variable to '{s}'.", .{env_value}); log.err("Error setting the LANG environment variable to '{s}'.", .{env_value});
} }
} }