Improve locale handling and add robust fallbacks

This commit is contained in:
plyght
2024-12-25 02:23:22 -05:00
parent ebdc7e0917
commit b2f2df8192

View File

@ -1,112 +1,166 @@
//! This file provides functions to ensure the OS locale is correctly set.
//! On Darwin (macOS), if `LANG` is unset or empty, we attempt to pull the locale
//! settings from the system preferences. Otherwise, we rely on the environment
//! variables. If everything fails, we fall back to `en_US.UTF-8`.
const std = @import("std");
const builtin = @import("builtin");
const assert = std.debug.assert;
const objc = @import("objc");
const internal_os = @import("main.zig");
const assert = std.debug.assert;
const log = std.log.scoped(.os);
//
// External definitions referencing libc symbols.
//
// References:
// - POSIX setlocale: https://pubs.opengroup.org/onlinepubs/9699919799/functions/setlocale.html
// - newlocale/freelocale: https://pubs.opengroup.org/onlinepubs/9699919799/functions/newlocale.html
//

/// Numeric value of the `LC_ALL` category from C `<locale.h>`.
///
/// The number is libc specific: Darwin defines `LC_ALL` as 0 (there 6 is
/// `LC_MESSAGES`), whereas glibc and musl define it as 6. Passing the wrong
/// number makes `setlocale` operate on a single category instead of all of
/// them.
const LC_ALL: c_int = if (builtin.target.isDarwin()) 0 else 6;

/// Category mask accepted by `newlocale`.
/// NOTE(review): 0x7fffffff looks like the musl definition; other libcs use
/// different masks — confirm before passing this to `newlocale`.
const LC_ALL_MASK: c_int = 0x7fffffff;

/// Opaque handle matching C's `locale_t`; null when no locale object exists.
const locale_t = ?*anyopaque;

/// libc `setlocale`: returns the resulting locale name, or null on failure.
extern "c" fn setlocale(category: c_int, locale: ?[*]const u8) ?[*:0]u8;
/// libc `newlocale`: creates or modifies a locale object.
extern "c" fn newlocale(category: c_int, locale: ?[*]const u8, base: locale_t) locale_t;
/// libc `freelocale`: releases a locale object obtained from `newlocale`.
extern "c" fn freelocale(v: locale_t) void;
//
// Public API
//
/// Ensures that the process locale is set, trying progressively safer sources.
///
/// Order of attempts:
///   1. On Darwin (macOS), if `LANG` is unset or empty, derive it from the
///      system preferences via Cocoa. Apps launched from Finder commonly
///      have no `LANG`, while CLI launches inherit it from the parent.
///   2. `setlocale(LC_ALL, "")`, i.e. whatever the environment specifies.
///   3. If that fails and `LANG` is non-empty, clear `LANG` and retry so the
///      system default is used. A resulting "C" locale is not accepted.
///   4. Force `en_US.UTF-8` and export it as `LANG`.
///
/// # Parameters
/// - `alloc`: allocator used for temporary copies of environment values.
///
/// # Returns
/// An error only when reading `LANG` through `internal_os.getenv` fails.
/// A failure of every `setlocale` attempt is logged, not returned.
pub fn ensureLocale(alloc: std.mem.Allocator) !void {
    assert(builtin.link_libc);

    // Step 1: on macOS, pre-populate LANG from the system preferences when
    // the environment does not provide a usable value.
    // Reference: https://developer.apple.com/documentation/foundation/nslocale
    if (comptime builtin.target.isDarwin()) {
        const lang_is_set = blk: {
            const initial = (try internal_os.getenv(alloc, "LANG")) orelse break :blk false;
            defer initial.deinit(alloc);
            break :blk initial.value.len > 0;
        };
        if (!lang_is_set) setLangFromCocoa();
    }

    // Step 2: take the locale from the environment. If this works we are done.
    if (setlocale(LC_ALL, "")) |result| {
        log.info("Locale set from environment: {s}", .{result});
        return;
    }

    // Step 3: setlocale failed, most likely because LANG names a locale that
    // is not available. LANG is read again here on purpose: a snapshot taken
    // before step 1 would miss a value written by setLangFromCocoa, and that
    // value would then never be cleared.
    if (try internal_os.getenv(alloc, "LANG")) |current_lang| {
        defer current_lang.deinit(alloc);
        if (current_lang.value.len > 0) {
            // Only one of these is strictly needed; doing both guarantees
            // LANG ends up either empty or absent.
            _ = internal_os.setenv("LANG", "");
            _ = internal_os.unsetenv("LANG");

            if (setlocale(LC_ALL, "")) |result| {
                log.info("Locale set after unsetting LANG: {s}", .{result});

                // An unsupported locale can silently resolve to the POSIX
                // "C" locale. That is not good enough, so keep going and
                // force en_US.UTF-8 below.
                if (!std.mem.eql(u8, std.mem.sliceTo(result, 0), "C")) return;
            }
        }
    }

    // Step 4: last resort.
    log.warn("All attempts to set a locale have failed. Falling back to en_US.UTF-8.", .{});
    if (setlocale(LC_ALL, "en_US.UTF-8")) |result| {
        _ = internal_os.setenv("LANG", "en_US.UTF-8");
        log.info("Locale forced to en_US.UTF-8: {s}", .{result});
        return;
    }

    // Even the fallback failed, which is unusual. Nothing more can be done.
    log.err("setlocale failed even with en_US.UTF-8 fallback. Proceeding with uncertain results.", .{});
}
//
// Internal Helpers
//

/// Sets the `LANG` environment variable on Darwin/macOS from the locale
/// selected in the system preferences, formatted as `<lang>_<country>.UTF-8`
/// (for example `en_US.UTF-8`).
///
/// Every failure is logged and leaves the environment untouched: a missing
/// `NSLocale` class, a locale without a language or country code, a value
/// that does not fit the formatting buffer, or a failing `setenv`.
fn setLangFromCocoa() void {
    const pool = objc.AutoreleasePool.init();
    defer pool.deinit();

    // Look up the Foundation class we need.
    // Reference: https://developer.apple.com/documentation/objectivec
    const NSLocale = objc.getClass("NSLocale") orelse {
        log.err("NSLocale class not found. Locale may be incorrect.", .{});
        return;
    };

    // Current user locale, then its language code ("en") and country
    // code ("US").
    // Reference: https://developer.apple.com/documentation/foundation/nslocale/1642833-currentlocale
    const current = NSLocale.msgSend(objc.Object, objc.sel("currentLocale"), .{});
    const language_code = current.getProperty(objc.Object, "languageCode");
    const country_code = current.getProperty(objc.Object, "countryCode");

    // Either property may be nil (e.g. a locale with no region). Messaging
    // nil yields a null `UTF8String`, and slicing a null `[*:0]const u8`
    // is a null dereference, so stop here instead.
    if (language_code.value == null or country_code.value == null) {
        log.err("languageCode or countryCode not found. Locale may be incorrect.", .{});
        return;
    }

    // Borrow the UTF-8 bytes of both NSStrings and view them as Zig slices.
    const lang_cstr = language_code.getProperty([*:0]const u8, "UTF8String");
    const country_cstr = country_code.getProperty([*:0]const u8, "UTF8String");
    const lang_slice = std.mem.sliceTo(lang_cstr, 0);
    const country_slice = std.mem.sliceTo(country_cstr, 0);

    // Build a value such as "en_US.UTF-8" in a stack buffer.
    var buf: [128]u8 = undefined;
    const env_value = std.fmt.bufPrintZ(
        &buf,
        "{s}_{s}.UTF-8",
        .{ lang_slice, country_slice },
    ) catch |err| {
        log.err("Error constructing locale string from system preferences. err={}", .{err});
        return;
    };
    log.info("Detected system locale: {s}", .{env_value});

    // Export it. A negative return value from setenv signals failure.
    if (internal_os.setenv("LANG", env_value) < 0) {
        log.err("Error setting the LANG environment variable to '{s}'.", .{env_value});
    }
}
// The libc locale declarations (`LC_ALL`, `LC_ALL_MASK`, `locale_t`,
// `setlocale`, `newlocale`, `freelocale`) are declared once near the top of
// this file, under "External definitions referencing libc symbols".
// Declaring them a second time here would be a redeclaration error.