From 0849aa8f2099e92018be9a14ca23037daa1f16eb Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Tue, 11 Jul 2023 11:50:12 -0700 Subject: [PATCH] validate locale prior to setting, fallback to en_US.UTF-8 Fixes #201 I don't fully understand locales, but it appears that the locale returned from NSLocale can be "valid" in general but invalid according to libc's locale API. If you attempt to `setlocale` with this bad locale, it defaults everything to "C", which ends up breaking a lot of things. This commit validates the locale, and if it is invalid, we default to "en_US.UTF-8" so things tend to work. This behavior can be overridden using standard environment variables (LANG, LC_ALL, etc.). This also doesn't touch env vars so further subprocesses from the shell see original locale env vars. --- src/os/locale.zig | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/src/os/locale.zig b/src/os/locale.zig index 21de12387..ebc347b7c 100644 --- a/src/os/locale.zig +++ b/src/os/locale.zig @@ -25,12 +25,35 @@ pub fn ensureLocale() void { } } + // If we have a locale set in the env, we verify it is valid. If it isn't + // valid, then we set it to a default value of en_US.UTF-8. We don't touch + // the existing LANG value in the env so that child processes can use it, + // though. + const locale: []const u8 = locale: { + // If we don't have lang set, we use "" and let the default happen. + const lang = std.os.getenv("LANG") orelse break :locale ""; + + // If the locale is valid, we also use "" because LANG is already set. + if (localeIsValid(lang)) break :locale ""; + + log.info("LANG is not valid according to libc, will use en_US.UTF-8", .{}); + break :locale "en_US.UTF-8"; + }; + // Set the locale - if (setlocale(LC_ALL, "")) |locale| { - log.debug("setlocale result={s}", .{locale}); + if (setlocale(LC_ALL, locale.ptr)) |v| { + log.debug("setlocale result={s}", .{v}); } else log.warn("setlocale failed, locale may be incorrect", .{}); } +/// Returns true if the given locale is valid according to libc. This value +/// can be `en_US` or `en_US.utf-8` style of formatting. +fn localeIsValid(locale: []const u8) bool { + const v = newlocale(LC_ALL_MASK, locale.ptr, null) orelse return false; + defer freelocale(v); + return true; +} + /// This sets the LANG environment variable based on the macOS system /// preferences selected locale settings. fn setLangFromCocoa() void { @@ -87,5 +110,9 @@ fn setLangFromCocoa() void { } const LC_ALL: c_int = 6; // from locale.h -extern "c" fn setlocale(category: c_int, locale: ?[*]const u8) ?[*:0]u8; +const LC_ALL_MASK: c_int = 0x7fffffff; // from locale.h +const locale_t = ?*anyopaque; extern "c" fn setenv(name: ?[*]const u8, value: ?[*]const u8, overwrite: c_int) c_int; +extern "c" fn setlocale(category: c_int, locale: ?[*]const u8) ?[*:0]u8; +extern "c" fn newlocale(category: c_int, locale: ?[*]const u8, base: locale_t) locale_t; +extern "c" fn freelocale(v: locale_t) void;