diff --git a/TODO.md b/TODO.md
index 933f4efa4..be8138f90 100644
--- a/TODO.md
+++ b/TODO.md
@@ -18,6 +18,11 @@ Correctness:
 * `exit` in the shell should close the window
 * test wrap against wraptest: https://github.com/mattiase/wraptest
   - automate this in some way
+* Charsets: UTF-8 vs. ASCII mode
+  - we only support UTF-8 input right now
+  - need fallback glyphs if they're not supported
+  - can effect a crash using `vttest` menu `3 10` since it tries to parse
+    ASCII as UTF-8.
 
 Improvements:
 
@@ -37,4 +42,3 @@ Major Features:
 * Sixels: https://saitoha.github.io/libsixel/
 * Kitty keyboard protocol: https://sw.kovidgoyal.net/kitty/keyboard-protocol/
 * Kitty graphics protocol: https://sw.kovidgoyal.net/kitty/graphics-protocol/
-* Charsets (i.e. line drawing, `CSI ( B` and so on)
diff --git a/src/Window.zig b/src/Window.zig
index e7a06619c..575680dc4 100644
--- a/src/Window.zig
+++ b/src/Window.zig
@@ -1552,7 +1552,8 @@ pub fn deviceAttributes(
     _ = params;
 
     switch (req) {
-        .primary => self.queueWrite("\x1B[?6c") catch |err|
+        // VT220
+        .primary => self.queueWrite("\x1B[?62;c") catch |err|
             log.warn("error queueing device attr response: {}", .{err}),
         else => log.warn("unimplemented device attributes req: {}", .{req}),
     }
@@ -1632,3 +1633,22 @@ pub fn setActiveStatusDisplay(
 ) !void {
     self.terminal.status_display = req;
 }
+
+/// Stream handler callback: store charset `set` in slot `slot` of the terminal.
+pub fn configureCharset(
+    self: *Window,
+    slot: terminal.CharsetSlot,
+    set: terminal.Charset,
+) !void {
+    self.terminal.configureCharset(slot, set);
+}
+
+/// Stream handler callback: forward a charset invocation to the terminal.
+pub fn invokeCharset(
+    self: *Window,
+    active: terminal.CharsetActiveSlot,
+    slot: terminal.CharsetSlot,
+    single: bool,
+) !void {
+    self.terminal.invokeCharset(active, slot, single);
+}
diff --git a/src/terminal/Terminal.zig b/src/terminal/Terminal.zig
index 0433ed553..5dfcbe9d5 100644
--- a/src/terminal/Terminal.zig
+++ b/src/terminal/Terminal.zig
@@ -12,6 +12,7 @@ const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 
 const ansi = @import("ansi.zig");
+const charsets = @import("charsets.zig");
 const csi = @import("csi.zig");
 const sgr = @import("sgr.zig");
 const Selection = @import("Selection.zig");
@@ -56,6 +57,9 @@ cols: usize,
 /// The current scrolling region.
 scrolling_region: ScrollingRegion,
 
+/// The charset state
+charset: CharsetState = .{},
+
 /// Modes - This isn't exhaustive, since some modes (i.e. cursor origin)
 /// are applied to the cursor and others aren't boolean yes/no.
 modes: packed struct {
@@ -79,6 +83,23 @@ modes: packed struct {
     }
 } = .{},
 
+/// State required for all charset operations.
+const CharsetState = struct {
+    /// The list of graphical charsets by slot
+    charsets: CharsetArray = CharsetArray.initFill(charsets.Charset.utf8),
+
+    /// GL is the slot to use when using a 7-bit printable char (up to 127)
+    /// GR used for 8-bit printable chars.
+    gl: charsets.Slots = .G0,
+    gr: charsets.Slots = .G2,
+
+    /// Single shift where a slot is used for exactly one char.
+    single_shift: ?charsets.Slots = null,
+
+    /// An array to map a charset slot to a lookup table.
+    const CharsetArray = std.EnumArray(charsets.Slots, charsets.Charset);
+};
+
 /// The event types that can be reported for mouse-related activities.
 /// These are all mutually exclusive (hence in a single enum).
 pub const MouseEvents = enum(u3) {
@@ -376,6 +397,31 @@ pub fn setAttribute(self: *Terminal, attr: sgr.Attribute) !void {
     }
 }
 
+/// Set the charset into the given slot.
+pub fn configureCharset(self: *Terminal, slot: charsets.Slots, set: charsets.Charset) void {
+    self.charset.charsets.set(slot, set);
+}
+
+/// Invoke the charset in slot into the active slot. If single is true,
+/// then this will only be invoked for a single character.
+pub fn invokeCharset(
+    self: *Terminal,
+    active: charsets.ActiveSlot,
+    slot: charsets.Slots,
+    single: bool,
+) void {
+    if (single) {
+        assert(active == .GL);
+        self.charset.single_shift = slot;
+        return;
+    }
+
+    switch (active) {
+        .GL => self.charset.gl = slot,
+        .GR => self.charset.gr = slot,
+    }
+}
+
 pub fn print(self: *Terminal, c: u21) !void {
     const tracy = trace(@src());
     defer tracy.end();
@@ -438,7 +484,29 @@ pub fn print(self: *Terminal, c: u21) !void {
     }
 }
 
-fn printCell(self: *Terminal, c: u21) *Screen.Cell {
+fn printCell(self: *Terminal, unmapped_c: u21) *Screen.Cell {
+    const c = c: {
+        // TODO: non-utf8 handling, gr
+
+        // If we're single shifting, then we use the key exactly once.
+        const key = if (self.charset.single_shift) |key_once| blk: {
+            self.charset.single_shift = null;
+            break :blk key_once;
+        } else self.charset.gl;
+        const set = self.charset.charsets.get(key);
+
+        // UTF-8 or ASCII is used as-is
+        if (set == .utf8 or set == .ascii) break :c unmapped_c;
+
+        // Codepoints outside the 8-bit range have no entry in a charset
+        // table, so substitute a space rather than trapping in @intCast.
+        if (unmapped_c > std.math.maxInt(u8)) break :c ' ';
+
+        // Get our lookup table and map it
+        const table = set.table();
+        break :c @intCast(u21, table[@intCast(u8, unmapped_c)]);
+    };
+
     const cell = self.screen.getCell(
         self.screen.cursor.y,
         self.screen.cursor.x,
@@ -1245,6 +1313,68 @@ test "Terminal: print writes to bottom if scrolled" {
     }
 }
 
+test "Terminal: print charset" {
+    var t = try init(testing.allocator, 80, 80);
+    defer t.deinit(testing.allocator);
+
+    // G1 should have no effect
+    t.configureCharset(.G1, .dec_special);
+    t.configureCharset(.G2, .dec_special);
+    t.configureCharset(.G3, .dec_special);
+
+    // Basic grid writing
+    try t.print('`');
+    t.configureCharset(.G0, .utf8);
+    try t.print('`');
+    t.configureCharset(.G0, .ascii);
+    try t.print('`');
+    t.configureCharset(.G0, .dec_special);
+    try t.print('`');
+    {
+        var str = try t.plainString(testing.allocator);
+        defer testing.allocator.free(str);
+        try testing.expectEqualStrings("```◆", str);
+    }
+}
+
+test "Terminal: print invoke charset" {
+    var t = try init(testing.allocator, 80, 80);
+    defer t.deinit(testing.allocator);
+
+    t.configureCharset(.G1, .dec_special);
+
+    // Basic grid writing
+    try t.print('`');
+    t.invokeCharset(.GL, .G1, false);
+    try t.print('`');
+    try t.print('`');
+    t.invokeCharset(.GL, .G0, false);
+    try t.print('`');
+    {
+        var str = try t.plainString(testing.allocator);
+        defer testing.allocator.free(str);
+        try testing.expectEqualStrings("`◆◆`", str);
+    }
+}
+
+test "Terminal: print invoke charset single" {
+    var t = try init(testing.allocator, 80, 80);
+    defer t.deinit(testing.allocator);
+
+    t.configureCharset(.G1, .dec_special);
+
+    // Basic grid writing
+    try t.print('`');
+    t.invokeCharset(.GL, .G1, true);
+    try t.print('`');
+    try t.print('`');
+    {
+        var str = try t.plainString(testing.allocator);
+        defer testing.allocator.free(str);
+        try testing.expectEqualStrings("`◆`", str);
+    }
+}
+
 test "Terminal: linefeed and carriage return" {
     var t = try init(testing.allocator, 80, 80);
     defer t.deinit(testing.allocator);
diff --git a/src/terminal/charsets.zig b/src/terminal/charsets.zig
new file mode 100644
index 000000000..e6ebe21aa
--- /dev/null
+++ b/src/terminal/charsets.zig
@@ -0,0 +1,113 @@
+const std = @import("std");
+const assert = std.debug.assert;
+
+/// The available charset slots for a terminal.
+pub const Slots = enum(u3) {
+    G0 = 0,
+    G1 = 1,
+    G2 = 2,
+    G3 = 3,
+};
+
+/// The name of the active slots.
+pub const ActiveSlot = enum { GL, GR };
+
+/// The list of supported character sets and their associated tables.
+pub const Charset = enum {
+    utf8,
+    ascii,
+    british,
+    dec_special,
+
+    /// The table for the given charset. This returns a slice that is
+    /// guaranteed to have 256 entries (one per `u8` value) that can be
+    /// used to map an 8-bit character to the given charset.
+    pub fn table(set: Charset) []const u16 {
+        return switch (set) {
+            .british => &british,
+            .dec_special => &dec_special,
+
+            // utf8 is not a table, callers should double-check if the
+            // charset is utf8 and NOT use tables.
+            .utf8 => unreachable,
+
+            // recommended that callers just map ascii directly but we can
+            // support a table
+            .ascii => &ascii,
+        };
+    }
+};
+
+/// Just a basic c => c ascii table
+const ascii = initTable();
+
+/// https://vt100.net/docs/vt220-rm/chapter2.html
+const british = british: {
+    var table = initTable();
+    table[0x23] = 0x00a3;
+    break :british table;
+};
+
+/// https://en.wikipedia.org/wiki/DEC_Special_Graphics
+const dec_special = tech: {
+    var table = initTable();
+    table[0x60] = 0x25C6;
+    table[0x61] = 0x2592;
+    table[0x62] = 0x2409;
+    table[0x63] = 0x240C;
+    table[0x64] = 0x240D;
+    table[0x65] = 0x240A;
+    table[0x66] = 0x00B0;
+    table[0x67] = 0x00B1;
+    table[0x68] = 0x2424;
+    table[0x69] = 0x240B;
+    table[0x6a] = 0x2518;
+    table[0x6b] = 0x2510;
+    table[0x6c] = 0x250C;
+    table[0x6d] = 0x2514;
+    table[0x6e] = 0x253C;
+    table[0x6f] = 0x23BA;
+    table[0x70] = 0x23BB;
+    table[0x71] = 0x2500;
+    table[0x72] = 0x23BC;
+    table[0x73] = 0x23BD;
+    table[0x74] = 0x251C;
+    table[0x75] = 0x2524;
+    table[0x76] = 0x2534;
+    table[0x77] = 0x252C;
+    table[0x78] = 0x2502;
+    table[0x79] = 0x2264;
+    table[0x7a] = 0x2265;
+    table[0x7b] = 0x03C0;
+    table[0x7c] = 0x2260;
+    table[0x7d] = 0x00A3;
+    table[0x7e] = 0x00B7;
+    break :tech table;
+};
+
+const table_len = std.math.maxInt(u8) + 1;
+
+/// Creates a 256-entry identity table (index i maps to i) as a starting point.
+fn initTable() [table_len]u16 {
+    var result: [table_len]u16 = undefined;
+    var i: usize = 0;
+    while (i < table_len) : (i += 1) result[i] = @intCast(u16, i);
+    assert(i == table_len);
+    return result;
+}
+
+test {
+    const testing = std.testing;
+    const info = @typeInfo(Charset).Enum;
+    inline for (info.fields) |field| {
+        // utf8 has no table
+        if (@field(Charset, field.name) == .utf8) continue;
+
+        const table = @field(Charset, field.name).table();
+
+        // Yes, I could use `table_len` here, but I want to explicitly use a
+        // hardcoded constant so that if there are miscompilations or a comptime
+        // issue, we catch it.
+        try testing.expectEqual(@as(usize, 256), table.len);
+    }
+}
diff --git a/src/terminal/main.zig b/src/terminal/main.zig
index fd75852e4..a1a1d0d34 100644
--- a/src/terminal/main.zig
+++ b/src/terminal/main.zig
@@ -1,5 +1,6 @@
 const builtin = @import("builtin");
 
+const charsets = @import("charsets.zig");
 const stream = @import("stream.zig");
 const ansi = @import("ansi.zig");
 const csi = @import("csi.zig");
@@ -7,6 +8,9 @@ const sgr = @import("sgr.zig");
 
 pub const point = @import("point.zig");
 pub const color = @import("color.zig");
+pub const Charset = charsets.Charset;
+pub const CharsetSlot = charsets.Slots;
+pub const CharsetActiveSlot = charsets.ActiveSlot;
 pub const Terminal = @import("Terminal.zig");
 pub const Parser = @import("Parser.zig");
 pub const Selection = @import("Selection.zig");
@@ -25,6 +29,7 @@ pub const Attribute = sgr.Attribute;
 
 test {
     _ = ansi;
+    _ = charsets;
     _ = color;
     _ = csi;
     _ = point;
diff --git a/src/terminal/stream.zig b/src/terminal/stream.zig
index b695b4e96..3ba9a0d04 100644
--- a/src/terminal/stream.zig
+++ b/src/terminal/stream.zig
@@ -2,6 +2,7 @@ const std = @import("std");
 const testing = std.testing;
 const Parser = @import("Parser.zig");
 const ansi = @import("ansi.zig");
+const charsets = @import("charsets.zig");
 const csi = @import("csi.zig");
 const sgr = @import("sgr.zig");
 const trace = @import("tracy").trace;
@@ -108,8 +109,15 @@ pub fn Stream(comptime Handler: type) type {
                 else
                     log.warn("unimplemented execute: {x}", .{c}),
 
-                // TODO
-                .SO, .SI => log.warn("TODO: Shift out/Shift in", .{}),
+                .SO => if (@hasDecl(T, "invokeCharset"))
+                    try self.handler.invokeCharset(.GL, .G1, false)
+                else
+                    log.warn("unimplemented invokeCharset: {x}", .{c}),
+
+                .SI => if (@hasDecl(T, "invokeCharset"))
+                    try self.handler.invokeCharset(.GL, .G0, false)
+                else
+                    log.warn("unimplemented invokeCharset: {x}", .{c}),
             }
         }
 
@@ -408,17 +416,52 @@ pub fn Stream(comptime Handler: type) type {
             }
         }
 
+        /// Map the ESC intermediate byte to a charset slot (G0-G3) and
+        /// forward `set` to the handler's `configureCharset`, if it has one.
+        /// Invalid intermediates are logged and otherwise ignored.
+        fn configureCharset(
+            self: *Self,
+            intermediates: []const u8,
+            set: charsets.Charset,
+        ) !void {
+            if (intermediates.len != 1) {
+                log.warn("invalid charset intermediate: {any}", .{intermediates});
+                return;
+            }
+
+            const slot: charsets.Slots = switch (intermediates[0]) {
+                // TODO: support slots '-', '.', '/'
+
+                '(' => .G0,
+                ')' => .G1,
+                '*' => .G2,
+                '+' => .G3,
+                else => {
+                    log.warn("invalid charset intermediate: {any}", .{intermediates});
+                    return;
+                },
+            };
+
+            if (@hasDecl(T, "configureCharset")) {
+                try self.handler.configureCharset(slot, set);
+                return;
+            }
+
+            log.warn("unimplemented configureCharset callback slot={} set={}", .{
+                slot,
+                set,
+            });
+        }
+
         fn escDispatch(
             self: *Self,
             action: Parser.Action.ESC,
         ) !void {
             switch (action.final) {
                 // Charsets
-                'B' => {
-                    // TODO: Charset support. Just ignore this for now because
-                    // every application sets this and it makes our logs SO
-                    // noisy.
-                },
+                'B' => try self.configureCharset(action.intermediates, .ascii),
+                'A' => try self.configureCharset(action.intermediates, .british),
+                '0' => try self.configureCharset(action.intermediates, .dec_special),
 
                 // DECSC - Save Cursor
                 '7' => if (@hasDecl(T, "saveCursor")) switch (action.intermediates.len) {
@@ -486,6 +529,69 @@ pub fn Stream(comptime Handler: type) type {
                     },
                 } else log.warn("unimplemented ESC callback: {}", .{action}),
 
+                // SS2 - Single Shift 2
+                'N' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
+                    0 => try self.handler.invokeCharset(.GL, .G2, true),
+                    else => {
+                        log.warn("invalid single shift 2 command: {}", .{action});
+                        return;
+                    },
+                } else log.warn("unimplemented invokeCharset: {}", .{action}),
+
+                // SS3 - Single Shift 3
+                'O' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
+                    0 => try self.handler.invokeCharset(.GL, .G3, true),
+                    else => {
+                        log.warn("invalid single shift 3 command: {}", .{action});
+                        return;
+                    },
+                } else log.warn("unimplemented invokeCharset: {}", .{action}),
+
+                // LS2 - Locking Shift 2
+                'n' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
+                    0 => try self.handler.invokeCharset(.GL, .G2, false),
+                    else => {
+                        log.warn("invalid locking shift 2 command: {}", .{action});
+                        return;
+                    },
+                } else log.warn("unimplemented invokeCharset: {}", .{action}),
+
+                // LS3 - Locking Shift 3
+                'o' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
+                    0 => try self.handler.invokeCharset(.GL, .G3, false),
+                    else => {
+                        log.warn("invalid locking shift 3 command: {}", .{action});
+                        return;
+                    },
+                } else log.warn("unimplemented invokeCharset: {}", .{action}),
+
+                // LS1R - Locking Shift 1 Right
+                '~' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
+                    0 => try self.handler.invokeCharset(.GR, .G1, false),
+                    else => {
+                        log.warn("invalid locking shift 1 right command: {}", .{action});
+                        return;
+                    },
+                } else log.warn("unimplemented invokeCharset: {}", .{action}),
+
+                // LS2R - Locking Shift 2 Right
+                '}' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
+                    0 => try self.handler.invokeCharset(.GR, .G2, false),
+                    else => {
+                        log.warn("invalid locking shift 2 right command: {}", .{action});
+                        return;
+                    },
+                } else log.warn("unimplemented invokeCharset: {}", .{action}),
+
+                // LS3R - Locking Shift 3 Right
+                '|' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
+                    0 => try self.handler.invokeCharset(.GR, .G3, false),
+                    else => {
+                        log.warn("invalid locking shift 3 right command: {}", .{action});
+                        return;
+                    },
+                } else log.warn("unimplemented invokeCharset: {}", .{action}),
+
                 else => if (@hasDecl(T, "escUnimplemented"))
                     try self.handler.escUnimplemented(action)
                 else