Initial charset support. This gets a lot of stuff working but still has some critical limitations (though ones rarely hit in real-world use).

Implements:

  * All charset slots G0 to G3
  * All shifting commands to shift into GL and GR
  * Single shift commands `ESC N` and `ESC O`
  * UTF-8, ASCII, British, and DEC Special Graphics character sets

Notably missing:

  * A number of rarely used charsets
  * NRC handling
  * Disabling UTF-8 mode

Everything that is missing is an edge case, but it is still necessary for correctness.
This commit is contained in:
Mitchell Hashimoto
2022-08-27 10:33:38 -07:00
committed by GitHub
6 changed files with 379 additions and 10 deletions

View File

@ -18,6 +18,11 @@ Correctness:
* `exit` in the shell should close the window * `exit` in the shell should close the window
* test wrap against wraptest: https://github.com/mattiase/wraptest * test wrap against wraptest: https://github.com/mattiase/wraptest
- automate this in some way - automate this in some way
* Charsets: UTF-8 vs. ASCII mode
- we only support UTF-8 input right now
- need fallback glyphs if they're not supported
- can effect a crash using `vttest` menu `3 10` since it tries to parse
ASCII as UTF-8.
Improvements: Improvements:
@ -37,4 +42,3 @@ Major Features:
* Sixels: https://saitoha.github.io/libsixel/ * Sixels: https://saitoha.github.io/libsixel/
* Kitty keyboard protocol: https://sw.kovidgoyal.net/kitty/keyboard-protocol/ * Kitty keyboard protocol: https://sw.kovidgoyal.net/kitty/keyboard-protocol/
* Kitty graphics protocol: https://sw.kovidgoyal.net/kitty/graphics-protocol/ * Kitty graphics protocol: https://sw.kovidgoyal.net/kitty/graphics-protocol/
* Charsets (i.e. line drawing, `CSI ( B` and so on)

View File

@ -1552,7 +1552,8 @@ pub fn deviceAttributes(
_ = params; _ = params;
switch (req) { switch (req) {
.primary => self.queueWrite("\x1B[?6c") catch |err| // VT220
.primary => self.queueWrite("\x1B[?62;c") catch |err|
log.warn("error queueing device attr response: {}", .{err}), log.warn("error queueing device attr response: {}", .{err}),
else => log.warn("unimplemented device attributes req: {}", .{req}), else => log.warn("unimplemented device attributes req: {}", .{req}),
} }
@ -1632,3 +1633,20 @@ pub fn setActiveStatusDisplay(
) !void { ) !void {
self.terminal.status_display = req; self.terminal.status_display = req;
} }
/// Load the character set `set` into charset slot `slot` by forwarding the
/// request to the terminal state owned by this window.
///
/// The forwarded call is not error-returning; the error-union return type
/// is part of this function's signature only.
/// NOTE(review): presumably this is the stream handler callback, which the
/// stream invokes with `try` — confirm against the stream's handler type.
pub fn configureCharset(
    self: *Window,
    slot: terminal.CharsetSlot,
    set: terminal.Charset,
) !void {
    return self.terminal.configureCharset(slot, set);
}
/// Invoke charset slot `slot` into the active position `active` by
/// forwarding the request to the terminal state owned by this window.
/// `single` is passed through unchanged and selects a one-character
/// (single shift) invocation instead of a locking one.
///
/// NOTE(review): presumably this is the stream handler callback, which the
/// stream invokes with `try` — confirm against the stream's handler type.
pub fn invokeCharset(
    self: *Window,
    active: terminal.CharsetActiveSlot,
    slot: terminal.CharsetSlot,
    single: bool,
) !void {
    return self.terminal.invokeCharset(active, slot, single);
}

View File

@ -12,6 +12,7 @@ const assert = std.debug.assert;
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
const ansi = @import("ansi.zig"); const ansi = @import("ansi.zig");
const charsets = @import("charsets.zig");
const csi = @import("csi.zig"); const csi = @import("csi.zig");
const sgr = @import("sgr.zig"); const sgr = @import("sgr.zig");
const Selection = @import("Selection.zig"); const Selection = @import("Selection.zig");
@ -56,6 +57,9 @@ cols: usize,
/// The current scrolling region. /// The current scrolling region.
scrolling_region: ScrollingRegion, scrolling_region: ScrollingRegion,
/// The charset state
charset: CharsetState = .{},
/// Modes - This isn't exhaustive, since some modes (i.e. cursor origin) /// Modes - This isn't exhaustive, since some modes (i.e. cursor origin)
/// are applied to the cursor and others aren't boolean yes/no. /// are applied to the cursor and others aren't boolean yes/no.
modes: packed struct { modes: packed struct {
@ -79,6 +83,23 @@ modes: packed struct {
} }
} = .{}, } = .{},
/// Everything the terminal needs to remember in order to translate printed
/// characters through the G0-G3 charset slots.
const CharsetState = struct {
    /// Maps each charset slot to the character set loaded into it.
    const CharsetArray = std.EnumArray(charsets.Slots, charsets.Charset);

    /// The character set currently loaded into each slot. Every slot
    /// starts out as UTF-8.
    charsets: CharsetArray = CharsetArray.initFill(.utf8),

    /// The slot invoked into GL, used for 7-bit printable characters
    /// (up to 127). Defaults to G0.
    gl: charsets.Slots = .G0,

    /// The slot invoked into GR, used for 8-bit printable characters.
    /// Defaults to G2.
    gr: charsets.Slots = .G2,

    /// When non-null, the slot to use for exactly one printed character
    /// (a single shift) before falling back to the locked slot.
    single_shift: ?charsets.Slots = null,
};
/// The event types that can be reported for mouse-related activities. /// The event types that can be reported for mouse-related activities.
/// These are all mutually exclusive (hence in a single enum). /// These are all mutually exclusive (hence in a single enum).
pub const MouseEvents = enum(u3) { pub const MouseEvents = enum(u3) {
@ -376,6 +397,31 @@ pub fn setAttribute(self: *Terminal, attr: sgr.Attribute) !void {
} }
} }
/// Load the character set `set` into charset slot `slot`.
///
/// This only changes what the slot holds. Which slot is invoked into GL or
/// GR, and any pending single shift, are left untouched.
pub fn configureCharset(
    self: *Terminal,
    slot: charsets.Slots,
    set: charsets.Charset,
) void {
    self.charset.charsets.set(slot, set);
}
/// Make charset slot `slot` the one used for the `active` position.
///
/// With `single == false` this is a locking shift: the slot stays invoked
/// into GL or GR until another shift replaces it. With `single == true` it
/// is a single shift: the slot is recorded for one character only, and the
/// locked GL/GR slots are not modified. Single shifts are only valid for
/// GL, which is asserted.
pub fn invokeCharset(
    self: *Terminal,
    active: charsets.ActiveSlot,
    slot: charsets.Slots,
    single: bool,
) void {
    if (!single) {
        // Locking shift: pick the field that tracks the requested
        // position and overwrite it.
        const locked = switch (active) {
            .GL => &self.charset.gl,
            .GR => &self.charset.gr,
        };
        locked.* = slot;
        return;
    }

    // Single shift: remember the slot for exactly one character.
    assert(active == .GL);
    self.charset.single_shift = slot;
}
pub fn print(self: *Terminal, c: u21) !void { pub fn print(self: *Terminal, c: u21) !void {
const tracy = trace(@src()); const tracy = trace(@src());
defer tracy.end(); defer tracy.end();
@ -438,7 +484,25 @@ pub fn print(self: *Terminal, c: u21) !void {
} }
} }
fn printCell(self: *Terminal, c: u21) *Screen.Cell { fn printCell(self: *Terminal, unmapped_c: u21) *Screen.Cell {
const c = c: {
// TODO: non-utf8 handling, gr
// If we're single shifting, then we use the key exactly once.
const key = if (self.charset.single_shift) |key_once| blk: {
self.charset.single_shift = null;
break :blk key_once;
} else self.charset.gl;
const set = self.charset.charsets.get(key);
// UTF-8 or ASCII is used as-is
if (set == .utf8 or set == .ascii) break :c unmapped_c;
// Get our lookup table and map it
const table = set.table();
break :c @intCast(u21, table[@intCast(u8, unmapped_c)]);
};
const cell = self.screen.getCell( const cell = self.screen.getCell(
self.screen.cursor.y, self.screen.cursor.y,
self.screen.cursor.x, self.screen.cursor.x,
@ -1245,6 +1309,68 @@ test "Terminal: print writes to bottom if scrolled" {
} }
} }
test "Terminal: print charset" {
    var t = try init(testing.allocator, 80, 80);
    defer t.deinit(testing.allocator);

    // GL defaults to G0, so loading a charset into any other slot must
    // not change what gets printed.
    for ([_]charsets.Slots{ .G1, .G2, .G3 }) |slot| {
        t.configureCharset(slot, .dec_special);
    }

    // Print once with the default G0 contents, then once more after
    // loading each charset into G0 in turn.
    try t.print('`');
    for ([_]charsets.Charset{ .utf8, .ascii, .dec_special }) |set| {
        t.configureCharset(.G0, set);
        try t.print('`');
    }

    // Only the DEC Special Graphics print maps the backtick to a diamond.
    const str = try t.plainString(testing.allocator);
    defer testing.allocator.free(str);
    try testing.expectEqualStrings("```◆", str);
}
test "Terminal: print invoke charset" {
    var t = try init(testing.allocator, 80, 80);
    defer t.deinit(testing.allocator);

    // Load DEC Special Graphics into G1; G0 keeps its default.
    t.configureCharset(.G1, .dec_special);

    // One print with G0 in GL...
    try t.print('`');

    // ...two prints after a locking shift to G1...
    t.invokeCharset(.GL, .G1, false);
    var remaining: usize = 2;
    while (remaining > 0) : (remaining -= 1) try t.print('`');

    // ...and one more after locking back to G0.
    t.invokeCharset(.GL, .G0, false);
    try t.print('`');

    const str = try t.plainString(testing.allocator);
    defer testing.allocator.free(str);
    try testing.expectEqualStrings("`◆◆`", str);
}
test "Terminal: print invoke charset single" {
    var t = try init(testing.allocator, 80, 80);
    defer t.deinit(testing.allocator);

    // Load DEC Special Graphics into G1; G0 keeps its default.
    t.configureCharset(.G1, .dec_special);

    // Baseline print through G0.
    try t.print('`');

    // A single shift must affect exactly the next character: the first
    // print below is mapped, the second falls back to G0.
    t.invokeCharset(.GL, .G1, true);
    try t.print('`');
    try t.print('`');

    const str = try t.plainString(testing.allocator);
    defer testing.allocator.free(str);
    try testing.expectEqualStrings("`◆`", str);
}
test "Terminal: linefeed and carriage return" { test "Terminal: linefeed and carriage return" {
var t = try init(testing.allocator, 80, 80); var t = try init(testing.allocator, 80, 80);
defer t.deinit(testing.allocator); defer t.deinit(testing.allocator);

113
src/terminal/charsets.zig Normal file
View File

@ -0,0 +1,113 @@
const std = @import("std");
const assert = std.debug.assert;
/// The charset slots a terminal provides, G0 through G3. The tags take
/// the ordinals 0 through 3 in declaration order.
pub const Slots = enum(u3) {
    G0,
    G1,
    G2,
    G3,
};
/// The two active positions a charset slot can be invoked into.
pub const ActiveSlot = enum {
    GL,
    GR,
};
/// The supported character sets. Every set except `utf8` has an
/// associated lookup table, available through `table`.
pub const Charset = enum {
    utf8,
    ascii,
    british,
    dec_special,

    /// Returns the lookup table for this charset as a slice of 255
    /// entries, where entry `i` holds the code point to display for
    /// input value `i`.
    ///
    /// `utf8` has no table: calling this on `.utf8` reaches `unreachable`,
    /// so callers must check for it first and use the input as-is.
    ///
    /// NOTE(review): because the tables hold 255 entries, valid indices
    /// are 0 through 254; index 255 is out of range.
    pub fn table(set: Charset) []const u16 {
        return switch (set) {
            // Not table-driven; callers are expected to special-case it.
            .utf8 => unreachable,

            // An identity table. Callers may skip the lookup for ASCII
            // entirely, but a table is provided for uniformity.
            .ascii => &ascii,

            .british => &british,
            .dec_special => &dec_special,
        };
    }
};
/// The identity table: every entry maps to its own index.
const ascii: [max_u8]u16 = initTable();
/// The British table: identical to ASCII except that `#` (0x23) maps to
/// U+00A3 (£).
///
/// https://vt100.net/docs/vt220-rm/chapter2.html
const british = blk: {
    var uk = initTable();
    uk['#'] = 0x00a3;
    break :blk uk;
};
/// The DEC Special Graphics table: identical to ASCII except for the
/// inputs 0x60 through 0x7e, which map to the glyphs listed below.
///
/// https://en.wikipedia.org/wiki/DEC_Special_Graphics
const dec_special = blk: {
    // First input value that is remapped; `glyphs[k]` is the code point
    // for input `first + k`.
    const first = 0x60;
    const glyphs = [_]u16{
        0x25C6, // 0x60 ◆
        0x2592, // 0x61 ▒
        0x2409, // 0x62 ␉
        0x240C, // 0x63 ␌
        0x240D, // 0x64 ␍
        0x240A, // 0x65 ␊
        0x00B0, // 0x66 °
        0x00B1, // 0x67 ±
        0x2424, // 0x68 ␤
        0x240B, // 0x69 ␋
        0x2518, // 0x6a ┘
        0x2510, // 0x6b ┐
        0x250C, // 0x6c ┌
        0x2514, // 0x6d └
        0x253C, // 0x6e ┼
        0x23BA, // 0x6f ⎺
        0x23BB, // 0x70 ⎻
        0x2500, // 0x71 ─
        0x23BC, // 0x72 ⎼
        0x23BD, // 0x73 ⎽
        0x251C, // 0x74 ├
        0x2524, // 0x75 ┤
        0x2534, // 0x76 ┴
        0x252C, // 0x77 ┬
        0x2502, // 0x78 │
        0x2264, // 0x79 ≤
        0x2265, // 0x7a ≥
        0x03C0, // 0x7b π
        0x2260, // 0x7c ≠
        0x00A3, // 0x7d £
        0x00B7, // 0x7e ·
    };

    var mapped = initTable();
    var k: usize = 0;
    while (k < glyphs.len) : (k += 1) mapped[first + k] = glyphs[k];
    break :blk mapped;
};
/// The number of entries in every charset table (255).
const max_u8 = std.math.maxInt(u8);

/// Builds the identity table that the other charset tables start from:
/// entry `i` holds the value `i` for every index from 0 to `max_u8 - 1`.
fn initTable() [max_u8]u16 {
    var table: [max_u8]u16 = undefined;
    var code: u16 = 0;
    while (code < table.len) : (code += 1) table[code] = code;
    return table;
}
test {
    const testing = std.testing;

    // Check the table of every charset that has one.
    inline for (std.meta.fields(Charset)) |field| {
        const set = @field(Charset, field.name);

        // utf8 has no table, so there is nothing to check for it.
        if (set == .utf8) continue;

        // The expected length is deliberately a hardcoded literal rather
        // than `max_u8`, so that a miscompilation or comptime issue that
        // affects the constant is still caught here.
        const expected_len: usize = 255;
        try testing.expectEqual(expected_len, set.table().len);
    }
}

View File

@ -1,5 +1,6 @@
const builtin = @import("builtin"); const builtin = @import("builtin");
const charsets = @import("charsets.zig");
const stream = @import("stream.zig"); const stream = @import("stream.zig");
const ansi = @import("ansi.zig"); const ansi = @import("ansi.zig");
const csi = @import("csi.zig"); const csi = @import("csi.zig");
@ -7,6 +8,9 @@ const sgr = @import("sgr.zig");
pub const point = @import("point.zig"); pub const point = @import("point.zig");
pub const color = @import("color.zig"); pub const color = @import("color.zig");
pub const Charset = charsets.Charset;
pub const CharsetSlot = charsets.Slots;
pub const CharsetActiveSlot = charsets.ActiveSlot;
pub const Terminal = @import("Terminal.zig"); pub const Terminal = @import("Terminal.zig");
pub const Parser = @import("Parser.zig"); pub const Parser = @import("Parser.zig");
pub const Selection = @import("Selection.zig"); pub const Selection = @import("Selection.zig");
@ -25,6 +29,7 @@ pub const Attribute = sgr.Attribute;
test { test {
_ = ansi; _ = ansi;
_ = charsets;
_ = color; _ = color;
_ = csi; _ = csi;
_ = point; _ = point;

View File

@ -2,6 +2,7 @@ const std = @import("std");
const testing = std.testing; const testing = std.testing;
const Parser = @import("Parser.zig"); const Parser = @import("Parser.zig");
const ansi = @import("ansi.zig"); const ansi = @import("ansi.zig");
const charsets = @import("charsets.zig");
const csi = @import("csi.zig"); const csi = @import("csi.zig");
const sgr = @import("sgr.zig"); const sgr = @import("sgr.zig");
const trace = @import("tracy").trace; const trace = @import("tracy").trace;
@ -108,8 +109,15 @@ pub fn Stream(comptime Handler: type) type {
else else
log.warn("unimplemented execute: {x}", .{c}), log.warn("unimplemented execute: {x}", .{c}),
// TODO .SO => if (@hasDecl(T, "invokeCharset"))
.SO, .SI => log.warn("TODO: Shift out/Shift in", .{}), try self.handler.invokeCharset(.GL, .G1, false)
else
log.warn("unimplemented invokeCharset: {x}", .{c}),
.SI => if (@hasDecl(T, "invokeCharset"))
try self.handler.invokeCharset(.GL, .G0, false)
else
log.warn("unimplemented invokeCharset: {x}", .{c}),
} }
} }
@ -408,17 +416,49 @@ pub fn Stream(comptime Handler: type) type {
} }
} }
/// Handle a "designate charset" escape sequence: translate the single
/// intermediate byte into a charset slot and hand `(slot, set)` to the
/// handler's `configureCharset` callback, if the handler declares one.
///
/// - `intermediates`: the intermediate bytes of the escape sequence.
///   Exactly one byte is expected; `(`, `)`, `*` and `+` select G0
///   through G3 respectively.
/// - `set`: the character set to load into the selected slot.
///
/// Malformed input (wrong number of intermediates or an unknown
/// intermediate byte) and a missing callback are logged as warnings and
/// otherwise ignored. Only an error from the handler callback propagates.
///
/// Takes `self` by pointer, matching `escDispatch`, so the handler is
/// reached through the stream itself rather than through a const copy.
fn configureCharset(
    self: *Self,
    intermediates: []const u8,
    set: charsets.Charset,
) !void {
    if (intermediates.len != 1) {
        log.warn("invalid charset intermediate: {any}", .{intermediates});
        return;
    }

    const slot: charsets.Slots = switch (intermediates[0]) {
        // TODO: support slots '-', '.', '/'
        '(' => .G0,
        ')' => .G1,
        '*' => .G2,
        '+' => .G3,
        else => {
            log.warn("invalid charset intermediate: {any}", .{intermediates});
            return;
        },
    };

    if (@hasDecl(T, "configureCharset")) {
        try self.handler.configureCharset(slot, set);
        return;
    }

    log.warn("unimplemented configureCharset callback slot={} set={}", .{
        slot,
        set,
    });
}
fn escDispatch( fn escDispatch(
self: *Self, self: *Self,
action: Parser.Action.ESC, action: Parser.Action.ESC,
) !void { ) !void {
switch (action.final) { switch (action.final) {
// Charsets // Charsets
'B' => { 'B' => try self.configureCharset(action.intermediates, .ascii),
// TODO: Charset support. Just ignore this for now because 'A' => try self.configureCharset(action.intermediates, .british),
// every application sets this and it makes our logs SO '0' => try self.configureCharset(action.intermediates, .dec_special),
// noisy.
},
// DECSC - Save Cursor // DECSC - Save Cursor
'7' => if (@hasDecl(T, "saveCursor")) switch (action.intermediates.len) { '7' => if (@hasDecl(T, "saveCursor")) switch (action.intermediates.len) {
@ -486,6 +526,69 @@ pub fn Stream(comptime Handler: type) type {
}, },
} else log.warn("unimplemented ESC callback: {}", .{action}), } else log.warn("unimplemented ESC callback: {}", .{action}),
// SS2 - Single Shift 2
'N' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
0 => try self.handler.invokeCharset(.GL, .G2, true),
else => {
log.warn("invalid single shift 2 command: {}", .{action});
return;
},
} else log.warn("unimplemented invokeCharset: {}", .{action}),
// SS3 - Single Shift 3
'O' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
0 => try self.handler.invokeCharset(.GL, .G3, true),
else => {
log.warn("invalid single shift 3 command: {}", .{action});
return;
},
} else log.warn("unimplemented invokeCharset: {}", .{action}),
// LS2 - Locking Shift 2
'n' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
0 => try self.handler.invokeCharset(.GL, .G2, false),
else => {
log.warn("invalid single shift 2 command: {}", .{action});
return;
},
} else log.warn("unimplemented invokeCharset: {}", .{action}),
// LS3 - Locking Shift 3
'o' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
0 => try self.handler.invokeCharset(.GL, .G3, false),
else => {
log.warn("invalid single shift 3 command: {}", .{action});
return;
},
} else log.warn("unimplemented invokeCharset: {}", .{action}),
// LS1R - Locking Shift 1 Right
'~' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
0 => try self.handler.invokeCharset(.GR, .G1, false),
else => {
log.warn("invalid locking shift 1 right command: {}", .{action});
return;
},
} else log.warn("unimplemented invokeCharset: {}", .{action}),
// LS2R - Locking Shift 2 Right
'}' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
0 => try self.handler.invokeCharset(.GR, .G2, false),
else => {
log.warn("invalid locking shift 2 right command: {}", .{action});
return;
},
} else log.warn("unimplemented invokeCharset: {}", .{action}),
// LS3R - Locking Shift 3 Right
'|' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
0 => try self.handler.invokeCharset(.GR, .G3, false),
else => {
log.warn("invalid locking shift 3 right command: {}", .{action});
return;
},
} else log.warn("unimplemented invokeCharset: {}", .{action}),
else => if (@hasDecl(T, "escUnimplemented")) else => if (@hasDecl(T, "escUnimplemented"))
try self.handler.escUnimplemented(action) try self.handler.escUnimplemented(action)
else else