mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-16 16:56:09 +03:00
Charsets
Initial charset support. This gets a lot of stuff working but still has some critical (but not very real-world) limitations.

Implements:

* All charset slots G0 to G3
* All shifting commands to shift into GL and GR
* Single shift commands `ESC N` and `ESC O`
* UTF-8, ASCII, British, and DEC Special Graphics character sets

Notably missing:

* A number of rarely used charsets
* NRC handling
* Disabling UTF-8 mode

All the missing stuff is very edge case... but necessary for correctness.
This commit is contained in:
6
TODO.md
6
TODO.md
@ -18,6 +18,11 @@ Correctness:
|
||||
* `exit` in the shell should close the window
|
||||
* test wrap against wraptest: https://github.com/mattiase/wraptest
|
||||
- automate this in some way
|
||||
* Charsets: UTF-8 vs. ASCII mode
|
||||
- we only support UTF-8 input right now
|
||||
- need fallback glyphs if they're not supported
|
||||
- can effect a crash using `vttest` menu `3 10` since it tries to parse
|
||||
ASCII as UTF-8.
|
||||
|
||||
Improvements:
|
||||
|
||||
@ -37,4 +42,3 @@ Major Features:
|
||||
* Sixels: https://saitoha.github.io/libsixel/
|
||||
* Kitty keyboard protocol: https://sw.kovidgoyal.net/kitty/keyboard-protocol/
|
||||
* Kitty graphics protocol: https://sw.kovidgoyal.net/kitty/graphics-protocol/
|
||||
* Charsets (i.e. line drawing, `CSI ( B` and so on)
|
||||
|
@ -1552,7 +1552,8 @@ pub fn deviceAttributes(
|
||||
_ = params;
|
||||
|
||||
switch (req) {
|
||||
.primary => self.queueWrite("\x1B[?6c") catch |err|
|
||||
// VT220
|
||||
.primary => self.queueWrite("\x1B[?62;c") catch |err|
|
||||
log.warn("error queueing device attr response: {}", .{err}),
|
||||
else => log.warn("unimplemented device attributes req: {}", .{req}),
|
||||
}
|
||||
@ -1632,3 +1633,20 @@ pub fn setActiveStatusDisplay(
|
||||
) !void {
|
||||
self.terminal.status_display = req;
|
||||
}
|
||||
|
||||
/// Forwards a charset designation to the terminal state: `set` becomes the
/// character set stored in `slot`.
pub fn configureCharset(
    self: *Window,
    slot: terminal.CharsetSlot,
    set: terminal.Charset,
) !void {
    return self.terminal.configureCharset(slot, set);
}
|
||||
|
||||
/// Forwards a charset invocation to the terminal state: `slot` is invoked
/// into `active`, either persistently or (when `single` is true) for one
/// character only.
pub fn invokeCharset(
    self: *Window,
    active: terminal.CharsetActiveSlot,
    slot: terminal.CharsetSlot,
    single: bool,
) !void {
    return self.terminal.invokeCharset(active, slot, single);
}
|
||||
|
@ -12,6 +12,7 @@ const assert = std.debug.assert;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const ansi = @import("ansi.zig");
|
||||
const charsets = @import("charsets.zig");
|
||||
const csi = @import("csi.zig");
|
||||
const sgr = @import("sgr.zig");
|
||||
const Selection = @import("Selection.zig");
|
||||
@ -56,6 +57,9 @@ cols: usize,
|
||||
/// The current scrolling region.
|
||||
scrolling_region: ScrollingRegion,
|
||||
|
||||
/// The charset state
|
||||
charset: CharsetState = .{},
|
||||
|
||||
/// Modes - This isn't exhaustive, since some modes (i.e. cursor origin)
|
||||
/// are applied to the cursor and others aren't boolean yes/no.
|
||||
modes: packed struct {
|
||||
@ -79,6 +83,23 @@ modes: packed struct {
|
||||
}
|
||||
} = .{},
|
||||
|
||||
/// Everything the terminal tracks for charset designation and invocation.
const CharsetState = struct {
    /// Lookup from a charset slot to the character set designated into it.
    const CharsetArray = std.EnumArray(charsets.Slots, charsets.Charset);

    /// The character set held by each slot. Every slot starts out as UTF-8.
    charsets: CharsetArray = CharsetArray.initFill(charsets.Charset.utf8),

    /// The slot invoked into GL, which is consulted for 7-bit printable
    /// characters (up to 127).
    gl: charsets.Slots = .G0,

    /// The slot invoked into GR, which is meant for 8-bit printable
    /// characters.
    gr: charsets.Slots = .G2,

    /// A pending single shift: when non-null, this slot is used for exactly
    /// one character instead of the GL slot.
    single_shift: ?charsets.Slots = null,
};
|
||||
|
||||
/// The event types that can be reported for mouse-related activities.
|
||||
/// These are all mutually exclusive (hence in a single enum).
|
||||
pub const MouseEvents = enum(u3) {
|
||||
@ -376,6 +397,31 @@ pub fn setAttribute(self: *Terminal, attr: sgr.Attribute) !void {
|
||||
}
|
||||
}
|
||||
|
||||
/// Designate the character set `set` into the charset slot `slot`. This only
/// changes what the slot holds; which slot is consulted while printing is
/// controlled separately through `invokeCharset`.
pub fn configureCharset(self: *Terminal, slot: charsets.Slots, set: charsets.Charset) void {
    const state = &self.charset;
    state.charsets.set(slot, set);
}
|
||||
|
||||
/// Invoke the charset held by `slot` into the `active` position (GL or GR).
///
/// When `single` is true this is a single shift: the slot is remembered for
/// the next printed character only and the locking GL/GR assignment is left
/// untouched. Single shifts are only valid for GL, which is asserted.
pub fn invokeCharset(
    self: *Terminal,
    active: charsets.ActiveSlot,
    slot: charsets.Slots,
    single: bool,
) void {
    const state = &self.charset;

    // Locking shift: the slot stays invoked until another shift replaces it.
    if (!single) {
        switch (active) {
            .GL => state.gl = slot,
            .GR => state.gr = slot,
        }
        return;
    }

    // Single shift: consumed by the next printed character.
    assert(active == .GL);
    state.single_shift = slot;
}
|
||||
|
||||
pub fn print(self: *Terminal, c: u21) !void {
|
||||
const tracy = trace(@src());
|
||||
defer tracy.end();
|
||||
@ -438,7 +484,25 @@ pub fn print(self: *Terminal, c: u21) !void {
|
||||
}
|
||||
}
|
||||
|
||||
fn printCell(self: *Terminal, c: u21) *Screen.Cell {
|
||||
fn printCell(self: *Terminal, unmapped_c: u21) *Screen.Cell {
|
||||
// Translate the incoming codepoint through the charset that is currently
// invoked into GL (or through a pending single-shift slot).
const c = c: {
    // TODO: non-utf8 handling, gr

    // A pending single shift applies to exactly one character, so it is
    // cleared here as soon as it has been read.
    const key = if (self.charset.single_shift) |key_once| blk: {
        self.charset.single_shift = null;
        break :blk key_once;
    } else self.charset.gl;
    const set = self.charset.charsets.get(key);

    // UTF-8 or ASCII is used as-is
    if (set == .utf8 or set == .ascii) break :c unmapped_c;

    // The lookup tables only cover single-byte values. A codepoint past
    // the end of the table has no mapping, so it is passed through
    // unchanged. Without this check the narrowing cast to u8 (and the
    // table index) below would be illegal for such input.
    const table = set.table();
    if (unmapped_c >= table.len) break :c unmapped_c;

    // In range: map through the table.
    break :c @intCast(u21, table[@intCast(u8, unmapped_c)]);
};
|
||||
|
||||
const cell = self.screen.getCell(
|
||||
self.screen.cursor.y,
|
||||
self.screen.cursor.x,
|
||||
@ -1245,6 +1309,68 @@ test "Terminal: print writes to bottom if scrolled" {
|
||||
}
|
||||
}
|
||||
|
||||
test "Terminal: print charset" {
    var t = try init(testing.allocator, 80, 80);
    defer t.deinit(testing.allocator);

    // GL starts out on G0, so designating a charset into G1, G2 or G3
    // must not change what gets printed.
    for ([_]charsets.Slots{ .G1, .G2, .G3 }) |slot| {
        t.configureCharset(slot, .dec_special);
    }

    // Print the same character under each charset designated into G0:
    // the initial one, UTF-8, ASCII and finally DEC Special Graphics.
    try t.print('`');
    t.configureCharset(.G0, .utf8);
    try t.print('`');
    t.configureCharset(.G0, .ascii);
    try t.print('`');
    t.configureCharset(.G0, .dec_special);
    try t.print('`');

    // Only the last print is expected to be remapped.
    const actual = try t.plainString(testing.allocator);
    defer testing.allocator.free(actual);
    try testing.expectEqualStrings("```◆", actual);
}
|
||||
|
||||
test "Terminal: print invoke charset" {
    var t = try init(testing.allocator, 80, 80);
    defer t.deinit(testing.allocator);

    t.configureCharset(.G1, .dec_special);

    // G0 is invoked at first, so the character is printed untouched.
    try t.print('`');

    // A locking shift to G1 stays in effect for every following print...
    t.invokeCharset(.GL, .G1, false);
    try t.print('`');
    try t.print('`');

    // ...until GL is shifted back to G0.
    t.invokeCharset(.GL, .G0, false);
    try t.print('`');

    const actual = try t.plainString(testing.allocator);
    defer testing.allocator.free(actual);
    try testing.expectEqualStrings("`◆◆`", actual);
}
|
||||
|
||||
test "Terminal: print invoke charset single" {
    var t = try init(testing.allocator, 80, 80);
    defer t.deinit(testing.allocator);

    t.configureCharset(.G1, .dec_special);

    // G0 is invoked at first, so the character is printed untouched.
    try t.print('`');

    // A single shift to G1 remaps exactly one character; the print after
    // that goes through G0 again.
    t.invokeCharset(.GL, .G1, true);
    try t.print('`');
    try t.print('`');

    const actual = try t.plainString(testing.allocator);
    defer testing.allocator.free(actual);
    try testing.expectEqualStrings("`◆`", actual);
}
|
||||
|
||||
test "Terminal: linefeed and carriage return" {
|
||||
var t = try init(testing.allocator, 80, 80);
|
||||
defer t.deinit(testing.allocator);
|
||||
|
113
src/terminal/charsets.zig
Normal file
113
src/terminal/charsets.zig
Normal file
@ -0,0 +1,113 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
|
||||
/// The charset slots of a terminal, G0 through G3. The tag values are the
/// slot numbers (G0 = 0 ... G3 = 3).
pub const Slots = enum(u3) {
    G0,
    G1,
    G2,
    G3,
};
|
||||
|
||||
/// The two active positions a charset slot can be invoked into.
pub const ActiveSlot = enum {
    GL,
    GR,
};
|
||||
|
||||
/// The character sets that can be designated into a slot, together with
/// access to their lookup tables.
pub const Charset = enum {
    utf8,
    ascii,
    british,
    dec_special,

    /// Returns the lookup table for this charset. The slice always has 255
    /// entries (indices 0 through 254); entry `i` holds the codepoint that
    /// the input value `i` maps to in this charset.
    ///
    /// Must not be called for `.utf8`, which has no table.
    pub fn table(set: Charset) []const u16 {
        return switch (set) {
            // There is no table for utf8: callers are expected to check
            // for utf8 first and skip the table lookup entirely.
            .utf8 => unreachable,

            // Callers are better off using ascii values directly, but an
            // identity table is available if they want one.
            .ascii => &ascii,

            .british => &british,
            .dec_special => &dec_special,
        };
    }
};
|
||||
|
||||
/// Identity table: every entry maps to its own index.
const ascii = initTable();

/// The British set: the identity table with `#` (0x23) replaced by the
/// pound sign (U+00A3).
/// https://vt100.net/docs/vt220-rm/chapter2.html
const british = uk: {
    var uk_table = initTable();
    uk_table['#'] = 0x00a3;
    break :uk uk_table;
};
|
||||
|
||||
/// DEC Special Graphics (the VT100 line drawing set): the identity table
/// with 0x5F through 0x7E replaced by blank, symbol and box drawing
/// characters.
/// https://en.wikipedia.org/wiki/DEC_Special_Graphics
const dec_special = special: {
    var table = initTable();

    // 0x5F is the "blank" of the set, not an underscore.
    table[0x5f] = 0x00A0; // no-break space

    table[0x60] = 0x25C6; // ◆ diamond
    table[0x61] = 0x2592; // ▒ checkerboard
    table[0x62] = 0x2409; // ␉ horizontal tab symbol
    table[0x63] = 0x240C; // ␌ form feed symbol
    table[0x64] = 0x240D; // ␍ carriage return symbol
    table[0x65] = 0x240A; // ␊ line feed symbol
    table[0x66] = 0x00B0; // ° degree sign
    table[0x67] = 0x00B1; // ± plus/minus
    table[0x68] = 0x2424; // ␤ newline symbol
    table[0x69] = 0x240B; // ␋ vertical tab symbol
    table[0x6a] = 0x2518; // ┘ lower-right corner
    table[0x6b] = 0x2510; // ┐ upper-right corner
    table[0x6c] = 0x250C; // ┌ upper-left corner
    table[0x6d] = 0x2514; // └ lower-left corner
    table[0x6e] = 0x253C; // ┼ crossing lines
    table[0x6f] = 0x23BA; // ⎺ horizontal scan line 1
    table[0x70] = 0x23BB; // ⎻ horizontal scan line 3
    table[0x71] = 0x2500; // ─ horizontal line
    table[0x72] = 0x23BC; // ⎼ horizontal scan line 7
    table[0x73] = 0x23BD; // ⎽ horizontal scan line 9
    table[0x74] = 0x251C; // ├ left tee
    table[0x75] = 0x2524; // ┤ right tee
    table[0x76] = 0x2534; // ┴ bottom tee
    table[0x77] = 0x252C; // ┬ top tee
    table[0x78] = 0x2502; // │ vertical line
    table[0x79] = 0x2264; // ≤ less than or equal
    table[0x7a] = 0x2265; // ≥ greater than or equal
    table[0x7b] = 0x03C0; // π pi
    table[0x7c] = 0x2260; // ≠ not equal
    table[0x7d] = 0x00A3; // £ pound sign
    table[0x7e] = 0x00B7; // · centered dot
    break :special table;
};
|
||||
|
||||
const max_u8 = std.math.maxInt(u8);

/// Builds the identity table every charset starts from: entry `i` holds the
/// value `i`. The table has `max_u8` (255) entries, so it covers the values
/// 0 through 254.
fn initTable() [max_u8]u16 {
    var table: [max_u8]u16 = undefined;

    var code: u16 = 0;
    while (code < max_u8) : (code += 1) {
        table[code] = code;
    }

    // Every entry must have been written before the table is handed out.
    assert(code == max_u8);
    return table;
}
|
||||
|
||||
test {
    const testing = std.testing;

    // Check the table length of every charset that has a table.
    inline for (std.meta.fields(Charset)) |field| {
        const set = @field(Charset, field.name);

        // utf8 has no table
        if (set != .utf8) {
            // `max_u8` would work here as well, but the length is spelled
            // out as a literal on purpose so that a miscompilation or a
            // comptime issue in the table setup gets caught.
            try testing.expectEqual(@as(usize, 255), set.table().len);
        }
    }
}
|
@ -1,5 +1,6 @@
|
||||
const builtin = @import("builtin");
|
||||
|
||||
const charsets = @import("charsets.zig");
|
||||
const stream = @import("stream.zig");
|
||||
const ansi = @import("ansi.zig");
|
||||
const csi = @import("csi.zig");
|
||||
@ -7,6 +8,9 @@ const sgr = @import("sgr.zig");
|
||||
pub const point = @import("point.zig");
|
||||
pub const color = @import("color.zig");
|
||||
|
||||
pub const Charset = charsets.Charset;
|
||||
pub const CharsetSlot = charsets.Slots;
|
||||
pub const CharsetActiveSlot = charsets.ActiveSlot;
|
||||
pub const Terminal = @import("Terminal.zig");
|
||||
pub const Parser = @import("Parser.zig");
|
||||
pub const Selection = @import("Selection.zig");
|
||||
@ -25,6 +29,7 @@ pub const Attribute = sgr.Attribute;
|
||||
|
||||
test {
|
||||
_ = ansi;
|
||||
_ = charsets;
|
||||
_ = color;
|
||||
_ = csi;
|
||||
_ = point;
|
||||
|
@ -2,6 +2,7 @@ const std = @import("std");
|
||||
const testing = std.testing;
|
||||
const Parser = @import("Parser.zig");
|
||||
const ansi = @import("ansi.zig");
|
||||
const charsets = @import("charsets.zig");
|
||||
const csi = @import("csi.zig");
|
||||
const sgr = @import("sgr.zig");
|
||||
const trace = @import("tracy").trace;
|
||||
@ -108,8 +109,15 @@ pub fn Stream(comptime Handler: type) type {
|
||||
else
|
||||
log.warn("unimplemented execute: {x}", .{c}),
|
||||
|
||||
// TODO
|
||||
.SO, .SI => log.warn("TODO: Shift out/Shift in", .{}),
|
||||
.SO => if (@hasDecl(T, "invokeCharset"))
|
||||
try self.handler.invokeCharset(.GL, .G1, false)
|
||||
else
|
||||
log.warn("unimplemented invokeCharset: {x}", .{c}),
|
||||
|
||||
.SI => if (@hasDecl(T, "invokeCharset"))
|
||||
try self.handler.invokeCharset(.GL, .G0, false)
|
||||
else
|
||||
log.warn("unimplemented invokeCharset: {x}", .{c}),
|
||||
}
|
||||
}
|
||||
|
||||
@ -408,17 +416,49 @@ pub fn Stream(comptime Handler: type) type {
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle a charset designation escape sequence. The single intermediate
/// byte selects the slot to designate `set` into; anything other than
/// exactly one recognized intermediate is logged and ignored. The handler
/// is only called if it declares `configureCharset`.
fn configureCharset(
    self: Self,
    intermediates: []const u8,
    set: charsets.Charset,
) !void {
    if (intermediates.len != 1) {
        log.warn("invalid charset intermediate: {any}", .{intermediates});
        return;
    }

    const designator = intermediates[0];
    const slot: charsets.Slots = switch (designator) {
        // TODO: support slots '-', '.', '/'

        '(' => .G0,
        ')' => .G1,
        '*' => .G2,
        '+' => .G3,
        else => {
            log.warn("invalid charset intermediate: {any}", .{intermediates});
            return;
        },
    };

    if (@hasDecl(T, "configureCharset")) {
        try self.handler.configureCharset(slot, set);
    } else {
        log.warn("unimplemented configureCharset callback slot={} set={}", .{
            slot,
            set,
        });
    }
}
|
||||
|
||||
fn escDispatch(
|
||||
self: *Self,
|
||||
action: Parser.Action.ESC,
|
||||
) !void {
|
||||
switch (action.final) {
|
||||
// Charsets
|
||||
'B' => {
|
||||
// TODO: Charset support. Just ignore this for now because
|
||||
// every application sets this and it makes our logs SO
|
||||
// noisy.
|
||||
},
|
||||
'B' => try self.configureCharset(action.intermediates, .ascii),
|
||||
'A' => try self.configureCharset(action.intermediates, .british),
|
||||
'0' => try self.configureCharset(action.intermediates, .dec_special),
|
||||
|
||||
// DECSC - Save Cursor
|
||||
'7' => if (@hasDecl(T, "saveCursor")) switch (action.intermediates.len) {
|
||||
@ -486,6 +526,69 @@ pub fn Stream(comptime Handler: type) type {
|
||||
},
|
||||
} else log.warn("unimplemented ESC callback: {}", .{action}),
|
||||
|
||||
// SS2 - Single Shift 2
|
||||
'N' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
|
||||
0 => try self.handler.invokeCharset(.GL, .G2, true),
|
||||
else => {
|
||||
log.warn("invalid single shift 2 command: {}", .{action});
|
||||
return;
|
||||
},
|
||||
} else log.warn("unimplemented invokeCharset: {}", .{action}),
|
||||
|
||||
// SS3 - Single Shift 3
|
||||
'O' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
|
||||
0 => try self.handler.invokeCharset(.GL, .G3, true),
|
||||
else => {
|
||||
log.warn("invalid single shift 3 command: {}", .{action});
|
||||
return;
|
||||
},
|
||||
} else log.warn("unimplemented invokeCharset: {}", .{action}),
|
||||
|
||||
// LS2 - Locking Shift 2: invokes G2 into GL as a locking (non-single)
// shift. The sequence takes no intermediates.
'n' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
    0 => try self.handler.invokeCharset(.GL, .G2, false),
    else => {
        log.warn("invalid locking shift 2 command: {}", .{action});
        return;
    },
} else log.warn("unimplemented invokeCharset: {}", .{action}),
|
||||
|
||||
// LS3 - Locking Shift 3: invokes G3 into GL as a locking (non-single)
// shift. The sequence takes no intermediates.
'o' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
    0 => try self.handler.invokeCharset(.GL, .G3, false),
    else => {
        log.warn("invalid locking shift 3 command: {}", .{action});
        return;
    },
} else log.warn("unimplemented invokeCharset: {}", .{action}),
|
||||
|
||||
// LS1R - Locking Shift 1 Right
|
||||
'~' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
|
||||
0 => try self.handler.invokeCharset(.GR, .G1, false),
|
||||
else => {
|
||||
log.warn("invalid locking shift 1 right command: {}", .{action});
|
||||
return;
|
||||
},
|
||||
} else log.warn("unimplemented invokeCharset: {}", .{action}),
|
||||
|
||||
// LS2R - Locking Shift 2 Right
|
||||
'}' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
|
||||
0 => try self.handler.invokeCharset(.GR, .G2, false),
|
||||
else => {
|
||||
log.warn("invalid locking shift 2 right command: {}", .{action});
|
||||
return;
|
||||
},
|
||||
} else log.warn("unimplemented invokeCharset: {}", .{action}),
|
||||
|
||||
// LS3R - Locking Shift 3 Right
|
||||
'|' => if (@hasDecl(T, "invokeCharset")) switch (action.intermediates.len) {
|
||||
0 => try self.handler.invokeCharset(.GR, .G3, false),
|
||||
else => {
|
||||
log.warn("invalid locking shift 3 right command: {}", .{action});
|
||||
return;
|
||||
},
|
||||
} else log.warn("unimplemented invokeCharset: {}", .{action}),
|
||||
|
||||
else => if (@hasDecl(T, "escUnimplemented"))
|
||||
try self.handler.escUnimplemented(action)
|
||||
else
|
||||
|
Reference in New Issue
Block a user