mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-15 00:06:09 +03:00
terminal: utf-8 decoding
This commit is contained in:
@ -501,7 +501,7 @@ fn renderTimerCallback(t: *libuv.Timer) void {
|
|||||||
//-------------------------------------------------------------------
|
//-------------------------------------------------------------------
|
||||||
// Stream Callbacks
|
// Stream Callbacks
|
||||||
|
|
||||||
pub fn print(self: *Window, c: u8) !void {
|
pub fn print(self: *Window, c: u21) !void {
|
||||||
try self.terminal.print(self.alloc, c);
|
try self.terminal.print(self.alloc, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
const Parser = @This();
|
const Parser = @This();
|
||||||
|
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
const builtin = @import("builtin");
|
||||||
const testing = std.testing;
|
const testing = std.testing;
|
||||||
const table = @import("parse_table.zig").table;
|
const table = @import("parse_table.zig").table;
|
||||||
const osc = @import("osc.zig");
|
const osc = @import("osc.zig");
|
||||||
@ -28,6 +29,9 @@ pub const State = enum {
|
|||||||
dcs_ignore,
|
dcs_ignore,
|
||||||
osc_string,
|
osc_string,
|
||||||
sos_pm_apc_string,
|
sos_pm_apc_string,
|
||||||
|
|
||||||
|
// Custom states added that aren't present on vt100.net
|
||||||
|
utf8,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Transition action is an action that can be taken during a state
|
/// Transition action is an action that can be taken during a state
|
||||||
@ -49,8 +53,8 @@ pub const TransitionAction = enum {
|
|||||||
/// Action is the action that a caller of the parser is expected to
|
/// Action is the action that a caller of the parser is expected to
|
||||||
/// take as a result of some input character.
|
/// take as a result of some input character.
|
||||||
pub const Action = union(enum) {
|
pub const Action = union(enum) {
|
||||||
/// Draw character to the screen.
|
/// Draw character to the screen. This is a unicode codepoint.
|
||||||
print: u8,
|
print: u21,
|
||||||
|
|
||||||
/// Execute the C0 or C1 function.
|
/// Execute the C0 or C1 function.
|
||||||
execute: u8,
|
execute: u8,
|
||||||
@ -97,8 +101,10 @@ const ParamSepState = enum(u8) {
|
|||||||
mixed = 1,
|
mixed = 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Maximum number of intermediate characters during parsing.
|
/// Maximum number of intermediate characters during parsing. This is
|
||||||
const MAX_INTERMEDIATE = 2;
|
/// 4 because we also use the intermediates array for UTF8 decoding which
|
||||||
|
/// can be at most 4 bytes.
|
||||||
|
const MAX_INTERMEDIATE = 4;
|
||||||
const MAX_PARAMS = 16;
|
const MAX_PARAMS = 16;
|
||||||
|
|
||||||
/// Current state of the state machine
|
/// Current state of the state machine
|
||||||
@ -126,6 +132,11 @@ pub fn init() Parser {
|
|||||||
/// Up to 3 actions may need to be executed -- in order -- representing
|
/// Up to 3 actions may need to be executed -- in order -- representing
|
||||||
/// the state exit, transition, and entry actions.
|
/// the state exit, transition, and entry actions.
|
||||||
pub fn next(self: *Parser, c: u8) [3]?Action {
|
pub fn next(self: *Parser, c: u8) [3]?Action {
|
||||||
|
// If we're processing UTF-8, we handle this manually.
|
||||||
|
if (self.state == .utf8) {
|
||||||
|
return .{ self.next_utf8(c), null, null };
|
||||||
|
}
|
||||||
|
|
||||||
const effect = effect: {
|
const effect = effect: {
|
||||||
// First look up the transition in the anywhere table.
|
// First look up the transition in the anywhere table.
|
||||||
const anywhere = table[c][@enumToInt(State.anywhere)];
|
const anywhere = table[c][@enumToInt(State.anywhere)];
|
||||||
@ -143,6 +154,13 @@ pub fn next(self: *Parser, c: u8) [3]?Action {
|
|||||||
// After generating the actions, we set our next state.
|
// After generating the actions, we set our next state.
|
||||||
defer self.state = next_state;
|
defer self.state = next_state;
|
||||||
|
|
||||||
|
// In debug mode, we log bad state transitions.
|
||||||
|
if (builtin.mode == .Debug) {
|
||||||
|
if (next_state == .anywhere) {
|
||||||
|
log.warn("state transition to 'anywhere', likely bug: {x}", .{c});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// When going from one state to another, the actions take place in this order:
|
// When going from one state to another, the actions take place in this order:
|
||||||
//
|
//
|
||||||
// 1. exit action from old state
|
// 1. exit action from old state
|
||||||
@ -183,21 +201,55 @@ pub fn next(self: *Parser, c: u8) [3]?Action {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Feeds one more byte of an in-progress UTF-8 sequence to the decoder.
///
/// The lead byte of the sequence is expected to already sit in
/// intermediates[0]; the state machine stores it there when it moves the
/// parser into the utf8 state.
///
/// Returns a print action carrying the decoded codepoint once the whole
/// sequence has arrived. Returns null while more bytes are still needed,
/// and also when the completed sequence turns out to be invalid UTF-8.
/// Once enough bytes have been gathered the parser always goes back to
/// the ground state and the intermediates buffer is emptied.
fn next_utf8(self: *Parser, c: u8) ?Action {
    // Stash this byte alongside the ones gathered so far.
    self.collect(c);

    // The lead byte determines how long the whole sequence is. Only the
    // state machine can put a lead byte here, so a failure is a bug in the
    // state machine that should surface loudly rather than be papered over.
    const lead = self.intermediates[0];
    const expected_len = std.unicode.utf8ByteSequenceLength(lead) catch unreachable;

    // Still waiting on continuation bytes.
    if (self.intermediates_idx < expected_len) return null;

    // Everything has arrived, try to turn it into a codepoint.
    const encoded = self.intermediates[0..expected_len];
    const decoded: ?u21 = std.unicode.utf8Decode(encoded) catch null;
    if (decoded == null) log.warn("invalid UTF-8 sequence: {any}", .{encoded});

    // Valid or not, the sequence is over: return to ground and drop the
    // collected bytes.
    self.state = .ground;
    self.intermediates_idx = 0;

    const rune = decoded orelse return null;
    return Action{ .print = rune };
}
|
||||||
|
|
||||||
|
/// Appends `c` to the intermediates buffer. If the buffer already holds
/// MAX_INTERMEDIATE bytes, the byte is dropped and a warning is logged.
fn collect(self: *Parser, c: u8) void {
    const slot = self.intermediates_idx;
    if (slot < MAX_INTERMEDIATE) {
        // Room left: store the byte and advance the write position.
        self.intermediates[slot] = c;
        self.intermediates_idx = slot + 1;
        return;
    }

    // Buffer is full; discard the byte.
    log.warn("invalid intermediates count", .{});
}
|
||||||
|
|
||||||
fn doAction(self: *Parser, action: TransitionAction, c: u8) ?Action {
|
fn doAction(self: *Parser, action: TransitionAction, c: u8) ?Action {
|
||||||
return switch (action) {
|
return switch (action) {
|
||||||
.none, .ignore => null,
|
.none, .ignore => null,
|
||||||
.print => Action{ .print = c },
|
.print => Action{ .print = c },
|
||||||
.execute => Action{ .execute = c },
|
.execute => Action{ .execute = c },
|
||||||
.collect => collect: {
|
.collect => collect: {
|
||||||
if (self.intermediates_idx >= MAX_INTERMEDIATE) {
|
self.collect(c);
|
||||||
log.warn("invalid intermediates count", .{});
|
|
||||||
break :collect null;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.intermediates[self.intermediates_idx] = c;
|
|
||||||
self.intermediates_idx += 1;
|
|
||||||
|
|
||||||
// The client is expected to perform no action.
|
|
||||||
break :collect null;
|
break :collect null;
|
||||||
},
|
},
|
||||||
.param => param: {
|
.param => param: {
|
||||||
@ -433,3 +485,56 @@ test "osc: change window title" {
|
|||||||
try testing.expect(cmd == .change_window_title);
|
try testing.expect(cmd == .change_window_title);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test "print: utf8 2 byte" {
    const input = "£";

    // Feed the encoded bytes one at a time, keeping only the actions
    // produced by the final byte.
    var parser = init();
    var actions: [3]?Action = undefined;
    for (input) |byte| actions = parser.next(byte);

    // The sequence is complete, so the parser is back in ground and the
    // only action emitted is a print.
    try testing.expect(parser.state == .ground);
    try testing.expect(actions[0].? == .print);
    try testing.expect(actions[1] == null);
    try testing.expect(actions[2] == null);

    // The printed codepoint matches what the standard library decodes.
    const expected = try std.unicode.utf8Decode(input);
    try testing.expectEqual(expected, actions[0].?.print);
}
|
||||||
|
|
||||||
|
test "print: utf8 3 byte" {
    const input = "€";

    // Feed the encoded bytes one at a time, keeping only the actions
    // produced by the final byte.
    var parser = init();
    var actions: [3]?Action = undefined;
    for (input) |byte| actions = parser.next(byte);

    // The sequence is complete, so the parser is back in ground and the
    // only action emitted is a print.
    try testing.expect(parser.state == .ground);
    try testing.expect(actions[0].? == .print);
    try testing.expect(actions[1] == null);
    try testing.expect(actions[2] == null);

    // The printed codepoint matches what the standard library decodes.
    const expected = try std.unicode.utf8Decode(input);
    try testing.expectEqual(expected, actions[0].?.print);
}
|
||||||
|
|
||||||
|
test "print: utf8 4 byte" {
    const input = "𐍈";

    // Feed the encoded bytes one at a time, keeping only the actions
    // produced by the final byte.
    var parser = init();
    var actions: [3]?Action = undefined;
    for (input) |byte| actions = parser.next(byte);

    // The sequence is complete, so the parser is back in ground and the
    // only action emitted is a print.
    try testing.expect(parser.state == .ground);
    try testing.expect(actions[0].? == .print);
    try testing.expect(actions[1] == null);
    try testing.expect(actions[2] == null);

    // The printed codepoint matches what the standard library decodes.
    const expected = try std.unicode.utf8Decode(input);
    try testing.expectEqual(expected, actions[0].?.print);
}
|
||||||
|
|
||||||
|
test "print: utf8 invalid" {
    // 0xC3 announces a two byte sequence, but 0x28 is not a continuation
    // byte, so the pair cannot be decoded.
    const input = "\xC3\x28";

    var parser = init();
    var actions: [3]?Action = undefined;
    for (input) |byte| actions = parser.next(byte);

    // The parser returns to ground and emits no action at all for the
    // malformed sequence.
    try testing.expect(parser.state == .ground);
    try testing.expect(actions[0] == null);
    try testing.expect(actions[1] == null);
    try testing.expect(actions[2] == null);
}
|
||||||
|
@ -164,7 +164,7 @@ pub fn setAttribute(self: *Terminal, attr: sgr.Attribute) !void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn print(self: *Terminal, alloc: Allocator, c: u8) !void {
|
pub fn print(self: *Terminal, alloc: Allocator, c: u21) !void {
|
||||||
const tracy = trace(@src());
|
const tracy = trace(@src());
|
||||||
defer tracy.end();
|
defer tracy.end();
|
||||||
|
|
||||||
|
@ -31,7 +31,7 @@ fn genTableType() type {
|
|||||||
|
|
||||||
/// Function to generate the full state transition table for VT emulation.
|
/// Function to generate the full state transition table for VT emulation.
|
||||||
fn genTable() Table {
|
fn genTable() Table {
|
||||||
@setEvalBranchQuota(15000);
|
@setEvalBranchQuota(20000);
|
||||||
var result: Table = undefined;
|
var result: Table = undefined;
|
||||||
|
|
||||||
// Initialize everything so every state transition exists
|
// Initialize everything so every state transition exists
|
||||||
@ -45,6 +45,8 @@ fn genTable() Table {
|
|||||||
|
|
||||||
// ground
|
// ground
|
||||||
{
|
{
|
||||||
|
const source = State.ground;
|
||||||
|
|
||||||
// anywhere =>
|
// anywhere =>
|
||||||
single(&result, 0x18, .anywhere, .ground, .execute);
|
single(&result, 0x18, .anywhere, .ground, .execute);
|
||||||
single(&result, 0x1A, .anywhere, .ground, .execute);
|
single(&result, 0x1A, .anywhere, .ground, .execute);
|
||||||
@ -55,6 +57,11 @@ fn genTable() Table {
|
|||||||
range(&result, 0, 0x17, .ground, .ground, .execute);
|
range(&result, 0, 0x17, .ground, .ground, .execute);
|
||||||
range(&result, 0x1C, 0x1F, .ground, .ground, .execute);
|
range(&result, 0x1C, 0x1F, .ground, .ground, .execute);
|
||||||
range(&result, 0x20, 0x7F, .ground, .ground, .print);
|
range(&result, 0x20, 0x7F, .ground, .ground, .print);
|
||||||
|
|
||||||
|
// => utf8
|
||||||
|
range(&result, 0xC2, 0xDF, source, .utf8, .collect);
|
||||||
|
range(&result, 0xE0, 0xEF, source, .utf8, .collect);
|
||||||
|
range(&result, 0xF0, 0xF4, source, .utf8, .collect);
|
||||||
}
|
}
|
||||||
|
|
||||||
// escape_intermediate
|
// escape_intermediate
|
||||||
|
@ -45,7 +45,7 @@ pub fn Stream(comptime Handler: type) type {
|
|||||||
const tracy = trace(@src());
|
const tracy = trace(@src());
|
||||||
defer tracy.end();
|
defer tracy.end();
|
||||||
|
|
||||||
//log.debug("char: {}", .{c});
|
//log.debug("char: {x}", .{c});
|
||||||
const actions = self.parser.next(c);
|
const actions = self.parser.next(c);
|
||||||
for (actions) |action_opt| {
|
for (actions) |action_opt| {
|
||||||
// if (action_opt) |action| log.info("action: {}", .{action});
|
// if (action_opt) |action| log.info("action: {}", .{action});
|
||||||
@ -324,16 +324,16 @@ pub fn Stream(comptime Handler: type) type {
|
|||||||
|
|
||||||
test "stream: print" {
|
test "stream: print" {
|
||||||
const H = struct {
|
const H = struct {
|
||||||
c: ?u8 = 0,
|
c: ?u21 = 0,
|
||||||
|
|
||||||
pub fn print(self: *@This(), c: u8) !void {
|
pub fn print(self: *@This(), c: u21) !void {
|
||||||
self.c = c;
|
self.c = c;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
var s: Stream(H) = .{ .handler = .{} };
|
var s: Stream(H) = .{ .handler = .{} };
|
||||||
try s.next('x');
|
try s.next('x');
|
||||||
try testing.expectEqual(@as(u8, 'x'), s.handler.c.?);
|
try testing.expectEqual(@as(u21, 'x'), s.handler.c.?);
|
||||||
}
|
}
|
||||||
|
|
||||||
test "stream: cursor right (CUF)" {
|
test "stream: cursor right (CUF)" {
|
||||||
|
Reference in New Issue
Block a user