From 0c2a87e5fb8b2d2e0e581b7f8b80a7ee806cfb41 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Mon, 5 Feb 2024 21:20:20 -0800 Subject: [PATCH] terminal: small stylistic tweaks --- src/terminal/UTF8Decoder.zig | 26 +++++++++++++------------- src/terminal/stream.zig | 1 + 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/terminal/UTF8Decoder.zig b/src/terminal/UTF8Decoder.zig index c020549c7..6bb0d9815 100644 --- a/src/terminal/UTF8Decoder.zig +++ b/src/terminal/UTF8Decoder.zig @@ -13,7 +13,7 @@ const testing = std.testing; const log = std.log.scoped(.utf8decoder); -//zig fmt: off +// zig fmt: off const char_classes = [_]u4{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -32,7 +32,7 @@ const transitions = [_]u8 { 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,12,12,12,12,12, }; -//zig fmt: on +// zig fmt: on // DFA states const ACCEPT_STATE = 0; @@ -43,10 +43,6 @@ accumulator: u21 = 0, // The internal state of the DFA. state: u8 = ACCEPT_STATE, -pub fn init() UTF8Decoder { - return .{}; -} - /// Takes the next byte in the utf-8 sequence and emits a tuple of /// - The codepoint that was generated, if there is one. /// - A boolean that indicates whether the provided byte was consumed. @@ -72,7 +68,8 @@ pub inline fn next(self: *UTF8Decoder, byte: u8) struct { ?u21, bool } { self.state = transitions[self.state + char_class]; if (self.state == ACCEPT_STATE) { - defer { self.accumulator = 0; } + defer self.accumulator = 0; + // Emit the fully decoded codepoint. return .{ self.accumulator, true }; } else if (self.state == REJECT_STATE) { @@ -88,8 +85,8 @@ pub inline fn next(self: *UTF8Decoder, byte: u8) struct { ?u21, bool } { } test "ASCII" { - var d = init(); - var out = std.mem.zeroes([13]u8); + var d: UTF8Decoder = .{}; + var out: [13]u8 = undefined; for ("Hello, World!", 0..) 
|byte, i| { const res = d.next(byte); try testing.expect(res[1]); @@ -97,12 +94,13 @@ test "ASCII" { out[i] = @intCast(codepoint); } } + try testing.expect(std.mem.eql(u8, &out, "Hello, World!")); } test "Well formed utf-8" { - var d = init(); - var out = std.mem.zeroes([4]u21); + var d: UTF8Decoder = .{}; + var out: [4]u21 = undefined; var i: usize = 0; // 4 bytes, 3 bytes, 2 bytes, 1 byte for ("😄✤ÁA") |byte| { @@ -119,12 +117,13 @@ test "Well formed utf-8" { } } } + try testing.expect(std.mem.eql(u21, &out, &[_]u21{ 0x1F604, 0x2724, 0xC1, 0x41 })); } test "Partially invalid utf-8" { - var d = init(); - var out = std.mem.zeroes([5]u21); + var d: UTF8Decoder = .{}; + var out: [5]u21 = undefined; var i: usize = 0; // Illegally terminated sequence, valid sequence, illegal surrogate pair. for ("\xF0\x9F😄\xED\xA0\x80") |byte| { @@ -138,5 +137,6 @@ test "Partially invalid utf-8" { } } } + try testing.expect(std.mem.eql(u21, &out, &[_]u21{ 0xFFFD, 0x1F604, 0xFFFD, 0xFFFD, 0xFFFD })); } diff --git a/src/terminal/stream.zig b/src/terminal/stream.zig index ad884b201..fe4c5c53a 100644 --- a/src/terminal/stream.zig +++ b/src/terminal/stream.zig @@ -67,6 +67,7 @@ pub fn Stream(comptime Handler: type) type { } return; } + const actions = self.parser.next(c); for (actions) |action_opt| { const action = action_opt orelse continue;