diff --git a/src/Atlas.zig b/src/Atlas.zig
index a910d16a9..f31d595cd 100644
--- a/src/Atlas.zig
+++ b/src/Atlas.zig
@@ -19,6 +19,7 @@ const std = @import("std");
 const assert = std.debug.assert;
 const Allocator = std.mem.Allocator;
 const testing = std.testing;
+const fastmem = @import("fastmem.zig");
 
 /// Data is the raw texture data.
 data: []u8,
@@ -233,7 +234,7 @@ pub fn set(self: *Atlas, reg: Region, data: []const u8) void {
     while (i < reg.height) : (i += 1) {
         const tex_offset = (((reg.y + i) * self.size) + reg.x) * depth;
         const data_offset = i * reg.width * depth;
-        std.mem.copy(
+        fastmem.copy(
             u8,
             self.data[tex_offset..],
             data[data_offset .. data_offset + (reg.width * depth)],
diff --git a/src/fastmem.zig b/src/fastmem.zig
new file mode 100644
index 000000000..b15b0e7aa
--- /dev/null
+++ b/src/fastmem.zig
@@ -0,0 +1,35 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const assert = std.debug.assert;
+
+/// Copies all of `source` to the start of `dest`, like std.mem.copy, but
+/// prefers libc memmove if it is available because it is generally much
+/// faster. The slices may overlap. Requires dest.len >= source.len.
+pub inline fn move(comptime T: type, dest: []T, source: []const T) void {
+    // memmove does no bounds checking, so enforce the same length
+    // requirement std.mem.copy has before handing raw pointers to libc.
+    assert(dest.len >= source.len);
+    if (builtin.link_libc) {
+        _ = memmove(dest.ptr, source.ptr, source.len * @sizeOf(T));
+    } else {
+        std.mem.copy(T, dest, source);
+    }
+}
+
+/// Copies all of `source` to the start of `dest`, like std.mem.copy, but
+/// prefers libc memcpy if it is available because it is generally much
+/// faster. The slices must NOT overlap (memcpy leaves that undefined); use
+/// `move` for overlapping ranges. Requires dest.len >= source.len.
+pub inline fn copy(comptime T: type, dest: []T, source: []const T) void {
+    // memcpy does no bounds checking, so enforce the same length
+    // requirement std.mem.copy has before handing raw pointers to libc.
+    assert(dest.len >= source.len);
+    if (builtin.link_libc) {
+        _ = memcpy(dest.ptr, source.ptr, source.len * @sizeOf(T));
+    } else {
+        std.mem.copy(T, dest, source);
+    }
+}
+
+extern "c" fn memcpy(*anyopaque, *const anyopaque, usize) *anyopaque;
+extern "c" fn memmove(*anyopaque, *const anyopaque, usize) *anyopaque;
diff --git a/src/font/face/freetype.zig b/src/font/face/freetype.zig
index 746650e94..f892c377b 100644
--- a/src/font/face/freetype.zig
+++ b/src/font/face/freetype.zig
@@ -18,6 +18,7 @@ const Glyph = font.Glyph;
 const Library = font.Library;
 const Presentation = font.Presentation;
 const convert = @import("freetype_convert.zig");
+const fastmem = @import("../../fastmem.zig");
 
 const log = std.log.scoped(.font_face);
 
@@ -248,7 +249,7 @@ pub const Face = struct {
                 var src_ptr = bitmap.buffer;
                 var i: usize = 0;
                 while (i < bitmap.rows) : (i += 1) {
-                    std.mem.copy(u8, dst_ptr, src_ptr[0 .. bitmap.width * depth]);
+                    fastmem.copy(u8, dst_ptr, src_ptr[0 .. bitmap.width * depth]);
                     dst_ptr = dst_ptr[tgt_w * depth ..];
                     src_ptr += @intCast(usize, bitmap.pitch);
                 }
diff --git a/src/terminal/Screen.zig b/src/terminal/Screen.zig
index 5fb66b312..cedf832ca 100644
--- a/src/terminal/Screen.zig
+++ b/src/terminal/Screen.zig
@@ -60,6 +60,7 @@ const color = @import("color.zig");
 const point = @import("point.zig");
 const CircBuf = @import("circ_buf.zig").CircBuf;
 const Selection = @import("Selection.zig");
+const fastmem = @import("../fastmem.zig");
 
 const log = std.log.scoped(.screen);
 
@@ -400,7 +401,7 @@ pub const Row = struct {
         // If the source has no graphemes (likely) then this is fast.
const end = @min(src.storage.len, self.storage.len); if (!src.storage[0].header.flags.grapheme) { - std.mem.copy(StorageCell, self.storage[1..], src.storage[1..end]); + fastmem.copy(StorageCell, self.storage[1..], src.storage[1..end]); return; } @@ -642,7 +643,7 @@ pub const GraphemeData = union(enum) { .three => |v| self.* = .{ .four = .{ v[0], v[1], v[2], cp } }, .four => |v| { const many = try alloc.alloc(u21, 5); - std.mem.copy(u21, many, &v); + fastmem.copy(u21, many, &v); many[4] = cp; self.* = .{ .many = many }; }, @@ -898,7 +899,7 @@ pub fn scrollRegionUp(self: *Screen, top: RowIndex, bottom: RowIndex, count: usi const src_offset = count * (self.cols + 1); const src = buf[src_offset..]; assert(@ptrToInt(dst.ptr) < @ptrToInt(src.ptr)); - std.mem.copy(StorageCell, dst, src); + fastmem.move(StorageCell, dst, src); } { @@ -1449,7 +1450,7 @@ pub fn resize(self: *Screen, rows: usize, cols: usize) !void { // The row doesn't fit, meaning we have to soft-wrap the // new row but probably at a diff boundary. - std.mem.copy( + fastmem.copy( StorageCell, new_row.storage[x + 1 ..], wrapped_cells[wrapped_i .. wrapped_i + copy_len], diff --git a/src/terminal/Tabstops.zig b/src/terminal/Tabstops.zig index afa61787c..c011439b4 100644 --- a/src/terminal/Tabstops.zig +++ b/src/terminal/Tabstops.zig @@ -13,6 +13,7 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const testing = std.testing; const assert = std.debug.assert; +const fastmem = @import("../fastmem.zig"); /// Unit is the type we use per tabstop unit (see file docs). const Unit = u8; @@ -129,7 +130,7 @@ pub fn resize(self: *Tabstops, alloc: Allocator, cols: usize) !void { // Note: we can probably try to realloc here but I'm not sure it matters. 
const new = try alloc.alloc(Unit, size); if (self.dynamic_stops.len > 0) { - std.mem.copy(Unit, new, self.dynamic_stops); + fastmem.copy(Unit, new, self.dynamic_stops); alloc.free(self.dynamic_stops); } diff --git a/src/terminal/circ_buf.zig b/src/terminal/circ_buf.zig index 3558b6c4a..9fc61141c 100644 --- a/src/terminal/circ_buf.zig +++ b/src/terminal/circ_buf.zig @@ -1,6 +1,8 @@ const std = @import("std"); const assert = std.debug.assert; const Allocator = std.mem.Allocator; +const trace = @import("tracy").trace; +const fastmem = @import("../fastmem.zig"); /// Returns a circular buffer containing type T. pub fn CircBuf(comptime T: type, comptime default: T) type { @@ -82,13 +84,13 @@ pub fn CircBuf(comptime T: type, comptime default: T) type { } if (!self.full and self.head >= self.tail) { - std.mem.copy(T, buf, self.storage[self.tail..self.head]); + fastmem.copy(T, buf, self.storage[self.tail..self.head]); return; } const middle = self.storage.len - self.tail; - std.mem.copy(T, buf, self.storage[self.tail..]); - std.mem.copy(T, buf[middle..], self.storage[0..self.head]); + fastmem.copy(T, buf, self.storage[self.tail..]); + fastmem.copy(T, buf[middle..], self.storage[0..self.head]); } /// Returns if the buffer is currently empty. To check if its @@ -130,6 +132,9 @@ pub fn CircBuf(comptime T: type, comptime default: T) type { /// the end of our buffer. This never "rotates" the buffer because /// the offset can only be within the size of the buffer. pub fn getPtrSlice(self: *Self, offset: usize, slice_len: usize) [2][]T { + const tracy = trace(@src()); + defer tracy.end(); + // Note: this assertion is very important, it hints the compiler // which generates ~10% faster code than without it. 
assert(offset + slice_len <= self.capacity()); diff --git a/src/termio/Exec.zig b/src/termio/Exec.zig index 53cebca82..ab4fc7cd0 100644 --- a/src/termio/Exec.zig +++ b/src/termio/Exec.zig @@ -14,6 +14,7 @@ const libuv = @import("libuv"); const renderer = @import("../renderer.zig"); const tracy = @import("tracy"); const trace = tracy.trace; +const fastmem = @import("../fastmem.zig"); const log = std.log.scoped(.io_exec); @@ -339,7 +340,7 @@ const EventData = struct { const req = try self.write_req_pool.get(); const buf = try self.write_buf_pool.get(); const end = @min(data.len, i + buf.len); - std.mem.copy(u8, buf, data[i..end]); + fastmem.copy(u8, buf, data[i..end]); try self.data_stream.write( .{ .req = req }, &[1][]u8{buf[0..(end - i)]},