mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-07-16 08:46:08 +03:00
crash/minidump: reader that streams data from a source
This commit is contained in:
@ -1,7 +1,7 @@
|
||||
const minidump = @import("minidump/minidump.zig");
|
||||
const reader = @import("minidump/reader.zig");
|
||||
|
||||
pub const stream = @import("minidump/stream.zig");
|
||||
pub const Minidump = minidump.Minidump;
|
||||
pub const Reader = reader.Reader;
|
||||
|
||||
test {
|
||||
@import("std").testing.refAllDecls(@This());
|
||||
|
@ -1,154 +0,0 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const external = @import("external.zig");
|
||||
const stream = @import("stream.zig");
|
||||
const Stream = stream.Stream;
|
||||
|
||||
const log = std.log.scoped(.minidump);
|
||||
|
||||
/// In-memory representation of a minidump file: the header plus every
/// stream, with all stream data copied out of the source.
pub const Minidump = struct {
    /// The arena that all streams are allocated within when reading the
    /// minidump file. This is freed on deinit.
    arena: std.heap.ArenaAllocator,

    /// The header of the minidump file. On serialization, the stream count
    /// and rva will be updated to match the streams. On deserialization,
    /// this is read directly from the file.
    header: external.Header,

    /// The streams within the minidump file in the order they're serialized.
    streams: std.ArrayListUnmanaged(Stream),

    /// Read the minidump file for the given source.
    ///
    /// The source must have a reader() and seekableStream() method.
    /// For example, both File and std.io.FixedBufferStream implement these.
    ///
    /// The full minidump data is read into memory owned by the returned
    /// value's arena; call `deinit` to release it. This makes it easy to
    /// serialize the data back out and is acceptable for our use case,
    /// which doesn't rely too much on being memory efficient or high load.
    /// We also expect the minidump files to be relatively small (dozens of
    /// MB at most, hundreds of KB typically).
    ///
    /// Errors:
    ///   - `error.InvalidHeader` if the signature matches in neither byte
    ///     order.
    ///   - `error.InvalidVersion` if the low word of the version is not the
    ///     expected minidump version.
    ///   - `error.DataSizeMismatch` if a stream's data ends before the size
    ///     recorded in its directory entry.
    ///   - Any allocation, read or seek error from the allocator or source.
    ///
    /// NOTE(mitchellh): If we ever want to make this more memory efficient,
    /// I would create a new type that is a "lazy reader" that stores the
    /// source type and reads the data as needed. Then this type should use
    /// that type.
    pub fn read(alloc_gpa: Allocator, source: anytype) !Minidump {
        var arena = std.heap.ArenaAllocator.init(alloc_gpa);
        errdefer arena.deinit();
        const alloc = arena.allocator();

        // Read the header which also determines the endianness of the file.
        const header, const endian = try readHeader(source);

        // The list lives in the arena, so the arena's errdefer above is the
        // only cleanup needed on failure.
        var streams = try std.ArrayListUnmanaged(Stream).initCapacity(
            alloc,
            header.stream_count,
        );

        // Read the streams. All the streams are first described in a
        // "directory" structure which tells us the type of stream and
        // where it is located in the file. The directory structures are
        // stored in a contiguous block at the stream_directory_rva.
        //
        // We read directories one by one, then read all the data for that
        // directory, then move on to the next directory, because we copy
        // all the minidump data into memory.
        const seeker = source.seekableStream();
        try seeker.seekTo(header.stream_directory_rva);
        for (0..header.stream_count) |_| {
            // Read the current directory entry.
            const directory = try source.reader().readStructEndian(
                external.Directory,
                endian,
            );
            log.debug("directory={}", .{directory});

            // Remember where the next directory entry starts; we have to
            // come back here after jumping to this stream's data.
            const pos = try seeker.getPos();
            try seeker.seekTo(directory.location.rva);

            // Read exactly the number of bytes the directory declares. A
            // short read means the file is truncated or the entry is bogus.
            const data = try alloc.alloc(u8, directory.location.data_size);
            const read_len = try source.reader().readAll(data);
            if (read_len != data.len) return error.DataSizeMismatch;

            // Capacity for every stream was reserved up front.
            streams.appendAssumeCapacity(.{ .encoded = .{
                .type = directory.stream_type,
                .data = data,
            } });

            // Seek back to the directory block for the next entry.
            try seeker.seekTo(pos);
        }

        return .{
            .arena = arena,
            .header = header,
            .streams = streams,
        };
    }

    /// Reads the header for the minidump file and returns it together with
    /// the endianness of the file.
    ///
    /// The header is always read from offset 0 of the source. Little-endian
    /// is tried first, then big-endian; if the signature matches neither,
    /// `error.InvalidHeader` is returned. A matching signature with an
    /// unexpected version yields `error.InvalidVersion`.
    fn readHeader(source: anytype) !struct { external.Header, std.builtin.Endian } {
        const candidates = [_]std.builtin.Endian{ .little, .big };
        for (candidates) |endian| {
            // Every attempt starts from the beginning of the file so that
            // both byte orders are decoded from the same bytes.
            try source.seekableStream().seekTo(0);
            const header = try source.reader().readStructEndian(
                external.Header,
                endian,
            );
            if (header.signature != external.signature) continue;

            // "The low-order word is MINIDUMP_VERSION. The high-order word
            // is an internal value that is implementation specific."
            if (header.version.low != external.version) return error.InvalidVersion;

            return .{ header, endian };
        }

        return error.InvalidHeader;
    }

    /// Release all memory held by the minidump (the arena and everything
    /// allocated within it).
    pub fn deinit(self: *Minidump) void {
        self.arena.deinit();
    }

    /// The arena allocator associated with this minidump.
    pub fn allocator(self: *Minidump) Allocator {
        return self.arena.allocator();
    }
};
|
||||
|
||||
// Smoke test: the bundled macOS dump parses without error and the testing
// allocator observes no leak once the minidump is released.
test "Minidump read" {
    var fbs = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp"));

    var md = try Minidump.read(std.testing.allocator, &fbs);
    defer md.deinit();
}
|
167
src/crash/minidump/reader.zig
Normal file
167
src/crash/minidump/reader.zig
Normal file
@ -0,0 +1,167 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const external = @import("external.zig");
|
||||
const stream = @import("stream.zig");
|
||||
const EncodedStream = stream.EncodedStream;
|
||||
|
||||
const log = std.log.scoped(.minidump_reader);
|
||||
|
||||
/// Minidump-specific failures that can be reported while reading a
/// minidump. This is not the complete error set: depending on the Source
/// type, IO errors from reading and seeking can be returned as well.
pub const ReadError = error{
    /// The header signature did not match the expected minidump signature
    /// when decoded as either little-endian or big-endian.
    InvalidHeader,

    /// The signature matched but the low-order word of the header version
    /// is not the expected minidump version.
    InvalidVersion,
};
|
||||
|
||||
/// Reader creates a new minidump reader for the given source type. The
/// source must have both a "reader()" and "seekableStream()" function.
///
/// Given the format of a minidump file, we must keep the source open and
/// continually access it because the format of the minidump is full of
/// pointers and offsets that we must follow depending on the stream types.
/// Also, since we're not aware of all stream types (in fact it's impossible
/// to be aware since custom stream types are allowed), it's possible any
/// stream type can define their own pointers and offsets. So, the source
/// must always be available so callers can decode the streams as needed.
pub fn Reader(comptime Source: type) type {
    return struct {
        const Self = @This();

        /// The source data.
        source: Source,

        /// The endianness of the minidump file. This is detected by reading
        /// the byte order of the header.
        endian: std.builtin.Endian,

        /// The number of streams within the minidump file. This is read from
        /// the header and stored here so we can quickly access it. Note
        /// the stream types require reading the source; this is an
        /// optimization to avoid any allocations on the reader and the
        /// caller can choose to store them if they want.
        stream_count: u32,

        /// Offset from the start of the source at which the contiguous
        /// block of directory entries begins. Read from the header.
        stream_directory_rva: u32,

        /// The type that actually declares reader()/seekableStream(): the
        /// pointee when Source is a pointer, Source itself when a struct.
        const SourceCallable = switch (@typeInfo(Source)) {
            .Pointer => |v| v.child,
            .Struct => Source,
            else => @compileError("Source type must be a pointer or struct"),
        };

        const SourceReader = @typeInfo(@TypeOf(SourceCallable.reader)).Fn.return_type.?;
        const SourceSeeker = @typeInfo(@TypeOf(SourceCallable.seekableStream)).Fn.return_type.?;

        /// The reader type for stream reading. This is a LimitedReader so
        /// you must still call reader() on the result to get the actual
        /// reader to read the data.
        pub const StreamReader = std.io.LimitedReader(SourceReader);

        /// Initialize a reader. The source must remain available for the entire
        /// lifetime of the reader. The reader does not take ownership of the
        /// source so if it has resources that need to be cleaned up, the caller
        /// must do so once the reader is no longer needed.
        ///
        /// Returns whatever readHeader returns on failure: the minidump
        /// specific errors or an IO error from the source.
        pub fn init(source: Source) !Self {
            const header, const endian = try readHeader(Source, source);
            return .{
                .source = source,
                .endian = endian,
                .stream_count = header.stream_count,
                .stream_directory_rva = header.stream_directory_rva,
            };
        }

        /// Return a StreamReader for the given directory type. This streams
        /// from the underlying source so the returned reader is only valid
        /// as long as the source is unmodified (i.e. the source is not
        /// closed, the source is not seeked, etc.).
        ///
        /// The source is seeked to the stream's location and the returned
        /// reader is limited to the data size recorded in the directory.
        pub fn streamReader(
            self: *const Self,
            dir: external.Directory,
        ) SourceSeeker.SeekError!StreamReader {
            try self.source.seekableStream().seekTo(dir.location.rva);
            return .{
                .inner_reader = self.source.reader(),
                .bytes_left = dir.location.data_size,
            };
        }

        /// Get the directory entry with the given index.
        ///
        /// Asserts the index is valid (idx < stream_count).
        ///
        /// This seeks the source, so any StreamReader obtained earlier from
        /// streamReader must not be used afterwards.
        pub fn directory(self: *const Self, idx: usize) !external.Directory {
            assert(idx < self.stream_count);

            // Seek to the directory. The position is computed in u64 (the
            // type seekTo accepts) because both operands come from the file
            // header and their sum is not guaranteed to fit in a u32 for a
            // malformed file.
            const entry_size: u64 = @sizeOf(external.Directory);
            const rva: u64 = @as(u64, self.stream_directory_rva) +
                entry_size * @as(u64, idx);
            try self.source.seekableStream().seekTo(rva);

            // Read the directory.
            return try self.source.reader().readStructEndian(
                external.Directory,
                self.endian,
            );
        }
    };
}
|
||||
|
||||
/// Reads the header for the minidump file and returns it together with the
/// endianness of the file.
///
/// The header is always read from offset 0 of the source. Little-endian is
/// tried first and big-endian second; the first byte order whose decoded
/// signature matches wins.
///
/// Errors:
///   - `ReadError.InvalidHeader` if the signature matches in neither byte
///     order.
///   - `ReadError.InvalidVersion` if the signature matches but the low word
///     of the version is not the expected minidump version.
///   - Any read or seek error from the source.
fn readHeader(comptime T: type, source: T) !struct {
    external.Header,
    std.builtin.Endian,
} {
    const candidates = [_]std.builtin.Endian{ .little, .big };
    for (candidates) |endian| {
        // Start every attempt at the beginning of the file so both byte
        // orders decode the same bytes regardless of where the caller left
        // the source positioned.
        try source.seekableStream().seekTo(0);
        const header = try source.reader().readStructEndian(
            external.Header,
            endian,
        );
        if (header.signature != external.signature) continue;

        // "The low-order word is MINIDUMP_VERSION. The high-order word is an
        // internal value that is implementation specific."
        if (header.version.low != external.version) return ReadError.InvalidVersion;

        return .{ header, endian };
    }

    return ReadError.InvalidHeader;
}
|
||||
|
||||
// Debugging aid: walks every directory entry of the bundled macOS dump and
// logs it at warn level.
test "Minidump debug" {
    var fbs = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp"));
    const r = try Reader(*@TypeOf(fbs)).init(&fbs);

    var i: usize = 0;
    while (i < r.stream_count) : (i += 1) {
        const dir = try r.directory(i);
        log.warn("directory i={} dir={}", .{ i, dir });
    }
}
|
||||
|
||||
// Verifies header detection on the bundled macOS dump and that the first
// stream can be read in full through a StreamReader.
test "Minidump read" {
    const testing = std.testing;

    var fbs = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp"));
    const r = try Reader(*@TypeOf(fbs)).init(&fbs);

    // Header-derived fields.
    try testing.expectEqual(std.builtin.Endian.little, r.endian);
    try testing.expectEqual(7, r.stream_count);

    // First directory entry.
    const first = try r.directory(0);
    try testing.expectEqual(3, first.stream_type);
    try testing.expectEqual(584, first.location.data_size);

    // The limited reader must stop after exactly the declared data size
    // even though we allow an unbounded amount to be appended.
    var contents = std.ArrayList(u8).init(testing.allocator);
    defer contents.deinit();
    var limited = try r.streamReader(first);
    try limited.reader().readAllArrayList(&contents, std.math.maxInt(usize));
    try testing.expectEqual(584, contents.items.len);
}
|
Reference in New Issue
Block a user