Merge pull request #2287 from ghostty-org/minidump

Minidump Parser
This commit is contained in:
Mitchell Hashimoto
2024-09-22 14:12:12 -07:00
committed by GitHub
7 changed files with 456 additions and 0 deletions

View File

@ -5,6 +5,7 @@
const dir = @import("dir.zig");
const sentry_envelope = @import("sentry_envelope.zig");
pub const minidump = @import("minidump.zig");
pub const sentry = @import("sentry.zig");
pub const Envelope = sentry_envelope.Envelope;
pub const defaultDir = dir.defaultDir;

7
src/crash/minidump.zig Normal file
View File

@ -0,0 +1,7 @@
pub const reader = @import("minidump/reader.zig");
pub const stream = @import("minidump/stream.zig");
pub const Reader = reader.Reader;
// Reference every declaration in this file so that the tests of the
// imported minidump files are picked up by the test runner.
test {
    const testing = @import("std").testing;
    testing.refAllDecls(@This());
}

View File

@ -0,0 +1,59 @@
//! This file contains the external structs and constants for the minidump
//! format. Most are from the Microsoft documentation on the minidump format:
//! https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/
//!
//! Wherever possible, we also compare our definitions to other projects
//! such as rust-minidump, libmdmp, breakpad, etc. to ensure we're doing
//! the right thing.
/// The header signature: the ASCII bytes "MDMP" read as a little-endian u32.
/// The reader compares `Header.signature` against this value both to
/// validate the file and to detect its byte order.
pub const signature = 0x504D444D;
/// The version of the minidump format. The reader compares this against
/// the low 16 bits of `Header.version`.
pub const version = 0xA793;
/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_header
///
/// The fixed-size header of a minidump file. The reader decodes it first
/// and uses it to locate the stream directory.
pub const Header = extern struct {
/// Expected to equal `signature` once decoded with the file's byte order.
signature: u32,
/// `low` (the least significant 16 bits) is expected to equal `version`.
/// `high` is an implementation-specific value and is not validated.
version: packed struct(u32) { low: u16, high: u16 },
/// Number of `Directory` entries in the stream directory.
stream_count: u32,
/// Offset within the file of the first `Directory` entry.
stream_directory_rva: u32,
/// Not interpreted by the reader code in this package.
checksum: u32,
/// Not interpreted by the reader code in this package.
time_date_stamp: u32,
/// Not interpreted by the reader code in this package.
flags: u64,
};
/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_directory
///
/// One entry of the stream directory: the type of a stream and where its
/// payload lives in the file.
pub const Directory = extern struct {
/// Numeric stream type. For example, 0x3 is the thread list stream.
stream_type: u32,
/// Size and file offset of the stream payload.
location: LocationDescriptor,
};
/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_location_descriptor
///
/// Describes a span of bytes within the minidump file.
pub const LocationDescriptor = extern struct {
/// Length of the span in bytes. Readers use this as their byte limit.
data_size: u32,
/// Offset within the file at which the span starts. Readers seek the
/// source to this position before reading.
rva: u32,
};
/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_memory_descriptor
///
/// Describes a captured range of memory: its starting address and where
/// the captured bytes are stored in the minidump file.
pub const MemoryDescriptor = extern struct {
/// Presumably the address of the first captured byte in the dumped
/// process; see the Microsoft documentation linked above.
start_of_memory_range: u64,
/// Location of the captured bytes within the minidump file.
memory: LocationDescriptor,
};
/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_thread_list
///
/// The header of a thread list stream followed by the first thread entry.
///
/// NOTE(review): `Thread` contains u64 fields, so as an extern struct
/// `threads` presumably sits at offset 8 rather than directly after the
/// 4-byte count. The thread list reader does not decode through this type;
/// it reads the count itself and handles the optional 4 bytes of padding
/// explicitly. Confirm the layout before using this type to decode bytes.
pub const ThreadList = extern struct {
/// Number of `Thread` entries in the stream.
number_of_threads: u32,
/// The first thread entry; further entries follow it in the stream.
threads: [1]Thread,
};
/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_thread
///
/// A single entry of the thread list stream. The field meanings below
/// follow the Microsoft documentation linked above; the reader code here
/// only decodes the struct and does not interpret most fields.
pub const Thread = extern struct {
thread_id: u32,
suspend_count: u32,
priority_class: u32,
priority: u32,
/// Presumably the address of the thread environment block -- confirm
/// against the Microsoft documentation.
teb: u64,
/// The thread's stack memory. `stack.memory` locates the captured stack
/// bytes within the minidump file.
stack: MemoryDescriptor,
/// Location within the minidump file of the thread's context data.
thread_context: LocationDescriptor,
};

View File

@ -0,0 +1,242 @@
const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const external = @import("external.zig");
const stream = @import("stream.zig");
const EncodedStream = stream.EncodedStream;
const log = std.log.scoped(.minidump_reader);
/// Possible minidump-specific errors that can occur when reading a minidump.
/// This isn't the full error set since IO errors can also occur depending
/// on the Source type.
pub const ReadError = error{
/// The header signature did not match when decoded as either
/// little-endian or big-endian.
InvalidHeader,
/// The low 16 bits of the header version did not match the expected
/// minidump format version.
InvalidVersion,
/// The size recorded in a stream's directory entry is inconsistent with
/// the stream contents. Currently returned by the thread list reader.
StreamSizeMismatch,
};
/// Reader creates a new minidump reader type for the given source type. The
/// source must have both a "reader()" and "seekableStream()" function.
///
/// Given the format of a minidump file, we must keep the source open and
/// continually access it because the format of the minidump is full of
/// pointers and offsets that we must follow depending on the stream types.
/// Also, since we're not aware of all stream types (in fact it's impossible
/// to be aware since custom stream types are allowed), it's possible any
/// stream type can define its own pointers and offsets. So, the source must
/// always be available so callers can decode the streams as needed.
pub fn Reader(comptime S: type) type {
    return struct {
        const Self = @This();

        /// The source data.
        source: Source,

        /// The endianness of the minidump file. This is detected by reading
        /// the byte order of the header.
        endian: std.builtin.Endian,

        /// The number of streams within the minidump file. This is read from
        /// the header and stored here so we can quickly access them. Note
        /// the stream types require reading the source; this is an optimization
        /// to avoid any allocations on the reader and the caller can choose
        /// to store them if they want.
        stream_count: u32,

        /// The offset of the first stream directory entry, as read from
        /// the header.
        stream_directory_rva: u32,

        /// The type on which `reader` and `seekableStream` are declared:
        /// the pointee for pointer sources, the source itself for structs.
        const SourceCallable = switch (@typeInfo(Source)) {
            .Pointer => |v| v.child,
            .Struct => Source,
            else => @compileError("Source type must be a pointer or struct"),
        };
        const SourceReader = @typeInfo(@TypeOf(SourceCallable.reader)).Fn.return_type.?;
        const SourceSeeker = @typeInfo(@TypeOf(SourceCallable.seekableStream)).Fn.return_type.?;

        /// A limited reader for reading data from the source.
        pub const LimitedReader = std.io.LimitedReader(SourceReader);

        /// The source type for the reader.
        pub const Source = S;

        /// The stream types for reading
        pub const ThreadList = stream.thread_list.ThreadListReader(Self);

        /// The reader type for stream reading. This has some other methods so
        /// you must still call reader() on the result to get the actual
        /// reader to read the data.
        pub const StreamReader = struct {
            source: Source,
            endian: std.builtin.Endian,
            directory: external.Directory,

            /// Should not be accessed directly. This is setup whenever
            /// reader() is called.
            limit_reader: LimitedReader = undefined,

            pub const Reader = LimitedReader.Reader;

            /// Returns a Reader implementation that reads the bytes of the
            /// stream.
            ///
            /// The reader is dependent on the state of Source so any
            /// state-changing operations on Source will invalidate the
            /// reader. For example, making another reader, reading another
            /// stream directory, closing the source, etc.
            ///
            /// The returned reader also points at `self.limit_reader`, so
            /// this StreamReader must outlive it and must not be moved.
            pub fn reader(self: *StreamReader) LimitedReader.Reader {
                // NOTE(review): `try` is used although the return type is
                // not an error union. This presumably only compiles when
                // the source's seek error set is empty (as with the fixed
                // buffer streams used in the tests) -- confirm before using
                // this with a source whose seeks can fail.
                try self.source.seekableStream().seekTo(self.directory.location.rva);
                self.limit_reader = .{
                    .inner_reader = self.source.reader(),
                    .bytes_left = self.directory.location.data_size,
                };
                return self.limit_reader.reader();
            }

            /// Seeks the source to the start of this stream's payload, as
            /// given by the directory entry's location.
            pub fn seekToPayload(self: *StreamReader) !void {
                try self.source.seekableStream().seekTo(self.directory.location.rva);
            }
        };

        /// Iterator type to read over the streams in the minidump file.
        pub const StreamIterator = struct {
            reader: *const Self,

            /// Index of the next directory entry to read.
            i: u32 = 0,

            /// Reads the next directory entry and returns a StreamReader
            /// for it, or null once all `stream_count` entries were visited.
            pub fn next(self: *StreamIterator) !?StreamReader {
                if (self.i >= self.reader.stream_count) return null;
                const dir = try self.reader.directory(self.i);
                self.i += 1;
                return try self.reader.streamReader(dir);
            }
        };

        /// Initialize a reader. The source must remain available for the entire
        /// lifetime of the reader. The reader does not take ownership of the
        /// source so if it has resources that need to be cleaned up, the caller
        /// must do so once the reader is no longer needed.
        ///
        /// Returns an error if the header can't be read or fails validation.
        pub fn init(source: Source) !Self {
            const header, const endian = try readHeader(Source, source);
            return .{
                .source = source,
                .endian = endian,
                .stream_count = header.stream_count,
                .stream_directory_rva = header.stream_directory_rva,
            };
        }

        /// Return an iterator to read over the streams in the minidump file.
        /// This is very similar to using a simple for loop to stream_count
        /// and calling directory() on each index, but is more idiomatic
        /// Zig.
        pub fn streamIterator(self: *const Self) StreamIterator {
            return .{ .reader = self };
        }

        /// Return a StreamReader for the given directory type. This streams
        /// from the underlying source so the returned reader is only valid
        /// as long as the source is unmodified (i.e. the source is not
        /// closed, the source seek position is not moved, etc.).
        pub fn streamReader(
            self: *const Self,
            dir: external.Directory,
        ) SourceSeeker.SeekError!StreamReader {
            return .{
                .source = self.source,
                .endian = self.endian,
                .directory = dir,
            };
        }

        /// Get the directory entry with the given index.
        ///
        /// Asserts the index is valid (idx < stream_count).
        pub fn directory(self: *const Self, idx: usize) !external.Directory {
            assert(idx < self.stream_count);

            // Seek to the directory. Both the directory rva and the stream
            // count come from the file, so the position is computed in u64:
            // the same arithmetic in u32 can overflow on corrupt or hostile
            // input. A position past the end of the source surfaces as a
            // regular seek/read error instead.
            const offset: u64 = @as(u64, @sizeOf(external.Directory)) * @as(u64, idx);
            const rva: u64 = @as(u64, self.stream_directory_rva) + offset;
            try self.source.seekableStream().seekTo(rva);

            // Read the directory.
            return try self.source.reader().readStructEndian(
                external.Directory,
                self.endian,
            );
        }

        /// Return a reader for the given location descriptor. This is only
        /// valid until the reader source is modified in some way.
        pub fn locationReader(
            self: *const Self,
            loc: external.LocationDescriptor,
        ) !LimitedReader {
            try self.source.seekableStream().seekTo(loc.rva);
            return .{
                .inner_reader = self.source.reader(),
                .bytes_left = loc.data_size,
            };
        }
    };
}
/// Reads and validates the minidump header from the source, returning the
/// decoded header together with the byte order that produced a matching
/// signature.
///
/// Returns ReadError.InvalidHeader if the signature matches in neither byte
/// order and ReadError.InvalidVersion if the format version is unexpected.
/// Errors from the source's reader and seeker are passed through.
fn readHeader(comptime T: type, source: T) !struct {
    external.Header,
    std.builtin.Endian,
} {
    // Little-endian is attempted first.
    const le = try source.reader().readStructEndian(external.Header, .little);
    const is_little = le.signature == external.signature;

    const header = if (is_little) le else be: {
        // The signature didn't match so we assume the file is big-endian.
        // Rewind to the start of the file and decode the header again.
        try source.seekableStream().seekTo(0);
        const be = try source.reader().readStructEndian(external.Header, .big);
        if (be.signature != external.signature) return ReadError.InvalidHeader;
        break :be be;
    };
    const endian: std.builtin.Endian = if (is_little) .little else .big;

    // "The low-order word is MINIDUMP_VERSION. The high-order word is an
    // internal value that is implementation specific."
    if (header.version.low != external.version) return ReadError.InvalidVersion;

    return .{ header, endian };
}
// Logs the directory entry of every stream in the test minidump at warn
// level. Useful when debugging the reader against a minidump file.
test "minidump debug" {
    var source = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp"));
    const minidump = try Reader(*@TypeOf(source)).init(&source);

    var streams = minidump.streamIterator();
    while (try streams.next()) |entry| {
        // The iterator index has already moved past the returned entry.
        const index = streams.i - 1;
        log.warn("directory i={} dir={}", .{ index, entry.directory });
    }
}
// Checks the header fields of the test minidump and reads the complete
// payload of its first stream through a StreamReader.
test "minidump read" {
    const t = std.testing;

    var source = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp"));
    const MinidumpReader = Reader(*@TypeOf(source));
    const minidump = try MinidumpReader.init(&source);

    try t.expectEqual(std.builtin.Endian.little, minidump.endian);
    try t.expectEqual(7, minidump.stream_count);

    // The first directory entry is the thread list (stream type 3).
    const first = try minidump.directory(0);
    try t.expectEqual(3, first.stream_type);
    try t.expectEqual(584, first.location.data_size);

    // Reading the stream to its end must yield exactly data_size bytes.
    var payload = std.ArrayList(u8).init(t.allocator);
    defer payload.deinit();
    var stream_reader = try minidump.streamReader(first);
    try stream_reader.reader().readAllArrayList(&payload, std.math.maxInt(usize));
    try t.expectEqual(584, payload.items.len);
}

View File

@ -0,0 +1,30 @@
const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const log = std.log.scoped(.minidump_stream);
/// The known stream types.
pub const thread_list = @import("stream_threadlist.zig");
/// A stream within the minidump file. A stream can be either in an encoded
/// form or decoded form. The encoded form is raw bytes that aren't validated
/// until they're decoded. The decoded form is a structured form of the stream.
///
/// The decoded form is more ergonomic to work with but the encoded form is
/// more efficient to read/write.
///
/// Only the encoded form is represented by this union at the moment.
pub const Stream = union(enum) {
/// The raw, unvalidated bytes of a stream together with its type.
encoded: EncodedStream,
};
/// An encoded stream value. It is "encoded" in the sense that it is raw bytes
/// with a type associated. The raw bytes are not validated to be correct for
/// the type.
pub const EncodedStream = struct {
/// The numeric stream type associated with the bytes.
type: u32,
/// The raw stream bytes. This is a borrowed slice; this type does not
/// define who owns the memory.
data: []const u8,
};
// Reference every declaration in this file so that the tests of the
// imported stream implementations are picked up by the test runner.
test {
    std.testing.refAllDecls(@This());
}

View File

@ -0,0 +1,117 @@
const std = @import("std");
const assert = std.debug.assert;
const external = @import("external.zig");
const readerpkg = @import("reader.zig");
const Reader = readerpkg.Reader;
const ReadError = readerpkg.ReadError;
const log = std.log.scoped(.minidump_stream);
/// This is the list of threads from the process.
///
/// This is the Reader implementation. You usually do not use this directly.
/// Instead, use Reader(T).ThreadList which will get you the same thing.
///
/// ThreadList is stream type 0x3.
/// R is the Reader(T) type; its Source and StreamReader declarations are
/// used here.
pub fn ThreadListReader(comptime R: type) type {
    return struct {
        const Self = @This();

        /// The number of threads in the list.
        count: u32,

        /// The rva to the first thread in the list.
        rva: u32,

        /// Source data and endianness so we can read.
        source: R.Source,
        endian: std.builtin.Endian,

        /// Reads the thread list header from the given stream.
        ///
        /// Asserts that the stream is a thread list (stream type 0x3).
        /// Returns ReadError.StreamSizeMismatch if the thread count does
        /// not agree with the stream size from the directory entry. Errors
        /// from seeking or reading the source are passed through.
        pub fn init(r: *R.StreamReader) !Self {
            assert(r.directory.stream_type == 0x3);
            try r.seekToPayload();
            const reader = r.source.reader();

            // Our count is always a u32 in the header.
            const count = try reader.readInt(u32, r.endian);

            // Determine if we have padding in our header. It is possible
            // for there to be padding if the list header was written by
            // a 32-bit process but is being read on a 64-bit process.
            const padding: u32 = padding: {
                // The count comes straight from the file, so the expected
                // size is computed in u64: the multiplication can exceed
                // u32 for corrupt or hostile input.
                const maybe_size: u64 = @sizeOf(u32) +
                    @as(u64, @sizeOf(external.Thread)) * count;
                const data_size: u64 = r.directory.location.data_size;

                switch (std.math.order(maybe_size, data_size)) {
                    // It should never be larger than what the directory says.
                    .gt => return ReadError.StreamSizeMismatch,

                    // If the sizes match exactly we're good.
                    .eq => break :padding 0,

                    // The only slack we accept is 4 bytes of padding
                    // between the count and the first thread.
                    .lt => {
                        if (data_size - maybe_size != 4) return ReadError.StreamSizeMismatch;
                        break :padding 4;
                    },
                }
            };

            // Rva is the location of the first thread in the list. A
            // location whose list header would wrap around the 32-bit rva
            // space can't be valid, so it is reported as a mismatch rather
            // than overflowing.
            const header_size: u32 = @as(u32, @sizeOf(u32)) + padding;
            const rva = std.math.add(
                u32,
                r.directory.location.rva,
                header_size,
            ) catch return ReadError.StreamSizeMismatch;

            return .{
                .count = count,
                .rva = rva,
                .source = r.source,
                .endian = r.endian,
            };
        }

        /// Get the thread entry for the given index.
        ///
        /// Index is asserted to be less than count.
        pub fn thread(self: *const Self, i: usize) !external.Thread {
            assert(i < self.count);

            // Seek to the thread. The position is computed in u64 so that a
            // large count/rva from the file can't overflow the arithmetic;
            // a position past the end of the source surfaces as a regular
            // seek/read error.
            const offset: u64 = @as(u64, @sizeOf(external.Thread)) * @as(u64, i);
            const rva: u64 = @as(u64, self.rva) + offset;
            try self.source.seekableStream().seekTo(rva);

            // Read the thread
            return try self.source.reader().readStructEndian(
                external.Thread,
                self.endian,
            );
        }
    };
}
// Decodes the thread list of the test minidump, then reads every thread
// entry along with the stack memory it points at.
test "minidump: threadlist" {
    const t = std.testing;
    const gpa = t.allocator;

    var source = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp"));
    const MinidumpReader = Reader(*@TypeOf(source));
    const minidump = try MinidumpReader.init(&source);

    // The thread list is the first stream in the test file.
    const entry = try minidump.directory(0);
    try t.expectEqual(3, entry.stream_type);
    var stream_reader = try minidump.streamReader(entry);

    // Decode the stream into the rich structure.
    const threads = try MinidumpReader.ThreadList.init(&stream_reader);
    log.warn("threadlist count={} rva={}", .{ threads.count, threads.rva });
    try t.expectEqual(12, threads.count);

    var idx: usize = 0;
    while (idx < threads.count) : (idx += 1) {
        const info = try threads.thread(idx);
        log.warn("thread i={} thread={}", .{ idx, info });

        // The stack memory of every thread must be fully readable.
        const stack_loc = info.stack.memory;
        var stack_reader = try minidump.locationReader(stack_loc);
        const stack_bytes = try stack_reader.reader().readAllAlloc(gpa, stack_loc.data_size);
        defer gpa.free(stack_bytes);
    }
}

BIN
src/crash/testdata/macos.dmp vendored Normal file

Binary file not shown.