diff --git a/src/crash/main.zig b/src/crash/main.zig
index 1ac971851..5f9aa96c5 100644
--- a/src/crash/main.zig
+++ b/src/crash/main.zig
@@ -5,6 +5,7 @@
 const dir = @import("dir.zig");
 const sentry_envelope = @import("sentry_envelope.zig");
 
+pub const minidump = @import("minidump.zig");
 pub const sentry = @import("sentry.zig");
 pub const Envelope = sentry_envelope.Envelope;
 pub const defaultDir = dir.defaultDir;
diff --git a/src/crash/minidump.zig b/src/crash/minidump.zig
new file mode 100644
index 000000000..0abd67eae
--- /dev/null
+++ b/src/crash/minidump.zig
@@ -0,0 +1,7 @@
+pub const reader = @import("minidump/reader.zig");
+pub const stream = @import("minidump/stream.zig");
+pub const Reader = reader.Reader;
+
+test {
+    @import("std").testing.refAllDecls(@This()); // reference every declaration of this file
+}
diff --git a/src/crash/minidump/external.zig b/src/crash/minidump/external.zig
new file mode 100644
index 000000000..451810883
--- /dev/null
+++ b/src/crash/minidump/external.zig
@@ -0,0 +1,59 @@
+//! This file contains the external structs and constants for the minidump
+//! format. Most are from the Microsoft documentation on the minidump format:
+//! https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/
+//!
+//! Wherever possible, we also compare our definitions to other projects
+//! such as rust-minidump, libmdmp, breakpad, etc. to ensure we're doing
+//! the right thing.
+
+/// The header signature: the bytes "MDMP" read as a little-endian u32.
+pub const signature = 0x504D444D;
+
+/// The version of the minidump format (matched against the low 16 bits of Header.version).
+pub const version = 0xA793;
+
+/// MINIDUMP_HEADER: https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_header
+pub const Header = extern struct {
+    signature: u32,
+    version: packed struct(u32) { low: u16, high: u16 },
+    stream_count: u32,
+    stream_directory_rva: u32,
+    checksum: u32,
+    time_date_stamp: u32,
+    flags: u64,
+};
+
+/// MINIDUMP_DIRECTORY: https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_directory
+pub const Directory = extern struct {
+    stream_type: u32,
+    location: LocationDescriptor,
+};
+
+/// MINIDUMP_LOCATION_DESCRIPTOR: https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_location_descriptor
+pub const LocationDescriptor = extern struct {
+    data_size: u32,
+    rva: u32,
+};
+
+/// MINIDUMP_MEMORY_DESCRIPTOR: https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_memory_descriptor
+pub const MemoryDescriptor = extern struct {
+    start_of_memory_range: u64,
+    memory: LocationDescriptor,
+};
+
+/// MINIDUMP_THREAD_LIST: https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_thread_list
+pub const ThreadList = extern struct {
+    number_of_threads: u32,
+    threads: [1]Thread, // NOTE(review): presumably a variable-length array of number_of_threads entries; confirm against the linked docs
+};
+
+/// MINIDUMP_THREAD: https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_thread
+pub const Thread = extern struct {
+    thread_id: u32,
+    suspend_count: u32,
+    priority_class: u32,
+    priority: u32,
+    teb: u64,
+    stack: MemoryDescriptor,
+    thread_context: LocationDescriptor,
+};
diff --git a/src/crash/minidump/reader.zig b/src/crash/minidump/reader.zig
new file mode 100644
index 000000000..f792d6670
--- /dev/null
+++ b/src/crash/minidump/reader.zig
@@ -0,0 +1,242 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const external = @import("external.zig");
+const stream = @import("stream.zig");
+const EncodedStream = stream.EncodedStream;
+
+const log = std.log.scoped(.minidump_reader);
+
+/// Possible
minidump-specific errors that can occur when reading a minidump.
+/// This isn't the full error set since IO errors can also occur depending
+/// on the Source type.
+pub const ReadError = error{
+    InvalidHeader,
+    InvalidVersion,
+    StreamSizeMismatch,
+};
+
+/// Reader creates a new minidump reader for the given source type. The
+/// source must have both a "reader()" and "seekableStream()" function.
+///
+/// Given the format of a minidump file, we must keep the source open and
+/// continually access it because the format of the minidump is full of
+/// pointers and offsets that we must follow depending on the stream types.
+/// Also, since we're not aware of all stream types (in fact it's impossible
+/// to be aware since custom stream types are allowed), it's possible any stream
+/// type can define their own pointers and offsets. So, the source must always
+/// be available so callers can decode the streams as needed.
+pub fn Reader(comptime S: type) type {
+    return struct {
+        const Self = @This();
+
+        /// The source data.
+        source: Source,
+
+        /// The endianness of the minidump file. This is detected by reading
+        /// the byte order of the header.
+        endian: std.builtin.Endian,
+
+        /// The number of streams within the minidump file. This is read from
+        /// the header and stored here so we can quickly access them. Note
+        /// the stream types require reading the source; this is an optimization
+        /// to avoid any allocations on the reader and the caller can choose
+        /// to store them if they want.
+        stream_count: u32,
+        stream_directory_rva: u32,
+
+        const SourceCallable = switch (@typeInfo(Source)) {
+            .Pointer => |v| v.child,
+            .Struct => Source,
+            else => @compileError("Source type must be a pointer or struct"),
+        };
+
+        const SourceReader = @typeInfo(@TypeOf(SourceCallable.reader)).Fn.return_type.?;
+        const SourceSeeker = @typeInfo(@TypeOf(SourceCallable.seekableStream)).Fn.return_type.?;
+
+        /// A limited reader for reading data from the source.
+        pub const LimitedReader = std.io.LimitedReader(SourceReader);
+
+        /// The source type for the reader.
+        pub const Source = S;
+
+        /// The stream types for reading.
+        pub const ThreadList = stream.thread_list.ThreadListReader(Self);
+
+        /// The reader type for stream reading. This has some other methods so
+        /// you must still call reader() on the result to get the actual
+        /// reader to read the data.
+        pub const StreamReader = struct {
+            source: Source,
+            endian: std.builtin.Endian,
+            directory: external.Directory,
+
+            /// Should not be accessed directly. This is set up whenever
+            /// reader() is called.
+            limit_reader: LimitedReader = undefined,
+
+            pub const Reader = LimitedReader.Reader;
+
+            /// Returns a Reader implementation that reads the bytes of the
+            /// stream, after seeking the source to the stream's payload.
+            /// The reader is dependent on the state of Source so any
+            /// state-changing operations on Source will invalidate the
+            /// reader. For example, making another reader, reading another
+            /// stream directory, closing the source, etc.
+            /// NOTE(review): uses `try` although the return type is not an error union; confirm this is intended for fallible sources.
+            pub fn reader(self: *StreamReader) LimitedReader.Reader {
+                try self.source.seekableStream().seekTo(self.directory.location.rva);
+                self.limit_reader = .{
+                    .inner_reader = self.source.reader(),
+                    .bytes_left = self.directory.location.data_size,
+                };
+                return self.limit_reader.reader();
+            }
+
+            /// Seeks the source to the start of this stream's payload (directory.location.rva).
+            pub fn seekToPayload(self: *StreamReader) !void {
+                try self.source.seekableStream().seekTo(self.directory.location.rva);
+            }
+        };
+
+        /// Iterator type to read over the streams in the minidump file.
+        pub const StreamIterator = struct {
+            reader: *const Self,
+            i: u32 = 0,
+
+            pub fn next(self: *StreamIterator) !?StreamReader {
+                if (self.i >= self.reader.stream_count) return null;
+                const dir = try self.reader.directory(self.i);
+                self.i += 1;
+                return try self.reader.streamReader(dir);
+            }
+        };
+
+        /// Initialize a reader. The source must remain available for the entire
+        /// lifetime of the reader. The reader does not take ownership of the
+        /// source so if it has resources that need to be cleaned up, the caller
+        /// must do so once the reader is no longer needed.
+        pub fn init(source: Source) !Self {
+            const header, const endian = try readHeader(Source, source);
+            return .{
+                .source = source,
+                .endian = endian,
+                .stream_count = header.stream_count,
+                .stream_directory_rva = header.stream_directory_rva,
+            };
+        }
+
+        /// Return an iterator to read over the streams in the minidump file.
+        /// This is very similar to using a simple for loop to stream_count
+        /// and calling directory() on each index, but is more idiomatic
+        /// Zig.
+        pub fn streamIterator(self: *const Self) StreamIterator {
+            return .{ .reader = self };
+        }
+
+        /// Return a StreamReader for the given directory type. This streams
+        /// from the underlying source so the returned reader is only valid
+        /// as long as the source is unmodified (i.e. the source is not
+        /// closed, the source seek position is not moved, etc.).
+        pub fn streamReader(
+            self: *const Self,
+            dir: external.Directory,
+        ) SourceSeeker.SeekError!StreamReader {
+            return .{
+                .source = self.source,
+                .endian = self.endian,
+                .directory = dir,
+            };
+        }
+
+        /// Get the directory entry with the given index.
+        ///
+        /// Asserts the index is valid (idx < stream_count).
+        pub fn directory(self: *const Self, idx: usize) !external.Directory {
+            assert(idx < self.stream_count);
+
+            // Seek to the directory. The rva is read from the file, so the math is done in u64 where it cannot overflow.
+            const offset: u64 = @sizeOf(external.Directory) * @as(u64, idx);
+            const rva: u64 = @as(u64, self.stream_directory_rva) + offset;
+            try self.source.seekableStream().seekTo(rva);
+
+            // Read the directory.
+            return try self.source.reader().readStructEndian(
+                external.Directory,
+                self.endian,
+            );
+        }
+
+        /// Return a reader for the given location descriptor. This is only
+        /// valid until the reader source is modified in some way.
+        pub fn locationReader(
+            self: *const Self,
+            loc: external.LocationDescriptor,
+        ) !LimitedReader {
+            try self.source.seekableStream().seekTo(loc.rva);
+            return .{
+                .inner_reader = self.source.reader(),
+                .bytes_left = loc.data_size,
+            };
+        }
+    };
+}
+
+/// Reads the header for the minidump file and returns it with the detected endianness.
+/// The first attempt reads from the current position; the retry seeks to offset 0.
+fn readHeader(comptime T: type, source: T) !struct {
+    external.Header,
+    std.builtin.Endian,
+} {
+    // Start by trying LE.
+    var endian: std.builtin.Endian = .little;
+    var header = try source.reader().readStructEndian(external.Header, endian);
+
+    // If the signature doesn't match, we assume it's BE.
+    if (header.signature != external.signature) {
+        // Seek back to the start of the file so we can reread.
+        try source.seekableStream().seekTo(0);
+
+        // Try BE, if the signature doesn't match, return an error.
+        endian = .big;
+        header = try source.reader().readStructEndian(external.Header, endian);
+        if (header.signature != external.signature) return ReadError.InvalidHeader;
+    }
+
+    // "The low-order word is MINIDUMP_VERSION. The high-order word is an
+    // internal value that is implementation specific."
+    if (header.version.low != external.version) return ReadError.InvalidVersion;
+
+    return .{ header, endian };
+}
+
+// Debug aid: logs every stream directory entry of a minidump file.
+test "minidump debug" {
+    var fbs = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp"));
+    const r = try Reader(*@TypeOf(fbs)).init(&fbs);
+    var it = r.streamIterator();
+    while (try it.next()) |s| {
+        log.warn("directory i={} dir={}", .{ it.i - 1, s.directory }); // next() already advanced it.i, hence the -1
+    }
+}
+
+test "minidump read" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    var fbs = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp"));
+    const r = try Reader(*@TypeOf(fbs)).init(&fbs);
+    try testing.expectEqual(std.builtin.Endian.little, r.endian);
+    try testing.expectEqual(7, r.stream_count);
+    {
+        const dir = try r.directory(0);
+        try testing.expectEqual(3, dir.stream_type); // 0x3 is the ThreadList stream type
+        try testing.expectEqual(584, dir.location.data_size);
+
+        var bytes = std.ArrayList(u8).init(alloc);
+        defer bytes.deinit();
+        var sr = try r.streamReader(dir);
+        try sr.reader().readAllArrayList(&bytes, std.math.maxInt(usize)); // the limited reader stops after data_size bytes
+        try testing.expectEqual(584, bytes.items.len);
+    }
+}
diff --git a/src/crash/minidump/stream.zig b/src/crash/minidump/stream.zig
new file mode 100644
index 000000000..00ec6b042
--- /dev/null
+++ b/src/crash/minidump/stream.zig
@@ -0,0 +1,30 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+
+const log = std.log.scoped(.minidump_stream);
+
+/// The known stream types.
+pub const thread_list = @import("stream_threadlist.zig");
+
+/// A stream within the minidump file. A stream can be either in an encoded
+/// form or decoded form. The encoded form is raw bytes that aren't validated
+/// until they're decoded. The decoded form is a structured form of the stream.
+///
+/// The decoded form is more ergonomic to work with but the encoded form is
+/// more efficient to read/write. Only the encoded variant is defined in this union so far.
+pub const Stream = union(enum) {
+    encoded: EncodedStream,
+};
+
+/// An encoded stream value. It is "encoded" in the sense that it is raw bytes
+/// with a type associated.
The raw bytes are not validated to be correct for
+/// the type.
+pub const EncodedStream = struct {
+    type: u32,
+    data: []const u8,
+};
+
+test {
+    @import("std").testing.refAllDecls(@This());
+}
diff --git a/src/crash/minidump/stream_threadlist.zig b/src/crash/minidump/stream_threadlist.zig
new file mode 100644
index 000000000..51f3f9d4c
--- /dev/null
+++ b/src/crash/minidump/stream_threadlist.zig
@@ -0,0 +1,121 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const external = @import("external.zig");
+const readerpkg = @import("reader.zig");
+const Reader = readerpkg.Reader;
+const ReadError = readerpkg.ReadError;
+
+const log = std.log.scoped(.minidump_stream);
+
+/// This is the list of threads from the process.
+///
+/// This is the Reader implementation. You usually do not use this directly.
+/// Instead, use Reader(T).ThreadList which will get you the same thing.
+///
+/// ThreadList is stream type 0x3.
+/// StreamReader is the Reader(T).StreamReader type.
+pub fn ThreadListReader(comptime R: type) type {
+    return struct {
+        const Self = @This();
+
+        /// The number of threads in the list.
+        count: u32,
+
+        /// The rva to the first thread in the list.
+        rva: u32,
+
+        /// Source data and endianness so we can read.
+        source: R.Source,
+        endian: std.builtin.Endian,
+
+        /// Initialize from a StreamReader for a thread list stream. Seeks to
+        /// the payload, reads the thread count and validates it against the
+        /// directory's data_size, returning ReadError.StreamSizeMismatch when
+        /// they disagree. Asserts the directory's stream type is 0x3.
+        pub fn init(r: *R.StreamReader) !Self {
+            assert(r.directory.stream_type == 0x3);
+            try r.seekToPayload();
+            const reader = r.source.reader();
+
+            // Our count is always a u32 in the header.
+            const count = try reader.readInt(u32, r.endian);
+
+            // Determine if we have padding in our header. It is possible
+            // for there to be padding if the list header was written by
+            // a 32-bit process but is being read on a 64-bit process.
+            const padding: u32 = padding: {
+                const maybe_size = @sizeOf(u32) + (@sizeOf(external.Thread) * @as(u64, count));
+                switch (std.math.order(maybe_size, r.directory.location.data_size)) {
+                    // It should never be larger than what the directory says.
+                    .gt => return ReadError.StreamSizeMismatch,
+
+                    // If the sizes match exactly we're good.
+                    .eq => break :padding 0,
+
+                    .lt => {
+                        // The only accepted gap is the 4 bytes of header padding described above.
+                        if (r.directory.location.data_size - maybe_size != 4) return ReadError.StreamSizeMismatch;
+                        break :padding 4;
+                    },
+                }
+            };
+
+            // Rva is the location of the first thread in the list. The base rva is read from the file, so the add is checked.
+            const rva = std.math.add(u32, r.directory.location.rva, @sizeOf(u32) + padding) catch return ReadError.StreamSizeMismatch;
+
+            return .{
+                .count = count,
+                .rva = rva,
+                .source = r.source,
+                .endian = r.endian,
+            };
+        }
+
+        /// Get the thread entry for the given index.
+        ///
+        /// Index is asserted to be less than count.
+        pub fn thread(self: *const Self, i: usize) !external.Thread {
+            assert(i < self.count);
+
+            // Seek to the thread. Computed in u64 so that rva + offset cannot overflow.
+            const offset: u64 = @sizeOf(external.Thread) * @as(u64, i);
+            const rva: u64 = @as(u64, self.rva) + offset;
+            try self.source.seekableStream().seekTo(rva);
+
+            // Read the thread
+            return try self.source.reader().readStructEndian(
+                external.Thread,
+                self.endian,
+            );
+        }
+    };
+}
+
+test "minidump: threadlist" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    var fbs = std.io.fixedBufferStream(@embedFile("../testdata/macos.dmp"));
+    const R = Reader(*@TypeOf(fbs));
+    const r = try R.init(&fbs);
+
+    // Get our thread list stream
+    const dir = try r.directory(0);
+    try testing.expectEqual(3, dir.stream_type);
+    var sr = try r.streamReader(dir);
+
+    // Get our rich structure
+    const v = try R.ThreadList.init(&sr);
+    log.warn("threadlist count={} rva={}", .{ v.count, v.rva });
+
+    try testing.expectEqual(12, v.count);
+    for (0..v.count) |i| {
+        const t = try v.thread(i);
+        log.warn("thread i={} thread={}", .{ i, t });
+
+        // Read our stack memory
+        var stack_reader = try r.locationReader(t.stack.memory);
+        const bytes = try stack_reader.reader().readAllAlloc(alloc, t.stack.memory.data_size);
+        defer alloc.free(bytes);
+    }
+}
diff --git a/src/crash/testdata/macos.dmp b/src/crash/testdata/macos.dmp
new file mode 100644
index
000000000..212cc7e62 Binary files /dev/null and b/src/crash/testdata/macos.dmp differ