From ae8859bc7bcf0267c00dfb0c136c40be4c1fc0f4 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Fri, 6 Sep 2024 21:50:47 -0700 Subject: [PATCH] crash/minidump: read the streams from the minidump file --- src/crash/minidump.zig | 118 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 112 insertions(+), 6 deletions(-) diff --git a/src/crash/minidump.zig b/src/crash/minidump.zig index caec0f1ea..fbd4ec809 100644 --- a/src/crash/minidump.zig +++ b/src/crash/minidump.zig @@ -4,23 +4,108 @@ const Allocator = std.mem.Allocator; const log = std.log.scoped(.minidump); -/// Minidump parser. +/// Minidump file format. pub const Minidump = struct { + /// The arena that all streams are allocated within when reading the + /// minidump file. This is freed on deinit. + arena: std.heap.ArenaAllocator, + + /// The header of the minidump file. On serialization, the stream count + /// and rva will be updated to match the streams. On deserialization, + /// this is read directly from the file. header: Header, + /// The streams within the minidump file in the order they're serialized. + streams: std.ArrayListUnmanaged(Stream), + + pub const Stream = struct { + type: u32, + data: []const u8, + }; + /// Read the minidump file for the given source. /// /// The source must have a reader() and seekableStream() method. /// For example, both File and std.io.FixedBufferStream implement these. - pub fn read(alloc: Allocator, source: anytype) !Minidump { - _ = alloc; + /// + /// The reader will read the full minidump data into memory. This makes + /// it easy to serialize the data back out. This is acceptable for our + /// use case which doesn't rely too much on being memory efficient or + /// high load. We also expect the minidump files to be relatively small + /// (dozens of MB at most, hundreds of KB typically). + /// + /// NOTE(mitchellh): If we ever want to make this more memory efficient, + /// I would create a new type that is a "lazy reader" that stores the + /// source type and reads the data as needed. Then this type should use + /// that type. + pub fn read(alloc_gpa: Allocator, source: anytype) !Minidump { + var arena = std.heap.ArenaAllocator.init(alloc_gpa); + errdefer arena.deinit(); + const alloc = arena.allocator(); // Read the header which also determines the endianness of the file. const header, const endian = try readHeader(source); - log.warn("header={} endian={}", .{ header, endian }); + + var streams = try std.ArrayListUnmanaged(Stream).initCapacity( + alloc, + header.stream_count, + ); + errdefer streams.deinit(alloc); + + // Read the streams. All the streams are first described in a + // "directory" structure which tells us the type of stream and + // where it is located in the file. The directory structures are + // stored in a contiguous block at the stream_directory_rva. + // + // Due to how we use this structure, we read directories one by one, + // then read all the data for that directory, then move on to the + // next directory. This is because we copy all the minidump data + // into memory. + const seeker = source.seekableStream(); + try seeker.seekTo(header.stream_directory_rva); + for (0..header.stream_count) |_| { + // Read the current directory + const directory = try source.reader().readStructEndian(Directory, endian); + + // Seek to the location of the data. We have to store our current + // position because we need to seek back to it after reading the + // data in order to read the next directory. + const pos = try seeker.getPos(); + try seeker.seekTo(directory.location.rva); + + // Read the data. The data length is defined by the directory. + // If we can't read exactly that amount of data, we return an error. + var data = std.ArrayList(u8).init(alloc); + defer data.deinit(); + source.reader().readAllArrayList( + &data, + directory.location.data_size, + ) catch |err| switch (err) { + // This means there was more data in the reader than what + // we asked for this. This is okay and expected because + // all streams except the last one will have this error. + error.StreamTooLong => {}, + else => return err, + }; + + // Basic check. + if (data.items.len != directory.location.data_size) return error.DataSizeMismatch; + + // Store our stream + try streams.append(alloc, .{ + .type = directory.stream_type, + .data = try data.toOwnedSlice(), + }); + + // Seek back to where we were after reading this directory + // entry so we can read the next one. + try seeker.seekTo(pos); + } return .{ + .arena = arena, .header = header, + .streams = streams, }; } @@ -48,8 +133,16 @@ pub const Minidump = struct { return .{ header, endian }; } -}; + pub fn deinit(self: *Minidump) void { + self.arena.deinit(); + } + + /// The arena allocator associated with this envelope + pub fn allocator(self: *Minidump) Allocator { + return self.arena.allocator(); + } +}; /// "MDMP" in little-endian. pub const signature = 0x504D444D; @@ -67,9 +160,22 @@ pub const Header = extern struct { flags: u64, }; +/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_directory +pub const Directory = extern struct { + stream_type: u32, + location: LocationDescriptor, +}; + +/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_location_descriptor +pub const LocationDescriptor = extern struct { + data_size: u32, + rva: u32, +}; + test "Minidump read" { const testing = std.testing; const alloc = testing.allocator; var fbs = std.io.fixedBufferStream(@embedFile("testdata/macos.dmp")); - _ = try Minidump.read(alloc, &fbs); + var md = try Minidump.read(alloc, &fbs); + defer md.deinit(); }