crash/minidump: read the streams from the minidump file

This commit is contained in:
Mitchell Hashimoto
2024-09-06 21:50:47 -07:00
parent 5a2bbe9a08
commit ae8859bc7b

View File

@ -4,23 +4,108 @@ const Allocator = std.mem.Allocator;
const log = std.log.scoped(.minidump); const log = std.log.scoped(.minidump);
/// Minidump parser. /// Minidump file format.
pub const Minidump = struct { pub const Minidump = struct {
/// The arena that all streams are allocated within when reading the
/// minidump file. This is freed on deinit.
arena: std.heap.ArenaAllocator,
/// The header of the minidump file. On serialization, the stream count
/// and rva will be updated to match the streams. On deserialization,
/// this is read directly from the file.
header: Header, header: Header,
/// The streams within the minidump file in the order they're serialized.
streams: std.ArrayListUnmanaged(Stream),
/// A single stream of the minidump file: its type tag and its raw bytes.
pub const Stream = struct {
/// The stream type, taken from the stream's directory entry.
type: u32,
/// The raw bytes of the stream. When produced by `read`, this memory
/// is allocated in the minidump's arena.
data: []const u8,
};
/// Read the minidump file for the given source.
///
/// The source must have a reader() and seekableStream() method.
/// For example, both File and std.io.FixedBufferStream implement these.
///
/// The reader will read the full minidump data into memory. This makes
/// it easy to serialize the data back out. This is acceptable for our
/// use case which doesn't rely too much on being memory efficient or
/// high load. We also expect the minidump files to be relatively small
/// (dozens of MB at most, hundreds of KB typically).
///
/// All stream data is allocated in an arena backed by `alloc_gpa`. The
/// caller owns the returned value and must call `deinit` on it to free
/// that memory.
///
/// Returns error.DataSizeMismatch if a directory entry describes data
/// that is not fully present in the source. Errors from reading the
/// header, seeking, reading, or allocating are returned as-is.
///
/// NOTE(mitchellh): If we ever want to make this more memory efficient,
/// I would create a new type that is a "lazy reader" that stores the
/// source type and reads the data as needed. Then this type should use
/// that type.
pub fn read(alloc_gpa: Allocator, source: anytype) !Minidump {
    // Everything we allocate lives in the arena, so this single errdefer
    // is all the cleanup needed on any error path below.
    var arena = std.heap.ArenaAllocator.init(alloc_gpa);
    errdefer arena.deinit();
    const alloc = arena.allocator();

    // Read the header which also determines the endianness of the file.
    const header, const endian = try readHeader(source);
    log.debug("header={} endian={}", .{ header, endian });

    var streams = try std.ArrayListUnmanaged(Stream).initCapacity(
        alloc,
        header.stream_count,
    );

    // Read the streams. All the streams are first described in a
    // "directory" structure which tells us the type of stream and
    // where it is located in the file. The directory structures are
    // stored in a contiguous block at the stream_directory_rva.
    //
    // Due to how we use this structure, we read directories one by one,
    // then read all the data for that directory, then move on to the
    // next directory. This is because we copy all the minidump data
    // into memory.
    const seeker = source.seekableStream();

    // The total size of the source. Directory entries come straight from
    // the file, so we validate them against this before allocating.
    const end_pos = try seeker.getEndPos();

    try seeker.seekTo(header.stream_directory_rva);
    for (0..header.stream_count) |_| {
        // Read the current directory
        const directory = try source.reader().readStructEndian(Directory, endian);

        // We have to store our current position because we need to seek
        // back to it after reading the data in order to read the next
        // directory.
        const pos = try seeker.getPos();

        // Make sure the described data actually fits in the source
        // before we allocate for it. Empty streams have nothing to read
        // so their location is not validated.
        const size = directory.location.data_size;
        const data_end = @as(u64, directory.location.rva) + size;
        if (size > 0 and data_end > end_pos) return error.DataSizeMismatch;

        // Seek to the data and read exactly the number of bytes the
        // directory describes. A short read is an error.
        try seeker.seekTo(directory.location.rva);
        const data = try alloc.alloc(u8, size);
        const n = try source.reader().readAll(data);
        if (n != data.len) return error.DataSizeMismatch;

        // Store our stream. Capacity for every stream was reserved
        // above so this cannot fail.
        streams.appendAssumeCapacity(.{
            .type = directory.stream_type,
            .data = data,
        });

        // Seek back to where we were after reading this directory
        // entry so we can read the next one.
        try seeker.seekTo(pos);
    }

    return .{
        .arena = arena,
        .header = header,
        .streams = streams,
    };
}
@ -48,8 +133,16 @@ pub const Minidump = struct {
return .{ header, endian }; return .{ header, endian };
} }
};
/// Free all memory owned by this minidump by deinitializing its arena.
/// Stream data produced by `read` is allocated in that arena, so it
/// must not be used after this call.
pub fn deinit(self: *Minidump) void {
self.arena.deinit();
}
/// The allocator backed by this minidump's arena. Memory allocated
/// with it is released when `deinit` is called.
pub fn allocator(self: *Minidump) Allocator {
return self.arena.allocator();
}
};
/// "MDMP" in little-endian. /// "MDMP" in little-endian.
pub const signature = 0x504D444D; pub const signature = 0x504D444D;
@ -67,9 +160,22 @@ pub const Header = extern struct {
flags: u64, flags: u64,
}; };
/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_directory
///
/// One entry of the stream directory. It describes the type of a
/// stream and where that stream's data is located.
pub const Directory = extern struct {
/// The type of the stream this entry describes.
stream_type: u32,
/// The size and position of the stream's data.
location: LocationDescriptor,
};
/// https://learn.microsoft.com/en-us/windows/win32/api/minidumpapiset/ns-minidumpapiset-minidump_location_descriptor
///
/// The size and position of a block of data within the minidump.
pub const LocationDescriptor = extern struct {
/// The size of the data in bytes.
data_size: u32,
/// The position of the data. `Minidump.read` seeks the source to
/// this absolute offset before reading the data.
rva: u32,
};
test "Minidump read" {
    const testing = std.testing;
    const alloc = testing.allocator;

    var fbs = std.io.fixedBufferStream(@embedFile("testdata/macos.dmp"));

    // The result owns an arena, so it must be kept and deinitialized;
    // discarding it would be reported as a leak by the testing allocator.
    var md = try Minidump.read(alloc, &fbs);
    defer md.deinit();

    // Every directory entry must have produced exactly one stream.
    try testing.expectEqual(
        @as(usize, md.header.stream_count),
        md.streams.items.len,
    );
}