[crashlog] Implement parser for JSON encoded crashlogs

Add a parser for JSON crashlogs. The CrashLogParser now defers to either
the JSONCrashLogParser or the TextCrashLogParser. It first tries to
interpret the input as JSON and, if that fails, falls back to the
textual parser.

Differential revision: https://reviews.llvm.org/D91130
This commit is contained in:
Jonas Devlieghere 2020-11-16 13:46:44 -08:00
parent 5a4ca8b550
commit c7cbf32f57
10 changed files with 407 additions and 5 deletions

View file

@ -41,6 +41,7 @@ import subprocess
import sys
import time
import uuid
import json
try:
# First try for LLDB in case PYTHONPATH is already correctly setup.
@ -378,6 +379,129 @@ class CrashLog(symbolication.Symbolicator):
return self.target
class CrashLogFormatException(Exception):
    """Raised when a crashlog is not in the format a parser expects."""
class CrashLogParser:
    """Front end that hands a crashlog to the parser matching its format."""

    def parse(self, debugger, path, verbose):
        """Parse the crashlog at path and return the chosen parser's result.

        The JSON parser is tried first. If it raises
        CrashLogFormatException, the textual parser is used instead.
        """
        try:
            json_parser = JSONCrashLogParser(debugger, path, verbose)
            return json_parser.parse()
        except CrashLogFormatException:
            text_parser = TextCrashLogParser(debugger, path, verbose)
            return text_parser.parse()
class JSONCrashLogParser:
    """Parse a JSON encoded crashlog into a CrashLog object.

    The file is expected to consist of one line of meta-data followed by a
    JSON object. parse() reads the whole file; the parse_* helpers each
    handle one part of the decoded document.
    """

    def __init__(self, debugger, path, verbose):
        """Store the user-expanded path and create the CrashLog to fill in."""
        self.path = os.path.expanduser(path)
        self.verbose = verbose
        self.crashlog = CrashLog(debugger, self.path, self.verbose)

    def parse(self):
        """Read and decode the crashlog file and return the CrashLog.

        Raises CrashLogFormatException when the file has no line break
        after the meta-data line, when the remainder is not valid JSON, or
        when it does not decode to a JSON object. Missing keys in an
        otherwise valid document are not translated and propagate as
        KeyError.
        """
        with open(self.path, 'r') as f:
            buffer = f.read()

        try:
            # First line is meta-data. index() raises ValueError when there
            # is no newline at all; such a file cannot be a JSON crashlog,
            # so it is reported the same way as a JSON decoding failure.
            buffer = buffer[buffer.index('\n') + 1:]
            data = json.loads(buffer)
        except ValueError:
            raise CrashLogFormatException()

        # Everything below indexes the document by key, so anything other
        # than a JSON object is not a crashlog.
        if not isinstance(data, dict):
            raise CrashLogFormatException()
        self.data = data

        self.parse_process_info(self.data)
        self.parse_images(self.data['usedImages'])
        self.parse_threads(self.data['threads'])

        thread = self.crashlog.threads[self.crashlog.crashed_thread_idx]
        thread.reason = self.parse_crash_reason(self.data['exception'])
        thread.registers = self.parse_thread_registers(self.data['threadState'])

        return self.crashlog

    def get_image_extra_info(self, idx):
        """Return the 'legacyInfo' / 'imageExtraInfo' entry at index idx."""
        return self.data['legacyInfo']['imageExtraInfo'][idx]

    def get_used_image(self, idx):
        """Return the 'usedImages' entry at index idx."""
        return self.data['usedImages'][idx]

    def parse_process_info(self, json_data):
        """Copy the pid, process name and process path onto the CrashLog."""
        self.crashlog.process_id = json_data['pid']
        self.crashlog.process_identifier = json_data['procName']
        self.crashlog.process_path = json_data['procPath']

    def parse_crash_reason(self, json_exception):
        """Return "<type> (<signal>)" followed by " (<detail>)" if present.

        The detail is the 'codes' value when that key exists, otherwise the
        'subtype' value; when neither exists nothing is appended.
        """
        exception_type = json_exception['type']
        exception_signal = json_exception['signal']
        if 'codes' in json_exception:
            exception_extra = " ({})".format(json_exception['codes'])
        elif 'subtype' in json_exception:
            exception_extra = " ({})".format(json_exception['subtype'])
        else:
            exception_extra = ""
        return "{} ({}){}".format(exception_type, exception_signal,
                                  exception_extra)

    def parse_images(self, json_images):
        """Append one DarwinImage to the CrashLog per 'usedImages' entry.

        Each entry supplies the UUID (element 0) and the low address
        (element 1). Name and path come from the imageExtraInfo entry at
        the same index. The high address and version are not available
        here and are passed as 0 and "".
        """
        for idx, json_image in enumerate(json_images):
            img_uuid = uuid.UUID(json_image[0])
            low = int(json_image[1])
            high = 0
            extra_info = self.get_image_extra_info(idx)
            name = extra_info['name']
            path = extra_info['path']
            version = ""
            darwin_image = self.crashlog.DarwinImage(low, high, name, version,
                                                     img_uuid, path,
                                                     self.verbose)
            self.crashlog.images.append(darwin_image)

    def parse_frames(self, thread, json_frames):
        """Append a Frame to thread for every [image index, offset] pair.

        The image's name is recorded as an ident on the thread and, once,
        on the CrashLog. The frame's pc is the image's address from
        'usedImages' plus the offset; the offset itself is passed as the
        third Frame argument.
        """
        for idx, json_frame in enumerate(json_frames):
            image_id = int(json_frame[0])

            ident = self.get_image_extra_info(image_id)['name']
            thread.add_ident(ident)
            if ident not in self.crashlog.idents:
                self.crashlog.idents.append(ident)

            frame_offset = int(json_frame[1])
            image = self.get_used_image(image_id)
            image_addr = int(image[1])
            pc = image_addr + frame_offset
            thread.frames.append(self.crashlog.Frame(idx, pc, frame_offset))

    def parse_threads(self, json_threads):
        """Append a Thread to the CrashLog for every entry in json_threads.

        A thread whose 'triggered' value is truthy becomes the CrashLog's
        crashed thread. A missing 'frames' key is treated as no frames.
        """
        for idx, json_thread in enumerate(json_threads):
            thread = self.crashlog.Thread(idx, False)
            if json_thread.get('triggered', False):
                self.crashlog.crashed_thread_idx = idx
            thread.queue = json_thread.get('queue')
            self.parse_frames(thread, json_thread.get('frames', []))
            self.crashlog.threads.append(thread)

    def parse_thread_registers(self, json_thread_state):
        """Return a dict mapping register names to integer values.

        Entries of the optional 'x' list become 'x0', 'x1', ... in order.
        Of the named registers only lr, cpsr, fp, sp, esr and pc are
        copied, and only when present; any other key is ignored.
        """
        registers = dict()
        for idx, reg in enumerate(json_thread_state.get('x', [])):
            key = str('x{}'.format(idx))
            registers[key] = int(reg)

        for register in ['lr', 'cpsr', 'fp', 'sp', 'esr', 'pc']:
            if register in json_thread_state:
                registers[register] = int(json_thread_state[register])

        return registers
class CrashLogParseMode:
NORMAL = 0
THREAD = 1
@ -387,7 +511,7 @@ class CrashLogParseMode:
INSTRS = 5
class CrashLogParser:
class TextCrashLogParser:
parent_process_regex = re.compile('^Parent Process:\s*(.*)\[(\d+)\]')
thread_state_regex = re.compile('^Thread ([0-9]+) crashed with')
thread_instrs_regex = re.compile('^Thread ([0-9]+) instruction stream')
@ -720,7 +844,7 @@ def interactive_crashlogs(debugger, options, args):
crash_logs = list()
for crash_log_file in crash_log_files:
try:
crash_log = CrashLogParser(debugger, crash_log_file, options.verbose).parse()
crash_log = CrashLogParser().parse(debugger, crash_log_file, options.verbose)
except Exception as e:
print(e)
continue
@ -1055,8 +1179,7 @@ be disassembled and lookups can be performed using the addresses found in the cr
interactive_crashlogs(debugger, options, args)
else:
for crash_log_file in args:
crash_log_parser = CrashLogParser(debugger, crash_log_file, options.verbose)
crash_log = crash_log_parser.parse()
crash_log = CrashLogParser().parse(debugger, crash_log_file, options.verbose)
SymbolicateCrashLog(crash_log, options)
if __name__ == '__main__':
# Create a new debugger instance

View file

@ -0,0 +1 @@
# CHECK-NOT: AssertionError

View file

@ -0,0 +1,49 @@
Process: a.out [21606]
Path: /private/tmp/a.out
Identifier: a.out
Version: 0
Code Type: X86-64 (Native)
Parent Process: fish [88883]
User ID: 501
Date/Time: 2020-11-11 14:47:34.600 -0800
OS Version: macOS 11.0.1
Report Version: 12
Bridge OS Version: redacted
Anonymous UUID: DCEF35CB-68D5-F524-FF13-060901F52EA8
Time Awake Since Boot: 400000 seconds
System Integrity Protection: enabled
Crashed Thread: 0 Dispatch queue: com.apple.main-thread
Exception Type: EXC_BAD_ACCESS (SIGSEGV)
Exception Codes: KERN_INVALID_ADDRESS at 0x0000000000000000
Exception Note: EXC_CORPSE_NOTIFY
Termination Signal: Segmentation fault: 11
Termination Reason: Namespace SIGNAL, Code 0xb
Terminating Process: exc handler [21606]
Thread 0 Crashed:: Dispatch queue: com.apple.main-thread
0 a.out @foo@ foo + 16 (test.c:3)
1 a.out @bar@ bar + 9 (test.c:6)
2 a.out @main@ main + 20 (test.c:8)
3 libdyld.dylib 0x0000000100000000 start + 1
Thread 0 crashed with X86 Thread State (64-bit):
rax: 0x0000000000000000 rbx: 0x0000000000000000 rcx: 0x00007ffee42d81d0 rdx: 0x00007ffee42d8080
rdi: 0x0000000000000001 rsi: 0x00007ffee42d8070 rbp: 0x00007ffee42d8020 rsp: 0x00007ffee42d8020
r8: 0x0000000000000000 r9: 0x0000000000000000 r10: 0x0000000000000000 r11: 0x0000000000000000
r12: 0x0000000000000000 r13: 0x0000000000000000 r14: 0x0000000000000000 r15: 0x0000000000000000
rip: 0x000000010b92af70 rfl: 0x0000000000010202 cr2: 0x0000000000000000
Logical CPU: 2
Error Code: 0x00000006 (no mapping for user data write)
Trap Number: 14
Binary Images:
0x100000000 - 0x200000000 +a.out (0) <@UUID@> @EXEC@

View file

@ -0,0 +1,96 @@
{"app_name":"a.out","timestamp":"2020-11-11 16:12:18.00 -0800","app_version":"","slice_uuid":"9b76648c-9b4e-33a9-a97e-10856e911631","build_version":"","platform":1,"share_with_app_devs":1,"is_first_party":1,"bug_type":"309","os_version":"macOS 11.0.1","incident_id":"598C4706-28B0-4D96-A2F9-AE6973BEC635","name":"a.out"}
{
"uptime" : 180,
"procLaunch" : "2020-11-11 16:12:12.4375 -0800",
"procRole" : "Unspecified",
"exception" : {
"type" : "EXC_BAD_ACCESS",
"signal" : "SIGSEGV",
"subtype" : "KERN_INVALID_ADDRESS at 0x00000000"
},
"userID" : 501,
"modelCode" : "iMacPro1,1",
"coalitionID" : 471,
"osVersion" : {
"train" : "macOS 11.0.1",
"build" : "",
"releaseType" : ""
},
"captureTime" : "2020-11-11 16:12:12.6267 -0800",
"incident" : "598C4706-28B0-4D96-A2F9-AE6973BEC635",
"pid" : 2187,
"cpuType" : "X86-64",
"procName" : "a.out",
"procPath" : "\/private\/tmp\/a.out",
"parentProc" : "fish",
"parentPid" : 1651,
"coalitionName" : "io.alacritty",
"crashReporterKey" : "DCEF35CB-68D5-F524-FF13-060901F52EA8",
"responsiblePid" : 428,
"responsibleProc" : "alacritty",
"bridgeVersion" : {"build":"","train":""},
"sip" : "enabled",
"is_corpse" : 1,
"termination" : {"reason":"Namespace SIGNAL, Code 0xb","signal":"Segmentation fault: 11","byProc":"exc handler","code":11,"namespace":"SIGNAL","byPid":2187,"flags":0},
"asi" : ["dyld2 mode"],
"extMods" : {"caller":{"thread_create":0,"thread_set_state":0,"task_for_pid":0},"system":{"thread_create":0,"thread_set_state":0,"task_for_pid":2067},"targeted":{"thread_create":0,"thread_set_state":0,"task_for_pid":0},"warnings":0},
"threads" : [{"triggered":true,"id":22172,"queue":"com.apple.main-thread","frames":[[0,16240],[0,16265],[0,16292],[1,87601]]}],
"threadState" : {
"r13" : 0,
"rax" : 0,
"rflags" : 66054,
"cpu" : 6,
"rsi" : 140732908048520,
"r14" : 0,
"trap_description" : "(no mapping for user data write)",
"r8" : 0,
"cr2" : 0,
"rdx" : 140732908048536,
"r10" : 0,
"r9" : 0,
"r15" : 0,
"rbx" : 0,
"trap" : 14,
"err" : 6,
"r11" : 0,
"rip" : 4307689328,
"rbp" : 140732908048432,
"rsp" : 140732908048432,
"r12" : 0,
"rcx" : 140732908048880,
"flavor" : "x86_THREAD_STATE",
"rdi" : 1
},
"usedImages" : [
[
"@UUID@",
0,
"P"
],
[
"6a1f593e-3705-314d-bb40-e7f9d502bf81",
140733737017344,
"P"
]
],
"legacyInfo" : {
"imageExtraInfo" : [
{
"size" : 16384,
"arch" : "x86_64",
"path" : "@EXEC@",
"name" : "@NAME@"
},
{
"size" : 241664,
"arch" : "x86_64",
"path" : "\/usr\/lib\/system\/libdyld.dylib",
"name" : "libdyld.dylib"
}
],
"threadTriggered" : {
"index" : 0,
"queue" : "com.apple.main-thread"
}
}
}

View file

@ -0,0 +1,8 @@
void foo() { // Deliberately crashes by storing through a null pointer.
int *i = 0;
*i = 1; // The faulting store: i is null here.
}
void bar() { foo(); } // Only calls foo(), adding one frame to the call stack.
int main(int argc, char **argv) { bar(); } // Entry point; calls bar(). argc and argv are unused.

View file

@ -0,0 +1,10 @@
# RUN: %clang_host -g %S/Inputs/test.c -o %t.out
# RUN: cp %S/Inputs/a.out.ips %t.crash
# RUN: python %S/patch-crashlog.py %t.out %t.crash
# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog %t.crash' 2>&1 | FileCheck %s
# CHECK: Thread[0] EXC_BAD_ACCESS (SIGSEGV) (KERN_INVALID_ADDRESS at 0x00000000)
# CHECK: [ 0] {{.*}}out`foo + 16 at test.c
# CHECK: [ 1] {{.*}}out`bar + 8 at test.c
# CHECK: [ 2] {{.*}}out`main + 19 at test.c
# CHECK: [ 3] {{.*}}start

View file

@ -0,0 +1,45 @@
# -*- python -*-
# RUN: cd %S/../../../../../examples/python && cat %s | %lldb 2>&1 > %t.out
# RUN: cat %t.out | FileCheck %S/Inputs/Assertion.check
script
import crashlog
import json

# The helpers are called directly on a parser built from empty arguments;
# parse() is never invoked, so no crashlog file is read.
parser = crashlog.JSONCrashLogParser("", "", False)

# Process information. The raw string keeps the JSON "\/" escapes intact
# without relying on Python's handling of unknown backslash escapes.
process_info_json = json.loads(r'{"pid" : 287, "procName" : "mediaserverd", "procPath" : "\/usr\/sbin\/mediaserverd"}')
parser.parse_process_info(process_info_json)
assert parser.crashlog.process_id == 287
assert parser.crashlog.process_identifier == "mediaserverd"
assert parser.crashlog.process_path == "/usr/sbin/mediaserverd"

# Crash reason: with a subtype, with neither subtype nor codes, with codes.
crash_reason_json = json.loads('{"type" : "EXC_BAD_ACCESS", "signal" : "SIGSEGV", "subtype" : "KERN_INVALID_ADDRESS"}')
assert parser.parse_crash_reason(crash_reason_json) == "EXC_BAD_ACCESS (SIGSEGV) (KERN_INVALID_ADDRESS)"
crash_reason_json = json.loads('{"type" : "EXC_BAD_ACCESS", "signal" : "SIGSEGV"}')
assert parser.parse_crash_reason(crash_reason_json) == "EXC_BAD_ACCESS (SIGSEGV)"
crash_reason_json = json.loads('{"type" : "EXC_BAD_ACCESS", "signal" : "SIGSEGV", "codes" : "0x0000000000000000, 0x0000000000000000"}')
assert parser.parse_crash_reason(crash_reason_json) == "EXC_BAD_ACCESS (SIGSEGV) (0x0000000000000000, 0x0000000000000000)"

# Thread state: the "x" list is numbered, named registers are copied.
thread_state_json = json.loads('{"x":[268451845,117442566],"lr":7309751904,"cpsr":1073741824,"fp":6093236784,"sp":6093236704,"esr":1442840704,"pc":7309755088}')
registers = parser.parse_thread_registers(thread_state_json)
assert registers['x0'] == 268451845
assert registers['x1'] == 117442566
assert registers['lr'] == 7309751904
assert registers['cpsr'] == 1073741824
assert registers['fp'] == 6093236784
assert registers['sp'] == 6093236704
assert registers['esr'] == 1442840704
assert registers['pc'] == 7309755088

# Threads and frames. parse_frames() looks images up through parser.data,
# so the image tables are installed by hand first.
parser.data = json.loads('{"usedImages":[["f4d85377-f215-3da3-921e-3fe870e622e9",7309737984,"P"]],"legacyInfo":{"imageExtraInfo":[{"size":204800,"arch":"arm64e","path":"/usr/lib/system/libsystem_kernel.dylib","name":"libsystem_kernel.dylib"}]}}')
thread_json = json.loads('[{"triggered":true,"id":3835,"queue":"com.apple.bwgraph.devicevendor","frames":[[0,101472],[0,408892]]}]')
parser.parse_threads(thread_json)
assert len(parser.crashlog.threads) == 1
assert parser.crashlog.threads[0].queue == "com.apple.bwgraph.devicevendor"
assert len(parser.crashlog.threads[0].frames) == 2
# pc is the image address plus the frame offset: 7309737984 + 101472.
assert parser.crashlog.threads[0].frames[0].pc == 7309839456
assert parser.crashlog.threads[0].frames[0].description == 101472
exit()

View file

@ -4,7 +4,7 @@
# CHECK-LABEL: {{S}}KIP BEYOND CHECKS
script
import crashlog
crash_log_parser = crashlog.CrashLogParser
crash_log_parser = crashlog.TextCrashLogParser
crash_log = crashlog.CrashLog
images = [
"0x10b60b000 - 0x10f707fff com.apple.LLDB.framework (1.1000.11.38.2 - 1000.11.38.2) <96E36F5C-1A83-39A1-8713-5FDD9701C3F1> /Applications/Xcode.app/Contents/SharedFrameworks/LLDB.framework/LLDB",

View file

@ -0,0 +1,60 @@
#!/usr/bin/env python
import json
import os
import re
import subprocess
import sys
class CrashLogPatcher:
    """Replace the @...@ placeholders in a crashlog with values for a binary.

    data is the crashlog text, binary is the path of the executable the
    crashlog should refer to, and offsets is an optional mapping from symbol
    name to the offset added to that symbol's address. The patched text is
    kept in self.data.
    """

    # Matches `nm` lines of the form "<hex address> T _<symbol>".
    SYMBOL_REGEX = re.compile(r'^([0-9a-fA-F]+) T _(.*)$')
    # Matches `dwarfdump --uuid` lines "UUID: <uuid> (<text>) <rest>".
    UUID_REGEX = re.compile(r'UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*')

    def __init__(self, data, binary, offsets):
        self.data = data
        self.binary = binary
        self.offsets = offsets

    def patch_executable(self):
        """Replace @EXEC@ with the binary's path and @NAME@ with its basename."""
        self.data = self.data.replace("@EXEC@", self.binary)
        self.data = self.data.replace("@NAME@", os.path.basename(self.binary))

    def patch_uuid(self):
        """Replace @UUID@ with the UUID that dwarfdump reports for the binary.

        The data is left untouched when the output does not start with a
        line matching UUID_REGEX.
        """
        # universal_newlines=True makes check_output return text. Without it
        # Python 3 returns bytes, which the str-based regex cannot match.
        output = subprocess.check_output(
            ['dwarfdump', '--uuid', self.binary], universal_newlines=True)
        m = self.UUID_REGEX.match(output)
        if m:
            self.data = self.data.replace("@UUID@", m.group(1))

    def patch_addresses(self):
        """Replace each @<symbol>@ with the symbol's address plus its offset.

        Addresses are read from `nm`. Only symbols listed in self.offsets
        are patched, and nothing is run when no offsets were given.
        """
        if not self.offsets:
            return
        # Text output for the same reason as in patch_uuid().
        output = subprocess.check_output(
            ['nm', self.binary], universal_newlines=True)
        for line in output.splitlines():
            m = self.SYMBOL_REGEX.match(line)
            if not m:
                continue
            symbol = m.group(2)
            if symbol in self.offsets:
                patch_addr = int(m.group(1), 16) + int(self.offsets[symbol])
                self.data = self.data.replace("@{}@".format(symbol),
                                              hex(patch_addr))
if __name__ == '__main__':
    # Arguments: <binary> <crashlog> [<JSON object of symbol offsets>]
    argv = sys.argv
    binary_path = argv[1]
    crashlog_path = argv[2]
    symbol_offsets = None
    if len(argv) > 3:
        symbol_offsets = json.loads(argv[3])

    with open(crashlog_path, 'r') as source:
        contents = source.read()

    patcher = CrashLogPatcher(contents, binary_path, symbol_offsets)
    for patch_step in (patcher.patch_executable,
                       patcher.patch_uuid,
                       patcher.patch_addresses):
        patch_step()

    # The crashlog is rewritten in place with the patched text.
    with open(crashlog_path, 'w') as destination:
        destination.write(patcher.data)

View file

@ -0,0 +1,10 @@
# RUN: %clang_host -g %S/Inputs/test.c -o %t.out
# RUN: cp %S/Inputs/a.out.crash %t.crash
# RUN: python %S/patch-crashlog.py %t.out %t.crash '{"main":20, "bar":9, "foo":16}'
# RUN: %lldb %t.out -o 'command script import lldb.macosx.crashlog' -o 'crashlog %t.crash' 2>&1 | FileCheck %s
# CHECK: Thread[0] EXC_BAD_ACCESS (SIGSEGV) (KERN_INVALID_ADDRESS at 0x0000000000000000)
# CHECK: [ 0] {{.*}}out`foo + 16 at test.c
# CHECK: [ 1] {{.*}}out`bar + 8 at test.c
# CHECK: [ 2] {{.*}}out`main + 19 at test.c
# CHECK: [ 3] {{.*}}start + 1