diff --git a/CMakeLists.txt b/CMakeLists.txt index 76722c5c6..5710d25f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,8 +107,10 @@ add_subdirectory(third-party/spdlog) # build zydis third party library for disassembling x86 option(ZYDIS_BUILD_TOOLS "" OFF) option(ZYDIS_BUILD_EXAMPLES "" OFF) +option(ZYDIS_BUILD_SHARED_LIB "" ON) add_subdirectory("third-party/zydis") + # windows memory management lib IF (WIN32) add_subdirectory(third-party/mman) diff --git a/common/cross_os_debug/xdbg.cpp b/common/cross_os_debug/xdbg.cpp index 895b589c9..50fc908af 100644 --- a/common/cross_os_debug/xdbg.cpp +++ b/common/cross_os_debug/xdbg.cpp @@ -1,6 +1,7 @@ /*! * @file xdbg.cpp * Debugging utility library. This hides the platform specific details of the debugger. + * Nothing in here should hold state, that should all be managed in Debugger. */ #include @@ -44,6 +45,7 @@ std::string ThreadID::to_string() const { /*! * Get the ThreadID of whatever called this function. + * The runtime calls this to get the Thread to be debugged. */ ThreadID get_current_thread_id() { return ThreadID(syscall(SYS_gettid)); @@ -78,26 +80,52 @@ bool attach_and_break(const ThreadID& tid) { return false; } - // we could technically hang here forever if runtime ignores the signal. - int status; - if (waitpid(tid.id, &status, 0) < 0) { - printf("[Debugger] Failed to waitpid: %s. The runtime is probably in a bad state now.\n", - strerror(errno)); - return false; - } - - // double check that we stopped for the right reason - if (!WIFSTOPPED(status)) { - printf("[Debugger] Failed to STOP: %s. The runtime is probably in a bad state now.\n", - strerror(errno)); - return false; - } return true; } } +/*! + * Has the given thread transitioned from running to stopped? + * If the thread has transitioned to stop, check_stopped should only return true once. + * If true, populates out with information about why it stopped. + * This shouldn't hang if the thread doesn't stop. 
+ */ +bool check_stopped(const ThreadID& tid, SignalInfo* out) { + int status = 0; // waitpid(WNOHANG) returns 0 without writing status if nothing changed + if (waitpid(tid.id, &status, WNOHANG) < 0) { + printf("[Debugger] Failed to waitpid: %s.\n", strerror(errno)); + // assert(false); // todo, temp because I think we should never hit this. + return false; + } + + if (WIFSTOPPED(status)) { + auto sig = WSTOPSIG(status); + if (out) { + switch (sig) { + case SIGSEGV: + out->kind = SignalInfo::SEGFAULT; + break; + case SIGFPE: + out->kind = SignalInfo::MATH_EXCEPTION; + break; + case SIGTRAP: + out->kind = SignalInfo::BREAK; + break; + + default: + out->kind = SignalInfo::UNKNOWN; + } + } + + return true; + } + + return false; +} + /*! * Open memory of target. Assumes we are already connected and halted. + * If successful returns true and populates out with a "handle" to the memory. */ bool open_memory(const ThreadID& tid, MemoryHandle* out) { int fd = open(fmt::format("/proc/{}/mem", tid.id).c_str(), O_RDWR); @@ -194,9 +222,47 @@ bool get_regs_now(const ThreadID& tid, Regs* out) { return true; } +/*! + * Set all registers now. 
Must be attached and stopped + */ +bool set_regs_now(const ThreadID& tid, const Regs& out) { + user regs = {}; + if (ptrace(PTRACE_GETREGS, tid.id, nullptr, &regs) < 0) { + printf("[Debugger] Failed to PTRACE_GETREGS %s\n", strerror(errno)); + return false; + } + + regs.regs.rax = out.gprs[0]; + regs.regs.rcx = out.gprs[1]; + regs.regs.rdx = out.gprs[2]; + regs.regs.rbx = out.gprs[3]; + regs.regs.rsp = out.gprs[4]; + regs.regs.rbp = out.gprs[5]; + regs.regs.rsi = out.gprs[6]; + regs.regs.rdi = out.gprs[7]; + regs.regs.r8 = out.gprs[8]; + regs.regs.r9 = out.gprs[9]; + regs.regs.r10 = out.gprs[10]; + regs.regs.r11 = out.gprs[11]; + regs.regs.r12 = out.gprs[12]; + regs.regs.r13 = out.gprs[13]; + regs.regs.r14 = out.gprs[14]; + regs.regs.r15 = out.gprs[15]; + regs.regs.rip = out.rip; + + if (ptrace(PTRACE_SETREGS, tid.id, nullptr, &regs) < 0) { + printf("[Debugger] Failed to PTRACE_SETREGS %s\n", strerror(errno)); + return false; + } + // todo, set fprs. + return true; +} + /*! * Break the given thread. Must be attached and running. - * Waits for the given thread to actually stop first. + * Does not wait for the thread to stop. + * Eventually check_stopped should return true with a reason of BREAK, unless the target gets really + * lucky and manages to crash before the SIGTRAP reaches the target */ bool break_now(const ThreadID& tid) { if (ptrace(PTRACE_INTERRUPT, tid.id, nullptr, nullptr) < 0) { @@ -204,19 +270,6 @@ bool break_now(const ThreadID& tid) { return false; } - int status; - if (waitpid(tid.id, &status, 0) < 0) { - printf("[Debugger] Failed to waitpid: %s. The runtime is probably in a bad state now.\n", - strerror(errno)); - return false; - } - - if (!WIFSTOPPED(status)) { - printf("[Debugger] Failed to STOP: %s. 
The runtime is probably in a bad state now.\n", - strerror(errno)); - return false; - } - return true; } @@ -295,6 +348,13 @@ bool write_goal_memory(const u8* src_buffer, return false; } +bool check_stopped(const ThreadID& tid, SignalInfo* out) { + return false; +} + +bool set_regs_now(const ThreadID& tid, const Regs& out) { + return false; +} #endif const char* gpr_names[] = {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", diff --git a/common/cross_os_debug/xdbg.h b/common/cross_os_debug/xdbg.h index f58e2725d..a2916899d 100644 --- a/common/cross_os_debug/xdbg.h +++ b/common/cross_os_debug/xdbg.h @@ -19,6 +19,9 @@ namespace xdbg { #ifdef __linux +/*! + * Identification for a thread. + */ struct ThreadID { pid_t id = 0; @@ -28,6 +31,9 @@ struct ThreadID { ThreadID() = default; }; +/*! + * Handle for the memory of a process. + */ struct MemoryHandle { int fd; }; @@ -45,12 +51,18 @@ struct MemoryHandle { }; #endif +/*! + * The info required to debug the target. + */ struct DebugContext { - ThreadID tid; - uintptr_t base; - uint32_t s7; + ThreadID tid; //! The target's GOAL thread + uintptr_t base; //! The base address for the GOAL memory + uint32_t s7; //! The value of s7 (GOAL address) }; +/*! + * The x86-64 registers, including rip. + */ struct Regs { u64 gprs[16]; u128 xmms[16]; @@ -63,12 +75,25 @@ struct Regs { std::string print_xmms_as_flt_vec() const; }; +/*! + * Information about why the target has stopped. 
+ */ +struct SignalInfo { + enum Kind { + SEGFAULT, // access bad memory + BREAK, // hit a breakpoint or execute int3 + MATH_EXCEPTION, // divide by zero + UNKNOWN // some other signal that is unsupported + } kind = UNKNOWN; +}; + // Functions ThreadID get_current_thread_id(); bool attach_and_break(const ThreadID& tid); void allow_debugging(); bool detach_and_resume(const ThreadID& tid); bool get_regs_now(const ThreadID& tid, Regs* out); +bool set_regs_now(const ThreadID& tid, const Regs& in); bool break_now(const ThreadID& tid); bool cont_now(const ThreadID& tid); bool open_memory(const ThreadID& tid, MemoryHandle* out); @@ -93,4 +118,6 @@ bool write_goal_value(T& value, return write_goal_memory(&value, sizeof(value), goal_addr, context, handle); } +bool check_stopped(const ThreadID& tid, SignalInfo* out); + } // namespace xdbg diff --git a/common/goal_constants.h b/common/goal_constants.h index 7759c81ec..ea8d47375 100644 --- a/common/goal_constants.h +++ b/common/goal_constants.h @@ -11,6 +11,9 @@ constexpr int POINTER_SIZE = 4; constexpr int BASIC_OFFSET = 4; constexpr int STRUCTURE_ALIGNMENT = 16; +constexpr s32 GOAL_MAX_SYMBOLS = 0x2000; +constexpr s32 SYM_INFO_OFFSET = 0xff34; + enum class RegKind { GPR_64, FLOAT, INT_128, FLOAT_4X, INVALID }; constexpr u32 GOAL_NEW_METHOD = 0; // method ID of GOAL new diff --git a/doc/goal_dbg_doc.md b/doc/goal_dbg_doc.md index 7e75acf48..dc30771f5 100644 --- a/doc/goal_dbg_doc.md +++ b/doc/goal_dbg_doc.md @@ -50,4 +50,336 @@ Dump all GOAL memory to a file. Must be stopped. ``` The path is relative to the Jak project folder. -The file will be the exact size of `EE_MAIN_MEM_SIZE`, but the first `EE_LOW_MEM_PROTECT` bytes are zero, as these cannot be written or read. \ No newline at end of file +The file will be the exact size of `EE_MAIN_MEM_SIZE`, but the first `EE_LOW_MEM_PROTECT` bytes are zero, as these cannot be written or read. 
+ +## Address Spec +Anywhere an address can be used, you can also use an "address spec", which gives you easier ways to input addresses. For now, the address spec is pretty simple, but there will be more features in the future. + +- `(sym-val <sym-name>)`. Get the address stored in the symbol with the given name. Currently there's no check to see if the symbol actually stores an address or not. This is like "evaluate `<sym-name>`, then treat the value as an address" +- `(sym <sym-name>)`. Get the address of the symbol object itself, including the basic offset. + +Example to show the difference: +```lisp + +;; the symbol is at 0x142d1c +gc> (inspect '*kernel-context*) +[ 142d1c] symbol + name: *kernel-context* + hash: #x8f9a35ff + value: # +1322268 + +;; the object is at 0x164a84 +gc> (inspect *kernel-context*) +[00164a84] kernel-context + prevent-from-run: 65 + require-for-run: 0 + allow-to-run: 0 + next-pid: 2 + fast-stack-top: 1879064576 + current-process: #f + relocating-process: #f + relocating-min: 0 + relocating-max: 0 + relocating-offset: 0 + low-memory-message: #t +1460868 + +;; break, so we can debug +gc> (:break) +Read symbol table (159872 bytes, 226 reads, 225 symbols, 1.96 ms) +rax: 0xfffffffffffffdfc rcx: 0x00007f745b508361 rdx: 0x00007f745b3ffca0 rbx: 0x0000000000147d24 +rsp: 0x00007f745b3ffc40 rbp: 0x00007f745b3ffcc0 rsi: 0x0000000000000000 rdi: 0x0000000000000000 + r8: 0x0000000000000000 r9: 0x0000000000000008 r10: 0x00007f745b3ffca0 r11: 0x0000000000000293 +r12: 0x0000000000147d24 r13: 0x00007ffdff32cfaf r14: 0x00007ffdff32cfb0 r15: 0x00007f745b3fffc0 +rip: 0x00007f745b508361 + +;; reads the symbol's memory: +;; at 0x142d1c there is the value 0x164a84 +gc> (dw (sym *kernel-context*) 1) + 0x00142d1c: 0x00164a84 + +;; treat the symbol's value as an address and read the memory there. +;; notice that the 0x41 in the first word is decimal 65, the first field of the kernel-context. 
+gc> (dw (sym-val *kernel-context*) 10) + 0x00164a84: 0x00000041 0x00000000 0x00000000 0x00000002 + 0x00164a94: 0x70004000 0x00147d24 0x00147d24 0x00000000 + 0x00164aa4: 0x00000000 0x00000000 +``` + + +## `(:pm)` +Print memory + +``` +(:pm elt-size addr elt-count [:print-mode mode]) +``` + +The element size is the size of each word to print. It can be 1, 2, 4, 8 currently. The address is the GOAL Address to print at. The elt-count is the number of words to print. The print mode is optional and defaults to `hex`. There is also an `unsigned-dec`, a `signed-dec`, and `float`. The `float` mode only works when `elt-size` is 4. + +There are some useful macros inspired by the original PS2 TOOL debugger (`dsedb`) for the different sizes. They are `db`, `dh`, `dw`, and `dd` for 1, 2, 4, and 8 byte hex prints which follow the naming convention of MIPS load/stores. There is also a `df` for printing floats. See the example below. + + +```lisp +OpenGOAL Compiler 0.1 + +;; first connect the listener +g> (lt) +[Listener] Socket connected established! (took 0 tries). Waiting for version... +Got version 0.1 OK! +[OUTPUT] reset #x147d24 #x2000000000 53371 + +[Debugger] Context: valid = true, s7 = 0x147d24, base = 0x2000000000, tid = 53371 + +;; define a new array of floats, and set a few values +gc> (define x (new 'global 'array 'float 12)) +1452224 + +gc> (set! (-> x 0) 1.0) +1065353216 + +gc> (set! (-> x 2) 2.0) +1073741824 + +;; attach the debugger (halts the target) +gc> (dbg) +[Debugger] PTRACE_ATTACHED! Waiting for process to stop... +rax: 0xfffffffffffffdfc rcx: 0x00007f6b94964361 rdx: 0x00007f6b8fffeca0 rbx: 0x0000000000147d24 +rsp: 0x00007f6b8fffec40 rbp: 0x00007f6b8fffecc0 rsi: 0x0000000000000000 rdi: 0x0000000000000000 + r8: 0x0000000000000000 r9: 0x000000000000000b r10: 0x00007f6b8fffeca0 r11: 0x0000000000000293 +r12: 0x0000000000147d24 r13: 0x00007ffd16fb117f r14: 0x00007ffd16fb1180 r15: 0x00007f6b8fffefc0 +rip: 0x00007f6b94964361 +Debugger connected. 
+ +;; print memory as 10 bytes +gc> (db 1452224 10) + 0x001628c0: 0x00 0x00 0x80 0x3f 0x00 0x00 0x00 0x00 0x00 0x00 + +;; print memory as 10 words (32-bit words) +gc> (dw 1452224 10) + 0x001628c0: 0x3f800000 0x00000000 0x40000000 0x00000000 + 0x001628d0: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x001628e0: 0x00000000 0x00000000 + +;; print memory as 10 floats +gc> (df 1452224 10) + 0x001628c0: 1.0000 0.0000 2.0000 0.0000 + 0x001628d0: 0.0000 0.0000 0.0000 0.0000 + 0x001628e0: 0.0000 0.0000 + +;; set some more values, must unbreak first +gc> (:cont) +gc> (set! (-> x 1) (the-as float -12)) +-12 + +;; break and print as decimal +gc> (:break) +rax: 0xfffffffffffffdfc rcx: 0x00007f6b94964361 rdx: 0x00007f6b8fffeca0 rbx: 0x0000000000147d24 +rsp: 0x00007f6b8fffec40 rbp: 0x00007f6b8fffecc0 rsi: 0x0000000000000000 rdi: 0x0000000000000000 + r8: 0x0000000000000000 r9: 0x0000000000000004 r10: 0x00007f6b8fffeca0 r11: 0x0000000000000293 +r12: 0x0000000000147d24 r13: 0x00007ffd16fb117f r14: 0x00007ffd16fb1180 r15: 0x00007f6b8fffefc0 +rip: 0x00007f6b94964361 +gc> (:pm 4 1452224 10 :print-mode unsigned-dec) + 0x001628c0: 1065353216 4294967284 1073741824 0 + 0x001628d0: 0 0 0 0 + 0x001628e0: 0 0 +gc> (:pm 4 1452224 10 :print-mode signed-dec) + 0x001628c0: 1065353216 -12 1073741824 0 + 0x001628d0: 0 0 0 0 + 0x001628e0: 0 0 +``` + + +## `(:disasm)` +Disassemble instructions in memory + +``` +(:disasm addr len) +``` + +Example (after doing a `(lt)`, `(blg)`, `(dbg)`): +```asm +gc> (:disasm (sym-val basic-type?) 
80) +[0x2000162ae4] mov eax, [r15+rdi*1-0x04] +[0x2000162ae9] mov ecx, [r15+r14*1+0x38] +[0x2000162af1] mov rdx, rax +[0x2000162af4] cmp rdx, rsi +[0x2000162af7] jnz 0x0000002000162B0F +[0x2000162afd] mov eax, [r15+r14*1+0x08] +[0x2000162b05] jmp 0x0000002000162B32 +[0x2000162b0a] jmp 0x0000002000162B19 +[0x2000162b0f] mov rax, r14 +[0x2000162b12] add rax, 0x00 +[0x2000162b19] mov eax, [r15+rdx*1+0x04] +[0x2000162b1e] mov rdx, rax +[0x2000162b21] cmp rax, rcx +[0x2000162b24] jnz 0x0000002000162AF4 +[0x2000162b2a] mov eax, [r15+r14*1] +[0x2000162b32] ret + +``` + +For now, the disassembly is pretty basic, but it should eventually support GOAL symbols. + +## Breakpoints + +``` +OpenGOAL Compiler 0.1 + +;; first, connect to the target +g > (lt) +[Listener] Socket connected established! (took 0 tries). Waiting for version... +Got version 0.1 OK! +[OUTPUT] reset #x147d24 #x2000000000 322300 + +[Debugger] Context: valid = true, s7 = 0x147d24, base = 0x2000000000, tid = 322300 + + +;; run an infinite loop. This will time out because we don't see a response from the GOAL kernel that our function +;; has returned. +gc > (while #t (+ 1 2 3 4 5 6 7)) + Error - target has timed out. If it is stuck in a loop, it must be manually killed. +Runtime is not responding. Did it crash? + + +;; so we can attach the debugger! +gc > (dbg) +[Debugger] PTRACE_ATTACHED! Waiting for process to stop... +Target has stopped. Run (:di) to get more information. 
+Read symbol table (146816 bytes, 124 reads, 123 symbols, 2.02 ms) +rax: 0x000000000000000a rcx: 0x0000000000000005 rdx: 0x0000000000000000 rbx: 0x0000002000000000 +rsp: 0x00007fddcde75c58 rbp: 0x00007fddcde75cc0 rsi: 0x0000000000000000 rdi: 0x0000000000000000 + r8: 0x0000000000147d24 r9: 0x0000002000000000 r10: 0x00007fddcde75ca0 r11: 0x0000000000000000 +r12: 0x0000000000147d24 r13: 0x0000002007ffbf14 r14: 0x0000000000147d24 r15: 0x0000002000000000 +rip: 0x0000002007ffbf3b + [0x2007ffbf1b] add [rax], al + [0x2007ffbf1d] add [rcx+0x02], bh + [0x2007ffbf23] add rax, rcx + [0x2007ffbf26] mov ecx, 0x03 + [0x2007ffbf2b] add rax, rcx + [0x2007ffbf2e] mov ecx, 0x04 + [0x2007ffbf33] add rax, rcx + [0x2007ffbf36] mov ecx, 0x05 +- [0x2007ffbf3b] add rax, rcx + [0x2007ffbf3e] mov ecx, 0x06 + [0x2007ffbf43] add rax, rcx + [0x2007ffbf46] mov ecx, 0x07 + [0x2007ffbf4b] add rax, rcx + [0x2007ffbf4e] mov eax, [r15+r14*1+0x08] + [0x2007ffbf56] mov rcx, r14 + [0x2007ffbf59] add rcx, 0x00 + [0x2007ffbf60] cmp rax, rcx + [0x2007ffbf63] jnz 0x0000002007FFBF19 + [0x2007ffbf69] mov eax, [r15+r14*1] + [0x2007ffbf71] ret + [0x2007ffbf72] add [rax], al + [0x2007ffbf74] add [rax], al + [0x2007ffbf76] add [rax], al + [0x2007ffbf78] add [rax], al + [0x2007ffbf7a] INVALID (0x00) + +Debugger connected. + +;; currently rcx = 5. let's set a breakpoint where it should be 7 +gcs> (:bp #x2007ffbf4b) + +;; and continue... +gcs> (:cont) + +;; it hits the breakpoint. (this message should have more information...) +Target has stopped. Run (:di) to get more information. 
+ +;; get some info: +gcs> (:di) +Read symbol table (146816 bytes, 124 reads, 123 symbols, 1.46 ms) +rax: 0x0000000000000015 rcx: 0x0000000000000007 rdx: 0x0000000000000000 rbx: 0x0000002000000000 +rsp: 0x00007fddcde75c58 rbp: 0x00007fddcde75cc0 rsi: 0x0000000000000000 rdi: 0x0000000000000000 + r8: 0x0000000000147d24 r9: 0x0000002000000000 r10: 0x00007fddcde75ca0 r11: 0x0000000000000000 +r12: 0x0000000000147d24 r13: 0x0000002007ffbf14 r14: 0x0000000000147d24 r15: 0x0000002000000000 +rip: 0x0000002007ffbf4c + [0x2007ffbf2c] add eax, ecx + [0x2007ffbf2e] mov ecx, 0x04 + [0x2007ffbf33] add rax, rcx + [0x2007ffbf36] mov ecx, 0x05 + [0x2007ffbf3b] add rax, rcx + [0x2007ffbf3e] mov ecx, 0x06 + [0x2007ffbf43] add rax, rcx + [0x2007ffbf46] mov ecx, 0x07 + [0x2007ffbf4b] int3 ;; oops! should probably patch this in the disassembly! +- [0x2007ffbf4c] add eax, ecx + [0x2007ffbf4e] mov eax, [r15+r14*1+0x08] + [0x2007ffbf56] mov rcx, r14 + [0x2007ffbf59] add rcx, 0x00 + [0x2007ffbf60] cmp rax, rcx + [0x2007ffbf63] jnz 0x0000002007FFBF19 + [0x2007ffbf69] mov eax, [r15+r14*1] + [0x2007ffbf71] ret + [0x2007ffbf72] add [rax], al + [0x2007ffbf74] add [rax], al + [0x2007ffbf76] add [rax], al + [0x2007ffbf78] add [rax], al + [0x2007ffbf7a] add [rax], al + [0x2007ffbf7c] add [rax], al + [0x2007ffbf7e] add [rax], al + [0x2007ffbf80] in al, 0x08 + [0x2007ffbf82] INVALID (0x16) + [0x2007ffbf82] add [rax], al + [0x2007ffbf84] add [rcx], al + [0x2007ffbf86] add [rbx], al + [0x2007ffbf88] add [rax], al + [0x2007ffbf8a] INVALID (0x00) + +;; remove the breakpoint +gcs> (:ubp #x2007ffbf4b) + +;; continue, it stays running +gcs> (:cont) +gcr> + +;; break and check, the code is back to normal! +gcr> (:break) +Target has stopped. Run (:di) to get more information. 
+Read symbol table (146816 bytes, 124 reads, 123 symbols, 1.28 ms) +rax: 0x0000000000000015 rcx: 0x0000000000000007 rdx: 0x0000000000000000 rbx: 0x0000002000000000 +rsp: 0x00007fddcde75c58 rbp: 0x00007fddcde75cc0 rsi: 0x0000000000000000 rdi: 0x0000000000000000 + r8: 0x0000000000147d24 r9: 0x0000002000000000 r10: 0x00007fddcde75ca0 r11: 0x0000000000000000 +r12: 0x0000000000147d24 r13: 0x0000002007ffbf14 r14: 0x0000000000147d24 r15: 0x0000002000000000 +rip: 0x0000002007ffbf4b + [0x2007ffbf2b] add rax, rcx + [0x2007ffbf2e] mov ecx, 0x04 + [0x2007ffbf33] add rax, rcx + [0x2007ffbf36] mov ecx, 0x05 + [0x2007ffbf3b] add rax, rcx + [0x2007ffbf3e] mov ecx, 0x06 + [0x2007ffbf43] add rax, rcx + [0x2007ffbf46] mov ecx, 0x07 +- [0x2007ffbf4b] add rax, rcx + [0x2007ffbf4e] mov eax, [r15+r14*1+0x08] + [0x2007ffbf56] mov rcx, r14 + [0x2007ffbf59] add rcx, 0x00 + [0x2007ffbf60] cmp rax, rcx + [0x2007ffbf63] jnz 0x0000002007FFBF19 + [0x2007ffbf69] mov eax, [r15+r14*1] + [0x2007ffbf71] ret + [0x2007ffbf72] add [rax], al + [0x2007ffbf74] add [rax], al + [0x2007ffbf76] add [rax], al + [0x2007ffbf78] add [rax], al + [0x2007ffbf7a] add [rax], al + [0x2007ffbf7c] add [rax], al + [0x2007ffbf7e] add [rax], al + [0x2007ffbf80] in al, 0x08 + [0x2007ffbf82] INVALID (0x16) + [0x2007ffbf82] add [rax], al + [0x2007ffbf84] add [rcx], al + [0x2007ffbf86] add [rbx], al + [0x2007ffbf88] add [rax], al + +gcs> + +;; we can still properly exit from the target, even in this state! +gcs> (e) +Tried to reset a halted target, detaching... + Error - target has timed out. If it is stuck in a loop, it must be manually killed. 
+[Listener] Closed connection to target +``` \ No newline at end of file diff --git a/game/kernel/kmalloc.cpp b/game/kernel/kmalloc.cpp index 5a7ed0cdd..0dc84e9c7 100644 --- a/game/kernel/kmalloc.cpp +++ b/game/kernel/kmalloc.cpp @@ -6,6 +6,7 @@ */ #include +#include "common/goal_constants.h" #include "kmalloc.h" #include "kprint.h" #include "kscheme.h" diff --git a/game/kernel/kscheme.cpp b/game/kernel/kscheme.cpp index 049c2f123..4aeb30700 100644 --- a/game/kernel/kscheme.cpp +++ b/game/kernel/kscheme.cpp @@ -7,6 +7,7 @@ #include #include "kscheme.h" #include "common/common_types.h" +#include "common/goal_constants.h" #include "kmachine.h" #include "klisten.h" #include "kmalloc.h" diff --git a/game/kernel/kscheme.h b/game/kernel/kscheme.h index 69cfc1518..37e2edd69 100644 --- a/game/kernel/kscheme.h +++ b/game/kernel/kscheme.h @@ -9,6 +9,7 @@ #define JAK_KSCHEME_H #include "common/common_types.h" +#include "common/goal_constants.h" #include "kmachine.h" #include "kmalloc.h" @@ -19,9 +20,6 @@ extern Ptr s7; extern Ptr SymbolTable2; extern Ptr LastSymbol; -constexpr s32 GOAL_MAX_SYMBOLS = 0x2000; - -constexpr s32 SYM_INFO_OFFSET = 0xff34; constexpr u32 EMPTY_HASH = 0x8454B6E6; constexpr u32 OFFSET_MASK = 7; constexpr u32 CRC_POLY = 0x04c11db7; diff --git a/goal_src/goal-lib.gc b/goal_src/goal-lib.gc index 60cbf7a14..c932cb985 100644 --- a/goal_src/goal-lib.gc +++ b/goal_src/goal-lib.gc @@ -102,6 +102,37 @@ ) ) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; DEBUGGER MACROS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defmacro db (&rest args) + `(:pm 1 ,@args) + ) + +(defmacro dh (&rest args) + `(:pm 2 ,@args) + ) + +(defmacro dw (&rest args) + `(:pm 4 ,@args) + ) + +(defmacro dd (&rest args) + `(:pm 8 ,@args) + ) + +(defmacro df (&rest args) + `(:pm 4 ,@args :print-mode float) + ) + +(defmacro segfault () + `(-> (the (pointer int) 0)) + ) + +(defmacro fpe () + `(/ 0 0) + ) ;;;;;;;;;;;;;;;;;;; ;; GOAL Syntax diff --git a/goalc/CMakeLists.txt b/goalc/CMakeLists.txt index 
d52ed1fb6..8b1a735f3 100644 --- a/goalc/CMakeLists.txt +++ b/goalc/CMakeLists.txt @@ -4,6 +4,7 @@ add_library(compiler emitter/ObjectFileData.cpp emitter/ObjectGenerator.cpp emitter/Register.cpp + emitter/disassemble.cpp compiler/Compiler.cpp compiler/Env.cpp compiler/Val.cpp @@ -35,10 +36,10 @@ add_library(compiler add_executable(goalc main.cpp) IF (WIN32) - target_link_libraries(compiler goos type_system mman common_util spdlog cross_os_debug cross_sockets) + target_link_libraries(compiler goos type_system mman common_util spdlog cross_os_debug cross_sockets Zydis) ELSE () - target_link_libraries(compiler goos type_system common_util spdlog cross_os_debug cross_sockets) + target_link_libraries(compiler goos type_system common_util spdlog cross_os_debug cross_sockets Zydis) ENDIF () target_link_libraries(goalc goos compiler type_system) diff --git a/goalc/compiler/Compiler.cpp b/goalc/compiler/Compiler.cpp index ed6dcf20d..d774e610a 100644 --- a/goalc/compiler/Compiler.cpp +++ b/goalc/compiler/Compiler.cpp @@ -26,12 +26,21 @@ void Compiler::execute_repl() { while (!m_want_exit) { try { // 1). get a line from the user (READ) - std::string prompt; + std::string prompt = "g"; if (m_listener.is_connected()) { - prompt = "gc"; + prompt += "c"; } else { - prompt = "g"; + prompt += " "; } + + if (m_debugger.is_halted()) { + prompt += "s"; + } else if (m_debugger.is_attached()) { + prompt += "r"; + } else { + prompt += " "; + } + Object code = m_goos.reader.read_from_stdin(prompt); // 2). 
compile @@ -246,6 +255,10 @@ std::vector Compiler::run_test_no_load(const std::string& source_co } void Compiler::shutdown_target() { + if (m_debugger.is_attached()) { + m_debugger.detach(); + } + if (m_listener.is_connected()) { m_listener.send_reset(true); } diff --git a/goalc/compiler/Compiler.h b/goalc/compiler/Compiler.h index be5a0fa05..548ba27ee 100644 --- a/goalc/compiler/Compiler.h +++ b/goalc/compiler/Compiler.h @@ -196,6 +196,12 @@ class Compiler { Val* compile_break(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_cont(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_dump_all(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_pm(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_di(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_disasm(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_bp(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_ubp(const goos::Object& form, const goos::Object& rest, Env* env); + u32 parse_address_spec(const goos::Object& form); // Macro Val* compile_gscond(const goos::Object& form, const goos::Object& rest, Env* env); diff --git a/goalc/compiler/compilation/Atoms.cpp b/goalc/compiler/compilation/Atoms.cpp index 8ca7552f5..9da4da6fc 100644 --- a/goalc/compiler/compilation/Atoms.cpp +++ b/goalc/compiler/compilation/Atoms.cpp @@ -58,6 +58,11 @@ static const std::unordered_map< {":cont", &Compiler::compile_cont}, {":break", &Compiler::compile_break}, {":dump-all-mem", &Compiler::compile_dump_all}, + {":pm", &Compiler::compile_pm}, + {":di", &Compiler::compile_di}, + {":disasm", &Compiler::compile_disasm}, + {":bp", &Compiler::compile_bp}, + {":ubp", &Compiler::compile_ubp}, // TYPE {"deftype", &Compiler::compile_deftype}, diff --git a/goalc/compiler/compilation/CompilerControl.cpp b/goalc/compiler/compilation/CompilerControl.cpp index 
b2c7ea2f1..4f1b91a88 100644 --- a/goalc/compiler/compilation/CompilerControl.cpp +++ b/goalc/compiler/compilation/CompilerControl.cpp @@ -17,6 +17,11 @@ Val* Compiler::compile_exit(const goos::Object& form, const goos::Object& rest, (void)env; auto args = get_va(form, rest); va_check(form, args, {}, {}); + + if (m_debugger.is_attached()) { + m_debugger.detach(); + } + if (m_listener.is_connected()) { m_listener.send_reset(false); } diff --git a/goalc/compiler/compilation/Debug.cpp b/goalc/compiler/compilation/Debug.cpp index 531ba4eb9..24b20f26b 100644 --- a/goalc/compiler/compilation/Debug.cpp +++ b/goalc/compiler/compilation/Debug.cpp @@ -1,7 +1,50 @@ #include "goalc/compiler/Compiler.h" +#include "goalc/emitter/disassemble.h" #include "common/util/FileUtil.h" #include "third-party/fmt/core.h" +u32 Compiler::parse_address_spec(const goos::Object& form) { + if (form.is_int()) { + return form.as_int(); + } + + if (form.is_pair()) { + auto first = form.as_pair()->car; + auto rest = form.as_pair()->cdr; + if (first.is_symbol() && symbol_string(first) == "sym") { + if (rest.is_pair() && rest.as_pair()->car.is_symbol()) { + u32 addr = m_debugger.get_symbol_address(symbol_string(rest.as_pair()->car)); + if (!addr) { + throw_compile_error(form, "debugger doesn't know where the symbol is"); + } + return addr; + } else { + throw_compile_error(form, "invalid sym form"); + return 0; + } + } else if (first.is_symbol() && symbol_string(first) == "sym-val") { + if (rest.is_pair() && rest.as_pair()->car.is_symbol()) { + u32 addr = 0; + if (!m_debugger.get_symbol_value(symbol_string(rest.as_pair()->car), &addr)) { + throw_compile_error(form, "debugger doesn't know where the symbol is"); + } + return addr; + } else { + throw_compile_error(form, "invalid sym-val form"); + return 0; + } + } + + else { + throw_compile_error(form, "can't parse this address spec"); + return 0; + } + } else { + throw_compile_error(form, "can't parse this address spec"); + return 0; + } +} + Val* 
Compiler::compile_dbg(const goos::Object& form, const goos::Object& rest, Env* env) { // todo - do something with args. (void)form; @@ -104,5 +147,236 @@ Val* Compiler::compile_dump_all(const goos::Object& form, const goos::Object& re } else { file_util::write_binary_file(file_util::get_file_path({dest_file}), buffer, EE_MAIN_MEM_SIZE); } + return get_none(); +} + +namespace { + +enum PrintMode { HEX, UNSIGNED_DEC, SIGNED_DEC, FLOAT }; + +template <typename T> +void mem_print(T* data, int count, u32 start_addr, PrintMode mode) { + // always print 16 bytes / line. + int elt_per_line = 16 / sizeof(T); + + // pad with the correct number of zeros. + std::string format_string; + + switch (mode) { + case HEX: + format_string = "0x{:0" + std::to_string(2 * sizeof(T)) + "x} "; + break; + case UNSIGNED_DEC: + case SIGNED_DEC: + format_string = "{:" + std::to_string(3 * sizeof(T)) + "d} "; + break; + case FLOAT: + format_string = "{:8.4f} "; // todo, is this what we want? + break; + default: + assert(false); + } + + // loop over elts + for (int i = 0; i < count; i++) { + if ((i % elt_per_line) == 0) { + // first in line, so we should print the GOAL address + fmt::print(" 0x{:08x}: ", start_addr + (i * sizeof(T))); + } + + // print the thing + fmt::print(format_string, data[i]); + + if ((i % elt_per_line) == (elt_per_line - 1)) { + // last in line, newline! 
+ fmt::print("\n"); + } + } + fmt::print("\n"); +} +} // namespace + +Val* Compiler::compile_pm(const goos::Object& form, const goos::Object& rest, Env* env) { + (void)env; + auto args = get_va(form, rest); + va_check(form, args, {goos::ObjectType::INTEGER, {}, goos::ObjectType::INTEGER}, + {{"print-mode", {false, goos::ObjectType::SYMBOL}}}); + + int elt_size = args.unnamed.at(0).as_int(); + u32 addr = parse_address_spec(args.unnamed.at(1)); + u32 elts = args.unnamed.at(2).as_int(); + + PrintMode mode = HEX; + + if (args.has_named("print-mode")) { + auto mode_name = symbol_string(args.get_named("print-mode")); + if (mode_name == "hex") { + mode = HEX; + } else if (mode_name == "unsigned-dec") { + mode = UNSIGNED_DEC; + } else if (mode_name == "signed-dec") { + mode = SIGNED_DEC; + } else if (mode_name == "float") { + mode = FLOAT; + } else { + throw_compile_error(form, "Unknown print-mode for :pm " + mode_name); + } + } + + if (!m_debugger.is_halted()) { + throw_compile_error( + form, "Cannot print memory, the debugger must be connected and the target must be halted."); + } + + u64 mem_size = u64(elts) * u64(elt_size); // 64-bit so a huge count can't wrap past the checks + if (mem_size > 1024 * 1024) { + throw_compile_error( + form, + fmt::format(":pm used on over 1 MB of memory, this probably isn't what you meant to do.")); + } + + std::vector<u8> mem; + mem.resize(mem_size); + + if (addr < EE_MAIN_MEM_LOW_PROTECT || (addr + mem_size) > EE_MAIN_MEM_SIZE) { + throw_compile_error(form, ":pm memory out of range"); + } + + m_debugger.read_memory(mem.data(), mem_size, addr); + + switch (mode) { + case HEX: + case UNSIGNED_DEC: + switch (elt_size) { + case 1: + mem_print((u8*)mem.data(), elts, addr, mode); + break; + case 2: + mem_print((u16*)mem.data(), elts, addr, mode); + break; + case 4: + mem_print((u32*)mem.data(), elts, addr, mode); + break; + case 8: + mem_print((u64*)mem.data(), elts, addr, mode); + break; + default: + throw_compile_error(form, ":pm bad element size"); + } + break; + case SIGNED_DEC: + switch (elt_size) { + 
case 1: + mem_print((s8*)mem.data(), elts, addr, mode); + break; + case 2: + mem_print((s16*)mem.data(), elts, addr, mode); + break; + case 4: + mem_print((s32*)mem.data(), elts, addr, mode); + break; + case 8: + mem_print((s64*)mem.data(), elts, addr, mode); + break; + default: + throw_compile_error(form, ":pm bad element size"); + } + break; + case FLOAT: + switch (elt_size) { + case 4: + mem_print((float*)mem.data(), elts, addr, mode); + break; + default: + throw_compile_error(form, ":pm bad element size"); + } + break; + default: + assert(false); + } + + return get_none(); +} + +Val* Compiler::compile_di(const goos::Object& form, const goos::Object& rest, Env* env) { + (void)form; + (void)rest; + (void)env; + if (!m_debugger.is_halted()) { + throw_compile_error( + form, + "Cannot get debug info, the debugger must be connected and the target must be halted."); + } + + m_debugger.get_break_info(); + return get_none(); +} + +Val* Compiler::compile_disasm(const goos::Object& form, const goos::Object& rest, Env* env) { + (void)env; + auto args = get_va(form, rest); + va_check(form, args, {{}, goos::ObjectType::INTEGER}, {}); + u32 addr = parse_address_spec(args.unnamed.at(0)); + u32 size = args.unnamed.at(1).as_int(); + + if (!m_debugger.is_halted()) { + throw_compile_error( + form, + "Cannot disassemble memory, the debugger must be connected and the target must be halted."); + } + + if (size > 1024 * 1024) { + throw_compile_error( + form, + fmt::format( + ":disasm used on over 1 MB of memory, this probably isn't what you meant to do.")); + } + + std::vector<u8> mem; + mem.resize(size); + + if (addr < EE_MAIN_MEM_LOW_PROTECT || (u64(addr) + size) > EE_MAIN_MEM_SIZE) { + throw_compile_error(form, ":disasm memory out of range"); + } + + m_debugger.read_memory(mem.data(), size, addr); + + fmt::print("{}\n", + disassemble_x86(mem.data(), mem.size(), m_debugger.get_x86_base_addr() + addr)); + + return get_none(); +} + +Val* Compiler::compile_bp(const goos::Object& form, const 
goos::Object& rest, Env* env) { + (void)env; + auto args = get_va(form, rest); + va_check(form, args, {{}}, {}); + + if (!m_debugger.is_halted()) { + throw_compile_error( + form, + "Cannot add breakpoint, the debugger must be connected and the target must be halted."); + } + + u32 addr = parse_address_spec(args.unnamed.at(0)); + m_debugger.add_addr_breakpoint(addr); + + return get_none(); +} + +Val* Compiler::compile_ubp(const goos::Object& form, const goos::Object& rest, Env* env) { + (void)env; + auto args = get_va(form, rest); + va_check(form, args, {{}}, {}); + + if (!m_debugger.is_halted()) { + throw_compile_error( + form, + "Cannot remove breakpoint, the debugger must be connected and the target must be halted."); + } + + u32 addr = parse_address_spec(args.unnamed.at(0)); + m_debugger.remove_addr_breakpoint(addr); + return get_none(); } \ No newline at end of file diff --git a/goalc/debugger/Debugger.cpp b/goalc/debugger/Debugger.cpp index e1c578527..34e89d9dd 100644 --- a/goalc/debugger/Debugger.cpp +++ b/goalc/debugger/Debugger.cpp @@ -1,11 +1,16 @@ /*! * @file Debugger.h * The OpenGOAL debugger. + * Uses xdbg functions to debug an OpenGOAL target. */ #include #include "Debugger.h" +#include "common/util/Timer.h" +#include "common/goal_constants.h" +#include "common/symbols.h" #include "third-party/fmt/core.h" +#include "goalc/emitter/disassemble.h" /*! * Is the target halted? If we don't know or aren't connected, returns false. @@ -45,10 +50,12 @@ bool Debugger::is_attached() const { /*! * If attached, detach. If halted and attached, will unhalt. - * Will silently do nothing if we aren't attached. + * Will silently do nothing if we aren't attached, so it is safe to just call detach() to try to + * clean up when exiting. 
*/ void Debugger::detach() { if (is_valid() && m_attached) { + stop_watcher(); xdbg::close_memory(m_debug_context.tid, &m_memory_handle); xdbg::detach_and_resume(m_debug_context.tid); m_context_valid = false; @@ -81,7 +88,23 @@ std::string Debugger::get_context_string() const { */ bool Debugger::attach_and_break() { if (is_valid() && !m_attached) { + // reset and start the stop watcher + clear_signal_queue(); + start_watcher(); + + // attach and send a break command if (xdbg::attach_and_break(m_debug_context.tid)) { + // wait for the signal queue to get a stop and pop it. + auto info = pop_signal(); + + // manually set up continue for this. + m_continue_info.valid = true; + m_continue_info.subtract_1 = false; + + // this may fail if you crash at exactly the wrong time. todo - remove? + assert(info.kind == xdbg::SignalInfo::BREAK); + + // open the memory of the process if (!xdbg::open_memory(m_debug_context.tid, &m_memory_handle)) { return false; } @@ -89,12 +112,11 @@ bool Debugger::attach_and_break() { m_attached = true; m_running = false; - xdbg::Regs regs; - if (!xdbg::get_regs_now(m_debug_context.tid, ®s)) { - fmt::print("[Debugger] get_regs_now failed after break, something is wrong\n"); - } else { - fmt::print("{}", regs.print_gprs()); - } + // get info from target + get_break_info(); + + auto signal_count = get_signal_count(); + assert(signal_count == 0); return true; } } else { @@ -104,21 +126,53 @@ bool Debugger::attach_and_break() { return false; } +/*! + * Read the registers, symbol table, and instructions near rip. + * Print out some info about where we are. 
+ */ +void Debugger::get_break_info() { + read_symbol_table(); + m_regs_valid = false; + if (!xdbg::get_regs_now(m_debug_context.tid, &m_regs_at_break)) { + fmt::print("[Debugger] get_regs_now failed after break, something is wrong\n"); + } else { + m_regs_valid = true; + fmt::print("{}", m_regs_at_break.print_gprs()); + } + + if (regs_valid()) { + std::vector mem; + mem.resize(INSTR_DUMP_SIZE_REV + INSTR_DUMP_SIZE_FWD); + // very basic asm dump. + auto rip = m_regs_at_break.rip; + if (rip >= m_debug_context.base + EE_MAIN_MEM_LOW_PROTECT && + rip < m_debug_context.base + EE_MAIN_MEM_SIZE) { + read_memory(mem.data(), INSTR_DUMP_SIZE_REV + INSTR_DUMP_SIZE_FWD, + rip - m_debug_context.base - INSTR_DUMP_SIZE_REV); + fmt::print("{}\n", disassemble_x86(mem.data(), mem.size(), rip - INSTR_DUMP_SIZE_REV, rip)); + + } else { + fmt::print("Not in GOAL code!\n"); + } + } +} + /*! * Stop the target. Must be attached and not stopped. + * Waits for break to be acknowledged and reads break info. */ bool Debugger::do_break() { assert(is_valid() && is_attached() && is_running()); + m_expecting_immeidate_break = true; + m_continue_info.valid = false; + clear_signal_queue(); if (!xdbg::break_now(m_debug_context.tid)) { return false; } else { + auto info = pop_signal(); + assert(info.kind == xdbg::SignalInfo::BREAK); + get_break_info(); m_running = false; - xdbg::Regs regs; - if (!xdbg::get_regs_now(m_debug_context.tid, ®s)) { - fmt::print("[Debugger] get_regs_now failed after break, something is wrong\n"); - } else { - fmt::print("{}", regs.print_gprs()); - } return true; } } @@ -128,6 +182,24 @@ bool Debugger::do_break() { */ bool Debugger::do_continue() { assert(is_valid() && is_attached() && is_halted()); + if (!m_regs_valid) { + get_break_info(); + } + assert(regs_valid()); + + if (!m_continue_info.valid) { + update_continue_info(); + } + assert(m_continue_info.valid); + m_regs_valid = false; + + if (m_continue_info.subtract_1) { + m_regs_at_break.rip--; + auto result = 
xdbg::set_regs_now(m_debug_context.tid, m_regs_at_break); + assert(result); + } + + m_expecting_immeidate_break = false; if (!xdbg::cont_now(m_debug_context.tid)) { return false; } else { @@ -136,12 +208,318 @@ bool Debugger::do_continue() { } } +/*! + * Read memory from an attached and halted target. + */ bool Debugger::read_memory(u8* dest_buffer, int size, u32 goal_addr) { assert(is_valid() && is_attached() && is_halted()); return xdbg::read_goal_memory(dest_buffer, size, goal_addr, m_debug_context, m_memory_handle); } +/*! + * Write the memory of an attached and halted target. + */ bool Debugger::write_memory(const u8* src_buffer, int size, u32 goal_addr) { assert(is_valid() && is_attached() && is_halted()); return xdbg::write_goal_memory(src_buffer, size, goal_addr, m_debug_context, m_memory_handle); +} + +/*! + * Read the GOAL Symbol table from an attached and halted target. + */ +void Debugger::read_symbol_table() { + assert(is_valid() && is_attached() && is_halted()); + u32 bytes_read = 0; + u32 reads = 0; + Timer timer; + + u32 st_base = m_debug_context.s7 - ((GOAL_MAX_SYMBOLS / 2) * 8 + BASIC_OFFSET); + u32 empty_pair_offset = (m_debug_context.s7 + FIX_SYM_EMPTY_PAIR - PAIR_OFFSET) - st_base; + + std::vector mem; + mem.resize(0x20000); + + if (!xdbg::read_goal_memory(mem.data(), 0x20000, st_base, m_debug_context, m_memory_handle)) { + fmt::print("Read failed during read_symbol_table\n"); + return; + } + reads++; + bytes_read += 0x20000; + + struct SymLower { + u32 type; + u32 value; + }; + + struct SymUpper { + u32 hash; + u32 str; + }; + + m_symbol_name_to_offset_map.clear(); + m_symbol_offset_to_name_map.clear(); + m_symbol_name_to_value_map.clear(); + + u32 sym_type = 0; + // now loop through all the symbols + for (int i = 0; i < (SYM_INFO_OFFSET + 4) / int(sizeof(SymLower)); i++) { + auto offset = i * sizeof(SymLower); + if (offset == empty_pair_offset) { + continue; + } + auto sym = (SymLower*)(mem.data() + offset); + if (sym->type) { + // got a 
symbol! + if (!sym_type) { + sym_type = sym->type; + } else { + if (sym_type != sym->type) { + fmt::print("Got bad symbol type. Expected 0x{:x} got 0x{:x}\n", sym_type, sym->type); + return; + } + } + + // now get the info + auto info = (SymUpper*)(mem.data() + i * sizeof(SymLower) + SYM_INFO_OFFSET + BASIC_OFFSET); + + // now get the string. + char str_buff[128]; + if (!xdbg::read_goal_memory((u8*)str_buff, 128, info->str + 4, m_debug_context, + m_memory_handle)) { + fmt::print("Read symbol string failed during read_symbol_table\n"); + return; + } + reads++; + bytes_read += 128; + // just in case + str_buff[127] = '\0'; + assert(strlen(str_buff) < 50); + std::string str(str_buff); + + // GOAL sym - s7 + auto sym_offset = s32(offset + st_base + BASIC_OFFSET) - s32(m_debug_context.s7); + assert(sym_offset >= INT16_MIN); + assert(sym_offset <= INT16_MAX); + + // update maps + if (m_symbol_name_to_offset_map.find(str) != m_symbol_name_to_offset_map.end()) { + if (str == "asize-of-basic-func") { + // this is an actual bug in kscheme. The bug has no effect, but we replicate it so that + // the symbol table layout is closer. + + // to hide this duplicate symbol, we append "-hack-copy" to the end of it. + str += "-hack-copy"; + } else { + fmt::print("Symbol {} appears multiple times!\n", str); + assert(false); + } + } + + m_symbol_name_to_offset_map[str] = sym_offset; + m_symbol_offset_to_name_map[sym_offset] = str; + m_symbol_name_to_value_map[str] = sym->value; + } + } + + assert(m_symbol_offset_to_name_map.size() == m_symbol_name_to_offset_map.size()); + fmt::print("Read symbol table ({} bytes, {} reads, {} symbols, {:.2f} ms)\n", bytes_read, reads, + m_symbol_name_to_offset_map.size(), timer.getMs()); +} + +/*! + * Get the address of a symbol by name. Returns a GOAL address. + * Returns 0 if the symbol doesn't exist. 
+ */ +u32 Debugger::get_symbol_address(const std::string& sym_name) { + assert(is_valid()); + auto kv = m_symbol_name_to_offset_map.find(sym_name); + if (kv != m_symbol_name_to_offset_map.end()) { + return m_debug_context.s7 + kv->second; + } + return 0; +} + +/*! + * Get the value of a symbol by name. Returns if the symbol exists and populates output if it does. + */ +bool Debugger::get_symbol_value(const std::string& sym_name, u32* output) { + assert(is_valid()); + auto kv = m_symbol_name_to_value_map.find(sym_name); + if (kv != m_symbol_name_to_value_map.end()) { + *output = kv->second; + return true; + } + return false; +} + +/*! + * Starts the debugger watch thread which watches the target process to see if it stops. + */ +void Debugger::start_watcher() { + assert(!m_watcher_running); + m_watcher_running = true; + m_watcher_should_stop = false; + m_watcher_thread = std::thread(&Debugger::watcher, this); +} + +/*! + * Stops the debugger watch thread (waits for it to end) + */ +void Debugger::stop_watcher() { + assert(m_watcher_running); + m_watcher_running = false; + m_watcher_should_stop = true; + m_watcher_thread.join(); +} + +Debugger::~Debugger() { + if (m_watcher_running) { + stop_watcher(); + } +} + +/*! + * The watcher thread. + */ +void Debugger::watcher() { + xdbg::SignalInfo signal_info; + while (!m_watcher_should_stop) { + // we just sit in a loop, waiting for stops. + if (xdbg::check_stopped(m_debug_context.tid, &signal_info)) { + // the target stopped! + m_continue_info.valid = false; + + switch (signal_info.kind) { + case xdbg::SignalInfo::SEGFAULT: + printf("Target has crashed with a SEGFAULT! Run (:di) to get more information.\n"); + break; + case xdbg::SignalInfo::BREAK: + printf("Target has stopped. 
Run (:di) to get more information.\n"); + break; + default: + printf("[Debugger] unhandled signal in watcher: %d\n", int(signal_info.kind)); + assert(false); + } + + { + std::lock_guard lock(m_watcher_mutex); + m_running = false; + m_watcher_queue.push({signal_info.kind}); // todo, more info? + } + m_watcher_cv.notify_one(); + + } else { + // the target didn't stop. + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + } +} + +Debugger::SignalInfo Debugger::pop_signal() { + { + std::unique_lock lock(m_watcher_mutex); + m_watcher_cv.wait(lock, [&] { return !m_watcher_queue.empty(); }); + } + + Debugger::SignalInfo result; + if (!try_pop_signal(&result)) { + assert(false); + } + return result; +} + +bool Debugger::try_pop_signal(SignalInfo* out) { + { + std::unique_lock lock(m_watcher_mutex); + if (!m_watcher_queue.empty()) { + *out = m_watcher_queue.front(); + m_watcher_queue.pop(); + return true; + } + } + + return false; +} + +int Debugger::get_signal_count() { + std::unique_lock lock(m_watcher_mutex); + return int(m_watcher_queue.size()); +} + +void Debugger::clear_signal_queue() { + std::unique_lock lock(m_watcher_mutex); + while (!m_watcher_queue.empty()) { + m_watcher_queue.pop(); + } +} + +void Debugger::add_addr_breakpoint(u32 addr) { + { + std::unique_lock lock(m_watcher_mutex); + auto kv = m_addr_breakpoints.find(addr); + if (kv != m_addr_breakpoints.end()) { + fmt::print("Breakpoint at address 0x{:08x} already exists as breakpoint {}\n", addr, + kv->second.id); + return; + } + + Breakpoint bp; + bp.goal_addr = addr; + bp.id = m_addr_breakpoints.size(); + if (!read_memory(&bp.old_data, 1, addr)) { + fmt::print("Failed to read memory for breakpoint, not adding breakpoint\n"); + return; + } + + u8 int3 = 0xcc; + if (!write_memory(&int3, 1, addr)) { + fmt::print("Failed to write memory for breakpoint, not adding breakpoint\n"); + return; + } + + m_addr_breakpoints[addr] = bp; + } +} + +void Debugger::remove_addr_breakpoint(u32 addr) { + { + 
std::unique_lock lock(m_watcher_mutex); + update_continue_info(); + auto kv = m_addr_breakpoints.find(addr); + if (kv == m_addr_breakpoints.end()) { + fmt::print("Breakpoint at address 0x{:08x} does not exist\n", addr); + return; + } + + if (!write_memory(&kv->second.old_data, 1, addr)) { + fmt::print("Failed to remove breakpoint\n"); + return; + } + + m_addr_breakpoints.erase(kv); + } +} + +void Debugger::update_continue_info() { + if (m_continue_info.valid || !is_halted()) { + return; + } + + if (!m_regs_valid) { + get_break_info(); + } + + auto kv = m_addr_breakpoints.find(get_regs().rip - 1); + if (kv == m_addr_breakpoints.end()) { + m_continue_info.subtract_1 = false; + } else { + if (m_expecting_immeidate_break) { + printf("Warning, conflicting break and breakpoints. Not sure why we stopped!\n"); + } + + m_continue_info.subtract_1 = true; + } + + m_expecting_immeidate_break = false; + m_continue_info.valid = true; } \ No newline at end of file diff --git a/goalc/debugger/Debugger.h b/goalc/debugger/Debugger.h index d386e3572..229ec45e0 100644 --- a/goalc/debugger/Debugger.h +++ b/goalc/debugger/Debugger.h @@ -1,17 +1,24 @@ /*! * @file Debugger.h * The OpenGOAL debugger. + * Uses xdbg functions to debug an OpenGOAL target. */ #pragma once +#include +#include +#include +#include +#include #include "common/common_types.h" #include "common/cross_os_debug/xdbg.h" class Debugger { public: Debugger() = default; - bool is_halted() const; // are we halted? 
+ ~Debugger(); + bool is_halted() const; bool is_valid() const; bool is_attached() const; bool is_running() const; @@ -19,28 +26,114 @@ class Debugger { void invalidate(); void set_context(u32 s7, uintptr_t base, const std::string& thread_id); std::string get_context_string() const; - bool attach_and_break(); - bool do_break(); bool do_continue(); - bool read_memory(u8* dest_buffer, int size, u32 goal_addr); bool write_memory(const u8* src_buffer, int size, u32 goal_addr); + void read_symbol_table(); + u32 get_symbol_address(const std::string& sym_name); + bool get_symbol_value(const std::string& sym_name, u32* output); + void add_addr_breakpoint(u32 addr); + void remove_addr_breakpoint(u32 addr); + void get_break_info(); + /*! + * Get the x86 address of GOAL memory + */ + u64 get_x86_base_addr() const { + assert(m_context_valid); + return m_debug_context.base; + } + + /*! + * Get the thread being debugged. + */ + const xdbg::ThreadID& get_thread_id() const { + assert(m_context_valid); + return m_debug_context.tid; + } + + /*! + * Are the register values currently stored by the debugger currently accurate? + */ + bool regs_valid() const { return m_regs_valid; } + + /*! + * Write a value to GOAL memory + */ template bool write_value(const T& value, u32 goal_addr) { return write_memory((const u8*)&value, sizeof(T), goal_addr); } + /*! 
+ * Read a value from GOAL memory + */ template bool read_value(T* value, u32 goal_addr) { return read_memory((u8*)value, sizeof(T), goal_addr); } + const xdbg::Regs& get_regs() { + assert(m_regs_valid); + return m_regs_at_break; + } + private: + // how many bytes of instructions to look at ahead of / behind rip when stopping + static constexpr int INSTR_DUMP_SIZE_REV = 32; + static constexpr int INSTR_DUMP_SIZE_FWD = 64; + + // symbol table info (all s7-relative offsets) + std::unordered_map m_symbol_name_to_offset_map; + std::unordered_map m_symbol_name_to_value_map; + std::unordered_map m_symbol_offset_to_name_map; + + // debug state xdbg::DebugContext m_debug_context; xdbg::MemoryHandle m_memory_handle; + xdbg::Regs m_regs_at_break; + + bool m_watcher_should_stop = false; + bool m_watcher_running = false; + bool m_regs_valid = false; + + void start_watcher(); + void stop_watcher(); + void watcher(); + void update_continue_info(); + + struct Breakpoint { + u32 goal_addr = 0; // address to break at + int id = -1; // breakpoint ID + u8 old_data = 0; // byte originally stored at goal_addr + }; + + bool m_expecting_immeidate_break = false; + + std::unordered_map m_addr_breakpoints; + + std::mutex m_watcher_mutex; + std::condition_variable m_watcher_cv; + std::thread m_watcher_thread; + + struct ContinueInfo { + bool subtract_1 = false; + bool valid = false; + } m_continue_info; + + // for more complicated breakpoint stuff, we have a queue of stops. + // right now it's barely used for anything other than waiting for a "break" to be acknowledged. 
+ struct SignalInfo { + xdbg::SignalInfo::Kind kind; + }; + std::queue m_watcher_queue; + bool try_pop_signal(SignalInfo* out); + SignalInfo pop_signal(); + int get_signal_count(); + void clear_signal_queue(); + bool m_context_valid = false; bool m_running = true; bool m_attached = false; diff --git a/goalc/emitter/disassemble.cpp b/goalc/emitter/disassemble.cpp new file mode 100644 index 000000000..51d93b021 --- /dev/null +++ b/goalc/emitter/disassemble.cpp @@ -0,0 +1,59 @@ +#include "disassemble.h" +#include "Zydis/Zydis.h" +#include "third-party/fmt/core.h" + +std::string disassemble_x86(u8* data, int len, u64 base_addr) { + std::string result; + ZydisDecoder decoder; + ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64); + ZydisFormatter formatter; + ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL); + ZydisDecodedInstruction instr; + + constexpr int print_buff_size = 512; + char print_buff[print_buff_size]; + int offset = 0; + while (ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, data + offset, len - offset, &instr))) { + result += fmt::format("[0x{:x}] ", base_addr); + ZydisFormatterFormatInstruction(&formatter, &instr, print_buff, print_buff_size, base_addr); + result += print_buff; + result += "\n"; + + offset += instr.length; + base_addr += instr.length; + } + + return result; +} + +std::string disassemble_x86(u8* data, int len, u64 base_addr, u64 highlight_addr) { + std::string result; + ZydisDecoder decoder; + ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64); + ZydisFormatter formatter; + ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL); + ZydisDecodedInstruction instr; + + constexpr int print_buff_size = 512; + char print_buff[print_buff_size]; + int offset = 0; + + assert(highlight_addr > base_addr); + int mark_offset = int(highlight_addr - base_addr); + while (offset < len) { + char prefix = (offset == mark_offset) ? 
'-' : ' '; + if (ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, data + offset, len - offset, &instr))) { + result += fmt::format("{:c} [0x{:x}] ", prefix, base_addr); + ZydisFormatterFormatInstruction(&formatter, &instr, print_buff, print_buff_size, base_addr); + result += print_buff; + result += "\n"; + offset += instr.length; + base_addr += instr.length; + } else { + result += fmt::format("{:c} [0x{:x}] INVALID (0x{:02x})\n", prefix, base_addr, data[offset]); + offset++; + } + } + + return result; +} \ No newline at end of file diff --git a/goalc/emitter/disassemble.h b/goalc/emitter/disassemble.h new file mode 100644 index 000000000..893ee45c4 --- /dev/null +++ b/goalc/emitter/disassemble.h @@ -0,0 +1,7 @@ +#pragma once + +#include +#include "common/common_types.h" + +std::string disassemble_x86(u8* data, int len, u64 base_addr); +std::string disassemble_x86(u8* data, int len, u64 base_addr, u64 highlight_addr); \ No newline at end of file diff --git a/test/goalc/test_debugger.cpp b/test/goalc/test_debugger.cpp index 332e4b66a..053c4ff31 100644 --- a/test/goalc/test_debugger.cpp +++ b/test/goalc/test_debugger.cpp @@ -106,4 +106,81 @@ TEST(Debugger, DebuggerWriteMemory) { } } +TEST(Debugger, Symbol) { + Compiler compiler; + // evidently you can't ptrace threads in your own process, so we need to run the runtime in a + // separate process. 
+ if (!fork()) { + GoalTest::runtime_no_kernel(); + exit(0); + } else { + compiler.connect_to_target(); + compiler.poke_target(); + compiler.run_test_from_string("(dbg)"); + EXPECT_TRUE(compiler.get_debugger().do_continue()); + auto result = compiler.run_test_from_string("(define test-symbol (the int 123))"); + EXPECT_TRUE(compiler.get_debugger().do_break()); + auto addr = compiler.get_debugger().get_symbol_address("test-symbol"); + u32 value; + EXPECT_TRUE(compiler.get_debugger().read_value(&value, addr)); + EXPECT_EQ(value, 123); + EXPECT_TRUE(compiler.get_debugger().write_value(456, addr)); + EXPECT_TRUE(compiler.get_debugger().read_value(&value, addr)); + EXPECT_EQ(value, 456); + + EXPECT_TRUE(compiler.get_debugger().do_continue()); + result = compiler.run_test_from_string("test-symbol"); + EXPECT_EQ(456, std::stoi(result.at(0))); + + compiler.shutdown_target(); + + // and now the child process should be done! + EXPECT_TRUE(wait(nullptr) >= 0); + } +} + +TEST(Debugger, SimpleBreakpoint) { + Compiler compiler; + + if (!fork()) { + GoalTest::runtime_no_kernel(); + exit(0); + } else { + compiler.connect_to_target(); + compiler.poke_target(); + compiler.run_test_from_string("(defun test-function () (+ 1 2 3 4 5 6))"); + ; + compiler.run_test_from_string("(dbg)"); + u32 func_addr; + EXPECT_TRUE(compiler.get_debugger().get_symbol_value("test-function", &func_addr)); + EXPECT_TRUE(compiler.get_debugger().is_valid()); + EXPECT_TRUE(compiler.get_debugger().is_halted()); + + compiler.get_debugger().add_addr_breakpoint(func_addr); // todo from code. + compiler.run_test_from_string("(:cont)"); + compiler.run_test_from_string("(test-function)"); + // wait for breakpoint to be hit. 
+ while (!compiler.get_debugger().is_halted()) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + compiler.get_debugger().get_break_info(); + auto expected_instr_before_rip = compiler.get_debugger().get_x86_base_addr() + func_addr; + auto rip = compiler.get_debugger().get_regs().rip; + // instructions can be at most 15 bytes long. + EXPECT_TRUE(rip > expected_instr_before_rip && rip < expected_instr_before_rip + 15); + + EXPECT_TRUE(compiler.get_debugger().is_halted()); + compiler.get_debugger().remove_addr_breakpoint(func_addr); + compiler.get_debugger().do_continue(); + + auto result = compiler.run_test_from_string("(test-function)"); + EXPECT_EQ(std::stoi(result.at(0)), 21); + compiler.shutdown_target(); + + // and now the child process should be done! + EXPECT_TRUE(wait(nullptr) >= 0); + } +} + #endif