From 951f31878eb86841d1c9ddd00c655d3d78c71779 Mon Sep 17 00:00:00 2001 From: water111 <48171810+water111@users.noreply.github.com> Date: Fri, 13 Nov 2020 22:33:57 -0500 Subject: [PATCH] [Source Line Debugger] Tracking objects and IR (#115) * track where segments are when debugging * missing windows include * figure out what function we're in * addr to IR is working --- game/kernel/klisten.cpp | 7 +- game/kernel/kprint.cpp | 6 +- goal_src/engine/math/vector-h.gc | 2 +- goal_src/kernel/gcommon.gc | 8 ++ goalc/CMakeLists.txt | 4 +- goalc/compiler/CodeGenerator.cpp | 88 ++++++++++---- goalc/compiler/CodeGenerator.h | 14 ++- goalc/compiler/Compiler.cpp | 11 +- goalc/compiler/Compiler.h | 4 +- goalc/compiler/Env.h | 3 + goalc/compiler/IR.cpp | 1 + goalc/compiler/compilation/Debug.cpp | 4 +- goalc/compiler/compilation/Function.cpp | 8 +- goalc/compiler/compilation/Type.cpp | 3 +- goalc/debugger/DebugInfo.cpp | 31 +++++ goalc/debugger/DebugInfo.h | 56 +++++++++ goalc/debugger/Debugger.cpp | 97 +++++++++++++-- goalc/debugger/Debugger.h | 36 +++++- goalc/debugger/disassemble.cpp | 150 ++++++++++++++++++++++++ goalc/debugger/disassemble.h | 30 +++++ goalc/emitter/ObjectGenerator.cpp | 35 +++++- goalc/emitter/ObjectGenerator.h | 34 ++++-- goalc/emitter/Register.h | 3 + goalc/emitter/disassemble.cpp | 59 ---------- goalc/emitter/disassemble.h | 7 -- goalc/listener/Listener.cpp | 33 +++++- goalc/listener/Listener.h | 9 +- goalc/listener/MemoryMap.cpp | 128 ++++++++++++++++++++ goalc/listener/MemoryMap.h | 38 ++++++ test/goalc/test_debugger.cpp | 22 +++- test/goalc/test_with_game.cpp | 16 +++ 31 files changed, 799 insertions(+), 148 deletions(-) create mode 100644 goalc/debugger/DebugInfo.cpp create mode 100644 goalc/debugger/DebugInfo.h create mode 100644 goalc/debugger/disassemble.cpp create mode 100644 goalc/debugger/disassemble.h delete mode 100644 goalc/emitter/disassemble.cpp delete mode 100644 goalc/emitter/disassemble.h create mode 100644 goalc/listener/MemoryMap.cpp create mode 100644 goalc/listener/MemoryMap.h diff --git a/game/kernel/klisten.cpp b/game/kernel/klisten.cpp index 01b1457aa..9b667c212 100644 --- a/game/kernel/klisten.cpp +++ b/game/kernel/klisten.cpp @@ -147,8 +147,11 @@ void ProcessListenerMessage(Ptr msg) { // getting squashed. // this setup allows listener function execution to clean up after itself. - ListenerFunction->value = - link_and_exec(buffer, "*listener*", 0, kdebugheap, LINK_FLAG_FORCE_DEBUG).offset; + + // we have added the LINK_FLAG_OUTPUT_LOAD + ListenerFunction->value = link_and_exec(buffer, "*listener*", 0, kdebugheap, + LINK_FLAG_FORCE_DEBUG | LINK_FLAG_OUTPUT_LOAD) + .offset; return; // don't ack yet, this will happen after the function runs. } break; default: diff --git a/game/kernel/kprint.cpp b/game/kernel/kprint.cpp index 98822b9b7..29fbfb56c 100644 --- a/game/kernel/kprint.cpp +++ b/game/kernel/kprint.cpp @@ -150,8 +150,10 @@ void output_segment_load(const char* name, Ptr link_block, u32 flags) { char false_str[] = "nil"; char* flag_str = (flags & LINK_FLAG_OUTPUT_TRUE) ? true_str : false_str; auto lbp = link_block.cast(); - sprintf(buffer, "load \"%s\" %s #x%x #x%x #x%x\n", name, flag_str, lbp->code_infos[0].offset, - lbp->code_infos[1].offset, lbp->code_infos[2].offset); + // modified to also include segment sizes. + sprintf(buffer, "load \"%s\" %s #x%x #x%x #x%x #x%x #x%x #x%x\n", name, flag_str, + lbp->code_infos[0].offset, lbp->code_infos[1].offset, lbp->code_infos[2].offset, + lbp->code_infos[0].size, lbp->code_infos[1].size, lbp->code_infos[2].size); OutputPending = OutputBufArea + sizeof(ListenerMessageHeader); } } diff --git a/goal_src/engine/math/vector-h.gc b/goal_src/engine/math/vector-h.gc index 8c4b2b320..ef5a71fd6 100644 --- a/goal_src/engine/math/vector-h.gc +++ b/goal_src/engine/math/vector-h.gc @@ -453,7 +453,7 @@ ) ) -(defun vector-dot-vu ((a vector) (b vector)) +(defun vector4-dot-vu ((a vector) (b vector)) "Take the dot product of two vectors. Does the x, y, z, and w compoments Originally implemented using VU macro ops" diff --git a/goal_src/kernel/gcommon.gc b/goal_src/kernel/gcommon.gc index 532e6b2ae..a560f8930 100644 --- a/goal_src/kernel/gcommon.gc +++ b/goal_src/kernel/gcommon.gc @@ -610,6 +610,14 @@ ;; array (todo) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +#| +(defun segfault-function () + "Function which segfaults." + ;; this is added only to test debugging stuff and isn't in the game. + (segfault) + ) +|# + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; memcpy and similar ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/goalc/CMakeLists.txt b/goalc/CMakeLists.txt index 8b1a735f3..4c7e6b4f3 100644 --- a/goalc/CMakeLists.txt +++ b/goalc/CMakeLists.txt @@ -4,7 +4,7 @@ add_library(compiler emitter/ObjectFileData.cpp emitter/ObjectGenerator.cpp emitter/Register.cpp - emitter/disassemble.cpp + debugger/disassemble.cpp compiler/Compiler.cpp compiler/Env.cpp compiler/Val.cpp @@ -25,8 +25,10 @@ add_library(compiler compiler/compilation/Static.cpp compiler/Util.cpp debugger/Debugger.cpp + debugger/DebugInfo.cpp logger/Logger.cpp listener/Listener.cpp + listener/MemoryMap.cpp regalloc/IRegister.cpp regalloc/Allocator.cpp regalloc/allocate.cpp diff --git a/goalc/compiler/CodeGenerator.cpp b/goalc/compiler/CodeGenerator.cpp index c7513e21c..3332abcd9 100644 --- a/goalc/compiler/CodeGenerator.cpp +++ b/goalc/compiler/CodeGenerator.cpp @@ -1,36 +1,60 @@ +/*! + * @file CodeGenerator.cpp + * Generate object files from a FileEnv using an emitter::ObjectGenerator. + * Populates a DebugInfo. + * Currently owns the logic for emitting the function prologues/epilogues and stack spill ops. + */ + +#include +#include #include "CodeGenerator.h" #include "goalc/emitter/IGen.h" #include "IR.h" using namespace emitter; -constexpr int GPR_SIZE = 8; -constexpr int XMM_SIZE = 16; -CodeGenerator::CodeGenerator(FileEnv* env) : m_fe(env) {} +CodeGenerator::CodeGenerator(FileEnv* env, DebugInfo* debug_info) + : m_fe(env), m_debug_info(debug_info) {} +/*! + * Generate an object file. + */ std::vector CodeGenerator::run() { + std::unordered_set function_names; + + // first, add each function to the ObjectGenerator (but don't add any data) for (auto& f : m_fe->functions()) { - m_gen.add_function_to_seg(f->segment); + if (function_names.find(f->name()) == function_names.end()) { + function_names.insert(f->name()); + } else { + printf("Failed to codegen, there are two functions with internal names %s\n", + f->name().c_str()); + throw std::runtime_error("Failed to codegen."); + } + m_gen.add_function_to_seg(f->segment, &m_debug_info->add_function(f->name())); } - // todo, static objects + // next, add all static objects. for (auto& static_obj : m_fe->statics()) { static_obj->generate(&m_gen); } + // next, add instructions to functions for (size_t i = 0; i < m_fe->functions().size(); i++) { do_function(m_fe->functions().at(i).get(), i); } - // for (auto& f : m_fe->functions()) { - // do_function(f.get()); - // } + // generate a v3 object. TODO - support for v4 "data" objects. return m_gen.generate_data_v3().to_vector(); } +/*! + * Add instructions to the function, specified by index. + * Generates prologues / epilogues. + */ void CodeGenerator::do_function(FunctionEnv* env, int f_idx) { auto f_rec = m_gen.get_existing_function_record(f_idx); - // auto f_rec = m_gen.add_function_to_seg(env->segment); // todo, extra alignment settings + // todo, extra alignment settings auto& ri = emitter::gRegInfo; const auto& allocs = env->alloc_result(); @@ -41,8 +65,9 @@ void CodeGenerator::do_function(FunctionEnv* env, int f_idx) { // back up xmms for (auto& saved_reg : allocs.used_saved_regs) { if (saved_reg.is_xmm()) { - m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm8s(RSP, XMM_SIZE)); - m_gen.add_instr_no_ir(f_rec, IGen::store128_gpr64_xmm128(RSP, saved_reg)); + m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm8s(RSP, XMM_SIZE), InstructionInfo::PROLOGUE); + m_gen.add_instr_no_ir(f_rec, IGen::store128_gpr64_xmm128(RSP, saved_reg), + InstructionInfo::PROLOGUE); stack_offset += XMM_SIZE; } } @@ -50,39 +75,50 @@ void CodeGenerator::do_function(FunctionEnv* env, int f_idx) { // back up gprs for (auto& saved_reg : allocs.used_saved_regs) { if (saved_reg.is_gpr()) { - m_gen.add_instr_no_ir(f_rec, IGen::push_gpr64(saved_reg)); + m_gen.add_instr_no_ir(f_rec, IGen::push_gpr64(saved_reg), InstructionInfo::PROLOGUE); stack_offset += GPR_SIZE; } } + // do we include an extra push to get 8 more bytes to keep the stack aligned? bool bonus_push = false; + + // the offset to add directly to rsp for stack variables (no push/pop) int manually_added_stack_offset = GPR_SIZE * allocs.stack_slots; stack_offset += manually_added_stack_offset; + // do we need to align or manually offset? if (manually_added_stack_offset || allocs.needs_aligned_stack_for_spills || env->needs_aligned_stack()) { if (!(stack_offset & 15)) { if (manually_added_stack_offset) { + // if we're already adding to rsp, just add 8 more. manually_added_stack_offset += 8; } else { + // otherwise to an extra push, and remember so we can do an extra pop later on. bonus_push = true; - m_gen.add_instr_no_ir(f_rec, IGen::push_gpr64(ri.get_saved_gpr(0))); + m_gen.add_instr_no_ir(f_rec, IGen::push_gpr64(ri.get_saved_gpr(0)), + InstructionInfo::PROLOGUE); } stack_offset += 8; } assert(stack_offset & 15); + // do manual stack offset. if (manually_added_stack_offset) { - m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm(RSP, manually_added_stack_offset)); + m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm(RSP, manually_added_stack_offset), + InstructionInfo::PROLOGUE); } } - // TODO EMIT FUNCTIONS + // emit each IR into x86 instructions. for (int ir_idx = 0; ir_idx < int(env->code().size()); ir_idx++) { auto& ir = env->code().at(ir_idx); - auto i_rec = m_gen.add_ir(f_rec); + // start of IR + auto i_rec = m_gen.add_ir(f_rec, ir->print()); + // load anything off the stack that was spilled and is needed. auto& bonus = allocs.stack_ops.at(ir_idx); for (auto& op : bonus.ops) { if (op.load) { @@ -93,7 +129,11 @@ void CodeGenerator::do_function(FunctionEnv* env, int f_idx) { } } } + + // do the actual op ir->do_codegen(&m_gen, allocs, i_rec); + + // store things back on the stack if needed. for (auto& op : bonus.ops) { if (op.store) { if (op.reg.is_gpr()) { @@ -103,35 +143,37 @@ void CodeGenerator::do_function(FunctionEnv* env, int f_idx) { } } } - } + } // end IR loop // EPILOGUE if (manually_added_stack_offset || allocs.needs_aligned_stack_for_spills || env->needs_aligned_stack()) { if (manually_added_stack_offset) { - m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm(RSP, manually_added_stack_offset)); + m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm(RSP, manually_added_stack_offset), + InstructionInfo::EPILOGUE); } if (bonus_push) { assert(!manually_added_stack_offset); - m_gen.add_instr_no_ir(f_rec, IGen::pop_gpr64(ri.get_saved_gpr(0))); + m_gen.add_instr_no_ir(f_rec, IGen::pop_gpr64(ri.get_saved_gpr(0)), InstructionInfo::EPILOGUE); } } for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) { auto& saved_reg = allocs.used_saved_regs.at(i); if (saved_reg.is_gpr()) { - m_gen.add_instr_no_ir(f_rec, IGen::pop_gpr64(saved_reg)); + m_gen.add_instr_no_ir(f_rec, IGen::pop_gpr64(saved_reg), InstructionInfo::EPILOGUE); } } for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) { auto& saved_reg = allocs.used_saved_regs.at(i); if (saved_reg.is_xmm()) { - m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_gpr64(saved_reg, RSP)); - m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm8s(RSP, XMM_SIZE)); + m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_gpr64(saved_reg, RSP), + InstructionInfo::EPILOGUE); + m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm8s(RSP, XMM_SIZE), InstructionInfo::EPILOGUE); } } - m_gen.add_instr_no_ir(f_rec, IGen::ret()); + m_gen.add_instr_no_ir(f_rec, IGen::ret(), InstructionInfo::EPILOGUE); } \ No newline at end of file diff --git a/goalc/compiler/CodeGenerator.h b/goalc/compiler/CodeGenerator.h index 125ee3b67..102f1781c 100644 --- a/goalc/compiler/CodeGenerator.h +++ b/goalc/compiler/CodeGenerator.h @@ -1,3 +1,10 @@ +/*! + * @file CodeGenerator.h + * Generate object files from a FileEnv using an emitter::ObjectGenerator. + * Populates a DebugInfo. + * Currently owns the logic for emitting the function prologues. + */ + #pragma once #ifndef JAK_CODEGENERATOR_H @@ -6,15 +13,18 @@ #include "Env.h" #include "goalc/emitter/ObjectGenerator.h" +class DebugInfo; + class CodeGenerator { public: - CodeGenerator(FileEnv* env); + CodeGenerator(FileEnv* env, DebugInfo* debug_info); std::vector run(); private: void do_function(FunctionEnv* env, int f_idx); emitter::ObjectGenerator m_gen; - FileEnv* m_fe; + FileEnv* m_fe = nullptr; + DebugInfo* m_debug_info = nullptr; }; #endif // JAK_CODEGENERATOR_H diff --git a/goalc/compiler/Compiler.cpp b/goalc/compiler/Compiler.cpp index d774e610a..31f70707c 100644 --- a/goalc/compiler/Compiler.cpp +++ b/goalc/compiler/Compiler.cpp @@ -9,7 +9,7 @@ using namespace goos; -Compiler::Compiler() { +Compiler::Compiler() : m_debugger(&m_listener) { init_logger(); init_settings(); m_listener.add_debugger(&m_debugger); @@ -178,7 +178,9 @@ void Compiler::color_object_file(FileEnv* env) { } std::vector Compiler::codegen_object_file(FileEnv* env) { - CodeGenerator gen(env); + auto debug_info = &m_debugger.get_debug_info_for_object(env->name()); + debug_info->clear(); + CodeGenerator gen(env, debug_info); return gen.run(); } @@ -207,14 +209,15 @@ std::vector Compiler::run_test_from_file(const std::string& source_ } } -std::vector Compiler::run_test_from_string(const std::string& src) { +std::vector Compiler::run_test_from_string(const std::string& src, + const std::string& obj_name) { try { if (!connect_to_target()) { throw std::runtime_error("Compiler::run_test_from_file couldn't connect!"); } auto code = m_goos.reader.read_from_string({src}); - auto compiled = compile_object_file("test-code", code, true); + auto compiled = compile_object_file(obj_name, code, true); if (compiled->is_empty()) { return {}; } diff --git a/goalc/compiler/Compiler.h b/goalc/compiler/Compiler.h index 548ba27ee..0cd88098a 100644 --- a/goalc/compiler/Compiler.h +++ b/goalc/compiler/Compiler.h @@ -31,11 +31,13 @@ class Compiler { None* get_none() { return m_none.get(); } std::vector run_test_from_file(const std::string& source_code); - std::vector run_test_from_string(const std::string& src); + std::vector run_test_from_string(const std::string& src, + const std::string& obj_name = "*listener*"); std::vector run_test_no_load(const std::string& source_code); void shutdown_target(); void enable_throw_on_redefines() { m_throw_on_define_extern_redefinition = true; } Debugger& get_debugger() { return m_debugger; } + listener::Listener& listener() { return m_listener; } void poke_target() { m_listener.send_poke(); } diff --git a/goalc/compiler/Env.h b/goalc/compiler/Env.h index 4e920357e..ca92cf2e8 100644 --- a/goalc/compiler/Env.h +++ b/goalc/compiler/Env.h @@ -96,6 +96,7 @@ class FileEnv : public Env { assert(m_top_level_func); return *m_top_level_func; } + const std::string& name() { return m_name; } bool is_empty(); ~FileEnv() = default; @@ -167,6 +168,8 @@ class FunctionEnv : public DeclareEnv { return m_unnamed_labels.back().get(); } + const std::string& name() const { return m_name; } + int idx_in_file = -1; template diff --git a/goalc/compiler/IR.cpp b/goalc/compiler/IR.cpp index 4f2fbd3e7..4e40e4270 100644 --- a/goalc/compiler/IR.cpp +++ b/goalc/compiler/IR.cpp @@ -281,6 +281,7 @@ std::string IR_FunctionCall::print() { result += fmt::format("{} ", x->print()); } result.pop_back(); + result.push_back(')'); return result; } diff --git a/goalc/compiler/compilation/Debug.cpp b/goalc/compiler/compilation/Debug.cpp index 24b20f26b..dc05a2c8d 100644 --- a/goalc/compiler/compilation/Debug.cpp +++ b/goalc/compiler/compilation/Debug.cpp @@ -1,5 +1,5 @@ #include "goalc/compiler/Compiler.h" -#include "goalc/emitter/disassemble.h" +#include "goalc/debugger/disassemble.h" #include "common/util/FileUtil.h" #include "third-party/fmt/core.h" @@ -308,7 +308,7 @@ Val* Compiler::compile_di(const goos::Object& form, const goos::Object& rest, En "Cannot get debug info, the debugger must be connected and the target must be halted."); } - m_debugger.get_break_info(); + m_debugger.update_break_info(); return get_none(); } diff --git a/goalc/compiler/compilation/Function.cpp b/goalc/compiler/compilation/Function.cpp index 7e9b13531..d99b1e960 100644 --- a/goalc/compiler/compilation/Function.cpp +++ b/goalc/compiler/compilation/Function.cpp @@ -69,6 +69,7 @@ Val* Compiler::compile_inline(const goos::Object& form, const goos::Object& rest */ Val* Compiler::compile_lambda(const goos::Object& form, const goos::Object& rest, Env* env) { auto fe = get_parent_env_of_type(env); + auto obj_env = get_parent_env_of_type(env); auto args = get_va(form, rest); if (args.unnamed.empty() || !args.unnamed.front().is_list() || !args.only_contains_named({"name", "inline-only", "segment"})) { @@ -130,7 +131,11 @@ Val* Compiler::compile_lambda(const goos::Object& form, const goos::Object& rest } if (!inline_only) { - // compile a function! First create env + // compile a function! First create a unique name... + std::string function_name = lambda.debug_name; + if (function_name.empty()) { + function_name = fmt::format("anonymous-function-{}", obj_env->functions().size()); + } auto new_func_env = std::make_unique(env, lambda.debug_name); new_func_env->set_segment(segment); @@ -192,7 +197,6 @@ Val* Compiler::compile_lambda(const goos::Object& form, const goos::Object& rest new_func_env->finish(); // save our code for possible inlining - auto obj_env = get_parent_env_of_type(new_func_env.get()); assert(obj_env); if (new_func_env->settings.save_code) { obj_env->add_function(std::move(new_func_env)); diff --git a/goalc/compiler/compilation/Type.cpp b/goalc/compiler/compilation/Type.cpp index 091e71d1a..0d4a24215 100644 --- a/goalc/compiler/compilation/Type.cpp +++ b/goalc/compiler/compilation/Type.cpp @@ -165,7 +165,8 @@ Val* Compiler::generate_inspector_for_type(const goos::Object& form, Env* env, T // Create a function environment to hold the code for the inspect method. The name is just for // debugging. - auto method_env = std::make_unique(env, "autogenerated-inspect-method"); + auto method_env = + std::make_unique(env, "autogenerated-inspect-method-of-" + type->get_name()); // put the method in the debug segment. method_env->set_segment(DEBUG_SEGMENT); diff --git a/goalc/debugger/DebugInfo.cpp b/goalc/debugger/DebugInfo.cpp new file mode 100644 index 000000000..344805508 --- /dev/null +++ b/goalc/debugger/DebugInfo.cpp @@ -0,0 +1,31 @@ +#include +#include +#include "DebugInfo.h" +#include "third-party/fmt/core.h" + +DebugInfo::DebugInfo(std::string obj_name) : m_obj_name(std::move(obj_name)) {} + +std::string FunctionDebugInfo::disassemble_debug_info(bool* had_failure) { + std::string result = fmt::format("[{}]\n", name); + std::vector data; + u8 temp[128]; + for (const auto& x : instructions) { + auto count = x.instruction.emit(temp); + for (int i = 0; i < count; i++) { + data.push_back(temp[i]); + } + } + + result += disassemble_x86_function(data.data(), data.size(), 0x10000, 0x10000, instructions, irs, + had_failure); + + return result; +} + +std::string DebugInfo::disassemble_debug_functions(bool* had_failure) { + std::string result; + for (auto& kv : m_functions) { + result += kv.second.disassemble_debug_info(had_failure) + "\n\n"; + } + return result; +} \ No newline at end of file diff --git a/goalc/debugger/DebugInfo.h b/goalc/debugger/DebugInfo.h new file mode 100644 index 000000000..5268f9f28 --- /dev/null +++ b/goalc/debugger/DebugInfo.h @@ -0,0 +1,56 @@ +#pragma once + +#include +#include +#include +#include +#include "common/common_types.h" +#include "goalc/emitter/Instruction.h" +#include "goalc/debugger/disassemble.h" + +struct FunctionDebugInfo { + u32 offset_in_seg; // not including type tag. + u32 length; + u8 seg; + std::string name; + + std::vector irs; + std::vector instructions; + + std::string disassemble_debug_info(bool* had_failure); +}; + +class DebugInfo { + public: + explicit DebugInfo(std::string obj_name); + + FunctionDebugInfo& add_function(const std::string& name) { + if (m_functions.find(name) != m_functions.end()) { + assert(false); + } + auto& result = m_functions[name]; + result.name = name; + return result; + } + + bool lookup_function(FunctionDebugInfo** info, std::string* name, u32 offset, u8 seg) { + for (auto& kv : m_functions) { + auto start = kv.second.offset_in_seg; + auto end = start + kv.second.length; + if (offset >= start && offset < end && seg == kv.second.seg) { + *info = &kv.second; + *name = kv.first; + return true; + } + } + return false; + } + + void clear() { m_functions.clear(); } + + std::string disassemble_debug_functions(bool* had_failure); + + private: + std::string m_obj_name; + std::unordered_map m_functions; +}; diff --git a/goalc/debugger/Debugger.cpp b/goalc/debugger/Debugger.cpp index 34e89d9dd..2856f602a 100644 --- a/goalc/debugger/Debugger.cpp +++ b/goalc/debugger/Debugger.cpp @@ -10,7 +10,8 @@ #include "common/goal_constants.h" #include "common/symbols.h" #include "third-party/fmt/core.h" -#include "goalc/emitter/disassemble.h" +#include "goalc/debugger/disassemble.h" +#include "goalc/listener/Listener.h" /*! * Is the target halted? If we don't know or aren't connected, returns false. @@ -113,7 +114,7 @@ bool Debugger::attach_and_break() { m_running = false; // get info from target - get_break_info(); + update_break_info(); auto signal_count = get_signal_count(); assert(signal_count == 0); @@ -130,7 +131,11 @@ bool Debugger::attach_and_break() { * Read the registers, symbol table, and instructions near rip. * Print out some info about where we are. */ -void Debugger::get_break_info() { +void Debugger::update_break_info() { + // todo adjust rip if break instruction???? + + m_memory_map = m_listener->build_memory_map(); + // fmt::print("{}", m_memory_map.print()); read_symbol_table(); m_regs_valid = false; if (!xdbg::get_regs_now(m_debug_context.tid, &m_regs_at_break)) { @@ -145,13 +150,79 @@ void Debugger::get_break_info() { mem.resize(INSTR_DUMP_SIZE_REV + INSTR_DUMP_SIZE_FWD); // very basic asm dump. auto rip = m_regs_at_break.rip; + m_break_info.real_rip = rip; + m_break_info.goal_rip = rip - m_debug_context.base; + + m_break_info.disassembly_failed = false; + if (rip >= m_debug_context.base + EE_MAIN_MEM_LOW_PROTECT && rip < m_debug_context.base + EE_MAIN_MEM_SIZE) { read_memory(mem.data(), INSTR_DUMP_SIZE_REV + INSTR_DUMP_SIZE_FWD, rip - m_debug_context.base - INSTR_DUMP_SIZE_REV); - fmt::print("{}\n", disassemble_x86(mem.data(), mem.size(), rip - INSTR_DUMP_SIZE_REV, rip)); + auto map_loc = m_memory_map.lookup(rip - m_debug_context.base); + if (map_loc.empty) { + fmt::print("In unknown code\n"); + fmt::print("{}", disassemble_x86(mem.data(), mem.size(), rip - INSTR_DUMP_SIZE_REV, rip)); + m_break_info.disassembly_failed = true; + m_break_info.knows_object = false; + m_break_info.knows_function = false; + } else { + u64 obj_offset = rip - m_debug_context.base - map_loc.start_addr; + m_break_info.knows_object = true; + m_break_info.object_name = map_loc.obj_name; + m_break_info.object_seg = map_loc.seg_id; + m_break_info.object_offset = obj_offset; + FunctionDebugInfo* info = nullptr; + std::string name; + + if (get_debug_info_for_object(map_loc.obj_name) + .lookup_function(&info, &name, obj_offset, map_loc.seg_id)) { + update_continue_info(); + m_break_info.knows_function = true; + m_break_info.function_name = name; + m_break_info.function_offset = obj_offset - info->offset_in_seg; + + assert(!info->instructions.empty()); + + std::vector function_mem; + function_mem.resize(info->instructions.back().offset + + info->instructions.back().instruction.length()); + read_memory(function_mem.data(), function_mem.size(), + map_loc.start_addr + info->offset_in_seg); + + int rip_offset = 0; + if (m_continue_info.valid && m_continue_info.is_addr_breakpiont) { + int offset_in_fmem = int(m_continue_info.addr_breakpoint.goal_addr) - + (map_loc.start_addr + info->offset_in_seg); + if (offset_in_fmem < 0 || offset_in_fmem >= int(function_mem.size())) { + m_break_info.disassembly_failed = true; + } else { + function_mem.at(offset_in_fmem) = m_continue_info.addr_breakpoint.old_data; + rip_offset = -1; + } + } + + fmt::print( + "In function {} in segment {} of obj {}, offset_obj 0x{:x}, offset_func 0x{:x}\n", + name, map_loc.seg_id, map_loc.obj_name, obj_offset, m_break_info.function_offset); + + fmt::print("{}", disassemble_x86_function( + function_mem.data(), function_mem.size(), + m_debug_context.base + map_loc.start_addr + info->offset_in_seg, + rip + rip_offset, info->instructions, info->irs, + &m_break_info.disassembly_failed)); + + } else { + m_break_info.disassembly_failed = true; + m_break_info.knows_function = false; + fmt::print("In segment {} of obj {}, offset 0x{:x}\n", map_loc.seg_id, map_loc.obj_name, + obj_offset); + fmt::print("{}", disassemble_x86(mem.data(), mem.size(), rip - INSTR_DUMP_SIZE_REV, rip)); + } + } } else { + m_break_info.disassembly_failed = true; fmt::print("Not in GOAL code!\n"); } } @@ -171,7 +242,7 @@ bool Debugger::do_break() { } else { auto info = pop_signal(); assert(info.kind == xdbg::SignalInfo::BREAK); - get_break_info(); + update_break_info(); m_running = false; return true; } @@ -183,7 +254,7 @@ bool Debugger::do_break() { bool Debugger::do_continue() { assert(is_valid() && is_attached() && is_halted()); if (!m_regs_valid) { - get_break_info(); + update_break_info(); } assert(regs_valid()); @@ -506,20 +577,32 @@ void Debugger::update_continue_info() { } if (!m_regs_valid) { - get_break_info(); + update_break_info(); } auto kv = m_addr_breakpoints.find(get_regs().rip - 1); if (kv == m_addr_breakpoints.end()) { m_continue_info.subtract_1 = false; + m_continue_info.is_addr_breakpiont = false; } else { if (m_expecting_immeidate_break) { printf("Warning, conflicting break and breakpoints. Not sure why we stopped!\n"); } m_continue_info.subtract_1 = true; + m_continue_info.is_addr_breakpiont = true; + m_continue_info.addr_breakpoint = kv->second; } m_expecting_immeidate_break = false; m_continue_info.valid = true; +} + +DebugInfo& Debugger::get_debug_info_for_object(const std::string& object_name) { + auto kv = m_debug_info.find(object_name); + if (kv != m_debug_info.end()) { + return kv->second; + } + + return m_debug_info.insert(std::make_pair(object_name, DebugInfo(object_name))).first->second; } \ No newline at end of file diff --git a/goalc/debugger/Debugger.h b/goalc/debugger/Debugger.h index 229ec45e0..1f4cfef5d 100644 --- a/goalc/debugger/Debugger.h +++ b/goalc/debugger/Debugger.h @@ -13,10 +13,32 @@ #include #include "common/common_types.h" #include "common/cross_os_debug/xdbg.h" +#include "goalc/listener/MemoryMap.h" +#include "DebugInfo.h" + +namespace listener { +class Listener; +} + +struct BreakInfo { + u64 real_rip = 0; + u32 goal_rip = 0; + + bool knows_object = false; + std::string object_name; + u8 object_seg = -1; + u32 object_offset = -1; + + bool knows_function = false; + std::string function_name; + u32 function_offset = -1; + + bool disassembly_failed = false; +}; class Debugger { public: - Debugger() = default; + explicit Debugger(listener::Listener* listener) : m_listener(listener) {} ~Debugger(); bool is_halted() const; bool is_valid() const; @@ -36,7 +58,9 @@ class Debugger { bool get_symbol_value(const std::string& sym_name, u32* output); void add_addr_breakpoint(u32 addr); void remove_addr_breakpoint(u32 addr); - void get_break_info(); + void update_break_info(); + DebugInfo& get_debug_info_for_object(const std::string& object_name); + const BreakInfo& get_cached_break_info() { return m_break_info; } /*! * Get the x86 address of GOAL memory @@ -121,6 +145,8 @@ class Debugger { struct ContinueInfo { bool subtract_1 = false; bool valid = false; + bool is_addr_breakpiont = false; + Breakpoint addr_breakpoint; } m_continue_info; // for more complicated breakpoint stuff, we have a queue of stops. @@ -137,4 +163,10 @@ class Debugger { bool m_context_valid = false; bool m_running = true; bool m_attached = false; + + BreakInfo m_break_info; + + listener::Listener* m_listener = nullptr; + listener::MemoryMap m_memory_map; + std::unordered_map m_debug_info; }; diff --git a/goalc/debugger/disassemble.cpp b/goalc/debugger/disassemble.cpp new file mode 100644 index 000000000..6e1b63c27 --- /dev/null +++ b/goalc/debugger/disassemble.cpp @@ -0,0 +1,150 @@ +#include "disassemble.h" +#include "Zydis/Zydis.h" +#include "third-party/fmt/core.h" + +std::string disassemble_x86(u8* data, int len, u64 base_addr) { + std::string result; + ZydisDecoder decoder; + ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64); + ZydisFormatter formatter; + ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL); + ZydisDecodedInstruction instr; + + constexpr int print_buff_size = 512; + char print_buff[print_buff_size]; + int offset = 0; + while (ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, data + offset, len - offset, &instr))) { + result += fmt::format("[0x{:x}] ", base_addr); + ZydisFormatterFormatInstruction(&formatter, &instr, print_buff, print_buff_size, base_addr); + result += print_buff; + result += "\n"; + + offset += instr.length; + base_addr += instr.length; + } + + return result; +} + +std::string disassemble_x86(u8* data, int len, u64 base_addr, u64 highlight_addr) { + std::string result; + ZydisDecoder decoder; + ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64); + ZydisFormatter formatter; + ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL); + ZydisDecodedInstruction instr; + + constexpr int print_buff_size = 512; + char print_buff[print_buff_size]; + int offset = 0; + + assert(highlight_addr >= base_addr); + int mark_offset = int(highlight_addr - base_addr); + while (offset < len) { + char prefix = (offset == mark_offset) ? '-' : ' '; + if (ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, data + offset, len - offset, &instr))) { + result += fmt::format("{:c} [0x{:x}] ", prefix, base_addr); + ZydisFormatterFormatInstruction(&formatter, &instr, print_buff, print_buff_size, base_addr); + result += print_buff; + result += "\n"; + offset += instr.length; + base_addr += instr.length; + } else { + result += fmt::format("{:c} [0x{:x}] INVALID (0x{:02x})\n", prefix, base_addr, data[offset]); + offset++; + } + } + + return result; +} + +std::string disassemble_x86_function(u8* data, + int len, + u64 base_addr, + u64 highlight_addr, + const std::vector& x86_instructions, + const std::vector& irs, + bool* had_failure) { + std::string result; + ZydisDecoder decoder; + ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64); + ZydisFormatter formatter; + ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL); + ZydisDecodedInstruction instr; + + constexpr int print_buff_size = 512; + char print_buff[print_buff_size]; + int offset = 0; + + int current_instruction_idx = -1; + int current_ir_idx = -1; + + assert(highlight_addr >= base_addr); + int mark_offset = int(highlight_addr - base_addr); + while (offset < len) { + char prefix = (offset == mark_offset) ? '-' : ' '; + if (ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, data + offset, len - offset, &instr))) { + bool warn_messed_up = false; + bool print_ir = false; + // we should have a next instruction. + if (current_instruction_idx + 1 >= int(x86_instructions.size())) { + warn_messed_up = true; + if (had_failure) { + *had_failure = true; + } + } else { + // we should line up with the next instruction + if (x86_instructions.at(current_instruction_idx + 1).offset == offset) { + // perfect, everything is lined up! + current_instruction_idx++; + while (current_instruction_idx + 1 < int(x86_instructions.size()) && + x86_instructions.at(current_instruction_idx + 1).offset == offset) { + current_instruction_idx++; + } + } else { + printf("offset mess up, at %d, expected %d\n", offset, + x86_instructions.at(current_instruction_idx + 1).offset); + warn_messed_up = true; + if (had_failure) { + *had_failure = true; + } + } + } + + if (current_instruction_idx >= 0 && current_instruction_idx < int(x86_instructions.size())) { + const auto& debug_instr = x86_instructions.at(current_instruction_idx); + if (debug_instr.kind == InstructionInfo::IR && debug_instr.ir_idx != current_ir_idx) { + current_ir_idx = debug_instr.ir_idx; + print_ir = true; + } + } + + std::string line; + + line += fmt::format("{:c} [0x{:x}] ", prefix, base_addr); + ZydisFormatterFormatInstruction(&formatter, &instr, print_buff, print_buff_size, base_addr); + line += print_buff; + + if (print_ir && current_ir_idx >= 0 && current_ir_idx < int(irs.size())) { + if (line.size() < 50) { + line.append(50 - line.size(), ' '); + } + line += " "; + line += irs.at(current_ir_idx); + } + + if (warn_messed_up) { + line += " ;; function's instruction do not align with debug data, something is wrong."; + } + line += "\n"; + result += line; + offset += instr.length; + base_addr += instr.length; + } else { + result += fmt::format("{:c} [0x{:x}] INVALID (0x{:02x})\n", prefix, base_addr, data[offset]); + offset++; + } + } + + return result; +} \ No newline at end of file diff --git a/goalc/debugger/disassemble.h b/goalc/debugger/disassemble.h new file mode 100644 index 000000000..86da2dc30 --- /dev/null +++ b/goalc/debugger/disassemble.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include +#include "common/common_types.h" +#include "goalc/emitter/Instruction.h" + +struct InstructionInfo { + emitter::Instruction instruction; //! the actual x86 instruction + enum Kind { PROLOGUE, IR, EPILOGUE } kind; + int ir_idx = -1; + int offset = -1; + + InstructionInfo(const emitter::Instruction& _instruction, Kind _kind) + : instruction(_instruction), kind(_kind) {} + + InstructionInfo(const emitter::Instruction& _instruction, Kind _kind, int _ir_idx) + : instruction(_instruction), kind(_kind), ir_idx(_ir_idx) {} +}; + +std::string disassemble_x86(u8* data, int len, u64 base_addr); +std::string disassemble_x86(u8* data, int len, u64 base_addr, u64 highlight_addr); + +std::string disassemble_x86_function(u8* data, + int len, + u64 base_addr, + u64 highlight_addr, + const std::vector& x86_instructions, + const std::vector& irs, + bool* had_failure); \ No newline at end of file diff --git a/goalc/emitter/ObjectGenerator.cpp b/goalc/emitter/ObjectGenerator.cpp index b8fed2da0..740a2213b 100644 --- a/goalc/emitter/ObjectGenerator.cpp +++ b/goalc/emitter/ObjectGenerator.cpp @@ -14,6 +14,7 @@ */ #include "ObjectGenerator.h" +#include "goalc/debugger/DebugInfo.h" #include "common/goal_constants.h" #include "common/versions.h" @@ -43,16 +44,26 @@ ObjectFileData ObjectGenerator::generate_data_v3() { insert_data(seg, 0xae); } + // add debug info for the function start + function.debug->offset_in_seg = m_data_by_seg.at(seg).size(); + function.debug->seg = seg; + // insert instructions! - for (const auto& instr : function.instructions) { + + for (size_t instr_idx = 0; instr_idx < function.instructions.size(); instr_idx++) { + const auto& instr = function.instructions[instr_idx]; u8 temp[128]; auto count = instr.emit(temp); assert(count < 128); function.instruction_to_byte_in_data.push_back(data.size()); + function.debug->instructions.at(instr_idx).offset = + data.size() - function.debug->offset_in_seg; for (int i = 0; i < count; i++) { insert_data(seg, temp[i]); } } + + function.debug->length = m_data_by_seg.at(seg).size() - function.debug->offset_in_seg; } } @@ -98,12 +109,16 @@ ObjectFileData ObjectGenerator::generate_data_v3() { * Add a new function to seg, and return a FunctionRecord which can be used to specify this * new function. */ -FunctionRecord ObjectGenerator::add_function_to_seg(int seg, int min_align) { +FunctionRecord ObjectGenerator::add_function_to_seg(int seg, + FunctionDebugInfo* debug, + int min_align) { FunctionRecord rec; rec.seg = seg; rec.func_id = int(m_function_data_by_seg.at(seg).size()); + rec.debug = debug; m_function_data_by_seg.at(seg).emplace_back(); m_function_data_by_seg.at(seg).back().min_align = min_align; + m_function_data_by_seg.at(seg).back().debug = debug; m_all_function_records.push_back(rec); return rec; } @@ -117,13 +132,15 @@ FunctionRecord ObjectGenerator::get_existing_function_record(int f_idx) { * actual Instructions. These Instructions can be added with add_instruction. The IR_Record * can be used as a label for jump targets. */ -IR_Record ObjectGenerator::add_ir(const FunctionRecord& func) { +IR_Record ObjectGenerator::add_ir(const FunctionRecord& func, const std::string& debug_print) { IR_Record rec; rec.seg = func.seg; rec.func_id = func.func_id; auto& func_data = m_function_data_by_seg.at(rec.seg).at(rec.func_id); rec.ir_id = int(func_data.ir_to_instruction.size()); func_data.ir_to_instruction.push_back(int(func_data.instructions.size())); + assert(int(func.debug->irs.size()) == rec.ir_id); + func.debug->irs.push_back(debug_print); return rec; } @@ -162,12 +179,18 @@ InstructionRecord ObjectGenerator::add_instr(Instruction inst, IR_Record ir) { rec.ir_id = ir.ir_id; auto& func_data = m_function_data_by_seg.at(rec.seg).at(rec.func_id); rec.instr_id = int(func_data.instructions.size()); - func_data.instructions.push_back(inst); + func_data.instructions.emplace_back(inst); + auto debug = m_function_data_by_seg.at(ir.seg).at(ir.func_id).debug; + debug->instructions.emplace_back(inst, InstructionInfo::Kind::IR, ir.ir_id); return rec; } -void ObjectGenerator::add_instr_no_ir(FunctionRecord func, Instruction inst) { - m_function_data_by_seg.at(func.seg).at(func.func_id).instructions.push_back(inst); +void ObjectGenerator::add_instr_no_ir(FunctionRecord func, + Instruction inst, + InstructionInfo::Kind kind) { + auto info = InstructionInfo(inst, kind); + m_function_data_by_seg.at(func.seg).at(func.func_id).instructions.emplace_back(inst); + func.debug->instructions.push_back(info); } /*! diff --git a/goalc/emitter/ObjectGenerator.h b/goalc/emitter/ObjectGenerator.h index b6ec26c66..5eaf9475e 100644 --- a/goalc/emitter/ObjectGenerator.h +++ b/goalc/emitter/ObjectGenerator.h @@ -1,3 +1,8 @@ +/*! + * @file ObjectGenerator.h + * Generates GOAL object files with linking and debug data. + */ + #pragma once #ifndef JAK_OBJECTGENERATOR_H @@ -8,20 +13,33 @@ #include #include "ObjectFileData.h" #include "Instruction.h" +#include "goalc/debugger/DebugInfo.h" + +class FunctionDebugInfo; namespace emitter { +/*! + * A reference to a function added. + */ struct FunctionRecord { + FunctionDebugInfo* debug = nullptr; int seg = -1; int func_id = -1; }; +/*! + * A reference to an IR instruction. + */ struct IR_Record { int seg = -1; int func_id = -1; int ir_id = -1; }; +/*! + * A reference to an x86 instruction + */ struct InstructionRecord { int seg = -1; int func_id = -1; @@ -29,26 +47,28 @@ struct InstructionRecord { int instr_id = -1; }; +/*! + * A reference to static data. + */ struct StaticRecord { int seg = -1; int static_id = -1; }; -struct ObjectDebugInfo {}; - class ObjectGenerator { public: ObjectGenerator() = default; ObjectFileData generate_data_v3(); FunctionRecord add_function_to_seg(int seg, + FunctionDebugInfo* debug, int min_align = 16); // should align and insert function tag FunctionRecord get_existing_function_record(int f_idx); - IR_Record add_ir(const FunctionRecord& func); + IR_Record add_ir(const FunctionRecord& func, const std::string& debug_print); IR_Record get_future_ir_record(const FunctionRecord& func, int ir_id); IR_Record get_future_ir_record_in_same_func(const IR_Record& irec, int ir_id); InstructionRecord add_instr(Instruction inst, IR_Record ir); - void add_instr_no_ir(FunctionRecord func, Instruction inst); + void add_instr_no_ir(FunctionRecord func, Instruction inst, InstructionInfo::Kind kind); StaticRecord add_static_to_seg(int seg, int min_align = 16); std::vector& get_static_data(const StaticRecord& rec); void link_instruction_jump(InstructionRecord jump_instr, IR_Record destination); @@ -64,8 +84,6 @@ class ObjectGenerator { void link_instruction_to_function(const InstructionRecord& instr, const FunctionRecord& target_func); - ObjectDebugInfo create_debug_info(); - private: void handle_temp_static_type_links(int seg); void handle_temp_jump_links(int seg); @@ -81,11 +99,12 @@ class ObjectGenerator { std::vector generate_header_v3(); template - void insert_data(int seg, const T& x) { + u64 insert_data(int seg, const T& x) { auto& data = m_data_by_seg.at(seg); auto insert_location = data.size(); data.resize(insert_location + sizeof(T)); memcpy(data.data() + insert_location, &x, sizeof(T)); + return insert_location; } template @@ -101,6 +120,7 @@ class ObjectGenerator { std::vector ir_to_instruction; std::vector instruction_to_byte_in_data; int min_align = 16; + FunctionDebugInfo* debug = nullptr; }; struct StaticData { diff --git a/goalc/emitter/Register.h b/goalc/emitter/Register.h index b31ea0194..b39cc713a 100644 --- a/goalc/emitter/Register.h +++ b/goalc/emitter/Register.h @@ -20,6 +20,9 @@ enum class RegKind : u8 { GPR, XMM, INVALID }; std::string to_string(RegKind kind); +constexpr int GPR_SIZE = 8; +constexpr int XMM_SIZE = 16; + // registers by name enum X86_REG : s8 { RAX, // return, temp diff --git a/goalc/emitter/disassemble.cpp b/goalc/emitter/disassemble.cpp deleted file mode 100644 index 51d93b021..000000000 --- a/goalc/emitter/disassemble.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "disassemble.h" -#include "Zydis/Zydis.h" -#include "third-party/fmt/core.h" - -std::string disassemble_x86(u8* data, int len, u64 base_addr) { - std::string result; - ZydisDecoder decoder; - ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64); - ZydisFormatter formatter; - ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL); - ZydisDecodedInstruction instr; - - constexpr int print_buff_size = 512; - char print_buff[print_buff_size]; - int offset = 0; - while (ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, data + offset, len - offset, &instr))) { - result += fmt::format("[0x{:x}] ", base_addr); - ZydisFormatterFormatInstruction(&formatter, &instr, print_buff, print_buff_size, base_addr); - result += print_buff; - result += "\n"; - - offset += instr.length; - base_addr += instr.length; - } - - return result; -} - -std::string disassemble_x86(u8* data, int len, u64 base_addr, u64 highlight_addr) { - std::string result; - ZydisDecoder decoder; - ZydisDecoderInit(&decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64); - ZydisFormatter formatter; - ZydisFormatterInit(&formatter, ZYDIS_FORMATTER_STYLE_INTEL); - ZydisDecodedInstruction instr; - - constexpr int print_buff_size = 512; - char print_buff[print_buff_size]; - int offset = 0; - - assert(highlight_addr > base_addr); - int mark_offset = int(highlight_addr - base_addr); - while (offset < len) { - char prefix = (offset == mark_offset) ? '-' : ' '; - if (ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&decoder, data + offset, len - offset, &instr))) { - result += fmt::format("{:c} [0x{:x}] ", prefix, base_addr); - ZydisFormatterFormatInstruction(&formatter, &instr, print_buff, print_buff_size, base_addr); - result += print_buff; - result += "\n"; - offset += instr.length; - base_addr += instr.length; - } else { - result += fmt::format("{:c} [0x{:x}] INVALID (0x{:02x})\n", prefix, base_addr, data[offset]); - offset++; - } - } - - return result; -} \ No newline at end of file diff --git a/goalc/emitter/disassemble.h b/goalc/emitter/disassemble.h deleted file mode 100644 index 893ee45c4..000000000 --- a/goalc/emitter/disassemble.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#include -#include "common/common_types.h" - -std::string disassemble_x86(u8* data, int len, u64 base_addr); -std::string disassemble_x86(u8* data, int len, u64 base_addr, u64 highlight_addr); \ No newline at end of file diff --git a/goalc/listener/Listener.cpp b/goalc/listener/Listener.cpp index ebc8c1ee3..a398ee3f4 100644 --- a/goalc/listener/Listener.cpp +++ b/goalc/listener/Listener.cpp @@ -28,9 +28,12 @@ #include #include #include +#include #include "Listener.h" #include "common/versions.h" +#include "third-party/fmt/core.h" + using namespace versions; constexpr bool debug_listener = false; @@ -258,8 +261,6 @@ void Listener::receive_func() { if (hdr->msg_kind == ListenerMessageKind::MSG_PRINT) { printf("%s\n", str_buff); - } else { - printf("[OUTPUT] %s\n", str_buff); } rcv_mtx.lock(); @@ -438,6 +439,10 @@ bool Listener::wait_for_ack() { return false; } +/*! + * Handle an output message from the runtime. + * This is used to update the memory map and get initial information for the debugger. + */ void Listener::handle_output_message(const char* msg) { std::string all(msg); @@ -482,12 +487,13 @@ void Listener::handle_output_message(const char* msg) { auto load_kind_str = str.substr(x, next - x); x = next; - std::string seg_strings[3]; + std::string seg_strings[6]; for (auto& seg_string : seg_strings) { assert(x + 1 < str.length()); next = str.find(' ', x + 1); seg_string = str.substr(x, next - x); + x = next; } LoadEntry entry; @@ -496,23 +502,38 @@ void Listener::handle_output_message(const char* msg) { entry.segments[i] = std::stoul(seg_strings[i].substr(3), nullptr, 16); } + for (int i = 0; i < 3; i++) { + entry.segment_sizes[i] = std::stoul(seg_strings[i + 3].substr(3), nullptr, 16); + } + add_load(name_str.substr(2, name_str.length() - 3), entry); + // fmt::print("LOAD:\n{}", entry.print()); } else { // todo unload - printf("[Listener Warning] unknown output kind \"%s\"\n", kind.c_str()); + printf("[Listener Warning] unknown output message \"%s\"\n", msg); } } } +/*! + * Add a load to the load listing. + */ void Listener::add_load(const std::string& name, const LoadEntry& le) { - if (m_load_entries.find(name) != m_load_entries.end()) { - printf("[Listener Error] The runtime has loaded %s twice!\n", name.c_str()); + if (m_load_entries.find(name) != m_load_entries.end() && name != "*listener*") { + printf("[Listener Warning] The runtime has loaded %s twice!\n", name.c_str()); } m_load_entries[name] = le; } +/*! + * Add a debugger that the listener should inform. + */ void Listener::add_debugger(Debugger* debugger) { m_debugger = debugger; } +MemoryMap Listener::build_memory_map() { + return MemoryMap(m_load_entries); +} + } // namespace listener diff --git a/goalc/listener/Listener.h b/goalc/listener/Listener.h index 6dbfe0a97..0ff187490 100644 --- a/goalc/listener/Listener.h +++ b/goalc/listener/Listener.h @@ -17,14 +17,10 @@ #include "common/listener_common.h" #include "common/cross_os_debug/xdbg.h" #include "goalc/debugger/Debugger.h" +#include "MemoryMap.h" namespace listener { -struct LoadEntry { - uint32_t segments[3] = {0, 0, 0}; - std::string load_string; -}; - class Listener { public: static constexpr int BUFFER_SIZE = 32 * 1024 * 1024; @@ -42,8 +38,7 @@ class Listener { void send_code(std::vector& code); void add_debugger(Debugger* debugger); bool most_recent_send_was_acked() const { return got_ack; } - bool get_load_entry(const std::string& name, LoadEntry* out = nullptr); - std::vector get_all_loaded(); + MemoryMap build_memory_map(); private: void add_load(const std::string& name, const LoadEntry& le); diff --git a/goalc/listener/MemoryMap.cpp b/goalc/listener/MemoryMap.cpp new file mode 100644 index 000000000..b620f0b26 --- /dev/null +++ b/goalc/listener/MemoryMap.cpp @@ -0,0 +1,128 @@ +#include +#include +#include "MemoryMap.h" +#include "third-party/fmt/core.h" +#include "common/link_types.h" + +namespace { +uint32_t align16(uint32_t in) { + return (in + 15) & (~15); +} + +const char* segment_names[3] = {"main", "debug", "top-level"}; +} // namespace + +namespace listener { +std::string LoadEntry::print() const { + std::string result; + const SegmentTypes types[3] = {MAIN_SEGMENT, DEBUG_SEGMENT, TOP_LEVEL_SEGMENT}; + for (int i = 0; i < 3; i++) { + result += fmt::format("{} : 0x{:x} size 0x{:x}\n", segment_names[i], segments[int(types[i])], + segment_sizes[int(types[i])]); + } + return result; +} + +std::string MemoryMapEntry::print() const { + std::string result; + result += fmt::format("0x{:x} to 0x{:x}: ", start_addr, end_addr); + if (empty) { + result += "emtpy!\n"; + } else { + result += fmt::format("{} seg {}\n", obj_name, seg_id); + } + return result; +} + +MemoryMap::MemoryMap(const std::unordered_map& load_entries) { + std::vector entries; + + for (const auto& x : load_entries) { + for (int i = 0; i < N_SEG; i++) { + if (i != TOP_LEVEL_SEGMENT && x.second.segments[i] != 0 && x.second.segment_sizes[i] != 0) { + MemoryMapEntry entry; + entry.start_addr = x.second.segments[i]; + // we cheat the segment sizes to be 16 byte aligned. This avoids tiny gaps < 16 bytes in the + // memory map. + entry.end_addr = entry.start_addr + align16(x.second.segment_sizes[i]); + entry.seg_id = i; + entry.obj_name = x.first; + entry.empty = false; + entries.push_back(entry); + } + } + } + + std::sort(entries.begin(), entries.end(), [](const MemoryMapEntry& a, const MemoryMapEntry& b) { + return a.start_addr < b.start_addr; + }); + + u32 last_addr = 0; + for (const auto& entry : entries) { + if (entry.start_addr < last_addr) { + // this is bad. + printf("[Listener Error] We could not build a memory map.\n"); + MemoryMap temp(m_entries); + printf("%s\n", temp.print().c_str()); + printf("Can't add %s\n", entry.print().c_str()); + + assert(false); // todo, handle this more gracefully + } else if (entry.start_addr > last_addr) { + // this is fine, there's just a gap. + MemoryMapEntry gap; + gap.start_addr = last_addr; + gap.end_addr = entry.start_addr; + gap.empty = true; + m_entries.push_back(gap); + } + + m_entries.push_back(entry); + last_addr = entry.end_addr; + } + + MemoryMapEntry last_gap; + last_gap.empty = true; + last_gap.start_addr = last_addr; + last_gap.end_addr = UINT32_MAX; + m_entries.push_back(last_gap); +} + +std::string MemoryMap::print() const { + std::string result; + result += std::string(40, '-'); + result += '\n'; + for (auto& entry : m_entries) { + if (entry.empty) { + result += fmt::format(" [0x{:08x}] GAP of 0x{:x} bytes, until 0x{:x}\n", entry.start_addr, + entry.end_addr - entry.start_addr, entry.end_addr); + } else { + result += fmt::format( + " [0x{:08x}] SEGMENT of 0x{:x} bytes, until 0x{:x}\n name: {}\n kind: {}\n", + entry.start_addr, entry.end_addr - entry.start_addr, entry.end_addr, entry.obj_name, + segment_names[entry.seg_id]); + } + result += std::string(40, '-'); + result += '\n'; + } + return result; +} + +const MemoryMapEntry& MemoryMap::lookup(u32 addr) { + for (auto& entry : m_entries) { + if (addr >= entry.start_addr && addr < entry.end_addr) { + return entry; + } + } + assert(false); +} + +bool MemoryMap::lookup(const std::string& obj_name, u8 seg_id, MemoryMapEntry* out) { + for (auto& entry : m_entries) { + if (!entry.empty && entry.obj_name == obj_name && entry.seg_id == seg_id) { + *out = entry; + return true; + } + } + return false; +} +} // namespace listener \ No newline at end of file diff --git a/goalc/listener/MemoryMap.h b/goalc/listener/MemoryMap.h new file mode 100644 index 000000000..8deb5900d --- /dev/null +++ b/goalc/listener/MemoryMap.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include +#include +#include "common/common_types.h" + +namespace listener { + +struct LoadEntry { + uint32_t segments[3] = {0, 0, 0}; + uint32_t segment_sizes[3] = {0, 0, 0}; + std::string load_string; + std::string print() const; +}; + +struct MemoryMapEntry { + u32 start_addr = 0; + u32 end_addr = 0; + std::string obj_name; + u8 seg_id = 0; + bool empty = false; + std::string print() const; +}; + +class MemoryMap { + public: + MemoryMap() = default; + explicit MemoryMap(const std::unordered_map& load_entries); + explicit MemoryMap(std::vector entries) : m_entries(std::move(entries)) {} + std::string print() const; + const MemoryMapEntry& lookup(u32 addr); + bool lookup(const std::string& obj_name, u8 seg_id, MemoryMapEntry* out); + + private: + std::vector m_entries; +}; +} // namespace listener diff --git a/test/goalc/test_debugger.cpp b/test/goalc/test_debugger.cpp index 52bfa009d..46524d1df 100644 --- a/test/goalc/test_debugger.cpp +++ b/test/goalc/test_debugger.cpp @@ -148,23 +148,24 @@ TEST(Debugger, SimpleBreakpoint) { } else { compiler.connect_to_target(); compiler.poke_target(); - compiler.run_test_from_string("(defun test-function () (+ 1 2 3 4 5 6))"); - ; - compiler.run_test_from_string("(dbg)"); + compiler.run_test_from_string( + "(defun fake-function () 0) (defun test-function () (+ 1 2 3 4 5 6)) (defun " + "fake-function-2 () 0)"); + compiler.run_test_from_string("(dbg)", "a"); u32 func_addr; EXPECT_TRUE(compiler.get_debugger().get_symbol_value("test-function", &func_addr)); EXPECT_TRUE(compiler.get_debugger().is_valid()); EXPECT_TRUE(compiler.get_debugger().is_halted()); compiler.get_debugger().add_addr_breakpoint(func_addr); // todo from code. - compiler.run_test_from_string("(:cont)"); - compiler.run_test_from_string("(test-function)"); + compiler.run_test_from_string("(:cont)", "a"); + compiler.run_test_from_string("(test-function)", "a"); // wait for breakpoint to be hit. while (!compiler.get_debugger().is_halted()) { std::this_thread::sleep_for(std::chrono::milliseconds(100)); } - compiler.get_debugger().get_break_info(); + compiler.get_debugger().update_break_info(); auto expected_instr_before_rip = compiler.get_debugger().get_x86_base_addr() + func_addr; auto rip = compiler.get_debugger().get_regs().rip; // instructions can be at most 15 bytes long. @@ -176,6 +177,15 @@ TEST(Debugger, SimpleBreakpoint) { EXPECT_TRUE(rsp > compiler.get_debugger().get_x86_base_addr() + EE_MAIN_MEM_SIZE - (16 * 1024)); EXPECT_TRUE(compiler.get_debugger().is_halted()); + auto bi = compiler.get_debugger().get_cached_break_info(); + EXPECT_TRUE(bi.knows_function); + EXPECT_TRUE(bi.knows_object); + EXPECT_TRUE(bi.object_name == "*listener*"); + EXPECT_TRUE(bi.function_name == "test-function"); + EXPECT_FALSE(bi.disassembly_failed); + // if we change this to be before the break instruction this might need to be 0 in the future. + EXPECT_EQ(bi.function_offset, 1); + compiler.get_debugger().remove_addr_breakpoint(func_addr); compiler.get_debugger().do_continue(); diff --git a/test/goalc/test_with_game.cpp b/test/goalc/test_with_game.cpp index ef7067325..445e64b07 100644 --- a/test/goalc/test_with_game.cpp +++ b/test/goalc/test_with_game.cpp @@ -137,6 +137,22 @@ TEST_F(WithGameTests, All) { get_test_pass_string("new-static-basic", 9)); runner.run_static_test(env, testCategory, "test-vector-dot.gc", get_test_pass_string("vector-dot", 1)); + + auto mem_map = compiler.listener().build_memory_map(); + + // we should have gkernel main segment + listener::MemoryMapEntry gk_main; + EXPECT_TRUE(mem_map.lookup("gkernel", MAIN_SEGMENT, &gk_main)); + auto lookup_2 = mem_map.lookup(gk_main.start_addr + 12); + EXPECT_TRUE(lookup_2.obj_name == "gkernel"); + EXPECT_FALSE(lookup_2.empty); + EXPECT_EQ(lookup_2.seg_id, MAIN_SEGMENT); + + auto di = compiler.get_debugger().get_debug_info_for_object("gcommon"); + bool fail = false; + auto result = di.disassemble_debug_functions(&fail); + // printf("Got\n%s\n", result.c_str()); + EXPECT_FALSE(fail); } TEST(TypeConsistency, TypeConsistency) {