diff --git a/common/type_system/TypeSystem.cpp b/common/type_system/TypeSystem.cpp index 705f3fba4..3906da1e0 100644 --- a/common/type_system/TypeSystem.cpp +++ b/common/type_system/TypeSystem.cpp @@ -433,6 +433,38 @@ MethodInfo TypeSystem::lookup_method(const std::string& type_name, const std::st throw std::runtime_error("lookup_method failed"); } +/*! + * Like lookup_method, but won't throw or print an error when things go wrong. + */ +bool TypeSystem::try_lookup_method(const std::string& type_name, int method_id, MethodInfo* info) { + auto kv = m_types.find(type_name); + if (kv == m_types.end()) { + return false; + } + + auto* iter_type = kv->second.get(); + // look up the method + while (true) { + if (method_id == GOAL_NEW_METHOD) { + if (iter_type->get_my_new_method(info)) { + return true; + } + } else { + if (iter_type->get_my_method(method_id, info)) { + return true; + } + } + + if (iter_type->has_parent()) { + iter_type = lookup_type(iter_type->get_parent()); + } else { + // couldn't find method. + break; + } + } + return false; +} + /*! * Lookup information on a method by ID number. Error if it can't be found. Will check parent types * if the given type doesn't specialize the method. 
diff --git a/common/type_system/TypeSystem.h b/common/type_system/TypeSystem.h index 82388d3de..b27c96aca 100644 --- a/common/type_system/TypeSystem.h +++ b/common/type_system/TypeSystem.h @@ -149,6 +149,7 @@ class TypeSystem { MethodInfo add_new_method(Type* type, const TypeSpec& ts); MethodInfo lookup_method(const std::string& type_name, const std::string& method_name); MethodInfo lookup_method(const std::string& type_name, int method_id); + bool try_lookup_method(const std::string& type_name, int method_id, MethodInfo* info); MethodInfo lookup_new_method(const std::string& type_name); void assert_method_id(const std::string& type_name, const std::string& method_name, int id); diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt index 18bc92b65..a66bd1255 100644 --- a/decompiler/CMakeLists.txt +++ b/decompiler/CMakeLists.txt @@ -30,6 +30,7 @@ add_library( IR2/AtomicOp.cpp IR2/AtomicOpBuilder.cpp + IR2/AtomicOpTypeAnalysis.cpp IR2/Env.cpp ObjectFile/LinkedObjectFile.cpp diff --git a/decompiler/Function/Function.h b/decompiler/Function/Function.h index 80195c62a..2621aaa5a 100644 --- a/decompiler/Function/Function.h +++ b/decompiler/Function/Function.h @@ -93,6 +93,10 @@ class Function { DecompilerTypeSystem& dts, LinkedObjectFile& file, const std::unordered_map>& hints); + bool run_type_analysis_ir2(const TypeSpec& my_type, + DecompilerTypeSystem& dts, + LinkedObjectFile& file, + const std::unordered_map>& hints); void run_reg_usage(); bool build_expression(LinkedObjectFile& file); BlockTopologicalSort bb_topo_sort(); diff --git a/decompiler/Function/TypeAnalysis.cpp b/decompiler/Function/TypeAnalysis.cpp index 339acaa32..165610b08 100644 --- a/decompiler/Function/TypeAnalysis.cpp +++ b/decompiler/Function/TypeAnalysis.cpp @@ -14,18 +14,18 @@ TypeState construct_initial_typestate(const TypeSpec& f_ts) { for (int i = 0; i < int(f_ts.arg_count()) - 1; i++) { auto reg_id = goal_args[i]; auto reg_type = f_ts.get_arg(i); - result.gpr_types[reg_id] = 
TP_Type::make_from_typespec(reg_type); + result.gpr_types[reg_id] = TP_Type::make_from_ts(reg_type); } // todo, more specific process types for behaviors. - result.gpr_types[Reg::S6] = TP_Type::make_from_typespec(TypeSpec("process")); + result.gpr_types[Reg::S6] = TP_Type::make_from_ts(TypeSpec("process")); return result; } void apply_hints(const std::vector& hints, TypeState* state, DecompilerTypeSystem& dts) { for (auto& hint : hints) { try { - state->get(hint.reg) = TP_Type::make_from_typespec(dts.parse_type_spec(hint.type_name)); + state->get(hint.reg) = TP_Type::make_from_ts(dts.parse_type_spec(hint.type_name)); } catch (std::exception& e) { printf("failed to parse hint: %s\n", e.what()); assert(false); @@ -92,7 +92,7 @@ bool Function::run_type_analysis(const TypeSpec& my_type, try_apply_hints(op_id, hints, init_types, dts); } - // while the implementation of propagate_types is in progress, it may throw + // while the implementation of propagate_types_internal is in progress, it may throw // for unimplemented cases. Eventually this try/catch should be removed. try { op->propagate_types(*init_types, file, dts); @@ -134,4 +134,100 @@ bool Function::run_type_analysis(const TypeSpec& my_type, return true; } + +bool Function::run_type_analysis_ir2(const TypeSpec& my_type, + DecompilerTypeSystem& dts, + LinkedObjectFile& file, + const std::unordered_map>& hints) { + (void)file; + // STEP 0 - set decompiler type system settings for this function. In config we can manually + // specify some settings for type propagation to reduce the strictness of type propagation. 
+ dts.type_prop_settings.reset(); + if (get_config().pair_functions_by_name.find(guessed_name.to_string()) != + get_config().pair_functions_by_name.end()) { + dts.type_prop_settings.allow_pair = true; + } + + if (guessed_name.kind == FunctionName::FunctionKind::METHOD) { + dts.type_prop_settings.current_method_type = guessed_name.type_name; + } + + std::vector block_init_types, op_types; + block_init_types.resize(basic_blocks.size()); + op_types.resize(ir2.atomic_ops->ops.size()); + auto& aop = ir2.atomic_ops; + + // STEP 1 - topologocial sort the blocks. This gives us an order where we: + // - never visit unreachable blocks (we can't type propagate these) + // - always visit at least one predecessor of a block before that block + auto order = bb_topo_sort(); + assert(!order.vist_order.empty()); + assert(order.vist_order.front() == 0); + + // STEP 2 - initialize type state for the first block to the function argument types. + block_init_types.at(0) = construct_initial_typestate(my_type); + // and add hints from config + try_apply_hints(0, hints, &block_init_types.at(0), dts); + + // STEP 3 - propagate types until the result stops changing + bool run_again = true; + while (run_again) { + run_again = false; + // do each block in the topological sort order: + for (auto block_id : order.vist_order) { + auto& block = basic_blocks.at(block_id); + TypeState* init_types = &block_init_types.at(block_id); + for (int op_id = aop->block_id_to_first_atomic_op.at(block_id); + op_id < aop->block_id_to_end_atomic_op.at(block_id); op_id++) { + // apply type hints only if we are not the first op. + if (op_id != aop->block_id_to_first_atomic_op.at(block_id)) { + try_apply_hints(op_id, hints, init_types, dts); + } + + auto& op = aop->ops.at(op_id); + + // while the implementation of propagate_types_internal is in progress, it may throw + // for unimplemented cases. Eventually this try/catch should be removed. 
+ try { + op_types.at(op_id) = op->propagate_types(*init_types, ir2.env, dts); + } catch (std::runtime_error& e) { + fmt::print("Type prop fail on {}: {}\n", guessed_name.to_string(), e.what()); + warnings += ";; Type prop attempted and failed.\n"; + ir2.env.set_types(block_init_types, op_types); + return false; + } + + // todo, set run again?? + + // for the next op... + init_types = &op_types.at(op_id); + } + + // propagate the types: for each possible succ + for (auto succ_block_id : {block.succ_ft, block.succ_branch}) { + if (succ_block_id != -1) { + // apply hint + try_apply_hints(aop->block_id_to_first_atomic_op.at(succ_block_id), hints, init_types, + dts); + + // set types to LCA (current, new) + if (dts.tp_lca(&block_init_types.at(succ_block_id), *init_types)) { + // if something changed, run again! + run_again = true; + } + } + } + } + } + + auto last_type = op_types.back().get(Register(Reg::GPR, Reg::V0)).typespec(); + if (last_type != my_type.last_arg()) { + warnings += fmt::format(";; return type mismatch {} vs {}. 
", last_type.print(), + my_type.last_arg().print()); + } + + ir2.env.set_types(block_init_types, op_types); + + return true; +} } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR/IR.cpp b/decompiler/IR/IR.cpp index 5c6bb28ce..0572c0c23 100644 --- a/decompiler/IR/IR.cpp +++ b/decompiler/IR/IR.cpp @@ -47,16 +47,6 @@ void add_regs_to_str(const T& regs, std::string& str) { str.append(reg.to_charp()); } } - -u32 regs_to_gpr_mask(const std::vector& regs) { - u32 result = 0; - for (const auto& reg : regs) { - if (reg.get_kind() == Reg::GPR) { - result |= (1 << reg.get_gpr()); - } - } - return result; -} } // namespace std::string IR_Atomic::print_with_reguse(const LinkedObjectFile& file) const { diff --git a/decompiler/IR/IR_TypeAnalysis.cpp b/decompiler/IR/IR_TypeAnalysis.cpp index 4545c9941..f56303abe 100644 --- a/decompiler/IR/IR_TypeAnalysis.cpp +++ b/decompiler/IR/IR_TypeAnalysis.cpp @@ -163,13 +163,13 @@ TP_Type IR_Load::get_expression_type(const TypeState& input, // but for now, this is probably good enough. if (kind == FLOAT) { // loading static data with a FLOAT kind load (lwc1), assume result is a float. - return TP_Type::make_from_typespec(dts.ts.make_typespec("float")); + return TP_Type::make_from_ts(dts.ts.make_typespec("float")); } if (size == 8) { // 8 byte integer constants are always loaded from a static pool // this could technically hide loading a different type from inside of a static basic. - return TP_Type::make_from_typespec(dts.ts.make_typespec("uint")); + return TP_Type::make_from_ts(dts.ts.make_typespec("uint")); } } @@ -191,7 +191,7 @@ TP_Type IR_Load::get_expression_type(const TypeState& input, // remember that we're an object new. 
return TP_Type::make_object_new(method_type); } - return TP_Type::make_from_typespec(method_type); + return TP_Type::make_from_ts(method_type); } if (input_type.kind == TP_Type::Kind::TYPESPEC && input_type.typespec() == TypeSpec("type") && @@ -201,7 +201,7 @@ TP_Type IR_Load::get_expression_type(const TypeState& input, auto method_info = dts.ts.lookup_method("object", method_id); if (method_id != GOAL_NEW_METHOD && method_id != GOAL_RELOC_METHOD) { // this can get us the wrong thing for `new` methods. And maybe relocate? - return TP_Type::make_from_typespec(method_info.type.substitute_for_method_call("object")); + return TP_Type::make_from_ts(method_info.type.substitute_for_method_call("object")); } } @@ -217,7 +217,7 @@ TP_Type IR_Load::get_expression_type(const TypeState& input, case 2: case 4: case 8: - return TP_Type::make_from_typespec(TypeSpec("uint")); + return TP_Type::make_from_ts(TypeSpec("uint")); default: break; } @@ -228,13 +228,13 @@ TP_Type IR_Load::get_expression_type(const TypeState& input, case 2: case 4: case 8: - return TP_Type::make_from_typespec(TypeSpec("int")); + return TP_Type::make_from_ts(TypeSpec("int")); default: break; } break; case FLOAT: - return TP_Type::make_from_typespec(TypeSpec("float")); + return TP_Type::make_from_ts(TypeSpec("float")); default: assert(false); } @@ -260,7 +260,7 @@ TP_Type IR_Load::get_expression_type(const TypeState& input, for (auto& x : rd.tokens) { load_path.push_back(x.print()); } - return TP_Type::make_from_typespec(coerce_to_reg_type(rd.result_type)); + return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type)); } } @@ -290,7 +290,7 @@ TP_Type IR_Load::get_expression_type(const TypeState& input, if (input_type.kind == TP_Type::Kind::DYNAMIC_METHOD_ACCESS && ro.offset == 16) { // access method vtable. The input is type + (4 * method), and the 16 is the offset // of method 0. 
- return TP_Type::make_from_typespec(TypeSpec("function")); + return TP_Type::make_from_ts(TypeSpec("function")); } // Assume we're accessing a field of an object. FieldReverseLookupInput rd_in; @@ -320,7 +320,7 @@ TP_Type IR_Load::get_expression_type(const TypeState& input, for (auto& x : rd.tokens) { load_path.push_back(x.print()); } - return TP_Type::make_from_typespec(coerce_to_reg_type(rd.result_type)); + return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type)); } // rd failed, try as pair. @@ -334,10 +334,10 @@ TP_Type IR_Load::get_expression_type(const TypeState& input, // we can do. if (ro.offset == 2) { // cdr = another pair. - return TP_Type::make_from_typespec(TypeSpec("pair")); + return TP_Type::make_from_ts(TypeSpec("pair")); } else if (ro.offset == -2) { // car = some object. - return TP_Type::make_from_typespec(TypeSpec("object")); + return TP_Type::make_from_ts(TypeSpec("object")); } } } @@ -362,7 +362,7 @@ TP_Type IR_FloatMath2::get_expression_type(const TypeState& input, case SUB: case MIN: case MAX: - return TP_Type::make_from_typespec(dts.ts.make_typespec("float")); + return TP_Type::make_from_ts(dts.ts.make_typespec("float")); default: assert(false); } @@ -377,12 +377,12 @@ TP_Type IR_FloatMath1::get_expression_type(const TypeState& input, // FLOAT_TO_INT, INT_TO_FLOAT, ABS, NEG, SQRT switch (kind) { case FLOAT_TO_INT: - return TP_Type::make_from_typespec(TypeSpec("int")); + return TP_Type::make_from_ts(TypeSpec("int")); case INT_TO_FLOAT: case ABS: case NEG: case SQRT: - return TP_Type::make_from_typespec(TypeSpec("float")); + return TP_Type::make_from_ts(TypeSpec("float")); default: assert(false); } @@ -429,7 +429,7 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input, if (arg0_type == arg1_type && is_int_or_uint(dts, arg0_type)) { // both are the same type and both are int/uint, so we assume that we're doing integer math. // we strip off any weird things like multiplication or integer constant. 
- return TP_Type::make_from_typespec(arg0_type.typespec()); + return TP_Type::make_from_ts(arg0_type.typespec()); } if (is_int_or_uint(dts, arg0_type) && is_int_or_uint(dts, arg1_type)) { @@ -437,20 +437,20 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input, // but we use arg1's if arg0 is an integer constant // in either case, strip off weird stuff. if (arg0_type.is_integer_constant() && !arg1_type.is_integer_constant()) { - return TP_Type::make_from_typespec(arg1_type.typespec()); + return TP_Type::make_from_ts(arg1_type.typespec()); } - return TP_Type::make_from_typespec(arg0_type.typespec()); + return TP_Type::make_from_ts(arg0_type.typespec()); } if (tc(dts, TypeSpec("binteger"), arg0_type) && is_int_or_uint(dts, arg1_type)) { - return TP_Type::make_from_typespec(TypeSpec("binteger")); + return TP_Type::make_from_ts(TypeSpec("binteger")); } // special cases for non-integers if ((arg0_type.typespec() == TypeSpec("object") || arg0_type.typespec() == TypeSpec("pair")) && (arg1_type.is_integer_constant(62) || arg1_type.is_integer_constant(61))) { // boxed object tag trick. - return TP_Type::make_from_typespec(TypeSpec("int")); + return TP_Type::make_from_ts(TypeSpec("int")); } // @@ -513,7 +513,7 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input, if (rd.success) { // todo, load path. 
- return TP_Type::make_from_typespec(coerce_to_reg_type(rd.result_type)); + return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type)); } } // @@ -538,13 +538,13 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input, if (kind == ADD && arg0_type.typespec().base_type() == "pointer" && tc(dts, TypeSpec("integer"), arg1_type)) { // plain pointer plus integer = plain pointer - return TP_Type::make_from_typespec(TypeSpec("pointer")); + return TP_Type::make_from_ts(TypeSpec("pointer")); } if (kind == ADD && arg1_type.typespec().base_type() == "pointer" && tc(dts, TypeSpec("integer"), arg0_type)) { // plain pointer plus integer = plain pointer - return TP_Type::make_from_typespec(TypeSpec("pointer")); + return TP_Type::make_from_ts(TypeSpec("pointer")); } if (tc(dts, TypeSpec("structure"), arg1_type) && !dynamic_cast(arg0.get()) && @@ -552,7 +552,7 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input, if (arg1_type.typespec() == TypeSpec("symbol") && arg0_type.is_integer_constant(SYM_INFO_OFFSET + POINTER_SIZE)) { // symbol -> GOAL String - return TP_Type::make_from_typespec(dts.ts.make_pointer_typespec("string")); + return TP_Type::make_from_ts(dts.ts.make_pointer_typespec("string")); } else { // byte access of offset array field trick. // arg1 holds a structure. @@ -563,7 +563,7 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input, if (kind == AND) { // base case for and. Just get an integer. 
- return TP_Type::make_from_typespec(TypeSpec("int")); + return TP_Type::make_from_ts(TypeSpec("int")); } // @@ -587,7 +587,7 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input, // if (kind == SUB && tc(dts, TypeSpec("pointer"), arg0_type) && tc(dts, TypeSpec("pointer"), arg1_type)) { - return TP_Type::make_from_typespec(TypeSpec("int")); + return TP_Type::make_from_ts(TypeSpec("int")); } throw std::runtime_error( @@ -610,9 +610,9 @@ void BranchDelay::type_prop(TypeState& output, auto src = dynamic_cast(source.get()); assert(src); if (tc(dts, TypeSpec("uint"), output.get(src->reg))) { - output.get(dst->reg) = TP_Type::make_from_typespec(TypeSpec("uint")); + output.get(dst->reg) = TP_Type::make_from_ts(TypeSpec("uint")); } else if (tc(dts, TypeSpec("int"), output.get(src->reg))) { - output.get(dst->reg) = TP_Type::make_from_typespec(TypeSpec("int")); + output.get(dst->reg) = TP_Type::make_from_ts(TypeSpec("int")); } else { throw std::runtime_error("BranchDelay::type_prop DSLLV for src " + output.get(src->reg).print()); @@ -622,7 +622,7 @@ void BranchDelay::type_prop(TypeState& output, auto dst = dynamic_cast(destination.get()); assert(dst); // to match the behavior in IntMath1, assume signed when negating. - output.get(dst->reg) = TP_Type::make_from_typespec(TypeSpec("int")); + output.get(dst->reg) = TP_Type::make_from_ts(TypeSpec("int")); } break; case SET_REG_FALSE: { auto dst = dynamic_cast(destination.get()); @@ -640,7 +640,7 @@ void BranchDelay::type_prop(TypeState& output, case SET_REG_TRUE: { auto dst = dynamic_cast(destination.get()); assert(dst); - output.get(dst->reg) = TP_Type::make_from_typespec(TypeSpec("symbol")); + output.get(dst->reg) = TP_Type::make_from_ts(TypeSpec("symbol")); } break; case SET_BINTEGER: { @@ -682,14 +682,14 @@ TP_Type IR_IntMath1::get_expression_type(const TypeState& input, switch (kind) { case NEG: // if we negate a thing, let's just make it a signed integer. 
- return TP_Type::make_from_typespec(TypeSpec("int")); + return TP_Type::make_from_ts(TypeSpec("int")); case ABS: // if we take the absolute value of a thing, just make it signed. - return TP_Type::make_from_typespec(TypeSpec("int")); + return TP_Type::make_from_ts(TypeSpec("int")); case NOT: // otherwise, make it int/uint as needed (this works because we check is_int_or_uint // above) - return TP_Type::make_from_typespec(arg_type.typespec()); + return TP_Type::make_from_ts(arg_type.typespec()); } } @@ -709,7 +709,7 @@ TP_Type IR_SymbolValue::get_expression_type(const TypeState& input, // another annoying special case. We have a fake symbol called __START-OF-TABLE__ // which actually means that you get the first address in the symbol table. // it's not really a linked symbol, but the basic op builder represents it as one. - return TP_Type::make_from_typespec(TypeSpec("pointer")); + return TP_Type::make_from_ts(TypeSpec("pointer")); } // look up the type of the symbol @@ -724,7 +724,7 @@ TP_Type IR_SymbolValue::get_expression_type(const TypeState& input, } // otherwise, just return a normal typespec - return TP_Type::make_from_typespec(type->second); + return TP_Type::make_from_ts(type->second); } TP_Type IR_Symbol::get_expression_type(const TypeState& input, @@ -737,7 +737,7 @@ TP_Type IR_Symbol::get_expression_type(const TypeState& input, return TP_Type::make_false(); } - return TP_Type::make_from_typespec(TypeSpec("symbol")); + return TP_Type::make_from_ts(TypeSpec("symbol")); } TP_Type IR_IntegerConstant::get_expression_type(const TypeState& input, @@ -756,7 +756,7 @@ TP_Type IR_Compare::get_expression_type(const TypeState& input, (void)file; (void)dts; // really a boolean. 
- return TP_Type::make_from_typespec(TypeSpec("symbol")); + return TP_Type::make_from_ts(TypeSpec("symbol")); } void IR_Nop_Atomic::propagate_types(const TypeState& input, @@ -792,7 +792,7 @@ void IR_Call_Atomic::propagate_types(const TypeState& input, !dts.type_prop_settings.current_method_type.empty()) { // calling object new method. Set the result to a new object of our type end_types.get(Register(Reg::GPR, Reg::V0)) = - TP_Type::make_from_typespec(dts.type_prop_settings.current_method_type); + TP_Type::make_from_ts(dts.type_prop_settings.current_method_type); // update the call type call_type = in_tp.get_method_new_object_typespec(); call_type.get_arg(call_type.arg_count() - 1) = @@ -837,7 +837,7 @@ void IR_Call_Atomic::propagate_types(const TypeState& input, call_type = format_call_type; call_type_set = true; - end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_typespec(in_type.last_arg()); + end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_ts(in_type.last_arg()); // we can also update register usage here. read_regs.clear(); @@ -858,7 +858,7 @@ void IR_Call_Atomic::propagate_types(const TypeState& input, call_type = in_type; call_type_set = true; - end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_typespec(in_type.last_arg()); + end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_ts(in_type.last_arg()); // we can also update register usage here. read_regs.clear(); @@ -897,11 +897,11 @@ TP_Type IR_StaticAddress::get_expression_type(const TypeState& input, return TP_Type::make_from_string(file.get_goal_string_by_label(label)); } else { // otherwise, some other static basic. 
- return TP_Type::make_from_typespec(TypeSpec(word.symbol_name)); + return TP_Type::make_from_ts(TypeSpec(word.symbol_name)); } } } else if ((label.offset & 7) == PAIR_OFFSET) { - return TP_Type::make_from_typespec(TypeSpec("pair")); + return TP_Type::make_from_ts(TypeSpec("pair")); } throw std::runtime_error("IR_StaticAddress couldn't figure out the type: " + label.name); @@ -916,7 +916,7 @@ void IR_AsmOp_Atomic::propagate_types(const TypeState& input, end_types = input; if (dst_reg) { if (name == "daddu") { - end_types.get(dst_reg->reg) = TP_Type::make_from_typespec(TypeSpec("uint")); + end_types.get(dst_reg->reg) = TP_Type::make_from_ts(TypeSpec("uint")); } } } @@ -936,7 +936,7 @@ TP_Type IR_EmptyPair::get_expression_type(const TypeState& input, (void)file; (void)dts; // GOAL's empty pair is actually a pair type, containing the empty pair as the car and cdr - return TP_Type::make_from_typespec(TypeSpec("pair")); + return TP_Type::make_from_ts(TypeSpec("pair")); } TP_Type IR_CMoveF::get_expression_type(const TypeState& input, @@ -945,6 +945,6 @@ TP_Type IR_CMoveF::get_expression_type(const TypeState& input, (void)input; (void)file; (void)dts; - return TP_Type::make_from_typespec(TypeSpec("symbol")); + return TP_Type::make_from_ts(TypeSpec("symbol")); } } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/AtomicOp.cpp b/decompiler/IR2/AtomicOp.cpp index 886ddcd26..34c6804f8 100644 --- a/decompiler/IR2/AtomicOp.cpp +++ b/decompiler/IR2/AtomicOp.cpp @@ -62,6 +62,16 @@ bool AtomicOp::operator!=(const AtomicOp& other) const { return !((*this) == other); } +/*! + * Add GOAL temp registers to the clobber list. 
+ */ +void AtomicOp::clobber_temps() { + for (auto clobber : {Reg::V1, Reg::AT, Reg::A0, Reg::A1, Reg::A2, Reg::A3, Reg::T0, Reg::T1, + Reg::T2, Reg::T3, Reg::T4, Reg::T5, Reg::T6, Reg::T7, Reg::T8, Reg::T9}) { + m_clobber_regs.push_back(Register(Reg::GPR, clobber)); + } +} + ///////////////////////////// // SimpleAtom ///////////////////////////// @@ -1171,7 +1181,23 @@ std::unique_ptr SpecialOp::get_as_expr() const { throw std::runtime_error("SpecialOp::get_as_expr not yet implemented"); } -void SpecialOp::update_register_info() {} +void SpecialOp::update_register_info() { + switch (m_kind) { + case Kind::NOP: + case Kind::BREAK: + case Kind::CRASH: + return; + case Kind::SUSPEND: + // todo - confirm this is true. + // the suspend operation is written in a way where it doesn't use temporaries to make the call + // but the actual suspend operation doesn't seem to preserve temporaries. Maybe the plan was + // to save temp registers at some point, but they later gave up on this? + clobber_temps(); + return; + default: + assert(false); + } +} ///////////////////////////// // CallOp @@ -1218,6 +1244,7 @@ std::unique_ptr CallOp::get_as_expr() const { void CallOp::update_register_info() { // throw std::runtime_error("CallOp::update_register_info cannot be done until types are known"); m_read_regs.push_back(Register(Reg::GPR, Reg::T9)); + clobber_temps(); } ///////////////////////////// diff --git a/decompiler/IR2/AtomicOp.h b/decompiler/IR2/AtomicOp.h index 458def998..20f3be1df 100644 --- a/decompiler/IR2/AtomicOp.h +++ b/decompiler/IR2/AtomicOp.h @@ -11,6 +11,7 @@ namespace decompiler { class Expr; +class DecompilerTypeSystem; /*! * A "Variable" represents a register at a given instruction index. 
@@ -84,12 +85,13 @@ class Variable { * SetVarConditionOp * AsmOp * SetVarExprOp - * AsmOp */ class AtomicOp { public: explicit AtomicOp(int my_idx); std::string to_string(const std::vector& labels, const Env* env) const; + std::string reg_type_info_as_string(const TypeState& init_types, + const TypeState& end_types) const; virtual goos::Object to_form(const std::vector& labels, const Env* env) const = 0; virtual bool operator==(const AtomicOp& other) const = 0; @@ -121,6 +123,8 @@ class AtomicOp { // read twice. virtual void update_register_info() = 0; + TypeState propagate_types(const TypeState& input, const Env& env, DecompilerTypeSystem& dts); + const std::vector& read_regs() { return m_read_regs; } const std::vector& write_regs() { return m_write_regs; } const std::vector& clobber_regs() { return m_clobber_regs; } @@ -136,6 +140,12 @@ class AtomicOp { protected: int m_my_idx = -1; + // given the input types of all registers, figure out the output types. + virtual TypeState propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) = 0; + void clobber_temps(); + // the register values that are read (at the start of this op) std::vector m_read_regs; // the registers that have actual values written into them (at the end of this op) @@ -175,6 +185,10 @@ class SimpleAtom { assert(is_var()); return m_variable; } + s64 get_int() const { + assert(is_int()); + return m_int; + } bool is_int() const { return m_kind == Kind::INTEGER_CONSTANT; }; bool is_sym_ptr() const { return m_kind == Kind::SYMBOL_PTR; }; bool is_sym_val() const { return m_kind == Kind::SYMBOL_VAL; }; @@ -184,6 +198,7 @@ class SimpleAtom { bool operator!=(const SimpleAtom& other) const { return !((*this) == other); } void get_regs(std::vector* out) const; SimpleExpression as_expr() const; + TP_Type get_type(const TypeState& input, const Env& env, const DecompilerTypeSystem& dts) const; private: Kind m_kind = Kind::INVALID; @@ -256,6 +271,13 @@ class SimpleExpression 
{ bool operator==(const SimpleExpression& other) const; bool is_identity() const { return m_kind == Kind::IDENTITY; } void get_regs(std::vector* out) const; + TP_Type get_type(const TypeState& input, const Env& env, const DecompilerTypeSystem& dts) const; + TP_Type get_type_int2(const TypeState& input, + const Env& env, + const DecompilerTypeSystem& dts) const; + TP_Type get_type_int1(const TypeState& input, + const Env& env, + const DecompilerTypeSystem& dts) const; private: Kind m_kind = Kind::INVALID; @@ -281,6 +303,9 @@ class SetVarOp : public AtomicOp { std::unique_ptr get_set_source_as_expr() const override; std::unique_ptr get_as_expr() const override; void update_register_info() override; + TypeState propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) override; private: Variable m_dst; @@ -305,6 +330,9 @@ class AsmOp : public AtomicOp { std::unique_ptr get_set_source_as_expr() const override; std::unique_ptr get_as_expr() const override; void update_register_info() override; + TypeState propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) override; private: Instruction m_instr; @@ -388,6 +416,9 @@ class SetVarConditionOp : public AtomicOp { std::unique_ptr get_as_expr() const override; void update_register_info() override; void invert() { m_condition.invert(); } + TypeState propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) override; private: Variable m_dst; @@ -410,6 +441,9 @@ class StoreOp : public AtomicOp { std::unique_ptr get_set_source_as_expr() const override; std::unique_ptr get_as_expr() const override; void update_register_info() override; + TypeState propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) override; private: int m_size; @@ -434,6 +468,10 @@ class LoadVarOp : public AtomicOp { std::unique_ptr get_set_source_as_expr() const override; std::unique_ptr get_as_expr() 
const override; void update_register_info() override; + TypeState propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) override; + TP_Type get_src_type(const TypeState& input, const Env& env, DecompilerTypeSystem& dts) const; private: Kind m_kind; @@ -471,6 +509,9 @@ class IR2_BranchDelay { bool operator==(const IR2_BranchDelay& other) const; void get_regs(std::vector* write, std::vector* read) const; bool is_known() const { return m_kind != Kind::UNKNOWN; } + TypeState propagate_types(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) const; private: std::optional m_var[3]; @@ -496,6 +537,9 @@ class BranchOp : public AtomicOp { std::unique_ptr get_set_source_as_expr() const override; std::unique_ptr get_as_expr() const override; void update_register_info() override; + TypeState propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) override; private: bool m_likely = false; @@ -526,6 +570,9 @@ class SpecialOp : public AtomicOp { std::unique_ptr get_set_source_as_expr() const override; std::unique_ptr get_as_expr() const override; void update_register_info() override; + TypeState propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) override; private: Kind m_kind; @@ -546,6 +593,13 @@ class CallOp : public AtomicOp { std::unique_ptr get_set_source_as_expr() const override; std::unique_ptr get_as_expr() const override; void update_register_info() override; + TypeState propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) override; + + protected: + TypeSpec m_call_type; + bool m_call_type_set = false; }; /*! 
@@ -571,6 +625,9 @@ class ConditionalMoveFalseOp : public AtomicOp { std::unique_ptr get_set_source_as_expr() const override; std::unique_ptr get_as_expr() const override; void update_register_info() override; + TypeState propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) override; private: Variable m_dst, m_src; diff --git a/decompiler/IR2/AtomicOpTypeAnalysis.cpp b/decompiler/IR2/AtomicOpTypeAnalysis.cpp new file mode 100644 index 000000000..55d469097 --- /dev/null +++ b/decompiler/IR2/AtomicOpTypeAnalysis.cpp @@ -0,0 +1,772 @@ +#include "third-party/fmt/core.h" +#include "decompiler/ObjectFile/LinkedObjectFile.h" +#include "common/log/log.h" +#include "AtomicOp.h" + +namespace decompiler { + +namespace { +bool tc(const DecompilerTypeSystem& dts, const TypeSpec& expected, const TP_Type& actual) { + return dts.ts.typecheck(expected, actual.typespec(), "", false, false); +} + +bool is_int_or_uint(const DecompilerTypeSystem& dts, const TP_Type& type) { + return tc(dts, TypeSpec("int"), type) || tc(dts, TypeSpec("uint"), type); +} + +struct IR2_RegOffset { + Register reg; + int offset; +}; + +bool get_as_reg_offset(const SimpleExpression& expr, IR2_RegOffset* out) { + if (expr.kind() == SimpleExpression::Kind::ADD && expr.get_arg(0).is_var() && + expr.get_arg(1).is_int()) { + out->reg = expr.get_arg(0).var().reg(); + out->offset = expr.get_arg(1).get_int(); + return true; + } + + if (expr.is_identity() && expr.get_arg(0).is_var()) { + out->reg = expr.get_arg(0).var().reg(); + out->offset = 0; + return true; + } + return false; +} + +RegClass get_reg_kind(const Register& r) { + switch (r.get_kind()) { + case Reg::GPR: + return RegClass::GPR_64; + case Reg::FPR: + return RegClass::FLOAT; + default: + assert(false); + } +} + +} // namespace + +std::string AtomicOp::reg_type_info_as_string(const TypeState& init_types, + const TypeState& end_types) const { + std::string result; + + auto read_mask = 
regs_to_gpr_mask(m_read_regs); + auto write_mask = regs_to_gpr_mask(m_write_regs); + auto clobber_mask = regs_to_gpr_mask(m_clobber_regs); + + result += fmt::format("[{}] -> [{}]", init_types.print_gpr_masked(read_mask), + end_types.print_gpr_masked(write_mask)); + + if (clobber_mask) { + result += "cl: "; + for (auto& reg : m_clobber_regs) { + result += reg.to_string(); + result += ' '; + } + } + + return result; +} + +TP_Type SimpleAtom::get_type(const TypeState& input, + const Env& env, + const DecompilerTypeSystem& dts) const { + switch (m_kind) { + case Kind::EMPTY_LIST: + return TP_Type::make_from_ts("pair"); + case Kind::VARIABLE: + return input.get(var().reg()); + case Kind::INTEGER_CONSTANT: + return TP_Type::make_from_integer(m_int); + case Kind::SYMBOL_PTR: + if (m_string == "#f") { + return TP_Type::make_false(); + } else { + return TP_Type::make_from_ts("symbol"); + } + case Kind::SYMBOL_VAL: { + if (m_string == "#f") { + // if we ever read the false symbol, it should contain the false symbol as its value. + return TP_Type::make_false(); + } else if (m_string == "__START-OF-TABLE__") { + // another annoying special case. We have a fake symbol called __START-OF-TABLE__ + // which actually means that you get the first address in the symbol table. + // it's not really a linked symbol, but the basic op builder represents it as one. + return TP_Type::make_from_ts(TypeSpec("pointer")); + } + + // look up the type of the symbol + auto type = dts.symbol_types.find(m_string); + if (type == dts.symbol_types.end()) { + throw std::runtime_error("Don't have the type of symbol " + m_string); + } + + if (type->second == TypeSpec("type")) { + // if we get a type by symbol, we should remember which type we got it from. 
+ return TP_Type::make_type_object(TypeSpec(m_string)); + } + + // otherwise, just return a normal typespec + return TP_Type::make_from_ts(type->second); + } + case Kind::STATIC_ADDRESS: { + auto label = env.file->labels.at(m_int); + // strings are 16-byte aligned, but functions are 8 byte aligned? + if ((label.offset & 7) == BASIC_OFFSET) { + // it's a basic! probably. + const auto& word = + env.file->words_by_seg.at(label.target_segment).at((label.offset - 4) / 4); + if (word.kind == LinkedWord::TYPE_PTR) { + if (word.symbol_name == "string") { + return TP_Type::make_from_string(env.file->get_goal_string_by_label(label)); + } else { + // otherwise, some other static basic. + return TP_Type::make_from_ts(TypeSpec(word.symbol_name)); + } + } + } else if ((label.offset & 7) == PAIR_OFFSET) { + return TP_Type::make_from_ts(TypeSpec("pair")); + } + throw std::runtime_error("IR_StaticAddress couldn't figure out the type: " + label.name); + } + case Kind::INVALID: + default: + assert(false); + } + return {}; +} + +TP_Type SimpleExpression::get_type(const TypeState& input, + const Env& env, + const DecompilerTypeSystem& dts) const { + switch (m_kind) { + case Kind::IDENTITY: + return m_args[0].get_type(input, env, dts); + case Kind::GPR_TO_FPR: { + const auto& in_type = input.get(get_arg(0).var().reg()); + if (in_type.typespec() != TypeSpec("float")) { + lg::warn("GPR to FPR used on a {}", in_type.print()); + } + return TP_Type::make_from_ts("float"); + } + case Kind::FPR_TO_GPR: + case Kind::DIV_S: + return TP_Type::make_from_ts("float"); + case Kind::ADD: + case Kind::SUB: + case Kind::MUL_SIGNED: + case Kind::DIV_SIGNED: + case Kind::RIGHT_SHIFT_ARITH: + case Kind::RIGHT_SHIFT_LOGIC: + case Kind::MOD_SIGNED: + case Kind::MIN_SIGNED: + case Kind::MAX_SIGNED: + case Kind::OR: + case Kind::AND: + case Kind::NOR: + case Kind::XOR: + case Kind::LEFT_SHIFT: + case Kind::MUL_UNSIGNED: + return get_type_int2(input, env, dts); + case Kind::NEG: + case Kind::LOGNOT: + return 
get_type_int1(input, env, dts); + default: + throw std::runtime_error("Simple expression can't get_type: " + + to_form(env.file->labels, &env).print()); + } + return {}; +} + +TP_Type SimpleExpression::get_type_int1(const TypeState& input, + const Env& env, + const DecompilerTypeSystem& dts) const { + (void)input; + (void)dts; + auto arg_type = m_args[0].get_type(input, env, dts); + if (is_int_or_uint(dts, arg_type)) { + switch (m_kind) { + case Kind::NEG: + // if we negate a thing, let's just make it a signed integer. + return TP_Type::make_from_ts(TypeSpec("int")); + // case Kind::: + // // if we take the absolute value of a thing, just make it signed. + // return TP_Type::make_from_ts(TypeSpec("int")); + case Kind::LOGNOT: + // otherwise, make it int/uint as needed (this works because we check is_int_or_uint + // above) + return TP_Type::make_from_ts(arg_type.typespec()); + default: + break; + } + } + + throw std::runtime_error("IR_IntMath1::get_expression_type case not handled: " + + to_form(env.file->labels, &env).print() + " " + arg_type.print()); +} + +/*! + * Special case for "integer math". + */ +TP_Type SimpleExpression::get_type_int2(const TypeState& input, + const Env& env, + const DecompilerTypeSystem& dts) const { + auto arg0_type = m_args[0].get_type(input, env, dts); + auto arg1_type = m_args[1].get_type(input, env, dts); + + // special cases for integers + switch (m_kind) { + case Kind::LEFT_SHIFT: + // multiplication by constant power of two, optimized to a shift. 
+ if (m_args[1].is_int() && is_int_or_uint(dts, arg0_type)) { + assert(m_args[1].get_int() >= 0); + assert(m_args[1].get_int() < 64); + return TP_Type::make_from_product(1ull << m_args[1].get_int()); + } + break; + + case Kind::MUL_SIGNED: { + if (arg0_type.is_integer_constant() && is_int_or_uint(dts, arg1_type)) { + return TP_Type::make_from_product(arg0_type.get_integer_constant()); + } + } break; + + case Kind::ADD: + if (arg0_type.is_product_with(4) && tc(dts, TypeSpec("type"), arg1_type)) { + // dynamic access into the method array with shift, add, offset-load + // no need to track the type because we don't know the method index anyway. + return TP_Type::make_partial_dyanmic_vtable_access(); + } + break; + + default: + break; + } + + if (arg0_type == arg1_type && is_int_or_uint(dts, arg0_type)) { + // both are the same type and both are int/uint, so we assume that we're doing integer math. + // we strip off any weird things like multiplication or integer constant. + return TP_Type::make_from_ts(arg0_type.typespec()); + } + + if (is_int_or_uint(dts, arg0_type) && is_int_or_uint(dts, arg1_type)) { + // usually we would want to use arg0's type as the "winning" type. + // but we use arg1's if arg0 is an integer constant + // in either case, strip off weird stuff. + if (arg0_type.is_integer_constant() && !arg1_type.is_integer_constant()) { + return TP_Type::make_from_ts(arg1_type.typespec()); + } + return TP_Type::make_from_ts(arg0_type.typespec()); + } + + if (tc(dts, TypeSpec("binteger"), arg0_type) && is_int_or_uint(dts, arg1_type)) { + // if arg0 is a binteger, the result is probably a binteger as well + return TP_Type::make_from_ts("binteger"); + } + + // special cases for non-integers + if ((arg0_type.typespec() == TypeSpec("object") || arg0_type.typespec() == TypeSpec("pair")) && + (arg1_type.is_integer_constant(62) || arg1_type.is_integer_constant(61))) { + // boxed object tag trick. 
+ return TP_Type::make_from_ts("int"); + } + + if (m_args[1].is_int() && m_kind == Kind::ADD && arg0_type.kind == TP_Type::Kind::TYPESPEC) { + // access a field. + FieldReverseLookupInput rd_in; + rd_in.deref = std::nullopt; + rd_in.stride = 0; + rd_in.offset = m_args[1].get_int(); + rd_in.base_type = arg0_type.typespec(); + auto rd = dts.ts.reverse_field_lookup(rd_in); + + if (rd.success) { + return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type)); + } + } + + if (m_kind == Kind::ADD && arg0_type.is_product() && arg1_type.kind == TP_Type::Kind::TYPESPEC) { + return TP_Type::make_object_plus_product(arg1_type.typespec(), arg0_type.get_multiplier()); + } + + if (m_kind == Kind::ADD && arg1_type.is_product() && arg0_type.kind == TP_Type::Kind::TYPESPEC) { + return TP_Type::make_object_plus_product(arg0_type.typespec(), arg1_type.get_multiplier()); + } + + if (m_kind == Kind::ADD && arg0_type.typespec().base_type() == "pointer" && + tc(dts, TypeSpec("integer"), arg1_type)) { + // plain pointer plus integer = plain pointer + return TP_Type::make_from_ts(TypeSpec("pointer")); + } + + if (m_kind == Kind::ADD && arg1_type.typespec().base_type() == "pointer" && + tc(dts, TypeSpec("integer"), arg0_type)) { + // plain pointer plus integer = plain pointer + return TP_Type::make_from_ts(TypeSpec("pointer")); + } + + if (tc(dts, TypeSpec("structure"), arg1_type) && !m_args[0].is_int() && + is_int_or_uint(dts, arg0_type)) { + if (arg1_type.typespec() == TypeSpec("symbol") && + arg0_type.is_integer_constant(SYM_INFO_OFFSET + POINTER_SIZE)) { + // symbol -> GOAL String + // NOTE - the offset doesn't fit in a s16, so it's loaded into a register first. + // so we expect the arg to be a variable, and the type propagation will figure out the + // integer constant. + return TP_Type::make_from_ts(dts.ts.make_pointer_typespec("string")); + } else { + // byte access of offset array field trick. + // arg1 holds a structure. + // arg0 is an integer in a register. 
+ return TP_Type::make_object_plus_product(arg1_type.typespec(), 1); + } + } + + if (m_kind == Kind::AND) { + // base case for and. Just get an integer. + return TP_Type::make_from_ts(TypeSpec("int")); + } + + if (m_kind == Kind::SUB && tc(dts, TypeSpec("pointer"), arg0_type) && + tc(dts, TypeSpec("pointer"), arg1_type)) { + return TP_Type::make_from_ts(TypeSpec("int")); + } + + throw std::runtime_error(fmt::format("Can't get_type_int2: {}, args {} and {}", + to_form(env.file->labels, &env).print(), arg0_type.print(), + arg1_type.print())); +} + +TypeState IR2_BranchDelay::propagate_types(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) const { + TypeState output = input; + switch (m_kind) { + case Kind::DSLLV: { + // I believe this is only used in ash. We ignore the shift amount's type and just look + // at the input value. If it's a uint/int based type, we just return uint/int (not the type) + // this will kill any weird stuff like product, etc. + // if it's not an integer type, it's currently an error. + auto dst = m_var[0]->reg(); + auto src = m_var[1]->reg(); + if (tc(dts, TypeSpec("uint"), output.get(src))) { + output.get(dst) = TP_Type::make_from_ts("uint"); + } else if (tc(dts, TypeSpec("int"), output.get(src))) { + output.get(dst) = TP_Type::make_from_ts("int"); + } else { + throw std::runtime_error("BranchDelay::type_prop DSLLV for src " + output.get(src).print()); + } + } break; + case Kind::NEGATE: + // to match the behavior in IntMath1, assume signed when negating. 
+ output.get(m_var[0]->reg()) = TP_Type::make_from_ts("int"); + break; + case Kind::SET_REG_FALSE: + output.get(m_var[0]->reg()) = TP_Type::make_false(); + break; + case Kind::SET_REG_REG: + output.get(m_var[0]->reg()) = output.get(m_var[1]->reg()); + break; + case Kind::SET_REG_TRUE: + output.get(m_var[0]->reg()) = TP_Type::make_from_ts(TypeSpec("symbol")); + break; + case Kind::SET_BINTEGER: + output.get(m_var[0]->reg()) = TP_Type::make_type_object(TypeSpec("binteger")); + break; + case Kind::SET_PAIR: + output.get(m_var[0]->reg()) = TP_Type::make_type_object(TypeSpec("pair")); + break; + case Kind::NOP: + break; + default: + throw std::runtime_error("Unhandled branch delay in type_prop: " + + to_form(env.file->labels, &env).print()); + } + return output; +} + +///////////////////////////////////////// +// Implementations of propagate_types_internal +///////////////////////////////////////// + +TypeState AtomicOp::propagate_types(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) { + // do op-specific type propagation + TypeState result = propagate_types_internal(input, env, dts); + // clobber + for (auto reg : m_clobber_regs) { + result.get(reg) = TP_Type::make_uninitialized(); + } + return result; +} + +TypeState SetVarOp::propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) { + TypeState result = input; + result.get(m_dst.reg()) = m_src.get_type(input, env, dts); + return result; +} + +TypeState AsmOp::propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) { + (void)env; + (void)dts; + TypeState result = input; + if (m_dst.has_value()) { + auto kind = m_dst->reg().get_kind(); + if (kind == Reg::GPR || kind == Reg::FPR) { + result.get(m_dst->reg()) = TP_Type::make_from_ts("int"); + } + } + return result; +} + +TypeState SetVarConditionOp::propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) { + (void)env; + 
(void)dts; + TypeState result = input; + result.get(m_dst.reg()) = TP_Type::make_from_ts("symbol"); + return result; +} + +TypeState StoreOp::propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) { + (void)env; + (void)dts; + return input; +} + +TP_Type LoadVarOp::get_src_type(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) const { + if (m_src.is_identity()) { + auto& src = m_src.get_arg(0); + if (src.is_static_addr()) { + if (m_kind == Kind::FLOAT) { + // assume anything loaded from floating point will be a float. + return TP_Type::make_from_ts("float"); + } + + if (m_size == 8) { + // 8 byte integer constants are always loaded from a static pool + // this could technically hide loading a different type from inside of a static basic. + return TP_Type::make_from_ts(dts.ts.make_typespec("uint")); + } + } + } + + /////////////////////////////////////// + // REGISTER + OFFSET (possibly 0) + /////////////////////////////////////// + IR2_RegOffset ro; + if (get_as_reg_offset(m_src, &ro)) { + auto& input_type = input.get(ro.reg); + + if (input_type.kind == TP_Type::Kind::TYPE_OF_TYPE_OR_CHILD && ro.offset >= 16 && + (ro.offset & 3) == 0 && m_size == 4 && m_kind == Kind::UNSIGNED) { + // method get of fixed type + auto type_name = input_type.get_type_objects_typespec().base_type(); + auto method_id = (ro.offset - 16) / 4; + auto method_info = dts.ts.lookup_method(type_name, method_id); + auto method_type = method_info.type.substitute_for_method_call(type_name); + if (type_name == "object" && method_id == GOAL_NEW_METHOD) { + // remember that we're an object new. + return TP_Type::make_object_new(method_type); + } + return TP_Type::make_from_ts(method_type); + } + + if (input_type.kind == TP_Type::Kind::TYPESPEC && input_type.typespec() == TypeSpec("type") && + ro.offset >= 16 && (ro.offset & 3) == 0 && m_size == 4 && m_kind == Kind::UNSIGNED) { + // method get of an unknown type. 
We assume the most general "object" type. + auto method_id = (ro.offset - 16) / 4; + auto method_info = dts.ts.lookup_method("object", method_id); + if (method_id != GOAL_NEW_METHOD && method_id != GOAL_RELOC_METHOD) { + // this can get us the wrong thing for `new` methods. And maybe relocate? + return TP_Type::make_from_ts(method_info.type.substitute_for_method_call("object")); + } + } + + if (input_type.typespec() == TypeSpec("pointer")) { + // we got a plain pointer. let's just assume we're loading an integer. + // perhaps we should disable this feature by default on 4-byte loads if we're getting + // lots of false positives for loading pointers from plain pointers. + + switch (m_kind) { + case Kind::UNSIGNED: + switch (m_size) { + case 1: + case 2: + case 4: + case 8: + return TP_Type::make_from_ts(TypeSpec("uint")); + default: + break; + } + break; + case Kind::SIGNED: + switch (m_size) { + case 1: + case 2: + case 4: + case 8: + return TP_Type::make_from_ts(TypeSpec("int")); + default: + break; + } + break; + case Kind::FLOAT: + return TP_Type::make_from_ts(TypeSpec("float")); + default: + assert(false); + } + } + + if (input_type.kind == TP_Type::Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT) { + FieldReverseLookupInput rd_in; + DerefKind dk; + dk.is_store = false; + dk.reg_kind = get_reg_kind(ro.reg); + dk.sign_extend = m_kind == Kind::SIGNED; + dk.size = m_size; + rd_in.deref = dk; + rd_in.base_type = input_type.get_obj_plus_const_mult_typespec(); + rd_in.stride = input_type.get_multiplier(); + rd_in.offset = ro.offset; + auto rd = dts.ts.reverse_field_lookup(rd_in); + + if (rd.success) { + // load_path_set = true; + // load_path_addr_of = rd.addr_of; + // load_path_base = ro.reg_ir; + // for (auto& x : rd.tokens) { + // load_path.push_back(x.print()); + // } + return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type)); + } + } + + if (input_type.kind == TP_Type::Kind::TYPESPEC && ro.offset == -4 && m_kind == Kind::UNSIGNED && + m_size == 4 && 
ro.reg.get_kind() == Reg::GPR) { + // get type of basic likely, but misrecognized as an object. + // occurs often in typecase-like structures because other possible types are + // "stripped". + // load_path_base = ro.reg_ir; + // load_path_addr_of = false; + // load_path.push_back("type"); + // load_path_set = true; + + return TP_Type::make_type_object(input_type.typespec().base_type()); + } + + if (input_type.kind == TP_Type::Kind::DYNAMIC_METHOD_ACCESS && ro.offset == 16) { + // access method vtable. The input is type + (4 * method), and the 16 is the offset + // of method 0. + return TP_Type::make_from_ts(TypeSpec("function")); + } + // Assume we're accessing a field of an object. + FieldReverseLookupInput rd_in; + DerefKind dk; + dk.is_store = false; + dk.reg_kind = get_reg_kind(ro.reg); + dk.sign_extend = m_kind == Kind::SIGNED; + dk.size = m_size; + rd_in.deref = dk; + rd_in.base_type = input_type.typespec(); + rd_in.stride = 0; + rd_in.offset = ro.offset; + auto rd = dts.ts.reverse_field_lookup(rd_in); + + // only error on failure if "pair" is disabled. otherwise it might be a pair. + if (!rd.success && !dts.type_prop_settings.allow_pair) { + printf("input type is %s, offset is %d, sign %d size %d\n", rd_in.base_type.print().c_str(), + rd_in.offset, rd_in.deref.value().sign_extend, rd_in.deref.value().size); + throw std::runtime_error(fmt::format("Could not get type of load: {}. Reverse Deref Failed.", + to_form(env.file->labels, &env).print())); + } + + if (rd.success) { + // load_path_set = true; + // load_path_addr_of = rd.addr_of; + // load_path_base = ro.reg_ir; + // for (auto& x : rd.tokens) { + // load_path.push_back(x.print()); + // } + return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type)); + } + + // rd failed, try as pair. + if (dts.type_prop_settings.allow_pair) { + // we are strict here - only permit pair-type loads from object or pair. + // object is permitted for stuff like association lists where the car is also a pair. 
+ if (m_kind == Kind::SIGNED && m_size == 4 && + (input_type.typespec() == TypeSpec("object") || + input_type.typespec() == TypeSpec("pair"))) { + // these rules are of course not always correct or the most specific, but it's the best + // we can do. + if (ro.offset == 2) { + // cdr = another pair. + return TP_Type::make_from_ts(TypeSpec("pair")); + } else if (ro.offset == -2) { + // car = some object. + return TP_Type::make_from_ts(TypeSpec("object")); + } + } + } + } + + throw std::runtime_error( + fmt::format("Could not get type of load: {}. ", to_form(env.file->labels, &env).print())); + + throw std::runtime_error("LoadVarOp can't get_src_type: " + + to_form(env.file->labels, &env).print()); +} + +TypeState LoadVarOp::propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) { + TypeState result = input; + result.get(m_dst.reg()) = get_src_type(input, env, dts); + return result; +} + +TypeState BranchOp::propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) { + return m_branch_delay.propagate_types(input, env, dts); +} + +TypeState SpecialOp::propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) { + (void)env; + (void)dts; + // none of these write anything. Suspend clobbers, but this is taken care of automatically + switch (m_kind) { + case Kind::NOP: + case Kind::BREAK: + case Kind::CRASH: + case Kind::SUSPEND: + return input; + default: + assert(false); + } +} + +TypeState CallOp::propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) { + (void)dts; + (void)env; + const Reg::Gpr arg_regs[8] = {Reg::A0, Reg::A1, Reg::A2, Reg::A3, + Reg::T0, Reg::T1, Reg::T2, Reg::T3}; + + TypeState end_types = input; + + auto in_tp = input.get(Register(Reg::GPR, Reg::T9)); + if (in_tp.kind == TP_Type::Kind::OBJECT_NEW_METHOD && + !dts.type_prop_settings.current_method_type.empty()) { + // calling object new method. 
Set the result to a new object of our type + end_types.get(Register(Reg::GPR, Reg::V0)) = + TP_Type::make_from_ts(dts.type_prop_settings.current_method_type); + // update the call type + m_call_type = in_tp.get_method_new_object_typespec(); + m_call_type.get_arg(m_call_type.arg_count() - 1) = + TypeSpec(dts.type_prop_settings.current_method_type); + m_call_type_set = true; + return end_types; + } + + auto in_type = in_tp.typespec(); + + if (in_type.base_type() != "function") { + throw std::runtime_error("Called something that wasn't a function: " + in_type.print()); + } + + if (in_type.arg_count() < 1) { + throw std::runtime_error("Called a function, but we don't know its type"); + } + + if (in_type.arg_count() == 2 && in_type.get_arg(0) == TypeSpec("_varargs_")) { + // we're calling a varags function, which is format. We can determine the argument count + // by looking at the format string, if we can get it. + auto arg_type = input.get(Register(Reg::GPR, Reg::A1)); + if (arg_type.is_constant_string() || arg_type.is_format_string()) { + int arg_count = -1; + + if (arg_type.is_constant_string()) { + auto& str = arg_type.get_string(); + arg_count = dts.get_format_arg_count(str); + } else { + // is format string. + arg_count = arg_type.get_format_string_arg_count(); + } + + TypeSpec format_call_type("function"); + format_call_type.add_arg(TypeSpec("object")); // destination + format_call_type.add_arg(TypeSpec("string")); // format string + for (int i = 0; i < arg_count; i++) { + format_call_type.add_arg(TypeSpec("object")); + } + format_call_type.add_arg(TypeSpec("object")); + arg_count += 2; // for destination and format string. + m_call_type = format_call_type; + m_call_type_set = true; + + end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_ts(in_type.last_arg()); + + // we can also update register usage here. 
+ m_read_regs.clear(); + m_read_regs.emplace_back(Reg::GPR, Reg::T9); + for (int i = 0; i < arg_count; i++) { + m_read_regs.emplace_back(Reg::GPR, arg_regs[i]); + } + + return end_types; + } else { + throw std::runtime_error("Failed to get string for _varags_ call, got " + arg_type.print()); + } + } + // set the call type! + m_call_type = in_type; + m_call_type_set = true; + + end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_ts(in_type.last_arg()); + + // we can also update register usage here. + m_read_regs.clear(); + m_read_regs.emplace_back(Reg::GPR, Reg::T9); + + for (uint32_t i = 0; i < in_type.arg_count() - 1; i++) { + m_read_regs.emplace_back(Reg::GPR, arg_regs[i]); + } + + return end_types; +} + +TypeState ConditionalMoveFalseOp::propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) { + (void)env; + (void)dts; + // these should only appear when paired with a (set! dest #t) earlier, so this expression + // shouldn't set any types. Still, double check and override if this fails. + TypeState result = input; + if (result.get(m_dst.reg()).typespec() != TypeSpec("symbol")) { + lg::warn("Conditional Moved #f into something of type {}", + result.get(m_dst.reg()).typespec().print()); + result.get(m_dst.reg()) = TP_Type::make_from_ts("symbol"); + } + + return result; +} + +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/Env.cpp b/decompiler/IR2/Env.cpp index c0c2af100..172705f71 100644 --- a/decompiler/IR2/Env.cpp +++ b/decompiler/IR2/Env.cpp @@ -7,4 +7,14 @@ std::string Env::get_variable_name(Register reg, int atomic_idx) const { (void)atomic_idx; throw std::runtime_error("Env::get_variable_name not yet implemented."); } + +/*! + * Update the Env with the result of the type analysis pass. 
+ */ +void Env::set_types(const std::vector& block_init_types, + const std::vector& op_end_types) { + m_block_init_types = block_init_types; + m_op_end_types = op_end_types; + m_has_types = true; +} } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/Env.h b/decompiler/IR2/Env.h index 98d71dc40..f2c4e9cc8 100644 --- a/decompiler/IR2/Env.h +++ b/decompiler/IR2/Env.h @@ -1,9 +1,14 @@ #pragma once #include +#include +#include +#include "decompiler/util/TP_Type.h" #include "decompiler/Disasm/Register.h" namespace decompiler { +class LinkedObjectFile; + /*! * An "environment" for a single function. * This contains data for an entire function, like which registers are live when, the types of @@ -13,9 +18,34 @@ namespace decompiler { class Env { public: bool has_local_vars() const { return m_has_local_vars; } + bool has_type_analysis() const { return m_has_types; } std::string get_variable_name(Register reg, int atomic_idx) const; + /*! + * Get the types in registers _after_ the given operation has completed. + */ + const TypeState& get_types_after_op(int atomic_op_id) const { + assert(m_has_types); + return m_op_end_types.at(atomic_op_id); + } + + /*! + * Get the types in registers at the beginning of this basic block, before any operations + * have occurred. 
+ */ + const TypeState& get_types_at_block_entry(int block_id) const { + assert(m_has_types); + return m_block_init_types.at(block_id); + } + + void set_types(const std::vector& block_init_types, + const std::vector& op_end_types); + LinkedObjectFile* file = nullptr; + private: bool m_has_local_vars = false; + bool m_has_types = false; + std::vector m_block_init_types; + std::vector m_op_end_types; }; } // namespace decompiler \ No newline at end of file diff --git a/decompiler/ObjectFile/ObjectFileDB.h b/decompiler/ObjectFile/ObjectFileDB.h index fb16d43a4..9ee6b5049 100644 --- a/decompiler/ObjectFile/ObjectFileDB.h +++ b/decompiler/ObjectFile/ObjectFileDB.h @@ -70,6 +70,7 @@ class ObjectFileDB { void ir2_top_level_pass(); void ir2_basic_block_pass(); void ir2_atomic_op_pass(); + void ir2_type_analysis_pass(); void ir2_write_results(const std::string& output_dir); std::string ir2_to_file(ObjectFileData& data); std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg); @@ -83,6 +84,10 @@ class ObjectFileDB { DecompilerTypeSystem dts; std::string all_type_defs; + bool lookup_function_type(const FunctionName& name, + const std::string& obj_name, + TypeSpec* result); + private: void load_map_file(const std::string& map_data); void get_objs_from_dgo(const std::string& filename); diff --git a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp index cbe56037c..fbd01744a 100644 --- a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp +++ b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp @@ -24,10 +24,19 @@ void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir) { ir2_basic_block_pass(); lg::info("Converting to atomic ops..."); ir2_atomic_op_pass(); + lg::info("Running type analysis..."); + ir2_type_analysis_pass(); lg::info("Writing results..."); ir2_write_results(output_dir); } +/*! + * Analyze the top level function of each object. 
+ * - Find global function definitions + * - Find type definitions + * - Find method definitions + * - Warn for non-unique function names. + */ void ObjectFileDB::ir2_top_level_pass() { Timer timer; int total_functions = 0; @@ -97,6 +106,7 @@ void ObjectFileDB::ir2_top_level_pass() { } }); + // we remember duplicates like this so we can warn on all occurances of the duplicate name for_each_function([&](Function& func, int segment_id, ObjectFileData& data) { (void)segment_id; auto name = func.guessed_name.to_string(); @@ -115,6 +125,12 @@ void ObjectFileDB::ir2_top_level_pass() { lg::info("{:4d} logins {:.2f}%\n", total_top_levels, 100.f * total_top_levels / total_functions); } +/*! + * Initial Function Analysis Pass to build the control flow graph. + * - Find basic blocks + * - Analyze prologue and epilogue + * - Build control flow graph + */ void ObjectFileDB::ir2_basic_block_pass() { Timer timer; // Main Pass over each function... @@ -127,6 +143,7 @@ void ObjectFileDB::ir2_basic_block_pass() { for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) { total_functions++; + func.ir2.env.file = &data.linked_data; // first, find basic blocks. auto blocks = find_blocks_in_function(data.linked_data, segment_id, func); @@ -139,6 +156,11 @@ void ObjectFileDB::ir2_basic_block_pass() { if (!func.suspected_asm) { // find the prologue/epilogue so they can be excluded from basic blocks. func.analyze_prologue(data.linked_data); + } else { + // manually exclude the type tag from the basic block. + assert(func.basic_blocks.front().start_word == 0); + assert(func.basic_blocks.front().end_word >= 1); + func.basic_blocks.front().start_word = 1; } if (!func.suspected_asm) { @@ -162,6 +184,7 @@ void ObjectFileDB::ir2_basic_block_pass() { } if (func.suspected_asm) { + func.warnings.append(";; Assembly Function\n"); suspected_asm++; } }); @@ -178,6 +201,10 @@ void ObjectFileDB::ir2_basic_block_pass() { 100.f * inspect_methods / total_functions); } +/*! 
+ * Conversion of MIPS instructions into AtomicOps. The AtomicOps represent what we + * think are IR of the original GOAL compiler. + */ void ObjectFileDB::ir2_atomic_op_pass() { Timer timer; int total_functions = 0; @@ -197,6 +224,7 @@ void ObjectFileDB::ir2_atomic_op_pass() { } catch (std::exception& e) { lg::warn("Function {} from {} could not be converted to atomic ops: {}", func.guessed_name.to_string(), data.to_unique_name(), e.what()); + func.warnings.append(";; Failed to convert to atomic ops\n"); } } }); @@ -207,6 +235,44 @@ void ObjectFileDB::ir2_atomic_op_pass() { 100.f * attempted / total_functions, 100.f * successful / attempted); } +/*! + * Analyze registers and determine the type in each register at each instruction. + * - Figure out the type of each function, from configs. + * - Propagate types. + */ +void ObjectFileDB::ir2_type_analysis_pass() { + Timer timer; + int total_functions = 0; + int non_asm_functions = 0; + int attempted_functions = 0; + int successful_functions = 0; + + for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) { + (void)segment_id; + total_functions++; + if (!func.suspected_asm) { + non_asm_functions++; + TypeSpec ts; + if (lookup_function_type(func.guessed_name, data.to_unique_name(), &ts)) { + attempted_functions++; + // try type analysis here. 
+ auto hints = get_config().type_hints_by_function_by_idx[func.guessed_name.to_string()]; + if (func.run_type_analysis_ir2(ts, dts, data.linked_data, hints)) { + successful_functions++; + } else { + func.warnings.append(";; Type analysis failed\n"); + } + } else { + // lg::warn("Function {} didn't know its type", func.guessed_name.to_string()); + func.warnings.append(";; Type of function is unknown\n"); + } + } + }); + + lg::info("{}/{}/{}/{} (success/attempted/non-asm/total) in {:.2f} ms", successful_functions, + attempted_functions, non_asm_functions, total_functions, timer.getMs()); +} + void ObjectFileDB::ir2_write_results(const std::string& output_dir) { Timer timer; lg::info("Writing IR2 results to file..."); @@ -269,6 +335,40 @@ std::string ObjectFileDB::ir2_to_file(ObjectFileData& data) { return result; } +namespace { +void append_commented(std::string& line, + bool& has_comment, + const std::string& to_append, + int offset = 0) { + // minimum length before comment appears. + constexpr int pre_comment_length = 30; + // if comment overflows, how much to indent the next one + constexpr int overflow_indent = 30; + + // pad, and add comment + if (!has_comment) { + if (line.length() < pre_comment_length) { + line.append(pre_comment_length - line.length(), ' '); + } + line += ";; "; + line += to_append; + has_comment = true; + } else { + if (std::max(int(line.length()), offset) + to_append.length() > 120) { + line += "\n"; + line.append(overflow_indent, ' '); + line += ";; "; + } else { + if (int(line.length()) < offset) { + line.append(offset - line.length(), ' '); + } + line += " "; + } + line += to_append; + } +} +} // namespace + std::string ObjectFileDB::ir2_function_to_string(ObjectFileData& data, Function& func, int seg) { std::string result; result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n"; @@ -282,14 +382,16 @@ std::string ObjectFileDB::ir2_function_to_string(ObjectFileData& data, Function& bool print_atomics = 
func.ir2.atomic_ops_succeeded; // print each instruction in the function. bool in_delay_slot = false; + int total_instructions_printed = 0; + int last_instr_printed = 0; - for (int i = 1; i < func.end_word - func.start_word; i++) { + std::string line; + auto print_instr_start = [&](int i) { // check for a label to print auto label_id = data.linked_data.get_label_at(seg, (func.start_word + i) * 4); if (label_id != -1) { result += data.linked_data.labels.at(label_id).name + ":\n"; } - // check for no misaligned labels in code segments. for (int j = 1; j < 4; j++) { assert(data.linked_data.get_label_at(seg, (func.start_word + i) * 4 + j) == -1); @@ -297,36 +399,13 @@ std::string ObjectFileDB::ir2_function_to_string(ObjectFileData& data, Function& // print the assembly instruction auto& instr = func.instructions.at(i); - std::string line = " " + instr.to_string(data.linked_data.labels); + line = " " + instr.to_string(data.linked_data.labels); + }; - // printf("%d inst %s\n", print_atomics, instr.to_string(data.linked_data.labels).c_str()); - - bool printed_comment = false; - - // print atomic op - if (print_atomics && func.instr_starts_atomic_op(i)) { - if (line.length() < 30) { - line.append(30 - line.length(), ' '); - } - line += - " ;; " + func.get_atomic_op_at_instr(i).to_string(data.linked_data.labels, &func.ir2.env); - printed_comment = true; - } - - // print linked strings - for (int iidx = 0; iidx < instr.n_src; iidx++) { - if (instr.get_src(iidx).is_label()) { - auto lab = data.linked_data.labels.at(instr.get_src(iidx).get_label()); - if (data.linked_data.is_string(lab.target_segment, lab.offset)) { - if (!printed_comment) { - line += " ;; "; - printed_comment = true; - } - line += " " + data.linked_data.get_goal_string(lab.target_segment, lab.offset / 4 - 1); - } - } - } - result += line + "\n"; + auto print_instr_end = [&](int i) { + auto& instr = func.instructions.at(i); + result += line; + result += "\n"; // print delay slot gap if (in_delay_slot) { @@ 
-338,10 +417,140 @@ std::string ObjectFileDB::ir2_function_to_string(ObjectFileData& data, Function& if (gOpcodeInfo[(int)instr.kind].has_delay_slot) { in_delay_slot = true; } + total_instructions_printed++; + assert(last_instr_printed + 1 == i); + last_instr_printed = i; + }; + + // first, print the prologue. we start at word 1 because word 0 is the type tag + for (int i = 1; i < func.basic_blocks.front().start_word; i++) { + print_instr_start(i); + print_instr_end(i); } + + // next, print each basic block + int end_idx = func.basic_blocks.front().start_word; + for (int block_id = 0; block_id < int(func.basic_blocks.size()); block_id++) { + // block number + result += "B" + std::to_string(block_id) + ":\n"; + auto& block = func.basic_blocks.at(block_id); + + const TypeState* init_types = nullptr; + if (func.ir2.env.has_type_analysis()) { + init_types = &func.ir2.env.get_types_at_block_entry(block_id); + } + + for (int instr_id = block.start_word; instr_id < block.end_word; instr_id++) { + print_instr_start(instr_id); + bool printed_comment = false; + + // print atomic op + int op_id = -1; + if (print_atomics && func.instr_starts_atomic_op(instr_id)) { + auto& op = func.get_atomic_op_at_instr(instr_id); + op_id = func.ir2.atomic_ops->instruction_to_atomic_op.at(instr_id); + append_commented(line, printed_comment, + op.to_string(data.linked_data.labels, &func.ir2.env)); + + if (func.ir2.env.has_type_analysis()) { + append_commented( + line, printed_comment, + op.reg_type_info_as_string(*init_types, func.ir2.env.get_types_after_op(op_id)), 50); + } + } + auto& instr = func.instructions.at(instr_id); + // print linked strings + for (int iidx = 0; iidx < instr.n_src; iidx++) { + if (instr.get_src(iidx).is_label()) { + auto lab = data.linked_data.labels.at(instr.get_src(iidx).get_label()); + if (data.linked_data.is_string(lab.target_segment, lab.offset)) { + append_commented( + line, printed_comment, + data.linked_data.get_goal_string(lab.target_segment, lab.offset / 4 
- 1)); + } + } + } + print_instr_end(instr_id); + + if (print_atomics && func.ir2.env.has_type_analysis() && + func.instr_starts_atomic_op(instr_id)) { + init_types = &func.ir2.env.get_types_after_op(op_id); + } + } + end_idx = block.end_word; + } + + for (int i = end_idx; i < func.end_word - func.start_word; i++) { + print_instr_start(i); + print_instr_end(i); + } + result += "\n"; + assert(total_instructions_printed == (func.end_word - func.start_word - 1)); return result; } +/*! + * Try to look up the type of a function. Looks at the decompiler type info, the hints files, + * and other GOAL rules. + */ +bool ObjectFileDB::lookup_function_type(const FunctionName& name, + const std::string& obj_name, + TypeSpec* result) { + auto& cfg = get_config(); + + // don't return function types that are explicitly flagged as bad in config. + if (cfg.no_type_analysis_functions_by_name.find(name.to_string()) != + cfg.no_type_analysis_functions_by_name.end()) { + return false; + } + + if (name.kind == FunctionName::FunctionKind::GLOBAL) { + // global GOAL function. + auto kv = dts.symbol_types.find(name.function_name); + if (kv != dts.symbol_types.end() && kv->second.arg_count() >= 1) { + if (kv->second.base_type() != "function") { + lg::die("Found a function named {} but the symbol has type {}", name.to_string(), + kv->second.print()); + } + // good, found a global function with full type information. + *result = kv->second; + return true; + } + } else if (name.kind == FunctionName::FunctionKind::METHOD) { + MethodInfo info; + + if (dts.ts.try_lookup_method(name.type_name, name.method_id, &info)) { + if (info.type.arg_count() >= 1) { + if (info.type.base_type() != "function") { + lg::die("Found a method named {} but the symbol has type {}", name.to_string(), + info.type.print()); + } + // substitute the _type_ for the correct type.
+ *result = info.type.substitute_for_method_call(name.type_name); + return true; + } + } + + } else if (name.kind == FunctionName::FunctionKind::TOP_LEVEL_INIT) { + *result = dts.ts.make_function_typespec({}, "none"); + return true; + } else if (name.kind == FunctionName::FunctionKind::UNIDENTIFIED) { + // try looking up the object + const auto& map = get_config().anon_function_types_by_obj_by_id; + auto obj_kv = map.find(obj_name); + if (obj_kv != map.end()) { + auto func_kv = obj_kv->second.find(name.get_anon_id()); + if (func_kv != obj_kv->second.end()) { + *result = dts.parse_type_spec(func_kv->second); + return true; + } + } + } else { + assert(false); + } + return false; +} + } // namespace decompiler diff --git a/decompiler/util/DecompilerTypeSystem.cpp b/decompiler/util/DecompilerTypeSystem.cpp index 8dd4fbe8c..daaac0a85 100644 --- a/decompiler/util/DecompilerTypeSystem.cpp +++ b/decompiler/util/DecompilerTypeSystem.cpp @@ -199,7 +199,7 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add if (existing.kind == add.kind) { switch (existing.kind) { case TP_Type::Kind::TYPESPEC: { - auto new_result = TP_Type::make_from_typespec(coerce_to_reg_type(ts.lowest_common_ancestor( + auto new_result = TP_Type::make_from_ts(coerce_to_reg_type(ts.lowest_common_ancestor( existing.get_objects_typespec(), add.get_objects_typespec()))); *changed = (new_result != existing); return new_result; @@ -214,15 +214,15 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add case TP_Type::Kind::PRODUCT_WITH_CONSTANT: // we know they are different. *changed = true; - return TP_Type::make_from_typespec(TypeSpec("int")); + return TP_Type::make_from_ts(TypeSpec("int")); case TP_Type::Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT: *changed = true; // todo - there might be cases where we need to LCA the base types?? 
- return TP_Type::make_from_typespec(TypeSpec("object")); + return TP_Type::make_from_ts(TypeSpec("object")); case TP_Type::Kind::OBJECT_NEW_METHOD: *changed = true; // this case should never happen I think. - return TP_Type::make_from_typespec(TypeSpec("function")); + return TP_Type::make_from_ts(TypeSpec("function")); case TP_Type::Kind::STRING_CONSTANT: { auto existing_count = get_format_arg_count(existing.get_string()); auto added_count = get_format_arg_count(add.get_string()); @@ -230,19 +230,19 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add if (added_count == existing_count) { return TP_Type::make_from_format_string(existing_count); } else { - return TP_Type::make_from_typespec(TypeSpec("string")); + return TP_Type::make_from_ts(TypeSpec("string")); } } case TP_Type::Kind::INTEGER_CONSTANT: *changed = true; - return TP_Type::make_from_typespec(TypeSpec("int")); + return TP_Type::make_from_ts(TypeSpec("int")); case TP_Type::Kind::FORMAT_STRING: if (existing.get_format_string_arg_count() == add.get_format_string_arg_count()) { *changed = false; return existing; } else { *changed = true; - return TP_Type::make_from_typespec(TypeSpec("string")); + return TP_Type::make_from_ts(TypeSpec("string")); } case TP_Type::Kind::FALSE_AS_NULL: @@ -261,7 +261,7 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add if (existing_count == add_count) { result_type = TP_Type::make_from_format_string(existing_count); } else { - result_type = TP_Type::make_from_typespec(TypeSpec("string")); + result_type = TP_Type::make_from_ts(TypeSpec("string")); } *changed = (result_type != existing); @@ -269,7 +269,7 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add } // otherwise, as an absolute fallback, convert both to TypeSpecs and do TypeSpec LCA - auto new_result = TP_Type::make_from_typespec( + auto new_result = TP_Type::make_from_ts( 
coerce_to_reg_type(ts.lowest_common_ancestor(existing.typespec(), add.typespec()))); *changed = (new_result != existing); return new_result; diff --git a/decompiler/util/TP_Type.cpp b/decompiler/util/TP_Type.cpp index ce8c7e2ff..243eea371 100644 --- a/decompiler/util/TP_Type.cpp +++ b/decompiler/util/TP_Type.cpp @@ -2,6 +2,16 @@ #include "third-party/fmt/core.h" namespace decompiler { +u32 regs_to_gpr_mask(const std::vector& regs) { + u32 result = 0; + for (const auto& reg : regs) { + if (reg.get_kind() == Reg::GPR) { + result |= (1 << reg.get_gpr()); + } + } + return result; +} + std::string TypeState::print_gpr_masked(u32 mask) const { std::string result; for (int i = 0; i < 32; i++) { diff --git a/decompiler/util/TP_Type.h b/decompiler/util/TP_Type.h index e16a03aa0..ec3b85e3a 100644 --- a/decompiler/util/TP_Type.h +++ b/decompiler/util/TP_Type.h @@ -61,13 +61,15 @@ class TP_Type { return result; } - static TP_Type make_from_typespec(const TypeSpec& ts) { + static TP_Type make_from_ts(const TypeSpec& ts) { TP_Type result; result.kind = Kind::TYPESPEC; result.m_ts = ts; return result; } + static TP_Type make_from_ts(const std::string& ts) { return make_from_ts(TypeSpec(ts)); } + static TP_Type make_from_string(const std::string& str) { TP_Type result; result.kind = Kind::STRING_CONSTANT; @@ -192,4 +194,6 @@ struct TypeState { } } }; + +u32 regs_to_gpr_mask(const std::vector& regs); } // namespace decompiler \ No newline at end of file diff --git a/test/decompiler/test_AtomicOpBuilder.cpp b/test/decompiler/test_AtomicOpBuilder.cpp index c18fc53c2..77bc61398 100644 --- a/test/decompiler/test_AtomicOpBuilder.cpp +++ b/test/decompiler/test_AtomicOpBuilder.cpp @@ -418,7 +418,8 @@ TEST(DecompilerAtomicOpBuilder, DSUBU_DADDIU_MOVZ) { TEST(DecompilerAtomicOpBuilder, JALR_SLL) { test_case(assembly_from_list({"jalr ra, t9", "sll v0, ra, 0"}), {"(call!)"}, {{}}, {{"t9"}}, - {{}}); + {{"a0", "a1", "a2", "a3", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9", + 
"at", "v1"}}); } TEST(DecompilerAtomicOpBuilder, LB) { diff --git a/test/test_type_system.cpp b/test/test_type_system.cpp index bfdac40b6..73b895af2 100644 --- a/test/test_type_system.cpp +++ b/test/test_type_system.cpp @@ -196,6 +196,12 @@ TEST(TypeSystem, AddMethodAndLookupMethod) { EXPECT_EQ(ts.lookup_method("basic", "test-method-1").defined_in_type, "structure"); EXPECT_EQ(ts.lookup_method("basic", "test-method-1").type.print(), "(function integer string)"); EXPECT_EQ(ts.lookup_method("basic", "test-method-1").name, "test-method-1"); + + auto id = ts.lookup_method("basic", "test-method-1").id; + MethodInfo info; + EXPECT_TRUE(ts.try_lookup_method("basic", id, &info)); + EXPECT_FALSE(ts.try_lookup_method("not-a-real-type-name", id, &info)); + EXPECT_FALSE(ts.try_lookup_method("basic", id * 2, &info)); } TEST(TypeSystem, NewMethod) {