[Decompiler] Implement IR2 Type Analysis Pass (#193)

* type analysis setup

* more framework

* update printing

* wip

* add type analysis pass

* fix tests
This commit is contained in:
water111 2021-01-10 20:46:49 -05:00 committed by GitHub
parent 45d37733a8
commit fe693b5da2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 1358 additions and 103 deletions

View file

@ -433,6 +433,38 @@ MethodInfo TypeSystem::lookup_method(const std::string& type_name, const std::st
throw std::runtime_error("lookup_method failed");
}
/*!
 * Non-failing variant of lookup_method: search for a method by ID on the named type and then on
 * each of its ancestors, writing the result to info.
 * Returns true if some type in the chain provided the method, false if the type name is unknown
 * or no type in the chain has it.
 * NOTE(review): parents are resolved with lookup_type; if that can throw for an unknown parent,
 * this function would propagate it - confirm.
 */
bool TypeSystem::try_lookup_method(const std::string& type_name, int method_id, MethodInfo* info) {
  const auto found = m_types.find(type_name);
  if (found == m_types.end()) {
    // unknown type: report failure quietly.
    return false;
  }

  // the "new" method is queried through its own accessor.
  const bool want_new = (method_id == GOAL_NEW_METHOD);

  auto* current = found->second.get();
  for (;;) {
    const bool found_here =
        want_new ? current->get_my_new_method(info) : current->get_my_method(method_id, info);
    if (found_here) {
      return true;
    }
    if (!current->has_parent()) {
      // reached the root without finding the method.
      return false;
    }
    current = lookup_type(current->get_parent());
  }
}
/*!
* Lookup information on a method by ID number. Error if it can't be found. Will check parent types
* if the given type doesn't specialize the method.

View file

@ -149,6 +149,7 @@ class TypeSystem {
MethodInfo add_new_method(Type* type, const TypeSpec& ts);
MethodInfo lookup_method(const std::string& type_name, const std::string& method_name);
MethodInfo lookup_method(const std::string& type_name, int method_id);
bool try_lookup_method(const std::string& type_name, int method_id, MethodInfo* info);
MethodInfo lookup_new_method(const std::string& type_name);
void assert_method_id(const std::string& type_name, const std::string& method_name, int id);

View file

@ -30,6 +30,7 @@ add_library(
IR2/AtomicOp.cpp
IR2/AtomicOpBuilder.cpp
IR2/AtomicOpTypeAnalysis.cpp
IR2/Env.cpp
ObjectFile/LinkedObjectFile.cpp

View file

@ -93,6 +93,10 @@ class Function {
DecompilerTypeSystem& dts,
LinkedObjectFile& file,
const std::unordered_map<int, std::vector<TypeHint>>& hints);
bool run_type_analysis_ir2(const TypeSpec& my_type,
DecompilerTypeSystem& dts,
LinkedObjectFile& file,
const std::unordered_map<int, std::vector<TypeHint>>& hints);
void run_reg_usage();
bool build_expression(LinkedObjectFile& file);
BlockTopologicalSort bb_topo_sort();

View file

@ -14,18 +14,18 @@ TypeState construct_initial_typestate(const TypeSpec& f_ts) {
for (int i = 0; i < int(f_ts.arg_count()) - 1; i++) {
auto reg_id = goal_args[i];
auto reg_type = f_ts.get_arg(i);
result.gpr_types[reg_id] = TP_Type::make_from_typespec(reg_type);
result.gpr_types[reg_id] = TP_Type::make_from_ts(reg_type);
}
// todo, more specific process types for behaviors.
result.gpr_types[Reg::S6] = TP_Type::make_from_typespec(TypeSpec("process"));
result.gpr_types[Reg::S6] = TP_Type::make_from_ts(TypeSpec("process"));
return result;
}
void apply_hints(const std::vector<TypeHint>& hints, TypeState* state, DecompilerTypeSystem& dts) {
for (auto& hint : hints) {
try {
state->get(hint.reg) = TP_Type::make_from_typespec(dts.parse_type_spec(hint.type_name));
state->get(hint.reg) = TP_Type::make_from_ts(dts.parse_type_spec(hint.type_name));
} catch (std::exception& e) {
printf("failed to parse hint: %s\n", e.what());
assert(false);
@ -92,7 +92,7 @@ bool Function::run_type_analysis(const TypeSpec& my_type,
try_apply_hints(op_id, hints, init_types, dts);
}
// while the implementation of propagate_types is in progress, it may throw
// while the implementation of propagate_types_internal is in progress, it may throw
// for unimplemented cases. Eventually this try/catch should be removed.
try {
op->propagate_types(*init_types, file, dts);
@ -134,4 +134,100 @@ bool Function::run_type_analysis(const TypeSpec& my_type,
return true;
}
/*!
 * Run type propagation over the IR2 atomic ops of this function.
 * my_type is the function's type: its arguments seed the initial type state and its last
 * argument is compared against the type found in v0 after the last op.
 * hints maps an op index to type hints which are applied before that op.
 * file is currently unused.
 * On success, the per-block initial types and per-op result types are stored in ir2.env and
 * true is returned. If propagating types through an op throws a std::runtime_error, a warning
 * is appended, whatever types were computed so far are stored in ir2.env, and false is returned.
 */
bool Function::run_type_analysis_ir2(const TypeSpec& my_type,
DecompilerTypeSystem& dts,
LinkedObjectFile& file,
const std::unordered_map<int, std::vector<TypeHint>>& hints) {
(void)file;
// STEP 0 - set decompiler type system settings for this function. In config we can manually
// specify some settings for type propagation to reduce the strictness of type propagation.
dts.type_prop_settings.reset();
if (get_config().pair_functions_by_name.find(guessed_name.to_string()) !=
get_config().pair_functions_by_name.end()) {
dts.type_prop_settings.allow_pair = true;
}
if (guessed_name.kind == FunctionName::FunctionKind::METHOD) {
dts.type_prop_settings.current_method_type = guessed_name.type_name;
}
// block_init_types[i] is the type state at the start of block i.
// op_types[i] is the type state after atomic op i.
std::vector<TypeState> block_init_types, op_types;
block_init_types.resize(basic_blocks.size());
op_types.resize(ir2.atomic_ops->ops.size());
auto& aop = ir2.atomic_ops;
// STEP 1 - topological sort the blocks. This gives us an order where we:
// - never visit unreachable blocks (we can't type propagate these)
// - always visit at least one predecessor of a block before that block
auto order = bb_topo_sort();
assert(!order.vist_order.empty());
assert(order.vist_order.front() == 0);
// STEP 2 - initialize type state for the first block to the function argument types.
block_init_types.at(0) = construct_initial_typestate(my_type);
// and add hints from config
try_apply_hints(0, hints, &block_init_types.at(0), dts);
// STEP 3 - propagate types until the result stops changing
bool run_again = true;
while (run_again) {
run_again = false;
// do each block in the topological sort order:
for (auto block_id : order.vist_order) {
auto& block = basic_blocks.at(block_id);
// init_types always points at the type state feeding the next op: it starts at the block's
// initial types and is advanced to each op's output as we go.
TypeState* init_types = &block_init_types.at(block_id);
for (int op_id = aop->block_id_to_first_atomic_op.at(block_id);
op_id < aop->block_id_to_end_atomic_op.at(block_id); op_id++) {
// apply type hints only if we are not the first op.
// (hints for a block's first op are applied when types are propagated into the block.)
if (op_id != aop->block_id_to_first_atomic_op.at(block_id)) {
try_apply_hints(op_id, hints, init_types, dts);
}
auto& op = aop->ops.at(op_id);
// while the implementation of propagate_types_internal is in progress, it may throw
// for unimplemented cases. Eventually this try/catch should be removed.
try {
op_types.at(op_id) = op->propagate_types(*init_types, ir2.env, dts);
} catch (std::runtime_error& e) {
fmt::print("Type prop fail on {}: {}\n", guessed_name.to_string(), e.what());
warnings += ";; Type prop attempted and failed.\n";
// keep the partial result so it can still be inspected.
ir2.env.set_types(block_init_types, op_types);
return false;
}
// todo, set run again??
// for the next op...
init_types = &op_types.at(op_id);
}
// propagate the types: for each possible succ
// here init_types is the state after the last op of the block (or the block's initial state
// if the block had no ops).
for (auto succ_block_id : {block.succ_ft, block.succ_branch}) {
if (succ_block_id != -1) {
// apply hint
try_apply_hints(aop->block_id_to_first_atomic_op.at(succ_block_id), hints, init_types,
dts);
// set types to LCA (current, new)
if (dts.tp_lca(&block_init_types.at(succ_block_id), *init_types)) {
// if something changed, run again!
run_again = true;
}
}
}
}
}
// compare the type left in v0 after the last op against the declared return type.
// NOTE(review): assumes there is at least one op and that the last op was reached by the
// propagation above - confirm.
auto last_type = op_types.back().get(Register(Reg::GPR, Reg::V0)).typespec();
if (last_type != my_type.last_arg()) {
warnings += fmt::format(";; return type mismatch {} vs {}. ", last_type.print(),
my_type.last_arg().print());
}
ir2.env.set_types(block_init_types, op_types);
return true;
}
} // namespace decompiler

View file

@ -47,16 +47,6 @@ void add_regs_to_str(const T& regs, std::string& str) {
str.append(reg.to_charp());
}
}
/*!
 * Build a bitmask of the GPRs in regs: bit N is set if the GPR with index N appears in the list.
 * Registers of any other kind are ignored.
 */
u32 regs_to_gpr_mask(const std::vector<Register>& regs) {
  u32 result = 0;
  for (const auto& reg : regs) {
    if (reg.get_kind() == Reg::GPR) {
      // shift an unsigned one so that index 31 doesn't shift into the sign bit of an int.
      result |= (u32(1) << reg.get_gpr());
    }
  }
  return result;
}
} // namespace
std::string IR_Atomic::print_with_reguse(const LinkedObjectFile& file) const {

View file

@ -163,13 +163,13 @@ TP_Type IR_Load::get_expression_type(const TypeState& input,
// but for now, this is probably good enough.
if (kind == FLOAT) {
// loading static data with a FLOAT kind load (lwc1), assume result is a float.
return TP_Type::make_from_typespec(dts.ts.make_typespec("float"));
return TP_Type::make_from_ts(dts.ts.make_typespec("float"));
}
if (size == 8) {
// 8 byte integer constants are always loaded from a static pool
// this could technically hide loading a different type from inside of a static basic.
return TP_Type::make_from_typespec(dts.ts.make_typespec("uint"));
return TP_Type::make_from_ts(dts.ts.make_typespec("uint"));
}
}
@ -191,7 +191,7 @@ TP_Type IR_Load::get_expression_type(const TypeState& input,
// remember that we're an object new.
return TP_Type::make_object_new(method_type);
}
return TP_Type::make_from_typespec(method_type);
return TP_Type::make_from_ts(method_type);
}
if (input_type.kind == TP_Type::Kind::TYPESPEC && input_type.typespec() == TypeSpec("type") &&
@ -201,7 +201,7 @@ TP_Type IR_Load::get_expression_type(const TypeState& input,
auto method_info = dts.ts.lookup_method("object", method_id);
if (method_id != GOAL_NEW_METHOD && method_id != GOAL_RELOC_METHOD) {
// this can get us the wrong thing for `new` methods. And maybe relocate?
return TP_Type::make_from_typespec(method_info.type.substitute_for_method_call("object"));
return TP_Type::make_from_ts(method_info.type.substitute_for_method_call("object"));
}
}
@ -217,7 +217,7 @@ TP_Type IR_Load::get_expression_type(const TypeState& input,
case 2:
case 4:
case 8:
return TP_Type::make_from_typespec(TypeSpec("uint"));
return TP_Type::make_from_ts(TypeSpec("uint"));
default:
break;
}
@ -228,13 +228,13 @@ TP_Type IR_Load::get_expression_type(const TypeState& input,
case 2:
case 4:
case 8:
return TP_Type::make_from_typespec(TypeSpec("int"));
return TP_Type::make_from_ts(TypeSpec("int"));
default:
break;
}
break;
case FLOAT:
return TP_Type::make_from_typespec(TypeSpec("float"));
return TP_Type::make_from_ts(TypeSpec("float"));
default:
assert(false);
}
@ -260,7 +260,7 @@ TP_Type IR_Load::get_expression_type(const TypeState& input,
for (auto& x : rd.tokens) {
load_path.push_back(x.print());
}
return TP_Type::make_from_typespec(coerce_to_reg_type(rd.result_type));
return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type));
}
}
@ -290,7 +290,7 @@ TP_Type IR_Load::get_expression_type(const TypeState& input,
if (input_type.kind == TP_Type::Kind::DYNAMIC_METHOD_ACCESS && ro.offset == 16) {
// access method vtable. The input is type + (4 * method), and the 16 is the offset
// of method 0.
return TP_Type::make_from_typespec(TypeSpec("function"));
return TP_Type::make_from_ts(TypeSpec("function"));
}
// Assume we're accessing a field of an object.
FieldReverseLookupInput rd_in;
@ -320,7 +320,7 @@ TP_Type IR_Load::get_expression_type(const TypeState& input,
for (auto& x : rd.tokens) {
load_path.push_back(x.print());
}
return TP_Type::make_from_typespec(coerce_to_reg_type(rd.result_type));
return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type));
}
// rd failed, try as pair.
@ -334,10 +334,10 @@ TP_Type IR_Load::get_expression_type(const TypeState& input,
// we can do.
if (ro.offset == 2) {
// cdr = another pair.
return TP_Type::make_from_typespec(TypeSpec("pair"));
return TP_Type::make_from_ts(TypeSpec("pair"));
} else if (ro.offset == -2) {
// car = some object.
return TP_Type::make_from_typespec(TypeSpec("object"));
return TP_Type::make_from_ts(TypeSpec("object"));
}
}
}
@ -362,7 +362,7 @@ TP_Type IR_FloatMath2::get_expression_type(const TypeState& input,
case SUB:
case MIN:
case MAX:
return TP_Type::make_from_typespec(dts.ts.make_typespec("float"));
return TP_Type::make_from_ts(dts.ts.make_typespec("float"));
default:
assert(false);
}
@ -377,12 +377,12 @@ TP_Type IR_FloatMath1::get_expression_type(const TypeState& input,
// FLOAT_TO_INT, INT_TO_FLOAT, ABS, NEG, SQRT
switch (kind) {
case FLOAT_TO_INT:
return TP_Type::make_from_typespec(TypeSpec("int"));
return TP_Type::make_from_ts(TypeSpec("int"));
case INT_TO_FLOAT:
case ABS:
case NEG:
case SQRT:
return TP_Type::make_from_typespec(TypeSpec("float"));
return TP_Type::make_from_ts(TypeSpec("float"));
default:
assert(false);
}
@ -429,7 +429,7 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input,
if (arg0_type == arg1_type && is_int_or_uint(dts, arg0_type)) {
// both are the same type and both are int/uint, so we assume that we're doing integer math.
// we strip off any weird things like multiplication or integer constant.
return TP_Type::make_from_typespec(arg0_type.typespec());
return TP_Type::make_from_ts(arg0_type.typespec());
}
if (is_int_or_uint(dts, arg0_type) && is_int_or_uint(dts, arg1_type)) {
@ -437,20 +437,20 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input,
// but we use arg1's if arg0 is an integer constant
// in either case, strip off weird stuff.
if (arg0_type.is_integer_constant() && !arg1_type.is_integer_constant()) {
return TP_Type::make_from_typespec(arg1_type.typespec());
return TP_Type::make_from_ts(arg1_type.typespec());
}
return TP_Type::make_from_typespec(arg0_type.typespec());
return TP_Type::make_from_ts(arg0_type.typespec());
}
if (tc(dts, TypeSpec("binteger"), arg0_type) && is_int_or_uint(dts, arg1_type)) {
return TP_Type::make_from_typespec(TypeSpec("binteger"));
return TP_Type::make_from_ts(TypeSpec("binteger"));
}
// special cases for non-integers
if ((arg0_type.typespec() == TypeSpec("object") || arg0_type.typespec() == TypeSpec("pair")) &&
(arg1_type.is_integer_constant(62) || arg1_type.is_integer_constant(61))) {
// boxed object tag trick.
return TP_Type::make_from_typespec(TypeSpec("int"));
return TP_Type::make_from_ts(TypeSpec("int"));
}
//
@ -513,7 +513,7 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input,
if (rd.success) {
// todo, load path.
return TP_Type::make_from_typespec(coerce_to_reg_type(rd.result_type));
return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type));
}
}
//
@ -538,13 +538,13 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input,
if (kind == ADD && arg0_type.typespec().base_type() == "pointer" &&
tc(dts, TypeSpec("integer"), arg1_type)) {
// plain pointer plus integer = plain pointer
return TP_Type::make_from_typespec(TypeSpec("pointer"));
return TP_Type::make_from_ts(TypeSpec("pointer"));
}
if (kind == ADD && arg1_type.typespec().base_type() == "pointer" &&
tc(dts, TypeSpec("integer"), arg0_type)) {
// plain pointer plus integer = plain pointer
return TP_Type::make_from_typespec(TypeSpec("pointer"));
return TP_Type::make_from_ts(TypeSpec("pointer"));
}
if (tc(dts, TypeSpec("structure"), arg1_type) && !dynamic_cast<IR_IntegerConstant*>(arg0.get()) &&
@ -552,7 +552,7 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input,
if (arg1_type.typespec() == TypeSpec("symbol") &&
arg0_type.is_integer_constant(SYM_INFO_OFFSET + POINTER_SIZE)) {
// symbol -> GOAL String
return TP_Type::make_from_typespec(dts.ts.make_pointer_typespec("string"));
return TP_Type::make_from_ts(dts.ts.make_pointer_typespec("string"));
} else {
// byte access of offset array field trick.
// arg1 holds a structure.
@ -563,7 +563,7 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input,
if (kind == AND) {
// base case for and. Just get an integer.
return TP_Type::make_from_typespec(TypeSpec("int"));
return TP_Type::make_from_ts(TypeSpec("int"));
}
//
@ -587,7 +587,7 @@ TP_Type IR_IntMath2::get_expression_type(const TypeState& input,
//
if (kind == SUB && tc(dts, TypeSpec("pointer"), arg0_type) &&
tc(dts, TypeSpec("pointer"), arg1_type)) {
return TP_Type::make_from_typespec(TypeSpec("int"));
return TP_Type::make_from_ts(TypeSpec("int"));
}
throw std::runtime_error(
@ -610,9 +610,9 @@ void BranchDelay::type_prop(TypeState& output,
auto src = dynamic_cast<IR_Register*>(source.get());
assert(src);
if (tc(dts, TypeSpec("uint"), output.get(src->reg))) {
output.get(dst->reg) = TP_Type::make_from_typespec(TypeSpec("uint"));
output.get(dst->reg) = TP_Type::make_from_ts(TypeSpec("uint"));
} else if (tc(dts, TypeSpec("int"), output.get(src->reg))) {
output.get(dst->reg) = TP_Type::make_from_typespec(TypeSpec("int"));
output.get(dst->reg) = TP_Type::make_from_ts(TypeSpec("int"));
} else {
throw std::runtime_error("BranchDelay::type_prop DSLLV for src " +
output.get(src->reg).print());
@ -622,7 +622,7 @@ void BranchDelay::type_prop(TypeState& output,
auto dst = dynamic_cast<IR_Register*>(destination.get());
assert(dst);
// to match the behavior in IntMath1, assume signed when negating.
output.get(dst->reg) = TP_Type::make_from_typespec(TypeSpec("int"));
output.get(dst->reg) = TP_Type::make_from_ts(TypeSpec("int"));
} break;
case SET_REG_FALSE: {
auto dst = dynamic_cast<IR_Register*>(destination.get());
@ -640,7 +640,7 @@ void BranchDelay::type_prop(TypeState& output,
case SET_REG_TRUE: {
auto dst = dynamic_cast<IR_Register*>(destination.get());
assert(dst);
output.get(dst->reg) = TP_Type::make_from_typespec(TypeSpec("symbol"));
output.get(dst->reg) = TP_Type::make_from_ts(TypeSpec("symbol"));
} break;
case SET_BINTEGER: {
@ -682,14 +682,14 @@ TP_Type IR_IntMath1::get_expression_type(const TypeState& input,
switch (kind) {
case NEG:
// if we negate a thing, let's just make it a signed integer.
return TP_Type::make_from_typespec(TypeSpec("int"));
return TP_Type::make_from_ts(TypeSpec("int"));
case ABS:
// if we take the absolute value of a thing, just make it signed.
return TP_Type::make_from_typespec(TypeSpec("int"));
return TP_Type::make_from_ts(TypeSpec("int"));
case NOT:
// otherwise, make it int/uint as needed (this works because we check is_int_or_uint
// above)
return TP_Type::make_from_typespec(arg_type.typespec());
return TP_Type::make_from_ts(arg_type.typespec());
}
}
@ -709,7 +709,7 @@ TP_Type IR_SymbolValue::get_expression_type(const TypeState& input,
// another annoying special case. We have a fake symbol called __START-OF-TABLE__
// which actually means that you get the first address in the symbol table.
// it's not really a linked symbol, but the basic op builder represents it as one.
return TP_Type::make_from_typespec(TypeSpec("pointer"));
return TP_Type::make_from_ts(TypeSpec("pointer"));
}
// look up the type of the symbol
@ -724,7 +724,7 @@ TP_Type IR_SymbolValue::get_expression_type(const TypeState& input,
}
// otherwise, just return a normal typespec
return TP_Type::make_from_typespec(type->second);
return TP_Type::make_from_ts(type->second);
}
TP_Type IR_Symbol::get_expression_type(const TypeState& input,
@ -737,7 +737,7 @@ TP_Type IR_Symbol::get_expression_type(const TypeState& input,
return TP_Type::make_false();
}
return TP_Type::make_from_typespec(TypeSpec("symbol"));
return TP_Type::make_from_ts(TypeSpec("symbol"));
}
TP_Type IR_IntegerConstant::get_expression_type(const TypeState& input,
@ -756,7 +756,7 @@ TP_Type IR_Compare::get_expression_type(const TypeState& input,
(void)file;
(void)dts;
// really a boolean.
return TP_Type::make_from_typespec(TypeSpec("symbol"));
return TP_Type::make_from_ts(TypeSpec("symbol"));
}
void IR_Nop_Atomic::propagate_types(const TypeState& input,
@ -792,7 +792,7 @@ void IR_Call_Atomic::propagate_types(const TypeState& input,
!dts.type_prop_settings.current_method_type.empty()) {
// calling object new method. Set the result to a new object of our type
end_types.get(Register(Reg::GPR, Reg::V0)) =
TP_Type::make_from_typespec(dts.type_prop_settings.current_method_type);
TP_Type::make_from_ts(dts.type_prop_settings.current_method_type);
// update the call type
call_type = in_tp.get_method_new_object_typespec();
call_type.get_arg(call_type.arg_count() - 1) =
@ -837,7 +837,7 @@ void IR_Call_Atomic::propagate_types(const TypeState& input,
call_type = format_call_type;
call_type_set = true;
end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_typespec(in_type.last_arg());
end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_ts(in_type.last_arg());
// we can also update register usage here.
read_regs.clear();
@ -858,7 +858,7 @@ void IR_Call_Atomic::propagate_types(const TypeState& input,
call_type = in_type;
call_type_set = true;
end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_typespec(in_type.last_arg());
end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_ts(in_type.last_arg());
// we can also update register usage here.
read_regs.clear();
@ -897,11 +897,11 @@ TP_Type IR_StaticAddress::get_expression_type(const TypeState& input,
return TP_Type::make_from_string(file.get_goal_string_by_label(label));
} else {
// otherwise, some other static basic.
return TP_Type::make_from_typespec(TypeSpec(word.symbol_name));
return TP_Type::make_from_ts(TypeSpec(word.symbol_name));
}
}
} else if ((label.offset & 7) == PAIR_OFFSET) {
return TP_Type::make_from_typespec(TypeSpec("pair"));
return TP_Type::make_from_ts(TypeSpec("pair"));
}
throw std::runtime_error("IR_StaticAddress couldn't figure out the type: " + label.name);
@ -916,7 +916,7 @@ void IR_AsmOp_Atomic::propagate_types(const TypeState& input,
end_types = input;
if (dst_reg) {
if (name == "daddu") {
end_types.get(dst_reg->reg) = TP_Type::make_from_typespec(TypeSpec("uint"));
end_types.get(dst_reg->reg) = TP_Type::make_from_ts(TypeSpec("uint"));
}
}
}
@ -936,7 +936,7 @@ TP_Type IR_EmptyPair::get_expression_type(const TypeState& input,
(void)file;
(void)dts;
// GOAL's empty pair is actually a pair type, containing the empty pair as the car and cdr
return TP_Type::make_from_typespec(TypeSpec("pair"));
return TP_Type::make_from_ts(TypeSpec("pair"));
}
TP_Type IR_CMoveF::get_expression_type(const TypeState& input,
@ -945,6 +945,6 @@ TP_Type IR_CMoveF::get_expression_type(const TypeState& input,
(void)input;
(void)file;
(void)dts;
return TP_Type::make_from_typespec(TypeSpec("symbol"));
return TP_Type::make_from_ts(TypeSpec("symbol"));
}
} // namespace decompiler

View file

@ -62,6 +62,16 @@ bool AtomicOp::operator!=(const AtomicOp& other) const {
return !((*this) == other);
}
/*!
* Add GOAL temp registers to the clobber list.
*/
void AtomicOp::clobber_temps() {
for (auto clobber : {Reg::V1, Reg::AT, Reg::A0, Reg::A1, Reg::A2, Reg::A3, Reg::T0, Reg::T1,
Reg::T2, Reg::T3, Reg::T4, Reg::T5, Reg::T6, Reg::T7, Reg::T8, Reg::T9}) {
m_clobber_regs.push_back(Register(Reg::GPR, clobber));
}
}
/////////////////////////////
// SimpleAtom
/////////////////////////////
@ -1171,7 +1181,23 @@ std::unique_ptr<Expr> SpecialOp::get_as_expr() const {
throw std::runtime_error("SpecialOp::get_as_expr not yet implemented");
}
void SpecialOp::update_register_info() {}
/*!
 * Fill in register usage for a special op.
 * NOP, BREAK and CRASH touch no registers. SUSPEND clobbers the temporaries.
 * Any other kind is a programming error.
 */
void SpecialOp::update_register_info() {
  if (m_kind == Kind::SUSPEND) {
    // todo - confirm this is true.
    // The code that performs a suspend is written so that the call itself needs no temporaries,
    // yet the suspend operation doesn't appear to preserve them. Possibly saving temp registers
    // was planned and later abandoned.
    clobber_temps();
    return;
  }

  if (m_kind == Kind::NOP || m_kind == Kind::BREAK || m_kind == Kind::CRASH) {
    // nothing read, written, or clobbered.
    return;
  }

  // unknown kind
  assert(false);
}
/////////////////////////////
// CallOp
@ -1218,6 +1244,7 @@ std::unique_ptr<Expr> CallOp::get_as_expr() const {
/*!
 * Fill in register usage for a call: t9 is read and all temporaries are clobbered.
 * Only t9 is recorded as read here; no argument registers are added.
 */
void CallOp::update_register_info() {
  // throw std::runtime_error("CallOp::update_register_info cannot be done until types are known");
  m_read_regs.emplace_back(Reg::GPR, Reg::T9);
  clobber_temps();
}
/////////////////////////////

View file

@ -11,6 +11,7 @@
namespace decompiler {
class Expr;
class DecompilerTypeSystem;
/*!
* A "Variable" represents a register at a given instruction index.
@ -84,12 +85,13 @@ class Variable {
* SetVarConditionOp
* AsmOp
* SetVarExprOp
* AsmOp
*/
class AtomicOp {
public:
explicit AtomicOp(int my_idx);
std::string to_string(const std::vector<DecompilerLabel>& labels, const Env* env) const;
std::string reg_type_info_as_string(const TypeState& init_types,
const TypeState& end_types) const;
virtual goos::Object to_form(const std::vector<DecompilerLabel>& labels,
const Env* env) const = 0;
virtual bool operator==(const AtomicOp& other) const = 0;
@ -121,6 +123,8 @@ class AtomicOp {
// read twice.
virtual void update_register_info() = 0;
TypeState propagate_types(const TypeState& input, const Env& env, DecompilerTypeSystem& dts);
const std::vector<Register>& read_regs() { return m_read_regs; }
const std::vector<Register>& write_regs() { return m_write_regs; }
const std::vector<Register>& clobber_regs() { return m_clobber_regs; }
@ -136,6 +140,12 @@ class AtomicOp {
protected:
int m_my_idx = -1;
// given the input types of all registers, figure out the output types.
virtual TypeState propagate_types_internal(const TypeState& input,
const Env& env,
DecompilerTypeSystem& dts) = 0;
void clobber_temps();
// the register values that are read (at the start of this op)
std::vector<Register> m_read_regs;
// the registers that have actual values written into them (at the end of this op)
@ -175,6 +185,10 @@ class SimpleAtom {
assert(is_var());
return m_variable;
}
// Get the stored integer value. Asserts that this atom is an integer constant.
s64 get_int() const {
assert(is_int());
return m_int;
}
bool is_int() const { return m_kind == Kind::INTEGER_CONSTANT; };
bool is_sym_ptr() const { return m_kind == Kind::SYMBOL_PTR; };
bool is_sym_val() const { return m_kind == Kind::SYMBOL_VAL; };
@ -184,6 +198,7 @@ class SimpleAtom {
bool operator!=(const SimpleAtom& other) const { return !((*this) == other); }
void get_regs(std::vector<Register>* out) const;
SimpleExpression as_expr() const;
TP_Type get_type(const TypeState& input, const Env& env, const DecompilerTypeSystem& dts) const;
private:
Kind m_kind = Kind::INVALID;
@ -256,6 +271,13 @@ class SimpleExpression {
bool operator==(const SimpleExpression& other) const;
bool is_identity() const { return m_kind == Kind::IDENTITY; }
void get_regs(std::vector<Register>* out) const;
TP_Type get_type(const TypeState& input, const Env& env, const DecompilerTypeSystem& dts) const;
TP_Type get_type_int2(const TypeState& input,
const Env& env,
const DecompilerTypeSystem& dts) const;
TP_Type get_type_int1(const TypeState& input,
const Env& env,
const DecompilerTypeSystem& dts) const;
private:
Kind m_kind = Kind::INVALID;
@ -281,6 +303,9 @@ class SetVarOp : public AtomicOp {
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
DecompilerTypeSystem& dts) override;
private:
Variable m_dst;
@ -305,6 +330,9 @@ class AsmOp : public AtomicOp {
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
DecompilerTypeSystem& dts) override;
private:
Instruction m_instr;
@ -388,6 +416,9 @@ class SetVarConditionOp : public AtomicOp {
std::unique_ptr<Expr> get_as_expr() const override;
void update_register_info() override;
void invert() { m_condition.invert(); }
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
DecompilerTypeSystem& dts) override;
private:
Variable m_dst;
@ -410,6 +441,9 @@ class StoreOp : public AtomicOp {
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
DecompilerTypeSystem& dts) override;
private:
int m_size;
@ -434,6 +468,10 @@ class LoadVarOp : public AtomicOp {
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
DecompilerTypeSystem& dts) override;
TP_Type get_src_type(const TypeState& input, const Env& env, DecompilerTypeSystem& dts) const;
private:
Kind m_kind;
@ -471,6 +509,9 @@ class IR2_BranchDelay {
bool operator==(const IR2_BranchDelay& other) const;
void get_regs(std::vector<Register>* write, std::vector<Register>* read) const;
bool is_known() const { return m_kind != Kind::UNKNOWN; }
TypeState propagate_types(const TypeState& input,
const Env& env,
DecompilerTypeSystem& dts) const;
private:
std::optional<Variable> m_var[3];
@ -496,6 +537,9 @@ class BranchOp : public AtomicOp {
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
DecompilerTypeSystem& dts) override;
private:
bool m_likely = false;
@ -526,6 +570,9 @@ class SpecialOp : public AtomicOp {
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
DecompilerTypeSystem& dts) override;
private:
Kind m_kind;
@ -546,6 +593,13 @@ class CallOp : public AtomicOp {
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
DecompilerTypeSystem& dts) override;
protected:
TypeSpec m_call_type;
bool m_call_type_set = false;
};
/*!
@ -571,6 +625,9 @@ class ConditionalMoveFalseOp : public AtomicOp {
std::unique_ptr<Expr> get_set_source_as_expr() const override;
std::unique_ptr<Expr> get_as_expr() const override;
void update_register_info() override;
TypeState propagate_types_internal(const TypeState& input,
const Env& env,
DecompilerTypeSystem& dts) override;
private:
Variable m_dst, m_src;

View file

@ -0,0 +1,772 @@
#include "third-party/fmt/core.h"
#include "decompiler/ObjectFile/LinkedObjectFile.h"
#include "common/log/log.h"
#include "AtomicOp.h"
namespace decompiler {
namespace {
/*!
 * Typecheck shorthand: does the typespec of actual pass a typecheck against expected?
 * The remaining typecheck arguments are passed as an empty string and two false flags.
 * NOTE(review): presumably these disable error reporting/throwing - confirm against
 * TypeSystem::typecheck.
 */
bool tc(const DecompilerTypeSystem& dts, const TypeSpec& expected, const TP_Type& actual) {
  const auto& type_system = dts.ts;
  return type_system.typecheck(expected, actual.typespec(), "", false, false);
}
/*!
 * True if the given type typechecks as either "int" or "uint".
 */
bool is_int_or_uint(const DecompilerTypeSystem& dts, const TP_Type& type) {
  if (tc(dts, TypeSpec("int"), type)) {
    return true;
  }
  return tc(dts, TypeSpec("uint"), type);
}
/*!
 * A register plus a constant integer offset, as filled in by get_as_reg_offset.
 */
struct IR2_RegOffset {
  Register reg;
  // default to zero so the value is well-defined even if get_as_reg_offset fails to match.
  int offset = 0;
};
/*!
 * Try to interpret an expression as "register + constant".
 * Matches either an ADD of a variable (arg 0) and an integer constant (arg 1), or an identity of
 * a variable, which is treated as an offset of 0.
 * On a match, out is filled in and true is returned. Otherwise out is untouched and false is
 * returned.
 */
bool get_as_reg_offset(const SimpleExpression& expr, IR2_RegOffset* out) {
  const bool reg_plus_const = expr.kind() == SimpleExpression::Kind::ADD &&
                              expr.get_arg(0).is_var() && expr.get_arg(1).is_int();
  const bool plain_reg = !reg_plus_const && expr.is_identity() && expr.get_arg(0).is_var();

  if (!reg_plus_const && !plain_reg) {
    return false;
  }

  // in both forms the base register is the first argument.
  out->reg = expr.get_arg(0).var().reg();
  if (reg_plus_const) {
    out->offset = expr.get_arg(1).get_int();
  } else {
    out->offset = 0;
  }
  return true;
}
/*!
 * Map a register's kind to the register class used for values stored in it:
 * GPRs map to GPR_64 and FPRs map to FLOAT.
 * Any other register kind is not supported: this asserts, and throws std::runtime_error when
 * asserts are compiled out, so control never falls off the end of the function.
 */
RegClass get_reg_kind(const Register& r) {
  switch (r.get_kind()) {
    case Reg::GPR:
      return RegClass::GPR_64;
    case Reg::FPR:
      return RegClass::FLOAT;
    default:
      assert(false);
      throw std::runtime_error("get_reg_kind: unsupported register kind");
  }
}
} // namespace
/*!
 * Build a one-line summary of the types around this op, in the form "[reads] -> [writes]".
 * The reads are printed from init_types and the writes from end_types, each limited to the GPRs
 * this op reads or writes. If the op clobbers any GPR, "cl: " followed by every clobbered
 * register name (each followed by a space) is appended.
 */
std::string AtomicOp::reg_type_info_as_string(const TypeState& init_types,
                                              const TypeState& end_types) const {
  const auto reads = regs_to_gpr_mask(m_read_regs);
  const auto writes = regs_to_gpr_mask(m_write_regs);
  const auto clobbers = regs_to_gpr_mask(m_clobber_regs);

  std::string text = fmt::format("[{}] -> [{}]", init_types.print_gpr_masked(reads),
                                 end_types.print_gpr_masked(writes));

  if (clobbers != 0) {
    text += "cl: ";
    for (const auto& clobbered : m_clobber_regs) {
      text += clobbered.to_string();
      text += ' ';
    }
  }
  return text;
}
/*!
 * Get the type of this atom, given the register types on entry to the op (input).
 * env provides access to the linked object file (used for static addresses) and dts provides
 * the known symbol types.
 * Throws std::runtime_error if a symbol's type is unknown or if the type of a static address
 * can't be determined. Asserts on an invalid atom kind.
 */
TP_Type SimpleAtom::get_type(const TypeState& input,
const Env& env,
const DecompilerTypeSystem& dts) const {
switch (m_kind) {
case Kind::EMPTY_LIST:
// the empty list is typed as a pair.
return TP_Type::make_from_ts("pair");
case Kind::VARIABLE:
// a variable has whatever type its register currently holds.
return input.get(var().reg());
case Kind::INTEGER_CONSTANT:
// keep the constant's value in the type.
return TP_Type::make_from_integer(m_int);
case Kind::SYMBOL_PTR:
// a symbol itself: #f gets the special false type, everything else is a plain symbol.
if (m_string == "#f") {
return TP_Type::make_false();
} else {
return TP_Type::make_from_ts("symbol");
}
case Kind::SYMBOL_VAL: {
if (m_string == "#f") {
// if we ever read the false symbol, it should contain the false symbol as its value.
return TP_Type::make_false();
} else if (m_string == "__START-OF-TABLE__") {
// another annoying special case. We have a fake symbol called __START-OF-TABLE__
// which actually means that you get the first address in the symbol table.
// it's not really a linked symbol, but the basic op builder represents it as one.
return TP_Type::make_from_ts(TypeSpec("pointer"));
}
// look up the type of the symbol
auto type = dts.symbol_types.find(m_string);
if (type == dts.symbol_types.end()) {
throw std::runtime_error("Don't have the type of symbol " + m_string);
}
if (type->second == TypeSpec("type")) {
// if we get a type by symbol, we should remember which type we got it from.
return TP_Type::make_type_object(TypeSpec(m_string));
}
// otherwise, just return a normal typespec
return TP_Type::make_from_ts(type->second);
}
case Kind::STATIC_ADDRESS: {
// for a static address, m_int is used as the index of the label.
// note: this takes a copy of the label.
auto label = env.file->labels.at(m_int);
// strings are 16-byte aligned, but functions are 8 byte aligned?
if ((label.offset & 7) == BASIC_OFFSET) {
// it's a basic! probably.
// look at the word 4 bytes before the label.
// NOTE(review): presumably this is the basic's type tag - confirm.
const auto& word =
env.file->words_by_seg.at(label.target_segment).at((label.offset - 4) / 4);
if (word.kind == LinkedWord::TYPE_PTR) {
if (word.symbol_name == "string") {
// static string: remember its contents in the type.
return TP_Type::make_from_string(env.file->get_goal_string_by_label(label));
} else {
// otherwise, some other static basic.
return TP_Type::make_from_ts(TypeSpec(word.symbol_name));
}
}
} else if ((label.offset & 7) == PAIR_OFFSET) {
return TP_Type::make_from_ts(TypeSpec("pair"));
}
// neither a recognized basic nor a pair.
throw std::runtime_error("IR_StaticAddress couldn't figure out the type: " + label.name);
}
case Kind::INVALID:
default:
assert(false);
}
// only reached for an invalid kind when asserts are compiled out.
return {};
}
/*!
 * Get the type of the result of this expression, given the types in registers.
 * Identity expressions take the type of their argument, the float conversion and float
 * divide kinds are "float", and integer math is handed off to get_type_int1 (one argument)
 * or get_type_int2 (two arguments).
 * Throws std::runtime_error for any other kind.
 */
TP_Type SimpleExpression::get_type(const TypeState& input,
                                   const Env& env,
                                   const DecompilerTypeSystem& dts) const {
  switch (m_kind) {
    case Kind::IDENTITY:
      return m_args[0].get_type(input, env, dts);

    case Kind::GPR_TO_FPR: {
      // the result is always a float, but complain if the source wasn't one already.
      const auto& src_type = input.get(get_arg(0).var().reg());
      if (src_type.typespec() != TypeSpec("float")) {
        lg::warn("GPR to FPR used on a {}", src_type.print());
      }
      return TP_Type::make_from_ts("float");
    }

    case Kind::FPR_TO_GPR:
    case Kind::DIV_S:
      return TP_Type::make_from_ts("float");

    // two argument integer math
    case Kind::ADD:
    case Kind::SUB:
    case Kind::MUL_SIGNED:
    case Kind::MUL_UNSIGNED:
    case Kind::DIV_SIGNED:
    case Kind::MOD_SIGNED:
    case Kind::MIN_SIGNED:
    case Kind::MAX_SIGNED:
    case Kind::LEFT_SHIFT:
    case Kind::RIGHT_SHIFT_ARITH:
    case Kind::RIGHT_SHIFT_LOGIC:
    case Kind::AND:
    case Kind::OR:
    case Kind::NOR:
    case Kind::XOR:
      return get_type_int2(input, env, dts);

    // one argument integer math
    case Kind::NEG:
    case Kind::LOGNOT:
      return get_type_int1(input, env, dts);

    default:
      break;
  }

  // every handled kind returned above, so this is a kind we don't know how to type.
  throw std::runtime_error("Simple expression can't get_type: " +
                           to_form(env.file->labels, &env).print());
}
/*!
 * Get the result type of a one-argument integer math expression (NEG or LOGNOT).
 * The argument must be an int/uint according to is_int_or_uint, otherwise this throws
 * std::runtime_error. It also throws if the expression kind isn't one of the two above.
 */
TP_Type SimpleExpression::get_type_int1(const TypeState& input,
                                        const Env& env,
                                        const DecompilerTypeSystem& dts) const {
  const auto arg_type = m_args[0].get_type(input, env, dts);

  if (is_int_or_uint(dts, arg_type)) {
    if (m_kind == Kind::NEG) {
      // if we negate a thing, let's just make it a signed integer.
      return TP_Type::make_from_ts(TypeSpec("int"));
    }

    // (an absolute value case was sketched in the original but is not implemented:
    //  it would also just produce a signed "int".)

    if (m_kind == Kind::LOGNOT) {
      // keep int/uint as it was. This works because of the is_int_or_uint check above.
      return TP_Type::make_from_ts(arg_type.typespec());
    }
  }

  throw std::runtime_error("IR_IntMath1::get_expression_type case not handled: " +
                           to_form(env.file->labels, &env).print() + " " + arg_type.print());
}
/*!
 * Get the result type of a two-argument "integer math" expression.
 * This is really a list of heuristics: each rule below is tried in order and the first
 * rule that matches decides the result, so the order of the checks matters.
 * Besides plain integer math, the rules recognize address computations that are built
 * out of integer instructions (field access, array access, pointer math, method table
 * access).
 * Throws std::runtime_error if no rule matches.
 */
TP_Type SimpleExpression::get_type_int2(const TypeState& input,
const Env& env,
const DecompilerTypeSystem& dts) const {
// both arguments are typed first, the rules below only look at these two types
// (and at m_args/m_kind).
auto arg0_type = m_args[0].get_type(input, env, dts);
auto arg1_type = m_args[1].get_type(input, env, dts);
// special cases for integers
switch (m_kind) {
case Kind::LEFT_SHIFT:
// multiplication by constant power of two, optimized to a shift.
if (m_args[1].is_int() && is_int_or_uint(dts, arg0_type)) {
// the shift amount must be a valid shift for a 64-bit value.
assert(m_args[1].get_int() >= 0);
assert(m_args[1].get_int() < 64);
// remember the multiplier (2^shift) as a product type.
return TP_Type::make_from_product(1ull << m_args[1].get_int());
}
break;
case Kind::MUL_SIGNED: {
// integer constant times integer: remember the constant as the multiplier.
if (arg0_type.is_integer_constant() && is_int_or_uint(dts, arg1_type)) {
return TP_Type::make_from_product(arg0_type.get_integer_constant());
}
} break;
case Kind::ADD:
if (arg0_type.is_product_with(4) && tc(dts, TypeSpec("type"), arg1_type)) {
// dynamic access into the method array with shift, add, offset-load
// no need to track the type because we don't know the method index anyway.
return TP_Type::make_partial_dyanmic_vtable_access();
}
break;
default:
break;
}
// none of the kind-specific rules applied. The rules from here on mostly look at the
// argument types (some also check m_kind).
if (arg0_type == arg1_type && is_int_or_uint(dts, arg0_type)) {
// both are the same type and both are int/uint, so we assume that we're doing integer math.
// we strip off any weird things like multiplication or integer constant.
return TP_Type::make_from_ts(arg0_type.typespec());
}
if (is_int_or_uint(dts, arg0_type) && is_int_or_uint(dts, arg1_type)) {
// usually we would want to use arg0's type as the "winning" type.
// but we use arg1's if arg0 is an integer constant
// in either case, strip off weird stuff.
if (arg0_type.is_integer_constant() && !arg1_type.is_integer_constant()) {
return TP_Type::make_from_ts(arg1_type.typespec());
}
return TP_Type::make_from_ts(arg0_type.typespec());
}
if (tc(dts, TypeSpec("binteger"), arg0_type) && is_int_or_uint(dts, arg1_type)) {
// if arg0 is a binteger, the result is probably a binteger as well
return TP_Type::make_from_ts("binteger");
}
// special cases for non-integers
if ((arg0_type.typespec() == TypeSpec("object") || arg0_type.typespec() == TypeSpec("pair")) &&
(arg1_type.is_integer_constant(62) || arg1_type.is_integer_constant(61))) {
// boxed object tag trick.
return TP_Type::make_from_ts("int");
}
if (m_args[1].is_int() && m_kind == Kind::ADD && arg0_type.kind == TP_Type::Kind::TYPESPEC) {
// access a field.
// the lookup is done with no deref info and no stride: just base type + constant offset.
FieldReverseLookupInput rd_in;
rd_in.deref = std::nullopt;
rd_in.stride = 0;
rd_in.offset = m_args[1].get_int();
rd_in.base_type = arg0_type.typespec();
auto rd = dts.ts.reverse_field_lookup(rd_in);
if (rd.success) {
return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type));
}
// if the lookup fails, we fall through and keep trying the remaining rules.
}
// product + object (in either argument order): remember both the object's type and the
// multiplier, so a later load can be recognized (see OBJECT_PLUS_PRODUCT_WITH_CONSTANT in
// LoadVarOp::get_src_type).
if (m_kind == Kind::ADD && arg0_type.is_product() && arg1_type.kind == TP_Type::Kind::TYPESPEC) {
return TP_Type::make_object_plus_product(arg1_type.typespec(), arg0_type.get_multiplier());
}
if (m_kind == Kind::ADD && arg1_type.is_product() && arg0_type.kind == TP_Type::Kind::TYPESPEC) {
return TP_Type::make_object_plus_product(arg0_type.typespec(), arg1_type.get_multiplier());
}
if (m_kind == Kind::ADD && arg0_type.typespec().base_type() == "pointer" &&
tc(dts, TypeSpec("integer"), arg1_type)) {
// plain pointer plus integer = plain pointer
return TP_Type::make_from_ts(TypeSpec("pointer"));
}
if (m_kind == Kind::ADD && arg1_type.typespec().base_type() == "pointer" &&
tc(dts, TypeSpec("integer"), arg0_type)) {
// plain pointer plus integer = plain pointer
return TP_Type::make_from_ts(TypeSpec("pointer"));
}
// NOTE(review): this rule does not check m_kind, unlike the ADD rules above - confirm that
// this is intended.
if (tc(dts, TypeSpec("structure"), arg1_type) && !m_args[0].is_int() &&
is_int_or_uint(dts, arg0_type)) {
if (arg1_type.typespec() == TypeSpec("symbol") &&
arg0_type.is_integer_constant(SYM_INFO_OFFSET + POINTER_SIZE)) {
// symbol -> GOAL String
// NOTE - the offset doesn't fit in a s16, so it's loaded into a register first.
// so we expect the arg to be a variable, and the type propagation will figure out the
// integer constant.
return TP_Type::make_from_ts(dts.ts.make_pointer_typespec("string"));
} else {
// byte access of offset array field trick.
// arg1 holds a structure.
// arg0 is an integer in a register.
return TP_Type::make_object_plus_product(arg1_type.typespec(), 1);
}
}
if (m_kind == Kind::AND) {
// base case for and. Just get an integer.
return TP_Type::make_from_ts(TypeSpec("int"));
}
// difference of two pointers is an integer.
if (m_kind == Kind::SUB && tc(dts, TypeSpec("pointer"), arg0_type) &&
tc(dts, TypeSpec("pointer"), arg1_type)) {
return TP_Type::make_from_ts(TypeSpec("int"));
}
// no rule matched. The message includes the expression and both argument types.
throw std::runtime_error(fmt::format("Can't get_type_int2: {}, args {} and {}",
to_form(env.file->labels, &env).print(), arg0_type.print(),
arg1_type.print()));
}
/*!
 * Compute the register types after the operation in a branch delay slot.
 * Returns a copy of the input types with the destination register (if any) updated.
 * Throws std::runtime_error for DSLLV on a non-integer source, or for a delay slot kind
 * that isn't handled here.
 */
TypeState IR2_BranchDelay::propagate_types(const TypeState& input,
                                           const Env& env,
                                           DecompilerTypeSystem& dts) const {
  TypeState result = input;

  switch (m_kind) {
    case Kind::NOP:
      // nothing is written.
      break;

    case Kind::SET_REG_FALSE:
      result.get(m_var[0]->reg()) = TP_Type::make_false();
      break;

    case Kind::SET_REG_TRUE:
      result.get(m_var[0]->reg()) = TP_Type::make_from_ts(TypeSpec("symbol"));
      break;

    case Kind::SET_REG_REG:
      // plain move: the destination gets the source's type.
      result.get(m_var[0]->reg()) = result.get(m_var[1]->reg());
      break;

    case Kind::SET_BINTEGER:
      result.get(m_var[0]->reg()) = TP_Type::make_type_object(TypeSpec("binteger"));
      break;

    case Kind::SET_PAIR:
      result.get(m_var[0]->reg()) = TP_Type::make_type_object(TypeSpec("pair"));
      break;

    case Kind::NEGATE:
      // to match the behavior in IntMath1, assume signed when negating.
      result.get(m_var[0]->reg()) = TP_Type::make_from_ts("int");
      break;

    case Kind::DSLLV: {
      // I believe this is only used in ash. The type of the shift amount is ignored, only
      // the shifted value matters. For a uint/int based type the result is plain uint/int
      // (not the exact input type), which also kills any weird stuff like product.
      // Anything that isn't an integer type is currently an error.
      const auto dst_reg = m_var[0]->reg();
      const auto src_reg = m_var[1]->reg();
      if (tc(dts, TypeSpec("uint"), result.get(src_reg))) {
        result.get(dst_reg) = TP_Type::make_from_ts("uint");
      } else if (tc(dts, TypeSpec("int"), result.get(src_reg))) {
        result.get(dst_reg) = TP_Type::make_from_ts("int");
      } else {
        throw std::runtime_error("BranchDelay::type_prop DSLLV for src " +
                                 result.get(src_reg).print());
      }
    } break;

    default:
      throw std::runtime_error("Unhandled branch delay in type_prop: " +
                               to_form(env.file->labels, &env).print());
  }

  return result;
}
/////////////////////////////////////////
// Implementations of propagate_types_internal
/////////////////////////////////////////
/*!
 * Compute the register types after this op, given the types before it.
 * The op-specific work is done by propagate_types_internal. Afterward, every register in
 * the op's clobber list is marked as uninitialized.
 */
TypeState AtomicOp::propagate_types(const TypeState& input,
                                    const Env& env,
                                    DecompilerTypeSystem& dts) {
  // op-specific part
  TypeState after_op = propagate_types_internal(input, env, dts);

  // clobbered registers no longer hold anything we know the type of.
  for (const auto& clobbered : m_clobber_regs) {
    after_op.get(clobbered) = TP_Type::make_uninitialized();
  }

  return after_op;
}
/*!
 * A set: the destination register gets the type of the source expression. The source is
 * typed using the register types before the op.
 */
TypeState SetVarOp::propagate_types_internal(const TypeState& input,
                                             const Env& env,
                                             DecompilerTypeSystem& dts) {
  TypeState after = input;
  const auto src_type = m_src.get_type(input, env, dts);
  after.get(m_dst.reg()) = src_type;
  return after;
}
/*!
 * An inline assembly op. If it has a destination and that destination is a GPR or FPR, the
 * destination is typed as "int". All other registers keep their types.
 */
TypeState AsmOp::propagate_types_internal(const TypeState& input,
                                          const Env& env,
                                          DecompilerTypeSystem& dts) {
  (void)env;
  (void)dts;

  TypeState after = input;
  if (!m_dst.has_value()) {
    // no destination, nothing to update.
    return after;
  }

  const auto dst_kind = m_dst->reg().get_kind();
  const bool is_tracked_reg = dst_kind == Reg::GPR || dst_kind == Reg::FPR;
  if (is_tracked_reg) {
    after.get(m_dst->reg()) = TP_Type::make_from_ts("int");
  }
  return after;
}
/*!
 * Setting a variable to the result of a condition: the destination is typed as "symbol".
 */
TypeState SetVarConditionOp::propagate_types_internal(const TypeState& input,
                                                      const Env& env,
                                                      DecompilerTypeSystem& dts) {
  (void)env;
  (void)dts;

  TypeState after = input;
  auto& dst_type = after.get(m_dst.reg());
  dst_type = TP_Type::make_from_ts("symbol");
  return after;
}
/*!
 * A store doesn't change the type of any register, so the types pass through unchanged.
 */
TypeState StoreOp::propagate_types_internal(const TypeState& input,
                                            const Env& env,
                                            DecompilerTypeSystem& dts) {
  (void)env;
  (void)dts;

  TypeState unchanged = input;
  return unchanged;
}
/*!
 * Figure out the type of the value read by this load, using the register types before
 * the op. The rules are tried in order and the first one that applies wins:
 *  - loads from a static address: float loads are "float", 8-byte loads are "uint"
 *  - loads from register + offset (the offset may be 0):
 *    - method table access on a known type object, or on a plain "type"
 *    - loads through a plain "pointer"
 *    - object + product + constant offset (reverse field lookup with a stride)
 *    - reading the type tag at offset -4
 *    - dynamic method table access
 *    - field access, using a reverse field lookup on the base type
 *    - car/cdr of a pair, if pair loads are enabled in dts.type_prop_settings
 *
 * Throws std::runtime_error if the type can't be determined.
 */
TP_Type LoadVarOp::get_src_type(const TypeState& input,
                                const Env& env,
                                DecompilerTypeSystem& dts) const {
  ///////////////////////////////////////
  // STATIC ADDRESS
  ///////////////////////////////////////
  if (m_src.is_identity()) {
    auto& src = m_src.get_arg(0);
    if (src.is_static_addr()) {
      if (m_kind == Kind::FLOAT) {
        // assume anything loaded from floating point will be a float.
        return TP_Type::make_from_ts("float");
      }

      if (m_size == 8) {
        // 8 byte integer constants are always loaded from a static pool
        // this could technically hide loading a different type from inside of a static basic.
        return TP_Type::make_from_ts(dts.ts.make_typespec("uint"));
      }
    }
  }

  ///////////////////////////////////////
  // REGISTER + OFFSET (possibly 0)
  ///////////////////////////////////////
  IR2_RegOffset ro;
  if (get_as_reg_offset(m_src, &ro)) {
    auto& input_type = input.get(ro.reg);

    // description of this load for reverse field lookups. Shared by the two lookups below.
    const auto make_load_deref = [&]() {
      DerefKind dk;
      dk.is_store = false;
      dk.reg_kind = get_reg_kind(ro.reg);
      dk.sign_extend = m_kind == Kind::SIGNED;
      dk.size = m_size;
      return dk;
    };

    // loads from the method table are 4-byte unsigned loads at a 4-byte aligned offset of
    // at least 16 (16 is the offset of method 0).
    const bool is_method_slot_load =
        ro.offset >= 16 && (ro.offset & 3) == 0 && m_size == 4 && m_kind == Kind::UNSIGNED;

    if (input_type.kind == TP_Type::Kind::TYPE_OF_TYPE_OR_CHILD && is_method_slot_load) {
      // method get of fixed type
      auto type_name = input_type.get_type_objects_typespec().base_type();
      auto method_id = (ro.offset - 16) / 4;
      auto method_info = dts.ts.lookup_method(type_name, method_id);
      auto method_type = method_info.type.substitute_for_method_call(type_name);
      if (type_name == "object" && method_id == GOAL_NEW_METHOD) {
        // remember that we're an object new.
        return TP_Type::make_object_new(method_type);
      }
      return TP_Type::make_from_ts(method_type);
    }

    if (input_type.kind == TP_Type::Kind::TYPESPEC && input_type.typespec() == TypeSpec("type") &&
        is_method_slot_load) {
      // method get of an unknown type. We assume the most general "object" type.
      auto method_id = (ro.offset - 16) / 4;
      if (method_id != GOAL_NEW_METHOD && method_id != GOAL_RELOC_METHOD) {
        // this would get us the wrong thing for `new` methods (and maybe relocate?), so
        // those are skipped. The lookup is only done when its result is actually used.
        auto method_info = dts.ts.lookup_method("object", method_id);
        return TP_Type::make_from_ts(method_info.type.substitute_for_method_call("object"));
      }
    }

    if (input_type.typespec() == TypeSpec("pointer")) {
      // we got a plain pointer. let's just assume we're loading an integer.
      // perhaps we should disable this feature by default on 4-byte loads if we're getting
      // lots of false positives for loading pointers from plain pointers.
      switch (m_kind) {
        case Kind::UNSIGNED:
          switch (m_size) {
            case 1:
            case 2:
            case 4:
            case 8:
              return TP_Type::make_from_ts(TypeSpec("uint"));
            default:
              break;
          }
          break;
        case Kind::SIGNED:
          switch (m_size) {
            case 1:
            case 2:
            case 4:
            case 8:
              return TP_Type::make_from_ts(TypeSpec("int"));
            default:
              break;
          }
          break;
        case Kind::FLOAT:
          return TP_Type::make_from_ts(TypeSpec("float"));
        default:
          assert(false);
      }
    }

    if (input_type.kind == TP_Type::Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT) {
      // the base is (object + multiplier * something). Look up a field using the
      // multiplier as the stride.
      FieldReverseLookupInput rd_in;
      rd_in.deref = make_load_deref();
      rd_in.base_type = input_type.get_obj_plus_const_mult_typespec();
      rd_in.stride = input_type.get_multiplier();
      rd_in.offset = ro.offset;
      auto rd = dts.ts.reverse_field_lookup(rd_in);

      if (rd.success) {
        // NOTE: the load path (rd.addr_of, rd.tokens, base register) is not recorded here yet.
        return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type));
      }
    }

    if (input_type.kind == TP_Type::Kind::TYPESPEC && ro.offset == -4 && m_kind == Kind::UNSIGNED &&
        m_size == 4 && ro.reg.get_kind() == Reg::GPR) {
      // get type of basic likely, but misrecognized as an object.
      // occurs often in typecase-like structures because other possible types are
      // "stripped".
      // NOTE: the load path ("type" field of the base register) is not recorded here yet.
      return TP_Type::make_type_object(input_type.typespec().base_type());
    }

    if (input_type.kind == TP_Type::Kind::DYNAMIC_METHOD_ACCESS && ro.offset == 16) {
      // access method vtable. The input is type + (4 * method), and the 16 is the offset
      // of method 0.
      return TP_Type::make_from_ts(TypeSpec("function"));
    }

    // Assume we're accessing a field of an object.
    FieldReverseLookupInput rd_in;
    rd_in.deref = make_load_deref();
    rd_in.base_type = input_type.typespec();
    rd_in.stride = 0;
    rd_in.offset = ro.offset;
    auto rd = dts.ts.reverse_field_lookup(rd_in);

    if (rd.success) {
      // NOTE: the load path (rd.addr_of, rd.tokens, base register) is not recorded here yet.
      return TP_Type::make_from_ts(coerce_to_reg_type(rd.result_type));
    }

    // only error on failure if "pair" is disabled. otherwise it might be a pair.
    if (!dts.type_prop_settings.allow_pair) {
      printf("input type is %s, offset is %d, sign %d size %d\n", rd_in.base_type.print().c_str(),
             rd_in.offset, rd_in.deref.value().sign_extend, rd_in.deref.value().size);
      throw std::runtime_error(fmt::format("Could not get type of load: {}. Reverse Deref Failed.",
                                           to_form(env.file->labels, &env).print()));
    }

    // rd failed and pairs are allowed, try as pair.
    // we are strict here - only permit pair-type loads from object or pair.
    // object is permitted for stuff like association lists where the car is also a pair.
    if (m_kind == Kind::SIGNED && m_size == 4 &&
        (input_type.typespec() == TypeSpec("object") ||
         input_type.typespec() == TypeSpec("pair"))) {
      // these rules are of course not always correct or the most specific, but it's the best
      // we can do.
      if (ro.offset == 2) {
        // cdr = another pair.
        return TP_Type::make_from_ts(TypeSpec("pair"));
      } else if (ro.offset == -2) {
        // car = some object.
        return TP_Type::make_from_ts(TypeSpec("object"));
      }
    }
  }

  // nothing matched. (the original had a second, unreachable throw after this one.)
  throw std::runtime_error(
      fmt::format("Could not get type of load: {}. ", to_form(env.file->labels, &env).print()));
}
/*!
 * A load: the destination register gets the type computed by get_src_type from the
 * register types before the op.
 */
TypeState LoadVarOp::propagate_types_internal(const TypeState& input,
                                              const Env& env,
                                              DecompilerTypeSystem& dts) {
  TypeState after = input;
  const auto loaded_type = get_src_type(input, env, dts);
  after.get(m_dst.reg()) = loaded_type;
  return after;
}
/*!
 * A branch only changes register types through the op in its delay slot, so this just
 * forwards to the branch delay's propagate_types.
 */
TypeState BranchOp::propagate_types_internal(const TypeState& input,
                                             const Env& env,
                                             DecompilerTypeSystem& dts) {
  TypeState after_delay_slot = m_branch_delay.propagate_types(input, env, dts);
  return after_delay_slot;
}
/*!
 * Special ops (NOP, BREAK, CRASH, SUSPEND) don't write any registers, so the types pass
 * through unchanged. Suspend clobbers registers, but clobbers are applied by
 * AtomicOp::propagate_types after this returns.
 * An unknown kind is a programming error: this asserts, and when asserts are compiled out
 * it throws std::runtime_error instead of running off the end of the function.
 */
TypeState SpecialOp::propagate_types_internal(const TypeState& input,
                                              const Env& env,
                                              DecompilerTypeSystem& dts) {
  (void)env;
  (void)dts;
  // none of these write anything. Suspend clobbers, but this is taken care of automatically
  switch (m_kind) {
    case Kind::NOP:
    case Kind::BREAK:
    case Kind::CRASH:
    case Kind::SUSPEND:
      return input;
    default:
      assert(false);
      // only reached if asserts are disabled. Without this, control would flow off the end
      // of a non-void function, which is undefined behavior.
      throw std::runtime_error("SpecialOp::propagate_types_internal: unknown kind");
  }
}
/*!
 * A function call. The function is in t9, and its type determines:
 *  - the type of the return value in v0
 *  - m_call_type (and m_call_type_set), the type used for this call
 *  - m_read_regs, which is rebuilt as t9 plus one register per argument
 *
 * Special cases:
 *  - calling the "object new" method while dts.type_prop_settings.current_method_type is
 *    set: the result is a new object of that type. m_read_regs is left as is.
 *  - calling a _varargs_ function (format): the argument count comes from the format string
 *    in a1.
 *
 * Throws std::runtime_error if t9 isn't a function with known type, if the format string
 * can't be found, or if the call needs more argument registers than exist.
 */
TypeState CallOp::propagate_types_internal(const TypeState& input,
                                           const Env& env,
                                           DecompilerTypeSystem& dts) {
  (void)env;

  constexpr int max_reg_args = 8;
  const Reg::Gpr arg_regs[max_reg_args] = {Reg::A0, Reg::A1, Reg::A2, Reg::A3,
                                           Reg::T0, Reg::T1, Reg::T2, Reg::T3};

  // rebuild the registers read by this call: the function, then the first reg_arg_count
  // argument registers. Callers must check reg_arg_count <= max_reg_args first.
  const auto set_read_regs = [&](int reg_arg_count) {
    m_read_regs.clear();
    m_read_regs.emplace_back(Reg::GPR, Reg::T9);
    for (int i = 0; i < reg_arg_count; i++) {
      m_read_regs.emplace_back(Reg::GPR, arg_regs[i]);
    }
  };

  TypeState end_types = input;

  auto in_tp = input.get(Register(Reg::GPR, Reg::T9));
  if (in_tp.kind == TP_Type::Kind::OBJECT_NEW_METHOD &&
      !dts.type_prop_settings.current_method_type.empty()) {
    // calling object new method. Set the result to a new object of our type
    end_types.get(Register(Reg::GPR, Reg::V0)) =
        TP_Type::make_from_ts(dts.type_prop_settings.current_method_type);
    // update the call type
    m_call_type = in_tp.get_method_new_object_typespec();
    m_call_type.get_arg(m_call_type.arg_count() - 1) =
        TypeSpec(dts.type_prop_settings.current_method_type);
    m_call_type_set = true;
    return end_types;
  }

  auto in_type = in_tp.typespec();

  if (in_type.base_type() != "function") {
    throw std::runtime_error("Called something that wasn't a function: " + in_type.print());
  }

  if (in_type.arg_count() < 1) {
    throw std::runtime_error("Called a function, but we don't know its type");
  }

  if (in_type.arg_count() == 2 && in_type.get_arg(0) == TypeSpec("_varargs_")) {
    // we're calling a varargs function, which is format. We can determine the argument count
    // by looking at the format string, if we can get it.
    auto arg_type = input.get(Register(Reg::GPR, Reg::A1));
    if (arg_type.is_constant_string() || arg_type.is_format_string()) {
      int arg_count = -1;

      if (arg_type.is_constant_string()) {
        auto& str = arg_type.get_string();
        arg_count = dts.get_format_arg_count(str);
      } else {
        // is format string.
        arg_count = arg_type.get_format_string_arg_count();
      }

      // destination and format string are passed in registers too. Check before touching
      // any state, the original would read past the end of arg_regs here.
      const int reg_arg_count = arg_count + 2;
      if (reg_arg_count > max_reg_args) {
        throw std::runtime_error(
            fmt::format("Call to format with {} format arguments needs {} argument registers, "
                        "but there are only {}",
                        arg_count, reg_arg_count, max_reg_args));
      }

      TypeSpec format_call_type("function");
      format_call_type.add_arg(TypeSpec("object"));  // destination
      format_call_type.add_arg(TypeSpec("string"));  // format string
      for (int i = 0; i < arg_count; i++) {
        format_call_type.add_arg(TypeSpec("object"));
      }
      format_call_type.add_arg(TypeSpec("object"));  // return value

      m_call_type = format_call_type;
      m_call_type_set = true;

      end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_ts(in_type.last_arg());

      // we can also update register usage here.
      set_read_regs(reg_arg_count);

      return end_types;
    } else {
      throw std::runtime_error("Failed to get string for _varags_ call, got " + arg_type.print());
    }
  }

  // normal call: the last entry of the function type is the return type, the rest are
  // arguments. Same bounds check as above, before touching any state.
  const int reg_arg_count = int(in_type.arg_count()) - 1;
  if (reg_arg_count > max_reg_args) {
    throw std::runtime_error(
        fmt::format("Call to {} needs {} argument registers, but there are only {}",
                    in_type.print(), reg_arg_count, max_reg_args));
  }

  // set the call type!
  m_call_type = in_type;
  m_call_type_set = true;

  end_types.get(Register(Reg::GPR, Reg::V0)) = TP_Type::make_from_ts(in_type.last_arg());

  // we can also update register usage here.
  set_read_regs(reg_arg_count);

  return end_types;
}
/*!
 * Conditionally move #f into the destination.
 * These should only appear when paired with a (set! dest #t) earlier, so the destination is
 * expected to be a "symbol" already and no type should change. If it isn't, warn and force
 * the destination to "symbol".
 */
TypeState ConditionalMoveFalseOp::propagate_types_internal(const TypeState& input,
                                                           const Env& env,
                                                           DecompilerTypeSystem& dts) {
  (void)env;
  (void)dts;

  TypeState after = input;
  auto& dst_type = after.get(m_dst.reg());
  if (dst_type.typespec() != TypeSpec("symbol")) {
    lg::warn("Conditional Moved #f into something of type {}", dst_type.typespec().print());
    dst_type = TP_Type::make_from_ts("symbol");
  }
  return after;
}
} // namespace decompiler

View file

@ -7,4 +7,14 @@ std::string Env::get_variable_name(Register reg, int atomic_idx) const {
(void)atomic_idx;
throw std::runtime_error("Env::get_variable_name not yet implemented.");
}
/*!
 * Store the result of the type analysis pass in this Env.
 * Both vectors are copied, and the Env is marked as having type information
 * (see has_type_analysis).
 */
void Env::set_types(const std::vector<TypeState>& block_init_types,
                    const std::vector<TypeState>& op_end_types) {
  // types at the start of each basic block
  m_block_init_types = block_init_types;
  // types after each atomic op
  m_op_end_types = op_end_types;

  m_has_types = true;
}
} // namespace decompiler

View file

@ -1,9 +1,14 @@
#pragma once
#include <string>
#include <vector>
#include <cassert>
#include "decompiler/util/TP_Type.h"
#include "decompiler/Disasm/Register.h"
namespace decompiler {
class LinkedObjectFile;
/*!
* An "environment" for a single function.
* This contains data for an entire function, like which registers are live when, the types of
@ -13,9 +18,34 @@ namespace decompiler {
class Env {
 public:
  // true if local variable information is available (m_has_local_vars).
  bool has_local_vars() const { return m_has_local_vars; }

  // true once set_types has been called.
  bool has_type_analysis() const { return m_has_types; }

  std::string get_variable_name(Register reg, int atomic_idx) const;

  /*!
   * Register types _after_ the given atomic op has run.
   * Only valid once type analysis results have been set (asserts otherwise).
   * The index is bounds checked with at().
   */
  const TypeState& get_types_after_op(int atomic_op_id) const {
    assert(m_has_types);
    const TypeState& types_after = m_op_end_types.at(atomic_op_id);
    return types_after;
  }

  /*!
   * Register types at the start of the given basic block, before any of its ops have run.
   * Only valid once type analysis results have been set (asserts otherwise).
   * The index is bounds checked with at().
   */
  const TypeState& get_types_at_block_entry(int block_id) const {
    assert(m_has_types);
    const TypeState& types_at_entry = m_block_init_types.at(block_id);
    return types_at_entry;
  }

  // store the result of the type analysis pass.
  void set_types(const std::vector<TypeState>& block_init_types,
                 const std::vector<TypeState>& op_end_types);

  // the object file containing this function. Not owned by the Env, null until set.
  LinkedObjectFile* file = nullptr;

 private:
  bool m_has_local_vars = false;
  bool m_has_types = false;
  // one entry per basic block: types on entry to the block.
  std::vector<TypeState> m_block_init_types;
  // one entry per atomic op: types after the op.
  std::vector<TypeState> m_op_end_types;
};
} // namespace decompiler

View file

@ -70,6 +70,7 @@ class ObjectFileDB {
void ir2_top_level_pass();
void ir2_basic_block_pass();
void ir2_atomic_op_pass();
void ir2_type_analysis_pass();
void ir2_write_results(const std::string& output_dir);
std::string ir2_to_file(ObjectFileData& data);
std::string ir2_function_to_string(ObjectFileData& data, Function& function, int seg);
@ -83,6 +84,10 @@ class ObjectFileDB {
DecompilerTypeSystem dts;
std::string all_type_defs;
bool lookup_function_type(const FunctionName& name,
const std::string& obj_name,
TypeSpec* result);
private:
void load_map_file(const std::string& map_data);
void get_objs_from_dgo(const std::string& filename);

View file

@ -24,10 +24,19 @@ void ObjectFileDB::analyze_functions_ir2(const std::string& output_dir) {
ir2_basic_block_pass();
lg::info("Converting to atomic ops...");
ir2_atomic_op_pass();
lg::info("Running type analysis...");
ir2_type_analysis_pass();
lg::info("Writing results...");
ir2_write_results(output_dir);
}
/*!
* Analyze the top level function of each object.
* - Find global function definitions
* - Find type definitions
* - Find method definitions
* - Warn for non-unique function names.
*/
void ObjectFileDB::ir2_top_level_pass() {
Timer timer;
int total_functions = 0;
@ -97,6 +106,7 @@ void ObjectFileDB::ir2_top_level_pass() {
}
});
// we remember duplicates like this so we can warn on all occurrences of the duplicate name
for_each_function([&](Function& func, int segment_id, ObjectFileData& data) {
(void)segment_id;
auto name = func.guessed_name.to_string();
@ -115,6 +125,12 @@ void ObjectFileDB::ir2_top_level_pass() {
lg::info("{:4d} logins {:.2f}%\n", total_top_levels, 100.f * total_top_levels / total_functions);
}
/*!
* Initial Function Analysis Pass to build the control flow graph.
* - Find basic blocks
* - Analyze prologue and epilogue
* - Build control flow graph
*/
void ObjectFileDB::ir2_basic_block_pass() {
Timer timer;
// Main Pass over each function...
@ -127,6 +143,7 @@ void ObjectFileDB::ir2_basic_block_pass() {
for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) {
total_functions++;
func.ir2.env.file = &data.linked_data;
// first, find basic blocks.
auto blocks = find_blocks_in_function(data.linked_data, segment_id, func);
@ -139,6 +156,11 @@ void ObjectFileDB::ir2_basic_block_pass() {
if (!func.suspected_asm) {
// find the prologue/epilogue so they can be excluded from basic blocks.
func.analyze_prologue(data.linked_data);
} else {
// manually exclude the type tag from the basic block.
assert(func.basic_blocks.front().start_word == 0);
assert(func.basic_blocks.front().end_word >= 1);
func.basic_blocks.front().start_word = 1;
}
if (!func.suspected_asm) {
@ -162,6 +184,7 @@ void ObjectFileDB::ir2_basic_block_pass() {
}
if (func.suspected_asm) {
func.warnings.append(";; Assembly Function\n");
suspected_asm++;
}
});
@ -178,6 +201,10 @@ void ObjectFileDB::ir2_basic_block_pass() {
100.f * inspect_methods / total_functions);
}
/*!
* Conversion of MIPS instructions into AtomicOps. The AtomicOps represent what we
* think is the IR of the original GOAL compiler.
*/
void ObjectFileDB::ir2_atomic_op_pass() {
Timer timer;
int total_functions = 0;
@ -197,6 +224,7 @@ void ObjectFileDB::ir2_atomic_op_pass() {
} catch (std::exception& e) {
lg::warn("Function {} from {} could not be converted to atomic ops: {}",
func.guessed_name.to_string(), data.to_unique_name(), e.what());
func.warnings.append(";; Failed to convert to atomic ops\n");
}
}
});
@ -207,6 +235,44 @@ void ObjectFileDB::ir2_atomic_op_pass() {
100.f * attempted / total_functions, 100.f * successful / attempted);
}
/*!
 * Run type analysis on every function that isn't suspected to be assembly.
 * - Look up the type of the function with lookup_function_type. If it's unknown, the
 *   function gets a warning and is skipped.
 * - Run the function's type analysis with the type hints from the config.
 * - Functions where the analysis fails get a warning.
 * Prints the success/attempted/non-asm/total counts and the time taken at the end.
 */
void ObjectFileDB::ir2_type_analysis_pass() {
  Timer timer;
  int total_functions = 0;
  int non_asm_functions = 0;
  int attempted_functions = 0;
  int successful_functions = 0;

  for_each_function_def_order([&](Function& func, int segment_id, ObjectFileData& data) {
    (void)segment_id;
    total_functions++;

    if (func.suspected_asm) {
      // assembly functions are not analyzed at all.
      return;
    }
    non_asm_functions++;

    TypeSpec ts;
    if (!lookup_function_type(func.guessed_name, data.to_unique_name(), &ts)) {
      // lg::warn("Function {} didn't know its type", func.guessed_name.to_string());
      func.warnings.append(";; Type of function is unknown\n");
      return;
    }

    // we know the type of the function, so try type analysis here.
    attempted_functions++;
    auto hints = get_config().type_hints_by_function_by_idx[func.guessed_name.to_string()];
    const bool analysis_ok = func.run_type_analysis_ir2(ts, dts, data.linked_data, hints);
    if (analysis_ok) {
      successful_functions++;
    } else {
      func.warnings.append(";; Type analysis failed\n");
    }
  });

  lg::info("{}/{}/{}/{} (success/attempted/non-asm/total) in {:.2f} ms", successful_functions,
           attempted_functions, non_asm_functions, total_functions, timer.getMs());
}
void ObjectFileDB::ir2_write_results(const std::string& output_dir) {
Timer timer;
lg::info("Writing IR2 results to file...");
@ -269,6 +335,40 @@ std::string ObjectFileDB::ir2_to_file(ObjectFileData& data) {
return result;
}
namespace {
/*!
 * Append a ";;" comment to a line of text.
 *
 * The first comment on a line (has_comment is false) is placed after padding the line
 * with spaces to at least 30 columns, and has_comment is set to true.
 *
 * Later comments are added to the existing comment: padded with spaces to at least column
 * `offset`, then a single space, then the text. If that would make the line longer than 120
 * columns, the text goes on a new line instead, indented by 30 columns and starting with
 * ";; ".
 *
 * All column computations use the current (last) physical line of `line`, so text that was
 * already wrapped onto earlier lines doesn't count toward the limit or the padding.
 *
 * @param line        the text to append to. May already contain wrapped lines.
 * @param has_comment whether `line` already has a comment. Set to true by this function.
 * @param to_append   the comment text, without the leading ";;"
 * @param offset      minimum column for a comment that is added to an existing comment.
 *                    Values <= 0 mean no padding.
 */
void append_commented(std::string& line,
                      bool& has_comment,
                      const std::string& to_append,
                      int offset = 0) {
  // minimum length before comment appears.
  constexpr size_t pre_comment_length = 30;
  // if comment overflows, how much to indent the next one
  constexpr size_t overflow_indent = 30;
  // longest line we allow before wrapping.
  constexpr size_t max_line_length = 120;

  // length of the last physical line. Using line.length() here would count lines that were
  // already wrapped, and force every comment after the first wrap onto its own line.
  const size_t last_newline = line.rfind('\n');
  const size_t column =
      (last_newline == std::string::npos) ? line.length() : line.length() - last_newline - 1;

  // pad, and add comment
  if (!has_comment) {
    if (column < pre_comment_length) {
      line.append(pre_comment_length - column, ' ');
    }
    line += ";; ";
    line += to_append;
    has_comment = true;
    return;
  }

  const size_t target_column = offset > 0 ? size_t(offset) : 0;
  if (std::max(column, target_column) + to_append.length() > max_line_length) {
    // doesn't fit, continue the comment on a new line.
    line += "\n";
    line.append(overflow_indent, ' ');
    line += ";; ";
  } else {
    if (column < target_column) {
      line.append(target_column - column, ' ');
    }
    line += " ";
  }
  line += to_append;
}
} // namespace
std::string ObjectFileDB::ir2_function_to_string(ObjectFileData& data, Function& func, int seg) {
std::string result;
result += ";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n";
@ -282,14 +382,16 @@ std::string ObjectFileDB::ir2_function_to_string(ObjectFileData& data, Function&
bool print_atomics = func.ir2.atomic_ops_succeeded;
// print each instruction in the function.
bool in_delay_slot = false;
int total_instructions_printed = 0;
int last_instr_printed = 0;
for (int i = 1; i < func.end_word - func.start_word; i++) {
std::string line;
auto print_instr_start = [&](int i) {
// check for a label to print
auto label_id = data.linked_data.get_label_at(seg, (func.start_word + i) * 4);
if (label_id != -1) {
result += data.linked_data.labels.at(label_id).name + ":\n";
}
// check for no misaligned labels in code segments.
for (int j = 1; j < 4; j++) {
assert(data.linked_data.get_label_at(seg, (func.start_word + i) * 4 + j) == -1);
@ -297,36 +399,13 @@ std::string ObjectFileDB::ir2_function_to_string(ObjectFileData& data, Function&
// print the assembly instruction
auto& instr = func.instructions.at(i);
std::string line = " " + instr.to_string(data.linked_data.labels);
line = " " + instr.to_string(data.linked_data.labels);
};
// printf("%d inst %s\n", print_atomics, instr.to_string(data.linked_data.labels).c_str());
bool printed_comment = false;
// print atomic op
if (print_atomics && func.instr_starts_atomic_op(i)) {
if (line.length() < 30) {
line.append(30 - line.length(), ' ');
}
line +=
" ;; " + func.get_atomic_op_at_instr(i).to_string(data.linked_data.labels, &func.ir2.env);
printed_comment = true;
}
// print linked strings
for (int iidx = 0; iidx < instr.n_src; iidx++) {
if (instr.get_src(iidx).is_label()) {
auto lab = data.linked_data.labels.at(instr.get_src(iidx).get_label());
if (data.linked_data.is_string(lab.target_segment, lab.offset)) {
if (!printed_comment) {
line += " ;; ";
printed_comment = true;
}
line += " " + data.linked_data.get_goal_string(lab.target_segment, lab.offset / 4 - 1);
}
}
}
result += line + "\n";
auto print_instr_end = [&](int i) {
auto& instr = func.instructions.at(i);
result += line;
result += "\n";
// print delay slot gap
if (in_delay_slot) {
@ -338,10 +417,140 @@ std::string ObjectFileDB::ir2_function_to_string(ObjectFileData& data, Function&
if (gOpcodeInfo[(int)instr.kind].has_delay_slot) {
in_delay_slot = true;
}
total_instructions_printed++;
assert(last_instr_printed + 1 == i);
last_instr_printed = i;
};
// first, print the prologue. we start at word 1 because word 0 is the type tag
for (int i = 1; i < func.basic_blocks.front().start_word; i++) {
print_instr_start(i);
print_instr_end(i);
}
// next, print each basic block
int end_idx = func.basic_blocks.front().start_word;
for (int block_id = 0; block_id < int(func.basic_blocks.size()); block_id++) {
// block number
result += "B" + std::to_string(block_id) + ":\n";
auto& block = func.basic_blocks.at(block_id);
const TypeState* init_types = nullptr;
if (func.ir2.env.has_type_analysis()) {
init_types = &func.ir2.env.get_types_at_block_entry(block_id);
}
for (int instr_id = block.start_word; instr_id < block.end_word; instr_id++) {
print_instr_start(instr_id);
bool printed_comment = false;
// print atomic op
int op_id = -1;
if (print_atomics && func.instr_starts_atomic_op(instr_id)) {
auto& op = func.get_atomic_op_at_instr(instr_id);
op_id = func.ir2.atomic_ops->instruction_to_atomic_op.at(instr_id);
append_commented(line, printed_comment,
op.to_string(data.linked_data.labels, &func.ir2.env));
if (func.ir2.env.has_type_analysis()) {
append_commented(
line, printed_comment,
op.reg_type_info_as_string(*init_types, func.ir2.env.get_types_after_op(op_id)), 50);
}
}
auto& instr = func.instructions.at(instr_id);
// print linked strings
for (int iidx = 0; iidx < instr.n_src; iidx++) {
if (instr.get_src(iidx).is_label()) {
auto lab = data.linked_data.labels.at(instr.get_src(iidx).get_label());
if (data.linked_data.is_string(lab.target_segment, lab.offset)) {
append_commented(
line, printed_comment,
data.linked_data.get_goal_string(lab.target_segment, lab.offset / 4 - 1));
}
}
}
print_instr_end(instr_id);
if (print_atomics && func.ir2.env.has_type_analysis() &&
func.instr_starts_atomic_op(instr_id)) {
init_types = &func.ir2.env.get_types_after_op(op_id);
}
}
end_idx = block.end_word;
}
for (int i = end_idx; i < func.end_word - func.start_word; i++) {
print_instr_start(i);
print_instr_end(i);
}
result += "\n";
assert(total_instructions_printed == (func.end_word - func.start_word - 1));
return result;
}
/*!
* Try to look up the type of a function. Looks at the decompiler type info, the hints files,
* and other GOAL rules.
*/
bool ObjectFileDB::lookup_function_type(const FunctionName& name,
const std::string& obj_name,
TypeSpec* result) {
auto& cfg = get_config();
// don't return function types that are explictly flagged as bad in config.
if (cfg.no_type_analysis_functions_by_name.find(name.to_string()) !=
cfg.no_type_analysis_functions_by_name.end()) {
return false;
}
if (name.kind == FunctionName::FunctionKind::GLOBAL) {
// global GOAL function.
auto kv = dts.symbol_types.find(name.function_name);
if (kv != dts.symbol_types.end() && kv->second.arg_count() >= 1) {
if (kv->second.base_type() != "function") {
lg::die("Found a function named {} but the symbol has type {}", name.to_string(),
kv->second.print());
}
// good, found a global function with full type information.
*result = kv->second;
return true;
}
} else if (name.kind == FunctionName::FunctionKind::METHOD) {
MethodInfo info;
if (dts.ts.try_lookup_method(name.type_name, name.method_id, &info)) {
if (info.type.arg_count() >= 1) {
if (info.type.base_type() != "function") {
lg::die("Found a method named {} but the symbol has type {}", name.to_string(),
info.type.print());
}
// substitute the _type_ for the correct type.
*result = info.type.substitute_for_method_call(name.type_name);
return true;
}
}
} else if (name.kind == FunctionName::FunctionKind::TOP_LEVEL_INIT) {
*result = dts.ts.make_function_typespec({}, "none");
return true;
} else if (name.kind == FunctionName::FunctionKind::UNIDENTIFIED) {
// try looking up the object
const auto& map = get_config().anon_function_types_by_obj_by_id;
auto obj_kv = map.find(obj_name);
if (obj_kv != map.end()) {
auto func_kv = obj_kv->second.find(name.get_anon_id());
if (func_kv != obj_kv->second.end()) {
*result = dts.parse_type_spec(func_kv->second);
return true;
}
}
} else {
assert(false);
}
return false;
}
} // namespace decompiler

View file

@ -199,7 +199,7 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add
if (existing.kind == add.kind) {
switch (existing.kind) {
case TP_Type::Kind::TYPESPEC: {
auto new_result = TP_Type::make_from_typespec(coerce_to_reg_type(ts.lowest_common_ancestor(
auto new_result = TP_Type::make_from_ts(coerce_to_reg_type(ts.lowest_common_ancestor(
existing.get_objects_typespec(), add.get_objects_typespec())));
*changed = (new_result != existing);
return new_result;
@ -214,15 +214,15 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add
case TP_Type::Kind::PRODUCT_WITH_CONSTANT:
// we know they are different.
*changed = true;
return TP_Type::make_from_typespec(TypeSpec("int"));
return TP_Type::make_from_ts(TypeSpec("int"));
case TP_Type::Kind::OBJECT_PLUS_PRODUCT_WITH_CONSTANT:
*changed = true;
// todo - there might be cases where we need to LCA the base types??
return TP_Type::make_from_typespec(TypeSpec("object"));
return TP_Type::make_from_ts(TypeSpec("object"));
case TP_Type::Kind::OBJECT_NEW_METHOD:
*changed = true;
// this case should never happen I think.
return TP_Type::make_from_typespec(TypeSpec("function"));
return TP_Type::make_from_ts(TypeSpec("function"));
case TP_Type::Kind::STRING_CONSTANT: {
auto existing_count = get_format_arg_count(existing.get_string());
auto added_count = get_format_arg_count(add.get_string());
@ -230,19 +230,19 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add
if (added_count == existing_count) {
return TP_Type::make_from_format_string(existing_count);
} else {
return TP_Type::make_from_typespec(TypeSpec("string"));
return TP_Type::make_from_ts(TypeSpec("string"));
}
}
case TP_Type::Kind::INTEGER_CONSTANT:
*changed = true;
return TP_Type::make_from_typespec(TypeSpec("int"));
return TP_Type::make_from_ts(TypeSpec("int"));
case TP_Type::Kind::FORMAT_STRING:
if (existing.get_format_string_arg_count() == add.get_format_string_arg_count()) {
*changed = false;
return existing;
} else {
*changed = true;
return TP_Type::make_from_typespec(TypeSpec("string"));
return TP_Type::make_from_ts(TypeSpec("string"));
}
case TP_Type::Kind::FALSE_AS_NULL:
@ -261,7 +261,7 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add
if (existing_count == add_count) {
result_type = TP_Type::make_from_format_string(existing_count);
} else {
result_type = TP_Type::make_from_typespec(TypeSpec("string"));
result_type = TP_Type::make_from_ts(TypeSpec("string"));
}
*changed = (result_type != existing);
@ -269,7 +269,7 @@ TP_Type DecompilerTypeSystem::tp_lca(const TP_Type& existing, const TP_Type& add
}
// otherwise, as an absolute fallback, convert both to TypeSpecs and do TypeSpec LCA
auto new_result = TP_Type::make_from_typespec(
auto new_result = TP_Type::make_from_ts(
coerce_to_reg_type(ts.lowest_common_ancestor(existing.typespec(), add.typespec())));
*changed = (new_result != existing);
return new_result;

View file

@ -2,6 +2,16 @@
#include "third-party/fmt/core.h"
namespace decompiler {
/*!
 * Build a bitmask from a list of registers. For each register whose kind is Reg::GPR, the bit
 * numbered by its get_gpr() value is set. Registers of any other kind are ignored, and an empty
 * list gives 0.
 */
u32 regs_to_gpr_mask(const std::vector<Register>& regs) {
  u32 mask = 0;
  for (const auto& reg : regs) {
    if (reg.get_kind() == Reg::GPR) {
      // shift an unsigned one: with a plain int, GPR 31 would shift into the sign bit.
      mask |= (u32(1) << reg.get_gpr());
    }
  }
  return mask;
}
std::string TypeState::print_gpr_masked(u32 mask) const {
std::string result;
for (int i = 0; i < 32; i++) {

View file

@ -61,13 +61,15 @@ class TP_Type {
return result;
}
static TP_Type make_from_typespec(const TypeSpec& ts) {
// Create a TP_Type of kind TYPESPEC which holds a copy of the given TypeSpec.
static TP_Type make_from_ts(const TypeSpec& ts) {
  TP_Type tp;
  tp.m_ts = ts;
  tp.kind = Kind::TYPESPEC;
  return tp;
}
// Convenience overload: construct a TypeSpec from the string, then wrap it as a TYPESPEC TP_Type.
static TP_Type make_from_ts(const std::string& ts) {
  const TypeSpec spec(ts);
  return make_from_ts(spec);
}
static TP_Type make_from_string(const std::string& str) {
TP_Type result;
result.kind = Kind::STRING_CONSTANT;
@ -192,4 +194,6 @@ struct TypeState {
}
}
};
u32 regs_to_gpr_mask(const std::vector<Register>& regs);
} // namespace decompiler

View file

@ -418,7 +418,8 @@ TEST(DecompilerAtomicOpBuilder, DSUBU_DADDIU_MOVZ) {
TEST(DecompilerAtomicOpBuilder, JALR_SLL) {
test_case(assembly_from_list({"jalr ra, t9", "sll v0, ra, 0"}), {"(call!)"}, {{}}, {{"t9"}},
{{}});
{{"a0", "a1", "a2", "a3", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9",
"at", "v1"}});
}
TEST(DecompilerAtomicOpBuilder, LB) {

View file

@ -196,6 +196,12 @@ TEST(TypeSystem, AddMethodAndLookupMethod) {
EXPECT_EQ(ts.lookup_method("basic", "test-method-1").defined_in_type, "structure");
EXPECT_EQ(ts.lookup_method("basic", "test-method-1").type.print(), "(function integer string)");
EXPECT_EQ(ts.lookup_method("basic", "test-method-1").name, "test-method-1");
auto id = ts.lookup_method("basic", "test-method-1").id;
MethodInfo info;
EXPECT_TRUE(ts.try_lookup_method("basic", id, &info));
EXPECT_FALSE(ts.try_lookup_method("not-a-real-type-name", id, &info));
EXPECT_FALSE(ts.try_lookup_method("basic", id * 2, &info));
}
TEST(TypeSystem, NewMethod) {