jak-project/decompiler/analysis/cfg_builder.cpp
Tyler Wilding b3e77c673f
decomp/lsp: Differentiate warnings from likely/definite errors (#1725)
* decomp: differentiate potential false positive warnings from likely/certain failures

* lsp: handle IR2 errors

* decomp: downgrade an expr building warning as often expressions build fine

* tests: update reference tests since comments aren't ignored

* decomp: simplify warnings interface

* tests: update ref tests
2022-08-06 11:52:36 -04:00

2054 lines
73 KiB
C++

/*!
* @file cfg_builder.cpp
* Initial conversion from Control Flow Graph to IR2 Form.
*/
#include "cfg_builder.h"
#include "decompiler/Function/Function.h"
#include "decompiler/IR2/Form.h"
#include "decompiler/ObjectFile/LinkedObjectFile.h"
#include "decompiler/util/MatchParam.h"
namespace decompiler {
namespace {
Form* cfg_to_ir(FormPool& pool, Function& f, const CfgVtx* vtx);
/*!
* If it's a form containing multiple elements, return a pointer to the branch element and the end
* and also a pointer to the Form containing the branch element.
* Otherwise returns nullptr. Useful to modify or remove branches found at the end of blocks,
* and inline things into the begin they were found in.
*/
std::pair<BranchElement*, Form*> get_condition_branch_as_vector(Form* in) {
// With the current Form setup, we'll never have to dig deper to find the branch.
// so we can just return the input as the Form*.
// If this changes, this can be fixed here, rather than refactoring the whole thing.
if (in->size() > 1) {
auto irb = dynamic_cast<BranchElement*>(in->back());
ASSERT(irb);
return std::make_pair(irb, in);
}
return std::make_pair(nullptr, nullptr);
}
/*!
* Given an IR, find a branch IR at the end, and also the location of it so it can be patched.
* Returns nullptr as the first item in the pair if it didn't work.
* Use this to inspect a sequence ending in branch and have to ability to replace the branch with
* something else if needed.
*/
std::pair<BranchElement*, FormElement**> get_condition_branch(Form* in) {
BranchElement* condition_branch = dynamic_cast<BranchElement*>(in->back());
FormElement** condition_branch_location = in->back_ref();
if (!condition_branch) {
auto as_return = dynamic_cast<ReturnElement*>(in->back());
if (as_return) {
return get_condition_branch(as_return->dead_code);
}
}
if (!condition_branch) {
auto as_break = dynamic_cast<BreakElement*>(in->back());
if (as_break) {
return get_condition_branch(as_break->dead_code);
}
}
return std::make_pair(condition_branch, condition_branch_location);
}
/*!
* Given a CondWithElse IR, remove the internal branches and set the condition to be an actual
* compare IR instead of a branch.
* Doesn't "rebalance" the leading condition because this runs way before expression compaction.
*/
void clean_up_cond_with_else(FormPool& pool, FormElement* ir, const Env& env) {
auto cwe = dynamic_cast<CondWithElseElement*>(ir);
ASSERT(cwe);
for (auto& e : cwe->entries) {
// don't reclean already cleaned things.
if (e.cleaned) {
continue;
}
auto jump_to_next = get_condition_branch(e.condition);
ASSERT(jump_to_next.first);
ASSERT(jump_to_next.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP);
// patch the branch to next with a condition.
auto replacement = jump_to_next.first->op()->get_condition_as_form(pool, env);
replacement->invert();
*(jump_to_next.second) = replacement;
// check the jump at the end of a block.
auto jump_to_end = get_condition_branch(e.body);
ASSERT(jump_to_end.first);
ASSERT(jump_to_end.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP);
ASSERT(jump_to_end.first->op()->condition().kind() == IR2_Condition::Kind::ALWAYS);
// if possible, we just want to remove this from the sequence its in.
// but sometimes there's a case with nothing in it so there is no sequence.
// in this case, we can just replace the branch with a NOP IR to indicate that nothing
// happens in this case, but there was still GOAL code to test for it.
// this happens rarely, as you would expect.
auto as_end_of_sequence = get_condition_branch_as_vector(e.body);
if (as_end_of_sequence.first) {
ASSERT(as_end_of_sequence.second->size() > 1);
as_end_of_sequence.second->pop_back();
} else {
// we need to have _something_ as the body, so we just put an (empty).
*(jump_to_end.second) = pool.alloc_element<EmptyElement>();
}
e.cleaned = true;
}
}
/*!
* Replace the branch at the end of an until loop's condition with a condition.
*/
void clean_up_until_loop(FormPool& pool, UntilElement* ir, const Env& env) {
auto condition_branch = get_condition_branch(ir->condition);
ASSERT(condition_branch.first);
if (condition_branch.first->op()->branch_delay().kind() != IR2_BranchDelay::Kind::NOP) {
ASSERT_MSG(
condition_branch.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::SET_REG_FALSE,
fmt::format(
"bad delay slot in until loop: {} in {}\n", env.func->name(),
condition_branch.first->op()->branch_delay().to_form(env.file->labels, env).print()));
ir->false_destination = condition_branch.first->op()->branch_delay().var(0);
}
auto replacement = condition_branch.first->op()->get_condition_as_form(pool, env);
replacement->invert();
*(condition_branch.second) = replacement;
}
/*!
* Remove the true branch at the end of an infinite while loop.
*/
void clean_up_infinite_while_loop(FormPool& pool, WhileElement* ir) {
auto jump = get_condition_branch(ir->body);
ASSERT(jump.first);
ASSERT(jump.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP);
ASSERT(jump.first->op()->condition().kind() == IR2_Condition::Kind::ALWAYS);
auto as_end_of_sequence = get_condition_branch_as_vector(ir->body);
if (as_end_of_sequence.first) {
// there's more in the sequence, just remove the last thing.
ASSERT(as_end_of_sequence.second->size() > 1);
as_end_of_sequence.second->pop_back();
} else {
// Nothing else in the sequence, just replace the jump with an (empty)
*(jump.second) = pool.alloc_element<EmptyElement>();
}
ir->cleaned = true; // so we don't try this later...
}
/*!
* Remove the branch in a return statement
*/
void clean_up_return(FormPool& pool, ReturnElement* ir) {
auto jump_to_end = get_condition_branch(ir->return_code);
ASSERT(jump_to_end.first);
ASSERT(jump_to_end.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP);
ASSERT(jump_to_end.first->op()->condition().kind() == IR2_Condition::Kind::ALWAYS);
auto as_end_of_sequence = get_condition_branch_as_vector(ir->return_code);
if (as_end_of_sequence.first) {
ASSERT(as_end_of_sequence.second->size() > 1);
as_end_of_sequence.second->pop_back();
} else {
*(jump_to_end.second) = pool.alloc_element<EmptyElement>();
}
}
void clean_up_return_final(const Function& f, ReturnElement* ir) {
SetVarElement* dead = dynamic_cast<SetVarElement*>(ir->dead_code->try_as_single_element());
if (!dead) {
dead = dynamic_cast<SetVarElement*>(ir->dead_code->elts().front());
for (int i = 1; i < ir->dead_code->size(); i++) {
if (!dynamic_cast<EmptyElement*>(ir->dead_code->at(i))) {
dead = nullptr;
break;
}
}
}
if (!dead) {
throw std::runtime_error(fmt::format("failed to recognize dead code after return, got {}",
ir->dead_code->to_string(f.ir2.env)));
}
ASSERT(dead);
auto src = dynamic_cast<SimpleExpressionElement*>(dead->src()->try_as_single_element());
ASSERT(src);
ASSERT(src->expr().is_identity() && src->expr().get_arg(0).is_int() &&
src->expr().get_arg(0).get_int() == 0);
ir->dead_code = nullptr;
}
/*!
* Remove the branch in a break (really return-from nonfunction scope)
*/
void clean_up_break(FormPool& pool, BreakElement* ir, const Env&) {
auto jump_to_end = get_condition_branch(ir->return_code);
ASSERT(jump_to_end.first);
ASSERT(jump_to_end.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP);
ASSERT(jump_to_end.first->op()->condition().kind() == IR2_Condition::Kind::ALWAYS);
auto as_end_of_sequence = get_condition_branch_as_vector(ir->return_code);
if (as_end_of_sequence.first) {
ASSERT(as_end_of_sequence.second->size() > 1);
as_end_of_sequence.second->pop_back();
} else {
*(jump_to_end.second) = pool.alloc_element<EmptyElement>();
}
}
void clean_up_break_final(const Function& f, BreakElement* ir, const Env& env) {
EmptyElement* dead_empty = dynamic_cast<EmptyElement*>(ir->dead_code->try_as_single_element());
if (dead_empty) {
ir->dead_code = nullptr;
return;
}
SetVarElement* dead = dynamic_cast<SetVarElement*>(ir->dead_code->try_as_single_element());
if (!dead) {
dead = dynamic_cast<SetVarElement*>(ir->dead_code->elts().front());
for (int i = 1; i < ir->dead_code->size(); i++) {
if (!dynamic_cast<EmptyElement*>(ir->dead_code->at(i))) {
dead = nullptr;
break;
}
}
}
if (!dead) {
if (ir->dead_code->to_string(env) == "(nop!)") {
ir->dead_code = nullptr;
return;
}
}
if (!dead) {
throw std::runtime_error(fmt::format("failed to recognize dead code after break, got {}",
ir->dead_code->to_string(f.ir2.env)));
}
ASSERT(dead);
auto src = dynamic_cast<SimpleExpressionElement*>(dead->src()->try_as_single_element());
ASSERT(src);
if (src->expr().is_identity() && src->expr().get_arg(0).is_int() &&
src->expr().get_arg(0).get_int() == 0) {
ir->dead_code = nullptr;
}
}
/*!
* Does the instruction in the delay slot set a register to false?
* Note. a beql s7, x followed by a or y, x, r0 will count as this. I don't know why but
* GOAL does this on comparisons to false.
*/
bool delay_slot_sets_false(BranchElement* branch, SetVarOp& delay) {
ASSERT(branch->op()->likely());
ASSERT(branch->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NO_DELAY);
if (delay.src().is_identity() && delay.src().get_arg(0).is_sym_val() &&
delay.src().get_arg(0).get_str() == "#f") {
return true;
}
if (branch->op()->condition().kind() == IR2_Condition::Kind::FALSE) {
if (delay.src().is_identity() && delay.src().get_arg(0).is_var()) {
auto src_var = delay.src().get_arg(0).var();
auto& cond = branch->op()->condition();
auto cond_reg = cond.src(0).var().reg();
return cond_reg == src_var.reg();
}
}
return false;
// if (branch->op()->branch_delay().kind() == IR2_BranchDelay::Kind::SET_REG_FALSE) {
// return true;
// }
//
// if (branch->op()->condition().kind() == IR2_Condition::Kind::FALSE &&
// branch->op()->branch_delay().kind() == IR2_BranchDelay::Kind::SET_REG_REG) {
// auto& cond = branch->op()->condition();
// auto& delay = branch->op()->branch_delay();
// auto cond_reg = cond.src(0).var().reg();
// auto src_reg = delay.var(1).reg();
// return cond_reg == src_reg;
// }
//
// return false;
}
/*!
* Does the instruction in the delay slot set a register to a truthy value, like in a GOAL
* or form branch? Either it explicitly sets #t, or it tests the value for being not false,
* then uses that
*/
bool delay_slot_sets_truthy(BranchElement* branch, SetVarOp& delay) {
ASSERT(branch->op()->likely());
ASSERT(branch->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NO_DELAY);
if (delay.src().is_identity() && delay.src().get_arg(0).is_sym_ptr() &&
delay.src().get_arg(0).get_str() == "#t") {
return true;
}
if (branch->op()->condition().kind() == IR2_Condition::Kind::TRUTHY) {
if (delay.src().is_identity() && delay.src().get_arg(0).is_var()) {
auto src_var = delay.src().get_arg(0).var();
auto& cond = branch->op()->condition();
auto cond_reg = cond.src(0).var().reg();
return cond_reg == src_var.reg();
}
}
// if (branch->op()->branch_delay().kind() == IR2_BranchDelay::Kind::SET_REG_TRUE) {
// return true;
// }
//
// if (branch->op()->condition().kind() == IR2_Condition::Kind::TRUTHY &&
// branch->op()->branch_delay().kind() == IR2_BranchDelay::Kind::SET_REG_REG) {
// auto& cond = branch->op()->condition();
// auto& delay = branch->op()->branch_delay();
// auto cond_reg = cond.src(0).var().reg();
// auto src_reg = delay.var(1).reg();
// return cond_reg == src_reg;
// }
return false;
}
/*!
* Try to convert a short circuit to an and.
*/
bool try_clean_up_sc_as_and(FormPool& pool, Function& func, ShortCircuitElement* ir) {
Register destination;
RegisterAccess ir_dest;
for (int i = 0; i < int(ir->entries.size()) - 1; i++) {
auto branch = get_condition_branch(ir->entries.at(i).condition);
ASSERT(branch.first);
ASSERT(ir->entries.at(i).branch_delay.has_value());
if (!delay_slot_sets_false(branch.first, *ir->entries.at(i).branch_delay)) {
return false;
}
if (i == 0) {
// first case, remember the destination
ir_dest = ir->entries.at(i).branch_delay->dst();
destination = ir_dest.reg();
} else {
// check destination against the first case.
if (destination != ir->entries.at(i).branch_delay->dst().reg()) {
return false;
}
}
}
ir->kind = ShortCircuitElement::AND;
ir->final_result = ir_dest;
bool live_out_result = false;
// now get rid of the branches
for (int i = 0; i < int(ir->entries.size()) - 1; i++) {
auto branch = get_condition_branch(ir->entries.at(i).condition);
ASSERT(branch.first);
if (func.ir2.env.has_reg_use()) {
auto delay_id = ir->entries.at(i).branch_delay->dst().idx();
auto& delay_info = func.ir2.env.reg_use().op.at(delay_id);
auto branch_id = branch.first->op()->op_id();
auto& branch_info = func.ir2.env.reg_use().op.at(branch_id);
for (auto x : delay_info.consumes) {
branch_info.consumes.insert(x);
}
auto delay_op = func.ir2.atomic_ops->ops.at(delay_id).get();
auto as_set = dynamic_cast<SetVarOp*>(delay_op);
ASSERT(as_set);
if (as_set->src().is_var()) {
// must be the case where the src should have truthy in it.
// lg::warn("Disabling use of {} in or delay slot", as_set->to_string(func.ir2.env));
func.ir2.env.disable_use(as_set->src().var());
}
// we also want to fix up the use/def info for the result.
// it's somewhat arbitrary, but we use the convention that the short-circuit defs
// are eliminated:
if (func.ir2.env.has_local_vars()) {
auto& ud_info = func.ir2.env.get_use_def_info(as_set->dst());
if (i == int(ir->entries.size()) - 2) {
if (ud_info.def_count() == 1) {
// the final case of the or doesn't explicitly set the destination register.
// this can happen if the move is eliminated during coloring.
// for now, let's leave this last def here, just so it looks like _something_ sets it.
} else {
// lg::warn("Disabling def of {} in final or delay slot",
// as_set->to_string(func.ir2.env));
func.ir2.env.disable_def(as_set->dst(), func.warnings);
}
} else {
// lg::warn("Disabling def of {} in or delay slot", as_set->to_string(func.ir2.env));
func.ir2.env.disable_def(as_set->dst(), func.warnings);
}
}
if (i == 0) {
live_out_result = (branch_info.written_and_unused.find(ir_dest.reg()) ==
branch_info.written_and_unused.end());
} else {
bool this_live_out = (branch_info.written_and_unused.find(ir_dest.reg()) ==
branch_info.written_and_unused.end());
if (live_out_result != this_live_out) {
lg::error("Bad live out result on {}. At 0 was {} now at {} is {}", func.name(),
live_out_result, i, this_live_out);
}
ASSERT(live_out_result == this_live_out);
}
}
auto replacement = branch.first->op()->get_condition_as_form(pool, func.ir2.env);
replacement->invert();
*(branch.second) = replacement;
}
ir->used_as_value = live_out_result;
return true;
}
/*!
* Try to convert a short circuit to an or.
* Note - this will convert an and to a very strange or, so always use the try as and first.
*/
bool try_clean_up_sc_as_or(FormPool& pool, Function& func, ShortCircuitElement* ir) {
// all cases of an or, excluding the last one, should move the value "true" into the result reg
// in case the short circuit is taken. The final case should just write the result reg.
Register destination; // destination register
RegisterAccess ir_dest; // destination access (the first one)
// all but the last one (these should all do the delay slot trick)
// this first pass is where we can reject this as an or.
for (int i = 0; i < int(ir->entries.size()) - 1; i++) {
// short circuit branch
auto branch = get_condition_branch(ir->entries.at(i).condition);
ASSERT(branch.first);
ASSERT(ir->entries.at(i).branch_delay.has_value());
// the branch should write true (there's two ways this can happen)
if (!delay_slot_sets_truthy(branch.first, *ir->entries.at(i).branch_delay)) {
return false;
}
if (i == 0) {
// first case, remember the destination
ir_dest = ir->entries.at(i).branch_delay->dst();
destination = ir_dest.reg();
} else {
// check destination against the first case.
if (destination != ir->entries.at(i).branch_delay->dst().reg()) {
return false;
}
}
}
// at this point, we know that all non-last cases write the destination, and what
// the destination is.
// so we commit to rewriting this thing as an or, and any errors from here on are fatal.
ir->kind = ShortCircuitElement::OR;
ir->final_result = ir_dest;
// we also would like to know if the result of this OR is used or not.
// there's also a sanity check that:
// if the result is used - all writes to the result reg _should_ be live
// if the result is unused - all writes to the result reg should be dead.
// otherwise it means that our control flow graph is messed up, and we abort.
bool live_out_result = false;
for (int i = 0; i < int(ir->entries.size()) - 1; i++) {
auto branch = get_condition_branch(ir->entries.at(i).condition);
ASSERT(branch.first);
if (func.ir2.env.has_reg_use()) {
auto delay_id = ir->entries.at(i).branch_delay->dst().idx();
auto& delay_info = func.ir2.env.reg_use().op.at(delay_id);
// cheat for the old method
auto branch_id = branch.first->op()->op_id();
auto& branch_info = func.ir2.env.reg_use().op.at(branch_id);
for (auto x : delay_info.consumes) {
branch_info.consumes.insert(x);
}
// the branch may look like this:
// bnel s7, a3, L283
// or a2, a3, r0
// which reads a3 twice. But we want it to count as only once, as the second read
// is inserted by the GOAL compiler, not by putting a var twice in the source code.
auto delay_op = func.ir2.atomic_ops->ops.at(delay_id).get();
auto as_set = dynamic_cast<SetVarOp*>(delay_op);
ASSERT(as_set);
if (as_set->src().is_var()) {
// must be the case where the src should have truthy in it.
// lg::warn("Disabling use of {} in or delay slot", as_set->to_string(func.ir2.env));
func.ir2.env.disable_use(as_set->src().var());
}
// we also want to fix up the use/def info for the result.
// it's somewhat arbitrary, but we use the convention that the short-circuit defs
// are eliminated:
if (func.ir2.env.has_local_vars()) {
auto& ud_info = func.ir2.env.get_use_def_info(as_set->dst());
if (i == int(ir->entries.size()) - 2) {
if (ud_info.def_count() == 1) {
// the final case of the or doesn't explicitly set the destination register.
// this can happen if the move is eliminated during coloring.
// for now, let's leave this last def here, just so it looks like _something_ sets it.
// TODO - what if this isn't a def in the last slot? Does it matter?
} else {
// lg::warn("Disabling def of {} in final or delay slot",
// as_set->to_string(func.ir2.env));
func.ir2.env.disable_def(as_set->dst(), func.warnings);
}
} else {
// lg::warn("Disabling def of {} in or delay slot", as_set->to_string(func.ir2.env));
func.ir2.env.disable_def(as_set->dst(), func.warnings);
}
}
if (i == 0) {
live_out_result = (delay_info.written_and_unused.find(ir_dest.reg()) ==
delay_info.written_and_unused.end());
} else {
bool this_live_out = (delay_info.written_and_unused.find(ir_dest.reg()) ==
delay_info.written_and_unused.end());
ASSERT(live_out_result == this_live_out);
}
}
auto replacement = branch.first->op()->get_condition_as_form(pool, func.ir2.env);
*(branch.second) = replacement;
}
// TODO - check the one remaining def location?
ir->used_as_value = live_out_result;
return true;
}
void clean_up_sc(FormPool& pool, Function& func, ShortCircuitElement* ir);
/*!
* A form like (and x (or y z)) will be recognized as a single SC Vertex by the CFG pass.
* In the case where we fail to clean it up as an AND or an OR, we should attempt splitting.
* Part of the complexity here is that we want to clean up the split recursively so things like
* (and x (or y (and a b)))
* or
* (and x (or y (and a b)) c d (or z))
* will work correctly. This may require doing more splitting on both sections!
*/
bool try_splitting_nested_sc(FormPool& pool, Function& func, ShortCircuitElement* ir) {
auto first_branch = get_condition_branch(ir->entries.front().condition);
ASSERT(first_branch.first);
ASSERT(ir->entries.front().branch_delay.has_value());
bool first_is_and = delay_slot_sets_false(first_branch.first, *ir->entries.front().branch_delay);
bool first_is_or = delay_slot_sets_truthy(first_branch.first, *ir->entries.front().branch_delay);
if (first_is_and == first_is_or) {
throw std::runtime_error(fmt::format(
"Failed to split nested sc. This may mean that abs/ash/type-of was misrecognized as "
"and/or:\n{}",
ir->to_string(func.ir2.env)));
}
ASSERT(first_is_and != first_is_or); // one or the other but not both!
int first_different = -1; // the index of the first one that's different.
for (int i = 1; i < int(ir->entries.size()) - 1; i++) {
auto branch = get_condition_branch(ir->entries.at(i).condition);
ASSERT(branch.first);
ASSERT(ir->entries.at(i).branch_delay.has_value());
bool is_and = delay_slot_sets_false(branch.first, *ir->entries.at(i).branch_delay);
bool is_or = delay_slot_sets_truthy(branch.first, *ir->entries.at(i).branch_delay);
ASSERT_MSG(is_and != is_or, fmt::format("bad nested sc in {}", func.name()));
if (first_different == -1) {
// haven't seen a change yet.
if (first_is_and != is_and) {
// change!
first_different = i;
break;
}
}
}
ASSERT(first_different != -1);
std::vector<ShortCircuitElement::Entry> nested_ir;
for (int i = first_different; i < int(ir->entries.size()); i++) {
nested_ir.push_back(ir->entries.at(i));
}
auto s = int(ir->entries.size());
for (int i = first_different; i < s; i++) {
ir->entries.pop_back();
}
// nested_sc has no parent yet.
auto nested_sc = pool.alloc_element<ShortCircuitElement>(nested_ir);
clean_up_sc(pool, func, nested_sc);
// the real trick
ShortCircuitElement::Entry nested_entry;
// sets both parents
nested_entry.condition = pool.alloc_single_form(ir, nested_sc);
ir->entries.push_back(nested_entry);
clean_up_sc(pool, func, ir);
return true;
}
/*!
* Try to clean up a single short circuit IR. It may get split up into nested IR_ShortCircuits
* if there is a case like (and a (or b c))
*/
void clean_up_sc(FormPool& pool, Function& func, ShortCircuitElement* ir) {
ASSERT(ir->entries.size() > 0);
if (ir->entries.size() == 1) {
// need to fake the final entry.
ShortCircuitElement::Entry empty_final;
empty_final.condition = pool.alloc_single_element_form<EmptyElement>(ir);
ir->entries.push_back(empty_final);
}
if (!try_clean_up_sc_as_and(pool, func, ir)) {
if (!try_clean_up_sc_as_or(pool, func, ir)) {
if (!try_splitting_nested_sc(pool, func, ir)) {
ASSERT(false);
}
}
}
}
const SimpleAtom* get_atom_src(const Form* form) {
auto* elt = form->try_as_single_element();
if (elt) {
auto* as_expr = dynamic_cast<SimpleExpressionElement*>(elt);
if (as_expr) {
if (as_expr->expr().is_identity()) {
return &as_expr->expr().get_arg(0);
}
}
}
return nullptr;
}
/*!
* A GOAL comparison which produces a boolean is recognized as a cond-no-else by the CFG analysis.
* But it should not be decompiled as a branching statement.
* This either succeeds or asserts and must be called with with something that can be converted
* successfully
*/
void convert_cond_no_else_to_compare(FormPool& pool,
Function& f,
FormElement** ir_loc,
Form* parent_form) {
CondNoElseElement* cne = dynamic_cast<CondNoElseElement*>(*ir_loc);
ASSERT(cne);
auto condition = get_condition_branch(cne->entries.front().condition);
ASSERT(condition.first);
auto body = dynamic_cast<SetVarElement*>(cne->entries.front().body->try_as_single_element());
ASSERT(body);
auto dst = body->dst();
auto src_atom = get_atom_src(body->src());
ASSERT(src_atom);
ASSERT(src_atom->is_sym_val());
ASSERT(src_atom->get_str() == "#f");
ASSERT(cne->entries.size() == 1);
// safe to do this here because we never give up on this.
f.ir2.env.disable_def(condition.first->op()->branch_delay().var(0), f.warnings);
auto condition_as_single =
dynamic_cast<BranchElement*>(cne->entries.front().condition->try_as_single_element());
auto condition_replacement = condition.first->op()->get_condition_as_form(pool, f.ir2.env);
auto crf = pool.alloc_single_form(nullptr, condition_replacement);
auto replacement = pool.alloc_element<SetVarElement>(dst, crf, true, TypeSpec("symbol"));
replacement->parent_form = cne->parent_form;
if (condition_as_single) {
*ir_loc = replacement;
} else {
// lg::error("Weird case in {}", f.name());
(void)f;
auto seq = cne->entries.front().condition;
seq->pop_back();
seq->push_back(replacement);
parent_form->pop_back();
for (auto& x : seq->elts()) {
parent_form->push_back(x);
}
// auto condition_as_seq = dynamic_cast<IR_Begin*>(cne->entries.front().condition.get());
// ASSERT(condition_as_seq);
// if (condition_as_seq) {
// auto replacement = std::make_shared<IR_Begin>();
// replacement->forms = condition_as_seq->forms;
// ASSERT(condition.second == &condition_as_seq->forms.back());
// replacement->forms.pop_back();
// replacement->forms.push_back(std::make_shared<IR_Set>(
// IR_Set::REG_64, dst,
// std::make_shared<IR_Compare>(condition.first->condition, condition.first)));
// *ir = replacement;
// }
}
}
void clean_up_cond_no_else_final(Function& func, CondNoElseElement* cne) {
for (size_t idx = 0; idx < cne->entries.size(); idx++) {
auto& entry = cne->entries.at(idx);
if (entry.false_destination.has_value()) {
auto fr = entry.false_destination;
ASSERT(fr.has_value());
cne->final_destination = *fr;
} else {
fmt::print("failed to clean up cond_no_else_final: {}\n", func.name());
ASSERT(false);
}
}
auto last_branch = dynamic_cast<BranchElement*>(cne->entries.back().original_condition_branch);
ASSERT(last_branch);
if (func.ir2.env.has_reg_use()) {
auto& last_branch_info = func.ir2.env.reg_use().op.at(last_branch->op()->op_id());
cne->used_as_value = last_branch_info.written_and_unused.find(cne->final_destination.reg()) ==
last_branch_info.written_and_unused.end();
}
// check that all other delay slot writes are unused.
for (size_t i = 0; i < cne->entries.size() - 1; i++) {
if (func.ir2.env.has_reg_use()) {
auto branch = dynamic_cast<BranchElement*>(cne->entries.at(i).original_condition_branch);
auto& branch_info_i = func.ir2.env.reg_use().op.at(branch->op()->op_id());
auto reg = cne->entries.at(i).false_destination;
ASSERT(reg.has_value());
ASSERT(branch);
if (branch_info_i.written_and_unused.find(reg->reg()) ==
branch_info_i.written_and_unused.end()) {
lg::error("Branch delay register used improperly: {}", reg->to_string(func.ir2.env));
throw std::runtime_error("Bad delay slot in clean_up_cond_no_else_final");
}
// ASSERT(branch_info_i.written_and_unused.find(reg->reg()) !=
// branch_info_i.written_and_unused.end());
}
}
if (func.ir2.env.has_local_vars()) {
for (size_t i = 0; i < cne->entries.size(); i++) {
if (func.ir2.env.has_reg_use()) {
auto reg = cne->entries.at(i).false_destination;
// lg::warn("Disable def of {} at {}\n", reg->to_string(func.ir2.env), reg->idx());
func.ir2.env.disable_def(*reg, func.warnings);
}
}
}
}
/*!
* Replace internal branches inside a CondNoElse IR.
* If possible will simplify the entire expression into a comparison operation if possible.
* Will record which registers are set to false in branch delay slots.
* The exact behavior here isn't really clear to me. It's possible that these delay set false
* were disabled in cases where the result of the cond was none, or was a number or something.
* But it generally seems inconsistent. The expression propagation step will have to deal with
* this.
*/
void clean_up_cond_no_else(FormPool& pool, Function& f, FormElement** ir_loc, Form* parent_form) {
auto cne = dynamic_cast<CondNoElseElement*>(*ir_loc);
ASSERT(cne);
for (size_t idx = 0; idx < cne->entries.size(); idx++) {
auto& e = cne->entries.at(idx);
if (e.cleaned) {
continue;
}
auto jump_to_next = get_condition_branch(e.condition);
ASSERT(jump_to_next.first);
if (jump_to_next.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::SET_REG_TRUE &&
cne->entries.size() == 1) {
convert_cond_no_else_to_compare(pool, f, ir_loc, parent_form);
return;
} else {
ASSERT(jump_to_next.first->op()->branch_delay().kind() ==
IR2_BranchDelay::Kind::SET_REG_FALSE ||
jump_to_next.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP);
ASSERT(jump_to_next.first->op()->condition().kind() != IR2_Condition::Kind::ALWAYS);
if (jump_to_next.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::SET_REG_FALSE) {
ASSERT(!e.false_destination);
e.false_destination = jump_to_next.first->op()->branch_delay().var(0);
ASSERT(e.false_destination);
}
e.original_condition_branch = *jump_to_next.second;
auto replacement = jump_to_next.first->op()->get_condition_as_form(pool, f.ir2.env);
replacement->invert();
*(jump_to_next.second) = replacement;
e.cleaned = true;
if (idx != cne->entries.size() - 1) {
auto jump_to_end = get_condition_branch(e.body);
ASSERT(jump_to_end.first);
ASSERT(jump_to_end.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP);
ASSERT(jump_to_end.first->op()->condition().kind() == IR2_Condition::Kind::ALWAYS);
auto as_end_of_sequence = get_condition_branch_as_vector(e.body);
if (as_end_of_sequence.first) {
ASSERT(as_end_of_sequence.second->size() > 1);
as_end_of_sequence.second->pop_back();
} else {
*(jump_to_end.second) = pool.alloc_element<EmptyElement>();
}
}
}
}
}
/*!
* Match for a (set! reg (math reg reg)) form
*/
bool is_op_3(FormElement* ir,
MatchParam<SimpleExpression::Kind> kind,
MatchParam<Register> dst,
MatchParam<Register> src0,
MatchParam<Register> src1,
Register* dst_out = nullptr,
Register* src0_out = nullptr,
Register* src1_out = nullptr) {
// should be a set reg to int math 2 ir
auto set = dynamic_cast<SetVarElement*>(ir);
if (!set) {
return false;
}
// destination should be a register
auto dest = set->dst();
if (dst != dest.reg()) {
return false;
}
auto math = dynamic_cast<const SimpleExpressionElement*>(set->src()->try_as_single_element());
if (!math || kind != math->expr().kind()) {
return false;
}
if (get_simple_expression_arg_count(math->expr().kind()) != 2) {
return false;
}
auto arg0 = math->expr().get_arg(0);
auto arg1 = math->expr().get_arg(1);
if (!arg0.is_var() || src0 != arg0.var().reg() || !arg1.is_var() || src1 != arg1.var().reg()) {
return false;
}
// it's a match!
if (dst_out) {
*dst_out = dest.reg();
}
if (src0_out) {
*src0_out = arg0.var().reg();
}
if (src1_out) {
*src1_out = arg1.var().reg();
}
return true;
}
bool is_op_3(AtomicOp* op,
MatchParam<SimpleExpression::Kind> kind,
MatchParam<Register> dst,
MatchParam<Register> src0,
MatchParam<Register> src1,
Register* dst_out = nullptr,
Register* src0_out = nullptr,
Register* src1_out = nullptr) {
// should be a set reg to int math 2 ir
auto set = dynamic_cast<SetVarOp*>(op);
if (!set) {
return false;
}
// destination should be a register
auto dest = set->dst();
if (dst != dest.reg()) {
return false;
}
auto math = set->src();
if (kind != math.kind()) {
return false;
}
if (get_simple_expression_arg_count(math.kind()) != 2) {
return false;
}
auto arg0 = math.get_arg(0);
auto arg1 = math.get_arg(1);
if (!arg0.is_var() || src0 != arg0.var().reg() || !arg1.is_var() || src1 != arg1.var().reg()) {
return false;
}
// it's a match!
if (dst_out) {
*dst_out = dest.reg();
}
if (src0_out) {
*src0_out = arg0.var().reg();
}
if (src1_out) {
*src1_out = arg1.var().reg();
}
return true;
}
bool is_op_2(FormElement* ir,
MatchParam<SimpleExpression::Kind> kind,
MatchParam<Register> dst,
MatchParam<Register> src0,
Register* dst_out = nullptr,
Register* src0_out = nullptr) {
// should be a set reg to int math 2 ir
auto set = dynamic_cast<SetVarElement*>(ir);
if (!set) {
return false;
}
// destination should be a register
auto dest = set->dst();
if (dst != dest.reg()) {
return false;
}
auto math = dynamic_cast<const SimpleExpressionElement*>(set->src()->try_as_single_element());
if (!math || kind != math->expr().kind()) {
return false;
}
auto arg = math->expr().get_arg(0);
if (!arg.is_var() || src0 != arg.var().reg()) {
return false;
}
// it's a match!
if (dst_out) {
*dst_out = dest.reg();
}
if (src0_out) {
*src0_out = arg.var().reg();
}
return true;
}
/*!
* Try to convert this SC Vertex into an abs (integer).
* Will return a converted abs IR if successful, or nullptr if its not possible
*/
Form* try_sc_as_abs(FormPool& pool, Function& f, const ShortCircuit* vtx) {
if (vtx->entries.size() != 1) {
return nullptr;
}
auto b0_c = vtx->entries.at(0).condition;
auto b0_d = dynamic_cast<BlockVtx*>(vtx->entries.at(0).likely_delay);
if (!b0_c || !b0_d) {
return nullptr;
}
auto b0_ptr = cfg_to_ir(pool, f, b0_c);
// auto b0_ir = dynamic_cast<IR_Begin*>(b0_ptr.get());
BranchElement* branch = dynamic_cast<BranchElement*>(b0_ptr->back());
if (!branch) {
return nullptr;
}
auto delay_start = f.ir2.atomic_ops->block_id_to_first_atomic_op.at(b0_d->block_id);
auto delay_end = f.ir2.atomic_ops->block_id_to_end_atomic_op.at(b0_d->block_id);
if (delay_end - delay_start != 1) {
return nullptr;
}
auto& delay_op = f.ir2.atomic_ops->ops.at(delay_start);
auto* delay = dynamic_cast<SetVarOp*>(delay_op.get());
// check the branch instruction
if (!branch->op()->likely() ||
branch->op()->condition().kind() != IR2_Condition::Kind::LESS_THAN_ZERO_SIGNED ||
!is_op_2(delay, SimpleExpression::Kind::NEG, {}, {})) {
// todo - if there was an abs(unsigned), it would be missed here.
return nullptr;
}
auto input = branch->op()->condition().src(0);
auto output = delay->dst();
ASSERT(input.is_var());
ASSERT(input.var().reg() == delay->src().get_arg(0).var().reg());
// remove the branch
b0_ptr->pop_back();
// add the ash
auto& info = f.ir2.env.reg_use();
auto final_op_idx = input.var().idx();
RegSet consumed = info.op.at(final_op_idx).consumes;
if (output.reg() == input.var().reg()) {
consumed.insert(output.reg());
}
auto src_abs = pool.alloc_single_element_form<AbsElement>(nullptr, input.var(), consumed);
auto replacement = pool.alloc_element<SetVarElement>(output, src_abs, true, TypeSpec("int"));
b0_ptr->push_back(replacement);
return b0_ptr;
}
/*!
* Attempt to convert a short circuit expression into an arithmetic shift.
* GOAL's shift function accepts positive/negative numbers to determine the direction
* of the shift.
*/
Form* try_sc_as_ash(FormPool& pool, Function& f, const ShortCircuit* vtx) {
if (vtx->entries.size() != 2) {
return nullptr;
}
// todo, I think b0 could possibly be something more complicated, depending on how we order.
auto b0_c = vtx->entries.at(0).condition;
auto b0_d = dynamic_cast<BlockVtx*>(vtx->entries.at(0).likely_delay);
auto b1 = dynamic_cast<BlockVtx*>(vtx->entries.at(1).condition);
if (!b0_c || !b0_d || !b1 || vtx->entries.at(1).likely_delay) {
return nullptr;
}
auto b0_c_ptr = cfg_to_ir(pool, f, b0_c);
auto b1_ptr = cfg_to_ir(pool, f, b1);
auto delay_start = f.ir2.atomic_ops->block_id_to_first_atomic_op.at(b0_d->block_id);
auto delay_end = f.ir2.atomic_ops->block_id_to_end_atomic_op.at(b0_d->block_id);
if (delay_end - delay_start != 1) {
return nullptr;
}
auto& delay_op = f.ir2.atomic_ops->ops.at(delay_start);
auto* delay = dynamic_cast<SetVarOp*>(delay_op.get());
auto branch = dynamic_cast<BranchElement*>(b0_c_ptr->back());
if (!branch || b1_ptr->size() != 2) {
return nullptr;
}
// check the branch instruction
if (!branch->op()->likely() ||
branch->op()->condition().kind() != IR2_Condition::Kind::GEQ_ZERO_SIGNED ||
!is_op_3(delay, SimpleExpression::Kind::LEFT_SHIFT, {}, {}, {})) {
return nullptr;
}
/*
* bgezl s5, L109 ; s5 is the shift amount
dsllv a0, a0, s5 ; a0 is both input and output here
dsubu a1, r0, s5 ; a1 is a temp here
dsrav a0, a0, a1 ; a0 is both input and output here
*/
auto sa_in = branch->op()->condition().src(0);
ASSERT(sa_in.is_var());
auto result = delay->dst();
auto value_in = delay->src().get_arg(0).var();
auto sa_in2 = delay->src().get_arg(1).var();
ASSERT(sa_in.var().reg() == sa_in2.reg());
auto dsubu_candidate = b1_ptr->at(0);
auto dsrav_candidate = b1_ptr->at(1);
Register clobber;
if (!is_op_2(dsubu_candidate, SimpleExpression::Kind::NEG, {}, sa_in.var().reg(), &clobber)) {
return nullptr;
}
bool is_arith = is_op_3(dsrav_candidate, SimpleExpression::Kind::RIGHT_SHIFT_ARITH, result.reg(),
value_in.reg(), clobber);
bool is_logical = is_op_3(dsrav_candidate, SimpleExpression::Kind::RIGHT_SHIFT_LOGIC,
result.reg(), value_in.reg(), clobber);
if (!is_arith && !is_logical) {
return nullptr;
}
std::optional<RegisterAccess> clobber_ir;
auto dsubu_set = dynamic_cast<SetVarElement*>(dsubu_candidate);
auto dsrav_set = dynamic_cast<SetVarElement*>(dsrav_candidate);
ASSERT(dsubu_set && dsrav_set);
if (clobber != result.reg()) {
clobber_ir = dsubu_set->dst();
}
RegisterAccess dest_ir = result;
SimpleAtom shift_ir = branch->op()->condition().src(0);
auto value_ir =
dynamic_cast<const SimpleExpressionElement*>(dsrav_set->src()->try_as_single_element())
->expr()
.get_arg(0);
// remove the branch
b0_c_ptr->pop_back();
auto& info = f.ir2.env.reg_use();
auto final_op_idx = value_ir.var().idx();
RegSet consumed = info.op.at(final_op_idx).consumes;
for (auto var : {shift_ir.var(), value_ir.var()}) {
if (var.reg() == clobber) {
consumed.insert(var.reg());
}
if (var.reg() == dest_ir.reg()) {
consumed.insert(var.reg());
}
}
// setup
auto ash_form = pool.alloc_single_element_form<AshElement>(
nullptr, shift_ir.var(), value_ir.var(), clobber_ir, is_arith, consumed);
auto set_form = pool.alloc_element<SetVarElement>(dest_ir, ash_form, true, TypeSpec("int"));
b0_c_ptr->push_back(set_form);
// fix up reg info
f.ir2.env.disable_use(delay->src().get_arg(0).var());
// fix up the other ones:
f.ir2.env.disable_use(branch->op()->condition().src(0).var()); // bgezl X, L
auto dsubu_var = dynamic_cast<SimpleExpressionElement*>(dsubu_set->src()->try_as_single_element())
->expr()
.get_arg(0)
.var();
f.ir2.env.disable_use(dsubu_var);
// and the def too
f.ir2.env.disable_def(dsrav_set->dst(), f.warnings);
return b0_c_ptr;
}
bool is_set_symbol_value(SetVarOp& op, const std::string& name) {
return op.src().is_identity() && op.src().get_arg(0).is_sym_val() &&
op.src().get_arg(0).get_str() == name;
}
bool is_set_symbol_value(SetVarElement* op, const std::string& name) {
auto src = op->src()->try_as_element<SimpleExpressionElement>();
if (!src) {
return false;
}
return src->expr().is_identity() && src->expr().get_arg(0).is_sym_val() &&
src->expr().get_arg(0).get_str() == name;
}
SetVarOp get_delay_op(const Function& f, const BlockVtx* vtx) {
auto delay_start = f.ir2.atomic_ops->block_id_to_first_atomic_op.at(vtx->block_id);
auto delay_end = f.ir2.atomic_ops->block_id_to_end_atomic_op.at(vtx->block_id);
if (delay_end - delay_start != 1) {
ASSERT(false);
}
auto& delay_op = f.ir2.atomic_ops->ops.at(delay_start);
auto* delay = dynamic_cast<SetVarOp*>(delay_op.get());
if (!delay) {
fmt::print("bad delay: {}\n", delay_op->to_string(f.ir2.env));
ASSERT(false);
}
return *delay;
}
LoadVarOp get_delay_load_op(const Function& f, const BlockVtx* vtx) {
auto delay_start = f.ir2.atomic_ops->block_id_to_first_atomic_op.at(vtx->block_id);
auto delay_end = f.ir2.atomic_ops->block_id_to_end_atomic_op.at(vtx->block_id);
if (delay_end - delay_start != 1) {
ASSERT(false);
}
auto& delay_op = f.ir2.atomic_ops->ops.at(delay_start);
auto* delay = dynamic_cast<LoadVarOp*>(delay_op.get());
if (!delay) {
fmt::print("bad delay: {}\n", delay_op->to_string(f.ir2.env));
ASSERT(false);
}
return *delay;
}
Form* try_sc_as_type_of_jak2(FormPool& pool, Function& f, const ShortCircuit* vtx) {
/*
dsll32 a2, a0, 29 * temp_reg0, src
dsrl32 a2, a2, 29 * temp_reg0, temp_reg0
beql a2, r0, L289 branch0
B1:
lw a0, binteger(s7)
B2:
addiu a3, r0, 4
beql a2, a3, L289 branch1
B3:
lwu a0, -4(a0)
B4:
addiu a0, r0, 2
beql a2, a0, L289 branch2
B5:
lw a0, pair(s7)
B6:
lw a0, symbol(s7)
B7:
L289:
*/
if (vtx->entries.size() != 4) {
return nullptr;
}
auto b0_c = dynamic_cast<CfgVtx*>(vtx->entries.at(0).condition);
auto b0_d = dynamic_cast<BlockVtx*>(vtx->entries.at(0).likely_delay);
auto b1_c = dynamic_cast<BlockVtx*>(vtx->entries.at(1).condition);
auto b1_d = dynamic_cast<BlockVtx*>(vtx->entries.at(1).likely_delay);
auto b2_c = dynamic_cast<BlockVtx*>(vtx->entries.at(2).condition);
auto b2_d = dynamic_cast<BlockVtx*>(vtx->entries.at(2).likely_delay);
auto b3_c = dynamic_cast<BlockVtx*>(vtx->entries.at(3).condition);
if (!b0_c || !b0_d || !b1_c || !b1_d || !b2_c || !b2_d || !b3_c ||
vtx->entries.at(3).likely_delay) {
return nullptr;
}
// get the branch ir's
auto b0_ptr = cfg_to_ir(pool, f, b0_c); // should be begin.
if (b0_ptr->size() <= 2) {
fmt::print("fail1\n");
return nullptr;
}
auto b1_ptr = cfg_to_ir(pool, f, b1_c);
if (b1_ptr->size() <= 1) {
fmt::print("fail2\n");
return nullptr;
}
auto b2_ptr = cfg_to_ir(pool, f, b2_c);
if (b2_ptr->size() <= 1) {
fmt::print("fail3\n");
return nullptr;
}
auto b3_ptr = cfg_to_ir(pool, f, b3_c);
auto b3_ir = dynamic_cast<SetVarElement*>(b3_ptr->try_as_single_element());
if (!b3_ir) {
fmt::print("fail4\n");
return nullptr;
}
// identify the left shift
auto set_shift_left = dynamic_cast<SetVarElement*>(b0_ptr->at(b0_ptr->size() - 3));
if (!set_shift_left) {
fmt::print("fail5\n");
return nullptr;
}
auto temp_reg0 = set_shift_left->dst();
auto shift_left =
dynamic_cast<SimpleExpressionElement*>(set_shift_left->src()->try_as_single_element());
if (!shift_left || shift_left->expr().kind() != SimpleExpression::Kind::LEFT_SHIFT) {
fmt::print("fail6\n");
return nullptr;
}
auto src_reg = shift_left->expr().get_arg(0).var();
auto sa_left = shift_left->expr().get_arg(1);
if (!sa_left.is_int() || sa_left.get_int() != 61) {
fmt::print("fail7\n");
return nullptr;
}
// identify the right shift
auto set_shift_right = dynamic_cast<SetVarElement*>(b0_ptr->at(b0_ptr->size() - 2));
if (!set_shift_right) {
fmt::print("fail8\n");
return nullptr;
}
if (set_shift_right->dst().reg() != set_shift_left->dst().reg()) {
fmt::print("fail9\n");
return nullptr;
}
auto shift_right =
dynamic_cast<SimpleExpressionElement*>(set_shift_right->src()->try_as_single_element());
if (!shift_right || shift_right->expr().kind() != SimpleExpression::Kind::RIGHT_SHIFT_LOGIC) {
fmt::print("fail10\n");
return nullptr;
}
if (temp_reg0.reg() != shift_right->expr().get_arg(0).var().reg()) {
fmt::print("fail11\n");
return nullptr;
}
auto sa_right = shift_right->expr().get_arg(1);
if (!sa_right.is_int() || sa_right.get_int() != 61) {
fmt::print("fail12\n");
return nullptr;
}
// branch 0
auto first_branch = dynamic_cast<BranchElement*>(b0_ptr->back());
auto b0_delay_op = get_delay_op(f, b0_d);
if (!first_branch || !is_set_symbol_value(b0_delay_op, "binteger") ||
first_branch->op()->condition().kind() != IR2_Condition::Kind::ZERO ||
!first_branch->op()->likely()) {
fmt::print("fail13\n");
return nullptr;
}
auto temp_reg = first_branch->op()->condition().src(0).var();
ASSERT(temp_reg.reg() == temp_reg0.reg());
auto dst_reg = b0_delay_op.dst();
// branch 1
if (b1_ptr->size() != 2) {
fmt::print("fail14\n");
return nullptr;
}
auto second_branch_pre_op = dynamic_cast<SetVarElement*>(b1_ptr->at(0));
if (!second_branch_pre_op) {
fmt::print("fail15\n");
return nullptr;
}
{
auto pos = second_branch_pre_op->src();
auto pos_as_se = pos->try_as_element<SimpleExpressionElement>();
if (!pos_as_se || !pos_as_se->expr().is_identity() || !pos_as_se->expr().get_arg(0).is_int(4)) {
fmt::print("fail16\n");
return nullptr;
}
}
auto temp_reg1 = second_branch_pre_op->dst();
auto second_branch = dynamic_cast<BranchElement*>(b1_ptr->at(1));
/*
beql a2, a3, L289 branch1
B3:
lwu a0, -4(a0)
*/
if (!second_branch || second_branch->op()->condition().kind() != IR2_Condition::Kind::EQUAL ||
!second_branch->op()->likely() || !second_branch->op()->condition().src(0).is_var() ||
second_branch->op()->condition().src(0).var().reg() != temp_reg0.reg() ||
!second_branch->op()->condition().src(1).is_var() ||
second_branch->op()->condition().src(1).var().reg() != temp_reg1.reg()) {
fmt::print("fail17\n");
return nullptr;
}
if (!b1_d) {
fmt::print("fail18\n");
return nullptr;
}
auto b1_delay_op = get_delay_load_op(f, b1_d);
if (b1_delay_op.kind() != LoadVarOp::Kind::UNSIGNED || b1_delay_op.size() != 4) {
fmt::print("fail19\n");
return nullptr;
}
IR2_RegOffset ro;
if (!get_as_reg_offset(b1_delay_op.src(), &ro)) {
fmt::print("fail20\n");
return nullptr;
}
if (ro.offset != -4) {
fmt::print("fail21\n");
return nullptr;
}
if (ro.reg != src_reg.reg() || b1_delay_op.get_set_destination().reg() != dst_reg.reg()) {
fmt::print("fail22\n");
return nullptr;
}
/////////////////////////////////
// branch2
/*
* B4:
addiu a0, r0, 2
beql a2, a0, L289 branch2
B5:
lw a0, pair(s7)
*/
if (b2_ptr->size() != 2) {
fmt::print("fail23\n");
return nullptr;
}
auto third_branch_pre_op = dynamic_cast<SetVarElement*>(b2_ptr->at(0));
if (!third_branch_pre_op) {
fmt::print("fail24\n");
return nullptr;
}
{
auto pos = third_branch_pre_op->src();
auto pos_as_se = pos->try_as_element<SimpleExpressionElement>();
if (!pos_as_se || !pos_as_se->expr().is_identity() || !pos_as_se->expr().get_arg(0).is_int(2)) {
fmt::print("fail25\n");
return nullptr;
}
}
auto temp_reg2 = third_branch_pre_op->dst();
auto third_branch = dynamic_cast<BranchElement*>(b2_ptr->at(1));
if (!third_branch || third_branch->op()->condition().kind() != IR2_Condition::Kind::EQUAL ||
!third_branch->op()->likely() || !third_branch->op()->condition().src(0).is_var() ||
third_branch->op()->condition().src(0).var().reg() != temp_reg0.reg() ||
!third_branch->op()->condition().src(1).is_var() ||
third_branch->op()->condition().src(1).var().reg() != temp_reg2.reg()) {
fmt::print("fail26\n");
return nullptr;
}
if (!b2_d) {
fmt::print("fail27\n");
return nullptr;
}
auto b2_delay_op = get_delay_op(f, b2_d);
if (!is_set_symbol_value(b2_delay_op, "pair") || b2_delay_op.dst().reg() != dst_reg.reg()) {
fmt::print("fail28\n");
return nullptr;
}
if (!is_set_symbol_value(b3_ir, "symbol")) {
fmt::print("fail29\n");
return nullptr;
}
// we passed, time to clean things up...
// remove the stuff from the back of block 0.
b0_ptr->pop_back();
b0_ptr->pop_back();
b0_ptr->pop_back();
auto obj = pool.alloc_single_element_form<SimpleExpressionElement>(
nullptr, shift_left->expr().get_arg(0).as_expr(), set_shift_right->dst().idx());
auto type_op = pool.alloc_single_element_form<TypeOfElement>(nullptr, obj);
auto op = pool.alloc_element<SetVarElement>(dst_reg, type_op, true, TypeSpec("type"));
b0_ptr->push_back(op);
// if (b1_delay_op.src().)
f.ir2.env.disable_def(b0_delay_op.dst(), f.warnings);
f.ir2.env.disable_def(b1_delay_op.get_set_destination(), f.warnings);
f.ir2.env.disable_def(b2_delay_op.dst(), f.warnings);
f.ir2.env.disable_use(shift_left->expr().get_arg(0).var());
f.warnings.warning("Using new Jak 2 rtype-of");
return b0_ptr;
}
/*!
* Try to convert a short circuiting expression into a "type-of" expression.
* We do this before attempting the normal and/or expressions.
*/
Form* try_sc_as_type_of_jak1(FormPool& pool, Function& f, const ShortCircuit* vtx) {
// the assembly looks like this:
/*
dsll32 v1, a0, 29 ;; (set! v1 (shl a0 61))
beql v1, r0, L60 ;; (bl! (= v1 r0) L60 (unknown-branch-delay))
lw v1, binteger(s7)
bgtzl v1, L60 ;; (bl! (>0.s v1) L60 (unknown-branch-delay))
lw v1, pair(s7)
lwu v1, -4(a0) ;; (set! v1 (l.wu (+.i a0 -4)))
L60:
*/
// some of these checks may be a little bit overkill but it's a nice way to sanity check that
// we have actually decoded everything correctly.
if (vtx->entries.size() != 3) {
return nullptr;
}
auto b0_c = dynamic_cast<CfgVtx*>(vtx->entries.at(0).condition);
auto b0_d = dynamic_cast<BlockVtx*>(vtx->entries.at(0).likely_delay);
auto b1_c = dynamic_cast<BlockVtx*>(vtx->entries.at(1).condition);
auto b1_d = dynamic_cast<BlockVtx*>(vtx->entries.at(1).likely_delay);
auto b2_c = dynamic_cast<BlockVtx*>(vtx->entries.at(2).condition);
if (!b0_c || !b0_d || !b1_c || !b1_d || !b2_c || vtx->entries.at(2).likely_delay) {
return nullptr;
}
auto b0_ptr = cfg_to_ir(pool, f, b0_c); // should be begin.
if (b0_ptr->size() <= 1) {
return nullptr;
}
auto b1_ptr = cfg_to_ir(pool, f, b1_c);
auto b1_ir = dynamic_cast<BranchElement*>(b1_ptr->try_as_single_element());
auto b2_ptr = cfg_to_ir(pool, f, b2_c);
auto b2_ir = dynamic_cast<SetVarElement*>(b2_ptr->try_as_single_element());
if (!b1_ir || !b2_ir) {
return nullptr;
}
auto set_shift = dynamic_cast<SetVarElement*>(b0_ptr->at(b0_ptr->size() - 2));
if (!set_shift) {
return nullptr;
}
auto temp_reg0 = set_shift->dst();
auto shift = dynamic_cast<SimpleExpressionElement*>(set_shift->src()->try_as_single_element());
if (!shift || shift->expr().kind() != SimpleExpression::Kind::LEFT_SHIFT) {
return nullptr;
}
auto src_reg = shift->expr().get_arg(0).var();
auto sa = shift->expr().get_arg(1);
if (!sa.is_int() || sa.get_int() != 61) {
return nullptr;
}
auto first_branch = dynamic_cast<BranchElement*>(b0_ptr->back());
auto second_branch = b1_ir;
auto else_case = b2_ir;
auto b0_delay_op = get_delay_op(f, b0_d);
if (!first_branch || !is_set_symbol_value(b0_delay_op, "binteger") ||
first_branch->op()->condition().kind() != IR2_Condition::Kind::ZERO ||
!first_branch->op()->likely()) {
return nullptr;
}
auto temp_reg = first_branch->op()->condition().src(0).var();
ASSERT(temp_reg.reg() == temp_reg0.reg());
auto dst_reg = b0_delay_op.dst();
auto b1_delay_op = get_delay_op(f, b1_d);
if (!second_branch || !is_set_symbol_value(b1_delay_op, "pair") ||
second_branch->op()->condition().kind() != IR2_Condition::Kind::GREATER_THAN_ZERO_SIGNED ||
!second_branch->op()->likely()) {
return nullptr;
}
// check we agree on destination register.
auto dst_reg2 = b1_delay_op.dst();
ASSERT(dst_reg2.reg() == dst_reg.reg());
// else case is a lwu to grab the type from a basic
ASSERT(else_case);
auto dst_reg3 = else_case->dst();
ASSERT(dst_reg3.reg() == dst_reg.reg());
auto load_op = dynamic_cast<LoadSourceElement*>(else_case->src()->try_as_single_element());
if (!load_op || load_op->kind() != LoadVarOp::Kind::UNSIGNED || load_op->size() != 4) {
return nullptr;
}
auto load_loc =
dynamic_cast<SimpleExpressionElement*>(load_op->location()->try_as_single_element());
if (!load_loc || load_loc->expr().kind() != SimpleExpression::Kind::ADD) {
return nullptr;
}
auto src_reg3 = load_loc->expr().get_arg(0);
auto offset = load_loc->expr().get_arg(1);
if (!src_reg3.is_var() || !offset.is_int()) {
return nullptr;
}
ASSERT(src_reg3.var().reg() == src_reg.reg());
ASSERT(offset.get_int() == -4);
// remove the branch
b0_ptr->pop_back();
// remove the shift
b0_ptr->pop_back();
// add the type-of
auto obj = pool.alloc_single_element_form<SimpleExpressionElement>(
nullptr, shift->expr().get_arg(0).as_expr(), set_shift->dst().idx());
auto type_op = pool.alloc_single_element_form<TypeOfElement>(nullptr, obj);
auto op = pool.alloc_element<SetVarElement>(else_case->dst(), type_op, true, TypeSpec("type"));
b0_ptr->push_back(op);
// fix register info
f.ir2.env.disable_def(b0_delay_op.dst(), f.warnings);
f.ir2.env.disable_def(b1_delay_op.dst(), f.warnings);
f.ir2.env.disable_use(shift->expr().get_arg(0).var());
return b0_ptr;
}
Form* try_sc_as_type_of(FormPool& pool, Function& f, const ShortCircuit* vtx, GameVersion version) {
switch (version) {
case GameVersion::Jak1:
return try_sc_as_type_of_jak1(pool, f, vtx);
case GameVersion::Jak2:
return try_sc_as_type_of_jak2(pool, f, vtx);
default:
ASSERT(false);
return nullptr;
}
}
Form* merge_cond_else_with_sc_cond(FormPool& pool,
Function& f,
const CondWithElse* cwe,
Form* else_ir) {
if (else_ir->size() != 2) {
return nullptr;
}
auto first = dynamic_cast<ShortCircuitElement*>(else_ir->at(0));
auto second = dynamic_cast<CondNoElseElement*>(else_ir->at(1));
if (!first || !second) {
return nullptr;
}
std::vector<CondNoElseElement::Entry> entries;
for (auto& x : cwe->entries) {
CondNoElseElement::Entry e;
e.condition = cfg_to_ir(pool, f, x.condition);
e.body = cfg_to_ir(pool, f, x.body);
entries.push_back(std::move(e));
}
auto first_condition = pool.alloc_empty_form();
first_condition->push_back(else_ir->at(0));
for (auto& x : second->entries.front().condition->elts()) {
first_condition->push_back(x);
}
second->entries.front().condition = first_condition;
for (auto& x : second->entries) {
entries.push_back(x);
}
auto result = pool.alloc_single_element_form<CondNoElseElement>(nullptr, entries);
clean_up_cond_no_else(pool, f, result->back_ref(), result);
return result;
}
namespace {
template <typename T>
bool contains(const std::vector<T>& vec, const T& val) {
for (auto& x : vec) {
if (val == x) {
return true;
}
}
return false;
}
} // namespace
/*!
* Push x to output (can be a Form or std::vector<FormElement>).
* Will take of grouping the delay slots for likely asm branches into a single operation.
*/
template <typename T>
void push_back_form_regroup_asm_likely_branches(T* output, FormElement* x, Function& f) {
std::vector<FormElement*> hack_temp;
if (output->size() > 0) {
auto back_as_asm = dynamic_cast<AtomicOpElement*>(output->back());
if (back_as_asm) {
auto back_as_branch = dynamic_cast<AsmBranchOp*>(back_as_asm->op());
if (back_as_branch && back_as_branch->is_likely()) {
auto& pool = *f.ir2.form_pool;
auto elt = pool.alloc_element<AsmBranchElement>(back_as_branch,
pool.alloc_single_form(nullptr, x), true);
output->pop_back();
output->push_back(elt);
return;
}
}
}
output->push_back(x);
}
template <typename T>
void convert_and_inline(FormPool& pool, Function& f, const BlockVtx* as_block, T* output) {
auto start_op = f.ir2.atomic_ops->block_id_to_first_atomic_op.at(as_block->block_id);
auto end_op = f.ir2.atomic_ops->block_id_to_end_atomic_op.at(as_block->block_id);
std::vector<int> add_map;
for (auto i = start_op; i < end_op; i++) {
// convert to a form.
auto op = f.ir2.atomic_ops->ops.at(i)->get_as_form(pool, f.ir2.env);
bool add = true;
// check if we got a set, which we might want to eliminate
auto op_as_set = dynamic_cast<SetVarElement*>(op);
if (op_as_set) {
// check for deadness
if (op_as_set->is_dead_set()) {
// we want to eliminate, but we should we fix up the register info.
// now adding.
// add = false;
auto consumed_expr =
dynamic_cast<SimpleExpressionElement*>(op_as_set->src()->try_as_single_element());
ASSERT(consumed_expr);
auto& consumed = consumed_expr->expr().get_arg(0).var();
auto& ri_outer = f.ir2.env.reg_use().op.at(consumed.idx()); // meh
if (ri_outer.consumes.find(consumed.reg()) != ri_outer.consumes.end()) {
for (int j = i; j-- > start_op;) {
auto& ri = f.ir2.env.reg_use().op.at(j);
auto& ao = f.ir2.atomic_ops->ops.at(j);
if (contains(ao->write_regs(), consumed.reg())) {
ri.written_and_unused.insert(consumed.reg());
// fmt::print("GOT 3, making {} wau by {}\n", consumed.reg().to_charp(),
// ao->to_string(f.ir2.env));
// HACK - regenerate:
if (add_map.at(j - start_op) != -1) {
// fmt::print("regenerating {} to ", output->at(add_map.at(j -
// start_op))->to_string(f.ir2.env));
output->at(add_map.at(j - start_op)) =
f.ir2.atomic_ops->ops.at(j)->get_as_form(pool, f.ir2.env);
// fmt::print("{}\n", output->at(add_map.at(j -
// start_op))->to_string(f.ir2.env));
}
break;
}
if (contains(ao->read_regs(), consumed.reg())) {
// fmt::print("GOT 2, making {} consumed by {}\n",
// consumed.reg().to_charp(),
// ao->to_string(f.ir2.env));
ri.consumes.insert(consumed.reg());
break;
}
}
}
}
}
if (add) {
add_map.push_back(output->size());
// output->push_back(op);
push_back_form_regroup_asm_likely_branches(output, op, f);
} else {
add_map.push_back(-1);
}
}
}
void insert_cfg_into_list(FormPool& pool,
Function& f,
const CfgVtx* vtx,
std::vector<FormElement*>* output) {
auto as_sequence = dynamic_cast<const SequenceVtx*>(vtx);
auto as_block = dynamic_cast<const BlockVtx*>(vtx);
if (as_sequence) {
// inline the sequence.
if (as_sequence->needs_label) {
output->push_back(pool.alloc_element<LabelElement>(vtx->get_first_block_id()));
}
for (auto& x : as_sequence->seq) {
insert_cfg_into_list(pool, f, x, output);
}
} else if (as_block) {
if (as_block->needs_label) {
output->push_back(pool.alloc_element<LabelElement>(vtx->get_first_block_id()));
}
convert_and_inline(pool, f, as_block, output);
} else {
auto ir = cfg_to_ir(pool, f, vtx);
for (auto x : ir->elts()) {
push_back_form_regroup_asm_likely_branches(output, x, f);
// output->push_back(x);
}
}
}
Form* cfg_to_ir_allow_null(FormPool& pool, Function& f, const CfgVtx* vtx) {
if (vtx) {
return cfg_to_ir(pool, f, vtx);
} else {
return pool.alloc_single_element_form<EmptyElement>(nullptr);
}
}
Form* cfg_to_ir_helper(FormPool& pool, Function& f, const CfgVtx* vtx) {
if (dynamic_cast<const BlockVtx*>(vtx)) {
auto* bv = dynamic_cast<const BlockVtx*>(vtx);
Form* output = pool.alloc_empty_form();
convert_and_inline(pool, f, bv, output);
return output;
} else if (dynamic_cast<const SequenceVtx*>(vtx)) {
auto* sv = dynamic_cast<const SequenceVtx*>(vtx);
Form* output = pool.alloc_empty_form();
insert_cfg_into_list(pool, f, sv, &output->elts());
return output;
} else if (dynamic_cast<const WhileLoop*>(vtx)) {
auto wvtx = dynamic_cast<const WhileLoop*>(vtx);
return pool.alloc_single_element_form<WhileElement>(
nullptr, cfg_to_ir(pool, f, wvtx->condition), cfg_to_ir(pool, f, wvtx->body));
} else if (dynamic_cast<const UntilLoop*>(vtx)) {
auto wvtx = dynamic_cast<const UntilLoop*>(vtx);
auto result = pool.alloc_single_element_form<UntilElement>(
nullptr, cfg_to_ir(pool, f, wvtx->condition), cfg_to_ir(pool, f, wvtx->body));
clean_up_until_loop(pool, dynamic_cast<UntilElement*>(result->try_as_single_element()),
f.ir2.env);
return result;
} else if (dynamic_cast<const UntilLoop_single*>(vtx)) {
auto wvtx = dynamic_cast<const UntilLoop_single*>(vtx);
auto empty = pool.alloc_single_element_form<EmptyElement>(nullptr);
auto result = pool.alloc_single_element_form<UntilElement>(
nullptr, cfg_to_ir(pool, f, wvtx->block), empty);
clean_up_until_loop(pool, dynamic_cast<UntilElement*>(result->try_as_single_element()),
f.ir2.env);
return result;
} else if (dynamic_cast<const InfiniteLoopBlock*>(vtx)) {
auto wvtx = dynamic_cast<const InfiniteLoopBlock*>(vtx);
auto condition = pool.alloc_single_element_form<ConditionElement>(
nullptr, IR2_Condition::Kind::ALWAYS, std::nullopt, std::nullopt, RegSet(), false);
auto result = pool.alloc_single_element_form<WhileElement>(nullptr, condition,
cfg_to_ir(pool, f, wvtx->block));
clean_up_infinite_while_loop(pool,
dynamic_cast<WhileElement*>(result->try_as_single_element()));
return result;
} else if (dynamic_cast<const CondWithElse*>(vtx)) {
auto* cvtx = dynamic_cast<const CondWithElse*>(vtx);
// the cfg analysis pass may recognize some things out of order, which can cause
// fake nesting. This is actually a problem at this point because it can turn a normal
// cond into a cond with else, which emits different instructions. This attempts to recognize
// an else which is actually more cases and compacts it into a single statement. At this point
// I don't know if this is sufficient to catch all cases. it may even recognize the wrong
// thing in some cases... maybe we should check the delay slot instead?
auto else_ir = cfg_to_ir(pool, f, cvtx->else_vtx);
auto fancy_compact_result = merge_cond_else_with_sc_cond(pool, f, cvtx, else_ir);
if (fancy_compact_result) {
return fancy_compact_result;
}
// this case is disabled because I _think_ it is now properly handled elsewhere.
if (false /*&& dynamic_cast<IR_Cond*>(else_ir.get())*/) {
// auto extra_cond = dynamic_cast<IR_Cond*>(else_ir.get());
// std::vector<IR_Cond::Entry> entries;
// for (auto& x : cvtx->entries) {
// IR_Cond::Entry e;
// e.condition = cfg_to_ir(f, file, x.condition);
// e.body = cfg_to_ir(f, file, x.body);
// entries.push_back(std::move(e));
// }
// for (auto& x : extra_cond->entries) {
// entries.push_back(x);
// }
// std::shared_ptr<IR> result = std::make_shared<IR_Cond>(entries);
// clean_up_cond_no_else(&result, file);
// return result;
} else {
std::vector<CondWithElseElement::Entry> entries;
for (auto& x : cvtx->entries) {
CondWithElseElement::Entry e;
e.condition = cfg_to_ir(pool, f, x.condition);
e.body = cfg_to_ir(pool, f, x.body);
entries.push_back(std::move(e));
}
auto result = pool.alloc_single_element_form<CondWithElseElement>(nullptr, entries, else_ir);
clean_up_cond_with_else(
pool, dynamic_cast<CondWithElseElement*>(result->try_as_single_element()), f.ir2.env);
return result;
}
} else if (dynamic_cast<const ShortCircuit*>(vtx)) {
auto* svtx = dynamic_cast<const ShortCircuit*>(vtx);
// try as a type of expression first
auto as_type_of = try_sc_as_type_of(pool, f, svtx, f.ir2.env.version);
if (as_type_of) {
return as_type_of;
}
auto as_ash = try_sc_as_ash(pool, f, svtx);
if (as_ash) {
return as_ash;
}
auto as_abs = try_sc_as_abs(pool, f, svtx);
if (as_abs) {
return as_abs;
}
// now try as a normal and/or
std::vector<ShortCircuitElement::Entry> entries;
for (auto& x : svtx->entries) {
ShortCircuitElement::Entry e;
e.condition = cfg_to_ir(pool, f, x.condition);
if (x.likely_delay) {
auto delay = dynamic_cast<BlockVtx*>(x.likely_delay);
ASSERT(delay);
auto delay_start = f.ir2.atomic_ops->block_id_to_first_atomic_op.at(delay->block_id);
auto delay_end = f.ir2.atomic_ops->block_id_to_end_atomic_op.at(delay->block_id);
ASSERT(delay_end - delay_start == 1);
auto& op = f.ir2.atomic_ops->ops.at(delay_start);
auto op_as_expr = dynamic_cast<SetVarOp*>(op.get());
if (!op_as_expr) {
fmt::print("bad in {}\n", f.name());
fmt::print("{}\n", op->to_string(f.ir2.env));
}
ASSERT(op_as_expr);
e.branch_delay = *op_as_expr;
}
entries.push_back(e);
}
auto result = pool.alloc_single_element_form<ShortCircuitElement>(nullptr, entries);
clean_up_sc(pool, f, dynamic_cast<ShortCircuitElement*>(result->try_as_single_element()));
return result;
} else if (dynamic_cast<const CondNoElse*>(vtx)) {
auto* cvtx = dynamic_cast<const CondNoElse*>(vtx);
std::vector<CondNoElseElement::Entry> entries;
for (auto& x : cvtx->entries) {
CondNoElseElement::Entry e;
e.condition = cfg_to_ir(pool, f, x.condition);
e.body = cfg_to_ir(pool, f, x.body);
entries.push_back(std::move(e));
}
auto result = pool.alloc_single_element_form<CondNoElseElement>(nullptr, entries);
clean_up_cond_no_else(pool, f, result->back_ref(), result);
return result;
} else if (dynamic_cast<const GotoEnd*>(vtx)) {
auto* cvtx = dynamic_cast<const GotoEnd*>(vtx);
// dead code should always be (set! var 0)
auto dead_code = cfg_to_ir(pool, f, cvtx->unreachable_block);
// auto dead = dynamic_cast<SetVarElement*>(dead_code->try_as_single_element());
// if (!dead) {
// lg::error("failed to recognize dead code after return, got {}",
// dead_code->to_string(f.ir2.env));
// }
// ASSERT(dead);
// auto src = dynamic_cast<SimpleExpressionElement*>(dead->src()->try_as_single_element());
// ASSERT(src);
// ASSERT(src->expr().is_identity() && src->expr().get_arg(0).is_int() &&
// src->expr().get_arg(0).get_int() == 0);
auto result = pool.alloc_single_element_form<ReturnElement>(
nullptr, cfg_to_ir(pool, f, cvtx->body), dead_code);
clean_up_return(pool, dynamic_cast<ReturnElement*>(result->try_as_single_element()));
return result;
} else if (dynamic_cast<const Break*>(vtx)) {
auto* cvtx = dynamic_cast<const Break*>(vtx);
auto result = pool.alloc_single_element_form<BreakElement>(
nullptr, cfg_to_ir(pool, f, cvtx->body),
cfg_to_ir_allow_null(pool, f, cvtx->unreachable_block), cvtx->dest_block_id);
clean_up_break(pool, dynamic_cast<BreakElement*>(result->try_as_single_element()), f.ir2.env);
return result;
} else if (dynamic_cast<const EmptyVtx*>(vtx)) {
return pool.alloc_single_element_form<EmptyElement>(nullptr);
}
return nullptr;
}
Form* cfg_to_ir(FormPool& pool, Function& f, const CfgVtx* vtx) {
// we cache these because some functions will do a conversion, give up, and throw away the result.
// converting multiple times means that env-modifications will happen multiple times.
auto cached = pool.lookup_cached_conversion(vtx);
if (cached) {
return cached;
}
Form* result = cfg_to_ir_helper(pool, f, vtx);
if (vtx->needs_label) {
result->elts().insert(result->elts().begin(),
pool.alloc_element<LabelElement>(vtx->get_first_block_id()));
}
pool.cache_conversion(vtx, result);
return result;
}
/*!
* Post processing pass to clean up while loops - annoyingly the block before a while loop
* has a jump to the condition branch that we need to remove. This currently happens after all
* conversion but this may need to be revisited depending on the final order of simplifications.
*/
void clean_up_while_loops(FormPool& pool, Form* sequence, const Env& env) {
std::vector<size_t> to_remove; // the list of branches to remove by index in this sequence
for (int i = 0; i < sequence->size(); i++) {
auto* form_as_while = dynamic_cast<WhileElement*>(sequence->at(i));
if (form_as_while && !form_as_while->cleaned) {
ASSERT(i != 0);
auto prev_as_branch = dynamic_cast<BranchElement*>(sequence->at(i - 1));
ASSERT(prev_as_branch);
// printf("got while intro branch %s\n", prev_as_branch->print(file).c_str());
// this should be an always jump. We'll assume that the CFG builder successfully checked
// the brach destination, but we will check the condition.
ASSERT(prev_as_branch->op()->condition().kind() == IR2_Condition::Kind::ALWAYS);
ASSERT(prev_as_branch->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP);
to_remove.push_back(i - 1);
// now we should try to find the condition branch:
auto condition_branch = get_condition_branch(form_as_while->condition);
ASSERT(condition_branch.first);
ASSERT(condition_branch.first->op()->branch_delay().kind() == IR2_BranchDelay::Kind::NOP);
// printf("got while condition branch %s\n", condition_branch.first->print(file).c_str());
auto replacement = condition_branch.first->op()->get_condition_as_form(pool, env);
*(condition_branch.second) = replacement;
}
}
// remove the implied forward always branches.
for (int i = int(to_remove.size()); i-- > 0;) {
auto idx = to_remove.at(i);
ASSERT(dynamic_cast<BranchElement*>(sequence->at(idx)));
sequence->elts().erase(sequence->elts().begin() + idx);
}
}
} // namespace
void build_initial_forms(Function& function) {
auto& cfg = function.cfg;
if (!cfg->is_fully_resolved()) {
return;
}
try {
auto& pool = function.ir2.form_pool;
auto top_level = function.cfg->get_single_top_level();
std::vector<FormElement*> top_level_elts;
insert_cfg_into_list(*pool, function, top_level, &top_level_elts);
auto result = pool->alloc_sequence_form(nullptr, top_level_elts);
result->apply_form([&](Form* form) { clean_up_while_loops(*pool, form, function.ir2.env); });
result->apply([&](FormElement* form) {
auto as_cne = dynamic_cast<CondNoElseElement*>(form);
if (as_cne) {
clean_up_cond_no_else_final(function, as_cne);
}
auto as_return = dynamic_cast<ReturnElement*>(form);
if (as_return) {
clean_up_return_final(function, as_return);
}
auto as_break = dynamic_cast<BreakElement*>(form);
if (as_break) {
clean_up_break_final(function, as_break, function.ir2.env);
}
});
function.ir2.top_form = result;
} catch (std::runtime_error& e) {
function.warnings.error(e.what());
lg::warn("Failed to build initial forms in {}: {}", function.name(), e.what());
}
}
} // namespace decompiler