Auto stash before merge of "logging" and "upstream/w/cfg_2_ir"

This commit is contained in:
Shay 2020-10-06 15:35:57 -06:00
parent 4f091fd62a
commit c5681a6cc7
4 changed files with 534 additions and 146 deletions

View file

@ -6,17 +6,6 @@
#include "decompiler/Function/Function.h"
#include "decompiler/Disasm/InstructionMatching.h"
/*!
* TODO
* - fix "right aligned" nested and/or or or/ands
* - can either fix in here, or maybe in cfgvertex? not sure...
* - check for missing inverts
* - finish cleaning up and/or. There may be some extra work to invert the final condition.
* if it turns out this is needed. Or just wrap it in a giant "not" and figure it out later on?
* - store the destination of things when possible (cond else, short circuits)
* - revisit weird destinations in conds.
*/
namespace {
std::shared_ptr<IR> cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx);
@ -118,7 +107,6 @@ void clean_up_cond_with_else(std::shared_ptr<IR>* ir, LinkedObjectFile& file) {
assert(jump_to_next.first->branch_delay.kind == BranchDelay::NOP);
// patch the jump to next with a condition.
auto replacement = std::make_shared<IR_Compare>(jump_to_next.first->condition);
replacement->condition.invert();
*(jump_to_next.second) = replacement;
// patch the jump at the end of a block.
@ -145,130 +133,6 @@ void clean_up_cond_with_else(std::shared_ptr<IR>* ir, LinkedObjectFile& file) {
}
}
/*!
 * Report whether the branch's delay slot stores "false" into a register.
 * An explicit SET_REG_FALSE delay slot always counts. A register-to-register move in the delay
 * slot also counts when the branch condition is Condition::FALSE and the moved register is the
 * one the condition tested (e.g. a beql s7, x followed by an or y, x, r0). GOAL emits this form
 * for comparisons against false.
 */
bool delay_slot_sets_false(IR_Branch* branch) {
  const auto delay_kind = branch->branch_delay.kind;
  if (delay_kind == BranchDelay::SET_REG_FALSE) {
    return true;
  }

  // anything other than "test x for false, then copy x" is not a false-setting delay slot
  if (delay_kind != BranchDelay::SET_REG_REG || branch->condition.kind != Condition::FALSE) {
    return false;
  }

  auto* tested = dynamic_cast<IR_Register*>(branch->condition.src0.get());
  assert(tested);
  auto* copied = dynamic_cast<IR_Register*>(branch->branch_delay.source.get());
  assert(copied);
  return tested->reg == copied->reg;
}
/*!
 * Report whether the branch's delay slot stores a truthy value into a register, as in a GOAL
 * "or" form branch. An explicit SET_REG_TRUE delay slot always counts. A register-to-register
 * move also counts when the branch condition is Condition::TRUTHY and the moved register is the
 * one the condition tested.
 */
bool delay_slot_sets_truthy(IR_Branch* branch) {
  const auto delay_kind = branch->branch_delay.kind;
  if (delay_kind == BranchDelay::SET_REG_TRUE) {
    return true;
  }

  // anything other than "test x for truthiness, then copy x" does not qualify
  if (delay_kind != BranchDelay::SET_REG_REG || branch->condition.kind != Condition::TRUTHY) {
    return false;
  }

  auto* tested = dynamic_cast<IR_Register*>(branch->condition.src0.get());
  assert(tested);
  auto* copied = dynamic_cast<IR_Register*>(branch->branch_delay.source.get());
  assert(copied);
  return tested->reg == copied->reg;
}
/*!
 * Try to convert a short circuit to an and.
 * Every entry except the last must end in a branch whose delay slot sets the same destination
 * register to false. On success the short circuit is marked as AND, the destination of the first
 * branch is recorded as the final result, and each of those branches is replaced by the inverse
 * of its condition as a plain compare.
 * Returns false (leaving ir untouched) if any branch does not match the pattern.
 */
bool try_clean_up_sc_as_and(std::shared_ptr<IR_ShortCircuit>& ir, LinkedObjectFile& file) {
  (void)file;
  Register destination;
  std::shared_ptr<IR> ir_dest = nullptr;
  // the last entry has no branch to inspect
  const int branch_count = int(ir->entries.size()) - 1;

  for (int i = 0; i < branch_count; i++) {
    auto branch = get_condition_branch(&ir->entries.at(i).condition);
    assert(branch.first);
    if (!delay_slot_sets_false(branch.first)) {
      return false;
    }

    // the delay slot destination must be a register, same check as try_clean_up_sc_as_or
    auto* dest_reg = dynamic_cast<IR_Register*>(branch.first->branch_delay.destination.get());
    assert(dest_reg);

    if (i == 0) {
      ir_dest = branch.first->branch_delay.destination;
      destination = dest_reg->reg;
    } else if (destination != dest_reg->reg) {
      // every branch has to write the same register
      return false;
    }
  }

  ir->kind = IR_ShortCircuit::AND;
  ir->final_result = ir_dest;

  // now get rid of the branches
  for (int i = 0; i < branch_count; i++) {
    auto branch = get_condition_branch(&ir->entries.at(i).condition);
    assert(branch.first);
    auto replacement = std::make_shared<IR_Compare>(branch.first->condition);
    replacement->condition.invert();
    *(branch.second) = replacement;
  }

  return true;
}
/*!
 * Try to convert a short circuit to an or.
 * Every entry except the last must end in a branch whose delay slot sets the same destination
 * register to a truthy value. On success the short circuit is marked as OR.
 * Note - this will convert an and to a very strange or, so always use the try as and first.
 */
bool try_clean_up_sc_as_or(std::shared_ptr<IR_ShortCircuit>& ir, LinkedObjectFile& file) {
  Register destination;
  // the last entry has no branch to inspect
  const int branch_count = int(ir->entries.size()) - 1;

  for (int idx = 0; idx < branch_count; idx++) {
    auto cond_branch = get_condition_branch(&ir->entries.at(idx).condition);
    assert(cond_branch.first);
    if (!delay_slot_sets_truthy(cond_branch.first)) {
      printf("reject %s\n", cond_branch.first->print(file).c_str());
      return false;
    }

    auto* dest_reg = dynamic_cast<IR_Register*>(cond_branch.first->branch_delay.destination.get());
    assert(dest_reg);

    if (idx == 0) {
      destination = dest_reg->reg;
    } else if (destination != dest_reg->reg) {
      // every branch has to write the same register
      return false;
    }
  }

  ir->kind = IR_ShortCircuit::OR;
  // todo write the destination somewhere...
  return true;
}
/*!
 * Clean up a short circuit with more than one entry: first try to recognize it as an and, then
 * as an or. Asserts if neither pattern matches.
 */
void clean_up_sc(std::shared_ptr<IR_ShortCircuit>& ir, LinkedObjectFile& file) {
  assert(ir->entries.size() > 1);
  // order matters: the "or" matcher would also accept (and mangle) an "and".
  if (try_clean_up_sc_as_and(ir, file)) {
    return;
  }
  if (try_clean_up_sc_as_or(ir, file)) {
    return;
  }
  assert(false);
}
/*!
* A GOAL comparison which produces a boolean is recognized as a cond-no-else by the CFG analysis.
* But it should not be decompiled as a branching statement.
@ -289,6 +153,11 @@ void convert_cond_no_else_to_compare(std::shared_ptr<IR>* ir) {
auto condition_as_single = dynamic_cast<IR_Branch*>(cne->entries.front().condition.get());
if (condition_as_single) {
// as far as I can tell this is totally valid but just happens to not appear?
// if this case is ever hit in the future it's fine and we just need to implement this.
// but leaving empty for now so there's fewer things to test.
// assert(false);
auto replacement = std::make_shared<IR_Set>(
IR_Set::REG_64, dst, std::make_shared<IR_Compare>(condition.first->condition));
*ir = replacement;
@ -343,7 +212,6 @@ void clean_up_cond_no_else(std::shared_ptr<IR>* ir, LinkedObjectFile& file) {
}
auto replacement = std::make_shared<IR_Compare>(jump_to_next.first->condition);
replacement->condition.invert();
*(jump_to_next.second) = replacement;
e.cleaned = true;
@ -414,19 +282,12 @@ bool is_int_math_3(IR* ir,
return true;
}
/*!
 * True only when both IRs are IR_Register nodes that refer to the same register.
 * If either one is not a register the answer is false.
 */
bool is_same_reg(IR* a, IR* b) {
  auto* lhs = dynamic_cast<IR_Register*>(a);
  if (!lhs) {
    return false;
  }
  auto* rhs = dynamic_cast<IR_Register*>(b);
  if (!rhs) {
    return false;
  }
  return lhs->reg == rhs->reg;
}
/*!
* Try to convert this SC Vertex into an abs (integer).
* Will return a converted abs IR if successful, or nullptr if its not possible
*/
std::shared_ptr<IR> try_sc_as_abs(Function& f, LinkedObjectFile& file, ShortCircuit* vtx) {
if (vtx->entries.size() != 1) {
return nullptr;
@ -837,7 +698,6 @@ std::shared_ptr<IR> cfg_to_ir(Function& f, LinkedObjectFile& file, CfgVtx* vtx)
entries.push_back(e);
}
auto result = std::make_shared<IR_ShortCircuit>(entries);
clean_up_sc(result, file);
// todo clean these into real and/or.
return result;
} else if (dynamic_cast<CondNoElse*>(vtx)) {

View file

@ -0,0 +1,527 @@
#include "LispPrint.h"
#include <cassert>
#include <iostream>
#include <vector>
//////// HACK - the symbol table now looks up symbols by string, which is wasteful: every string
// ends up stored twice (once as the map key and once as the interned value).
// It should probably just be removed.
/*!
 * String interning.
 * Returns the table's pointer for str, allocating and registering a copy the first time a given
 * string is seen. The returned pointer is owned by the table (it is deleted in ~SymbolTable), so
 * equal strings always yield the same pointer.
 */
std::string* SymbolTable::intern(const std::string& str) {
  // single lookup on the common (already interned) path
  auto existing = map.find(str);
  if (existing != map.end()) {
    return existing->second;
  }

  auto* new_string = new std::string(str);
  map[str] = new_string;
  return new_string;
}
/*!
* Global interned string table
*/
SymbolTable gSymbolTable;
/*!
 * Create the table along with the single shared form used to represent the empty list.
 */
SymbolTable::SymbolTable() {
  auto sentinel = std::make_shared<Form>();
  sentinel->kind = FormKind::EMPTY_LIST;
  empty_pair = sentinel;
}
/*!
 * Free every interned string that intern() allocated.
 */
SymbolTable::~SymbolTable() {
  for (auto it = map.begin(); it != map.end(); ++it) {
    delete it->second;
  }
}
/*!
 * Render this form as a single-line string (no line breaks or indentation are added).
 */
std::string Form::toStringSimple() {
  std::string one_line;
  buildStringSimple(one_line);
  return one_line;
}
void Form::buildStringSimple(std::string& str) {
std::vector<FormToken> tokens;
toTokenList(tokens);
for (auto& token : tokens) {
switch (token.kind) {
case TokenKind::WHITESPACE:
str.push_back(' ');
break;
case TokenKind::SYMBOL:
str.append(*token.str);
break;
case TokenKind::OPEN_PAREN:
str.push_back('(');
break;
case TokenKind::DOT:
str.push_back('.');
break;
case TokenKind::CLOSE_PAREN:
str.push_back(')');
break;
case TokenKind::EMPTY_PAIR:
str.append("()");
break;
case TokenKind::SPECIAL_SYMBOL:
str.append(*token.str);
break;
default:
throw std::runtime_error("buildStringSimple unknown token kind");
}
}
}
void Form::toTokenList(std::vector<FormToken>& tokens) {
switch (kind) {
case FormKind::SYMBOL:
tokens.emplace_back(TokenKind::SYMBOL, symbol);
break;
case FormKind::PAIR: {
tokens.emplace_back(TokenKind::OPEN_PAREN);
Form* toPrint = this;
for (;;) {
if (toPrint->kind == FormKind::PAIR) {
toPrint->pair[0]->toTokenList(tokens); // print CAR
toPrint = toPrint->pair[1].get();
if (toPrint->kind == FormKind::EMPTY_LIST) {
tokens.emplace_back(TokenKind::CLOSE_PAREN);
return;
} else {
tokens.emplace_back(TokenKind::WHITESPACE);
}
} else { // not a proper list!
tokens.emplace_back(TokenKind::DOT);
tokens.emplace_back(TokenKind::WHITESPACE);
toPrint->toTokenList(tokens);
tokens.emplace_back(TokenKind::CLOSE_PAREN);
return;
}
}
} break;
case FormKind::EMPTY_LIST:
tokens.emplace_back(TokenKind::EMPTY_PAIR);
break;
default:
throw std::runtime_error("unhandled form type in buildSimpleString");
break;
}
}
///////////////////
// Pretty Printer
///////////////////
/*!
 * Doubly linked list node for the pretty printer. A node is either a token of the output
 * (whitespace, paren, symbol, ...) or a line separator, which has no token.
 */
struct PrettyPrinterNode {
  // the token for this node; stays nullptr for line separators
  FormToken* tok = nullptr;
  // line the token is on; not meaningful for line separators
  int line = -1;
  // indent of the line; only valid on the first token of a line
  int lineIndent = -1;
  // distance of the start of the token from the left margin
  int offset = -1;
  // extra indent applied at a line separator
  int specialIndentDelta = 0;
  // true if this node is a line separator rather than a token
  bool is_line_separator = false;
  // linked list neighbors
  PrettyPrinterNode* next = nullptr;
  PrettyPrinterNode* prev = nullptr;
  // enclosing open paren, if any; an open paren points at its close paren and vice versa
  PrettyPrinterNode* paren = nullptr;

  PrettyPrinterNode() = default;
  explicit PrettyPrinterNode(FormToken& _tok) : tok(&_tok) {}
};
/*!
 * Splice a line separator into the list directly after node. Nothing happens if node is the last
 * node or is already followed by a line separator.
 * specialIndentDelta is stored on the new separator.
 */
static void insertNewlineAfter(PrettyPrinterNode* node, int specialIndentDelta) {
  PrettyPrinterNode* successor = node->next;
  if (!successor || successor->is_line_separator) {
    return;
  }

  auto* separator = new PrettyPrinterNode;
  separator->is_line_separator = true;
  separator->specialIndentDelta = specialIndentDelta;

  // link node <-> separator <-> successor
  separator->prev = node;
  separator->next = successor;
  node->next = separator;
  successor->prev = separator;
}
/*!
 * Splice a line separator into the list directly before node. Nothing happens if node is the
 * first node or is already preceded by a line separator.
 * specialIndentDelta is stored on the new separator.
 */
static void insertNewlineBefore(PrettyPrinterNode* node, int specialIndentDelta) {
  PrettyPrinterNode* predecessor = node->prev;
  if (!predecessor || predecessor->is_line_separator) {
    return;
  }

  auto* separator = new PrettyPrinterNode;
  separator->is_line_separator = true;
  separator->specialIndentDelta = specialIndentDelta;

  // link predecessor <-> separator <-> node
  separator->prev = predecessor;
  separator->next = node;
  predecessor->next = separator;
  node->prev = separator;
}
/*!
 * Break a list across multiple lines: a line separator is requested after every direct element
 * of the list that starts at leftParen. Nested lists are treated as a single element (the break
 * goes after their close paren). This is the fundamental reducing operation of the algorithm.
 */
static void breakList(PrettyPrinterNode* leftParen) {
  assert(!leftParen->is_line_separator);
  assert(leftParen->tok->kind == TokenKind::OPEN_PAREN);
  PrettyPrinterNode* const right_paren = leftParen->paren;
  assert(right_paren->tok->kind == TokenKind::CLOSE_PAREN);

  PrettyPrinterNode* cursor = leftParen->next;
  while (cursor && cursor != right_paren) {
    if (!cursor->is_line_separator) {
      const auto kind = cursor->tok->kind;
      if (kind == TokenKind::OPEN_PAREN) {
        // skip over the whole nested list and break after it
        cursor = cursor->paren;
        assert(cursor->tok->kind == TokenKind::CLOSE_PAREN);
        insertNewlineAfter(cursor, 0);
      } else if (kind != TokenKind::WHITESPACE) {
        assert(kind != TokenKind::CLOSE_PAREN);
        insertNewlineAfter(cursor, 0);
      }
    }
    cursor = cursor->next;
  }
}
/*!
 * Compute proper line numbers, offsets, and indents for a list of tokens with newlines.
 * A close paren that is not on the same line as its open paren is put on a line of its own:
 * line separators are spliced in before and after it if they are not there already.
 * Returns the first token of the first line on which some token ends past line_length, or
 * nullptr if there is no such line.
 */
static PrettyPrinterNode* propagatePretty(PrettyPrinterNode* list, int line_length) {
  // propagate line numbers
  PrettyPrinterNode* rv = nullptr;
  int line = list->line;
  for (auto* n = list; n; n = n->next) {
    if (n->is_line_separator) {
      line++;
      continue;
    }

    n->line = line;

    // add the weird newline: only for close parens that ended up on another line than their open
    if (n->tok->kind != TokenKind::CLOSE_PAREN || n->line == n->paren->line) {
      continue;
    }
    if (n->prev && !n->prev->is_line_separator) {
      insertNewlineBefore(n, 0);
      line++;
      // the close paren itself now lives on the new line, keep its line number in sync
      n->line = line;
    }
    if (n->next && !n->next->is_line_separator) {
      // the separator added here is counted when the loop reaches it
      insertNewlineAfter(n, 0);
    }
  }

  // compute offsets and indents
  std::vector<int> indentStack;
  indentStack.push_back(0);
  int offset = 0;
  PrettyPrinterNode* line_start = list;
  bool previous_line_sep = false;
  for (auto* n = list; n; n = n->next) {
    if (n->is_line_separator) {
      previous_line_sep = true;
      // a new line starts at the current indent; the special delta is folded into the stack entry
      offset = indentStack.back() += n->specialIndentDelta;
      continue;
    }

    if (previous_line_sep) {
      // first token after a separator starts the line and records its indent
      line_start = n;
      n->lineIndent = offset;
      previous_line_sep = false;
    }

    n->offset = offset;
    offset += n->tok->toString().length();
    if (offset > line_length && !rv) {
      rv = line_start;
    }

    if (n->tok->kind == TokenKind::OPEN_PAREN) {
      // indent for the contents of this list; offset is already past the paren here
      if (!n->prev || n->prev->is_line_separator) {
        indentStack.push_back(offset + 1);
      } else {
        indentStack.push_back(offset - 1);
      }
    }

    if (n->tok->kind == TokenKind::CLOSE_PAREN) {
      indentStack.pop_back();
    }
  }
  return rv;
}
/*!
 * Find the first token of the line after the one start is on.
 * Returns nullptr if start is on the last line.
 */
static PrettyPrinterNode* getNextLine(PrettyPrinterNode* start) {
  assert(!start->is_line_separator);
  const int current_line = start->line;

  PrettyPrinterNode* cursor = start;
  // walk past separators and everything still on the starting line
  while (cursor->is_line_separator || cursor->line == current_line) {
    cursor = cursor->next;
    if (!cursor) {
      return nullptr;
    }
  }
  return cursor;
}
/*!
 * Find the next open paren after start on start's line. start may be in the middle of a line and
 * is itself not considered. Returns nullptr if the rest of the line has no open paren.
 */
static PrettyPrinterNode* getNextListOnLine(PrettyPrinterNode* start) {
  assert(!start->is_line_separator);
  const int line = start->line;

  // the scan ends at the end of the list, at a line separator, or when the line number changes
  for (PrettyPrinterNode* n = start->next; n && !n->is_line_separator && n->line == line;
       n = n->next) {
    if (n->tok->kind == TokenKind::OPEN_PAREN) {
      return n;
    }
  }
  return nullptr;
}
/*!
 * Find the first open paren at or after start on start's line. start may be in the middle of a
 * line and is itself considered. Returns nullptr if the rest of the line has no open paren.
 */
static PrettyPrinterNode* getFirstListOnLine(PrettyPrinterNode* start) {
  assert(!start->is_line_separator);
  const int line = start->line;

  // the scan ends at the end of the list, at a line separator, or when the line number changes
  for (PrettyPrinterNode* n = start; n && !n->is_line_separator && n->line == line; n = n->next) {
    if (n->tok->kind == TokenKind::OPEN_PAREN) {
      return n;
    }
  }
  return nullptr;
}
/*!
 * Starting from start, find the first line containing a token whose offset is beyond
 * line_length and return the first token seen on that line. Returns nullptr if no such line
 * exists.
 */
static PrettyPrinterNode* getFirstBadLine(PrettyPrinterNode* start, int line_length) {
  assert(!start->is_line_separator);
  int tracked_line = start->line;
  PrettyPrinterNode* tracked_line_head = start;

  for (PrettyPrinterNode* cursor = start; cursor; cursor = cursor->next) {
    if (cursor->is_line_separator) {
      // a separator is never the last node of the list
      assert(cursor->next);
      continue;
    }

    if (cursor->line != tracked_line) {
      // entered a new line, remember where it begins
      tracked_line = cursor->line;
      tracked_line_head = cursor;
    }

    if (cursor->offset > line_length) {
      return tracked_line_head;
    }
  }
  return nullptr;
}
/*!
* Break insertion algorithm.
* Repeatedly looks for the first line that has a token starting past line_length and breaks the
* lists on that line, one at a time from left to right, until that line is no longer reported as
* the first bad line or there are no more lists on it to break. Finishes when no bad line can be
* found any more.
* head is the first node of the token list, line_length is the limit passed to getFirstBadLine.
*/
static void insertBreaksAsNeeded(PrettyPrinterNode* head, int line_length) {
// start of the line handled by the previous iteration (nullptr on the first iteration)
PrettyPrinterNode* last_line_complete = nullptr;
// where the search for the next bad line begins
PrettyPrinterNode* line_to_start_line_search = head;
// loop over lines
for (;;) {
// compute lines as needed
propagatePretty(head, line_length);
// search for a bad line starting at the last line we fixed
PrettyPrinterNode* candidate_line = getFirstBadLine(line_to_start_line_search, line_length);
// if we got the same line we started on, this means we couldn't fix it.
// (this is also taken on the first iteration when no bad line was found, as both are nullptr)
if (candidate_line == last_line_complete) {
candidate_line = nullptr; // so we say our candidate was bad and try to find another
PrettyPrinterNode* next_line = getNextLine(line_to_start_line_search);
if (next_line) {
candidate_line = getFirstBadLine(next_line, line_length);
}
}
// nothing left to fix
if (!candidate_line)
break;
// okay, we have a line which needs fixing.
// the candidate must be the first token of its line.
assert(!candidate_line->prev || candidate_line->prev->is_line_separator);
PrettyPrinterNode* form_to_start = getFirstListOnLine(candidate_line);
// break lists on this line one by one until the line is okay or we run out of lists
for (;;) {
// only reachable on the first pass: the line is too long but has no list to break
if (!form_to_start) {
printf("pretty printer has failed. Fix the bug or increase the the line length.\n");
assert(false);
}
breakList(form_to_start);
// line numbers and offsets are stale after inserting breaks, so recompute them
propagatePretty(head, line_length);
// stop as soon as this line is no longer the first bad one
if (getFirstBadLine(candidate_line, line_length) != candidate_line) {
break;
}
form_to_start = getNextListOnLine(form_to_start);
if (!form_to_start)
break;
}
// remember this line so the next iteration can tell if it is still the first bad line
last_line_complete = candidate_line;
line_to_start_line_search = candidate_line;
}
}
/*!
 * Insert the line breaks that are tied to specific symbols rather than to line length.
 * Currently only "deftype" is special: a line break is requested after the close paren of the
 * next list that follows it on the same line.
 */
static void insertSpecialBreaks(PrettyPrinterNode* node) {
  for (PrettyPrinterNode* cursor = node; cursor; cursor = cursor->next) {
    if (cursor->is_line_separator || cursor->tok->kind != TokenKind::SYMBOL) {
      continue;
    }
    if (*cursor->tok->str != "deftype") {
      continue;
    }

    PrettyPrinterNode* following_list = getNextListOnLine(cursor);
    if (following_list) {
      insertNewlineAfter(following_list->paren, 0);
    }
  }
}
std::string Form::toStringPretty(int indent, int line_length) {
(void)indent;
(void)line_length;
std::vector<FormToken> tokens;
toTokenList(tokens);
assert(!tokens.empty());
std::string pretty;
// build linked list of nodes
PrettyPrinterNode* head = new PrettyPrinterNode(tokens[0]);
PrettyPrinterNode* node = head;
head->line = 0;
head->offset = 0;
head->lineIndent = 0;
int offset = head->tok->toString().length();
for (size_t i = 1; i < tokens.size(); i++) {
node->next = new PrettyPrinterNode(tokens[i]);
node->next->prev = node;
node = node->next;
node->line = 0;
node->offset = offset;
offset += node->tok->toString().length();
node->lineIndent = 0;
}
// attach parens.
std::vector<PrettyPrinterNode*> parenStack;
parenStack.push_back(nullptr);
for (PrettyPrinterNode* n = head; n; n = n->next) {
if (n->tok->kind == TokenKind::OPEN_PAREN) {
parenStack.push_back(n);
} else if (n->tok->kind == TokenKind::CLOSE_PAREN) {
n->paren = parenStack.back();
parenStack.back()->paren = n;
parenStack.pop_back();
} else {
n->paren = parenStack.back();
}
}
assert(parenStack.size() == 1);
assert(!parenStack.back());
insertSpecialBreaks(head);
propagatePretty(head, line_length);
insertBreaksAsNeeded(head, line_length);
// write to string
bool newline_prev = true;
for (PrettyPrinterNode* n = head; n; n = n->next) {
if (n->is_line_separator) {
pretty.push_back('\n');
newline_prev = true;
} else {
if (newline_prev) {
pretty.append(n->lineIndent, ' ');
newline_prev = false;
if (n->tok->kind == TokenKind::WHITESPACE)
continue;
}
pretty.append(n->tok->toString());
}
}
for (;;) {
if (!head)
break;
auto* next = head->next;
delete head;
head = next;
}
return pretty;
}
/*!
 * Make a symbol form for the given string. The string is interned in the global symbol table.
 */
std::shared_ptr<Form> toForm(const std::string& str) {
  auto symbol_form = std::make_shared<Form>();
  symbol_form->kind = FormKind::SYMBOL;
  symbol_form->symbol = gSymbolTable.intern(str);
  return symbol_form;
}
/*!
 * Make a one-element list containing form: a pair whose tail is the empty list.
 */
std::shared_ptr<Form> buildList(std::shared_ptr<Form> form) {
  auto cell = std::make_shared<Form>();
  cell->kind = FormKind::PAIR;
  cell->pair[1] = gSymbolTable.getEmptyPair();
  cell->pair[0] = form;
  return cell;
}
std::shared_ptr<Form> buildList(const std::string& str) {
return buildList(toForm(str));
}
std::shared_ptr<Form> buildList(std::shared_ptr<Form>* forms, int count) {
auto f = std::make_shared<Form>();
f->kind = FormKind::PAIR;
f->pair[0] = forms[0];
if (count - 1) {
f->pair[1] = buildList(forms + 1, count - 1);
} else {
f->pair[1] = gSymbolTable.getEmptyPair();
}
return f;
}
/*!
 * Make a proper list out of a vector of forms. An empty vector gives the empty list.
 */
std::shared_ptr<Form> buildList(std::vector<std::shared_ptr<Form>>& forms) {
  if (!forms.empty()) {
    return buildList(forms.data(), static_cast<int>(forms.size()));
  }
  return gSymbolTable.getEmptyPair();
}
std::shared_ptr<Form> buildList(std::vector<std::string>& forms) {
if (forms.empty()) {
return gSymbolTable.getEmptyPair();
}
std::vector<std::shared_ptr<Form>> f;
for (auto& x : forms) {
f.push_back(toForm(x));
}
return buildList(f.data(), f.size());
}

View file

@ -11,6 +11,7 @@
int main(int argc, char** argv) {
while (true) {
spdlog::set_level(spdlog::level::debug);
auto my_logger = spdlog::basic_logger_mt("file_logger", "logs/basic-log.txt");
// run the runtime in a loop so we can reset the game and have it restart cleanly
spdlog::info("gk {}.{} OK!\n", versions::GOAL_VERSION_MAJOR, versions::GOAL_VERSION_MINOR);