jak-project/common/goos/PrettyPrinter2.cpp
water111 f3c63f26bb
fix let* format, new on stack guessing case, type failure, handle casts (#1860)
Fixes https://github.com/open-goal/jak-project/issues/1821 by adding a
special case for `new` method calls where the argument with type
`symbol` is actually an address to uninitialized structure on the stack.

Fixes https://github.com/open-goal/jak-project/issues/1849 (or at least
the cause of the issue Vaser gave in chat, and one random one I found in
`debug-sphere`)

Fixes https://github.com/open-goal/jak-project/issues/1853

Fixes https://github.com/open-goal/jak-project/issues/1857 by moving the
cast into the cond if the body is a single form and the destination type
is a bitfield/enum which is likely to work well. Seems to work on the
examples we could find in jak 1 and jak 2.

Also fixes an issue with casts on the result of `handle->process` (a
common place to use casts)

The output of `handle->process` is a plain `process`. Most of the time, you
end up casting this to a more specific type. If you add a cast on every use
of the variable, the decompiler will decide to change the type of that
variable to the more specific type, and this breaks the handle cast.

so previously it was impossible to get code like
```
    (let* ((s2-0 (the-as swingpole (handle->process (-> self control hack))))
           (gp-0 (-> s2-0 dir))
           )
```
But now it will work
2022-09-07 21:58:09 -04:00

475 lines
15 KiB
C++

#include "PrettyPrinter2.h"
#include "common/common_types.h"
#include "common/util/Assert.h"
#include "third-party/fmt/core.h"
namespace pretty_print {
namespace v2 {
// Note: there's some recursive stuff, but we only recurse once per list depth.
// The previous issues we had with stack overflow only happened when there was a stack frame per
// element in a list.
// The main node type: one Node per atom or list of the form being printed.
// Unlike v1, this nests lists: a list node owns its elements in child_nodes.
// Nodes have raw pointers to their parents (set by link), so they are generally not safe to copy
// once the tree has been linked.
struct Node {
  Node() = default;

  // Build an atom node that prints as the given text.
  Node(const std::string& str) : kind(Kind::ATOM), atom_str(str) {}

  // Build a list node from its elements. is_list selects a proper list; otherwise the node is an
  // improper list (the printer puts a "." before its last element).
  Node(std::vector<Node>&& list, bool is_list)
      : kind(is_list ? Kind::LIST : Kind::IMPROPER_LIST), child_nodes(std::move(list)) {}

  enum class Kind : u8 { ATOM, LIST, IMPROPER_LIST, INVALID } kind = Kind::INVALID;

  // elements of a LIST/IMPROPER_LIST. Unused for atoms.
  std::vector<Node> child_nodes;

  // text of an ATOM. Unused for lists.
  std::string atom_str;

  // number of quotes this is wrapped in.
  u32 quoted = 0;

  // set by link: the enclosing list (nullptr for the root) and the nesting depth (root is 0).
  Node* parent = nullptr;
  u32 my_depth = 0;

  /*!
   * Record the parent and depth of this node and of everything below it.
   * Each visited node is appended to bfs_order before its children are visited, so in the
   * resulting vector a parent always comes before all of its descendants. (Despite the
   * parameter's name, that makes the order a depth-first pre-order.)
   * Asserts that list nodes are non-empty and that no node is left as INVALID.
   */
  void link(Node* this_parent, std::vector<Node*>* bfs_order, u32 depth) {
    parent = this_parent;
    my_depth = depth;
    bfs_order->push_back(this);

    switch (kind) {
      case Kind::ATOM:
        break;
      case Kind::LIST:
      case Kind::IMPROPER_LIST:
        ASSERT(!child_nodes.empty());
        for (auto& child : child_nodes) {
          child.link(this, bfs_order, depth + 1);
        }
        break;
      default:
        ASSERT(false);
    }
  }

  /*!
   * True if this node is a broken list, or if following the last child repeatedly from this
   * node reaches a broken list.
   */
  bool needs_end_paren_newline() const {
    const Node* cur = this;
    for (;;) {
      if (cur->break_list) {
        return true;
      }
      if (cur->child_nodes.empty()) {
        return false;
      }
      cur = &cur->child_nodes.back();
    }
  }

  /*!
   * Short description of this node for debugging. Only atoms include their text.
   */
  std::string debug_to_string() const {
    switch (kind) {
      case Kind::ATOM:
        return fmt::format("[atom {}]", atom_str);
      case Kind::LIST:
        return "[list]";
      case Kind::IMPROPER_LIST:
        return "[improper list]";
      default:
        ASSERT(false);
        // Still return a value so control can never run off the end of this non-void function
        // if ASSERT does not terminate the program.
        return "[invalid]";
    }
  }

  // how wide is this text? not including the indentation of this subtree.
  u32 text_len = 0;

  // true once this list has been split over multiple lines.
  bool break_list = false;

  // for a broken list: how many leading elements stay on the line with the open paren.
  u8 top_line_count = 0;

  // for a broken list: indentation added in front of the elements on the following lines.
  u8 sub_elt_indent = 0;
};
/*!
 * Convert a goos object into a tree of printer nodes.
 * - The empty list, integers, floats, chars, symbols and strings become atoms.
 * - A pair of the exact shape (quote x) becomes the node for x with one more quote on it.
 * - Any other pair becomes a list node: proper if the chain of cdrs ends in the empty list,
 *   improper otherwise (the final non-pair cdr is stored as the last child).
 * Throws std::runtime_error for arrays, lambdas, macros and environments, which the pretty
 * printer does not support. Any other object kind trips an assert and then throws as well.
 */
Node to_node(const goos::Object& obj) {
  switch (obj.type) {
    case goos::ObjectType::EMPTY_LIST:
      // just treat this as a printing "atom"
      return Node("()");

    case goos::ObjectType::INTEGER:
    case goos::ObjectType::FLOAT:
    case goos::ObjectType::CHAR:
    case goos::ObjectType::SYMBOL:
    case goos::ObjectType::STRING:
      // these are all atoms that the pretty printer should just treat as a blob.
      return Node(obj.print());

    case goos::ObjectType::PAIR: {
      // we've got three cases: quoted thing, proper list, improper list.
      const auto& first = obj.as_pair()->car;
      if (first.is_symbol() && first.as_symbol()->name == "quote") {
        const auto& second = obj.as_pair()->cdr;
        // only (quote <one thing>) is printed with the ' shorthand.
        if (second.is_pair() && second.as_pair()->cdr.is_empty_list()) {
          Node result = to_node(second.as_pair()->car);
          result.quoted++;
          return result;
        }
      }

      // not quoted, so either list or pair.
      // walk the cdr chain iteratively: we only recurse into elements, not along the list.
      std::vector<Node> children;
      const goos::Object* rest = &obj;
      while (rest->is_pair()) {
        children.push_back(to_node(rest->as_pair()->car));
        rest = &rest->as_pair()->cdr;
        if (rest->is_empty_list()) {
          // reached the terminating empty list: proper list.
          return Node(std::move(children), true);
        }
      }
      // the chain ended in something that is neither a pair nor the empty list: improper list.
      children.push_back(to_node(*rest));
      return Node(std::move(children), false);
    }

    // these are unsupported by the pretty printer.
    case goos::ObjectType::ARRAY:  // todo, we should probably handle arrays.
    case goos::ObjectType::LAMBDA:
    case goos::ObjectType::MACRO:
    case goos::ObjectType::ENVIRONMENT:
      throw std::runtime_error("tried to pretty print a goos object kind which is not supported.");

    default:
      ASSERT(false);
      // never fall off the end of a function that must return a Node, even if ASSERT returns.
      throw std::runtime_error("tried to pretty print an unknown goos object kind.");
  }
}
/*!
 * Refresh text_len on every node.
 * The vector is walked back to front; since link puts each node ahead of its descendants, all
 * children have an up-to-date text_len by the time their parent is measured.
 * - atom: its text plus its quotes.
 * - unbroken list: open paren, quotes, and every child followed by one character (the space
 *   after it, or the close paren after the last one).
 * - broken list: the widest of its lines, where the first line holds the open paren, quotes
 *   and up to top_line_count children, and each later line is sub_elt_indent plus one child.
 */
void recompute_lengths(const std::vector<Node*>& bfs_order) {
  for (size_t remaining = bfs_order.size(); remaining-- > 0;) {
    Node* cur = bfs_order[remaining];

    if (cur->kind == Node::Kind::ATOM) {
      cur->text_len = cur->atom_str.length() + cur->quoted;
      continue;
    }

    if (cur->kind != Node::Kind::LIST && cur->kind != Node::Kind::IMPROPER_LIST) {
      ASSERT(false);
      continue;
    }

    const auto& elts = cur->child_nodes;

    if (!cur->break_list) {
      // everything on one line.
      u32 width = 1 + cur->quoted;  // open paren + quotes
      for (const auto& elt : elts) {
        width += (elt.text_len + 1);  // space or close paren.
      }
      cur->text_len = width;
      continue;
    }

    // broken list: measure the first line...
    const int top_count = std::min(int(elts.size()), int(cur->top_line_count));
    int top_width = 1 + cur->quoted;  // open paren + quotes
    for (int idx = 0; idx < top_count; idx++) {
      top_width += elts.at(idx).text_len;
      top_width++;  // trailing space
    }
    if (top_count > 0) {
      top_width--;  // last one doesn't have a trailing space
    }

    // ...then each element that gets a line of its own.
    int widest = top_width;
    for (u32 idx = top_count; idx < elts.size(); idx++) {
      const int line_width = cur->sub_elt_indent + elts.at(idx).text_len;
      widest = std::max(widest, line_width);
    }
    cur->text_len = widest;
  }
}
/*!
* Note: this has special cases for how to insert breaks.
* These rules will be used if the printer decides it should break up the list.
* If you want to force a form to always be broken up, see insert_required_breaks
*/
void break_list(Node* node) {
ASSERT(!node->break_list);
node->break_list = true;
node->sub_elt_indent = 2;
node->top_line_count = 1;
const std::unordered_set<std::string> sameline_splitters = {
"if", "<", ">", "<=", ">=", "set!", "=", "!=", "+",
"-", "*", "/", "the", "->", "and", "or", "logand", "logior",
"logxor", "+!", "*!", "logtest?", "not", "zero?", "nonzero?"};
if (node->child_nodes.at(0).kind == Node::Kind::LIST) {
// ((foo
// bar
node->sub_elt_indent = 1;
} else if (node->child_nodes.at(0).kind == Node::Kind::ATOM) {
auto& name = node->child_nodes[0].atom_str;
if (name == "defun" || name == "defun-debug" || name == "defbehavior" || name == "defstate") {
// things with three things in the top line: (defun <name> <args>
node->top_line_count = 3;
} else if (name == "defskelgroup") {
// things with 5 things in the top line: (defskelgroup <name> <art> jgeo janim
node->top_line_count = 5;
node->sub_elt_indent += name.size();
} else if (name == "process-new") {
// things with 3 things in the top line
node->top_line_count = 3;
node->sub_elt_indent += name.size();
} else if (name == "ja" || name == "ja-no-eval") {
node->top_line_count = 3;
node->sub_elt_indent += name.size();
} else if (name == "defmethod") {
// things with 4 things in the top line: (defmethod <method> <type> <args>
node->top_line_count = 4;
} else if (name == "until" || name == "while" || name == "dotimes" || name == "countdown" ||
name == "when" || name == "behavior" || name == "lambda" || name == "defpart" ||
name == "define") {
node->top_line_count = 2;
} else if (name == "let" || name == "let*" || name == "rlet") {
// special case for things like let.
node->top_line_count = 2; // (let <defs>
if (node->child_nodes.size() > 1 && node->child_nodes[1].child_nodes.size() > 1 &&
!node->child_nodes[1].break_list) {
// and break the defs.
break_list(&node->child_nodes[1]);
}
} else if (sameline_splitters.count(name) > 0) {
// if has a special indent rule:
node->top_line_count = 2;
node->sub_elt_indent += name.size();
} else if (name == "cond") {
// cond should always be broken up
for (size_t i = 1; i < node->child_nodes.size(); i++) {
auto& cond_body = node->child_nodes[i];
if (cond_body.kind == Node::Kind::LIST && !cond_body.break_list) {
break_list(&cond_body);
}
}
} else if (name == "case") {
// case gets a second thing on top, plus break up everything.
node->top_line_count = 2;
for (size_t i = 2; i < node->child_nodes.size(); i++) {
auto& cond_body = node->child_nodes[i];
if (cond_body.kind == Node::Kind::LIST && !cond_body.break_list) {
break_list(&cond_body);
}
}
}
}
Node* child = node;
for (Node* p = node->parent; p; p = p->parent) {
if (!p->break_list && &p->child_nodes.back() != child) {
break_list(p);
}
child = p;
}
}
/*!
 * Break every proper list whose first element is an atom naming one of the forms that must
 * always be split over multiple lines, regardless of how long it is.
 * Lists that are already broken (possibly as a side effect of an earlier break_list call in
 * this same pass) are skipped.
 */
void insert_required_breaks(const std::vector<Node*>& bfs_order) {
  const std::unordered_set<std::string> always_break = {
      "when",    "defun-debug", "countdown", "case",     "defun",   "defmethod", "let",
      "until",   "while",       "if",        "dotimes",  "cond",    "else",      "defbehavior",
      "with-pp", "rlet",        "defstate",  "behavior", "defpart", "loop",      "let*"};

  for (Node* candidate : bfs_order) {
    if (candidate->break_list || candidate->kind != Node::Kind::LIST) {
      continue;
    }

    const Node& head = candidate->child_nodes.at(0);
    if (head.kind != Node::Kind::ATOM) {
      continue;
    }

    if (always_break.count(head.atom_str) > 0) {
      break_list(candidate);
    }
  }
}
/*!
 * Run one pass of the line breaker and return how many lists this pass chose to break
 * (lists broken as a side effect inside break_list are not counted).
 *
 * our approach is to go in reverse order and find the first list node that is:
 *  - too long
 *  - not already split.
 * That node fixes a depth. The walk keeps breaking qualifying lists, and stops as soon as it
 * reaches a node that is shallower than that depth.
 * the "magic" of v2 is:
 *  the "too long" check above ignores the sublist.
 *
 * All lengths are recomputed before returning, so the caller can simply call this again
 * until it returns 0.
 */
int run_algorithm(const std::vector<Node*>& bfs_order, int line_length) {
  int broken_count = 0;
  std::optional<s32> stop_depth;  // depth of the first list broken in this pass

  for (size_t remaining = bfs_order.size(); remaining-- > 0;) {
    Node* candidate = bfs_order[remaining];

    if (stop_depth.has_value() && candidate->my_depth < stop_depth) {
      break;
    }

    const bool is_list = candidate->kind != Node::Kind::ATOM;
    const bool too_long = (int)candidate->text_len > line_length;
    if (!is_list || !too_long || candidate->break_list) {
      continue;
    }

    break_list(candidate);
    broken_count++;
    if (!stop_depth) {
      stop_depth = candidate->my_depth;
    }
  }

  recompute_lengths(bfs_order);
  return broken_count;
}
/*!
 * Work out the extra indentation that applies after some text was appended to str.
 * @param str the output so far. Must not be empty (asserted).
 * @param s0  the length str had before the text was appended (index of the first new char).
 * @param ei  the extra indentation that was in effect before the text was appended.
 * @return if a newline character appears after index s0, the number of characters from the
 *         last newline (inclusive) to the end of str. Otherwise ei plus the number of
 *         characters appended (str.length() - s0).
 */
int compute_extra_offset(const std::string& str, int s0, int ei) {
  ASSERT(!str.empty());
  const int len = static_cast<int>(str.length());

  // scan backward from the end: whichever comes first, the start of the new text or a newline.
  for (int i = len; i-- > 0;) {
    if (i == s0) {
      // reached the start of the appended text without seeing a newline.
      return ei + len - s0;
    }
    if (str[i] == '\n') {
      // compare the character, not the index: the appended text wrapped onto a new line.
      return len - i;
    }
  }

  // s0 is outside of the string and there is no newline at all.
  return ei + len - s0;
}
void append_node_to_string(const Node* node,
std::string& str,
int init_indent_level,
int next_indent_level) {
for (int i = 0; i < init_indent_level; i++) {
str.push_back(' ');
}
for (u32 i = 0; i < node->quoted; i++) {
str.push_back('\'');
}
switch (node->kind) {
case Node::Kind::ATOM:
str.append(node->atom_str);
break;
case Node::Kind::IMPROPER_LIST:
case Node::Kind::LIST:
if (node->break_list) {
str.push_back('(');
size_t node_idx = 0;
int listing_indent = next_indent_level + node->quoted + node->sub_elt_indent;
int extra_indent = 0;
int old_indent = listing_indent;
if (node->top_line_count) {
listing_indent -= node->sub_elt_indent;
listing_indent += node->child_nodes.front().kind == Node::Kind::LIST ? 1 : 2;
}
for (; node_idx < node->top_line_count; node_idx++) {
size_t s0 = str.length();
if (node->kind == Node::Kind::IMPROPER_LIST &&
&node->child_nodes.at(node_idx) == &node->child_nodes.back()) {
str.append(". ");
}
// so, if these need to break, they should have a bigger indent.
append_node_to_string(&node->child_nodes.at(node_idx), str, 0,
listing_indent + extra_indent);
extra_indent = compute_extra_offset(str, s0, extra_indent);
str.push_back(' ');
}
if (node->top_line_count) {
listing_indent = old_indent;
}
if (node->top_line_count > 0) {
str.pop_back();
}
str.push_back('\n');
bool after_key = false;
for (; node_idx < node->child_nodes.size(); node_idx++) {
if (node->kind == Node::Kind::IMPROPER_LIST &&
&node->child_nodes.at(node_idx) == &node->child_nodes.back()) {
for (int i = 0; i < listing_indent; i++) {
str.push_back(' ');
}
str.append(".\n");
}
append_node_to_string(&node->child_nodes.at(node_idx), str,
after_key ? 0 : listing_indent, listing_indent);
if (node->child_nodes.at(node_idx).kind == Node::Kind::ATOM &&
node->child_nodes.at(node_idx).atom_str.at(0) == ':' &&
node->child_nodes.at(node_idx).atom_str.find(' ') == std::string::npos) {
str.push_back(' ');
after_key = true;
} else {
str.push_back('\n');
after_key = false;
}
}
for (int i = 0; i < listing_indent; i++) {
str.push_back(' ');
}
str.push_back(')');
} else {
str.push_back('(');
ASSERT(!node->child_nodes.empty());
int listing_indent = next_indent_level + node->quoted;
int extra_indent = 1;
int c0 = 0;
for (auto& child : node->child_nodes) {
if (node->kind == Node::Kind::IMPROPER_LIST && &child == &node->child_nodes.back()) {
str.append(". ");
}
size_t s0 = str.length();
append_node_to_string(&child, str, 0, listing_indent + extra_indent);
str.push_back(' ');
extra_indent += (str.length() - s0);
if (&child == &node->child_nodes.at(0) && !child.break_list) {
//
if (child.kind == Node::Kind::LIST) {
c0 = 0;
} else {
c0 = str.length() - s0;
}
}
}
str.pop_back();
if (node->needs_end_paren_newline()) {
str.push_back('\n');
for (int i = 0; i < listing_indent + c0 + 1; i++) {
str.push_back(' ');
}
}
str.push_back(')');
}
break;
default:
ASSERT(false);
}
}
/*!
 * Render a node tree to a new string, starting with no indentation.
 */
std::string node_to_string(const Node* node) {
  std::string rendered;
  append_node_to_string(node, rendered, /*init_indent_level=*/0, /*next_indent_level=*/0);
  return rendered;
}
} // namespace v2
/*!
 * Pretty print a goos object.
 * @param obj         the object to print. Object kinds that to_node does not support make it
 *                    throw std::runtime_error.
 * @param line_length lists whose computed width is larger than this get broken over several
 *                    lines.
 * @return the formatted text.
 */
std::string to_string(const goos::Object& obj, int line_length) {
  using namespace v2;

  // construct the tree. root must stay in place from here on: link stores pointers into it.
  Node root = to_node(obj);

  // create tree links and collect the nodes, parents before their descendants.
  std::vector<Node*> bfs_order;
  root.link(nullptr, &bfs_order, 0);

  // forms that are always split, no matter how short.
  insert_required_breaks(bfs_order);

  // compute subtree lengths
  recompute_lengths(bfs_order);

  // keep breaking lists that are too long until a pass finds nothing left to break.
  while (run_algorithm(bfs_order, line_length) != 0) {
  }

  return node_to_string(&root);
}
} // namespace pretty_print