mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-20 11:26:18 -04:00
c162c66118
This PR does two main things: 1. Work through the main low-hanging fruit issues in the formatter keeping it from feeling mature and usable 2. Iterate and prove that point by formatting all of the Jak 1 code base. **This has removed around 100K lines in total.** - The decompiler will now format its results for jak 1 to keep things from drifting back to where they were. This is controlled by a new config flag `format_code`. How am I confident this hasn't broken anything?: - I compiled the entire project and stored its `out/jak1/obj` files separately - I then recompiled the project after formatting and wrote a script that md5's each file and compares it (`compare-compilation-outputs.py`) - The results (eventually) were the same: ![Screenshot 2024-05-25 132900](https://github.com/open-goal/jak-project/assets/13153231/015e6f20-8d19-49b7-9951-97fa88ddc6c2) > This proves that the only difference before and after is non-critical whitespace for all code/macros that is actually in use. I'm still aware of improvements that could be made to the formatter, as well as general optimization of its performance. But in general these are for rare or non-critical situations in my opinion and I'll work through them before doing Jak 2. The vast majority looks great and is working properly at this point. Those known issues are the following if you are curious: ![image](https://github.com/open-goal/jak-project/assets/13153231/0edfaba1-6d36-40f5-ab23-0642209867c4)
192 lines
6.7 KiB
C++
192 lines
6.7 KiB
C++
#include "formatting_rules.h"
|
|
|
|
#include <set>
|
|
|
|
#include "common/util/string_util.h"
|
|
|
|
#include "fmt/core.h"
|
|
|
|
namespace formatter_rules {
|
|
|
|
// Node types that are looked up when deciding whether an element is a constant.
// TODO - probably need to include quoted literals as well, though the grammar currently does not
// differentiate between a quoted symbol and a quoted form
const std::set<std::string> constant_types{
    "kwd_lit", "num_lit", "str_lit", "char_lit", "null_lit", "bool_lit",
};

// Names that are compared against the first element of a list to special-case forms such as
// `(meters ...)` as constants.
const std::set<std::string> constant_type_forms{
    "meters",
    "seconds",
    "degrees",
};
|
|
|
|
namespace constant_list {
|
|
bool is_constant_list(const FormatterTreeNode& node) {
|
|
if (!node.is_list() || node.refs.empty()) {
|
|
return false;
|
|
}
|
|
if (!node.refs.at(0).token) {
|
|
return true;
|
|
}
|
|
const auto& type = node.refs.at(0).metadata.node_type;
|
|
return constant_types.find(type) != constant_types.end();
|
|
}
|
|
} // namespace constant_list
|
|
|
|
namespace blank_lines {
|
|
|
|
bool should_insert_blank_line(const FormatterTreeNode& containing_node,
|
|
const FormatterTreeNode& node,
|
|
const int index) {
|
|
// We only do this at the top level and don't leave a trailing new-line
|
|
if (!containing_node.metadata.is_top_level || index >= (int)containing_node.refs.size() - 1) {
|
|
return false;
|
|
}
|
|
// If it's a comment, but has no following blank lines, dont insert a blank line
|
|
if (node.metadata.is_comment && node.metadata.num_blank_lines_following == 0) {
|
|
return false;
|
|
}
|
|
// If the next form is a comment and is inline, don't insert a new line
|
|
if ((index + 1) < (int)containing_node.refs.size() &&
|
|
containing_node.refs.at(index + 1).metadata.is_comment &&
|
|
containing_node.refs.at(index + 1).metadata.is_inline) {
|
|
return false;
|
|
}
|
|
|
|
if (node.formatting_config.elide_top_level_newline) {
|
|
if ((index + 1) < (int)containing_node.refs.size() &&
|
|
containing_node.refs.at(index + 1).metadata.is_comment) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
} // namespace blank_lines
|
|
|
|
namespace comments {
|
|
std::vector<std::string> format_block_comment(const std::string& comment) {
|
|
// Normalize block comments, remove any trailing or leading whitespace
|
|
// Only allow annotations on the first line, like #|@file
|
|
// Don't mess with internal indentation as the user might intend it to be a certain way.
|
|
std::string new_comment = "";
|
|
std::string comment_contents = "";
|
|
bool seek_until_whitespace = str_util::starts_with(comment, "#|@");
|
|
int chars_seeked = 0;
|
|
for (const auto& c : comment) {
|
|
if (c == '\n' || (seek_until_whitespace && (c == ' ' || c == '\t')) ||
|
|
(!seek_until_whitespace && (c != '#' && c != '|'))) {
|
|
break;
|
|
}
|
|
chars_seeked++;
|
|
new_comment += c;
|
|
}
|
|
// Remove the first line content and any leading whitespace
|
|
comment_contents = str_util::ltrim_newlines(comment.substr(chars_seeked));
|
|
// Remove trailing whitespace
|
|
comment_contents = str_util::rtrim(comment_contents);
|
|
// remove |#
|
|
if (str_util::ends_with(comment_contents, "|#")) {
|
|
comment_contents.pop_back();
|
|
comment_contents.pop_back();
|
|
}
|
|
comment_contents = str_util::rtrim(comment_contents);
|
|
std::vector<std::string> lines = {new_comment};
|
|
const auto contents_as_lines = str_util::split_string(comment_contents, "\n");
|
|
if (contents_as_lines.size() > 1) {
|
|
for (const auto& line : contents_as_lines) {
|
|
lines.push_back(line);
|
|
}
|
|
lines.push_back("|#");
|
|
} else {
|
|
lines.at(0) = fmt::format("{} {} |#", new_comment, str_util::trim(contents_as_lines.at(0)));
|
|
}
|
|
return lines;
|
|
}
|
|
} // namespace comments
|
|
|
|
namespace constant_pairs {
|
|
|
|
// TODO - remove index, not needed, could just pass in the previous node
|
|
bool is_element_second_in_constant_pair(const FormatterTreeNode& containing_node,
|
|
const FormatterTreeNode& node,
|
|
const int index) {
|
|
if (containing_node.refs.empty() || index == 0) {
|
|
return false;
|
|
}
|
|
// Ensure that a keyword came before hand
|
|
if (containing_node.refs.at(index - 1).metadata.node_type != "kwd_lit") {
|
|
return false;
|
|
} else if (node.metadata.node_type == "kwd_lit") {
|
|
// NOTE - there is ambiugity here which cannot be totally solved (i think?)
|
|
// if the element itself is also a keyword, assume this is two adjacent keywords and they should
|
|
// not be paired
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// TODO - potentially remove the above
|
|
bool is_element_second_in_constant_pair_new(const FormatterTreeNode& prev_node,
|
|
const FormatterTreeNode& curr_node) {
|
|
if (prev_node.metadata.node_type == "kwd_lit") {
|
|
// Handle standard constant types
|
|
// TODO - pair up sym_names as well
|
|
if (constant_types.find(curr_node.metadata.node_type) != constant_types.end()) {
|
|
if (curr_node.metadata.node_type != "kwd_lit") {
|
|
// NOTE - there is ambiugity here which cannot be totally solved (i think?)
|
|
// if the element itself is also a keyword, assume this is two adjacent keywords and they
|
|
// should not be paired
|
|
return true;
|
|
}
|
|
}
|
|
// Quoted symbols
|
|
if (curr_node.metadata.node_type == "sym_name" && curr_node.node_prefix &&
|
|
(curr_node.node_prefix.value() == "'" || curr_node.node_prefix.value() == ",")) {
|
|
return true;
|
|
}
|
|
if (!curr_node.refs.empty()) {
|
|
// Constant forms special cases (ie. meters)
|
|
if (constant_type_forms.find(curr_node.refs.at(0).token_str()) != constant_type_forms.end()) {
|
|
return true;
|
|
}
|
|
// If they are just a list of symbol names (enum or simple method call)
|
|
bool all_symbols = true;
|
|
for (const auto& ref : curr_node.refs) {
|
|
if (ref.metadata.node_type != "sym_name") {
|
|
all_symbols = false;
|
|
break;
|
|
}
|
|
}
|
|
if (all_symbols) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool form_should_be_constant_paired(const FormatterTreeNode& node) {
|
|
// Criteria for a list to be constant paired:
|
|
// - needs to start with a non-symbol
|
|
// - needs atleast the minimum amount of pairs, so 2 pairs can still be inlined
|
|
if (node.refs.empty()) {
|
|
return false;
|
|
}
|
|
int num_pairs = 0;
|
|
for (int i = 0; i < (int)node.refs.size() - 1; i++) {
|
|
const auto& ref = node.refs.at(i);
|
|
const auto& next_ref = node.refs.at(i + 1);
|
|
if (ref.token && next_ref.token) {
|
|
// If the first element a keyword and the following item is a constant, it's a pair
|
|
// move forward one extra index
|
|
if (ref.metadata.node_type == "kwd_lit" &&
|
|
constant_types.find(next_ref.metadata.node_type) != constant_types.end()) {
|
|
num_pairs++;
|
|
i++;
|
|
}
|
|
}
|
|
}
|
|
return num_pairs >= min_pair_amount;
|
|
}
|
|
|
|
} // namespace constant_pairs
|
|
|
|
} // namespace formatter_rules
|