// Mirror of https://github.com/open-goal/jak-project.git
// (synced 2024-10-20 11:26:18 -04:00; 363 lines, 15 KiB, C++)
#include "formatter.h"
|
|
|
|
#include "formatter_tree.h"
|
|
|
|
#include "common/log/log.h"
|
|
#include "common/util/FileUtil.h"
|
|
#include "common/util/string_util.h"
|
|
|
|
#include "tree_sitter/api.h"
|
|
|
|
#include "third-party/fmt/core.h"
|
|
|
|
// Declare the `tree_sitter_opengoal` function, which is
|
|
// implemented by the `tree-sitter-opengoal` library.
|
|
extern "C" {
|
|
extern const TSLanguage* tree_sitter_opengoal();
|
|
}
|
|
|
|
int hang_indentation_width(const FormatterTreeNode& curr_node) {
|
|
if (curr_node.token || curr_node.refs.empty()) {
|
|
return 0;
|
|
}
|
|
// Get the first element of the form
|
|
const auto& first_elt = curr_node.refs.at(0);
|
|
if (first_elt.token) {
|
|
return first_elt.token->length() +
|
|
2; // +2 because the opening paren and then the following space
|
|
}
|
|
// Otherwise, continue nesting
|
|
return 1 + hang_indentation_width(first_elt);
|
|
}
|
|
|
|
// TODO - compute length of each node and store it
|
|
void apply_formatting_config(
|
|
FormatterTreeNode& curr_node,
|
|
std::optional<std::shared_ptr<formatter_rules::config::FormFormattingConfig>>
|
|
config_from_parent = {}) {
|
|
using namespace formatter_rules;
|
|
// node is empty, base-case
|
|
if (curr_node.token || curr_node.refs.empty()) {
|
|
return;
|
|
}
|
|
// first, check to see if this form already has a predefined formatting configuration
|
|
// if it does, that simplifies things because there is only 1 way of formatting the form
|
|
std::optional<formatter_rules::config::FormFormattingConfig> predefined_config;
|
|
if (!config_from_parent && !curr_node.refs.empty() && curr_node.refs.at(0).token) {
|
|
const auto& form_head = curr_node.refs.at(0).token;
|
|
if (form_head && config::opengoal_form_config.find(form_head.value()) !=
|
|
config::opengoal_form_config.end()) {
|
|
predefined_config = config::opengoal_form_config.at(form_head.value());
|
|
curr_node.formatting_config = predefined_config.value();
|
|
}
|
|
} else if (config_from_parent) {
|
|
predefined_config = *config_from_parent.value();
|
|
curr_node.formatting_config = predefined_config.value();
|
|
}
|
|
// In order to keep things simple, as well as because its ineffectual in lisp code (you can only
|
|
// enforce it so much without making things unreadable), line width will not matter for deciding
|
|
// whether or not to hang or flow the form
|
|
//
|
|
// This means that a hang would ALWAYS win, because it's 1 less line break. Therefore this
|
|
// simplifies our approach there is no need to explore both braches to see which one would be
|
|
// preferred.
|
|
//
|
|
// Instead, we either use the predefined configuration (obviously) or we do some checks for some
|
|
// outlier conditions to see if things should be formatted differently
|
|
//
|
|
// Otherwise, we always default to a hang.
|
|
//
|
|
// NOTE - any modifications here to child elements could be superseeded later in the recursion
|
|
// in order to maintain your sanity, only modify things here that _arent_ touched by default
|
|
// configurations. These are explicitly prepended with `parent_mutable_`
|
|
if (!predefined_config) {
|
|
if (curr_node.metadata.is_top_level) {
|
|
curr_node.formatting_config.indentation_width = 0;
|
|
curr_node.formatting_config.hang_forms = false;
|
|
} else if (constant_list::is_constant_list(curr_node)) {
|
|
// - Check if the form is a constant list (ie. a list of numbers)
|
|
curr_node.formatting_config.indentation_width = 1;
|
|
curr_node.formatting_config.hang_forms = false;
|
|
curr_node.formatting_config.has_constant_pairs =
|
|
constant_pairs::form_should_be_constant_paired(curr_node);
|
|
// If applicable, iterate through the constant pairs, since we can potentially pair up
|
|
// non-constant second elements in a pair (like a function call), there is the potential that
|
|
// they need to spill to the next line and get indented in extra. This is an exceptional
|
|
// circumstance, we do NOT do this sort of thing when formatting normal forms (cond/case pairs
|
|
// are another similar situation)
|
|
if (curr_node.formatting_config.has_constant_pairs) {
|
|
for (int i = 0; i < (int)curr_node.refs.size(); i++) {
|
|
auto& child_ref = curr_node.refs.at(i);
|
|
const auto type = child_ref.metadata.node_type;
|
|
if (constant_types.find(type) == constant_types.end() &&
|
|
constant_pairs::is_element_second_in_constant_pair(curr_node, child_ref, i)) {
|
|
child_ref.formatting_config.parent_mutable_extra_indent = 2;
|
|
}
|
|
}
|
|
}
|
|
|
|
} else if (curr_node.formatting_config.hang_forms && curr_node.refs.size() > 1 &&
|
|
curr_node.refs.at(1).metadata.is_comment) {
|
|
// - Check if the second argument is a comment, it looks better if we flow instead
|
|
curr_node.formatting_config.hang_forms = false;
|
|
}
|
|
}
|
|
// If we are hanging, lets determine the indentation width since it is based on the form itself
|
|
if (curr_node.formatting_config.hang_forms) {
|
|
curr_node.formatting_config.indentation_width = hang_indentation_width(curr_node);
|
|
}
|
|
// iterate through the refs
|
|
for (int i = 0; i < (int)curr_node.refs.size(); i++) {
|
|
auto& ref = curr_node.refs.at(i);
|
|
if (!ref.token) {
|
|
// If the child has a pre-defined configuration at that index, we pass it along
|
|
if (predefined_config &&
|
|
predefined_config->index_configs.find(i) != predefined_config->index_configs.end()) {
|
|
apply_formatting_config(ref, predefined_config->index_configs.at(i));
|
|
} else if (predefined_config && predefined_config->default_index_config) {
|
|
apply_formatting_config(ref, predefined_config->default_index_config);
|
|
} else {
|
|
apply_formatting_config(ref);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
int get_total_form_inlined_width(const FormatterTreeNode& curr_node) {
|
|
if (curr_node.token) {
|
|
return curr_node.token->length();
|
|
}
|
|
int width = 1;
|
|
for (const auto& ref : curr_node.refs) {
|
|
width += get_total_form_inlined_width(ref);
|
|
}
|
|
return width + 1;
|
|
}
|
|
|
|
bool form_contains_comment(const FormatterTreeNode& curr_node) {
|
|
if (curr_node.metadata.is_comment) {
|
|
return true;
|
|
}
|
|
for (const auto& ref : curr_node.refs) {
|
|
const auto contains_comment = form_contains_comment(ref);
|
|
if (contains_comment) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool form_contains_node_that_prevents_inlining(const FormatterTreeNode& curr_node) {
|
|
if (curr_node.formatting_config.should_prevent_inlining(curr_node.formatting_config,
|
|
curr_node.refs.size())) {
|
|
return true;
|
|
}
|
|
for (const auto& ref : curr_node.refs) {
|
|
const auto prevents_inlining = form_contains_node_that_prevents_inlining(ref);
|
|
if (prevents_inlining) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool can_node_be_inlined(const FormatterTreeNode& curr_node, int cursor_pos) {
|
|
using namespace formatter_rules;
|
|
// First off, we cannot inline the top level
|
|
if (curr_node.metadata.is_top_level) {
|
|
return false;
|
|
}
|
|
// If the config explicitly prevents inlining, or it contains a sub-node that prevents inlining
|
|
if (curr_node.formatting_config.prevent_inlining ||
|
|
form_contains_node_that_prevents_inlining(curr_node)) {
|
|
return false;
|
|
}
|
|
// nor can we inline something that contains a comment in the middle
|
|
if (form_contains_comment(curr_node)) {
|
|
return false;
|
|
}
|
|
// constant pairs are not inlined!
|
|
if (curr_node.formatting_config.has_constant_pairs) {
|
|
return false;
|
|
}
|
|
// If this is set in the config, then the form is intended to be partially inlined
|
|
if (curr_node.formatting_config.inline_until_index({})) {
|
|
return false;
|
|
}
|
|
// let's see if we can inline the form all on one line to do that, we recursively explore
|
|
// the form to find the total width
|
|
int line_width = cursor_pos + get_total_form_inlined_width(curr_node);
|
|
return line_width <= indent::line_width_target; // TODO - comments
|
|
}
|
|
|
|
std::vector<std::string> apply_formatting(const FormatterTreeNode& curr_node,
|
|
std::vector<std::string> output = {},
|
|
int cursor_pos = 0) {
|
|
using namespace formatter_rules;
|
|
if (!curr_node.token && curr_node.refs.empty()) {
|
|
// special case to handle an empty list
|
|
return {"()"};
|
|
}
|
|
|
|
// If its a token, just print the token and move on
|
|
if (curr_node.token) {
|
|
return {curr_node.token.value()};
|
|
}
|
|
|
|
bool inline_form = can_node_be_inlined(curr_node, cursor_pos);
|
|
// TODO - also if the form is inlinable, we can skip all the complication below and just...inline
|
|
// it!
|
|
// TODO - should figure out the inlining here as well, instead of the bool above
|
|
|
|
// Iterate the form, building up a list of the final lines but don't worry about indentation
|
|
// at this stage. Once the lines are finalized, it's easy to add the indentation later
|
|
//
|
|
// This means we may combine elements onto the same line in this step.
|
|
std::vector<std::string> form_lines = {};
|
|
|
|
for (int i = 0; i < (int)curr_node.refs.size(); i++) {
|
|
const auto& ref = curr_node.refs.at(i);
|
|
// Add new line entry
|
|
if (ref.token) {
|
|
// Cleanup block-comments
|
|
std::string val = ref.token.value();
|
|
if (ref.metadata.node_type == "block_comment") {
|
|
// TODO - change this sanitization to return a list of lines instead of a single new-lined
|
|
// line
|
|
val = comments::format_block_comment(ref.token.value());
|
|
}
|
|
form_lines.push_back(val);
|
|
} else {
|
|
// If it's not a token, we have to recursively build up the form
|
|
// TODO - add the cursor_pos here
|
|
const auto& lines = apply_formatting(ref, {}, cursor_pos);
|
|
for (int i = 0; i < (int)lines.size(); i++) {
|
|
const auto& line = lines.at(i);
|
|
form_lines.push_back(fmt::format(
|
|
"{}{}", str_util::repeat(ref.formatting_config.parent_mutable_extra_indent, " "),
|
|
line));
|
|
}
|
|
}
|
|
// If we are hanging forms, combine the first two forms onto the same line
|
|
if (i == (int)curr_node.refs.size() - 1 && form_lines.size() > 1 &&
|
|
(curr_node.formatting_config.hang_forms ||
|
|
curr_node.formatting_config.combine_first_two_lines)) {
|
|
form_lines.at(0) += fmt::format(" {}", form_lines.at(1));
|
|
form_lines.erase(form_lines.begin() + 1);
|
|
} else if ((i + 1) < (int)curr_node.refs.size()) {
|
|
const auto& next_ref = curr_node.refs.at(i + 1);
|
|
// combine the next inline comment or constant pair
|
|
if ((next_ref.metadata.node_type == "comment" && next_ref.metadata.is_inline) ||
|
|
(curr_node.formatting_config.has_constant_pairs &&
|
|
constant_pairs::is_element_second_in_constant_pair(curr_node, next_ref, i + 1))) {
|
|
if (next_ref.token) {
|
|
form_lines.at(form_lines.size() - 1) += fmt::format(" {}", next_ref.token.value());
|
|
i++;
|
|
} else if (can_node_be_inlined(next_ref, cursor_pos)) {
|
|
const auto& lines = apply_formatting(next_ref, {}, cursor_pos); // TODO - cursor pos
|
|
for (const auto& line : lines) {
|
|
form_lines.at(form_lines.size() - 1) += fmt::format(" {}", line);
|
|
}
|
|
i++;
|
|
}
|
|
}
|
|
}
|
|
// If we are at the top level, potential separate with a new line
|
|
if (blank_lines::should_insert_blank_line(curr_node, ref, i)) {
|
|
form_lines.at(form_lines.size() - 1) += "\n";
|
|
}
|
|
}
|
|
|
|
// Consolidate any lines if the configuration requires it
|
|
if (curr_node.formatting_config.inline_until_index(form_lines)) {
|
|
std::vector<std::string> new_form_lines = {};
|
|
for (int i = 0; i < (int)form_lines.size(); i++) {
|
|
if (i < curr_node.formatting_config.inline_until_index(form_lines)) {
|
|
if (new_form_lines.empty()) {
|
|
new_form_lines.push_back(form_lines.at(i));
|
|
} else {
|
|
new_form_lines.at(0) += fmt::format(" {}", form_lines.at(i));
|
|
}
|
|
} else {
|
|
new_form_lines.push_back(form_lines.at(i));
|
|
}
|
|
}
|
|
form_lines = new_form_lines;
|
|
}
|
|
|
|
// Apply necessary indentation to each line and add parens
|
|
if (!curr_node.metadata.is_top_level) {
|
|
std::string form_surround_start = "(";
|
|
std::string form_surround_end = ")";
|
|
form_lines[0] = fmt::format("{}{}", form_surround_start, form_lines[0]);
|
|
form_lines[form_lines.size() - 1] =
|
|
fmt::format("{}{}", form_lines[form_lines.size() - 1], form_surround_end);
|
|
}
|
|
std::string curr_form = "";
|
|
if (curr_node.formatting_config.parent_mutable_extra_indent > 0) {
|
|
curr_form += str_util::repeat(curr_node.formatting_config.parent_mutable_extra_indent, " ");
|
|
}
|
|
if (inline_form) {
|
|
form_lines = {fmt::format("{}", fmt::join(form_lines, " "))};
|
|
} else {
|
|
for (int i = 0; i < (int)form_lines.size(); i++) {
|
|
if (i > 0) {
|
|
auto& line = form_lines.at(i);
|
|
line = fmt::format("{}{}",
|
|
str_util::repeat(curr_node.formatting_config.indentation_width_for_index(
|
|
curr_node.formatting_config, i),
|
|
" "),
|
|
line);
|
|
}
|
|
}
|
|
}
|
|
return form_lines;
|
|
}
|
|
|
|
std::string join_formatted_lines(const std::vector<std::string> lines) {
|
|
// TODO - respect original file line endings
|
|
return fmt::format("{}", fmt::join(lines, "\n"));
|
|
}
|
|
|
|
std::optional<std::string> formatter::format_code(const std::string& source) {
|
|
// Create a parser.
|
|
std::shared_ptr<TSParser> parser(ts_parser_new(), TreeSitterParserDeleter());
|
|
|
|
// Set the parser's language (JSON in this case).
|
|
ts_parser_set_language(parser.get(), tree_sitter_opengoal());
|
|
|
|
// Build a syntax tree based on source code stored in a string.
|
|
std::shared_ptr<TSTree> tree(
|
|
ts_parser_parse_string(parser.get(), NULL, source.c_str(), source.length()),
|
|
TreeSitterTreeDeleter());
|
|
|
|
// Get the root node of the syntax tree.
|
|
TSNode root_node = ts_tree_root_node(tree.get());
|
|
if (ts_node_is_null(root_node) || ts_node_has_error(root_node)) {
|
|
return std::nullopt;
|
|
}
|
|
|
|
try {
|
|
// There are three phases of formatting
|
|
// 1. Simplify the AST down to something that is easier to work on from a formatting perspective
|
|
// this also gathers basic metadata that can be done at this stage, like if the token is a
|
|
// comment or if the form is on the top-level
|
|
auto formatting_tree = FormatterTree(source, root_node);
|
|
// 2. Recursively iterate through this simplified FormatterTree and figure out what rules
|
|
// need to be applied to produce an optimal result
|
|
apply_formatting_config(formatting_tree.root);
|
|
// 3. Use this updated FormatterTree to print out the final source-code, while doing so
|
|
// we may deviate from the optimal result to produce something even more optimal by inlining
|
|
// forms that can fit within the line width.
|
|
const auto formatted_lines = apply_formatting(formatting_tree.root);
|
|
// 4. Now we joint he lines together, it's easier when formatting to leave all lines independent
|
|
// so adding indentation is easier
|
|
const auto formatted_source = join_formatted_lines(formatted_lines);
|
|
return formatted_source;
|
|
} catch (std::exception& e) {
|
|
lg::error("Unable to format code - {}", e.what());
|
|
}
|
|
|
|
return std::nullopt;
|
|
}
|