#include "formatter.h"

#include <cstdint>
#include <exception>
#include <memory>
#include <optional>
#include <string>
#include <vector>

#include "formatter_tree.h"

#include "common/log/log.h"
#include "common/util/FileUtil.h"
#include "common/util/string_util.h"

#include "tree_sitter/api.h"

#include "third-party/fmt/core.h"

// Declare the `tree_sitter_opengoal` function, which is
// implemented by the `tree-sitter-opengoal` library.
extern "C" {
extern const TSLanguage* tree_sitter_opengoal();
}

// Computes how far the arguments of a hung form are indented.
//
// Returns 0 for tokens and empty forms. Otherwise the width is derived from the head of the form:
// a token head contributes its length + 2, a nested head contributes 1 plus its own hang width.
int hang_indentation_width(const FormatterTreeNode& curr_node) {
  if (curr_node.token || curr_node.refs.empty()) {
    return 0;
  }
  // Get the first element of the form
  const auto& first_elt = curr_node.refs.at(0);
  if (first_elt.token) {
    // +2 because the opening paren and then the following space
    return static_cast<int>(first_elt.token->length()) + 2;
  }
  // Otherwise, continue nesting
  return 1 + hang_indentation_width(first_elt);
}

// Recursively decides the formatting configuration of `curr_node` and all of its children.
//
// - curr_node: the node to configure; its `formatting_config` (and, for constant pairs, the
//   `parent_mutable_extra_indent` of its children) is modified in place.
// - config_from_parent: configuration forced onto this node by its parent's predefined
//   configuration. When absent (or holding a null pointer) the head token of the form is looked up
//   in `config::opengoal_form_config` instead.
//
// TODO - compute length of each node and store it
void apply_formatting_config(
    FormatterTreeNode& curr_node,
    std::optional<std::shared_ptr<formatter_rules::config::FormFormattingConfig>>
        config_from_parent = {}) {
  using namespace formatter_rules;
  // node is empty, base-case
  if (curr_node.token || curr_node.refs.empty()) {
    return;
  }

  // first, check to see if this form already has a predefined formatting configuration
  // if it does, that simplifies things because there is only 1 way of formatting the form
  std::optional<formatter_rules::config::FormFormattingConfig> predefined_config;
  // An engaged optional that wraps a null pointer is treated the same as "no parent config"
  // rather than being dereferenced.
  const bool has_parent_config =
      config_from_parent.has_value() && config_from_parent.value() != nullptr;
  if (has_parent_config) {
    predefined_config = *config_from_parent.value();
    curr_node.formatting_config = predefined_config.value();
  } else if (!config_from_parent && curr_node.refs.at(0).token) {
    const auto& form_head = curr_node.refs.at(0).token;
    const auto config_it = config::opengoal_form_config.find(form_head.value());
    if (config_it != config::opengoal_form_config.end()) {
      predefined_config = config_it->second;
      curr_node.formatting_config = predefined_config.value();
    }
  }

  // In order to keep things simple, as well as because it's ineffectual in lisp code (you can only
  // enforce it so much without making things unreadable), line width will not matter for deciding
  // whether or not to hang or flow the form
  //
  // This means that a hang would ALWAYS win, because it's 1 less line break. Therefore this
  // simplifies our approach: there is no need to explore both branches to see which one would be
  // preferred.
  //
  // Instead, we either use the predefined configuration (obviously) or we do some checks for some
  // outlier conditions to see if things should be formatted differently
  //
  // Otherwise, we always default to a hang.
  //
  // NOTE - any modifications here to child elements could be superseded later in the recursion.
  // In order to maintain your sanity, only modify things here that _aren't_ touched by default
  // configurations. These are explicitly prepended with `parent_mutable_`
  if (!predefined_config) {
    auto& node_config = curr_node.formatting_config;
    if (curr_node.metadata.is_top_level) {
      node_config.indentation_width = 0;
      node_config.hang_forms = false;
    } else if (constant_list::is_constant_list(curr_node)) {
      // - Check if the form is a constant list (ie. a list of numbers)
      node_config.indentation_width = 1;
      node_config.hang_forms = false;
      node_config.has_constant_pairs = constant_pairs::form_should_be_constant_paired(curr_node);
      // If applicable, iterate through the constant pairs, since we can potentially pair up
      // non-constant second elements in a pair (like a function call), there is the potential that
      // they need to spill to the next line and get indented in extra. This is an exceptional
      // circumstance, we do NOT do this sort of thing when formatting normal forms (cond/case pairs
      // are another similar situation)
      if (node_config.has_constant_pairs) {
        const int num_children = static_cast<int>(curr_node.refs.size());
        for (int child_idx = 0; child_idx < num_children; child_idx++) {
          auto& child_ref = curr_node.refs.at(child_idx);
          const auto& type = child_ref.metadata.node_type;
          if (constant_types.find(type) == constant_types.end() &&
              constant_pairs::is_element_second_in_constant_pair(curr_node, child_ref,
                                                                 child_idx)) {
            child_ref.formatting_config.parent_mutable_extra_indent = 2;
          }
        }
      }
    } else if (node_config.hang_forms && curr_node.refs.size() > 1 &&
               curr_node.refs.at(1).metadata.is_comment) {
      // - Check if the second argument is a comment, it looks better if we flow instead
      node_config.hang_forms = false;
    }
  }

  // If we are hanging, lets determine the indentation width since it is based on the form itself
  if (curr_node.formatting_config.hang_forms) {
    curr_node.formatting_config.indentation_width = hang_indentation_width(curr_node);
  }

  // iterate through the refs, tokens need no configuration so only nested forms are visited
  const int num_refs = static_cast<int>(curr_node.refs.size());
  for (int ref_idx = 0; ref_idx < num_refs; ref_idx++) {
    auto& ref = curr_node.refs.at(ref_idx);
    if (ref.token) {
      continue;
    }
    if (predefined_config) {
      // If the child has a pre-defined configuration at that index, we pass it along
      const auto& index_configs = predefined_config->index_configs;
      const auto index_it = index_configs.find(ref_idx);
      if (index_it != index_configs.end()) {
        apply_formatting_config(ref, index_it->second);
        continue;
      }
      // Otherwise fall back to the configuration shared by all un-indexed children, if any
      if (predefined_config->default_index_config) {
        apply_formatting_config(ref, predefined_config->default_index_config);
        continue;
      }
    }
    apply_formatting_config(ref);
  }
}

// Returns the width of `curr_node` if it were printed on a single line: the token length for a
// token, otherwise the sum of the children's widths plus 2 for the surrounding parens.
//
// NOTE(review): the single space that `apply_formatting` puts between sibling elements when
// inlining is not counted here, so the result under-estimates the real width of multi-element
// forms. Left untouched because changing it alters which forms get inlined - confirm intent.
int get_total_form_inlined_width(const FormatterTreeNode& curr_node) {
  if (curr_node.token) {
    return static_cast<int>(curr_node.token->length());
  }
  int width = 1;  // opening paren
  for (const auto& ref : curr_node.refs) {
    width += get_total_form_inlined_width(ref);
  }
  return width + 1;  // closing paren
}

// Returns true if `curr_node` or any node nested inside of it is flagged as a comment.
bool form_contains_comment(const FormatterTreeNode& curr_node) {
  if (curr_node.metadata.is_comment) {
    return true;
  }
  for (const auto& ref : curr_node.refs) {
    if (form_contains_comment(ref)) {
      return true;
    }
  }
  return false;
}

// Returns true if the configuration of `curr_node`, or of any node nested inside of it, reports
// via `should_prevent_inlining` that the form must not be inlined.
bool form_contains_node_that_prevents_inlining(const FormatterTreeNode& curr_node) {
  if (curr_node.formatting_config.should_prevent_inlining(curr_node.formatting_config,
                                                          curr_node.refs.size())) {
    return true;
  }
  for (const auto& ref : curr_node.refs) {
    if (form_contains_node_that_prevents_inlining(ref)) {
      return true;
    }
  }
  return false;
}

// Decides whether `curr_node` may be printed entirely on one line when starting at column
// `cursor_pos`.
bool can_node_be_inlined(const FormatterTreeNode& curr_node, int cursor_pos) {
  using namespace formatter_rules;
  // First off, we cannot inline the top level
  if (curr_node.metadata.is_top_level) {
    return false;
  }
  // If the config explicitly prevents inlining, or it contains a sub-node that prevents inlining
  if (curr_node.formatting_config.prevent_inlining ||
      form_contains_node_that_prevents_inlining(curr_node)) {
    return false;
  }
  // nor can we inline something that contains a comment in the middle
  if (form_contains_comment(curr_node)) {
    return false;
  }
  // constant pairs are not inlined!
  if (curr_node.formatting_config.has_constant_pairs) {
    return false;
  }
  // If this is set in the config, then the form is intended to be partially inlined
  if (curr_node.formatting_config.inline_until_index({})) {
    return false;
  }
  // let's see if we can inline the form all on one line, to do that, we recursively explore
  // the form to find the total width
  const int line_width = cursor_pos + get_total_form_inlined_width(curr_node);
  return line_width <= indent::line_width_target;  // TODO - comments
}

// Renders `curr_node` into its final output lines.
//
// - curr_node: the node to print, already configured by `apply_formatting_config`.
// - output: not read by this function; kept so existing call sites remain valid.
// - cursor_pos: the column the form starts at, used for the inlining decision.
//
// Returns one string per output line. A line may carry a trailing "\n" where a blank line should
// follow it.
std::vector<std::string> apply_formatting(const FormatterTreeNode& curr_node,
                                          std::vector<std::string> output = {},
                                          int cursor_pos = 0) {
  using namespace formatter_rules;
  if (!curr_node.token && curr_node.refs.empty()) {
    // special case to handle an empty list
    return {"()"};
  }

  // If its a token, just print the token and move on
  if (curr_node.token) {
    return {curr_node.token.value()};
  }

  const bool inline_form = can_node_be_inlined(curr_node, cursor_pos);
  // TODO - also if the form is inlinable, we can skip all the complication below and just...inline
  // it!
  // TODO - should figure out the inlining here as well, instead of the bool above

  // Iterate the form, building up a list of the final lines but don't worry about indentation
  // at this stage. Once the lines are finalized, it's easy to add the indentation later
  //
  // This means we may combine elements onto the same line in this step.
  const auto& node_config = curr_node.formatting_config;
  const int num_refs = static_cast<int>(curr_node.refs.size());
  std::vector<std::string> form_lines = {};
  // NOTE - `i` is deliberately advanced inside the body when the next element gets merged onto the
  // current line
  for (int i = 0; i < num_refs; i++) {
    const auto& ref = curr_node.refs.at(i);
    // Add new line entry
    if (ref.token) {
      // Cleanup block-comments
      std::string val = ref.token.value();
      if (ref.metadata.node_type == "block_comment") {
        // TODO - change this sanitization to return a list of lines instead of a single new-lined
        // line
        val = comments::format_block_comment(ref.token.value());
      }
      form_lines.push_back(std::move(val));
    } else {
      // If it's not a token, we have to recursively build up the form
      // TODO - add the cursor_pos here
      const auto child_lines = apply_formatting(ref, {}, cursor_pos);
      // The extra indent is the same for every line of the child, so build it once
      const auto extra_indent =
          str_util::repeat(ref.formatting_config.parent_mutable_extra_indent, " ");
      for (const auto& child_line : child_lines) {
        form_lines.push_back(fmt::format("{}{}", extra_indent, child_line));
      }
    }

    // If we are hanging forms, combine the first two forms onto the same line
    if (i == num_refs - 1 && form_lines.size() > 1 &&
        (node_config.hang_forms || node_config.combine_first_two_lines)) {
      form_lines.at(0) += fmt::format(" {}", form_lines.at(1));
      form_lines.erase(form_lines.begin() + 1);
    } else if ((i + 1) < num_refs) {
      const auto& next_ref = curr_node.refs.at(i + 1);
      // combine the next inline comment or constant pair
      if ((next_ref.metadata.node_type == "comment" && next_ref.metadata.is_inline) ||
          (node_config.has_constant_pairs &&
           constant_pairs::is_element_second_in_constant_pair(curr_node, next_ref, i + 1))) {
        if (next_ref.token) {
          form_lines.back() += fmt::format(" {}", next_ref.token.value());
          i++;
        } else if (can_node_be_inlined(next_ref, cursor_pos)) {
          // TODO - cursor pos
          const auto next_lines = apply_formatting(next_ref, {}, cursor_pos);
          for (const auto& next_line : next_lines) {
            form_lines.back() += fmt::format(" {}", next_line);
          }
          i++;
        }
      }
    }

    // If we are at the top level, potentially separate with a new line
    if (blank_lines::should_insert_blank_line(curr_node, ref, i)) {
      form_lines.back() += "\n";
    }
  }

  // Consolidate any lines if the configuration requires it.
  // `form_lines` is not modified while consolidating, so the limit is queried once up-front
  // (assumes `inline_until_index` has no side effects).
  const auto inline_until = node_config.inline_until_index(form_lines);
  if (inline_until) {
    std::vector<std::string> new_form_lines = {};
    const int num_lines = static_cast<int>(form_lines.size());
    for (int line_idx = 0; line_idx < num_lines; line_idx++) {
      if (line_idx < inline_until && !new_form_lines.empty()) {
        // Everything before the limit is merged onto the first line
        new_form_lines.at(0) += fmt::format(" {}", form_lines.at(line_idx));
      } else {
        new_form_lines.push_back(form_lines.at(line_idx));
      }
    }
    form_lines = std::move(new_form_lines);
  }

  // Add parens, the top-level is not a real form and so it gets none
  if (!curr_node.metadata.is_top_level) {
    form_lines.front().insert(0, "(");
    form_lines.back() += ")";
  }

  if (inline_form) {
    const std::string joined = fmt::format("{}", fmt::join(form_lines, " "));
    form_lines = {joined};
  } else {
    // Apply necessary indentation to every line but the first
    const int num_lines = static_cast<int>(form_lines.size());
    for (int line_idx = 1; line_idx < num_lines; line_idx++) {
      auto& line = form_lines.at(line_idx);
      line = fmt::format(
          "{}{}",
          str_util::repeat(node_config.indentation_width_for_index(node_config, line_idx), " "),
          line);
    }
  }
  return form_lines;
}

// Joins the formatted lines into a single string, separated by "\n".
std::string join_formatted_lines(const std::vector<std::string> lines) {
  // TODO - respect original file line endings
  return fmt::format("{}", fmt::join(lines, "\n"));
}

// Formats the given OpenGOAL source code.
//
// Returns the formatted source, or std::nullopt if the parser could not be set up, the source
// could not be parsed without errors, or an exception was raised while formatting.
std::optional<std::string> formatter::format_code(const std::string& source) {
  // Create a parser.
  std::shared_ptr<TSParser> parser(ts_parser_new(), TreeSitterParserDeleter());

  // Set the parser's language (OpenGOAL in this case). Without a language assigned the parser
  // cannot produce a tree, so bail out early.
  if (!ts_parser_set_language(parser.get(), tree_sitter_opengoal())) {
    lg::error("Unable to format code - {}", "could not assign the tree-sitter language");
    return std::nullopt;
  }

  // Build a syntax tree based on source code stored in a string.
  std::shared_ptr<TSTree> tree(
      ts_parser_parse_string(parser.get(), nullptr, source.c_str(),
                             static_cast<uint32_t>(source.length())),
      TreeSitterTreeDeleter());
  // Parsing can fail and hand back a null tree, which must not be queried for its root node
  if (!tree) {
    return std::nullopt;
  }

  // Get the root node of the syntax tree.
  TSNode root_node = ts_tree_root_node(tree.get());
  if (ts_node_is_null(root_node) || ts_node_has_error(root_node)) {
    return std::nullopt;
  }

  try {
    // There are three phases of formatting
    // 1. Simplify the AST down to something that is easier to work on from a formatting perspective
    //    this also gathers basic metadata that can be done at this stage, like if the token is a
    //    comment or if the form is on the top-level
    auto formatting_tree = FormatterTree(source, root_node);
    // 2. Recursively iterate through this simplified FormatterTree and figure out what rules
    //    need to be applied to produce an optimal result
    apply_formatting_config(formatting_tree.root);
    // 3. Use this updated FormatterTree to print out the final source-code, while doing so
    //    we may deviate from the optimal result to produce something even more optimal by inlining
    //    forms that can fit within the line width.
    const auto formatted_lines = apply_formatting(formatting_tree.root);
    // 4. Now we join the lines together, it's easier when formatting to leave all lines independent
    //    so adding indentation is easier
    return join_formatted_lines(formatted_lines);
  } catch (const std::exception& e) {
    lg::error("Unable to format code - {}", e.what());
  }

  return std::nullopt;
}