2023-05-28 13:22:00 -04:00
|
|
|
#include "formatter_tree.h"
|
|
|
|
|
2023-06-04 13:19:29 -04:00
|
|
|
#include "common/util/string_util.h"
|
|
|
|
|
2024-03-05 22:11:52 -05:00
|
|
|
#include "fmt/core.h"
|
2023-06-18 17:19:35 -04:00
|
|
|
|
2023-06-06 20:34:50 -04:00
|
|
|
std::string get_source_code(const std::string& source, const TSNode& node) {
|
|
|
|
uint32_t start = ts_node_start_byte(node);
|
|
|
|
uint32_t end = ts_node_end_byte(node);
|
2023-06-18 17:19:35 -04:00
|
|
|
return source.substr(start, end - start);
|
2023-06-06 20:34:50 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
int num_blank_lines_following_node(const std::string& source, const TSNode& node) {
|
2023-06-18 17:19:35 -04:00
|
|
|
int num_lines = -1; // The first new-line encountered is not a blank line
|
2023-06-06 20:34:50 -04:00
|
|
|
uint32_t cursor = ts_node_end_byte(node);
|
2023-06-18 17:19:35 -04:00
|
|
|
// TODO - this breaks on lines with whitespace as well, should probably seek past that!
|
2023-06-06 20:34:50 -04:00
|
|
|
while (cursor < source.length() && source.at(cursor) == '\n') {
|
|
|
|
num_lines++;
|
|
|
|
cursor++;
|
|
|
|
}
|
|
|
|
return num_lines;
|
|
|
|
}
|
|
|
|
|
2023-06-18 17:19:35 -04:00
|
|
|
// Check if the original source only has whitespace up to a new-line before it's token
|
2023-06-06 20:34:50 -04:00
|
|
|
bool node_preceeded_by_only_whitespace(const std::string& source, const TSNode& node) {
|
2023-06-18 17:19:35 -04:00
|
|
|
// NOTE - this returns incorrectly because we skip brackets in lists, we'll see if that matters
|
|
|
|
int32_t pos = ts_node_start_byte(node) - 1;
|
2023-06-06 20:34:50 -04:00
|
|
|
while (pos > 0) {
|
|
|
|
const auto& c = source.at(pos);
|
|
|
|
if (c == '\n') {
|
|
|
|
return true;
|
|
|
|
} else if (c == ' ' || c == '\t') {
|
|
|
|
pos--;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Builds a node whose token is the source text covered by `node`, then fills in the metadata
// that can be derived from that token and from its position in `source`.
FormatterTreeNode::FormatterTreeNode(const std::string& source, const TSNode& node)
    : token(get_source_code(source, node)) {
  metadata.node_type = ts_node_type(node);
  // Trim once, both comment checks look at the same left-trimmed token text
  const auto trimmed_token = str_util::ltrim(token.value());
  metadata.is_comment =
      str_util::starts_with(trimmed_token, ";") || str_util::starts_with(trimmed_token, "#|");
  // Set the metadata that depends on what surrounds the token in the original source
  metadata.num_blank_lines_following = num_blank_lines_following_node(source, node);
  metadata.is_inline = !node_preceeded_by_only_whitespace(source, node);
}
|
|
|
|
|
2023-05-28 13:22:00 -04:00
|
|
|
// Check if the original source only has whitespace up to a new-line after it's token
|
|
|
|
bool node_followed_by_only_whitespace(const std::string& source, const TSNode& node) {
|
|
|
|
uint32_t pos = ts_node_end_byte(node);
|
|
|
|
while (pos < source.length()) {
|
|
|
|
const auto& c = source.at(pos);
|
|
|
|
if (c == '\n') {
|
|
|
|
return true;
|
|
|
|
} else if (c == ' ' || c == '\t') {
|
|
|
|
pos++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2023-06-04 13:19:29 -04:00
|
|
|
bool nodes_on_same_line(const std::string& source, const TSNode& n1, const TSNode& n2) {
|
|
|
|
// Get the source between the two lines, if there are any new-lines, the answer is NO
|
|
|
|
uint32_t start = ts_node_start_byte(n1);
|
|
|
|
uint32_t end = ts_node_end_byte(n2);
|
|
|
|
const auto code_between = source.substr(start, end - start);
|
|
|
|
return !str_util::contains(code_between, "\n");
|
|
|
|
}
|
|
|
|
|
2023-05-28 13:22:00 -04:00
|
|
|
// Resets `root` to a fresh node flagged as top-level, then populates it from the tree-sitter
// tree rooted at `root_node`.
FormatterTree::FormatterTree(const std::string& source, const TSNode& root_node) {
  auto top_level_node = FormatterTreeNode();
  top_level_node.metadata.is_top_level = true;
  root = top_level_node;
  construct_formatter_tree_recursive(source, root_node, root);
}
|
|
|
|
|
2024-01-18 20:09:40 -05:00
|
|
|
// For a given node type, the child source texts that are skipped (not added to the formatter
// tree) when that node's children are walked.
const std::unordered_map<std::string, std::vector<std::string>> node_type_ignorable_contents = {
    {"list_lit", {"(", ")"}},
};
|
2024-01-18 20:09:40 -05:00
|
|
|
|
2023-05-28 13:22:00 -04:00
|
|
|
// TODO make an imperative version eventually
|
2024-06-05 22:17:31 -04:00
|
|
|
// TODO - cleanup duplication
|
2023-05-28 13:22:00 -04:00
|
|
|
void FormatterTree::construct_formatter_tree_recursive(const std::string& source,
|
|
|
|
TSNode curr_node,
|
2024-01-18 20:09:40 -05:00
|
|
|
FormatterTreeNode& tree_node,
|
|
|
|
std::optional<std::string> node_prefix) {
|
2023-05-28 13:22:00 -04:00
|
|
|
if (ts_node_child_count(curr_node) == 0) {
|
2024-06-05 22:17:31 -04:00
|
|
|
auto new_node = FormatterTreeNode(source, curr_node);
|
|
|
|
new_node.node_prefix = node_prefix;
|
|
|
|
tree_node.refs.push_back(new_node);
|
2023-05-28 13:22:00 -04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
const std::string curr_node_type = ts_node_type(curr_node);
|
2023-06-04 13:19:29 -04:00
|
|
|
FormatterTreeNode list_node;
|
2024-01-18 20:09:40 -05:00
|
|
|
std::optional<std::string> next_node_prefix;
|
2023-05-28 13:22:00 -04:00
|
|
|
if (curr_node_type == "list_lit") {
|
2023-06-04 13:19:29 -04:00
|
|
|
list_node = FormatterTreeNode();
|
2023-06-18 17:19:35 -04:00
|
|
|
} else if (curr_node_type == "str_lit") {
|
|
|
|
// Strings are a special case, they are literals and essentially tokens but the grammar can
|
|
|
|
// detect formatter identifiers, this is useful for semantic highlighting but doesn't matter for
|
|
|
|
// formatting So for strings, we treat them as if they should be a single token
|
|
|
|
tree_node.refs.push_back(FormatterTreeNode(source, curr_node));
|
|
|
|
return;
|
2023-10-20 21:24:31 -04:00
|
|
|
} else if (curr_node_type == "quoting_lit") {
|
2024-06-05 22:17:31 -04:00
|
|
|
if (node_prefix) {
|
|
|
|
node_prefix.value() += "'";
|
|
|
|
} else {
|
|
|
|
node_prefix = "'";
|
|
|
|
}
|
|
|
|
construct_formatter_tree_recursive(source, ts_node_child(curr_node, 1), tree_node, node_prefix);
|
|
|
|
return;
|
2024-01-18 20:09:40 -05:00
|
|
|
} else if (curr_node_type == "unquoting_lit") {
|
2024-06-05 22:17:31 -04:00
|
|
|
if (node_prefix) {
|
|
|
|
node_prefix.value() += ",";
|
|
|
|
} else {
|
|
|
|
node_prefix = ",";
|
|
|
|
}
|
|
|
|
construct_formatter_tree_recursive(source, ts_node_child(curr_node, 1), tree_node, node_prefix);
|
|
|
|
return;
|
2024-01-18 20:09:40 -05:00
|
|
|
} else if (curr_node_type == "quasi_quoting_lit") {
|
2024-06-05 22:17:31 -04:00
|
|
|
if (node_prefix) {
|
|
|
|
node_prefix.value() += "`";
|
|
|
|
} else {
|
|
|
|
node_prefix = "`";
|
|
|
|
}
|
|
|
|
construct_formatter_tree_recursive(source, ts_node_child(curr_node, 1), tree_node, node_prefix);
|
|
|
|
return;
|
|
|
|
} else if (curr_node_type == "unquote_splicing_lit") {
|
|
|
|
if (node_prefix) {
|
|
|
|
node_prefix.value() += ",@";
|
|
|
|
} else {
|
|
|
|
node_prefix = ",@";
|
|
|
|
}
|
|
|
|
construct_formatter_tree_recursive(source, ts_node_child(curr_node, 1), tree_node, node_prefix);
|
|
|
|
return;
|
2024-01-18 20:09:40 -05:00
|
|
|
}
|
|
|
|
std::vector<std::string> skippable_nodes = {};
|
|
|
|
if (node_type_ignorable_contents.find(curr_node_type) != node_type_ignorable_contents.end()) {
|
|
|
|
skippable_nodes = node_type_ignorable_contents.at(curr_node_type);
|
2023-05-28 13:22:00 -04:00
|
|
|
}
|
|
|
|
for (size_t i = 0; i < ts_node_child_count(curr_node); i++) {
|
|
|
|
const auto child_node = ts_node_child(curr_node, i);
|
2024-06-05 22:17:31 -04:00
|
|
|
auto debug_child = ts_node_string(child_node);
|
2023-05-28 13:22:00 -04:00
|
|
|
const auto contents = get_source_code(source, child_node);
|
2024-01-18 20:09:40 -05:00
|
|
|
bool skip_node = false;
|
|
|
|
for (const auto& skippable_content : skippable_nodes) {
|
|
|
|
if (skippable_content == contents) {
|
|
|
|
skip_node = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (skip_node) {
|
2023-05-28 13:22:00 -04:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (curr_node_type == "list_lit") {
|
2024-06-05 22:17:31 -04:00
|
|
|
construct_formatter_tree_recursive(source, child_node, list_node, {});
|
2024-01-18 20:09:40 -05:00
|
|
|
if (node_prefix) {
|
|
|
|
list_node.node_prefix = node_prefix;
|
|
|
|
}
|
2023-05-28 13:22:00 -04:00
|
|
|
} else {
|
2024-06-05 22:17:31 -04:00
|
|
|
construct_formatter_tree_recursive(source, child_node, tree_node, node_prefix);
|
2024-01-18 20:09:40 -05:00
|
|
|
if (node_prefix && !tree_node.refs.empty()) {
|
|
|
|
tree_node.refs.at(tree_node.refs.size() - 1).node_prefix = node_prefix;
|
|
|
|
}
|
2023-05-28 13:22:00 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (curr_node_type == "list_lit") {
|
2024-06-05 22:17:31 -04:00
|
|
|
// special case for empty lists
|
|
|
|
if (node_prefix && !list_node.node_prefix) {
|
|
|
|
list_node.node_prefix = node_prefix;
|
|
|
|
}
|
2023-05-28 13:22:00 -04:00
|
|
|
tree_node.refs.push_back(list_node);
|
|
|
|
}
|
|
|
|
}
|