2023-05-28 13:22:00 -04:00
|
|
|
#pragma once
|
|
|
|
|
2023-06-04 13:19:29 -04:00
|
|
|
#include <memory>
|
2023-05-28 13:22:00 -04:00
|
|
|
#include <optional>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include "tree_sitter/api.h"
|
|
|
|
|
|
|
|
// Treesitter is fantastic for validating and parsing our code into a structured tree format without
|
|
|
|
// whitespace so we can do that ourselves (formatting). However, the treesitter AST is a bit too
|
|
|
|
// detailed for purposes of formatting.
|
|
|
|
//
|
|
|
|
// When formatting, there is no need to know things like whether we are
|
|
|
|
// in a function, or a symbol, etc. This extra information is fantastic for parsing or manipulating
|
|
|
|
// the language, but becomes burdensome when just trying to write elegant formatting code when all
|
|
|
|
// we really care about is:
|
|
|
|
// - getting all the text tokens for the source code
|
|
|
|
// - having them in a proper, nested format
|
|
|
|
// The treesitter format is complicated and highly nested, leading to some very hard to understand
|
|
|
|
// code. So my solution is a 2-pass format.
|
|
|
|
//
|
|
|
|
// Pass 1 - convert the AST into a simplified FormatterTree
|
|
|
|
// Pass 2 - use the simplified tree to output the final code
|
|
|
|
|
2023-06-04 13:19:29 -04:00
|
|
|
// A single node of the simplified formatter tree.
//
// A node is either:
//  - a token node: `token` holds a value, or
//  - a list node: `token` is empty and the node's children live in `refs`.
class FormatterTreeNode {
 public:
  // Per-node information carried alongside the token / children.
  struct Metadata {
    // NOTE(review): presumably the tree-sitter node type name this node came from -- confirm
    // against the constructor's definition.
    std::string node_type;
    bool is_top_level = false;
    bool is_comment = false;
    bool is_inline = false;
    int num_blank_lines_following = 0;
    bool is_binding_list = false;  // TODO set this
  };

  // Child nodes, in order.
  std::vector<FormatterTreeNode> refs;
  Metadata metadata;
  // The token is optional because list nodes do not carry a token of their own; they only hold
  // child refs, which eventually bottom out in token nodes.
  std::optional<std::string> token;

  FormatterTreeNode() = default;
  // Builds a node from the original source text and a tree-sitter node (defined out of line).
  FormatterTreeNode(const std::string& source, const TSNode& node);
  // Builds a token-less node that only carries the given metadata.
  // Intentionally left non-explicit so existing implicit conversions from Metadata keep compiling.
  FormatterTreeNode(const Metadata& _metadata) : metadata(_metadata) {}

  // A node is a list exactly when it has no token of its own.
  bool is_list() const { return !token.has_value(); }
};
|
|
|
|
|
|
|
|
// A FormatterTree has a very simple and crude tree structure where:
|
|
|
|
// - Nodes are essentially forms, which contain in-order tokens or references to nested forms
|
|
|
|
// - Nodes can have associated metadata, often related to their context in the original code
|
|
|
|
// - Nodes can also have multiple formatting rules associated with them. Often this is the default
|
|
|
|
// rule or based on pre-configured overrides due to the head of the form, ex. 'defun'
|
|
|
|
class FormatterTree {
|
|
|
|
public:
|
2023-05-28 13:22:00 -04:00
|
|
|
FormatterTree(const std::string& source, const TSNode& root_node);
|
2023-06-04 13:19:29 -04:00
|
|
|
FormatterTreeNode root;
|
2023-05-28 13:22:00 -04:00
|
|
|
|
|
|
|
private:
|
|
|
|
void construct_formatter_tree_recursive(const std::string& source,
|
|
|
|
TSNode curr_node,
|
2023-06-04 13:19:29 -04:00
|
|
|
FormatterTreeNode& tree_node);
|
2023-05-28 13:22:00 -04:00
|
|
|
};
|