more progress on the type system

This commit is contained in:
water 2020-08-24 21:20:32 -04:00
parent 7b641170bf
commit 8927bd976d
6 changed files with 568 additions and 163 deletions

View file

@ -95,7 +95,9 @@ bool Field::operator==(const Field& other) const {
// parent class of types, also has method logic.
Type::Type(std::string parent, std::string name, bool is_boxed)
: m_parent(std::move(parent)), m_name(std::move(name)), m_is_boxed(is_boxed) {}
: m_parent(std::move(parent)), m_name(std::move(name)), m_is_boxed(is_boxed) {
m_runtime_name = m_name;
}
/*!
* Get the name of a type. This should be a unique identifier that can be used to find this
@ -105,6 +107,14 @@ std::string Type::get_name() const {
return m_name;
}
/*!
 * Get the name stored as this type's runtime name. It is a separate member from the name returned
 * by get_name and can be changed with set_runtime_type.
 */
std::string Type::get_runtime_name() const {
return m_runtime_name;
}
/*!
 * Replace the name returned by get_runtime_name.
 * NOTE(review): this setter is named "..._type" while the getter and the member are "..._name" -
 * consider unifying the naming.
 */
void Type::set_runtime_type(std::string name) {
// name is a by-value parameter, so it can be moved into the member
m_runtime_name = std::move(name);
}
/*!
* Get the parent type's name.
*/

View file

@ -58,7 +58,9 @@ class Type {
bool has_parent() const;
std::string get_name() const;
std::string get_runtime_name() const;
std::string get_parent() const;
void set_runtime_type(std::string name);
bool get_my_method(const std::string& name, MethodInfo* out) const;
bool get_my_last_method(MethodInfo* out) const;
bool get_my_new_method(MethodInfo* out) const;
@ -77,6 +79,7 @@ class Type {
std::string m_parent; // the parent type (is empty for none and object)
std::string m_name;
std::string m_runtime_name;
bool m_is_boxed = false; // does this have runtime type information?
};

View file

@ -1,8 +1,5 @@
/*!
* @file TypeSpec.h
* A TypeSpec is a reference to a Type, or possibly a compound type.
* A compound type contains a "root type", which must be a Type, and a list of "type arguments",
* which are TypeSpecs.
*/
#ifndef JAK_TYPESPEC_H
@ -10,8 +7,18 @@
#include <vector>
#include <string>
#include <cassert>
class Type;
/*!
* A TypeSpec is a reference to a Type, or possibly a compound type. This is the best way to
* refer to a type, as it supports compound types and also will work correctly after a type has been
* redefined. Doing sane things after types change will make debugging GOAL code more pleasant.
*
* A compound type contains a "root type", which must be a Type, and a list of "type
* arguments", which are TypeSpecs.
*/
class TypeSpec {
public:
// create a typespec for a single type
@ -26,6 +33,10 @@ class TypeSpec {
void add_arg(const TypeSpec& ts) { m_arguments.push_back(ts); }
const std::string base_type() const { return m_type; }
// Get the only type argument of this TypeSpec. The TypeSpec must have exactly one argument.
// This is only checked with assert, which is compiled out when NDEBUG is defined, so callers are
// responsible for not calling this on a TypeSpec with no arguments.
const TypeSpec& get_single_arg() const {
assert(m_arguments.size() == 1);
return m_arguments.front();
}
private:
std::string m_type;

View file

@ -10,7 +10,8 @@ TypeSystem::TypeSystem() {
}
/*!
* Specify a new type. If the type definition changes, it is an error if throw_on_redefine is set.
* Add a new type. If the type exists, and this new type is different, it is an error if
* throw_on_redefine is set. The type should be fully set up (fields, etc) before running this.
*/
Type* TypeSystem::add_type(const std::string& name, std::unique_ptr<Type> type) {
auto kv = m_types.find(name);
@ -61,14 +62,72 @@ Type* TypeSystem::add_type(const std::string& name, std::unique_ptr<Type> type)
/*!
* Inform the type system that there will eventually be a type named "name".
* This will allow the type system to generate TypeSpecs for this type, but not access a Type*.
* This will allow the type system to generate TypeSpecs for this type, but not access detailed
* information, or know the exact size.
*/
void TypeSystem::forward_declare_type(std::string name) {
// name is a by-value parameter, so it can be moved into the collection of forward declared names
m_forward_declared_types.insert(std::move(name));
}
/*!
* Get the runtime type (as a name string) of a TypeSpec. Gets the runtime type of the primary
* type of the TypeSpec.
*/
std::string TypeSystem::get_runtime_type(const TypeSpec& ts) {
// lookup_type(ts) finds the Type from ts.base_type(), so type arguments of a compound TypeSpec
// have no effect on the result
return lookup_type(ts)->get_runtime_name();
}
/*!
* Get information about what happens if you dereference an object of given type
*/
DerefInfo TypeSystem::get_deref_info(const TypeSpec& ts) {
  // Only "pointer" and "inline-array" TypeSpecs can be dereferenced; both must have exactly one
  // type argument (checked by get_single_arg). For anything else, can_deref is false.
  // Throws std::runtime_error if ts is an inline-array of a value type.
  DerefInfo info;

  // default to GPR, and to a real memory load
  info.reg = RegKind::GPR_64;
  info.mem_deref = true;

  // base_type() returns by value, so grab it once
  const std::string base = ts.base_type();

  if (base == "inline-array") {
    // it's an inline array of structures. We can "dereference". But really we don't do a memory
    // dereference, we just add stride*idx to the pointer.
    info.can_deref = true;                   // deref operators should work...
    info.mem_deref = false;                  // but don't actually dereference a pointer
    info.result_type = ts.get_single_arg();  // what we're an inline-array of
    info.sign_extend = false;                // not applicable anyway

    auto result_type = lookup_type(info.result_type);
    if (!result_type->is_reference()) {
      // can't have an inline array of value types! This used to be assert(false), which is
      // compiled out with NDEBUG and would then return an info with can_deref set and stride -1.
      fmt::print("[TypeSystem] Cannot dereference {}: an inline-array of value type {} is invalid\n",
                 ts.print(), result_type->get_name());
      throw std::runtime_error("get_deref_info inline-array of value type");
    }
    // elements are spaced by their size, rounded up to the inline array alignment
    info.stride =
        align(result_type->get_size_in_memory(), result_type->get_inline_array_alignment());
  } else if (base == "pointer") {
    info.can_deref = true;
    info.result_type = ts.get_single_arg();

    auto result_type = lookup_type(info.result_type);
    if (result_type->is_reference()) {
      // in memory, an array of pointers
      info.stride = POINTER_SIZE;
      info.sign_extend = false;
    } else {
      // an array of values, which should be loaded in the correct way to the correct register
      info.stride = result_type->get_size_in_memory();
      info.sign_extend = result_type->get_load_signed();
      info.reg = result_type->get_preferred_reg_kind();
      // the stride assumes a value occupies exactly as much memory as a load of it reads
      assert(result_type->get_size_in_memory() == result_type->get_load_size());
    }
  } else {
    info.can_deref = false;
  }

  return info;
}
/*!
* Create a simple typespec. The type must be defined or forward declared for this to work.
* If you really need a TypeSpec which refers to a non-existent type, just construct your own.
*/
TypeSpec TypeSystem::make_typespec(const std::string& name) {
if (m_types.find(name) != m_types.end() ||
@ -81,7 +140,8 @@ TypeSpec TypeSystem::make_typespec(const std::string& name) {
}
/*!
* Create a typespec for a function.
* Create a typespec for a function. If the function doesn't return anything, use "none" as the
* return type.
*/
TypeSpec TypeSystem::make_function_typespec(const std::vector<std::string>& arg_types,
const std::string& return_type) {
@ -94,7 +154,38 @@ TypeSpec TypeSystem::make_function_typespec(const std::vector<std::string>& arg_
}
/*!
* Get full type information. Throws if the type doesn't exist.
* Create a TypeSpec for a pointer to a type.
*/
TypeSpec TypeSystem::make_pointer_typespec(const std::string& type) {
  // turn the name into a TypeSpec first, then defer to the TypeSpec overload to wrap it
  const TypeSpec pointee = make_typespec(type);
  return make_pointer_typespec(pointee);
}
/*!
* Create a TypeSpec for a pointer to a type.
*/
TypeSpec TypeSystem::make_pointer_typespec(const TypeSpec& type) {
// compound TypeSpec: base type "pointer", with the pointed-to type as its only type argument
return TypeSpec("pointer", {type});
}
/*!
* Create a TypeSpec for an inline-array of type
*/
TypeSpec TypeSystem::make_inline_array_typespec(const std::string& type) {
  // turn the name into a TypeSpec first, then defer to the TypeSpec overload to wrap it
  const TypeSpec element = make_typespec(type);
  return make_inline_array_typespec(element);
}
/*!
* Create a TypeSpec for an inline-array of type
*/
TypeSpec TypeSystem::make_inline_array_typespec(const TypeSpec& type) {
// compound TypeSpec: base type "inline-array", with the element type as its only type argument
return TypeSpec("inline-array", {type});
}
/*!
* Get full type information. Throws if the type doesn't exist. If the given type is redefined after
* a call to lookup_type, the Type* will still be valid, but will point to the old data. Whenever
* possible, don't store a Type* and store a TypeSpec instead. The TypeSpec can then be used with
* lookup_type to find the most up-to-date type information.
*/
Type* TypeSystem::lookup_type(const std::string& name) {
auto kv = m_types.find(name);
@ -112,21 +203,34 @@ Type* TypeSystem::lookup_type(const std::string& name) {
}
/*!
* Get the method ID for the specified method. If it doesn't exist, add it.
* If it does exist, and newly specified method is different, throws an error.
* Get full type information. Throws if the type doesn't exist. If the given type is redefined after
* a call to lookup_type, the Type* will still be valid, but will point to the old data. Whenever
* possible, don't store a Type* and store a TypeSpec instead. The TypeSpec can then be used with
* lookup_type to find the most up-to-date type information.
*/
Type* TypeSystem::lookup_type(const TypeSpec& ts) {
// only the base type is used for the lookup; type arguments of a compound TypeSpec are ignored
return lookup_type(ts.base_type());
}
/*!
* Add a method, if it doesn't exist. If the method already exists (possibly in a parent), checks to
* see if this is an identical definition. If not, it's an error, and if so, nothing happens.
* Returns the info of either the existing or newly created method.
*
* This is not used to override methods, but instead to create truly new methods. The one exception
* is overriding the "new" method - the TypeSystem will track that because overridden new methods
* may have different arguments.
*/
MethodInfo TypeSystem::add_method(Type* type, const std::string& method_name, const TypeSpec& ts) {
if (method_name == "new") {
return add_new_method(type, ts);
}
MethodInfo existing_info;
bool got_existing = false;
// first lookup the type
auto* iter_type = type;
// look up the method
MethodInfo existing_info;
bool got_existing = false;
auto* iter_type = type;
while (true) {
if (iter_type->get_my_method(method_name, &existing_info)) {
got_existing = true;
@ -148,6 +252,8 @@ MethodInfo TypeSystem::add_method(Type* type, const std::string& method_name, co
"[TypeSystem] The method {} of type {} was originally defined as {}, but has been "
"redefined as {}\n",
method_name, type->get_name(), existing_info.type.print(), ts.print());
// unlike type re-definition, method re-definition is almost certain to go wrong.
// probably better to give up.
throw std::runtime_error("method redefinition");
}
@ -160,6 +266,8 @@ MethodInfo TypeSystem::add_method(Type* type, const std::string& method_name, co
/*!
* Special case to add a new method, as new methods can specialize the arguments.
* If it turns out that other child methods can specialize arguments (seems like a bad idea), this
* may be generalized.
*/
MethodInfo TypeSystem::add_new_method(Type* type, const TypeSpec& ts) {
MethodInfo existing;
@ -241,36 +349,70 @@ MethodInfo TypeSystem::lookup_new_method(const std::string& type_name) {
}
/*!
* Get the next free method ID of a type.
* Makes sure a method exists at the given ID for the given type, possibly defined in a parent.
*/
int TypeSystem::get_next_method_id(Type* type) {
MethodInfo info;
void TypeSystem::assert_method_id(const std::string& type_name,
                                  const std::string& method_name,
                                  int id) {
  // Checks that the method found by lookup_method for this type has the expected ID.
  // Throws std::runtime_error on a mismatch, in addition to whatever lookup_method itself does
  // if the method cannot be found.
  const auto info = lookup_method(type_name, method_name);
  if (info.id != id) {
    fmt::print(
        "[TypeSystem] Method ID assertion failed: type {}, method {} id was {}, expected {}\n",
        type_name, method_name, info.id, id);
    // a failed assertion has to stop the caller. Previously this only printed, so a wrong
    // method ID went unnoticed by any code relying on this check.
    throw std::runtime_error("assert_method_id failed");
  }
}
while (true) {
if (type->get_my_last_method(&info)) {
return info.id + 1;
}
/*!
* Lookup detailed information about a field of a type by name, including type, offset,
* and how to access it.
*/
FieldLookupInfo TypeSystem::lookup_field_info(const std::string& type_name,
const std::string& field_name) {
FieldLookupInfo info;
info.field = lookup_field(type_name, field_name);
if (type->has_parent()) {
type = lookup_type(type->get_parent());
// get array size, for bounds checking (when possible)
if (info.field.is_array() && !info.field.is_dynamic()) {
info.array_size = info.field.array_size();
}
auto base_type = lookup_type(info.field.type());
if (base_type->is_reference()) {
if (info.field.is_inline()) {
if (info.field.is_array()) {
// inline array of reference types
info.needs_deref = false;
info.type = make_inline_array_typespec(info.field.type());
} else {
// inline object
info.needs_deref = false;
info.type = info.field.type();
}
} else {
// nobody has defined any method yet. New is special and doesn't use this, so we return
// one after new.
return 1;
if (info.field.is_array()) {
info.needs_deref = false;
info.type = make_pointer_typespec(info.field.type());
} else {
info.needs_deref = true;
info.type = info.field.type();
}
}
} else {
if (info.field.is_array()) {
info.needs_deref = false;
info.type = make_pointer_typespec(info.field.type());
} else {
// not array
info.needs_deref = true;
info.type = info.field.type();
}
}
return info;
}
Field TypeSystem::lookup_field(const std::string& type_name, const std::string& field_name) {
auto type = get_type_of_type<StructureType>(type_name);
Field field;
if (!type->lookup_field(field_name, &field)) {
fmt::print("[TypeSystem] Type {} has no field named {}\n", type_name, field_name);
throw std::runtime_error("lookup_field failed");
}
return field;
}
/*!
* Make sure a field is located at the specified offset.
*/
void TypeSystem::assert_field_offset(const std::string& type_name,
const std::string& field_name,
int offset) {
@ -282,30 +424,63 @@ void TypeSystem::assert_field_offset(const std::string& type_name,
}
}
StructureType* TypeSystem::add_builtin_structure(const std::string& parent,
const std::string& type_name) {
add_type(type_name, std::make_unique<StructureType>(parent, type_name));
return get_type_of_type<StructureType>(type_name);
}
/*!
* Add a field to a type. If offset_override is -1 (the default), will place it automatically.
*/
int TypeSystem::add_field_to_type(StructureType* type,
const std::string& field_name,
const TypeSpec& field_type,
bool is_inline,
bool is_dynamic,
int array_size,
int offset_override) {
if (type->lookup_field(field_name, nullptr)) {
fmt::print("[TypeSystem] Type {} already has a field named {}\n", type->get_name(), field_name);
throw std::runtime_error("add_field_to_type duplicate field names");
}
BasicType* TypeSystem::add_builtin_basic(const std::string& parent, const std::string& type_name) {
add_type(type_name, std::make_unique<BasicType>(parent, type_name));
return get_type_of_type<BasicType>(type_name);
}
// first, construct the field
Field field(field_name, field_type);
if (is_inline) {
field.set_inline();
}
ValueType* TypeSystem::add_builtin_value_type(const std::string& parent,
const std::string& type_name,
int size,
bool boxed,
bool sign_extend,
RegKind reg) {
add_type(type_name,
std::make_unique<ValueType>(parent, type_name, boxed, size, sign_extend, reg));
return get_type_of_type<ValueType>(type_name);
}
if (is_dynamic) {
field.set_dynamic();
type->set_dynamic();
}
void TypeSystem::builtin_structure_inherit(StructureType* st) {
st->inherit(get_type_of_type<StructureType>(st->get_parent()));
if (array_size != -1) {
field.set_array(array_size);
}
int offset = offset_override;
int field_alignment = get_alignment_in_type(field);
if (offset == -1) {
// we need to compute the offset ourself!
offset = align(type->get_size_in_memory(), field_alignment);
} else {
int aligned_offset = align(type->get_size_in_memory(), field_alignment);
if (offset != aligned_offset) {
fmt::print(
"[TypeSystem] Tried to overwrite offset of field to be {}, but it is not aligned "
"correctly\n",
offset);
throw std::runtime_error("add_field_to_type bad offset_override");
}
}
field.set_offset(offset);
field.set_alignment(field_alignment);
int after_field = offset + get_size_in_type(field);
if (type->get_size_in_memory() < after_field) {
type->override_size_in_memory(after_field);
}
type->add_field(field, type->get_size_in_memory());
return offset;
}
/*!
@ -417,6 +592,41 @@ void TypeSystem::add_builtin_types() {
// don't inherit
}
/*!
* Debugging function to print out all types, and their methods and fields.
*/
std::string TypeSystem::print_all_type_information() const {
std::string result;
for (auto& kv : m_types) {
result += kv.second->print() + "\n";
}
return result;
}
/*!
* Get the next free method ID of a type.
*/
int TypeSystem::get_next_method_id(Type* type) {
  MethodInfo last_method;

  // walk from the given type up through its parents until one of them has a method of its own
  for (Type* current = type;; current = lookup_type(current->get_parent())) {
    if (current->get_my_last_method(&last_method)) {
      // the next free ID is one past the last method found
      return last_method.id + 1;
    }

    if (!current->has_parent()) {
      // nobody has defined any method yet. New is special and doesn't use this, so we return
      // one after new.
      return 1;
    }
  }
}
/*!
* For debugging, todo remove.
*/
int TypeSystem::manual_add_field_to_type(StructureType* type,
const std::string& field_name,
const TypeSpec& field_type,
@ -431,90 +641,29 @@ int TypeSystem::manual_add_field_to_type(StructureType* type,
return offset;
}
int TypeSystem::add_field_to_type(StructureType* type,
const std::string& field_name,
const TypeSpec& field_type,
bool is_inline,
bool is_dynamic,
int array_size,
int offset_override) {
if (type->lookup_field(field_name, nullptr)) {
fmt::print("[TypeSystem] Type {} already has a field named {}\n", type->get_name(), field_name);
throw std::runtime_error("add_field_to_type duplicate field names");
/*!
* Lookup a field of a type by name
*/
Field TypeSystem::lookup_field(const std::string& type_name, const std::string& field_name) {
auto type = get_type_of_type<StructureType>(type_name);
Field field;
if (!type->lookup_field(field_name, &field)) {
fmt::print("[TypeSystem] Type {} has no field named {}\n", type_name, field_name);
throw std::runtime_error("lookup_field failed");
}
// first, construct the field
Field field(field_name, field_type);
if (is_inline) {
field.set_inline();
}
if (is_dynamic) {
field.set_dynamic();
type->set_dynamic();
}
if (array_size != -1) {
field.set_array(array_size);
}
int offset = offset_override;
int field_alignment = get_alignment_in_type(field);
if (offset == -1) {
// we need to compute the offset ourself!
offset = align(type->get_size_in_memory(), field_alignment);
} else {
int aligned_offset = align(type->get_size_in_memory(), field_alignment);
if (offset != aligned_offset) {
fmt::print(
"[TypeSystem] Tried to overwrite offset of field to be {}, but it is not aligned "
"correctly\n",
offset);
throw std::runtime_error("add_field_to_type bad offset_override");
}
}
field.set_offset(offset);
field.set_alignment(field_alignment);
int after_field = offset + get_size_in_type(field);
if (type->get_size_in_memory() < after_field) {
type->override_size_in_memory(after_field);
}
type->add_field(field, type->get_size_in_memory());
return offset;
}
std::string TypeSystem::print_all_type_information() const {
std::string result;
for (auto& kv : m_types) {
result += kv.second->print() + "\n";
}
return result;
}
void TypeSystem::assert_method_id(const std::string& type_name,
const std::string& method_name,
int id) {
auto info = lookup_method(type_name, method_name);
if (info.id != id) {
fmt::print(
"[TypeSystem] Method ID assertion failed: type {}, method {} id was {}, expected {}\n",
type_name, method_name, info.id, id);
}
}
Type* TypeSystem::lookup_type(const TypeSpec& ts) {
return lookup_type(ts.base_type());
return field;
}
/*!
* Get the minimum required alignment of a field.
*/
int TypeSystem::get_alignment_in_type(const Field& field) {
auto field_type = lookup_type(field.type());
if (field.is_inline()) {
if (field.is_array()) {
// TODO - is this actually correct? or do we use in_memory for the first element and
// inline_array for the ones that follow?
return field_type->get_inline_array_alignment();
} else {
// it is an inlined field, so return the alignment in memory
@ -532,6 +681,10 @@ int TypeSystem::get_alignment_in_type(const Field& field) {
return POINTER_SIZE;
}
/*!
* Get the size of a field in a type. The array sizes should be consistent with get_deref_info's
* stride.
*/
int TypeSystem::get_size_in_type(const Field& field) {
if (field.is_dynamic()) {
return 0;
@ -564,4 +717,45 @@ int TypeSystem::get_size_in_type(const Field& field) {
}
}
}
}
/*!
* Add a simple structure type - don't use this outside of add_builtin_types as it forces you to do
* things in the wrong order.
*/
StructureType* TypeSystem::add_builtin_structure(const std::string& parent,
                                                 const std::string& type_name) {
  // build the type, hand ownership to the type system, then fetch it back as a StructureType
  auto new_type = std::make_unique<StructureType>(parent, type_name);
  add_type(type_name, std::move(new_type));
  return get_type_of_type<StructureType>(type_name);
}
/*!
* Add a simple basic type - don't use this outside of add_builtin_types as it forces you to do
* things in the wrong order.
*/
BasicType* TypeSystem::add_builtin_basic(const std::string& parent, const std::string& type_name) {
  // build the type, hand ownership to the type system, then fetch it back as a BasicType
  auto new_type = std::make_unique<BasicType>(parent, type_name);
  add_type(type_name, std::move(new_type));
  return get_type_of_type<BasicType>(type_name);
}
/*!
* Add a simple value type - don't use this outside of add_builtin_types as it forces you to do
* things in the wrong order.
*/
ValueType* TypeSystem::add_builtin_value_type(const std::string& parent,
                                              const std::string& type_name,
                                              int size,
                                              bool boxed,
                                              bool sign_extend,
                                              RegKind reg) {
  // note: ValueType is constructed with boxed before size, unlike this function's parameter order
  auto new_type = std::make_unique<ValueType>(parent, type_name, boxed, size, sign_extend, reg);
  add_type(type_name, std::move(new_type));
  return get_type_of_type<ValueType>(type_name);
}
/*!
* Helper for inheritance of structure types when setting up builtin types.
*/
void TypeSystem::builtin_structure_inherit(StructureType* st) {
  // the parent is looked up by name and must itself be a StructureType
  auto* parent_type = get_type_of_type<StructureType>(st->get_parent());
  st->inherit(parent_type);
}

View file

@ -10,23 +10,52 @@
#include "TypeSpec.h"
#include "Type.h"
// Result of TypeSystem::lookup_field_info: a field of a type, plus how it should be accessed.
struct FieldLookupInfo {
// the field itself, as found by lookup_field
Field field;
// type of the accessed field: a pointer or inline-array TypeSpec of the field's type for array
// fields, otherwise the field's own type
TypeSpec type;
// lookup_field_info sets this to false for array fields and for inline fields, and to true for
// plain (non-array, non-inline) fields
bool needs_deref = true;
// number of elements for array fields that are not dynamic (usable for bounds checks), else -1
int array_size = -1;
};
// Result of TypeSystem::get_deref_info: what happens when an object of some type is dereferenced.
// These are only the defaults of a fresh DerefInfo; get_deref_info overwrites reg and mem_deref
// before looking at the type.
struct DerefInfo {
// can the type be dereferenced at all? Only set for "pointer" and "inline-array" types.
bool can_deref = false;
// does the dereference load from memory? False for inline-array, where the access is just
// adding stride*idx to the pointer.
bool mem_deref = false;
// should the loaded value be sign extended? Only taken from the type for pointers to value types.
bool sign_extend = false;
// kind of register the result belongs in
RegKind reg = RegKind::INVALID;
// spacing between consecutive elements, -1 if never set
int stride = -1;
// type of the result of the dereference (the single type argument of the pointer/inline-array)
TypeSpec result_type;
};
class TypeSystem {
public:
TypeSystem();
void add_builtin_types();
void add_builtin_globals();
void forward_declare_type(std::string name);
Type* add_type(const std::string& name, std::unique_ptr<Type> type);
void forward_declare_type(std::string name);
std::string get_runtime_type(const TypeSpec& ts);
DerefInfo get_deref_info(const TypeSpec& ts);
TypeSpec make_typespec(const std::string& name);
TypeSpec make_function_typespec(const std::vector<std::string>& arg_types,
const std::string& return_type);
TypeSpec make_pointer_typespec(const std::string& type);
TypeSpec make_pointer_typespec(const TypeSpec& type);
TypeSpec make_inline_array_typespec(const std::string& type);
TypeSpec make_inline_array_typespec(const TypeSpec& type);
Type* lookup_type(const TypeSpec& ts);
Type* lookup_type(const std::string& name);
MethodInfo add_method(Type* type, const std::string& method_name, const TypeSpec& ts);
MethodInfo lookup_method(const std::string& type_name, const std::string& method_name);
Field lookup_field(const std::string& type_name, const std::string& field_name);
MethodInfo add_new_method(Type* type, const TypeSpec& ts);
MethodInfo lookup_method(const std::string& type_name, const std::string& method_name);
MethodInfo lookup_new_method(const std::string& type_name);
void assert_method_id(const std::string& type_name, const std::string& method_name, int id);
FieldLookupInfo lookup_field_info(const std::string& type_name, const std::string& field_name);
void assert_field_offset(const std::string& type_name, const std::string& field_name, int offset);
int add_field_to_type(StructureType* type,
const std::string& field_name,
const TypeSpec& field_type,
@ -35,17 +64,13 @@ class TypeSystem {
int array_size = -1,
int offset_override = -1);
Type* lookup_type(const TypeSpec& ts);
Type* lookup_type(const std::string& name);
TypeSpec make_typespec(const std::string& name);
TypeSpec make_function_typespec(const std::vector<std::string>& arg_types,
const std::string& return_type);
void assert_method_id(const std::string& type_name, const std::string& method_name, int id);
void assert_field_offset(const std::string& type_name, const std::string& field_name, int offset);
void add_builtin_types();
std::string print_all_type_information() const;
/*!
* Get a type by name and cast to a child class of Type*. Must succeed.
*/
template <typename T>
T* get_type_of_type(const std::string& type_name) {
auto x = lookup_type(type_name);
@ -56,10 +81,10 @@ class TypeSystem {
return result;
}
private:
int get_size_in_type(const Field& field);
int get_alignment_in_type(const Field& field);
private:
Field lookup_field(const std::string& type_name, const std::string& field_name);
int get_next_method_id(Type* type);
int manual_add_field_to_type(StructureType* type,
const std::string& field_name,

View file

@ -1,5 +1,117 @@
Type System
--------------
This document explains the GOAL type system. The GOAL type system supports runtime typing, single inheritance, virtual methods, and dynamically sized structures.
Everything in GOAL has a type at compile time. A subset of compile-time types are also available in the runtime as objects with the same name as the type. For example, there is a `string` type, and at runtime there is a global object named `string` which is an object of type `type` containing information about the `string` type.
Some objects have runtime type information, and others don't. Objects which have runtime type information can have their type identified at runtime, and are called "boxed objects". Objects without runtime type information are called "unboxed objects". An unboxed object cannot reliably be detected as an unboxed object - you can't write a function that takes an arbitrary object and tells you if it's boxed or not. However, boxed objects can always be recognized as boxed.
All types have a parent type, and all types descend from the parent type `object`, except for the special type `none`. The `none` type doesn't exist in the runtime and is used to represent an invalid value that the compiler should not use. For example, the return type of a function which doesn't return anything is `none`, and attempting to use this value should cause an error.
Here are some important special types:
- `object` - the parent of all types
- `structure` - parent type of any type with fields
- `basic` - parent type of any `structure` with runtime type information.
All types have methods. Objects have access to all of their parents' methods, and may override parent methods. All types have these 9 methods:
- `new` - like a constructor, returns a new object. It's not used in all cases or on all types, and needs more documentation.
- `delete` - basically unused, but like a destructor. Often calls `kfree`, which does nothing.
- `print` - prints a short, one line representation of the object to the `PrintBuffer`
- `inspect` - prints a multi-line description of the object to the `PrintBuffer`. Usually auto-generated by the compiler and prints out the name and value of each field.
- `length` - Returns a length if the type has something like a length (number of characters in string, etc). Otherwise returns 0. Usually returns the number of filled slots, instead of the total number of allocated slots, when there is possibly a difference.
- `asize-of` - Gets the size in memory of the entire object. Usually this just looks this up from the appropriate `type`, unless it's dynamically sized.
- `copy` - Create a copy of this object on the given heap. Not used very much?
- `relocate` - Some GOAL objects will be moved in memory by the kernel as part of the compacting actor heap system. After being moved, the `relocate` method will be called with the offset of the move, and the object should fix up any internal pointers which may point to the old location. It's also called on v2 objects loaded by the linker when they are first loaded into memory.
- `memusage` - Not understood yet, but probably returns how much memory in bytes the object uses. Not supported by all objects.
Usually a method which overrides a parent method will have the same argument and return types. The only exception is `new` methods, which can have different argument/return types from the parent.
The compiler's implementation for calling a method is:
- Is the type a basic?
- If so, look up the type using runtime type information
- Get the method from the vtable
- Is the type not a basic?
- Get the method from the vtable of the compile-time type
- Note that this process isn't very efficient - instead of directly linking to the slot in the vtable (one deref) it first looks up the `type` by symbol, then the slot (two derefs). I have no idea why it's done this way.
In general, I suspect that the method system was modified after GOAL was first created. There is some evidence that types were once stored in the symbol table, but were removed because the symbol table became full. This could explain some of the weirdness around method calls/definition rules, and the disaster `method-set!` function.
GOAL Value Types
--------------------
Some GOAL types are "value types", meaning they are passed by value when used as arguments to functions, return values from functions, local variables, and when using `set!`. These are always very small and fit directly into the CPU registers. Some example value types:
- Floating point numbers
- Integers
GOAL Reference Types
--------------------
Other GOAL types are "reference types", meaning they act like a reference to data when used as arguments to functions, return values from functions, local variables, and when using `set!`. The data can be allocated on a heap, on the stack, or as part of static data included when loading code (which is technically also on a heap). All structure/basic types are reference types.
You can think of these like C/C++ pointers or references, which is how it is implemented. The difference is that there's no special notation for this. A GOAL `string` object is like a C/C++ `string*` or `string&`. A GOAL "pointer to reference type" is like a C/C++ `my_type**`.
Note - this is quite a bit different from C/C++. In C++ you can have a structure with value semantics (normal), or reference semantics (C++ reference or pointer). In GOAL, there is no value semantics for structures! This is great because it means function arguments/variables always fit into registers.
GOAL Fields
--------------
GOAL field definitions look like this:
`(name type-name [optional stuff])`
where optional stuff can include these, in any order:
- `:inline #t` (default is false), to mark field as inline. This can only be done for a reference type, and indicates that the data should be stored inline, in the type, rather than just storing a reference to data stored elsewhere.
- `:dynamic #t` (default is false), to mark field as dynamically-sized array (must be the last field in the type)
- a number, to give an array size.
- `:offset x` where x is a number, to manually specify where the field is located
There are many combinations of reference/value, dynamic/not-dynamic, inline/not-inline, array-size/no-array-size, and it can be confusing. This list explains all that are valid.
- Value type, no modifiers: a single value is stored in the field. The field type is the value type.
- Value type, `:dynamic #t`: the field marks the beginning of an array (of unknown size). Field type is `(pointer your-type)`
- Value type, with array size: the field marks the beginning of an array (of known size). Field type is `(pointer your-type)`
- Value type, with `:inline #t`: invalid in all cases.
- Reference type, no modifiers: a single reference is stored in the type. Type of field is `your-type` (a C++ pointer).
- Reference type, `:inline #t`: a single object is stored inside the type. Type of field is `your-type` still (a C++ pointer). The access logic is different to make this work.
- Reference type, `:dynamic #t` or array size: the field marks the beginning of an **array of references**. Field type is `(pointer your-type)`. Like C array of pointers.
- Reference type, `:inline #t` and (`:dynamic #t` or array size): the field marks the beginning of an **array of inline objects**. Field type is `(inline-array your-type)`. Like C array of structs.
Bonus ones, for where the array is stored _outside_ of the type:
- A dynamically typed GOAL array, stored outside your type (think `std::vector`): use `(name (array your-type))`
- A dynamically typed GOAL array, stored inside your type: Not allowed, `array` is dynamic!
- An array of value types, stored outside your type: use `(name (pointer your-type))`
- An array of references (C++ array of pointers), stored outside your type: use `(name (pointer your-ref-type))`
- An array of objects of reference type (C++ array of structs), stored outside your type: use `(name (inline-array your-ref-type))`
Of course, you can combine these, to get even more confusing types! But this seems uncommon.
GOAL Field Placement
---------------------
The exact rules for placing fields in GOAL types are unknown, but the simple approach of "place the next field as close as possible to the end of the last field" seems to get it right almost all the time. However, we need to be extra certain that we lay out type fields correctly because many GOAL types have overlapping fields.
The theory I'm going with for now is:
- The order of fields in the `inspect` method is the order fields are listed in the type definition
- In the rare cases this is wrong, this is due to somebody manually specifying an offset.
As a result, we should specify offsets like this:
- If we think a field was manually placed, use `:offset` to override. This is certain to be right
- If we think a field was automatically placed, use `:offset-assert` to inform the compiler where we expect it to be. In this case it will still place the field automatically, but if the result is different from the `:offset-assert`, it will throw an error.
- Avoid defining any fields without `:offset` or `:offset-assert`
GOAL Arrays
---------------
For value types, arrays work as you expect. They have type `(pointer your-type)`. Arrays of references come in two versions:
- Array of references: `(pointer your-type)`, like a C array of pointers
- Array of inline objects: `(inline-array your-type)`, like a C array of structs
The default alignment of structs is 16 bytes, which is also the minimum alignment of `kmalloc`, and the minimum alignment used when using a reference type as an inline field. However, it's possible to violate this rule in an `(inline-array your-type)` to be more efficient. The `your-type` can set a flag indicating it should be packed in an inline array.
I believe the alignment then becomes the maximum of the minimum alignment of the `your-type` fields. So if you have a type with two `uint32`s (alignment 4 bytes), an `(inline-array your-type)` can then have spacing of 8 bytes, instead of the usual minimum 16. The behavior of a `(field-name your-type :inline #t)` is unchanged and will still align at the minimum of 16 bytes. I _believe_ that the first element of the array will still have an alignment of 16.
There's a single type system library, located in `common/type_system`. It will be used in both the decompiler and compiler. The plan is to have a single `all_types.gc` file which contains all type information (type definitions and types of globals). The decompiler will help generate this, but some small details may need to be filled in manually for some types. Later versions of the decompiler can use this information to figure out what fields of types are being accessed. We can also add a test to make sure that types defined in the decompiled game match `all_types.gc`.
@ -14,6 +126,46 @@ There's a single type system library, located in `common/type_system`. It will
- Lowest common ancestor implementation for compiler to figure out return types for branching forms.
- Logic to catch multiple incompatible type definitions for both compiler warnings and decompiler sanity checks
Compile Time vs. Run Time types
------------------------
The types in the runtime are only a subset of the compile time types. Here are the rules I've discovered so far:
- Any compound types become just the first type. So `(pointer my-type)` becomes `pointer`.
- The `inline-array` class just becomes `pointer`.
- Some children of integers disappear, but others don't. The rules for this aren't known yet.
Special `_type_` for methods
----------------------------
The first argument of a method always contains the object that the method is being called on. It also must have the type `_type_`, which will be substituted by the type system (at compile time) using the following rules:
- At method definition: replace with the type that the method is being defined for.
- At method call: replace with the compile-time type of the object the method is being called on.
A method can have other arguments or a return value that's of type `_type_`. This special "type" will be replaced __at compile time__ with the type which is defining or calling the method. No part of this exists at runtime. It may seem weird, but there are two uses for this.
The first is to allow children to specialize methods and have their own child type as an argument type. For example, say you have a method `is-same-shape`, which compares two objects and sees if they are the same shape. Suppose you first defined this for type `square` with
```
(defmethod square is-same-shape ((obj1 square) (obj2 square))
(= (-> obj1 side-length) (-> obj2 side-length))
)
```
Then, if you created a child class of `square` called `rectangle` (this is a terrible way to use inheritance, but it's just an example), and overrode the `is-same-shape` method, you would have to have arguments that are `square`s, which blocks you from accessing `rectangle`-specific fields. The solution is to define the original method with type `_type_` for the first two arguments. Then, the method defined for `rectangle` also will have arguments of type `_type_`, which will expand to `rectangle`.
The second use is for a return value. For example, the `print` and `inspect` methods both return the object that is passed to them, which will always be the same type as the argument passed in. If `print` was defined as `(function object object)`, then `(print my-square)` would lose the information that the return object is a `square`. If `print` is a `(function _type_ _type_)`, the type system will know that `(print my-square)` will return a `square`.
Inline Array Class
--------------------
There's a weird `inline-array-class` type that's not fully understood yet. It uses `heap-base`.
Heap Base
--------------
This is a field in `type`. What does it mean? It's zero for most types (at least the early types).
Second Size Field
-------------------
There are two fields in `type` for storing the size. The first one stores the exact size, and by default the second stores the size rounded up to the nearest 16 bytes. Why? Who uses it? Does it ever get changed?
The Type System
-------------------
The type system will store:
@ -35,23 +187,33 @@ Method System
All type definitions should also define all the methods, in the order they appear in the vtable. I suspect GOAL had this as well because the method ordering otherwise seems random, and in some cases impossible to get right unless (at least) the number of methods was specified in the type declaration.
Todo
---------
- [ ] Difference between "runtime" and "compile time" types?
- [ ] `inline-array` vs `pointer`
- [ ] Arrays which aren't `array`s and aren't fields.
- [ ] `type_of_field` (returning the correct `pointer`, `inline-array` type for arrays/dynamics)
- [ ] `deref_type`
- [x] Difference between "runtime" and "compile time" types?
- [ ] `inline-array` and `pointer`
- [x] Arrays which aren't `array`s and aren't fields.
- [x] `lookup_field_info` (returning the correct field type for arrays/dynamics, info about how to deref)
- [x] `deref_info`
- [ ] Finish builtin types
- [ ] Tests for builtin types
- [ ] Tests for...
- [ ] Builtin types
- [ ] Methods
- [ ] Multiple definition checks
- [ ] Deref
- [ ] Array access
- [ ] Field creation
- [ ] Support for `_type_` / method specific stuff. (maybe this should live outside the type system?)
- [ ] Ability to export type in `deftype` form.
- [ ] Tests for multiple definition checks
- [x] Multiple definition checks
- [ ] Reverse field (offset/deref to fields)
- [ ] Type Checking
- [ ] Function Specs
- [ ] Lowest Common Ancestor
- [ ] Document `:inline`, `:dynamic:` and field arrays.
- [ ] Document alignment rules
- [x] Document `:inline`, `:dynamic` and field arrays.
- [x] Document alignment rules
- [ ] Structure type with itself as a field
- [ ] Ability to read a `deftype` form.
- [ ] In the decompiler
- [ ] In the compiler, with the ability to do constant propagation and put things like `(+ 1 2)` or `MY_CONSTANT` as compile-time array size constants.
- [ ] In the compiler, with the ability to do constant propagation and put things like `(+ 1 2)` or `MY_CONSTANT` as compile-time array size constants by providing a function evaluating an `Object` to an `int`.
- [ ] Bitfield types