support for i128 (#331)

This commit is contained in:
water111 2021-03-23 15:56:23 -04:00 committed by GitHub
parent 9ffc6014e1
commit 0d8742241b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 105 additions and 43 deletions

View file

@ -124,3 +124,4 @@
## V0.7
- There is now an option for `allow-misaligned` which allows the alignment of a struct type to be less than 16 bytes when inlined, without enabling array packing. This seems like a stupid option, but GOAL has this in some places, so we support it too.
- In method declarations in a `deftype`, you can no longer provide argument names. There was ambiguity when parsing a compound typespec vs named argument. The names were not used for anything.
- 128-bit integer register variables (`i128`) are now supported. These work with assembly forms, `set!`s between registers, and `set!`s of memory locations with type `(pointer uint128)` or `(pointer int128)`.

View file

@ -209,6 +209,14 @@
;; NOTE: there is a print/inspect for vec4s that is not implemented.
;; print128: print a 128-bit value to STREAM (defaults to #t) as "#x" followed by
;; two 64-bit halves formatted with ~16X.
;; NOTE(review): `temp` is a plain symbol, not a gensym, and VALUE is spliced inside
;; the `let` that binds it - a VALUE expression that itself refers to a variable named
;; `temp` would see this binding instead. Confirm callers never do that.
(defmacro print128 (value &key (stream #t))
"Print a 128-bit value"
;; Stage the value in a two-element uint64 array on the stack so it can be read back in halves.
`(let ((temp (new 'stack 'array 'uint64 2)))
;; Write all 128 bits in one store by viewing the array as a (pointer uint128).
(set! (-> (the (pointer uint128) temp)) ,value)
;; Element 1 is printed before element 0 (presumably the upper 64 bits first on a
;; little-endian target - TODO confirm).
(format ,stream "#x~16X~16X" (-> temp 1) (-> temp 0))
)
)
;; A "boxed float" type. Simply a float with type information.
(deftype bfloat (basic)
((data float :offset-assert 4))
@ -1001,18 +1009,14 @@
- Ascending address copy."
(local-vars (result pointer) (qwc int))
(set! result dst)
;; round up to nearest quadword count.
(set! qwc (sar (+ size 15) 4))
(while (nonzero? qwc)
(set! qwc (+ qwc -1))
;; EE quadword loads and stores mask the addresses, so we do too.
;; for now, use the vector float because we don't have 128-bit integer support.
;;(.lq value 0 src)
;;(.sq value 0 dst)
(rlet ((value :class vf))
(.lvf value (logand #xfffffff0 (the uint src)))
(.svf (logand #xfffffff0 (the uint dst)) value)
)
;; Use 128-bit OpenGOAL integers to do copy by quadword.
(set! (-> (the (pointer uint128) dst))
(-> (the (pointer uint128) src)))
(set! dst (&+ dst 16))
(set! src (&+ src 16))
@ -1031,23 +1035,19 @@
(src-ptr pointer)
(dst-ptr pointer)
)
(set! result dst)
(set! qwc (sar (+ size 15) 4))
;; start at the end
(set! src-ptr (&+ dst (the-as uint (shl qwc 4))))
(set! dst-ptr (&+ src (the-as uint (shl qwc 4))))
(set! dst-ptr (&+ dst (the-as uint (shl qwc 4))))
(set! src-ptr (&+ src (the-as uint (shl qwc 4))))
(while (nonzero? qwc)
(set! qwc (+ qwc -1))
(set! src-ptr (&+ src-ptr (the-as uint -16)))
(set! dst-ptr (&+ dst-ptr (the-as uint -16)))
;; EE quadword loads and stores mask the address, so we do too.
;;(.lq value 0 dst-ptr)
;;(.sq value 0 src-ptr)
(rlet ((value :class vf))
(.lvf value (logand #xfffffff0 (the uint src)))
(.svf (logand #xfffffff0 (the uint dst)) value)
)
(set! (-> (the (pointer uint128) dst-ptr))
(-> (the (pointer uint128) src-ptr)))
)
result
)

View file

@ -17,17 +17,18 @@ The state handlers are:
- exit : gets run when leaving a state. must return.
- event : not sure of the details here yet.
You can "go" to another state. This causes the current main thread execution to be abandoned.
You can use "go" to change the state of a process. This causes the process main thread execution to be abandoned.
If the main thread has exits/protects on the stack frame, they will be run first to clean up.
There are several ways to "go"
- go during init: when a process is being initialized with run-function-in-process, you can "go".
this causes the run-function-in-process to return, and the next time the process is dispatched
this causes the run-function-in-process to return immediately, and the next time the process is dispatched
it will go into the other state. This will automatically set the process to waiting-to-run,
and shrink the process heap, if appropriate
- go from outside the process. You can temporarily set pp to another process, and have that
process go to another state. The actual go will occur the next time the process is scheduled.
Use the go-process macro to do this.
- go from a non-main thread in the right process. You can do a go from a temporary thread, like trans or post.
If you do it from post, the go returns. If you do it from any other thread, the temporary thread
@ -36,22 +37,10 @@ There are several ways to "go"
- go from the main thread of the main process. This causes the (-> pp state) to change, the stack frames
to be cleaned up, and the old state's exit to be called. It will reset the stack, then run the code.
Unlike the others, this means you "go" immediately.
|#
;; fancy macro to accept variable arguments for go.
;; (defmacro go (next-state &rest args)
;; (if (< 6 (length args))
;; (error "too many arguments to go")
;; (let ((zero-args (repeated-list 0 (- 6 (length args)))))
;; `(with-pp
;; (set! (-> pp next-state) ,next-state)
;; (enter-state ,@args ,@zero-args)
;; )
;; )
;; )
;; )
;; cause the current process to change state
(defmacro go (next-state &rest args)
`(with-pp

View file

@ -83,6 +83,9 @@ void regset_common(emitter::ObjectGenerator* gen,
auto src_class = src->ireg().reg_class;
auto dst_class = dst->ireg().reg_class;
bool src_is_xmm128 = (src_class == RegClass::VECTOR_FLOAT || src_class == RegClass::INT_128);
bool dst_is_xmm128 = (dst_class == RegClass::VECTOR_FLOAT || dst_class == RegClass::INT_128);
if (src_class == RegClass::GPR_64 && dst_class == RegClass::GPR_64) {
if (src_reg == dst_reg) {
// eliminate move
@ -97,7 +100,7 @@ void regset_common(emitter::ObjectGenerator* gen,
} else {
gen->add_instr(IGen::mov_xmm32_xmm32(dst_reg, src_reg), irec);
}
} else if (src_class == RegClass::VECTOR_FLOAT && dst_class == RegClass::VECTOR_FLOAT) {
} else if (src_is_xmm128 && dst_is_xmm128) {
if (src_reg == dst_reg) {
// eliminate move
gen->add_instr(IGen::null(), irec);
@ -110,13 +113,13 @@ void regset_common(emitter::ObjectGenerator* gen,
} else if (src_class == RegClass::GPR_64 && dst_class == RegClass::FLOAT) {
// gpr -> xmm 1x
gen->add_instr(IGen::movd_xmm32_gpr32(dst_reg, src_reg), irec);
} else if (src_class == RegClass::VECTOR_FLOAT && dst_class == RegClass::FLOAT) {
} else if (src_is_xmm128 && dst_class == RegClass::FLOAT) {
gen->add_instr(IGen::mov_xmm32_xmm32(dst_reg, src_reg), irec);
} else if (src_class == RegClass::FLOAT && dst_class == RegClass::VECTOR_FLOAT) {
} else if (src_class == RegClass::FLOAT && dst_is_xmm128) {
gen->add_instr(IGen::mov_xmm32_xmm32(dst_reg, src_reg), irec);
} else if (src_class == RegClass::GPR_64 && dst_class == RegClass::VECTOR_FLOAT) {
} else if (src_class == RegClass::GPR_64 && dst_is_xmm128) {
gen->add_instr(IGen::movd_xmm32_gpr32(dst_reg, src_reg), irec);
} else if (src_class == RegClass::VECTOR_FLOAT && dst_class == RegClass::GPR_64) {
} else if (src_is_xmm128 && dst_class == RegClass::GPR_64) {
gen->add_instr(IGen::movd_gpr32_xmm32(dst_reg, src_reg), irec);
} else {
assert(false); // unhandled move.
@ -861,10 +864,13 @@ void IR_LoadConstOffset::do_codegen(emitter::ObjectGenerator* gen,
gen->add_instr(
IGen::load_goal_xmm32(dest_reg, base_reg, emitter::gRegInfo.get_offset_reg(), m_offset),
irec);
} else if (m_dest->ireg().reg_class == RegClass::VECTOR_FLOAT && m_info.size == 16 &&
m_info.sign_extend == false && m_info.reg == RegClass::VECTOR_FLOAT) {
} else if ((m_dest->ireg().reg_class == RegClass::VECTOR_FLOAT ||
m_dest->ireg().reg_class == RegClass::INT_128) &&
m_info.size == 16 && m_info.sign_extend == false &&
m_info.reg == m_dest->ireg().reg_class) {
gen->add_instr(
IGen::load_goal_vf(dest_reg, base_reg, emitter::gRegInfo.get_offset_reg(), m_offset), irec);
IGen::load_goal_xmm128(dest_reg, base_reg, emitter::gRegInfo.get_offset_reg(), m_offset),
irec);
} else {
throw std::runtime_error("IR_LoadConstOffset::do_codegen not supported");
}
@ -905,7 +911,9 @@ void IR_StoreConstOffset::do_codegen(emitter::ObjectGenerator* gen,
gen->add_instr(
IGen::store_goal_xmm32(base_reg, value_reg, emitter::gRegInfo.get_offset_reg(), m_offset),
irec);
} else if (m_value->ireg().reg_class == RegClass::VECTOR_FLOAT && m_size == 16) {
} else if ((m_value->ireg().reg_class == RegClass::VECTOR_FLOAT ||
m_value->ireg().reg_class == RegClass::INT_128) &&
m_size == 16) {
gen->add_instr(
IGen::store_goal_vf(base_reg, value_reg, emitter::gRegInfo.get_offset_reg(), m_offset),
irec);

View file

@ -149,7 +149,7 @@ RegVal* MemoryOffsetVal::to_reg(Env* fe) {
}
RegVal* MemoryDerefVal::to_reg(Env* fe) {
auto re = fe->make_gpr(coerce_to_reg_type(m_ts));
auto re = fe->make_ireg(coerce_to_reg_type(m_ts), info.reg);
auto base_as_co = dynamic_cast<MemoryOffsetConstantVal*>(base);
if (base_as_co) {
s64 offset;

View file

@ -67,6 +67,8 @@ Val* Compiler::compile_rlet(const goos::Object& form, const goos::Object& rest,
register_class = RegClass::FLOAT;
} else if (class_name == "vf") {
register_class = RegClass::VECTOR_FLOAT;
} else if (class_name == "i128") {
register_class = RegClass::INT_128;
} else {
throw_compiler_error(o, "Register class {} is unknown.", class_name);
}

View file

@ -769,7 +769,7 @@ class IGen {
}
}
static Instruction load_goal_vf(Register dst, Register addr, Register off, int offset) {
static Instruction load_goal_xmm128(Register dst, Register addr, Register off, int offset) {
if (offset == 0) {
return loadvf_gpr64_plus_gpr64(dst, addr, off);
} else if (offset >= INT8_MIN && offset <= INT8_MAX) {

View file

@ -0,0 +1,49 @@
;; Test copy of a quadword memory-copy routine: copies SIZE bytes (rounded up to a
;; multiple of 16) from SRC to DST, 16 bytes at a time, and returns the original DST.
(defun test-mem-copy ((dst pointer) (src pointer) (size int))
"Memory copy by quadword. More efficient, but has restrictions:
- dst and src should be 16-byte aligned.
- size in bytes will be rounded up to 16-bytes
- Ascending address copy."
(local-vars (result pointer) (qwc int))
;; Remember the starting destination; dst itself is advanced inside the loop.
(set! result dst)
;; round up to nearest quadword count.
(set! qwc (sar (+ size 15) 4))
;; Count qwc down to zero, copying one quadword per iteration.
(while (nonzero? qwc)
(set! qwc (+ qwc -1))
;; Use 128-bit OpenGOAL integers to do copy by quadword.
(set! (-> (the (pointer uint128) dst))
(-> (the (pointer uint128) src)))
;; Step both pointers forward by one quadword (16 bytes).
(set! dst (&+ dst 16))
(set! src (&+ src 16))
)
;; Return the destination pointer as it was on entry.
result
)
;; Driver for test-mem-copy: build two 128-byte stack buffers, copy one over the
;; other by quadword, then print the destination as eight 128-bit values.
(let ((arr (new 'stack 'array 'uint8 128))
(arr2 (new 'stack 'array 'uint8 128)))
;; arr gets ascending bytes (arr[i] = i); arr2 gets the reverse (arr2[127 - i] = i).
(dotimes (i 128)
(set! (-> arr i) i)
(set! (-> arr2 (- 127 i)) i)
)
;; arr is the destination and arr2 the source, so arr ends up holding arr2's contents.
(test-mem-copy arr arr2 128)
;; Reinterpret the destination as uint128 elements and print each one on its own line.
(let ((arr128 (the (pointer uint128) arr)))
(dotimes (i (/ 128 16))
(format #t "[~d] " i)
(print128 (-> arr128 i))
(format #t "~%")
)
)
)
;; Register round trip: move a value i128 -> vf -> i128 using plain set!s between
;; register variables of the two classes.
(rlet ((x :class i128 :reset-here #t)
(y :class vf :reset-here #t)
(z :class i128 :reset-here #t))
(set! x 12344321)
(set! y x)
;; Clear x after copying it, so the value that reaches z can only have come through y.
(set! x 0)
(set! z y)
;; z is the last form in the rlet body.
z
)

View file

@ -566,6 +566,19 @@ TEST_F(WithGameTests, StaticFieldInlineArray) {
"0\n"});
}
// Runs the GOAL script test-i128-simple.gc through the static-test runner and compares
// what it prints against the expected text below.
TEST_F(WithGameTests, I128Simple) {
  runner.run_static_test(env, testCategory, "test-i128-simple.gc",
                         // The adjacent literals concatenate into one expected-output string:
                         // eight "[i] #x..." lines of 32 hex digits each, then a final line.
                         {"[0] #x707172737475767778797a7b7c7d7e7f\n"
                          "[1] #x606162636465666768696a6b6c6d6e6f\n"
                          "[2] #x505152535455565758595a5b5c5d5e5f\n"
                          "[3] #x404142434445464748494a4b4c4d4e4f\n"
                          "[4] #x303132333435363738393a3b3c3d3e3f\n"
                          "[5] #x202122232425262728292a2b2c2d2e2f\n"
                          "[6] #x101112131415161718191a1b1c1d1e1f\n"
                          "[7] #x000102030405060708090a0b0c0d0e0f\n"
                          // Presumably the value of the script's final form - TODO confirm
                          // against run_static_test.
                          "12344321\n"});
}
TEST(TypeConsistency, TypeConsistency) {
Compiler compiler;
compiler.enable_throw_on_redefines();