mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-20 11:26:18 -04:00
[Decompiler] Make matrix decompile (#341)
* small fixes * update * add instructions * finish matrix * add matrix test cases
This commit is contained in:
parent
64c35ca453
commit
0a76e6e157
|
@ -543,10 +543,21 @@ TypeState AsmOp::propagate_types_internal(const TypeState& input,
|
||||||
TypeState result = input;
|
TypeState result = input;
|
||||||
if (m_dst.has_value()) {
|
if (m_dst.has_value()) {
|
||||||
auto kind = m_dst->reg().get_kind();
|
auto kind = m_dst->reg().get_kind();
|
||||||
if (kind == Reg::GPR || kind == Reg::FPR) {
|
if (kind == Reg::FPR) {
|
||||||
|
result.get(m_dst->reg()) = TP_Type::make_from_ts("float");
|
||||||
|
} else if (kind == Reg::GPR) {
|
||||||
|
for (auto& x : m_src) {
|
||||||
|
if (x && x->reg().get_kind() == Reg::GPR) {
|
||||||
|
auto src_type = result.get(x->reg()).typespec();
|
||||||
|
if (dts.ts.tc(TypeSpec("int128"), src_type) || dts.ts.tc(TypeSpec("uint128"), src_type)) {
|
||||||
|
result.get(m_dst->reg()) = TP_Type::make_from_ts("uint128");
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
result.get(m_dst->reg()) = TP_Type::make_from_ts("int");
|
result.get(m_dst->reg()) = TP_Type::make_from_ts("int");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,11 @@ const std::map<InstructionKind, OpenGOALAsm::Function> MIPS_ASM_TO_OPEN_GOAL_FUN
|
||||||
{InstructionKind::PSRAW, {"TODO.PSRAW", {}}},
|
{InstructionKind::PSRAW, {"TODO.PSRAW", {}}},
|
||||||
{InstructionKind::PSUBW, {"TODO.PSUBW", {}}},
|
{InstructionKind::PSUBW, {"TODO.PSUBW", {}}},
|
||||||
|
|
||||||
|
{InstructionKind::PEXTUW, {".pextuw", {}}},
|
||||||
|
{InstructionKind::PEXTLW, {".pextlw", {}}},
|
||||||
|
{InstructionKind::PCPYLD, {".pcpyld", {}}},
|
||||||
|
{InstructionKind::PCPYUD, {".pcpyud", {}}},
|
||||||
|
|
||||||
// NOTE - depending on how this is used, this may cause issues! Be Warned!
|
// NOTE - depending on how this is used, this may cause issues! Be Warned!
|
||||||
// lots of implicit logic in OpenGOAL depending on argument types!
|
// lots of implicit logic in OpenGOAL depending on argument types!
|
||||||
{InstructionKind::MFC1, {".mov", {}}},
|
{InstructionKind::MFC1, {".mov", {}}},
|
||||||
|
@ -163,6 +168,7 @@ std::vector<goos::Object> OpenGOALAsm::get_args(const std::vector<DecompilerLabe
|
||||||
std::vector<goos::Object> args;
|
std::vector<goos::Object> args;
|
||||||
std::vector<goos::Object> named_args;
|
std::vector<goos::Object> named_args;
|
||||||
|
|
||||||
|
bool got_fsf = false;
|
||||||
for (int i = 0; i < instr.n_src; i++) {
|
for (int i = 0; i < instr.n_src; i++) {
|
||||||
auto v = m_src.at(i);
|
auto v = m_src.at(i);
|
||||||
InstructionAtom atom = instr.get_src(i);
|
InstructionAtom atom = instr.get_src(i);
|
||||||
|
@ -172,12 +178,23 @@ std::vector<goos::Object> OpenGOALAsm::get_args(const std::vector<DecompilerLabe
|
||||||
args.push_back(v.value().to_form(env));
|
args.push_back(v.value().to_form(env));
|
||||||
} else if (atom.kind == InstructionAtom::AtomKind::VF_FIELD) {
|
} else if (atom.kind == InstructionAtom::AtomKind::VF_FIELD) {
|
||||||
// Handle FTF/FSF operations
|
// Handle FTF/FSF operations
|
||||||
if (func.allows_modifier(MOD::FTF) && named_args.size() == 0) {
|
if (func.allows_modifier(MOD::FTF) && func.allows_modifier(MOD::FSF)) {
|
||||||
|
if (got_fsf) {
|
||||||
named_args.push_back(
|
named_args.push_back(
|
||||||
pretty_print::to_symbol(fmt::format(":ftf #b{:b}", atom.get_vf_field())));
|
pretty_print::to_symbol(fmt::format(":ftf #b{:b}", atom.get_vf_field())));
|
||||||
|
} else {
|
||||||
|
got_fsf = true;
|
||||||
|
named_args.push_back(
|
||||||
|
pretty_print::to_symbol(fmt::format(":fsf #b{:b}", atom.get_vf_field())));
|
||||||
|
}
|
||||||
} else if (func.allows_modifier(MOD::FSF)) {
|
} else if (func.allows_modifier(MOD::FSF)) {
|
||||||
named_args.push_back(
|
named_args.push_back(
|
||||||
pretty_print::to_symbol(fmt::format(":fsf #b{:b}", atom.get_vf_field())));
|
pretty_print::to_symbol(fmt::format(":fsf #b{:b}", atom.get_vf_field())));
|
||||||
|
} else if (func.allows_modifier(MOD::FTF)) {
|
||||||
|
named_args.push_back(
|
||||||
|
pretty_print::to_symbol(fmt::format(":ftf #b{:b}", atom.get_vf_field())));
|
||||||
|
} else {
|
||||||
|
assert(false);
|
||||||
}
|
}
|
||||||
} else if (func.allows_modifier(MOD::OFFSET) && atom.kind == InstructionAtom::AtomKind::IMM) {
|
} else if (func.allows_modifier(MOD::OFFSET) && atom.kind == InstructionAtom::AtomKind::IMM) {
|
||||||
// Handle offsetting
|
// Handle offsetting
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include "common/goos/PrettyPrinter.h"
|
#include "common/goos/PrettyPrinter.h"
|
||||||
#include "decompiler/IR2/OpenGoalMapping.h"
|
#include "decompiler/IR2/OpenGoalMapping.h"
|
||||||
#include "decompiler/analysis/reg_usage.h"
|
#include "decompiler/analysis/reg_usage.h"
|
||||||
|
#include "decompiler/ObjectFile/LinkedObjectFile.h"
|
||||||
|
|
||||||
namespace decompiler {
|
namespace decompiler {
|
||||||
|
|
||||||
|
@ -42,7 +43,7 @@ bool rewrite_inline_asm_instructions(Form* top_level_form,
|
||||||
/*lg::warn("[ASM Re-Write] - Unsupported inline assembly instruction kind - [{}]",
|
/*lg::warn("[ASM Re-Write] - Unsupported inline assembly instruction kind - [{}]",
|
||||||
asmOp.instr.kind);*/
|
asmOp.instr.kind);*/
|
||||||
f.warnings.general_warning("Unsupported inline assembly instruction kind - [{}]",
|
f.warnings.general_warning("Unsupported inline assembly instruction kind - [{}]",
|
||||||
asmOp.instr.kind);
|
asmOp.instr.to_string(f.ir2.env.file->labels));
|
||||||
new_entries.push_back(entry);
|
new_entries.push_back(entry);
|
||||||
continue;
|
continue;
|
||||||
} else if (elem->op()->instruction().kind == InstructionKind::VOPMULA) {
|
} else if (elem->op()->instruction().kind == InstructionKind::VOPMULA) {
|
||||||
|
|
|
@ -1205,7 +1205,7 @@
|
||||||
:size-assert #x40
|
:size-assert #x40
|
||||||
:flag-assert #xa00000040
|
:flag-assert #xa00000040
|
||||||
(:methods
|
(:methods
|
||||||
(dummy-9 () none 9)
|
(transform-vectors! (_type_ (inline-array vector) (inline-array vector) int) none 9)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -1446,7 +1446,7 @@
|
||||||
(define-extern matrix-axis-angle! (function matrix vector float none))
|
(define-extern matrix-axis-angle! (function matrix vector float none))
|
||||||
|
|
||||||
|
|
||||||
(define-extern matrix-axis-sin-cos-vu! function)
|
(define-extern matrix-axis-sin-cos-vu! (function matrix vector float float none))
|
||||||
(define-extern trs-matrix-calc! function)
|
(define-extern trs-matrix-calc! function)
|
||||||
(define-extern transform-matrix-parent-calc! function)
|
(define-extern transform-matrix-parent-calc! function)
|
||||||
(define-extern transform-matrix-calc! function)
|
(define-extern transform-matrix-calc! function)
|
||||||
|
|
|
@ -79,7 +79,11 @@
|
||||||
|
|
||||||
"find-parent-method": {
|
"find-parent-method": {
|
||||||
"args": ["child-type", "method-id"],
|
"args": ["child-type", "method-id"],
|
||||||
"vars":{"v0-0":"current-method", "v1-2":"original-method", "v1-5":"unused1"}
|
"vars": {
|
||||||
|
"v0-0": "current-method",
|
||||||
|
"v1-2": "original-method",
|
||||||
|
"v1-5": "unused1"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"ref": {
|
"ref": {
|
||||||
|
@ -136,7 +140,13 @@
|
||||||
},
|
},
|
||||||
"sort": {
|
"sort": {
|
||||||
"args": ["lst", "compare-func"],
|
"args": ["lst", "compare-func"],
|
||||||
"vars":{"s4-0":"unsorted-count", "s3-0":"iter", "s2-0":"first-elt", "s1-0":"seoncd-elt", "v1-1":"compare-result"}
|
"vars": {
|
||||||
|
"s4-0": "unsorted-count",
|
||||||
|
"s3-0": "iter",
|
||||||
|
"s2-0": "first-elt",
|
||||||
|
"s1-0": "seoncd-elt",
|
||||||
|
"v1-1": "compare-result"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"(method 0 inline-array-class)": {
|
"(method 0 inline-array-class)": {
|
||||||
"args": ["allocation", "type-to-make", "size"],
|
"args": ["allocation", "type-to-make", "size"],
|
||||||
|
@ -165,7 +175,13 @@
|
||||||
},
|
},
|
||||||
"qmem-copy->!": {
|
"qmem-copy->!": {
|
||||||
"args": ["dst", "src", "size"],
|
"args": ["dst", "src", "size"],
|
||||||
"vars":{"v0-0":"result", "v1-1":"qwc", "a1-1":"src-ptr", "a0-1":"dst-ptr", "a2-3":"value"}
|
"vars": {
|
||||||
|
"v0-0": "result",
|
||||||
|
"v1-1": "qwc",
|
||||||
|
"a1-1": "src-ptr",
|
||||||
|
"a0-1": "dst-ptr",
|
||||||
|
"a2-3": "value"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"mem-set32!": {
|
"mem-set32!": {
|
||||||
"args": ["dst", "size", "value"],
|
"args": ["dst", "size", "value"],
|
||||||
|
@ -205,7 +221,6 @@
|
||||||
"args": ["this", "rec"]
|
"args": ["this", "rec"]
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
||||||
"(method 0 dead-pool-heap)": {
|
"(method 0 dead-pool-heap)": {
|
||||||
"vars": { "v0-0": ["obj", "dead-pool-heap"] }
|
"vars": { "v0-0": ["obj", "dead-pool-heap"] }
|
||||||
},
|
},
|
||||||
|
@ -341,8 +356,12 @@
|
||||||
|
|
||||||
"string->int": {
|
"string->int": {
|
||||||
"args": ["str"],
|
"args": ["str"],
|
||||||
"vars":{"a0-1":"str-ptr", "v0-0":"result",
|
"vars": {
|
||||||
"a0-2":"next-char-1","a0-3":"next-char-2"}
|
"a0-1": "str-ptr",
|
||||||
|
"v0-0": "result",
|
||||||
|
"a0-2": "next-char-1",
|
||||||
|
"a0-3": "next-char-2"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"string-get-flag!!": {
|
"string-get-flag!!": {
|
||||||
|
@ -350,7 +369,16 @@
|
||||||
},
|
},
|
||||||
|
|
||||||
"(method 0 state)": {
|
"(method 0 state)": {
|
||||||
"args":["allocation", "type-to-make", "name", "code", "trans", "enter", "exit", "event"],
|
"args": [
|
||||||
|
"allocation",
|
||||||
|
"type-to-make",
|
||||||
|
"name",
|
||||||
|
"code",
|
||||||
|
"trans",
|
||||||
|
"enter",
|
||||||
|
"exit",
|
||||||
|
"event"
|
||||||
|
],
|
||||||
"vars": { "v0-0": "obj" }
|
"vars": { "v0-0": "obj" }
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -359,9 +387,190 @@
|
||||||
"vars": { "v1-0": "parent", "v1-2": "child" }
|
"vars": { "v1-0": "parent", "v1-2": "child" }
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// Matrix
|
||||||
|
"matrix-identity": {
|
||||||
|
"args": ["mat"],
|
||||||
|
"vars": { "f0-0": "one" }
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix+!": {
|
||||||
|
"args": ["dst", "src1", "src2"],
|
||||||
|
"vars": { "v1-0": "i" }
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-!": {
|
||||||
|
"args": ["dst", "src1", "src2"],
|
||||||
|
"vars": { "v1-0": "i" }
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix*!": {
|
||||||
|
"args": ["dst", "src1", "src2"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrixp*!": {
|
||||||
|
"args": ["dst", "src1", "src2"],
|
||||||
|
"vars": { "s5-0": "temp-mat" }
|
||||||
|
},
|
||||||
|
|
||||||
|
"vector-matrix*!": {
|
||||||
|
"args": ["dst", "vec", "mat"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"vector-rotate*!": {
|
||||||
|
"args": ["dst", "vec", "mat"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"vector3s-matrix*!": {
|
||||||
|
"args": ["dst", "vec", "mat"],
|
||||||
|
"vars": { "s5-0": "temp-vec3" }
|
||||||
|
},
|
||||||
|
|
||||||
|
"vector3s-rotate*!": {
|
||||||
|
"args": ["dst", "vec", "mat"],
|
||||||
|
"vars": { "s5-0": "temp-vec3" }
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-transpose!": {
|
||||||
|
"args": ["dst", "src"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-inverse-of-rot-trans!": {
|
||||||
|
"args": ["dst", "src"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-4x4-inverse!": {
|
||||||
|
"args": ["dst", "src"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-translate!": {
|
||||||
|
"args": ["dst", "trans"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-translate+!": {
|
||||||
|
"args": ["dst", "src", "trans"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-scale!": {
|
||||||
|
"args": ["dst", "scale"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"scale-matrix!": {
|
||||||
|
"args": ["dst", "scale", "src"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-inv-scale!": {
|
||||||
|
"args": ["dst", "scale"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"column-scale-matrix!": {
|
||||||
|
"args": ["dst", "scale", "src"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-rotate-x!": {
|
||||||
|
"args": ["dst", "rot-deg"],
|
||||||
|
"vars": { "f30-0": "rot-sin", "f0-0": "rot-cos" }
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-rotate-y!": {
|
||||||
|
"args": ["dst", "rot-deg"],
|
||||||
|
"vars": { "f30-0": "rot-sin", "f0-0": "rot-cos" }
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-rotate-z!": {
|
||||||
|
"args": ["dst", "rot-deg"],
|
||||||
|
"vars": { "f30-0": "rot-sin", "f0-0": "rot-cos" }
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-rotate-zyx!": {
|
||||||
|
"args": ["dst", "rot-xyz-deg"],
|
||||||
|
"vars": { "gp-0": "temp-mat", "s5-0": "rot-mat" }
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-rotate-xyz!": {
|
||||||
|
"args": ["dst", "rot-xyz-deg"],
|
||||||
|
"vars": { "gp-0": "temp-mat", "s5-0": "rot-mat" }
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-rotate-zxy!": {
|
||||||
|
"args": ["dst", "rot-xyz-deg"],
|
||||||
|
"vars": { "gp-0": "temp-mat", "s5-0": "rot-mat" }
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-rotate-yxz!": {
|
||||||
|
"args": ["dst", "rot-xyz-deg"],
|
||||||
|
"vars": { "gp-0": "temp-mat", "s5-0": "rot-mat" }
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-rotate-yzx!": {
|
||||||
|
"args": ["dst", "rot-xyz-deg"],
|
||||||
|
"vars": { "gp-0": "temp-mat", "s5-0": "rot-mat" }
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-rotate-yxy!": {
|
||||||
|
"args": ["dst", "rots-deg"],
|
||||||
|
"vars": {
|
||||||
|
"a2-0": "sincos-input",
|
||||||
|
"s5-0": "sin-vec",
|
||||||
|
"s4-0": "cos-vec",
|
||||||
|
"f1-1": "cos-y",
|
||||||
|
"f0-5": "sin-y",
|
||||||
|
"f2-0": "cos-x",
|
||||||
|
"f5-0": "sin-x",
|
||||||
|
"f3-0": "cos-z",
|
||||||
|
"f4-0": "sin-z"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-rotate-yx!": {
|
||||||
|
"args": ["dst", "rot-y-deg", "rot-x-deg"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-axis-angle!": {
|
||||||
|
"args": ["dst", "axis", "angle-deg"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-lerp!": {
|
||||||
|
"args": ["dst", "src1", "src2", "alpha"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-3x3-determinant": {
|
||||||
|
"args": ["mat"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-3x3-inverse!": {
|
||||||
|
"args": ["dst", "src"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-3x3-inverse-transpose!": {
|
||||||
|
"args": ["dst", "src"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix3-inverse-transpose!": {
|
||||||
|
"args": ["dst", "src"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-4x4-determinant": {
|
||||||
|
"args": ["dst", "src"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-4x4-inverse-transpose!": {
|
||||||
|
"args": ["dst", "src"]
|
||||||
|
},
|
||||||
|
|
||||||
|
"matrix-y-angle": {
|
||||||
|
"args": ["mat"],
|
||||||
|
"vars": { "v1-0": "z-row" }
|
||||||
|
},
|
||||||
|
|
||||||
"deg-seek": {
|
"deg-seek": {
|
||||||
"args": ["in", "target", "max-diff"],
|
"args": ["in", "target", "max-diff"],
|
||||||
"vars":{"v1-1":"in-int", "a0-2":"target-int", "a1-2":"max-diff-int", "a2-1":"diff", "a3-0":"abs-diff"}
|
"vars": {
|
||||||
|
"v1-1": "in-int",
|
||||||
|
"a0-2": "target-int",
|
||||||
|
"a1-2": "max-diff-int",
|
||||||
|
"a2-1": "diff",
|
||||||
|
"a3-0": "abs-diff"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"deg-seek-smooth": {
|
"deg-seek-smooth": {
|
||||||
|
@ -410,16 +619,24 @@
|
||||||
},
|
},
|
||||||
"ultimate-memcpy": {
|
"ultimate-memcpy": {
|
||||||
"args": ["dst", "src", "size-bytes"],
|
"args": ["dst", "src", "size-bytes"],
|
||||||
"vars":{"s2-0":"qwc-remaining",
|
"vars": {
|
||||||
|
"s2-0": "qwc-remaining",
|
||||||
"s1-0": "qwc-transferred-now",
|
"s1-0": "qwc-transferred-now",
|
||||||
"s4-0": "spr-to-bank",
|
"s4-0": "spr-to-bank",
|
||||||
"s3-0":"spr-from-bank"}
|
"s3-0": "spr-from-bank"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"dma-buffer-add-vu-function": {
|
"dma-buffer-add-vu-function": {
|
||||||
"args": ["dma-buf", "vu-func"],
|
"args": ["dma-buf", "vu-func"],
|
||||||
"vars":{"t1-1":"dma-buf-2", "v1-0":"func-ptr", "a3-0":"qlen", "a1-1":"origin", "t0-1":"qwc-now",
|
"vars": {
|
||||||
"t2-0":"buf-ptr"}
|
"t1-1": "dma-buf-2",
|
||||||
|
"v1-0": "func-ptr",
|
||||||
|
"a3-0": "qlen",
|
||||||
|
"a1-1": "origin",
|
||||||
|
"t0-1": "qwc-now",
|
||||||
|
"t2-0": "buf-ptr"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"dma-buffer-add-buckets": {
|
"dma-buffer-add-buckets": {
|
||||||
|
@ -452,7 +669,11 @@
|
||||||
|
|
||||||
"analog-input": {
|
"analog-input": {
|
||||||
"args": ["in", "offset", "center-val", "max-val", "out-range"],
|
"args": ["in", "offset", "center-val", "max-val", "out-range"],
|
||||||
"vars":{"f1-1":"offset-in", "f0-3":"magnitude", "v1-0":"max-magnitude"}
|
"vars": {
|
||||||
|
"f1-1": "offset-in",
|
||||||
|
"f0-3": "magnitude",
|
||||||
|
"v1-0": "max-magnitude"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"cpad-set-buzz!": {
|
"cpad-set-buzz!": {
|
||||||
|
@ -460,7 +681,13 @@
|
||||||
},
|
},
|
||||||
|
|
||||||
"service-cpads": {
|
"service-cpads": {
|
||||||
"vars":{"gp-0":"pad-list", "s5-0":"pad-idx", "s4-0":"pad", "s3-0":"buzz-idx", "v1-29":"current-button0"}
|
"vars": {
|
||||||
|
"gp-0": "pad-list",
|
||||||
|
"s5-0": "pad-idx",
|
||||||
|
"s4-0": "pad",
|
||||||
|
"s3-0": "buzz-idx",
|
||||||
|
"v1-29": "current-button0"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"buzz-stop!": {
|
"buzz-stop!": {
|
||||||
|
@ -469,7 +696,17 @@
|
||||||
|
|
||||||
"default-buffer-init": {
|
"default-buffer-init": {
|
||||||
"args": ["buff"],
|
"args": ["buff"],
|
||||||
"vars":{"v1-0":"buff", "v1-1":"buff", "v1-3":"buff", "v1-4":"buff", "a1-4":"tag", "a1-6":"tag2", "a1-8":"data", "a0-1":"tag3", "v1-2":"buff"}
|
"vars": {
|
||||||
|
"v1-0": "buff",
|
||||||
|
"v1-1": "buff",
|
||||||
|
"v1-3": "buff",
|
||||||
|
"v1-4": "buff",
|
||||||
|
"a1-4": "tag",
|
||||||
|
"a1-6": "tag2",
|
||||||
|
"a1-8": "data",
|
||||||
|
"a0-1": "tag3",
|
||||||
|
"v1-2": "buff"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"add-reg-gif-packet": {
|
"add-reg-gif-packet": {
|
||||||
|
@ -478,7 +715,15 @@
|
||||||
},
|
},
|
||||||
|
|
||||||
"(method 0 draw-context)": {
|
"(method 0 draw-context)": {
|
||||||
"args":["allocation", "type-to-make", "org-x", "org-y", "width", "height", "color-0"]
|
"args": [
|
||||||
|
"allocation",
|
||||||
|
"type-to-make",
|
||||||
|
"org-x",
|
||||||
|
"org-y",
|
||||||
|
"width",
|
||||||
|
"height",
|
||||||
|
"color-0"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
|
|
||||||
"(method 0 display)": {
|
"(method 0 display)": {
|
||||||
|
@ -508,9 +753,13 @@
|
||||||
|
|
||||||
"update-math-camera": {
|
"update-math-camera": {
|
||||||
"args": ["math-cam", "ignored", "aspect"],
|
"args": ["math-cam", "ignored", "aspect"],
|
||||||
"vars":{"f0-4":"temp1", "v1-1":"elim1",
|
"vars": {
|
||||||
"f0-6":"temp2", "v1-2":"elim2",
|
"f0-4": "temp1",
|
||||||
"f1-3":"x-rat", "f0-7":"y-rat",
|
"v1-1": "elim1",
|
||||||
|
"f0-6": "temp2",
|
||||||
|
"v1-2": "elim2",
|
||||||
|
"f1-3": "x-rat",
|
||||||
|
"f0-7": "y-rat",
|
||||||
"v1-3": "cull-info",
|
"v1-3": "cull-info",
|
||||||
"f2-2": "unused-x-thing",
|
"f2-2": "unused-x-thing",
|
||||||
"f2-5": "y-thing",
|
"f2-5": "y-thing",
|
||||||
|
@ -567,10 +816,6 @@
|
||||||
"a0-13": "vis-gif-1",
|
"a0-13": "vis-gif-1",
|
||||||
"a0-14": "vis-gif-1-again",
|
"a0-14": "vis-gif-1-again",
|
||||||
"a0-15": "vis-gif-1-again-again"
|
"a0-15": "vis-gif-1-again-again"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -595,7 +840,6 @@
|
||||||
|
|
||||||
"surface-mult!": {
|
"surface-mult!": {
|
||||||
"args": ["dst", "src0", "src1"]
|
"args": ["dst", "src0", "src1"]
|
||||||
|
|
||||||
},
|
},
|
||||||
|
|
||||||
"(method 0 collide-shape-prim)": {
|
"(method 0 collide-shape-prim)": {
|
||||||
|
@ -612,7 +856,13 @@
|
||||||
"args": ["allocation", "type-to-make", "cshape", "elt-count", "prim-id"]
|
"args": ["allocation", "type-to-make", "cshape", "elt-count", "prim-id"]
|
||||||
},
|
},
|
||||||
"(method 0 collide-shape)": {
|
"(method 0 collide-shape)": {
|
||||||
"args":["allocation", "type-to-make", "proc", "collide-list-kind", "prim-id"],
|
"args": [
|
||||||
|
"allocation",
|
||||||
|
"type-to-make",
|
||||||
|
"proc",
|
||||||
|
"collide-list-kind",
|
||||||
|
"prim-id"
|
||||||
|
],
|
||||||
"vars": { "s5-0": "obj" }
|
"vars": { "s5-0": "obj" }
|
||||||
},
|
},
|
||||||
"(method 11 touching-prims-entry-pool)": {
|
"(method 11 touching-prims-entry-pool)": {
|
||||||
|
@ -622,13 +872,15 @@
|
||||||
// LEVEL
|
// LEVEL
|
||||||
"lookup-level-info": {
|
"lookup-level-info": {
|
||||||
"args": ["name"],
|
"args": ["name"],
|
||||||
"vars":{"a1-1":["info", "level-load-info"], "v1-0":"rest", "a1-0":"current-sym"}
|
"vars": {
|
||||||
|
"a1-1": ["info", "level-load-info"],
|
||||||
|
"v1-0": "rest",
|
||||||
|
"a1-0": "current-sym"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
"(method 21 level-group)": {
|
"(method 21 level-group)": {
|
||||||
"args": ["obj", "name", "cmd-idx"],
|
"args": ["obj", "name", "cmd-idx"],
|
||||||
"vars": { "v1-1": "cmd-lst" }
|
"vars": { "v1-1": "cmd-lst" }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
|
@ -125,3 +125,9 @@
|
||||||
- There is now an option for `allow-misaligned` which allows the alignment of a struct type to be less than 16-bytes when inlined, without enabling array packing. This seems like a stupid option, but GOAL has this in some places, so we support it too.
|
- There is now an option for `allow-misaligned` which allows the alignment of a struct type to be less than 16-bytes when inlined, without enabling array packing. This seems like a stupid option, but GOAL has this in some places, so we support it too.
|
||||||
- In method declarations in a `deftype`, you can no longer provide argument names. There was ambiguity when parsing a compound typespec vs named argument. The names were not used for anything.
|
- In method declarations in a `deftype`, you can no longer provide argument names. There was ambiguity when parsing a compound typespec vs named argument. The names were not used for anything.
|
||||||
- 128-bit integer register variables (`i128`) are now supported. These work with assembly forms, `set!`s between registers, and `set!`s of memory locations with type `(pointer uint128)` or `(pointer int128)`.
|
- 128-bit integer register variables (`i128`) are now supported. These work with assembly forms, `set!`s between registers, and `set!`s of memory locations with type `(pointer uint128)` or `(pointer int128)`.
|
||||||
|
- Fixed a bug where the compiler would abort if it had to spill an `xmm` register containing an `i128` value.
|
||||||
|
- Added `.pextlw`, `.pextuw`, `.pcpyld`, and `.pcpyud` assembly forms
|
||||||
|
- Fixed a bug where `uint128` or children defined with `local-vars` would end up using a 64-bit GPR instead of a 128-bit XMM.
|
||||||
|
- Fixed a bug where 128-bit variable spills could be misaligned, causing a segfault at `vmovaps`.
|
||||||
|
- Added `.ppach` and `.pceqw`
|
||||||
|
- Fixed a bug where setting 128-bit / 64-bit variables from each other only did a 32-bit set
|
|
@ -1520,6 +1520,26 @@ Wrapper around `vcvtdq2ps` and `vcvtps2dq` to convert packed 32-bit signed integ
|
||||||
|
|
||||||
Wrapper around `vpsrld`, `vpsrad`, and `vpslld`. Does shifts on each of the 4 32-bit integers in the register.
|
Wrapper around `vpsrld`, `vpsrad`, and `vpslld`. Does shifts on each of the 4 32-bit integers in the register.
|
||||||
|
|
||||||
|
## `.pextlw`, `.pextuw`, `.pcpyud`, `.pcpyld`, `.pceqw`, `.ppach`
|
||||||
|
```
|
||||||
|
(.pextlw dst src0 src1 [:color #t|#f])
|
||||||
|
(.pextuw dst src0 src1 [:color #t|#f])
|
||||||
|
(.pcpyud dst src0 src1 [:color #t|#f])
|
||||||
|
(.pcpyld dst src0 src1 [:color #t|#f])
|
||||||
|
(.pceqw dst src0 src1 [:color #t|#f])
|
||||||
|
(.ppach dst src0 src1)
|
||||||
|
```
|
||||||
|
|
||||||
|
Equivalents of the EE's MMI instructions with the same name. These can only be used on 128-bit variables. Most map to single x86 instructions:
|
||||||
|
- `pextlw` is `VPUNPCKLDQ` (sources swapped)
|
||||||
|
- `pextuw` is `VPUNPCKHDQ` (sources swapped)
|
||||||
|
- `pcpyld` is `VPUNPCKLQDQ` (sources swapped)
|
||||||
|
- `pcpyud` is `VPUNPCKHQDQ` (sources _not_ swapped)
|
||||||
|
- `pceqw` is `VPCMPEQD`
|
||||||
|
|
||||||
|
Some map to multiple instructions. These must use the coloring system.
|
||||||
|
- `ppach` is a sequence of 7 instructions, built from `VPSHUFLW`, `VPSHUFHW`, `VPSRLDQ`, and `VPUNPCKLQDQ`.
|
||||||
|
|
||||||
# Compiler Forms - Unsorted
|
# Compiler Forms - Unsorted
|
||||||
|
|
||||||
## `let`
|
## `let`
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
;; name in dgo: matrix-h
|
;; name in dgo: matrix-h
|
||||||
;; dgos: GAME, ENGINE
|
;; dgos: GAME, ENGINE
|
||||||
|
|
||||||
;; matrix-h
|
;; A 4x4 matrix, stored in row-major order
|
||||||
(deftype matrix (structure)
|
(deftype matrix (structure)
|
||||||
((data float 16 :offset-assert 0)
|
((data float 16 :offset-assert 0)
|
||||||
(vector vector 4 :inline :offset 0)
|
(vector vector 4 :inline :offset 0)
|
||||||
|
@ -15,10 +15,13 @@
|
||||||
:size-assert #x40
|
:size-assert #x40
|
||||||
:flag-assert #xa00000040
|
:flag-assert #xa00000040
|
||||||
(:methods
|
(:methods
|
||||||
(dummy-9 () none 9)
|
(transform-vectors! (_type_ (inline-array vector) (inline-array vector) int) none 9)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
;; A 3x3 matrix, stored in row-major order.
|
||||||
|
;; NOTE: each row has an extra 4 bytes of padding
|
||||||
|
;; so this is really a 3x4 matrix.
|
||||||
(deftype matrix3 (structure)
|
(deftype matrix3 (structure)
|
||||||
((data float 12 :offset-assert 0)
|
((data float 12 :offset-assert 0)
|
||||||
(vector vector 3 :inline :offset 0)
|
(vector vector 3 :inline :offset 0)
|
||||||
|
@ -40,16 +43,16 @@
|
||||||
:flag-assert #x900000020
|
:flag-assert #x900000020
|
||||||
)
|
)
|
||||||
|
|
||||||
(defun matrix-copy! ((arg0 matrix) (arg1 matrix))
|
(defun matrix-copy! ((dst matrix) (src matrix))
|
||||||
(let ((v1-0 (-> arg1 vector 0 quad))
|
(let ((v1-0 (-> src vector 0 quad))
|
||||||
(a2-0 (-> arg1 vector 1 quad))
|
(a2-0 (-> src vector 1 quad))
|
||||||
(a3-0 (-> arg1 vector 2 quad))
|
(a3-0 (-> src vector 2 quad))
|
||||||
(a1-1 (-> arg1 vector 3 quad))
|
(a1-1 (-> src vector 3 quad))
|
||||||
)
|
)
|
||||||
(set! (-> arg0 vector 0 quad) v1-0)
|
(set! (-> dst vector 0 quad) v1-0)
|
||||||
(set! (-> arg0 vector 1 quad) a2-0)
|
(set! (-> dst vector 1 quad) a2-0)
|
||||||
(set! (-> arg0 vector 2 quad) a3-0)
|
(set! (-> dst vector 2 quad) a3-0)
|
||||||
(set! (-> arg0 vector 3 quad) a1-1)
|
(set! (-> dst vector 3 quad) a1-1)
|
||||||
)
|
)
|
||||||
arg0
|
dst
|
||||||
)
|
)
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -175,6 +175,11 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
|
||||||
m_gen.add_instr(IGen::load_reg_offset_xmm32(
|
m_gen.add_instr(IGen::load_reg_offset_xmm32(
|
||||||
op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
|
op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
|
||||||
i_rec);
|
i_rec);
|
||||||
|
} else if (op.reg.is_xmm() &&
|
||||||
|
(op.reg_class == RegClass::VECTOR_FLOAT || op.reg_class == RegClass::INT_128)) {
|
||||||
|
m_gen.add_instr(IGen::load128_xmm128_reg_offset(
|
||||||
|
op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
|
||||||
|
i_rec);
|
||||||
} else {
|
} else {
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
|
@ -197,6 +202,11 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
|
||||||
m_gen.add_instr(IGen::store_reg_offset_xmm32(
|
m_gen.add_instr(IGen::store_reg_offset_xmm32(
|
||||||
RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
|
RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
|
||||||
i_rec);
|
i_rec);
|
||||||
|
} else if (op.reg.is_xmm() &&
|
||||||
|
(op.reg_class == RegClass::VECTOR_FLOAT || op.reg_class == RegClass::INT_128)) {
|
||||||
|
m_gen.add_instr(IGen::store128_xmm128_reg_offset(
|
||||||
|
RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
|
||||||
|
i_rec);
|
||||||
} else {
|
} else {
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
|
|
|
@ -87,6 +87,11 @@ class Compiler {
|
||||||
emitter::Register::VF_ELEMENT broadcastElement,
|
emitter::Register::VF_ELEMENT broadcastElement,
|
||||||
Env* env);
|
Env* env);
|
||||||
|
|
||||||
|
Val* compile_asm_int128_math3(const goos::Object& form,
|
||||||
|
const goos::Object& rest,
|
||||||
|
IR_Int128Math3Asm::Kind kind,
|
||||||
|
Env* env);
|
||||||
|
|
||||||
Val* compile_asm_vf_math2(const goos::Object& form,
|
Val* compile_asm_vf_math2(const goos::Object& form,
|
||||||
const goos::Object& rest,
|
const goos::Object& rest,
|
||||||
IR_VFMath2Asm::Kind kind,
|
IR_VFMath2Asm::Kind kind,
|
||||||
|
@ -420,6 +425,13 @@ class Compiler {
|
||||||
Val* compile_asm_pw_sll(const goos::Object& form, const goos::Object& rest, Env* env);
|
Val* compile_asm_pw_sll(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||||
Val* compile_asm_pw_srl(const goos::Object& form, const goos::Object& rest, Env* env);
|
Val* compile_asm_pw_srl(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||||
Val* compile_asm_pw_sra(const goos::Object& form, const goos::Object& rest, Env* env);
|
Val* compile_asm_pw_sra(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||||
|
Val* compile_asm_pextlw(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||||
|
Val* compile_asm_pextuw(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||||
|
Val* compile_asm_pcpyud(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||||
|
Val* compile_asm_pcpyld(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||||
|
Val* compile_asm_pceqw(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||||
|
Val* compile_asm_ppach(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||||
|
Val* compile_asm_xorp(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||||
|
|
||||||
// Atoms
|
// Atoms
|
||||||
|
|
||||||
|
|
|
@ -118,9 +118,9 @@ void regset_common(emitter::ObjectGenerator* gen,
|
||||||
} else if (src_class == RegClass::FLOAT && dst_is_xmm128) {
|
} else if (src_class == RegClass::FLOAT && dst_is_xmm128) {
|
||||||
gen->add_instr(IGen::mov_xmm32_xmm32(dst_reg, src_reg), irec);
|
gen->add_instr(IGen::mov_xmm32_xmm32(dst_reg, src_reg), irec);
|
||||||
} else if (src_class == RegClass::GPR_64 && dst_is_xmm128) {
|
} else if (src_class == RegClass::GPR_64 && dst_is_xmm128) {
|
||||||
gen->add_instr(IGen::movd_xmm32_gpr32(dst_reg, src_reg), irec);
|
gen->add_instr(IGen::movq_xmm64_gpr64(dst_reg, src_reg), irec);
|
||||||
} else if (src_is_xmm128 && dst_class == RegClass::GPR_64) {
|
} else if (src_is_xmm128 && dst_class == RegClass::GPR_64) {
|
||||||
gen->add_instr(IGen::movd_gpr32_xmm32(dst_reg, src_reg), irec);
|
gen->add_instr(IGen::movq_gpr64_xmm64(dst_reg, src_reg), irec);
|
||||||
} else {
|
} else {
|
||||||
assert(false); // unhandled move.
|
assert(false); // unhandled move.
|
||||||
}
|
}
|
||||||
|
@ -1442,6 +1442,83 @@ void IR_VFMath3Asm::do_codegen(emitter::ObjectGenerator* gen,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////
|
||||||
|
// IR_Int128Math3Asm
|
||||||
|
///////////////////////
|
||||||
|
|
||||||
|
// Construct a three-operand 128-bit integer assembly IR op.
// `use_color` is forwarded to the IR_Asm base class; the destination, the two sources,
// and the operation kind are stored unchanged in the corresponding members.
IR_Int128Math3Asm::IR_Int128Math3Asm(bool use_color,
|
||||||
|
const RegVal* dst,
|
||||||
|
const RegVal* src1,
|
||||||
|
const RegVal* src2,
|
||||||
|
Kind kind)
|
||||||
|
: IR_Asm(use_color), m_dst(dst), m_src1(src1), m_src2(src2), m_kind(kind) {}
|
||||||
|
|
||||||
|
// Render this op as text: "<mnemonic><color suffix> <dst>, <src1>, <src2>".
// The mnemonic is chosen from m_kind; an unhandled kind trips the assert below.
std::string IR_Int128Math3Asm::print() {
|
||||||
|
std::string function = "";
|
||||||
|
// Map the operation kind to its assembly-form name.
switch (m_kind) {
|
||||||
|
case Kind::PEXTLW:
|
||||||
|
function = ".pextlw";
|
||||||
|
break;
|
||||||
|
case Kind::PEXTUW:
|
||||||
|
function = ".pextuw";
|
||||||
|
break;
|
||||||
|
case Kind::PCPYLD:
|
||||||
|
function = ".pcpyld";
|
||||||
|
break;
|
||||||
|
case Kind::PCPYUD:
|
||||||
|
function = ".pcpyud";
|
||||||
|
break;
|
||||||
|
case Kind::PCEQW:
|
||||||
|
function = ".pceqw";
|
||||||
|
break;
|
||||||
|
// Any kind not listed above has no printable name here.
default:
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
// Operands are printed in dst, src1, src2 order, after the mnemonic and color suffix.
return fmt::format("{}{} {}, {}, {}", function, get_color_suffix_string(), m_dst->print(),
|
||||||
|
m_src1->print(), m_src2->print());
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
 * Describe this op's register usage: dst is written, src1 and src2 are read.
 * When coloring is disabled, an empty RegAllocInstr is returned.
 */
RegAllocInstr IR_Int128Math3Asm::to_rai() {
  RegAllocInstr usage;
  if (!m_use_coloring) {
    return usage;
  }

  usage.write.push_back(m_dst->ireg());
  usage.read.push_back(m_src1->ireg());
  usage.read.push_back(m_src2->ireg());
  return usage;
}
|
||||||
|
|
||||||
|
/*!
 * Emit the machine instruction for this op, using the registers resolved
 * through get_reg_asm for dst, src1 and src2.
 */
void IR_Int128Math3Asm::do_codegen(emitter::ObjectGenerator* gen,
                                   const AllocationResult& allocs,
                                   emitter::IR_Record irec) {
  // Resolve the operands in dst, src1, src2 order.
  auto rd = get_reg_asm(m_dst, allocs, irec, m_use_coloring);
  auto rs = get_reg_asm(m_src1, allocs, irec, m_use_coloring);
  auto rt = get_reg_asm(m_src2, allocs, irec, m_use_coloring);

  // NOTE: PEXTLW, PEXTUW and PCPYLD intentionally pass (src2, src1) to the
  // emitter because x86 and PS2 do this opposite ways. PCPYUD and PCEQW pass
  // the sources through in their original order.
  if (m_kind == Kind::PEXTLW) {
    gen->add_instr(IGen::pextlw_swapped(rd, rt, rs), irec);
  } else if (m_kind == Kind::PEXTUW) {
    gen->add_instr(IGen::pextuw_swapped(rd, rt, rs), irec);
  } else if (m_kind == Kind::PCPYLD) {
    gen->add_instr(IGen::pcpyld_swapped(rd, rt, rs), irec);
  } else if (m_kind == Kind::PCPYUD) {
    gen->add_instr(IGen::pcpyud(rd, rs, rt), irec);
  } else if (m_kind == Kind::PCEQW) {
    gen->add_instr(IGen::pceqw(rd, rs, rt), irec);
  } else {
    assert(false);  // unknown kind
  }
}
|
||||||
|
|
||||||
///////////////////////
|
///////////////////////
|
||||||
// AsmVF2
|
// AsmVF2
|
||||||
///////////////////////
|
///////////////////////
|
||||||
|
@ -1475,6 +1552,22 @@ std::string IR_VFMath2Asm::print() {
|
||||||
use_imm = true;
|
use_imm = true;
|
||||||
function = ".pw.sra";
|
function = ".pw.sra";
|
||||||
break;
|
break;
|
||||||
|
case Kind::VPSRLDQ:
|
||||||
|
use_imm = true;
|
||||||
|
function = ".VPSRLDQ";
|
||||||
|
break;
|
||||||
|
case Kind::VPSLLDQ:
|
||||||
|
use_imm = true;
|
||||||
|
function = ".VPSLLDQ";
|
||||||
|
break;
|
||||||
|
case Kind::VPSHUFLW:
|
||||||
|
use_imm = true;
|
||||||
|
function = ".VPSHUFLW";
|
||||||
|
break;
|
||||||
|
case Kind::VPSHUFHW:
|
||||||
|
use_imm = true;
|
||||||
|
function = ".VPSHUFHW";
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
|
@ -1530,6 +1623,30 @@ void IR_VFMath2Asm::do_codegen(emitter::ObjectGenerator* gen,
|
||||||
assert(*m_imm <= 255);
|
assert(*m_imm <= 255);
|
||||||
gen->add_instr(IGen::pw_sra(dst, src, *m_imm), irec);
|
gen->add_instr(IGen::pw_sra(dst, src, *m_imm), irec);
|
||||||
break;
|
break;
|
||||||
|
case Kind::VPSRLDQ:
|
||||||
|
assert(m_imm.has_value());
|
||||||
|
assert(*m_imm >= 0);
|
||||||
|
assert(*m_imm <= 255);
|
||||||
|
gen->add_instr(IGen::vpsrldq(dst, src, *m_imm), irec);
|
||||||
|
break;
|
||||||
|
case Kind::VPSLLDQ:
|
||||||
|
assert(m_imm.has_value());
|
||||||
|
assert(*m_imm >= 0);
|
||||||
|
assert(*m_imm <= 255);
|
||||||
|
gen->add_instr(IGen::vpslldq(dst, src, *m_imm), irec);
|
||||||
|
break;
|
||||||
|
case Kind::VPSHUFLW:
|
||||||
|
assert(m_imm.has_value());
|
||||||
|
assert(*m_imm >= 0);
|
||||||
|
assert(*m_imm <= 255);
|
||||||
|
gen->add_instr(IGen::vpshuflw(dst, src, *m_imm), irec);
|
||||||
|
break;
|
||||||
|
case Kind::VPSHUFHW:
|
||||||
|
assert(m_imm.has_value());
|
||||||
|
assert(*m_imm >= 0);
|
||||||
|
assert(*m_imm <= 255);
|
||||||
|
gen->add_instr(IGen::vpshufhw(dst, src, *m_imm), irec);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
assert(false);
|
assert(false);
|
||||||
}
|
}
|
||||||
|
|
|
@ -546,9 +546,30 @@ class IR_VFMath3Asm : public IR_Asm {
|
||||||
Kind m_kind;
|
Kind m_kind;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class IR_Int128Math3Asm : public IR_Asm {
|
||||||
|
public:
|
||||||
|
enum class Kind { PEXTLW, PEXTUW, PCPYUD, PCPYLD, PCEQW };
|
||||||
|
IR_Int128Math3Asm(bool use_color,
|
||||||
|
const RegVal* dst,
|
||||||
|
const RegVal* src1,
|
||||||
|
const RegVal* src2,
|
||||||
|
Kind kind);
|
||||||
|
std::string print() override;
|
||||||
|
RegAllocInstr to_rai() override;
|
||||||
|
void do_codegen(emitter::ObjectGenerator* gen,
|
||||||
|
const AllocationResult& allocs,
|
||||||
|
emitter::IR_Record irec) override;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
const RegVal* m_dst = nullptr;
|
||||||
|
const RegVal* m_src1 = nullptr;
|
||||||
|
const RegVal* m_src2 = nullptr;
|
||||||
|
Kind m_kind;
|
||||||
|
};
|
||||||
|
|
||||||
class IR_VFMath2Asm : public IR_Asm {
|
class IR_VFMath2Asm : public IR_Asm {
|
||||||
public:
|
public:
|
||||||
enum class Kind { ITOF, FTOI, PW_SLL, PW_SRL, PW_SRA };
|
enum class Kind { ITOF, FTOI, PW_SLL, PW_SRL, PW_SRA, VPSRLDQ, VPSLLDQ, VPSHUFLW, VPSHUFHW };
|
||||||
IR_VFMath2Asm(bool use_color,
|
IR_VFMath2Asm(bool use_color,
|
||||||
const RegVal* dst,
|
const RegVal* dst,
|
||||||
const RegVal* src,
|
const RegVal* src,
|
||||||
|
|
|
@ -484,6 +484,28 @@ Val* Compiler::compile_asm_vf_math3(const goos::Object& form,
|
||||||
return get_none();
|
return get_none();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*!
 * Shared compiler for three-operand IR_Int128Math3Asm forms.
 * Expects exactly three unnamed arguments (dst, src1, src2) and accepts an
 * optional :color symbol argument, which defaults to true. Throws a compiler
 * error if the destination is not settable. Always returns none.
 */
Val* Compiler::compile_asm_int128_math3(const goos::Object& form,
                                        const goos::Object& rest,
                                        IR_Int128Math3Asm::Kind kind,
                                        Env* env) {
  auto va = get_va(form, rest);
  va_check(form, va, {{}, {}, {}}, {{"color", {false, goos::ObjectType::SYMBOL}}});

  // Coloring is on unless the form explicitly overrides it.
  bool use_color = va.has_named("color") ? get_true_or_false(form, va.named.at("color")) : true;

  // Compile the operands in order: dst, src1, src2.
  auto dst_reg = compile_error_guard(va.unnamed.at(0), env)->to_reg(env);
  auto lhs_reg = compile_error_guard(va.unnamed.at(1), env)->to_reg(env);
  auto rhs_reg = compile_error_guard(va.unnamed.at(2), env)->to_reg(env);

  if (!dst_reg->settable()) {
    throw_compiler_error(form, "Cannot set destination");
  }

  env->emit_ir<IR_Int128Math3Asm>(use_color, dst_reg, lhs_reg, rhs_reg, kind);
  return get_none();
}
|
||||||
|
|
||||||
Val* Compiler::compile_asm_vf_math2(const goos::Object& form,
|
Val* Compiler::compile_asm_vf_math2(const goos::Object& form,
|
||||||
const goos::Object& rest,
|
const goos::Object& rest,
|
||||||
IR_VFMath2Asm::Kind kind,
|
IR_VFMath2Asm::Kind kind,
|
||||||
|
@ -586,6 +608,67 @@ Val* Compiler::compile_asm_pw_sra(const goos::Object& form, const goos::Object&
|
||||||
return compile_asm_vf_math2_imm_u8(form, rest, IR_VFMath2Asm::Kind::PW_SRA, env);
|
return compile_asm_vf_math2_imm_u8(form, rest, IR_VFMath2Asm::Kind::PW_SRA, env);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*!
 * Compile a PEXTLW form by delegating to compile_asm_int128_math3.
 */
Val* Compiler::compile_asm_pextlw(const goos::Object& form, const goos::Object& rest, Env* env) {
  using Kind = IR_Int128Math3Asm::Kind;
  return compile_asm_int128_math3(form, rest, Kind::PEXTLW, env);
}
|
||||||
|
|
||||||
|
/*!
 * Compile a PEXTUW form by delegating to compile_asm_int128_math3.
 */
Val* Compiler::compile_asm_pextuw(const goos::Object& form, const goos::Object& rest, Env* env) {
  using Kind = IR_Int128Math3Asm::Kind;
  return compile_asm_int128_math3(form, rest, Kind::PEXTUW, env);
}
|
||||||
|
|
||||||
|
/*!
 * Compile a PCPYUD form by delegating to compile_asm_int128_math3.
 */
Val* Compiler::compile_asm_pcpyud(const goos::Object& form, const goos::Object& rest, Env* env) {
  using Kind = IR_Int128Math3Asm::Kind;
  return compile_asm_int128_math3(form, rest, Kind::PCPYUD, env);
}
|
||||||
|
|
||||||
|
/*!
 * Compile a PCPYLD form by delegating to compile_asm_int128_math3.
 */
Val* Compiler::compile_asm_pcpyld(const goos::Object& form, const goos::Object& rest, Env* env) {
  using Kind = IR_Int128Math3Asm::Kind;
  return compile_asm_int128_math3(form, rest, Kind::PCPYLD, env);
}
|
||||||
|
|
||||||
|
/*!
 * Compile a PCEQW form by delegating to compile_asm_int128_math3.
 */
Val* Compiler::compile_asm_pceqw(const goos::Object& form, const goos::Object& rest, Env* env) {
  using Kind = IR_Int128Math3Asm::Kind;
  return compile_asm_int128_math3(form, rest, Kind::PCEQW, env);
}
|
||||||
|
|
||||||
|
/*!
 * Compile a PPACH form (dst, rs, rt) as a fixed seven-instruction sequence.
 * Takes exactly three unnamed arguments and no named ones; every emitted op
 * uses coloring. A uint128 temporary in the INT_128 register class holds the
 * intermediate result for rs, while dst holds the one for rt.
 * Throws a compiler error if the destination is not settable. Returns none.
 */
Val* Compiler::compile_asm_ppach(const goos::Object& form, const goos::Object& rest, Env* env) {
  using Shuffle = IR_VFMath2Asm;

  auto va = get_va(form, rest);
  va_check(form, va, {{}, {}, {}}, {});

  auto dst_reg = compile_error_guard(va.unnamed.at(0), env)->to_reg(env);
  auto rs_reg = compile_error_guard(va.unnamed.at(1), env)->to_reg(env);
  auto rt_reg = compile_error_guard(va.unnamed.at(2), env)->to_reg(env);
  auto scratch = env->make_ireg(TypeSpec("uint128"), RegClass::INT_128);

  if (!dst_reg->settable()) {
    throw_compiler_error(form, "Cannot set destination");
  }

  // The rs (scratch) and rt (dst) chains are interleaved; the emission order
  // below is kept exactly.
  // Step 1: shuffle the low halfwords of each source with selector 0x88.
  env->emit_ir<IR_VFMath2Asm>(true, scratch, rs_reg, Shuffle::Kind::VPSHUFLW, 0x88);
  env->emit_ir<IR_VFMath2Asm>(true, dst_reg, rt_reg, Shuffle::Kind::VPSHUFLW, 0x88);
  // Step 2: same selector applied to the high halfwords.
  env->emit_ir<IR_VFMath2Asm>(true, scratch, scratch, Shuffle::Kind::VPSHUFHW, 0x88);
  env->emit_ir<IR_VFMath2Asm>(true, dst_reg, dst_reg, Shuffle::Kind::VPSHUFHW, 0x88);
  // Step 3: byte-shift both right by 4.
  env->emit_ir<IR_VFMath2Asm>(true, scratch, scratch, Shuffle::Kind::VPSRLDQ, 4);
  env->emit_ir<IR_VFMath2Asm>(true, dst_reg, dst_reg, Shuffle::Kind::VPSRLDQ, 4);
  // Step 4: combine. This is actually a VPUNPCKLQDQ with srcs swapped.
  env->emit_ir<IR_Int128Math3Asm>(true, dst_reg, scratch, dst_reg,
                                  IR_Int128Math3Asm::Kind::PCPYLD);

  return get_none();
}
|
||||||
|
|
||||||
|
/*!
 * Compile a three-operand xor form (dst, rs, rt) as a single colored
 * IR_VFMath3Asm XOR. Takes exactly three unnamed arguments and no named ones.
 * Throws a compiler error if the destination is not settable. Returns none.
 */
Val* Compiler::compile_asm_xorp(const goos::Object& form, const goos::Object& rest, Env* env) {
  auto va = get_va(form, rest);
  va_check(form, va, {{}, {}, {}}, {});

  auto dst_reg = compile_error_guard(va.unnamed.at(0), env)->to_reg(env);
  auto rs_reg = compile_error_guard(va.unnamed.at(1), env)->to_reg(env);
  auto rt_reg = compile_error_guard(va.unnamed.at(2), env)->to_reg(env);

  if (!dst_reg->settable()) {
    throw_compiler_error(form, "Cannot set destination");
  }

  env->emit_ir<IR_VFMath3Asm>(true, dst_reg, rs_reg, rt_reg, IR_VFMath3Asm::Kind::XOR);
  return get_none();
}
|
||||||
|
|
||||||
Val* Compiler::compile_asm_itof_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
|
Val* Compiler::compile_asm_itof_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
|
||||||
return compile_asm_vf_math2(form, rest, IR_VFMath2Asm::Kind::ITOF, env);
|
return compile_asm_vf_math2(form, rest, IR_VFMath2Asm::Kind::ITOF, env);
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,6 +35,7 @@ const std::unordered_map<
|
||||||
{".wait.vf", &Compiler::compile_asm_wait_vf},
|
{".wait.vf", &Compiler::compile_asm_wait_vf},
|
||||||
|
|
||||||
{".xor.vf", &Compiler::compile_asm_xor_vf},
|
{".xor.vf", &Compiler::compile_asm_xor_vf},
|
||||||
|
{".xor.p", &Compiler::compile_asm_xorp},
|
||||||
|
|
||||||
{".max.vf", &Compiler::compile_asm_max_vf},
|
{".max.vf", &Compiler::compile_asm_max_vf},
|
||||||
{".max.x.vf", &Compiler::compile_asm_max_x_vf},
|
{".max.x.vf", &Compiler::compile_asm_max_x_vf},
|
||||||
|
@ -95,6 +96,12 @@ const std::unordered_map<
|
||||||
{".pw.sll", &Compiler::compile_asm_pw_sll},
|
{".pw.sll", &Compiler::compile_asm_pw_sll},
|
||||||
{".pw.srl", &Compiler::compile_asm_pw_srl},
|
{".pw.srl", &Compiler::compile_asm_pw_srl},
|
||||||
{".pw.sra", &Compiler::compile_asm_pw_sra},
|
{".pw.sra", &Compiler::compile_asm_pw_sra},
|
||||||
|
{".pextlw", &Compiler::compile_asm_pextlw},
|
||||||
|
{".pextuw", &Compiler::compile_asm_pextuw},
|
||||||
|
{".pcpyld", &Compiler::compile_asm_pcpyld},
|
||||||
|
{".pcpyud", &Compiler::compile_asm_pcpyud},
|
||||||
|
{".pceqw", &Compiler::compile_asm_pceqw},
|
||||||
|
{".ppach", &Compiler::compile_asm_ppach},
|
||||||
|
|
||||||
// BLOCK FORMS
|
// BLOCK FORMS
|
||||||
{"top-level", &Compiler::compile_top_level},
|
{"top-level", &Compiler::compile_top_level},
|
||||||
|
|
|
@ -87,10 +87,14 @@ Val* Compiler::compile_local_vars(const goos::Object& form, const goos::Object&
|
||||||
throw_compiler_error(form, "Cannot declare a local named {}, this already exists.", name);
|
throw_compiler_error(form, "Cannot declare a local named {}, this already exists.", name);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type == TypeSpec("float")) {
|
if (m_ts.tc(TypeSpec("float"), type)) {
|
||||||
auto ireg = fe->make_ireg(type, RegClass::FLOAT);
|
auto ireg = fe->make_ireg(type, RegClass::FLOAT);
|
||||||
ireg->mark_as_settable();
|
ireg->mark_as_settable();
|
||||||
fe->params[name] = ireg;
|
fe->params[name] = ireg;
|
||||||
|
} else if (m_ts.tc(TypeSpec("int128"), type) || m_ts.tc(TypeSpec("uint128"), type)) {
|
||||||
|
auto ireg = fe->make_ireg(type, RegClass::INT_128);
|
||||||
|
ireg->mark_as_settable();
|
||||||
|
fe->params[name] = ireg;
|
||||||
} else {
|
} else {
|
||||||
auto ireg = fe->make_ireg(type, RegClass::GPR_64);
|
auto ireg = fe->make_ireg(type, RegClass::GPR_64);
|
||||||
ireg->mark_as_settable();
|
ireg->mark_as_settable();
|
||||||
|
|
|
@ -105,6 +105,34 @@ class IGen {
|
||||||
return instr;
|
return instr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Move 64-bits of xmm to 64 bits of gpr (no sign extension).
|
||||||
|
*/
|
||||||
|
static Instruction movq_gpr64_xmm64(Register dst, Register src) {
|
||||||
|
assert(dst.is_gpr());
|
||||||
|
assert(src.is_xmm());
|
||||||
|
Instruction instr(0x66);
|
||||||
|
instr.set_op2(0x0f);
|
||||||
|
instr.set_op3(0x7e);
|
||||||
|
instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true);
|
||||||
|
instr.swap_op0_rex();
|
||||||
|
return instr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* Move 64-bits of gpr to 64-bits of xmm (no sign extension)
|
||||||
|
*/
|
||||||
|
static Instruction movq_xmm64_gpr64(Register dst, Register src) {
|
||||||
|
assert(dst.is_xmm());
|
||||||
|
assert(src.is_gpr());
|
||||||
|
Instruction instr(0x66);
|
||||||
|
instr.set_op2(0x0f);
|
||||||
|
instr.set_op3(0x6e);
|
||||||
|
instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true);
|
||||||
|
instr.swap_op0_rex();
|
||||||
|
return instr;
|
||||||
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* Move 32-bits between xmm's
|
* Move 32-bits between xmm's
|
||||||
*/
|
*/
|
||||||
|
@ -2401,6 +2429,114 @@ class IGen {
|
||||||
instr.set(Imm(1, imm));
|
instr.set(Imm(1, imm));
|
||||||
return instr;
|
return instr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*!
 * Emit VPUNPCKLDQ dst, src0, src1 on xmm registers.
 * VEX.128.66.0F.WIG 62/r VPUNPCKLDQ xmm1, xmm2, xmm3/m128
 * Callers are expected to have already swapped the sources, as the name says.
 */
static Instruction pextlw_swapped(Register dst, Register src0, Register src1) {
  assert(dst.is_xmm());
  assert(src0.is_xmm());
  assert(src1.is_xmm());

  // Operand slots are reg, vex, r/m: dst is reg, src0 is vex, src1 is r/m.
  Instruction encoded(0x62);
  encoded.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F,
                                src0.hw_id(), false, VexPrefix::P_66);
  return encoded;
}
|
||||||
|
|
||||||
|
/*!
 * Emit VPUNPCKHDQ dst, src0, src1 on xmm registers.
 * VEX.128.66.0F.WIG 6A/r VPUNPCKHDQ xmm1, xmm2, xmm3/m128
 * Callers are expected to have already swapped the sources, as the name says.
 */
static Instruction pextuw_swapped(Register dst, Register src0, Register src1) {
  assert(dst.is_xmm());
  assert(src0.is_xmm());
  assert(src1.is_xmm());

  // Operand slots are reg, vex, r/m: dst is reg, src0 is vex, src1 is r/m.
  Instruction encoded(0x6a);
  encoded.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F,
                                src0.hw_id(), false, VexPrefix::P_66);
  return encoded;
}
|
||||||
|
|
||||||
|
/*!
 * Emit VPUNPCKLQDQ dst, src0, src1 on xmm registers.
 * VEX.128.66.0F.WIG 6C/r VPUNPCKLQDQ xmm1, xmm2, xmm3/m128
 */
static Instruction vpunpcklqdq(Register dst, Register src0, Register src1) {
  assert(dst.is_xmm());
  assert(src0.is_xmm());
  assert(src1.is_xmm());

  // Operand slots are reg, vex, r/m: dst is reg, src0 is vex, src1 is r/m.
  Instruction encoded(0x6c);
  encoded.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F,
                                src0.hw_id(), false, VexPrefix::P_66);
  return encoded;
}
|
||||||
|
|
||||||
|
/*!
 * PCPYLD with the sources already swapped by the caller: this is exactly
 * vpunpcklqdq(dst, src0, src1).
 */
static Instruction pcpyld_swapped(Register dst, Register src0, Register src1) {
  Instruction encoded = vpunpcklqdq(dst, src0, src1);
  return encoded;
}
|
||||||
|
|
||||||
|
/*!
 * Emit VPUNPCKHQDQ dst, src0, src1 on xmm registers.
 * VEX.128.66.0F.WIG 6D/r VPUNPCKHQDQ xmm1, xmm2, xmm3/m128
 */
static Instruction pcpyud(Register dst, Register src0, Register src1) {
  assert(dst.is_xmm());
  assert(src0.is_xmm());
  assert(src1.is_xmm());

  // Operand slots are reg, vex, r/m: dst is reg, src0 is vex, src1 is r/m.
  Instruction encoded(0x6d);
  encoded.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F,
                                src0.hw_id(), false, VexPrefix::P_66);
  return encoded;
}
|
||||||
|
|
||||||
|
/*!
 * Emit VPCMPEQD dst, src0, src1 on xmm registers.
 * VEX.128.66.0F.WIG 76 /r VPCMPEQD xmm1, xmm2, xmm3/m128
 */
static Instruction pceqw(Register dst, Register src0, Register src1) {
  assert(dst.is_xmm());
  assert(src0.is_xmm());
  assert(src1.is_xmm());

  // Operand slots are reg, vex, r/m: dst is reg, src0 is vex, src1 is r/m.
  Instruction encoded(0x76);
  encoded.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F,
                                src0.hw_id(), false, VexPrefix::P_66);
  return encoded;
}
|
||||||
|
|
||||||
|
/*!
 * Emit VPSRLDQ dst, src, imm on xmm registers.
 * VEX.128.66.0F.WIG 73 /3 ib VPSRLDQ xmm1, xmm2, imm8
 */
static Instruction vpsrldq(Register dst, Register src, u8 imm) {
  assert(dst.is_xmm());
  assert(src.is_xmm());

  Instruction encoded(0x73);
  // The leading 3 is the /3 opcode extension; src is passed second and dst
  // goes in the vex operand slot.
  encoded.set_vex_modrm_and_rex(3, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false,
                                VexPrefix::P_66);
  encoded.set(Imm(1, imm));  // one-byte immediate
  return encoded;
}
|
||||||
|
|
||||||
|
/*!
 * Emit VPSLLDQ dst, src, imm on xmm registers.
 * VEX.128.66.0F.WIG 73 /7 ib VPSLLDQ xmm1, xmm2, imm8
 */
static Instruction vpslldq(Register dst, Register src, u8 imm) {
  assert(dst.is_xmm());
  assert(src.is_xmm());

  Instruction encoded(0x73);
  // The leading 7 is the /7 opcode extension; src is passed second and dst
  // goes in the vex operand slot.
  encoded.set_vex_modrm_and_rex(7, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false,
                                VexPrefix::P_66);
  encoded.set(Imm(1, imm));  // one-byte immediate
  return encoded;
}
|
||||||
|
|
||||||
|
/*!
 * Emit VPSHUFLW dst, src, imm on xmm registers.
 * VEX.128.F2.0F.WIG 70 /r ib VPSHUFLW xmm1, xmm2/m128, imm8
 */
static Instruction vpshuflw(Register dst, Register src, u8 imm) {
  assert(dst.is_xmm());
  assert(src.is_xmm());

  Instruction encoded(0x70);
  // Two-operand form: the vex operand slot is passed as 0.
  encoded.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false,
                                VexPrefix::P_F2);
  encoded.set(Imm(1, imm));  // one-byte immediate
  return encoded;
}
|
||||||
|
|
||||||
|
/*!
 * Emit VPSHUFHW dst, src, imm on xmm registers.
 * VEX.128.F3.0F.WIG 70 /r ib VPSHUFHW xmm1, xmm2/m128, imm8
 */
static Instruction vpshufhw(Register dst, Register src, u8 imm) {
  assert(dst.is_xmm());
  assert(src.is_xmm());

  Instruction encoded(0x70);
  // Two-operand form: the vex operand slot is passed as 0.
  encoded.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false,
                                VexPrefix::P_F3);
  encoded.set(Imm(1, imm));  // one-byte immediate
  return encoded;
}
|
||||||
};
|
};
|
||||||
} // namespace emitter
|
} // namespace emitter
|
||||||
|
|
||||||
|
|
|
@ -54,8 +54,8 @@ RegisterInfo RegisterInfo::make_register_info() {
|
||||||
|
|
||||||
// todo - experiment with better orders for allocation.
|
// todo - experiment with better orders for allocation.
|
||||||
info.m_gpr_alloc_order = {RAX, RCX, RDX, RBX, RBP, RSI, RDI, R8, R9, R10, R11}; // arbitrary
|
info.m_gpr_alloc_order = {RAX, RCX, RDX, RBX, RBP, RSI, RDI, R8, R9, R10, R11}; // arbitrary
|
||||||
info.m_xmm_alloc_order = {XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
|
info.m_xmm_alloc_order = {XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6,
|
||||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14};
|
XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13};
|
||||||
|
|
||||||
// these should only be temp registers!
|
// these should only be temp registers!
|
||||||
info.m_gpr_temp_only_alloc_order = {RAX, RCX, RDX, RSI, RDI, R8, R9};
|
info.m_gpr_temp_only_alloc_order = {RAX, RCX, RDX, RSI, RDI, R8, R9};
|
||||||
|
|
|
@ -605,9 +605,31 @@ bool try_assignment_for_var(int var,
|
||||||
}
|
}
|
||||||
|
|
||||||
int get_stack_slot_for_var(int var, RegAllocCache* cache) {
|
int get_stack_slot_for_var(int var, RegAllocCache* cache) {
|
||||||
|
int slot_size;
|
||||||
|
auto& info = cache->iregs.at(var);
|
||||||
|
switch (info.reg_class) {
|
||||||
|
case RegClass::INT_128:
|
||||||
|
slot_size = 2;
|
||||||
|
break;
|
||||||
|
case RegClass::VECTOR_FLOAT:
|
||||||
|
slot_size = 2;
|
||||||
|
break;
|
||||||
|
case RegClass::FLOAT:
|
||||||
|
slot_size = 1; // todo - this wastes some space
|
||||||
|
break;
|
||||||
|
case RegClass::GPR_64:
|
||||||
|
slot_size = 1;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
auto kv = cache->var_to_stack_slot.find(var);
|
auto kv = cache->var_to_stack_slot.find(var);
|
||||||
if (kv == cache->var_to_stack_slot.end()) {
|
if (kv == cache->var_to_stack_slot.end()) {
|
||||||
auto slot = cache->current_stack_slot++;
|
if (slot_size == 2 && (cache->current_stack_slot & 1)) {
|
||||||
|
cache->current_stack_slot++;
|
||||||
|
}
|
||||||
|
auto slot = cache->current_stack_slot;
|
||||||
|
cache->current_stack_slot += slot_size;
|
||||||
cache->var_to_stack_slot[var] = slot;
|
cache->var_to_stack_slot[var] = slot;
|
||||||
return slot;
|
return slot;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -112,24 +112,22 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
;; definition for method 3 of type vec4s
|
;; definition for method 3 of type vec4s
|
||||||
;; INFO: Return type mismatch int vs vec4s.
|
;; INFO: Return type mismatch uint128 vs vec4s.
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [131]
|
;; WARN: Unsupported inline assembly instruction kind - [por gp, a0, r0]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [131]
|
;; WARN: Unsupported inline assembly instruction kind - [por a2, gp, r0]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [73]
|
;; WARN: Unsupported inline assembly instruction kind - [sllv a2, gp, r0]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [132]
|
;; WARN: Unsupported inline assembly instruction kind - [sllv a2, v1, r0]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [73]
|
;; WARN: Unsupported inline assembly instruction kind - [por v0, gp, r0]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [132]
|
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [131]
|
|
||||||
(defmethod inspect vec4s ((obj vec4s))
|
(defmethod inspect vec4s ((obj vec4s))
|
||||||
(local-vars
|
(local-vars
|
||||||
(r0-0 none)
|
(r0-0 none)
|
||||||
(v0-5 int)
|
(v0-5 uint128)
|
||||||
(v1-0 int)
|
(v1-0 uint128)
|
||||||
(v1-1 int)
|
(v1-1 uint128)
|
||||||
(a2-0 int)
|
(a2-0 uint128)
|
||||||
(a2-1 int)
|
(a2-1 uint128)
|
||||||
(a2-3 int)
|
(a2-3 uint128)
|
||||||
(gp-0 int)
|
(gp-0 uint128)
|
||||||
)
|
)
|
||||||
(.por gp-0 obj r0-0)
|
(.por gp-0 obj r0-0)
|
||||||
(let ((t9-0 format)
|
(let ((t9-0 format)
|
||||||
|
@ -146,7 +144,7 @@
|
||||||
(.sllv a2-1 gp-0 r0-0)
|
(.sllv a2-1 gp-0 r0-0)
|
||||||
(t9-1 a0-2 a1-1 a2-1)
|
(t9-1 a0-2 a1-1 a2-1)
|
||||||
)
|
)
|
||||||
(format #t "~Ty: ~f~%" (sar gp-0 32))
|
(format #t "~Ty: ~f~%" (sar (the-as int gp-0) 32))
|
||||||
(let ((t9-3 format)
|
(let ((t9-3 format)
|
||||||
(a0-4 #t)
|
(a0-4 #t)
|
||||||
(a1-3 "~Tz: ~f~%")
|
(a1-3 "~Tz: ~f~%")
|
||||||
|
@ -160,31 +158,29 @@
|
||||||
(a1-4 "~Tw: ~f~%")
|
(a1-4 "~Tw: ~f~%")
|
||||||
)
|
)
|
||||||
(.pcpyud v1-1 gp-0 r0-0)
|
(.pcpyud v1-1 gp-0 r0-0)
|
||||||
(t9-4 a0-5 a1-4 (sar v1-1 32))
|
(t9-4 a0-5 a1-4 (sar (the-as int v1-1) 32))
|
||||||
)
|
)
|
||||||
(.por v0-5 gp-0 r0-0)
|
(.por v0-5 gp-0 r0-0)
|
||||||
(the-as vec4s v0-5)
|
(the-as vec4s v0-5)
|
||||||
)
|
)
|
||||||
|
|
||||||
;; definition for method 2 of type vec4s
|
;; definition for method 2 of type vec4s
|
||||||
;; INFO: Return type mismatch int vs vec4s.
|
;; INFO: Return type mismatch uint128 vs vec4s.
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [131]
|
;; WARN: Unsupported inline assembly instruction kind - [por gp, a0, r0]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [73]
|
;; WARN: Unsupported inline assembly instruction kind - [sllv a2, gp, r0]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [132]
|
;; WARN: Unsupported inline assembly instruction kind - [sllv t0, v1, r0]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [73]
|
;; WARN: Unsupported inline assembly instruction kind - [por t2, gp, r0]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [132]
|
;; WARN: Unsupported inline assembly instruction kind - [por v0, gp, r0]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [131]
|
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [131]
|
|
||||||
(defmethod print vec4s ((obj vec4s))
|
(defmethod print vec4s ((obj vec4s))
|
||||||
(local-vars
|
(local-vars
|
||||||
(r0-0 none)
|
(r0-0 none)
|
||||||
(v0-1 int)
|
(v0-1 uint128)
|
||||||
(v1-0 int)
|
(v1-0 uint128)
|
||||||
(v1-1 int)
|
(v1-1 uint128)
|
||||||
(a2-0 int)
|
(a2-0 uint128)
|
||||||
(t0-0 int)
|
(t0-0 uint128)
|
||||||
(t2-0 int)
|
(t2-0 uint128)
|
||||||
(gp-0 int)
|
(gp-0 uint128)
|
||||||
)
|
)
|
||||||
(.por gp-0 obj r0-0)
|
(.por gp-0 obj r0-0)
|
||||||
(let ((t9-0 format)
|
(let ((t9-0 format)
|
||||||
|
@ -192,11 +188,11 @@
|
||||||
(a1-0 "#<vector ~F ~F ~F ~F @ #x~X>")
|
(a1-0 "#<vector ~F ~F ~F ~F @ #x~X>")
|
||||||
)
|
)
|
||||||
(.sllv a2-0 gp-0 r0-0)
|
(.sllv a2-0 gp-0 r0-0)
|
||||||
(let ((a3-0 (sar gp-0 32)))
|
(let ((a3-0 (sar (the-as int gp-0) 32)))
|
||||||
(.pcpyud v1-0 gp-0 r0-0)
|
(.pcpyud v1-0 gp-0 r0-0)
|
||||||
(.sllv t0-0 v1-0 r0-0)
|
(.sllv t0-0 v1-0 r0-0)
|
||||||
(.pcpyud v1-1 gp-0 r0-0)
|
(.pcpyud v1-1 gp-0 r0-0)
|
||||||
(let ((t1-0 (sar v1-1 32)))
|
(let ((t1-0 (sar (the-as int v1-1) 32)))
|
||||||
(.por t2-0 gp-0 r0-0)
|
(.por t2-0 gp-0 r0-0)
|
||||||
(t9-0 a0-1 a1-0 a2-0 a3-0 t0-0 t1-0 t2-0)
|
(t9-0 a0-1 a1-0 a2-0 a3-0 t0-0 t1-0 t2-0)
|
||||||
)
|
)
|
||||||
|
@ -1021,9 +1017,9 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
;; definition for function breakpoint-range-set!
|
;; definition for function breakpoint-range-set!
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [48]
|
;; WARN: Unsupported inline assembly instruction kind - [mtc0 Debug, a0]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [50]
|
;; WARN: Unsupported inline assembly instruction kind - [mtdab a1]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [51]
|
;; WARN: Unsupported inline assembly instruction kind - [mtdabm a2]
|
||||||
(defun breakpoint-range-set! ((arg0 uint) (arg1 uint) (arg2 uint))
|
(defun breakpoint-range-set! ((arg0 uint) (arg1 uint) (arg2 uint))
|
||||||
(.mtc0 Debug arg0)
|
(.mtc0 Debug arg0)
|
||||||
(.mtdab arg1)
|
(.mtdab arg1)
|
||||||
|
@ -1032,9 +1028,9 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
;; definition for function valid?
|
;; definition for function valid?
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [3]
|
;; WARN: Unsupported inline assembly instruction kind - [daddu v1, v1, s7]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [3]
|
;; WARN: Unsupported inline assembly instruction kind - [daddu v1, v1, s7]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [3]
|
;; WARN: Unsupported inline assembly instruction kind - [daddu v1, v1, s7]
|
||||||
(defun
|
(defun
|
||||||
valid?
|
valid?
|
||||||
((obj object)
|
((obj object)
|
||||||
|
|
|
@ -285,8 +285,8 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
;; definition for method 2 of type handle
|
;; definition for method 2 of type handle
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [5]
|
;; WARN: Unsupported inline assembly instruction kind - [subu a2, v1, s7]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [73]
|
;; WARN: Unsupported inline assembly instruction kind - [sllv a2, v1, r0]
|
||||||
(defmethod print handle ((obj handle))
|
(defmethod print handle ((obj handle))
|
||||||
(local-vars
|
(local-vars
|
||||||
(r0-0 none)
|
(r0-0 none)
|
||||||
|
|
|
@ -1833,8 +1833,8 @@
|
||||||
|
|
||||||
;; definition for method 10 of type process
|
;; definition for method 10 of type process
|
||||||
;; INFO: Return type mismatch int vs none.
|
;; INFO: Return type mismatch int vs none.
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [22]
|
;; WARN: Unsupported inline assembly instruction kind - [lw ra, return-from-thread(s7)]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [59]
|
;; WARN: Unsupported inline assembly instruction kind - [jr ra]
|
||||||
(defmethod deactivate process ((obj process))
|
(defmethod deactivate process ((obj process))
|
||||||
(let ((v0-0 (when (!= (-> obj status) 'dead)
|
(let ((v0-0 (when (!= (-> obj status) 'dead)
|
||||||
(set! (-> obj next-state) dead-state)
|
(set! (-> obj next-state) dead-state)
|
||||||
|
|
|
@ -49,10 +49,10 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
;; definition for function enter-state
|
;; definition for function enter-state
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [23]
|
;; WARN: Unsupported inline assembly instruction kind - [lwu sp, 28(v1)]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [22]
|
;; WARN: Unsupported inline assembly instruction kind - [lw ra, return-from-thread-dead(s7)]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [59]
|
;; WARN: Unsupported inline assembly instruction kind - [jr t9]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [13]
|
;; WARN: Unsupported inline assembly instruction kind - [sw v0, 0(sp)]
|
||||||
(defun
|
(defun
|
||||||
enter-state
|
enter-state
|
||||||
((arg0 object)
|
((arg0 object)
|
||||||
|
|
|
@ -118,8 +118,8 @@
|
||||||
|
|
||||||
;; definition for function rand-vu-init
|
;; definition for function rand-vu-init
|
||||||
;; INFO: Return type mismatch int vs float.
|
;; INFO: Return type mismatch int vs float.
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [56]
|
;; WARN: Unsupported inline assembly instruction kind - [ctc2.i vi_R, a0]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [57]
|
;; WARN: Unsupported inline assembly instruction kind - [cfc2.i v0, vi_R]
|
||||||
(defun rand-vu-init ((arg0 float))
|
(defun rand-vu-init ((arg0 float))
|
||||||
(local-vars (v0-0 int))
|
(local-vars (v0-0 int))
|
||||||
(.ctc2.i vi_R arg0)
|
(.ctc2.i vi_R arg0)
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
:size-assert #x40
|
:size-assert #x40
|
||||||
:flag-assert #xa00000040
|
:flag-assert #xa00000040
|
||||||
(:methods
|
(:methods
|
||||||
(dummy-9 () none 9)
|
(transform-vectors! (_type_ (inline-array vector) (inline-array vector) int) none 9)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -822,12 +822,11 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
;; definition for function vector-dot
|
;; definition for function vector-dot
|
||||||
;; INFO: Return type mismatch int vs float.
|
;; WARN: Unsupported inline assembly instruction kind - [mula.s f0, f3]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [153]
|
;; WARN: Unsupported inline assembly instruction kind - [madda.s f1, f4]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [154]
|
;; WARN: Unsupported inline assembly instruction kind - [madd.s f0, f2, f5]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [157]
|
|
||||||
(defun vector-dot ((arg0 vector) (arg1 vector))
|
(defun vector-dot ((arg0 vector) (arg1 vector))
|
||||||
(local-vars (f0-1 int))
|
(local-vars (f0-1 float))
|
||||||
(let ((f0-0 (-> arg0 data 0))
|
(let ((f0-0 (-> arg0 data 0))
|
||||||
(f1-0 (-> arg0 data 1))
|
(f1-0 (-> arg0 data 1))
|
||||||
(f2-0 (-> arg0 data 2))
|
(f2-0 (-> arg0 data 2))
|
||||||
|
@ -839,7 +838,7 @@
|
||||||
(.madda.s f1-0 f4-0)
|
(.madda.s f1-0 f4-0)
|
||||||
(.madd.s f0-1 f2-0 f5-0)
|
(.madd.s f0-1 f2-0 f5-0)
|
||||||
)
|
)
|
||||||
(the-as float f0-1)
|
f0-1
|
||||||
)
|
)
|
||||||
|
|
||||||
;; definition for function vector-dot-vu
|
;; definition for function vector-dot-vu
|
||||||
|
@ -860,13 +859,12 @@
|
||||||
)
|
)
|
||||||
|
|
||||||
;; definition for function vector4-dot
|
;; definition for function vector4-dot
|
||||||
;; INFO: Return type mismatch int vs float.
|
;; WARN: Unsupported inline assembly instruction kind - [mula.s f0, f4]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [153]
|
;; WARN: Unsupported inline assembly instruction kind - [madda.s f1, f5]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [154]
|
;; WARN: Unsupported inline assembly instruction kind - [madda.s f2, f6]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [154]
|
;; WARN: Unsupported inline assembly instruction kind - [madd.s f0, f3, f7]
|
||||||
;; WARN: Unsupported inline assembly instruction kind - [157]
|
|
||||||
(defun vector4-dot ((arg0 vector) (arg1 vector))
|
(defun vector4-dot ((arg0 vector) (arg1 vector))
|
||||||
(local-vars (f0-1 int))
|
(local-vars (f0-1 float))
|
||||||
(let ((f0-0 (-> arg0 data 0))
|
(let ((f0-0 (-> arg0 data 0))
|
||||||
(f1-0 (-> arg0 data 1))
|
(f1-0 (-> arg0 data 1))
|
||||||
(f2-0 (-> arg0 data 2))
|
(f2-0 (-> arg0 data 2))
|
||||||
|
@ -881,7 +879,7 @@
|
||||||
(.madda.s f2-0 f6-0)
|
(.madda.s f2-0 f6-0)
|
||||||
(.madd.s f0-1 f3-0 f7-0)
|
(.madd.s f0-1 f3-0 f7-0)
|
||||||
)
|
)
|
||||||
(the-as float f0-1)
|
f0-1
|
||||||
)
|
)
|
||||||
|
|
||||||
;; definition for function vector4-dot-vu
|
;; definition for function vector4-dot-vu
|
||||||
|
|
164
test/goalc/source_templates/with_game/test-matrix.gc
Normal file
164
test/goalc/source_templates/with_game/test-matrix.gc
Normal file
|
@ -0,0 +1,164 @@
|
||||||
|
;; Print the 16 floats in the `data` field of the given object as four lines
;; of four values each (elements 0-3, 4-7, 8-11, 12-15).
;; The argument is spliced into the expansion once per element, so pass a
;; plain variable rather than an expression with side effects.
(defmacro inspect-mat (m)
  `(begin
     (format #t "~T[~F] [~F] [~F] [~F]~%"
             (-> ,m data 0) (-> ,m data 1) (-> ,m data 2) (-> ,m data 3))
     (format #t "~T[~F] [~F] [~F] [~F]~%"
             (-> ,m data 4) (-> ,m data 5) (-> ,m data 6) (-> ,m data 7))
     (format #t "~T[~F] [~F] [~F] [~F]~%"
             (-> ,m data 8) (-> ,m data 9) (-> ,m data 10) (-> ,m data 11))
     (format #t "~T[~F] [~F] [~F] [~F]~%"
             (-> ,m data 12) (-> ,m data 13) (-> ,m data 14) (-> ,m data 15))
     )
  )
|
||||||
|
|
||||||
|
|
||||||
|
;; matrix*! check: multiply two matrices with known contents and print the product.
(format #t "mat-mult~%")
(let ((dst (new 'stack 'matrix))
      (src1 (new 'stack 'matrix))
      (src2 (new 'stack 'matrix)))
  ;; src1 is filled with 1.0 .. 16.0 ascending, src2 with 16.0 .. 1.0 descending.
  (dotimes (idx 16)
    (set! (-> src1 data idx) (the float (+ idx 1)))
    (set! (-> src2 data idx) (the float (- 16 idx)))
    )

  (matrix*! dst src1 src2)
  (inspect-mat dst)
  )
|
||||||
|
|
||||||
|
;; matrix-transpose! check: transpose a matrix holding 1.0 .. 16.0 and print it.
(format #t "transpose~%")
(let ((dst (new 'stack 'matrix))
      (src (new 'stack 'matrix)))
  ;; element idx holds idx + 1, so the printed result shows where each element moved.
  (dotimes (idx 16)
    (set! (-> src data idx) (the float (+ idx 1)))
    )

  (matrix-transpose! dst src)
  (inspect-mat dst)
  )
|
||||||
|
|
||||||
|
;; matrix-4x4-inverse! check: invert a fixed matrix, then print src * inverse.
;; The inverse itself is never printed; only the product is.
(format #t "inv-4x4~%")
(let ((dst (new 'stack 'matrix))
      (prod (new 'stack 'matrix))
      (src (new 'static 'matrix :data (new 'static 'array float 16
                                           3. 2. 1. 0.
                                           2. -1. 3. 0.
                                           -8. 2. 2. 0.
                                           1. 2. 3. 1.))))
  (matrix-4x4-inverse! dst src)
  (matrix*! prod src dst)
  (inspect-mat prod)
  )
|
||||||
|
|
||||||
|
|
||||||
|
(format #t "axis-angle~%")

;; Build a rotation matrix of 10 degrees about `axis` with matrix-axis-angle!
;; and print it, followed by a blank line.
;; The x/y/z components of `axis` are normalized IN PLACE first, so the vector
;; passed in is modified. An axis whose x/y/z are all zero is left untouched,
;; which avoids dividing by a zero norm.
(defun test-axis-angle ((axis vector))
  (let ((mat (new 'stack 'matrix)))
    (let* ((norm-squared (+ (* (-> axis x) (-> axis x))
                            (* (-> axis y) (-> axis y))
                            (* (-> axis z) (-> axis z))))
           (norm (sqrtf norm-squared)))

      ;; only the first three components take part in the normalization; w is ignored
      (when (> norm-squared 0)
        (dotimes (comp 3)
          (set! (-> axis data comp) (/ (-> axis data comp) norm))
          )
        )
      (matrix-axis-angle! mat axis (degrees 10))
      (inspect-mat mat)
      (format #t "~%")
      )
    )
  )

;; a general axis, then each coordinate axis on its own, then a w-only vector
;; (zero x/y/z, so the normalization step is skipped)
(test-axis-angle (new 'static 'vector :x 1.0 :y 0.5 :z -0.3 :w 0.0))
(test-axis-angle (new 'static 'vector :x 0.2))
(test-axis-angle (new 'static 'vector :y 0.2))
(test-axis-angle (new 'static 'vector :z 0.2))
(test-axis-angle (new 'static 'vector :w 0.2))
|
||||||
|
|
||||||
|
;; matrix-3x3-inverse! and matrix3-inverse-transpose! check on one fixed input.
;; The last four elements of the input are all zero.
(format #t "3x3-inverse~%")
(let ((dst (new 'stack 'matrix))
      (src (new 'static 'matrix :data (new 'static 'array float 16
                                           3. 2. 1. 0.
                                           2. -1. 3. 0.
                                           -8. 2. 2. 0.
                                           0. 0. 0. 0.))))
  ;; first result: the inverse
  (matrix-3x3-inverse! dst src)
  (inspect-mat dst)
  (format #t "~%")
  ;; second result: the inverse transpose, written over the same destination
  (matrix3-inverse-transpose! dst src)
  (inspect-mat dst)
  )
|
||||||
|
|
||||||
|
;; Structure holding 32 vectors stored inline.
(deftype vec-array (structure)
  ((data vector 32 :inline))
  )

;; Print a "vec-array" banner followed by the x/y/z/w of the first 12 vectors
;; only (not all 32), one vector per line. Returns the object itself.
(defmethod inspect vec-array ((obj vec-array))
  (format #t "vec-array~%")
  (dotimes (vec-idx 12)
    (format #t "~T[~F] [~F] [~F] [~F]~%"
            (-> obj data vec-idx x)
            (-> obj data vec-idx y)
            (-> obj data vec-idx z)
            (-> obj data vec-idx w))
    )
  obj
  )
|
||||||
|
|
||||||
|
;; transform-vectors! check: transform a batch of vectors by one matrix and print them.
(format #t "transform-many~%")
(let ((dst (new 'stack 'vec-array))
      (dst-ref (new 'stack 'vec-array))
      (src (new 'stack 'vec-array))
      (val 0.0)
      (mat (new 'static 'matrix :data (new 'static 'array float 16
                                           3. 2. 1. 0.
                                           2. -1. 3. 0.
                                           -8. 2. 2. 0.
                                           1. 2. 3. 1.))))

  ;; init source: clear each destination quad, give x/y/z a running counter
  ;; (0, 1, 2, 3, ...) and set w to 1.0
  (dotimes (vec-idx 12)
    (set! (-> dst data vec-idx quad) (the uint128 0))
    (dotimes (comp-idx 3)
      (set! (-> src data vec-idx data comp-idx) val)
      (set! val (+ val 1.0))
      )
    (set! (-> src data vec-idx w) 1.0)
    )

  ;;(inspect src)

  ;; compute reference, one vector at a time.
  ;; NOTE(review): dst-ref is filled here but never printed or compared below.
  (dotimes (vec-idx 12)
    (vector-matrix*! (-> dst-ref data vec-idx) (-> src data vec-idx) mat)
    )
  ;;(inspect dst-ref)

  ;; compute the batched version. The count passed is 11 while 12 vectors were
  ;; initialized, so the 12th destination keeps the zero written above.
  (transform-vectors! mat (-> dst data) (-> src data) 11)
  (inspect dst)
  0
  )
|
54
test/goalc/source_templates/with_game/test-pextlw.gc
Normal file
54
test/goalc/source_templates/with_game/test-pextlw.gc
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
;; Exercises the 128-bit packed integer ops (.pextlw .pextuw .pcpyld .pcpyud
;; .ppach .pceqw) and prints each result with print128.
(let ((v1 (new 'stack 'array 'uint8 16))
      (v2 (new 'stack 'array 'uint8 16))
      (v3 (new 'stack 'array 'uint8 16))
      )

  ;; initialize stack arrays: v1 holds bytes 0..15, v2 holds bytes 16..31
  (dotimes (byte-idx 16)
    (set! (-> v1 byte-idx) byte-idx)
    (set! (-> v2 byte-idx) (+ byte-idx 16))
    )

  ;; load both arrays as single 128-bit values
  (let ((v1-quad (-> (the (pointer uint128) v1)))
        (v2-quad (-> (the (pointer uint128) v2)))
        (v3-quad (the uint128 0))
        )
    ;;(print128 v1-quad) (format #t "~%")
    ;;(print128 v2-quad) (format #t "~%")

    ;; expect #x07060504171615140302010013121110
    (.pextlw v3-quad v1-quad v2-quad)
    (print128 v3-quad) (format #t "~%")

    ;; expect #x0f0e0d0c1f1e1d1c0b0a09081b1a1918
    (.pextuw v3-quad v1-quad v2-quad)
    (print128 v3-quad) (format #t "~%")

    ;; expect #x07060504030201001716151413121110
    (.pcpyld v3-quad v1-quad v2-quad)
    (print128 v3-quad) (format #t "~%")

    ;; expect #x1f1e1d1c1b1a19180f0e0d0c0b0a0908
    (.pcpyud v3-quad v1-quad v2-quad)
    (print128 v3-quad) (format #t "~%")

    ;; expect #x0d0c0908050401001d1c191815141110
    (.ppach v3-quad v1-quad v2-quad)
    (print128 v3-quad) (format #t "~%")
    )

  ;; word-wise equality: s1 and s2 differ in words 0 and 2 and match in words 1 and 3
  (let ((s1 (new 'stack 'array 'uint32 4))
        (s2 (new 'stack 'array 'uint32 4)))
    (set! (-> s1 0) #xdeadbeef)
    (set! (-> s1 1) #x12312323)
    (set! (-> s1 2) #x11112222)
    (set! (-> s1 3) #x11112223)

    (set! (-> s2 0) #xdeadbeee) ;; different
    (set! (-> s2 1) #x12312323)
    (set! (-> s2 2) #x91112222) ;; different
    (set! (-> s2 3) #x11112223)

    (let ((s1q (-> (the (pointer uint128) s1)))
          (s2q (-> (the (pointer uint128) s2)))
          (s3q (the uint128 0))
          )

      ;; expect #xffffffff00000000ffffffff00000000
      (.pceqw s3q s1q s2q)
      (print128 s3q) (format #t "~%")
      )
    )
  )
|
|
@ -579,6 +579,87 @@ TEST_F(WithGameTests, I128Simple) {
|
||||||
"12344321\n"});
|
"12344321\n"});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs test-pextlw.gc and checks its printed output: one 128-bit hex value per
// packed integer op in the script, followed by a final "0" line.
TEST_F(WithGameTests, Pextlw) {
  // Adjacent literals concatenate, so this is a single expected string.
  const char* const expected_output =
      "#x07060504171615140302010013121110\n"   // .pextlw
      "#x0f0e0d0c1f1e1d1c0b0a09081b1a1918\n"   // .pextuw
      "#x07060504030201001716151413121110\n"   // .pcpyld
      "#x1f1e1d1c1b1a19180f0e0d0c0b0a0908\n"   // .pcpyud
      "#x0d0c0908050401001d1c191815141110\n"   // .ppach
      "#xffffffff00000000ffffffff00000000\n"   // .pceqw
      "0\n";
  runner.run_static_test(env, testCategory, "test-pextlw.gc", {expected_output});
}
|
||||||
|
|
||||||
|
// Runs test-matrix.gc and checks everything it prints: one banner line per
// section followed by the matrices (or vectors) that section outputs, and a
// final "0" line.
TEST_F(WithGameTests, Matrix) {
  // Adjacent literals concatenate, so this is a single expected string.
  const char* const expected_output =
      // product of two known matrices
      "mat-mult\n"
      "\t[ 80.0000] [ 70.0000] [ 60.0000] [ 50.0000]\n"
      "\t[ 240.0000] [ 214.0000] [ 188.0000] [ 162.0000]\n"
      "\t[ 400.0000] [ 358.0000] [ 316.0000] [ 274.0000]\n"
      "\t[ 560.0000] [ 502.0000] [ 444.0000] [ 386.0000]\n"
      // transpose of the matrix holding 1..16
      "transpose\n"
      "\t[ 1.0000] [ 5.0000] [ 9.0000] [ 13.0000]\n"
      "\t[ 2.0000] [ 6.0000] [ 10.0000] [ 14.0000]\n"
      "\t[ 3.0000] [ 7.0000] [ 11.0000] [ 15.0000]\n"
      "\t[ 4.0000] [ 8.0000] [ 12.0000] [ 16.0000]\n"
      // source times its 4x4 inverse: identity
      "inv-4x4\n"
      "\t[ 1.0000] [ 0.0000] [ 0.0000] [ 0.0000]\n"
      "\t[ 0.0000] [ 1.0000] [ 0.0000] [ 0.0000]\n"
      "\t[ 0.0000] [ 0.0000] [ 1.0000] [ 0.0000]\n"
      "\t[ 0.0000] [ 0.0000] [ 0.0000] [ 1.0000]\n"
      // five axis-angle matrices, each followed by a blank line
      "axis-angle\n"
      "\t[ 0.9961] [ 0.0506] [ 0.0715] [ 0.0000]\n"
      "\t[ -0.0393] [ 0.9876] [ -0.1516] [ 0.0000]\n"
      "\t[ -0.0783] [ 0.1482] [ 0.9858] [ 0.0000]\n"
      "\t[ 0.0000] [ 0.0000] [ 0.0000] [ 1.0000]\n"
      "\n"
      "\t[ 1.0000] [ 0.0000] [ 0.0000] [ 0.0000]\n"
      "\t[ 0.0000] [ 0.9848] [ -0.1736] [ 0.0000]\n"
      "\t[ 0.0000] [ 0.1736] [ 0.9848] [ 0.0000]\n"
      "\t[ 0.0000] [ 0.0000] [ 0.0000] [ 1.0000]\n"
      "\n"
      "\t[ 0.9848] [ 0.0000] [ 0.1736] [ 0.0000]\n"
      "\t[ 0.0000] [ 1.0000] [ 0.0000] [ 0.0000]\n"
      "\t[ -0.1736] [ 0.0000] [ 0.9848] [ 0.0000]\n"
      "\t[ 0.0000] [ 0.0000] [ 0.0000] [ 1.0000]\n"
      "\n"
      "\t[ 0.9848] [ -0.1736] [ 0.0000] [ 0.0000]\n"
      "\t[ 0.1736] [ 0.9848] [ 0.0000] [ 0.0000]\n"
      "\t[ 0.0000] [ 0.0000] [ 1.0000] [ 0.0000]\n"
      "\t[ 0.0000] [ 0.0000] [ 0.0000] [ 1.0000]\n"
      "\n"
      "\t[ 1.0000] [ 0.0000] [ 0.0000] [ 0.0000]\n"
      "\t[ 0.0000] [ 1.0000] [ 0.0000] [ 0.0000]\n"
      "\t[ 0.0000] [ 0.0000] [ 1.0000] [ 0.0000]\n"
      "\t[ 0.0000] [ 0.0000] [ 0.0000] [ 1.0000]\n"
      "\n"
      // 3x3 inverse, blank line, then the inverse transpose
      "3x3-inverse\n"
      "\t[ 0.0952] [ 0.0238] [ -0.0833] [ 0.0000]\n"
      "\t[ 0.3333] [ -0.1666] [ 0.0833] [ 0.0000]\n"
      "\t[ 0.0476] [ 0.2619] [ 0.0833] [ 0.0000]\n"
      "\t[ 0.0000] [ 0.0000] [ 0.0000] [ 0.0000]\n"
      "\n"
      "\t[ 0.0952] [ 0.3333] [ 0.0476] [ 0.0000]\n"
      "\t[ 0.0238] [ -0.1666] [ 0.2619] [ 0.0000]\n"
      "\t[ -0.0833] [ 0.0833] [ 0.0833] [ 0.0000]\n"
      "\t[ 0.0000] [ 0.0000] [ 0.0000] [ 0.0000]\n"
      // twelve printed vectors; the last one is all zero
      "transform-many\n"
      "vec-array\n"
      "\t[ -13.0000] [ 5.0000] [ 10.0000] [ 1.0000]\n"
      "\t[ -22.0000] [ 14.0000] [ 28.0000] [ 1.0000]\n"
      "\t[ -31.0000] [ 23.0000] [ 46.0000] [ 1.0000]\n"
      "\t[ -40.0000] [ 32.0000] [ 64.0000] [ 1.0000]\n"
      "\t[ -49.0000] [ 41.0000] [ 82.0000] [ 1.0000]\n"
      "\t[ -58.0000] [ 50.0000] [ 100.0000] [ 1.0000]\n"
      "\t[ -67.0000] [ 59.0000] [ 118.0000] [ 1.0000]\n"
      "\t[ -76.0000] [ 68.0000] [ 136.0000] [ 1.0000]\n"
      "\t[ -85.0000] [ 77.0000] [ 154.0000] [ 1.0000]\n"
      "\t[ -94.0000] [ 86.0000] [ 172.0000] [ 1.0000]\n"
      "\t[ -103.0000] [ 95.0000] [ 190.0000] [ 1.0000]\n"
      "\t[ 0.0000] [ 0.0000] [ 0.0000] [ 0.0000]\n"
      "0\n";
  runner.run_static_test(env, testCategory, "test-matrix.gc", {expected_output});
}
|
||||||
|
|
||||||
TEST(TypeConsistency, TypeConsistency) {
|
TEST(TypeConsistency, TypeConsistency) {
|
||||||
Compiler compiler;
|
Compiler compiler;
|
||||||
compiler.enable_throw_on_redefines();
|
compiler.enable_throw_on_redefines();
|
||||||
|
|
|
@ -92,10 +92,6 @@ const std::unordered_set<std::string> skip_in_compiling = {
|
||||||
"(method 3 vector)", // this function appears twice, which confuses the compiler.
|
"(method 3 vector)", // this function appears twice, which confuses the compiler.
|
||||||
"vector-dot", // fpu acc
|
"vector-dot", // fpu acc
|
||||||
"vector4-dot", // fpu acc
|
"vector4-dot", // fpu acc
|
||||||
|
|
||||||
/// MATRIX
|
|
||||||
"matrix-transpose!", // unsupported asm ops
|
|
||||||
"matrix-4x4-inverse!", // compiler fails to regalloc this...
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// default location for the data. It can be changed with a command line argument.
|
// default location for the data. It can be changed with a command line argument.
|
||||||
|
|
|
@ -372,3 +372,138 @@ TEST(EmitterAVX, VPSLLD) {
|
||||||
tester.emit(IGen::pw_sll(XMM0 + 13, XMM0 + 14, 6));
|
tester.emit(IGen::pw_sll(XMM0 + 13, XMM0 + 14, 6));
|
||||||
EXPECT_EQ(tester.dump_to_hex_string(true), "C5E172F403C4C16172F604C59172F405C4C11172F606");
|
EXPECT_EQ(tester.dump_to_hex_string(true), "C5E172F403C4C16172F604C59172F405C4C11172F606");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Encoding check for IGen::pextlw_swapped: emits it for every combination of
// XMM3 and XMM13 in the three operand slots and compares the generated bytes.
TEST(EmitterAVX, VPUNPCKLDQ) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  // First operand varies slowest, last operand fastest; this is the order the
  // expected byte string was written in.
  for (int r0 : {3, 13}) {
    for (int r1 : {3, 13}) {
      for (int r2 : {3, 13}) {
        tester.emit(IGen::pextlw_swapped(XMM0 + r0, XMM0 + r1, XMM0 + r2));
      }
    }
  }
  EXPECT_EQ(tester.dump_to_hex_string(true),
            "C5E162DBC4C16162DDC59162DBC4C11162DDC56162EBC4416162EDC51162EBC4411162ED");
}
|
||||||
|
|
||||||
|
// Encoding check for IGen::pextuw_swapped: emits it for every combination of
// XMM3 and XMM13 in the three operand slots and compares the generated bytes.
TEST(EmitterAVX, VPUNPCKHDQ) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  // First operand varies slowest, last operand fastest; this is the order the
  // expected byte string was written in.
  for (int r0 : {3, 13}) {
    for (int r1 : {3, 13}) {
      for (int r2 : {3, 13}) {
        tester.emit(IGen::pextuw_swapped(XMM0 + r0, XMM0 + r1, XMM0 + r2));
      }
    }
  }
  EXPECT_EQ(tester.dump_to_hex_string(true),
            "C5E16ADBC4C1616ADDC5916ADBC4C1116ADDC5616AEBC441616AEDC5116AEBC441116AED");
}
|
||||||
|
|
||||||
|
// Encoding check for IGen::pcpyld_swapped: emits it for every combination of
// XMM3 and XMM13 in the three operand slots and compares the generated bytes.
TEST(EmitterAVX, VPUNPCKLQDQ) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  // First operand varies slowest, last operand fastest; this is the order the
  // expected byte string was written in.
  for (int r0 : {3, 13}) {
    for (int r1 : {3, 13}) {
      for (int r2 : {3, 13}) {
        tester.emit(IGen::pcpyld_swapped(XMM0 + r0, XMM0 + r1, XMM0 + r2));
      }
    }
  }
  EXPECT_EQ(tester.dump_to_hex_string(true),
            "C5E16CDBC4C1616CDDC5916CDBC4C1116CDDC5616CEBC441616CEDC5116CEBC441116CED");
}
|
||||||
|
|
||||||
|
// Encoding check for IGen::pcpyud: emits it for every combination of XMM3 and
// XMM13 in the three operand slots and compares the generated bytes.
TEST(EmitterAVX, VPUNPCKHQDQ) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  // First operand varies slowest, last operand fastest; this is the order the
  // expected byte string was written in.
  for (int r0 : {3, 13}) {
    for (int r1 : {3, 13}) {
      for (int r2 : {3, 13}) {
        tester.emit(IGen::pcpyud(XMM0 + r0, XMM0 + r1, XMM0 + r2));
      }
    }
  }
  EXPECT_EQ(tester.dump_to_hex_string(true),
            "C5E16DDBC4C1616DDDC5916DDBC4C1116DDDC5616DEBC441616DEDC5116DEBC441116DED");
}
|
||||||
|
|
||||||
|
// Encoding check for IGen::pceqw: emits it for every combination of XMM3 and
// XMM13 in the three operand slots and compares the generated bytes.
TEST(EmitterAVX, VPCMPEQD) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  // First operand varies slowest, last operand fastest; this is the order the
  // expected byte string was written in.
  for (int r0 : {3, 13}) {
    for (int r1 : {3, 13}) {
      for (int r2 : {3, 13}) {
        tester.emit(IGen::pceqw(XMM0 + r0, XMM0 + r1, XMM0 + r2));
      }
    }
  }
  EXPECT_EQ(tester.dump_to_hex_string(true),
            "C5E176DBC4C16176DDC59176DBC4C11176DDC56176EBC4416176EDC51176EBC4411176ED");
}
|
||||||
|
|
||||||
|
// Encoding check for IGen::vpsrldq: first operand XMM3/XMM13, second operand
// XMM4/XMM14, with the immediate counting up 3, 4, 5, 6 across the four emits.
TEST(EmitterAVX, VPSRLDQ) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  unsigned char imm = 3;  // a different immediate per emit keeps the four encodings distinct
  for (int r0 : {3, 13}) {
    for (int r1 : {4, 14}) {
      tester.emit(IGen::vpsrldq(XMM0 + r0, XMM0 + r1, imm));
      ++imm;
    }
  }
  EXPECT_EQ(tester.dump_to_hex_string(true), "C5E173DC03C4C16173DE04C59173DC05C4C11173DE06");
}
|
||||||
|
|
||||||
|
// Encoding check for IGen::vpslldq: first operand XMM3/XMM13, second operand
// XMM4/XMM14, with the immediate counting up 3, 4, 5, 6 across the four emits.
TEST(EmitterAVX, VPSLLDQ) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  unsigned char imm = 3;  // a different immediate per emit keeps the four encodings distinct
  for (int r0 : {3, 13}) {
    for (int r1 : {4, 14}) {
      tester.emit(IGen::vpslldq(XMM0 + r0, XMM0 + r1, imm));
      ++imm;
    }
  }
  EXPECT_EQ(tester.dump_to_hex_string(true), "C5E173FC03C4C16173FE04C59173FC05C4C11173FE06");
}
|
||||||
|
|
||||||
|
// Encoding check for IGen::vpshuflw: first operand XMM3/XMM13, second operand
// XMM4/XMM14, with the immediate counting up 3, 4, 5, 6 across the four emits.
TEST(EmitterAVX, VPSHUFLW) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  unsigned char imm = 3;  // a different immediate per emit keeps the four encodings distinct
  for (int r0 : {3, 13}) {
    for (int r1 : {4, 14}) {
      tester.emit(IGen::vpshuflw(XMM0 + r0, XMM0 + r1, imm));
      ++imm;
    }
  }
  EXPECT_EQ(tester.dump_to_hex_string(true), "C5FB70DC03C4C17B70DE04C57B70EC05C4417B70EE06");
}
|
||||||
|
|
||||||
|
// Encoding check for IGen::vpshufhw: first operand XMM3/XMM13, second operand
// XMM4/XMM14, with the immediate counting up 3, 4, 5, 6 across the four emits.
TEST(EmitterAVX, VPSHUFHW) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  unsigned char imm = 3;  // a different immediate per emit keeps the four encodings distinct
  for (int r0 : {3, 13}) {
    for (int r1 : {4, 14}) {
      tester.emit(IGen::vpshufhw(XMM0 + r0, XMM0 + r1, imm));
      ++imm;
    }
  }
  EXPECT_EQ(tester.dump_to_hex_string(true), "C5FA70DC03C4C17A70DE04C57A70EC05C4417A70EE06");
}
|
||||||
|
|
||||||
|
// Encoding check for IGen::movq_gpr64_xmm64 (xmm -> 64-bit gpr): covers RSP and
// R12 as the destination and XMM3 and XMM13 as the source.
// The suite name was misspelled "EmitrerAVX"; it is now "EmitterAVX" like the
// other tests here, so a --gtest_filter=EmitterAVX.* run includes it.
TEST(EmitterAVX, movq_to_gpr_from_xmm) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  tester.emit(IGen::movq_gpr64_xmm64(RSP, XMM0 + 3));
  tester.emit(IGen::movq_gpr64_xmm64(RSP, XMM0 + 13));
  tester.emit(IGen::movq_gpr64_xmm64(R12, XMM0 + 3));
  tester.emit(IGen::movq_gpr64_xmm64(R12, XMM0 + 13));
  EXPECT_EQ(tester.dump_to_hex_string(true), "66480F7EDC664C0F7EEC66490F7EDC664D0F7EEC");
}
|
||||||
|
|
||||||
|
// Encoding check for IGen::movq_xmm64_gpr64 (64-bit gpr -> xmm): covers XMM3 and
// XMM13 as the destination and RSP and R12 as the source.
// The suite name was misspelled "EmitrerAVX"; it is now "EmitterAVX" like the
// other tests here, so a --gtest_filter=EmitterAVX.* run includes it.
TEST(EmitterAVX, movq_to_xmm_from_gpr) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  tester.emit(IGen::movq_xmm64_gpr64(XMM0 + 3, RSP));
  tester.emit(IGen::movq_xmm64_gpr64(XMM0 + 13, RSP));
  tester.emit(IGen::movq_xmm64_gpr64(XMM0 + 3, R12));
  tester.emit(IGen::movq_xmm64_gpr64(XMM0 + 13, R12));
  EXPECT_EQ(tester.dump_to_hex_string(true), "66480F6EDC664C0F6EEC66490F6EDC664D0F6EEC");
}
|
Loading…
Reference in a new issue