[Decompiler] Make matrix decompile (#341)

* small fixes

* update

* add instructions

* finish matrix

* add matrix test cases
This commit is contained in:
water111 2021-03-28 20:26:30 -04:00 committed by GitHub
parent 64c35ca453
commit 0a76e6e157
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
32 changed files with 4182 additions and 2595 deletions

View file

@ -543,10 +543,21 @@ TypeState AsmOp::propagate_types_internal(const TypeState& input,
TypeState result = input;
if (m_dst.has_value()) {
auto kind = m_dst->reg().get_kind();
if (kind == Reg::GPR || kind == Reg::FPR) {
if (kind == Reg::FPR) {
result.get(m_dst->reg()) = TP_Type::make_from_ts("float");
} else if (kind == Reg::GPR) {
for (auto& x : m_src) {
if (x && x->reg().get_kind() == Reg::GPR) {
auto src_type = result.get(x->reg()).typespec();
if (dts.ts.tc(TypeSpec("int128"), src_type) || dts.ts.tc(TypeSpec("uint128"), src_type)) {
result.get(m_dst->reg()) = TP_Type::make_from_ts("uint128");
return result;
}
}
result.get(m_dst->reg()) = TP_Type::make_from_ts("int");
}
}
}
return result;
}

View file

@ -13,6 +13,11 @@ const std::map<InstructionKind, OpenGOALAsm::Function> MIPS_ASM_TO_OPEN_GOAL_FUN
{InstructionKind::PSRAW, {"TODO.PSRAW", {}}},
{InstructionKind::PSUBW, {"TODO.PSUBW", {}}},
{InstructionKind::PEXTUW, {".pextuw", {}}},
{InstructionKind::PEXTLW, {".pextlw", {}}},
{InstructionKind::PCPYLD, {".pcpyld", {}}},
{InstructionKind::PCPYUD, {".pcpyud", {}}},
// NOTE - depending on how this is used, this may cause issues! Be Warned!
// lots of implicit logic in OpenGOAL depending on argument types!
{InstructionKind::MFC1, {".mov", {}}},
@ -163,6 +168,7 @@ std::vector<goos::Object> OpenGOALAsm::get_args(const std::vector<DecompilerLabe
std::vector<goos::Object> args;
std::vector<goos::Object> named_args;
bool got_fsf = false;
for (int i = 0; i < instr.n_src; i++) {
auto v = m_src.at(i);
InstructionAtom atom = instr.get_src(i);
@ -172,12 +178,23 @@ std::vector<goos::Object> OpenGOALAsm::get_args(const std::vector<DecompilerLabe
args.push_back(v.value().to_form(env));
} else if (atom.kind == InstructionAtom::AtomKind::VF_FIELD) {
// Handle FTF/FSF operations
if (func.allows_modifier(MOD::FTF) && named_args.size() == 0) {
if (func.allows_modifier(MOD::FTF) && func.allows_modifier(MOD::FSF)) {
if (got_fsf) {
named_args.push_back(
pretty_print::to_symbol(fmt::format(":ftf #b{:b}", atom.get_vf_field())));
} else {
got_fsf = true;
named_args.push_back(
pretty_print::to_symbol(fmt::format(":fsf #b{:b}", atom.get_vf_field())));
}
} else if (func.allows_modifier(MOD::FSF)) {
named_args.push_back(
pretty_print::to_symbol(fmt::format(":fsf #b{:b}", atom.get_vf_field())));
} else if (func.allows_modifier(MOD::FTF)) {
named_args.push_back(
pretty_print::to_symbol(fmt::format(":ftf #b{:b}", atom.get_vf_field())));
} else {
assert(false);
}
} else if (func.allows_modifier(MOD::OFFSET) && atom.kind == InstructionAtom::AtomKind::IMM) {
// Handle offsetting

View file

@ -7,6 +7,7 @@
#include "common/goos/PrettyPrinter.h"
#include "decompiler/IR2/OpenGoalMapping.h"
#include "decompiler/analysis/reg_usage.h"
#include "decompiler/ObjectFile/LinkedObjectFile.h"
namespace decompiler {
@ -42,7 +43,7 @@ bool rewrite_inline_asm_instructions(Form* top_level_form,
/*lg::warn("[ASM Re-Write] - Unsupported inline assembly instruction kind - [{}]",
asmOp.instr.kind);*/
f.warnings.general_warning("Unsupported inline assembly instruction kind - [{}]",
asmOp.instr.kind);
asmOp.instr.to_string(f.ir2.env.file->labels));
new_entries.push_back(entry);
continue;
} else if (elem->op()->instruction().kind == InstructionKind::VOPMULA) {

View file

@ -1205,7 +1205,7 @@
:size-assert #x40
:flag-assert #xa00000040
(:methods
(dummy-9 () none 9)
(transform-vectors! (_type_ (inline-array vector) (inline-array vector) int) none 9)
)
)
@ -1446,7 +1446,7 @@
(define-extern matrix-axis-angle! (function matrix vector float none))
(define-extern matrix-axis-sin-cos-vu! function)
(define-extern matrix-axis-sin-cos-vu! (function matrix vector float float none))
(define-extern trs-matrix-calc! function)
(define-extern transform-matrix-parent-calc! function)
(define-extern transform-matrix-calc! function)

View file

@ -79,7 +79,11 @@
"find-parent-method": {
"args": ["child-type", "method-id"],
"vars":{"v0-0":"current-method", "v1-2":"original-method", "v1-5":"unused1"}
"vars": {
"v0-0": "current-method",
"v1-2": "original-method",
"v1-5": "unused1"
}
},
"ref": {
@ -136,7 +140,13 @@
},
"sort": {
"args": ["lst", "compare-func"],
"vars":{"s4-0":"unsorted-count", "s3-0":"iter", "s2-0":"first-elt", "s1-0":"seoncd-elt", "v1-1":"compare-result"}
"vars": {
"s4-0": "unsorted-count",
"s3-0": "iter",
"s2-0": "first-elt",
"s1-0": "seoncd-elt",
"v1-1": "compare-result"
}
},
"(method 0 inline-array-class)": {
"args": ["allocation", "type-to-make", "size"],
@ -165,7 +175,13 @@
},
"qmem-copy->!": {
"args": ["dst", "src", "size"],
"vars":{"v0-0":"result", "v1-1":"qwc", "a1-1":"src-ptr", "a0-1":"dst-ptr", "a2-3":"value"}
"vars": {
"v0-0": "result",
"v1-1": "qwc",
"a1-1": "src-ptr",
"a0-1": "dst-ptr",
"a2-3": "value"
}
},
"mem-set32!": {
"args": ["dst", "size", "value"],
@ -205,7 +221,6 @@
"args": ["this", "rec"]
},
"(method 0 dead-pool-heap)": {
"vars": { "v0-0": ["obj", "dead-pool-heap"] }
},
@ -341,8 +356,12 @@
"string->int": {
"args": ["str"],
"vars":{"a0-1":"str-ptr", "v0-0":"result",
"a0-2":"next-char-1","a0-3":"next-char-2"}
"vars": {
"a0-1": "str-ptr",
"v0-0": "result",
"a0-2": "next-char-1",
"a0-3": "next-char-2"
}
},
"string-get-flag!!": {
@ -350,7 +369,16 @@
},
"(method 0 state)": {
"args":["allocation", "type-to-make", "name", "code", "trans", "enter", "exit", "event"],
"args": [
"allocation",
"type-to-make",
"name",
"code",
"trans",
"enter",
"exit",
"event"
],
"vars": { "v0-0": "obj" }
},
@ -359,9 +387,190 @@
"vars": { "v1-0": "parent", "v1-2": "child" }
},
// Matrix
"matrix-identity": {
"args": ["mat"],
"vars": { "f0-0": "one" }
},
"matrix+!": {
"args": ["dst", "src1", "src2"],
"vars": { "v1-0": "i" }
},
"matrix-!": {
"args": ["dst", "src1", "src2"],
"vars": { "v1-0": "i" }
},
"matrix*!": {
"args": ["dst", "src1", "src2"]
},
"matrixp*!": {
"args": ["dst", "src1", "src2"],
"vars": { "s5-0": "temp-mat" }
},
"vector-matrix*!": {
"args": ["dst", "vec", "mat"]
},
"vector-rotate*!": {
"args": ["dst", "vec", "mat"]
},
"vector3s-matrix*!": {
"args": ["dst", "vec", "mat"],
"vars": { "s5-0": "temp-vec3" }
},
"vector3s-rotate*!": {
"args": ["dst", "vec", "mat"],
"vars": { "s5-0": "temp-vec3" }
},
"matrix-transpose!": {
"args": ["dst", "src"]
},
"matrix-inverse-of-rot-trans!": {
"args": ["dst", "src"]
},
"matrix-4x4-inverse!": {
"args": ["dst", "src"]
},
"matrix-translate!": {
"args": ["dst", "trans"]
},
"matrix-translate+!": {
"args": ["dst", "src", "trans"]
},
"matrix-scale!": {
"args": ["dst", "scale"]
},
"scale-matrix!": {
"args": ["dst", "scale", "src"]
},
"matrix-inv-scale!": {
"args": ["dst", "scale"]
},
"column-scale-matrix!": {
"args": ["dst", "scale", "src"]
},
"matrix-rotate-x!": {
"args": ["dst", "rot-deg"],
"vars": { "f30-0": "rot-sin", "f0-0": "rot-cos" }
},
"matrix-rotate-y!": {
"args": ["dst", "rot-deg"],
"vars": { "f30-0": "rot-sin", "f0-0": "rot-cos" }
},
"matrix-rotate-z!": {
"args": ["dst", "rot-deg"],
"vars": { "f30-0": "rot-sin", "f0-0": "rot-cos" }
},
"matrix-rotate-zyx!": {
"args": ["dst", "rot-xyz-deg"],
"vars": { "gp-0": "temp-mat", "s5-0": "rot-mat" }
},
"matrix-rotate-xyz!": {
"args": ["dst", "rot-xyz-deg"],
"vars": { "gp-0": "temp-mat", "s5-0": "rot-mat" }
},
"matrix-rotate-zxy!": {
"args": ["dst", "rot-xyz-deg"],
"vars": { "gp-0": "temp-mat", "s5-0": "rot-mat" }
},
"matrix-rotate-yxz!": {
"args": ["dst", "rot-xyz-deg"],
"vars": { "gp-0": "temp-mat", "s5-0": "rot-mat" }
},
"matrix-rotate-yzx!": {
"args": ["dst", "rot-xyz-deg"],
"vars": { "gp-0": "temp-mat", "s5-0": "rot-mat" }
},
"matrix-rotate-yxy!": {
"args": ["dst", "rots-deg"],
"vars": {
"a2-0": "sincos-input",
"s5-0": "sin-vec",
"s4-0": "cos-vec",
"f1-1": "cos-y",
"f0-5": "sin-y",
"f2-0": "cos-x",
"f5-0": "sin-x",
"f3-0": "cos-z",
"f4-0": "sin-z"
}
},
"matrix-rotate-yx!": {
"args": ["dst", "rot-y-deg", "rot-x-deg"]
},
"matrix-axis-angle!": {
"args": ["dst", "axis", "angle-deg"]
},
"matrix-lerp!": {
"args": ["dst", "src1", "src2", "alpha"]
},
"matrix-3x3-determinant": {
"args": ["mat"]
},
"matrix-3x3-inverse!": {
"args": ["dst", "src"]
},
"matrix-3x3-inverse-transpose!": {
"args": ["dst", "src"]
},
"matrix3-inverse-transpose!": {
"args": ["dst", "src"]
},
"matrix-4x4-determinant": {
"args": ["dst", "src"]
},
"matrix-4x4-inverse-transpose!": {
"args": ["dst", "src"]
},
"matrix-y-angle": {
"args": ["mat"],
"vars": { "v1-0": "z-row" }
},
"deg-seek": {
"args": ["in", "target", "max-diff"],
"vars":{"v1-1":"in-int", "a0-2":"target-int", "a1-2":"max-diff-int", "a2-1":"diff", "a3-0":"abs-diff"}
"vars": {
"v1-1": "in-int",
"a0-2": "target-int",
"a1-2": "max-diff-int",
"a2-1": "diff",
"a3-0": "abs-diff"
}
},
"deg-seek-smooth": {
@ -410,16 +619,24 @@
},
"ultimate-memcpy": {
"args": ["dst", "src", "size-bytes"],
"vars":{"s2-0":"qwc-remaining",
"vars": {
"s2-0": "qwc-remaining",
"s1-0": "qwc-transferred-now",
"s4-0": "spr-to-bank",
"s3-0":"spr-from-bank"}
"s3-0": "spr-from-bank"
}
},
"dma-buffer-add-vu-function": {
"args": ["dma-buf", "vu-func"],
"vars":{"t1-1":"dma-buf-2", "v1-0":"func-ptr", "a3-0":"qlen", "a1-1":"origin", "t0-1":"qwc-now",
"t2-0":"buf-ptr"}
"vars": {
"t1-1": "dma-buf-2",
"v1-0": "func-ptr",
"a3-0": "qlen",
"a1-1": "origin",
"t0-1": "qwc-now",
"t2-0": "buf-ptr"
}
},
"dma-buffer-add-buckets": {
@ -452,7 +669,11 @@
"analog-input": {
"args": ["in", "offset", "center-val", "max-val", "out-range"],
"vars":{"f1-1":"offset-in", "f0-3":"magnitude", "v1-0":"max-magnitude"}
"vars": {
"f1-1": "offset-in",
"f0-3": "magnitude",
"v1-0": "max-magnitude"
}
},
"cpad-set-buzz!": {
@ -460,7 +681,13 @@
},
"service-cpads": {
"vars":{"gp-0":"pad-list", "s5-0":"pad-idx", "s4-0":"pad", "s3-0":"buzz-idx", "v1-29":"current-button0"}
"vars": {
"gp-0": "pad-list",
"s5-0": "pad-idx",
"s4-0": "pad",
"s3-0": "buzz-idx",
"v1-29": "current-button0"
}
},
"buzz-stop!": {
@ -469,7 +696,17 @@
"default-buffer-init": {
"args": ["buff"],
"vars":{"v1-0":"buff", "v1-1":"buff", "v1-3":"buff", "v1-4":"buff", "a1-4":"tag", "a1-6":"tag2", "a1-8":"data", "a0-1":"tag3", "v1-2":"buff"}
"vars": {
"v1-0": "buff",
"v1-1": "buff",
"v1-3": "buff",
"v1-4": "buff",
"a1-4": "tag",
"a1-6": "tag2",
"a1-8": "data",
"a0-1": "tag3",
"v1-2": "buff"
}
},
"add-reg-gif-packet": {
@ -478,7 +715,15 @@
},
"(method 0 draw-context)": {
"args":["allocation", "type-to-make", "org-x", "org-y", "width", "height", "color-0"]
"args": [
"allocation",
"type-to-make",
"org-x",
"org-y",
"width",
"height",
"color-0"
]
},
"(method 0 display)": {
@ -508,9 +753,13 @@
"update-math-camera": {
"args": ["math-cam", "ignored", "aspect"],
"vars":{"f0-4":"temp1", "v1-1":"elim1",
"f0-6":"temp2", "v1-2":"elim2",
"f1-3":"x-rat", "f0-7":"y-rat",
"vars": {
"f0-4": "temp1",
"v1-1": "elim1",
"f0-6": "temp2",
"v1-2": "elim2",
"f1-3": "x-rat",
"f0-7": "y-rat",
"v1-3": "cull-info",
"f2-2": "unused-x-thing",
"f2-5": "y-thing",
@ -567,10 +816,6 @@
"a0-13": "vis-gif-1",
"a0-14": "vis-gif-1-again",
"a0-15": "vis-gif-1-again-again"
}
},
@ -595,7 +840,6 @@
"surface-mult!": {
"args": ["dst", "src0", "src1"]
},
"(method 0 collide-shape-prim)": {
@ -612,7 +856,13 @@
"args": ["allocation", "type-to-make", "cshape", "elt-count", "prim-id"]
},
"(method 0 collide-shape)": {
"args":["allocation", "type-to-make", "proc", "collide-list-kind", "prim-id"],
"args": [
"allocation",
"type-to-make",
"proc",
"collide-list-kind",
"prim-id"
],
"vars": { "s5-0": "obj" }
},
"(method 11 touching-prims-entry-pool)": {
@ -622,13 +872,15 @@
// LEVEL
"lookup-level-info": {
"args": ["name"],
"vars":{"a1-1":["info", "level-load-info"], "v1-0":"rest", "a1-0":"current-sym"}
"vars": {
"a1-1": ["info", "level-load-info"],
"v1-0": "rest",
"a1-0": "current-sym"
}
},
"(method 21 level-group)": {
"args": ["obj", "name", "cmd-idx"],
"vars": { "v1-1": "cmd-lst" }
}
}

View file

@ -125,3 +125,9 @@
- There is now an option for `allow-misaligned` which allows the alignment of a struct type to be less than 16-bytes when inlined, without enabling array packing. This seems like a stupid option, but GOAL has this in some places, so we support it too.
- In method declarations in a `deftype`, you can no longer provide argument names. There was ambiguity when parsing a compound typespec vs named argument. The names were not used for anything.
- 128-bit integer register variables (`i128`) are now supported. These work with assembly forms, `set!`s between registers, and `set!`s of memory locations with type `(pointer uint128)` or `(pointer int128)`.
- Fixed a bug where the compiler would abort if it had to spill an `xmm` register containing an `i128` value.
- Added `.pextlw`, `.pextuw`, `.pcpyld`, and `.pcpyud` assembly forms
- Fixed a bug where `uint128` or children defined with `local-vars` would end up using a 64-bit GPR instead of a 128-bit XMM.
- Fixed a bug where 128-bit variable spills could be misaligned, causing a segfault at `vmovaps`.
- Added `.ppach` and `.pceqw`
- Fixed a bug where setting 128-bit / 64-bit variables from each other only did a 32-bit set

View file

@ -1520,6 +1520,26 @@ Wrapper around `vcvtdq2ps` and `vcvtps2dq` to convert packed 32-bit signed integ
Wrapper around `vpsrld`, `vpsrad`, and `vpslld`. Does shifts on each of the 4 32-bit integers in the register.
## `.pextlw`, `.pextuw`, `.pcpyud`, `.pcpyld`, `.pceqw`, `.ppach`
```
(.pextlw dst src0 src1 [:color #t|#f])
(.pextuw dst src0 src1 [:color #t|#f])
(.pcpyud dst src0 src1 [:color #t|#f])
(.pcpyld dst src0 src1 [:color #t|#f])
(.pceqw dst src0 src1 [:color #t|#f])
(.ppach dest src0 src1)
```
Equivalents of the EE's MMI instructions with the same name. These can only be used on 128-bit variables. Most map to single x86 instructions:
- `pextlw` is `VPUNPCKLDQ` (sources swapped)
- `pextuw` is `VPUNPCKHDQ` (sources swapped)
- `pcpyld` is `VPUNPCKLQDQ` (sources swapped)
- `pcpyud` is `VPUNPCKHQDQ` (sources _not_ swapped)
- `pceqw` is `VPCMPEQD`
Some map to multiple instructions. These must use the coloring system.
- `ppach` is a sequence of 7 instructions (`VPSHUFLW`, `VPSHUFHW`, `VPSRLDQ`, `VPUNPCKLQDQ`).
# Compiler Forms - Unsorted
## `let`

View file

@ -5,7 +5,7 @@
;; name in dgo: matrix-h
;; dgos: GAME, ENGINE
;; matrix-h
;; A 4x4 matrix, stored in row-major order
(deftype matrix (structure)
((data float 16 :offset-assert 0)
(vector vector 4 :inline :offset 0)
@ -15,10 +15,13 @@
:size-assert #x40
:flag-assert #xa00000040
(:methods
(dummy-9 () none 9)
(transform-vectors! (_type_ (inline-array vector) (inline-array vector) int) none 9)
)
)
;; A 3x3 matrix, stored in row-major order.
;; NOTE: the rows each have an extra 4-bytes of padding
;; so this is really a 3x4 matrix.
(deftype matrix3 (structure)
((data float 12 :offset-assert 0)
(vector vector 3 :inline :offset 0)
@ -40,16 +43,16 @@
:flag-assert #x900000020
)
(defun matrix-copy! ((arg0 matrix) (arg1 matrix))
(let ((v1-0 (-> arg1 vector 0 quad))
(a2-0 (-> arg1 vector 1 quad))
(a3-0 (-> arg1 vector 2 quad))
(a1-1 (-> arg1 vector 3 quad))
(defun matrix-copy! ((dst matrix) (src matrix))
(let ((v1-0 (-> src vector 0 quad))
(a2-0 (-> src vector 1 quad))
(a3-0 (-> src vector 2 quad))
(a1-1 (-> src vector 3 quad))
)
(set! (-> arg0 vector 0 quad) v1-0)
(set! (-> arg0 vector 1 quad) a2-0)
(set! (-> arg0 vector 2 quad) a3-0)
(set! (-> arg0 vector 3 quad) a1-1)
(set! (-> dst vector 0 quad) v1-0)
(set! (-> dst vector 1 quad) a2-0)
(set! (-> dst vector 2 quad) a3-0)
(set! (-> dst vector 3 quad) a1-1)
)
arg0
dst
)

File diff suppressed because it is too large Load diff

View file

@ -175,6 +175,11 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
m_gen.add_instr(IGen::load_reg_offset_xmm32(
op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
i_rec);
} else if (op.reg.is_xmm() &&
(op.reg_class == RegClass::VECTOR_FLOAT || op.reg_class == RegClass::INT_128)) {
m_gen.add_instr(IGen::load128_xmm128_reg_offset(
op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
i_rec);
} else {
assert(false);
}
@ -197,6 +202,11 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
m_gen.add_instr(IGen::store_reg_offset_xmm32(
RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
i_rec);
} else if (op.reg.is_xmm() &&
(op.reg_class == RegClass::VECTOR_FLOAT || op.reg_class == RegClass::INT_128)) {
m_gen.add_instr(IGen::store128_xmm128_reg_offset(
RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
i_rec);
} else {
assert(false);
}

View file

@ -87,6 +87,11 @@ class Compiler {
emitter::Register::VF_ELEMENT broadcastElement,
Env* env);
Val* compile_asm_int128_math3(const goos::Object& form,
const goos::Object& rest,
IR_Int128Math3Asm::Kind kind,
Env* env);
Val* compile_asm_vf_math2(const goos::Object& form,
const goos::Object& rest,
IR_VFMath2Asm::Kind kind,
@ -420,6 +425,13 @@ class Compiler {
Val* compile_asm_pw_sll(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_pw_srl(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_pw_sra(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_pextlw(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_pextuw(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_pcpyud(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_pcpyld(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_pceqw(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_ppach(const goos::Object& form, const goos::Object& rest, Env* env);
Val* compile_asm_xorp(const goos::Object& form, const goos::Object& rest, Env* env);
// Atoms

View file

@ -118,9 +118,9 @@ void regset_common(emitter::ObjectGenerator* gen,
} else if (src_class == RegClass::FLOAT && dst_is_xmm128) {
gen->add_instr(IGen::mov_xmm32_xmm32(dst_reg, src_reg), irec);
} else if (src_class == RegClass::GPR_64 && dst_is_xmm128) {
gen->add_instr(IGen::movd_xmm32_gpr32(dst_reg, src_reg), irec);
gen->add_instr(IGen::movq_xmm64_gpr64(dst_reg, src_reg), irec);
} else if (src_is_xmm128 && dst_class == RegClass::GPR_64) {
gen->add_instr(IGen::movd_gpr32_xmm32(dst_reg, src_reg), irec);
gen->add_instr(IGen::movq_gpr64_xmm64(dst_reg, src_reg), irec);
} else {
assert(false); // unhandled move.
}
@ -1442,6 +1442,83 @@ void IR_VFMath3Asm::do_codegen(emitter::ObjectGenerator* gen,
}
}
///////////////////////
// IR_Int128Math3Asm
///////////////////////
/*!
 * Build a three-register (dst, src1, src2) 128-bit integer assembly op.
 * use_color is forwarded to the IR_Asm base; the register pointers and the operation
 * kind are stored as-is (no copies of the RegVals are made).
 */
IR_Int128Math3Asm::IR_Int128Math3Asm(bool use_color,
const RegVal* dst,
const RegVal* src1,
const RegVal* src2,
Kind kind)
: IR_Asm(use_color), m_dst(dst), m_src1(src1), m_src2(src2), m_kind(kind) {}
/*!
 * Render this op as text: the mnemonic for m_kind, immediately followed by the color
 * suffix, then the destination and the two sources separated by commas.
 * An unhandled kind trips the assert.
 */
std::string IR_Int128Math3Asm::print() {
  std::string mnemonic = "";

  // Map the operation kind to the name of the matching assembly form.
  switch (m_kind) {
    case Kind::PEXTLW: {
      mnemonic = ".pextlw";
    } break;
    case Kind::PEXTUW: {
      mnemonic = ".pextuw";
    } break;
    case Kind::PCPYLD: {
      mnemonic = ".pcpyld";
    } break;
    case Kind::PCPYUD: {
      mnemonic = ".pcpyud";
    } break;
    case Kind::PCEQW: {
      mnemonic = ".pceqw";
    } break;
    default:
      assert(false);
  }

  return fmt::format("{}{} {}, {}, {}", mnemonic, get_color_suffix_string(), m_dst->print(),
                     m_src1->print(), m_src2->print());
}
/*!
 * Describe this op's register usage for the register allocator.
 * When coloring is disabled the returned record is left empty; otherwise the destination
 * is listed as written and both sources as read.
 */
RegAllocInstr IR_Int128Math3Asm::to_rai() {
  RegAllocInstr usage;
  if (!m_use_coloring) {
    return usage;
  }

  usage.write.push_back(m_dst->ireg());
  usage.read.push_back(m_src1->ireg());
  usage.read.push_back(m_src2->ireg());
  return usage;
}
/*!
 * Emit the x86 instruction for this op.
 * The three registers are resolved through get_reg_asm and then handed to the IGen
 * encoder matching m_kind. For PEXTLW, PEXTUW and PCPYLD the two sources are passed to
 * the encoder in reverse order on purpose, because x86 and the PS2 order them
 * opposite ways. An unhandled kind trips the assert.
 */
void IR_Int128Math3Asm::do_codegen(emitter::ObjectGenerator* gen,
                                   const AllocationResult& allocs,
                                   emitter::IR_Record irec) {
  auto out_reg = get_reg_asm(m_dst, allocs, irec, m_use_coloring);
  auto first_reg = get_reg_asm(m_src1, allocs, irec, m_use_coloring);
  auto second_reg = get_reg_asm(m_src2, allocs, irec, m_use_coloring);

  switch (m_kind) {
    // The next three are intentionally source-swapped (x86 vs. PS2 operand order).
    case Kind::PEXTLW: {
      gen->add_instr(IGen::pextlw_swapped(out_reg, second_reg, first_reg), irec);
    } break;
    case Kind::PEXTUW: {
      gen->add_instr(IGen::pextuw_swapped(out_reg, second_reg, first_reg), irec);
    } break;
    case Kind::PCPYLD: {
      gen->add_instr(IGen::pcpyld_swapped(out_reg, second_reg, first_reg), irec);
    } break;

    // These two take the sources in their original order.
    case Kind::PCPYUD: {
      gen->add_instr(IGen::pcpyud(out_reg, first_reg, second_reg), irec);
    } break;
    case Kind::PCEQW: {
      gen->add_instr(IGen::pceqw(out_reg, first_reg, second_reg), irec);
    } break;

    default:
      assert(false);
  }
}
///////////////////////
// AsmVF2
///////////////////////
@ -1475,6 +1552,22 @@ std::string IR_VFMath2Asm::print() {
use_imm = true;
function = ".pw.sra";
break;
case Kind::VPSRLDQ:
use_imm = true;
function = ".VPSRLDQ";
break;
case Kind::VPSLLDQ:
use_imm = true;
function = ".VPSLLDQ";
break;
case Kind::VPSHUFLW:
use_imm = true;
function = ".VPSHUFLW";
break;
case Kind::VPSHUFHW:
use_imm = true;
function = ".VPSHUFHW";
break;
default:
assert(false);
}
@ -1530,6 +1623,30 @@ void IR_VFMath2Asm::do_codegen(emitter::ObjectGenerator* gen,
assert(*m_imm <= 255);
gen->add_instr(IGen::pw_sra(dst, src, *m_imm), irec);
break;
case Kind::VPSRLDQ:
assert(m_imm.has_value());
assert(*m_imm >= 0);
assert(*m_imm <= 255);
gen->add_instr(IGen::vpsrldq(dst, src, *m_imm), irec);
break;
case Kind::VPSLLDQ:
assert(m_imm.has_value());
assert(*m_imm >= 0);
assert(*m_imm <= 255);
gen->add_instr(IGen::vpslldq(dst, src, *m_imm), irec);
break;
case Kind::VPSHUFLW:
assert(m_imm.has_value());
assert(*m_imm >= 0);
assert(*m_imm <= 255);
gen->add_instr(IGen::vpshuflw(dst, src, *m_imm), irec);
break;
case Kind::VPSHUFHW:
assert(m_imm.has_value());
assert(*m_imm >= 0);
assert(*m_imm <= 255);
gen->add_instr(IGen::vpshufhw(dst, src, *m_imm), irec);
break;
default:
assert(false);
}

View file

@ -546,9 +546,30 @@ class IR_VFMath3Asm : public IR_Asm {
Kind m_kind;
};
// IR for an inline-assembly operation with one destination register and two source
// registers, where the specific operation is selected by Kind.
class IR_Int128Math3Asm : public IR_Asm {
public:
// The operations this IR can represent.
enum class Kind { PEXTLW, PEXTUW, PCPYUD, PCPYLD, PCEQW };
// use_color is passed on to IR_Asm; dst/src1/src2 are stored as non-owning pointers.
IR_Int128Math3Asm(bool use_color,
const RegVal* dst,
const RegVal* src1,
const RegVal* src2,
Kind kind);
// Text form of the instruction.
std::string print() override;
// Register read/write information for the register allocator.
RegAllocInstr to_rai() override;
// Emit the machine instruction into gen.
void do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) override;
protected:
// Destination register.
const RegVal* m_dst = nullptr;
// First source register.
const RegVal* m_src1 = nullptr;
// Second source register.
const RegVal* m_src2 = nullptr;
// Which operation to perform. Has no default; always set by the constructor.
Kind m_kind;
};
class IR_VFMath2Asm : public IR_Asm {
public:
enum class Kind { ITOF, FTOI, PW_SLL, PW_SRL, PW_SRA };
enum class Kind { ITOF, FTOI, PW_SLL, PW_SRL, PW_SRA, VPSRLDQ, VPSLLDQ, VPSHUFLW, VPSHUFHW };
IR_VFMath2Asm(bool use_color,
const RegVal* dst,
const RegVal* src,

View file

@ -484,6 +484,28 @@ Val* Compiler::compile_asm_vf_math3(const goos::Object& form,
return get_none();
}
/*!
 * Shared implementation for the three-register 128-bit integer assembly forms:
 *   (<op> dst src0 src1 [:color #t|#f])
 * Exactly three unnamed arguments are required. The optional :color argument selects
 * whether the emitted IR uses register coloring and defaults to true. The destination
 * must be settable, otherwise a compiler error is thrown. Always returns none.
 */
Val* Compiler::compile_asm_int128_math3(const goos::Object& form,
                                        const goos::Object& rest,
                                        IR_Int128Math3Asm::Kind kind,
                                        Env* env) {
  auto va = get_va(form, rest);
  va_check(form, va, {{}, {}, {}}, {{"color", {false, goos::ObjectType::SYMBOL}}});

  // Coloring is on unless the form explicitly says otherwise.
  bool use_color = va.has_named("color") ? get_true_or_false(form, va.named.at("color")) : true;

  // Operands are compiled in order: destination first, then the two sources.
  auto out = compile_error_guard(va.unnamed.at(0), env)->to_reg(env);
  auto first_src = compile_error_guard(va.unnamed.at(1), env)->to_reg(env);
  auto second_src = compile_error_guard(va.unnamed.at(2), env)->to_reg(env);

  if (!out->settable()) {
    throw_compiler_error(form, "Cannot set destination");
  }

  env->emit_ir<IR_Int128Math3Asm>(use_color, out, first_src, second_src, kind);
  return get_none();
}
Val* Compiler::compile_asm_vf_math2(const goos::Object& form,
const goos::Object& rest,
IR_VFMath2Asm::Kind kind,
@ -586,6 +608,67 @@ Val* Compiler::compile_asm_pw_sra(const goos::Object& form, const goos::Object&
return compile_asm_vf_math2_imm_u8(form, rest, IR_VFMath2Asm::Kind::PW_SRA, env);
}
// Compile a pextlw assembly form by delegating to the shared three-register
// 128-bit integer handler with Kind::PEXTLW.
Val* Compiler::compile_asm_pextlw(const goos::Object& form, const goos::Object& rest, Env* env) {
return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PEXTLW, env);
}
// Compile a pextuw assembly form by delegating to the shared three-register
// 128-bit integer handler with Kind::PEXTUW.
Val* Compiler::compile_asm_pextuw(const goos::Object& form, const goos::Object& rest, Env* env) {
return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PEXTUW, env);
}
// Compile a pcpyud assembly form by delegating to the shared three-register
// 128-bit integer handler with Kind::PCPYUD.
Val* Compiler::compile_asm_pcpyud(const goos::Object& form, const goos::Object& rest, Env* env) {
return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PCPYUD, env);
}
// Compile a pcpyld assembly form by delegating to the shared three-register
// 128-bit integer handler with Kind::PCPYLD.
Val* Compiler::compile_asm_pcpyld(const goos::Object& form, const goos::Object& rest, Env* env) {
return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PCPYLD, env);
}
// Compile a pceqw assembly form by delegating to the shared three-register
// 128-bit integer handler with Kind::PCEQW.
Val* Compiler::compile_asm_pceqw(const goos::Object& form, const goos::Object& rest, Env* env) {
return compile_asm_int128_math3(form, rest, IR_Int128Math3Asm::Kind::PCEQW, env);
}
/*!
 * Compile (.ppach dst src0 src1).
 * This form takes exactly three unnamed arguments and no named ones. It is expanded into
 * a sequence of seven IR ops: each source goes through the same shuffle-low,
 * shuffle-high, byte-shift-right steps (the first source into a scratch 128-bit register,
 * the second directly into the destination), and the two results are then merged with a
 * PCPYLD. Every emitted op uses register coloring. Always returns none.
 */
Val* Compiler::compile_asm_ppach(const goos::Object& form, const goos::Object& rest, Env* env) {
  auto va = get_va(form, rest);
  va_check(form, va, {{}, {}, {}}, {});

  auto out = compile_error_guard(va.unnamed.at(0), env)->to_reg(env);
  auto rs = compile_error_guard(va.unnamed.at(1), env)->to_reg(env);
  auto rt = compile_error_guard(va.unnamed.at(2), env)->to_reg(env);
  // Scratch register that holds the intermediate result derived from rs.
  auto scratch = env->make_ireg(TypeSpec("uint128"), RegClass::INT_128);

  if (!out->settable()) {
    throw_compiler_error(form, "Cannot set destination");
  }

  using Op2 = IR_VFMath2Asm::Kind;

  // Stage 1: shuffle the low halfwords of each source.
  env->emit_ir<IR_VFMath2Asm>(true, scratch, rs, Op2::VPSHUFLW, 0x88);
  env->emit_ir<IR_VFMath2Asm>(true, out, rt, Op2::VPSHUFLW, 0x88);

  // Stage 2: shuffle the high halfwords, in place.
  env->emit_ir<IR_VFMath2Asm>(true, scratch, scratch, Op2::VPSHUFHW, 0x88);
  env->emit_ir<IR_VFMath2Asm>(true, out, out, Op2::VPSHUFHW, 0x88);

  // Stage 3: shift each intermediate right by 4 bytes.
  env->emit_ir<IR_VFMath2Asm>(true, scratch, scratch, Op2::VPSRLDQ, 4);
  env->emit_ir<IR_VFMath2Asm>(true, out, out, Op2::VPSRLDQ, 4);

  // Stage 4: combine. PCPYLD is actually a VPUNPCKLQDQ with its sources swapped.
  env->emit_ir<IR_Int128Math3Asm>(true, out, scratch, out, IR_Int128Math3Asm::Kind::PCPYLD);
  return get_none();
}
/*!
 * Compile a three-operand xor assembly form: (<op> dst src0 src1).
 * Exactly three unnamed arguments and no named arguments are accepted. The destination
 * must be settable. The op is emitted as an IR_VFMath3Asm of kind XOR with register
 * coloring always enabled. Always returns none.
 */
Val* Compiler::compile_asm_xorp(const goos::Object& form, const goos::Object& rest, Env* env) {
  auto va = get_va(form, rest);
  va_check(form, va, {{}, {}, {}}, {});

  auto out = compile_error_guard(va.unnamed.at(0), env)->to_reg(env);
  auto rs = compile_error_guard(va.unnamed.at(1), env)->to_reg(env);
  auto rt = compile_error_guard(va.unnamed.at(2), env)->to_reg(env);

  if (!out->settable()) {
    throw_compiler_error(form, "Cannot set destination");
  }

  env->emit_ir<IR_VFMath3Asm>(true, out, rs, rt, IR_VFMath3Asm::Kind::XOR);
  return get_none();
}
Val* Compiler::compile_asm_itof_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
return compile_asm_vf_math2(form, rest, IR_VFMath2Asm::Kind::ITOF, env);
}

View file

@ -35,6 +35,7 @@ const std::unordered_map<
{".wait.vf", &Compiler::compile_asm_wait_vf},
{".xor.vf", &Compiler::compile_asm_xor_vf},
{".xor.p", &Compiler::compile_asm_xorp},
{".max.vf", &Compiler::compile_asm_max_vf},
{".max.x.vf", &Compiler::compile_asm_max_x_vf},
@ -95,6 +96,12 @@ const std::unordered_map<
{".pw.sll", &Compiler::compile_asm_pw_sll},
{".pw.srl", &Compiler::compile_asm_pw_srl},
{".pw.sra", &Compiler::compile_asm_pw_sra},
{".pextlw", &Compiler::compile_asm_pextlw},
{".pextuw", &Compiler::compile_asm_pextuw},
{".pcpyld", &Compiler::compile_asm_pcpyld},
{".pcpyud", &Compiler::compile_asm_pcpyud},
{".pceqw", &Compiler::compile_asm_pceqw},
{".ppach", &Compiler::compile_asm_ppach},
// BLOCK FORMS
{"top-level", &Compiler::compile_top_level},

View file

@ -87,10 +87,14 @@ Val* Compiler::compile_local_vars(const goos::Object& form, const goos::Object&
throw_compiler_error(form, "Cannot declare a local named {}, this already exists.", name);
}
if (type == TypeSpec("float")) {
if (m_ts.tc(TypeSpec("float"), type)) {
auto ireg = fe->make_ireg(type, RegClass::FLOAT);
ireg->mark_as_settable();
fe->params[name] = ireg;
} else if (m_ts.tc(TypeSpec("int128"), type) || m_ts.tc(TypeSpec("uint128"), type)) {
auto ireg = fe->make_ireg(type, RegClass::INT_128);
ireg->mark_as_settable();
fe->params[name] = ireg;
} else {
auto ireg = fe->make_ireg(type, RegClass::GPR_64);
ireg->mark_as_settable();

View file

@ -105,6 +105,34 @@ class IGen {
return instr;
}
/*!
 * Move 64-bits of xmm to 64 bits of gpr (no sign extension).
 * dst must be a gpr and src must be an xmm register (both are asserted).
 */
static Instruction movq_gpr64_xmm64(Register dst, Register src) {
assert(dst.is_gpr());
assert(src.is_xmm());
// Opcode bytes are 0x66, 0x0f, 0x7e.
Instruction instr(0x66);
instr.set_op2(0x0f);
instr.set_op3(0x7e);
// Note the operand order: the xmm source is passed first, the gpr destination second.
// NOTE(review): the trailing `true` presumably requests a 64-bit (REX.W) operand - confirm.
instr.set_modrm_and_rex(src.hw_id(), dst.hw_id(), 3, true);
// NOTE(review): presumably reorders the leading 0x66 byte and the REX byte - confirm.
instr.swap_op0_rex();
return instr;
}
/*!
 * Move 64-bits of gpr to 64-bits of xmm (no sign extension)
 * dst must be an xmm register and src must be a gpr (both are asserted).
 */
static Instruction movq_xmm64_gpr64(Register dst, Register src) {
assert(dst.is_xmm());
assert(src.is_gpr());
// Opcode bytes are 0x66, 0x0f, 0x6e.
Instruction instr(0x66);
instr.set_op2(0x0f);
instr.set_op3(0x6e);
// Here the xmm destination is passed first and the gpr source second.
// NOTE(review): the trailing `true` presumably requests a 64-bit (REX.W) operand - confirm.
instr.set_modrm_and_rex(dst.hw_id(), src.hw_id(), 3, true);
// NOTE(review): presumably reorders the leading 0x66 byte and the REX byte - confirm.
instr.swap_op0_rex();
return instr;
}
/*!
* Move 32-bits between xmm's
*/
@ -2401,6 +2429,114 @@ class IGen {
instr.set(Imm(1, imm));
return instr;
}
// Encode VPUNPCKLDQ dst, src0, src1. All three registers must be xmm (asserted).
// NOTE(review): per the "_swapped" name, callers are presumably expected to pass the
// pextlw sources in reversed order - confirm at the call site.
static Instruction pextlw_swapped(Register dst, Register src0, Register src1) {
assert(dst.is_xmm());
assert(src0.is_xmm());
assert(src1.is_xmm());
// VEX.128.66.0F.WIG 62/r VPUNPCKLDQ xmm1, xmm2, xmm3/m128
// reg, vex, r/m
Instruction instr(0x62);
// dst is the first argument, src1 the second and src0 the VEX register argument.
instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(),
false, VexPrefix::P_66);
return instr;
}
// Encode VPUNPCKHDQ dst, src0, src1. All three registers must be xmm (asserted).
// NOTE(review): per the "_swapped" name, callers are presumably expected to pass the
// pextuw sources in reversed order - confirm at the call site.
static Instruction pextuw_swapped(Register dst, Register src0, Register src1) {
assert(dst.is_xmm());
assert(src0.is_xmm());
assert(src1.is_xmm());
// VEX.128.66.0F.WIG 6A/r VPUNPCKHDQ xmm1, xmm2, xmm3/m128
// reg, vex, r/m
Instruction instr(0x6a);
// dst is the first argument, src1 the second and src0 the VEX register argument.
instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(),
false, VexPrefix::P_66);
return instr;
}
// Encode VPUNPCKLQDQ dst, src0, src1. All three registers must be xmm (asserted).
static Instruction vpunpcklqdq(Register dst, Register src0, Register src1) {
assert(dst.is_xmm());
assert(src0.is_xmm());
assert(src1.is_xmm());
// VEX.128.66.0F.WIG 6C/r VPUNPCKLQDQ xmm1, xmm2, xmm3/m128
// reg, vex, r/m
Instruction instr(0x6c);
// dst is the first argument, src1 the second and src0 the VEX register argument.
instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(),
false, VexPrefix::P_66);
return instr;
}
// Alias for vpunpcklqdq with the arguments passed through unchanged.
// NOTE(review): per the "_swapped" name, callers are presumably expected to pass the
// pcpyld sources in reversed order - confirm at the call site.
static Instruction pcpyld_swapped(Register dst, Register src0, Register src1) {
return vpunpcklqdq(dst, src0, src1);
}
// Encode VPUNPCKHQDQ dst, src0, src1. All three registers must be xmm (asserted).
static Instruction pcpyud(Register dst, Register src0, Register src1) {
assert(dst.is_xmm());
assert(src0.is_xmm());
assert(src1.is_xmm());
// VEX.128.66.0F.WIG 6D/r VPUNPCKHQDQ xmm1, xmm2, xmm3/m128
// reg, vex, r/m
Instruction instr(0x6d);
// dst is the first argument, src1 the second and src0 the VEX register argument.
instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(),
false, VexPrefix::P_66);
return instr;
}
// Encode VPCMPEQD dst, src0, src1. All three registers must be xmm (asserted).
static Instruction pceqw(Register dst, Register src0, Register src1) {
assert(dst.is_xmm());
assert(src0.is_xmm());
assert(src1.is_xmm());
// VEX.128.66.0F.WIG 76 /r VPCMPEQD xmm1, xmm2, xmm3/m128
// reg, vex, r/m
Instruction instr(0x76);
// dst is the first argument, src1 the second and src0 the VEX register argument.
instr.set_vex_modrm_and_rex(dst.hw_id(), src1.hw_id(), VEX3::LeadingBytes::P_0F, src0.hw_id(),
false, VexPrefix::P_66);
return instr;
}
// Encode VPSRLDQ dst, src, imm8. Both registers must be xmm (asserted).
static Instruction vpsrldq(Register dst, Register src, u8 imm) {
assert(dst.is_xmm());
assert(src.is_xmm());
// VEX.128.66.0F.WIG 73 /3 ib VPSRLDQ xmm1, xmm2, imm8
Instruction instr(0x73);
// The literal 3 is the /3 opcode extension from the encoding above; src is the second
// argument and dst is passed as the VEX register argument.
instr.set_vex_modrm_and_rex(3, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false,
VexPrefix::P_66);
// NOTE(review): Imm(1, imm) is presumably a 1-byte immediate - confirm.
instr.set(Imm(1, imm));
return instr;
}
// Encode VPSLLDQ dst, src, imm8. Both registers must be xmm (asserted).
static Instruction vpslldq(Register dst, Register src, u8 imm) {
assert(dst.is_xmm());
assert(src.is_xmm());
// VEX.128.66.0F.WIG 73 /7 ib VPSLLDQ xmm1, xmm2, imm8
Instruction instr(0x73);
// The literal 7 is the /7 opcode extension from the encoding above; src is the second
// argument and dst is passed as the VEX register argument.
instr.set_vex_modrm_and_rex(7, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false,
VexPrefix::P_66);
// NOTE(review): Imm(1, imm) is presumably a 1-byte immediate - confirm.
instr.set(Imm(1, imm));
return instr;
}
// Encode VPSHUFLW dst, src, imm8. Both registers must be xmm (asserted).
static Instruction vpshuflw(Register dst, Register src, u8 imm) {
assert(dst.is_xmm());
assert(src.is_xmm());
// VEX.128.F2.0F.WIG 70 /r ib VPSHUFLW xmm1, xmm2/m128, imm8
Instruction instr(0x70);
// This form only has two register operands, so 0 is passed for the VEX register argument.
instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false,
VexPrefix::P_F2);
// NOTE(review): Imm(1, imm) is presumably a 1-byte immediate - confirm.
instr.set(Imm(1, imm));
return instr;
}
// Build a VPSHUFHW instruction: xmm destination, xmm source, one-byte immediate.
// Asserts that dst and src are xmm registers.
static Instruction vpshufhw(Register dst, Register src, u8 imm) {
  assert(dst.is_xmm());
  assert(src.is_xmm());
  // VEX.128.F3.0F.WIG 70 /r ib VPSHUFHW xmm1, xmm2/m128, imm8
  // Only two register operands are encoded, so 0 is passed for the vex register.
  const auto reg_id = dst.hw_id();
  const auto rm_id = src.hw_id();
  Instruction shuffle_op(0x70);
  shuffle_op.set_vex_modrm_and_rex(reg_id, rm_id, VEX3::LeadingBytes::P_0F, 0, false,
                                   VexPrefix::P_F3);
  shuffle_op.set(Imm(1, imm));
  return shuffle_op;
}
};
} // namespace emitter

View file

@ -54,8 +54,8 @@ RegisterInfo RegisterInfo::make_register_info() {
// todo - experiment with better orders for allocation.
info.m_gpr_alloc_order = {RAX, RCX, RDX, RBX, RBP, RSI, RDI, R8, R9, R10, R11}; // arbitrary
info.m_xmm_alloc_order = {XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14};
info.m_xmm_alloc_order = {XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6,
XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13};
// these should only be temp registers!
info.m_gpr_temp_only_alloc_order = {RAX, RCX, RDX, RSI, RDI, R8, R9};

View file

@ -605,9 +605,31 @@ bool try_assignment_for_var(int var,
}
int get_stack_slot_for_var(int var, RegAllocCache* cache) {
int slot_size;
auto& info = cache->iregs.at(var);
switch (info.reg_class) {
case RegClass::INT_128:
slot_size = 2;
break;
case RegClass::VECTOR_FLOAT:
slot_size = 2;
break;
case RegClass::FLOAT:
slot_size = 1; // todo - this wastes some space
break;
case RegClass::GPR_64:
slot_size = 1;
break;
default:
assert(false);
}
auto kv = cache->var_to_stack_slot.find(var);
if (kv == cache->var_to_stack_slot.end()) {
auto slot = cache->current_stack_slot++;
if (slot_size == 2 && (cache->current_stack_slot & 1)) {
cache->current_stack_slot++;
}
auto slot = cache->current_stack_slot;
cache->current_stack_slot += slot_size;
cache->var_to_stack_slot[var] = slot;
return slot;
} else {

View file

@ -112,24 +112,22 @@
)
;; definition for method 3 of type vec4s
;; INFO: Return type mismatch int vs vec4s.
;; WARN: Unsupported inline assembly instruction kind - [131]
;; WARN: Unsupported inline assembly instruction kind - [131]
;; WARN: Unsupported inline assembly instruction kind - [73]
;; WARN: Unsupported inline assembly instruction kind - [132]
;; WARN: Unsupported inline assembly instruction kind - [73]
;; WARN: Unsupported inline assembly instruction kind - [132]
;; WARN: Unsupported inline assembly instruction kind - [131]
;; INFO: Return type mismatch uint128 vs vec4s.
;; WARN: Unsupported inline assembly instruction kind - [por gp, a0, r0]
;; WARN: Unsupported inline assembly instruction kind - [por a2, gp, r0]
;; WARN: Unsupported inline assembly instruction kind - [sllv a2, gp, r0]
;; WARN: Unsupported inline assembly instruction kind - [sllv a2, v1, r0]
;; WARN: Unsupported inline assembly instruction kind - [por v0, gp, r0]
(defmethod inspect vec4s ((obj vec4s))
(local-vars
(r0-0 none)
(v0-5 int)
(v1-0 int)
(v1-1 int)
(a2-0 int)
(a2-1 int)
(a2-3 int)
(gp-0 int)
(v0-5 uint128)
(v1-0 uint128)
(v1-1 uint128)
(a2-0 uint128)
(a2-1 uint128)
(a2-3 uint128)
(gp-0 uint128)
)
(.por gp-0 obj r0-0)
(let ((t9-0 format)
@ -146,7 +144,7 @@
(.sllv a2-1 gp-0 r0-0)
(t9-1 a0-2 a1-1 a2-1)
)
(format #t "~Ty: ~f~%" (sar gp-0 32))
(format #t "~Ty: ~f~%" (sar (the-as int gp-0) 32))
(let ((t9-3 format)
(a0-4 #t)
(a1-3 "~Tz: ~f~%")
@ -160,31 +158,29 @@
(a1-4 "~Tw: ~f~%")
)
(.pcpyud v1-1 gp-0 r0-0)
(t9-4 a0-5 a1-4 (sar v1-1 32))
(t9-4 a0-5 a1-4 (sar (the-as int v1-1) 32))
)
(.por v0-5 gp-0 r0-0)
(the-as vec4s v0-5)
)
;; definition for method 2 of type vec4s
;; INFO: Return type mismatch int vs vec4s.
;; WARN: Unsupported inline assembly instruction kind - [131]
;; WARN: Unsupported inline assembly instruction kind - [73]
;; WARN: Unsupported inline assembly instruction kind - [132]
;; WARN: Unsupported inline assembly instruction kind - [73]
;; WARN: Unsupported inline assembly instruction kind - [132]
;; WARN: Unsupported inline assembly instruction kind - [131]
;; WARN: Unsupported inline assembly instruction kind - [131]
;; INFO: Return type mismatch uint128 vs vec4s.
;; WARN: Unsupported inline assembly instruction kind - [por gp, a0, r0]
;; WARN: Unsupported inline assembly instruction kind - [sllv a2, gp, r0]
;; WARN: Unsupported inline assembly instruction kind - [sllv t0, v1, r0]
;; WARN: Unsupported inline assembly instruction kind - [por t2, gp, r0]
;; WARN: Unsupported inline assembly instruction kind - [por v0, gp, r0]
(defmethod print vec4s ((obj vec4s))
(local-vars
(r0-0 none)
(v0-1 int)
(v1-0 int)
(v1-1 int)
(a2-0 int)
(t0-0 int)
(t2-0 int)
(gp-0 int)
(v0-1 uint128)
(v1-0 uint128)
(v1-1 uint128)
(a2-0 uint128)
(t0-0 uint128)
(t2-0 uint128)
(gp-0 uint128)
)
(.por gp-0 obj r0-0)
(let ((t9-0 format)
@ -192,11 +188,11 @@
(a1-0 "#<vector ~F ~F ~F ~F @ #x~X>")
)
(.sllv a2-0 gp-0 r0-0)
(let ((a3-0 (sar gp-0 32)))
(let ((a3-0 (sar (the-as int gp-0) 32)))
(.pcpyud v1-0 gp-0 r0-0)
(.sllv t0-0 v1-0 r0-0)
(.pcpyud v1-1 gp-0 r0-0)
(let ((t1-0 (sar v1-1 32)))
(let ((t1-0 (sar (the-as int v1-1) 32)))
(.por t2-0 gp-0 r0-0)
(t9-0 a0-1 a1-0 a2-0 a3-0 t0-0 t1-0 t2-0)
)
@ -1021,9 +1017,9 @@
)
;; definition for function breakpoint-range-set!
;; WARN: Unsupported inline assembly instruction kind - [48]
;; WARN: Unsupported inline assembly instruction kind - [50]
;; WARN: Unsupported inline assembly instruction kind - [51]
;; WARN: Unsupported inline assembly instruction kind - [mtc0 Debug, a0]
;; WARN: Unsupported inline assembly instruction kind - [mtdab a1]
;; WARN: Unsupported inline assembly instruction kind - [mtdabm a2]
(defun breakpoint-range-set! ((arg0 uint) (arg1 uint) (arg2 uint))
(.mtc0 Debug arg0)
(.mtdab arg1)
@ -1032,9 +1028,9 @@
)
;; definition for function valid?
;; WARN: Unsupported inline assembly instruction kind - [3]
;; WARN: Unsupported inline assembly instruction kind - [3]
;; WARN: Unsupported inline assembly instruction kind - [3]
;; WARN: Unsupported inline assembly instruction kind - [daddu v1, v1, s7]
;; WARN: Unsupported inline assembly instruction kind - [daddu v1, v1, s7]
;; WARN: Unsupported inline assembly instruction kind - [daddu v1, v1, s7]
(defun
valid?
((obj object)

View file

@ -285,8 +285,8 @@
)
;; definition for method 2 of type handle
;; WARN: Unsupported inline assembly instruction kind - [5]
;; WARN: Unsupported inline assembly instruction kind - [73]
;; WARN: Unsupported inline assembly instruction kind - [subu a2, v1, s7]
;; WARN: Unsupported inline assembly instruction kind - [sllv a2, v1, r0]
(defmethod print handle ((obj handle))
(local-vars
(r0-0 none)

View file

@ -1833,8 +1833,8 @@
;; definition for method 10 of type process
;; INFO: Return type mismatch int vs none.
;; WARN: Unsupported inline assembly instruction kind - [22]
;; WARN: Unsupported inline assembly instruction kind - [59]
;; WARN: Unsupported inline assembly instruction kind - [lw ra, return-from-thread(s7)]
;; WARN: Unsupported inline assembly instruction kind - [jr ra]
(defmethod deactivate process ((obj process))
(let ((v0-0 (when (!= (-> obj status) 'dead)
(set! (-> obj next-state) dead-state)

View file

@ -49,10 +49,10 @@
)
;; definition for function enter-state
;; WARN: Unsupported inline assembly instruction kind - [23]
;; WARN: Unsupported inline assembly instruction kind - [22]
;; WARN: Unsupported inline assembly instruction kind - [59]
;; WARN: Unsupported inline assembly instruction kind - [13]
;; WARN: Unsupported inline assembly instruction kind - [lwu sp, 28(v1)]
;; WARN: Unsupported inline assembly instruction kind - [lw ra, return-from-thread-dead(s7)]
;; WARN: Unsupported inline assembly instruction kind - [jr t9]
;; WARN: Unsupported inline assembly instruction kind - [sw v0, 0(sp)]
(defun
enter-state
((arg0 object)

View file

@ -118,8 +118,8 @@
;; definition for function rand-vu-init
;; INFO: Return type mismatch int vs float.
;; WARN: Unsupported inline assembly instruction kind - [56]
;; WARN: Unsupported inline assembly instruction kind - [57]
;; WARN: Unsupported inline assembly instruction kind - [ctc2.i vi_R, a0]
;; WARN: Unsupported inline assembly instruction kind - [cfc2.i v0, vi_R]
(defun rand-vu-init ((arg0 float))
(local-vars (v0-0 int))
(.ctc2.i vi_R arg0)

View file

@ -11,7 +11,7 @@
:size-assert #x40
:flag-assert #xa00000040
(:methods
(dummy-9 () none 9)
(transform-vectors! (_type_ (inline-array vector) (inline-array vector) int) none 9)
)
)

File diff suppressed because it is too large Load diff

View file

@ -822,12 +822,11 @@
)
;; definition for function vector-dot
;; INFO: Return type mismatch int vs float.
;; WARN: Unsupported inline assembly instruction kind - [153]
;; WARN: Unsupported inline assembly instruction kind - [154]
;; WARN: Unsupported inline assembly instruction kind - [157]
;; WARN: Unsupported inline assembly instruction kind - [mula.s f0, f3]
;; WARN: Unsupported inline assembly instruction kind - [madda.s f1, f4]
;; WARN: Unsupported inline assembly instruction kind - [madd.s f0, f2, f5]
(defun vector-dot ((arg0 vector) (arg1 vector))
(local-vars (f0-1 int))
(local-vars (f0-1 float))
(let ((f0-0 (-> arg0 data 0))
(f1-0 (-> arg0 data 1))
(f2-0 (-> arg0 data 2))
@ -839,7 +838,7 @@
(.madda.s f1-0 f4-0)
(.madd.s f0-1 f2-0 f5-0)
)
(the-as float f0-1)
f0-1
)
;; definition for function vector-dot-vu
@ -860,13 +859,12 @@
)
;; definition for function vector4-dot
;; INFO: Return type mismatch int vs float.
;; WARN: Unsupported inline assembly instruction kind - [153]
;; WARN: Unsupported inline assembly instruction kind - [154]
;; WARN: Unsupported inline assembly instruction kind - [154]
;; WARN: Unsupported inline assembly instruction kind - [157]
;; WARN: Unsupported inline assembly instruction kind - [mula.s f0, f4]
;; WARN: Unsupported inline assembly instruction kind - [madda.s f1, f5]
;; WARN: Unsupported inline assembly instruction kind - [madda.s f2, f6]
;; WARN: Unsupported inline assembly instruction kind - [madd.s f0, f3, f7]
(defun vector4-dot ((arg0 vector) (arg1 vector))
(local-vars (f0-1 int))
(local-vars (f0-1 float))
(let ((f0-0 (-> arg0 data 0))
(f1-0 (-> arg0 data 1))
(f2-0 (-> arg0 data 2))
@ -881,7 +879,7 @@
(.madda.s f2-0 f6-0)
(.madd.s f0-1 f3-0 f7-0)
)
(the-as float f0-1)
f0-1
)
;; definition for function vector4-dot-vu

View file

@ -0,0 +1,164 @@
;; Print elements 0..15 of the data field of obj as four lines of four floats each.
;; Each line starts with ~T and every value is printed with ~F inside square brackets.
;; Note: obj is spliced into the expansion once per element (16 times).
(defmacro inspect-mat (obj)
`(begin
(format #t "~T[~F] [~F] [~F] [~F]~%"
(-> ,obj data 0)
(-> ,obj data 1)
(-> ,obj data 2)
(-> ,obj data 3)
)
(format #t "~T[~F] [~F] [~F] [~F]~%"
(-> ,obj data 4)
(-> ,obj data 5)
(-> ,obj data 6)
(-> ,obj data 7)
)
(format #t "~T[~F] [~F] [~F] [~F]~%"
(-> ,obj data 8)
(-> ,obj data 9)
(-> ,obj data 10)
(-> ,obj data 11)
)
(format #t "~T[~F] [~F] [~F] [~F]~%"
(-> ,obj data 12)
(-> ,obj data 13)
(-> ,obj data 14)
(-> ,obj data 15)
)
)
)
;; mat-mult: fill src1 with 1.0 .. 16.0 and src2 with 16.0 .. 1.0 (in data order),
;; call matrix*! with dst as the output, and print dst.
(format #t "mat-mult~%")
(let ((dst (new 'stack 'matrix))
(src1 (new 'stack 'matrix))
(src2 (new 'stack 'matrix)))
(dotimes (i 16)
(set! (-> src1 data i) (the float (+ i 1)))
(set! (-> src2 data i) (the float (- 16 i)))
)
(matrix*! dst src1 src2)
(inspect-mat dst)
)
;; transpose: fill src with 1.0 .. 16.0 (in data order), call matrix-transpose!
;; with dst as the output, and print dst.
(format #t "transpose~%")
(let ((dst (new 'stack 'matrix))
(src (new 'stack 'matrix)))
(dotimes (i 16)
(set! (-> src data i) (the float (+ i 1)))
)
(matrix-transpose! dst src)
(inspect-mat dst)
)
;; inv-4x4: run matrix-4x4-inverse! on a fixed static matrix, then multiply the
;; source by the result and print the product rather than the inverse itself.
;; The C++ test that runs this file expects the printed product to be the identity.
(format #t "inv-4x4~%")
(let ((dst (new 'stack 'matrix))
(prod (new 'stack 'matrix))
(src (new 'static 'matrix :data (new 'static 'array float 16
3. 2. 1. 0.
2. -1. 3. 0.
-8. 2. 2. 0.
1. 2. 3. 1.))))
(matrix-4x4-inverse! dst src)
(matrix*! prod src dst)
(inspect-mat prod)
)
;; axis-angle: build a matrix from an axis and a fixed angle of (degrees 10) for several axes.
(format #t "axis-angle~%")
;; Helper: scale the first three components of axis in place by 1/norm (skipped when the
;; squared norm is not greater than 0), call matrix-axis-angle! with (degrees 10),
;; then print the matrix followed by a blank line.
;; The w component is neither read for the norm nor modified.
(defun test-axis-angle ((axis vector))
(let ((mat (new 'stack 'matrix)))
(let* ((norm-squared (+ (* (-> axis x) (-> axis x))
(* (-> axis y) (-> axis y))
(* (-> axis z) (-> axis z))
)
)
(norm (sqrtf norm-squared)))
(when (> norm-squared 0)
(dotimes (i 3)
(set! (-> axis data i) (/ (-> axis data i) norm))
)
)
(matrix-axis-angle! mat axis (degrees 10))
(inspect-mat mat)
(format #t "~%")
)
)
)
;; one general axis, then axes with only x, y, z or w given.
;; The last call only sets w, which the normalization above does not look at.
(test-axis-angle (new 'static 'vector :x 1.0 :y 0.5 :z -0.3 :w 0.0))
(test-axis-angle (new 'static 'vector :x 0.2))
(test-axis-angle (new 'static 'vector :y 0.2))
(test-axis-angle (new 'static 'vector :z 0.2))
(test-axis-angle (new 'static 'vector :w 0.2))
;; 3x3-inverse: run matrix-3x3-inverse! and then matrix3-inverse-transpose! on the same
;; static source (last row all zero), printing dst after each call with a blank line between.
(format #t "3x3-inverse~%")
(let ((dst (new 'stack 'matrix))
(src (new 'static 'matrix :data (new 'static 'array float 16
3. 2. 1. 0.
2. -1. 3. 0.
-8. 2. 2. 0.
0. 0. 0. 0.))))
(matrix-3x3-inverse! dst src)
(inspect-mat dst)
(format #t "~%")
(matrix3-inverse-transpose! dst src)
(inspect-mat dst)
)
;; Test-only container: 32 vectors stored inline.
(deftype vec-array (structure)
((data vector 32 :inline))
)
;; Print a "vec-array" header and then x, y, z, w of the first 12 vectors only
;; (not all 32), one vector per line. Returns obj.
(defmethod inspect vec-array ((obj vec-array))
(format #t "vec-array~%")
(dotimes (i 12)
(format #t "~T[~F] [~F] [~F] [~F]~%"
(-> obj data i x)
(-> obj data i y)
(-> obj data i z)
(-> obj data i w)
)
)
obj
)
;; transform-many: exercise transform-vectors! on an array of vectors.
;; The first 12 entries of dst are zeroed and the first 12 entries of src are filled with
;; consecutive floats 0.0, 1.0, 2.0, ... in x, y, z and 1.0 in w.
;; transform-vectors! is called with a count argument of 11 while inspect prints 12 entries;
;; the C++ test that runs this file expects the 12th printed entry to still be all zeros.
(format #t "transform-many~%")
(let ((dst (new 'stack 'vec-array))
(dst-ref (new 'stack 'vec-array))
(src (new 'stack 'vec-array))
(val 0.0)
(mat (new 'static 'matrix :data (new 'static 'array float 16
3. 2. 1. 0.
2. -1. 3. 0.
-8. 2. 2. 0.
1. 2. 3. 1.))))
;; init source
(dotimes (i 12)
(set! (-> dst data i quad) (the uint128 0))
(dotimes (j 3)
(set! (-> src data i data j) val)
(set! val (+ val 1.0))
)
(set! (-> src data i w) 1.0)
)
;;(inspect src)
;; compute reference:
;; NOTE: dst-ref is filled one vector at a time with vector-matrix*!, but it is never
;; printed or compared (the inspect below is commented out).
(dotimes (i 12)
(vector-matrix*! (-> dst-ref data i) (-> src data i) mat)
)
;;(inspect dst-ref)
;; compute fancy thing
(transform-vectors! mat (-> dst data) (-> src data) 11)
(inspect dst)
0
)

View file

@ -0,0 +1,54 @@
;; Exercise the 128-bit integer ops .pextlw, .pextuw, .pcpyld, .pcpyud, .ppach and .pceqw,
;; printing each result with print128 on its own line.
;; v1 holds bytes 0..15 and v2 holds bytes 16..31; v3 is allocated but not referenced below.
(let ((v1 (new 'stack 'array 'uint8 16))
(v2 (new 'stack 'array 'uint8 16))
(v3 (new 'stack 'array 'uint8 16))
)
;; initialize stack arrays
(dotimes (i 16)
(set! (-> v1 i) i)
(set! (-> v2 i) (+ i 16))
)
;; read both arrays as single uint128 values; v3-quad receives each result in turn
(let ((v1-quad (-> (the (pointer uint128) v1)))
(v2-quad (-> (the (pointer uint128) v2)))
(v3-quad (the uint128 0))
)
(.pextlw v3-quad v1-quad v2-quad)
;;(print128 v1-quad) (format #t "~%")
;;(print128 v2-quad) (format #t "~%")
;; expect #x07060504171615140302010013121110
(print128 v3-quad) (format #t "~%")
;; expect #x0f0e0d0c1f1e1d1c0b0a09081b1a1918
(.pextuw v3-quad v1-quad v2-quad)
(print128 v3-quad) (format #t "~%")
;; expect #x07060504030201001716151413121110
(.pcpyld v3-quad v1-quad v2-quad)
(print128 v3-quad) (format #t "~%")
;; expect #x1f1e1d1c1b1a19180f0e0d0c0b0a0908
(.pcpyud v3-quad v1-quad v2-quad)
(print128 v3-quad) (format #t "~%")
;; expect #x0d0c0908050401001d1c191815141110
(.ppach v3-quad v1-quad v2-quad)
(print128 v3-quad) (format #t "~%")
)
;; word-wise equality test: s1 and s2 differ in words 0 and 2 and match in words 1 and 3
(let ((s1 (new 'stack 'array 'uint32 4))
(s2 (new 'stack 'array 'uint32 4)))
(set! (-> s1 0) #xdeadbeef)
(set! (-> s1 1) #x12312323)
(set! (-> s1 2) #x11112222)
(set! (-> s1 3) #x11112223)
(set! (-> s2 0) #xdeadbeee) ;; different
(set! (-> s2 1) #x12312323)
(set! (-> s2 2) #x91112222) ;; different
(set! (-> s2 3) #x11112223)
(let ((s1q (-> (the (pointer uint128) s1)))
(s2q (-> (the (pointer uint128) s2)))
(s3q (the uint128 0))
)
;; expect #xffffffff00000000ffffffff00000000
(.pceqw s3q s1q s2q)
(print128 s3q) (format #t "~%")
)
)
)

View file

@ -579,6 +579,87 @@ TEST_F(WithGameTests, I128Simple) {
"12344321\n"});
}
// Runs test-pextlw.gc and checks its printed output line by line.
// The expected lines are, in order, the results of .pextlw, .pextuw, .pcpyld, .pcpyud,
// .ppach and .pceqw as issued by that script, followed by a final "0" line.
TEST_F(WithGameTests, Pextlw) {
runner.run_static_test(env, testCategory, "test-pextlw.gc",
{"#x07060504171615140302010013121110\n"
"#x0f0e0d0c1f1e1d1c0b0a09081b1a1918\n"
"#x07060504030201001716151413121110\n"
"#x1f1e1d1c1b1a19180f0e0d0c0b0a0908\n"
"#x0d0c0908050401001d1c191815141110\n"
"#xffffffff00000000ffffffff00000000\n"
"0\n"});
}
// Runs test-matrix.gc and checks its printed output.
// The expected text follows the sections the script prints, in order: mat-mult, transpose,
// inv-4x4 (an identity matrix), axis-angle (five matrices, each followed by a blank line),
// 3x3-inverse (two matrices separated by a blank line) and transform-many (a 12-entry
// vec-array whose last entry is all zeros), then a final "0" line.
TEST_F(WithGameTests, Matrix) {
runner.run_static_test(env, testCategory, "test-matrix.gc",
{"mat-mult\n"
"\t[ 80.0000] [ 70.0000] [ 60.0000] [ 50.0000]\n"
"\t[ 240.0000] [ 214.0000] [ 188.0000] [ 162.0000]\n"
"\t[ 400.0000] [ 358.0000] [ 316.0000] [ 274.0000]\n"
"\t[ 560.0000] [ 502.0000] [ 444.0000] [ 386.0000]\n"
"transpose\n"
"\t[ 1.0000] [ 5.0000] [ 9.0000] [ 13.0000]\n"
"\t[ 2.0000] [ 6.0000] [ 10.0000] [ 14.0000]\n"
"\t[ 3.0000] [ 7.0000] [ 11.0000] [ 15.0000]\n"
"\t[ 4.0000] [ 8.0000] [ 12.0000] [ 16.0000]\n"
"inv-4x4\n"
"\t[ 1.0000] [ 0.0000] [ 0.0000] [ 0.0000]\n"
"\t[ 0.0000] [ 1.0000] [ 0.0000] [ 0.0000]\n"
"\t[ 0.0000] [ 0.0000] [ 1.0000] [ 0.0000]\n"
"\t[ 0.0000] [ 0.0000] [ 0.0000] [ 1.0000]\n"
"axis-angle\n"
"\t[ 0.9961] [ 0.0506] [ 0.0715] [ 0.0000]\n"
"\t[ -0.0393] [ 0.9876] [ -0.1516] [ 0.0000]\n"
"\t[ -0.0783] [ 0.1482] [ 0.9858] [ 0.0000]\n"
"\t[ 0.0000] [ 0.0000] [ 0.0000] [ 1.0000]\n"
"\n"
"\t[ 1.0000] [ 0.0000] [ 0.0000] [ 0.0000]\n"
"\t[ 0.0000] [ 0.9848] [ -0.1736] [ 0.0000]\n"
"\t[ 0.0000] [ 0.1736] [ 0.9848] [ 0.0000]\n"
"\t[ 0.0000] [ 0.0000] [ 0.0000] [ 1.0000]\n"
"\n"
"\t[ 0.9848] [ 0.0000] [ 0.1736] [ 0.0000]\n"
"\t[ 0.0000] [ 1.0000] [ 0.0000] [ 0.0000]\n"
"\t[ -0.1736] [ 0.0000] [ 0.9848] [ 0.0000]\n"
"\t[ 0.0000] [ 0.0000] [ 0.0000] [ 1.0000]\n"
"\n"
"\t[ 0.9848] [ -0.1736] [ 0.0000] [ 0.0000]\n"
"\t[ 0.1736] [ 0.9848] [ 0.0000] [ 0.0000]\n"
"\t[ 0.0000] [ 0.0000] [ 1.0000] [ 0.0000]\n"
"\t[ 0.0000] [ 0.0000] [ 0.0000] [ 1.0000]\n"
"\n"
"\t[ 1.0000] [ 0.0000] [ 0.0000] [ 0.0000]\n"
"\t[ 0.0000] [ 1.0000] [ 0.0000] [ 0.0000]\n"
"\t[ 0.0000] [ 0.0000] [ 1.0000] [ 0.0000]\n"
"\t[ 0.0000] [ 0.0000] [ 0.0000] [ 1.0000]\n"
"\n"
"3x3-inverse\n"
"\t[ 0.0952] [ 0.0238] [ -0.0833] [ 0.0000]\n"
"\t[ 0.3333] [ -0.1666] [ 0.0833] [ 0.0000]\n"
"\t[ 0.0476] [ 0.2619] [ 0.0833] [ 0.0000]\n"
"\t[ 0.0000] [ 0.0000] [ 0.0000] [ 0.0000]\n"
"\n"
"\t[ 0.0952] [ 0.3333] [ 0.0476] [ 0.0000]\n"
"\t[ 0.0238] [ -0.1666] [ 0.2619] [ 0.0000]\n"
"\t[ -0.0833] [ 0.0833] [ 0.0833] [ 0.0000]\n"
"\t[ 0.0000] [ 0.0000] [ 0.0000] [ 0.0000]\n"
"transform-many\n"
"vec-array\n"
"\t[ -13.0000] [ 5.0000] [ 10.0000] [ 1.0000]\n"
"\t[ -22.0000] [ 14.0000] [ 28.0000] [ 1.0000]\n"
"\t[ -31.0000] [ 23.0000] [ 46.0000] [ 1.0000]\n"
"\t[ -40.0000] [ 32.0000] [ 64.0000] [ 1.0000]\n"
"\t[ -49.0000] [ 41.0000] [ 82.0000] [ 1.0000]\n"
"\t[ -58.0000] [ 50.0000] [ 100.0000] [ 1.0000]\n"
"\t[ -67.0000] [ 59.0000] [ 118.0000] [ 1.0000]\n"
"\t[ -76.0000] [ 68.0000] [ 136.0000] [ 1.0000]\n"
"\t[ -85.0000] [ 77.0000] [ 154.0000] [ 1.0000]\n"
"\t[ -94.0000] [ 86.0000] [ 172.0000] [ 1.0000]\n"
"\t[ -103.0000] [ 95.0000] [ 190.0000] [ 1.0000]\n"
"\t[ 0.0000] [ 0.0000] [ 0.0000] [ 0.0000]\n"
"0\n"});
}
TEST(TypeConsistency, TypeConsistency) {
Compiler compiler;
compiler.enable_throw_on_redefines();

View file

@ -92,10 +92,6 @@ const std::unordered_set<std::string> skip_in_compiling = {
"(method 3 vector)", // this function appears twice, which confuses the compiler.
"vector-dot", // fpu acc
"vector4-dot", // fpu acc
/// MATRIX
"matrix-transpose!", // unsupported asm ops
"matrix-4x4-inverse!", // compiler fails to regalloc this...
};
// default location for the data. It can be changed with a command line argument.

View file

@ -372,3 +372,138 @@ TEST(EmitterAVX, VPSLLD) {
tester.emit(IGen::pw_sll(XMM0 + 13, XMM0 + 14, 6));
EXPECT_EQ(tester.dump_to_hex_string(true), "C5E172F403C4C16172F604C59172F405C4C11172F606");
}
// Emit pextlw_swapped for every (dst, src0, src1) combination of xmm3 and xmm13, with the
// last operand varying fastest, and compare the emitted bytes against a fixed hex dump.
TEST(EmitterAVX, VPUNPCKLDQ) {
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int xmm_offsets[] = {3, 13};
  for (int dst : xmm_offsets) {
    for (int src0 : xmm_offsets) {
      for (int src1 : xmm_offsets) {
        tester.emit(IGen::pextlw_swapped(XMM0 + dst, XMM0 + src0, XMM0 + src1));
      }
    }
  }

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "C5E162DBC4C16162DDC59162DBC4C11162DDC56162EBC4416162EDC51162EBC4411162ED");
}
// Emit pextuw_swapped for every (dst, src0, src1) combination of xmm3 and xmm13, with the
// last operand varying fastest, and compare the emitted bytes against a fixed hex dump.
TEST(EmitterAVX, VPUNPCKHDQ) {
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int xmm_offsets[] = {3, 13};
  for (int dst : xmm_offsets) {
    for (int src0 : xmm_offsets) {
      for (int src1 : xmm_offsets) {
        tester.emit(IGen::pextuw_swapped(XMM0 + dst, XMM0 + src0, XMM0 + src1));
      }
    }
  }

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "C5E16ADBC4C1616ADDC5916ADBC4C1116ADDC5616AEBC441616AEDC5116AEBC441116AED");
}
// Emit pcpyld_swapped for every (dst, src0, src1) combination of xmm3 and xmm13, with the
// last operand varying fastest, and compare the emitted bytes against a fixed hex dump.
TEST(EmitterAVX, VPUNPCKLQDQ) {
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int xmm_offsets[] = {3, 13};
  for (int dst : xmm_offsets) {
    for (int src0 : xmm_offsets) {
      for (int src1 : xmm_offsets) {
        tester.emit(IGen::pcpyld_swapped(XMM0 + dst, XMM0 + src0, XMM0 + src1));
      }
    }
  }

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "C5E16CDBC4C1616CDDC5916CDBC4C1116CDDC5616CEBC441616CEDC5116CEBC441116CED");
}
// Emit pcpyud for every (dst, src0, src1) combination of xmm3 and xmm13, with the
// last operand varying fastest, and compare the emitted bytes against a fixed hex dump.
TEST(EmitterAVX, VPUNPCKHQDQ) {
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int xmm_offsets[] = {3, 13};
  for (int dst : xmm_offsets) {
    for (int src0 : xmm_offsets) {
      for (int src1 : xmm_offsets) {
        tester.emit(IGen::pcpyud(XMM0 + dst, XMM0 + src0, XMM0 + src1));
      }
    }
  }

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "C5E16DDBC4C1616DDDC5916DDBC4C1116DDDC5616DEBC441616DEDC5116DEBC441116DED");
}
// Emit pceqw for every (dst, src0, src1) combination of xmm3 and xmm13, with the
// last operand varying fastest, and compare the emitted bytes against a fixed hex dump.
TEST(EmitterAVX, VPCMPEQD) {
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int xmm_offsets[] = {3, 13};
  for (int dst : xmm_offsets) {
    for (int src0 : xmm_offsets) {
      for (int src1 : xmm_offsets) {
        tester.emit(IGen::pceqw(XMM0 + dst, XMM0 + src0, XMM0 + src1));
      }
    }
  }

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "C5E176DBC4C16176DDC59176DBC4C11176DDC56176EBC4416176EDC51176EBC4411176ED");
}
// Emit vpsrldq for dst in {xmm3, xmm13} x src in {xmm4, xmm14} (src varying fastest),
// using immediates 3, 4, 5, 6 in emission order, and compare against a fixed hex dump.
TEST(EmitterAVX, VPSRLDQ) {
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int dst_offsets[] = {3, 13};
  const int src_offsets[] = {4, 14};
  int immediate = 3;
  for (int dst : dst_offsets) {
    for (int src : src_offsets) {
      tester.emit(IGen::vpsrldq(XMM0 + dst, XMM0 + src, immediate));
      ++immediate;
    }
  }

  EXPECT_EQ(tester.dump_to_hex_string(true), "C5E173DC03C4C16173DE04C59173DC05C4C11173DE06");
}
// Emit vpslldq for dst in {xmm3, xmm13} x src in {xmm4, xmm14} (src varying fastest),
// using immediates 3, 4, 5, 6 in emission order, and compare against a fixed hex dump.
TEST(EmitterAVX, VPSLLDQ) {
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int dst_offsets[] = {3, 13};
  const int src_offsets[] = {4, 14};
  int immediate = 3;
  for (int dst : dst_offsets) {
    for (int src : src_offsets) {
      tester.emit(IGen::vpslldq(XMM0 + dst, XMM0 + src, immediate));
      ++immediate;
    }
  }

  EXPECT_EQ(tester.dump_to_hex_string(true), "C5E173FC03C4C16173FE04C59173FC05C4C11173FE06");
}
// Emit vpshuflw for dst in {xmm3, xmm13} x src in {xmm4, xmm14} (src varying fastest),
// using immediates 3, 4, 5, 6 in emission order, and compare against a fixed hex dump.
TEST(EmitterAVX, VPSHUFLW) {
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int dst_offsets[] = {3, 13};
  const int src_offsets[] = {4, 14};
  int immediate = 3;
  for (int dst : dst_offsets) {
    for (int src : src_offsets) {
      tester.emit(IGen::vpshuflw(XMM0 + dst, XMM0 + src, immediate));
      ++immediate;
    }
  }

  EXPECT_EQ(tester.dump_to_hex_string(true), "C5FB70DC03C4C17B70DE04C57B70EC05C4417B70EE06");
}
// Emit vpshufhw for dst in {xmm3, xmm13} x src in {xmm4, xmm14} (src varying fastest),
// using immediates 3, 4, 5, 6 in emission order, and compare against a fixed hex dump.
TEST(EmitterAVX, VPSHUFHW) {
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int dst_offsets[] = {3, 13};
  const int src_offsets[] = {4, 14};
  int immediate = 3;
  for (int dst : dst_offsets) {
    for (int src : src_offsets) {
      tester.emit(IGen::vpshufhw(XMM0 + dst, XMM0 + src, immediate));
      ++immediate;
    }
  }

  EXPECT_EQ(tester.dump_to_hex_string(true), "C5FA70DC03C4C17A70DE04C57A70EC05C4417A70EE06");
}
// Emit movq_gpr64_xmm64 for dst in {RSP, R12} x src in {xmm3, xmm13} and compare the
// emitted bytes against a fixed hex dump.
// The suite name was misspelled "EmitrerAVX"; it is corrected to "EmitterAVX" so this test
// is grouped with (and selected by filters for) the other tests in this file.
TEST(EmitterAVX, movq_to_gpr_from_xmm) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  tester.emit(IGen::movq_gpr64_xmm64(RSP, XMM0 + 3));
  tester.emit(IGen::movq_gpr64_xmm64(RSP, XMM0 + 13));
  tester.emit(IGen::movq_gpr64_xmm64(R12, XMM0 + 3));
  tester.emit(IGen::movq_gpr64_xmm64(R12, XMM0 + 13));
  EXPECT_EQ(tester.dump_to_hex_string(true), "66480F7EDC664C0F7EEC66490F7EDC664D0F7EEC");
}
// Emit movq_xmm64_gpr64 for dst in {xmm3, xmm13} and src in {RSP, R12} and compare the
// emitted bytes against a fixed hex dump.
// The suite name was misspelled "EmitrerAVX"; it is corrected to "EmitterAVX" so this test
// is grouped with (and selected by filters for) the other tests in this file.
TEST(EmitterAVX, movq_to_xmm_from_gpr) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  tester.emit(IGen::movq_xmm64_gpr64(XMM0 + 3, RSP));
  tester.emit(IGen::movq_xmm64_gpr64(XMM0 + 13, RSP));
  tester.emit(IGen::movq_xmm64_gpr64(XMM0 + 3, R12));
  tester.emit(IGen::movq_xmm64_gpr64(XMM0 + 13, R12));
  EXPECT_EQ(tester.dump_to_hex_string(true), "66480F6EDC664C0F6EEC66490F6EDC664D0F6EEC");
}