mirror of
https://github.com/open-goal/jak-project.git
synced 2024-10-20 00:57:44 -04:00
Compiler - Implementing more VU Instructions (Part 1 of 2) (#221)
* A little project cleanup * Script to grep decompiler results * Compiler: Implement VNOP -> FNOP (.nop.vf) temp: test new addition * Compiler: Implement VMUL.xyzw (.mul.vf) squash: cleaning up files i don't want to accidentally stage * Compiler: Implement V[ADD|SUB|MUL].dest instructions * Compiler: Implement V[ADD|SUB|MUL][x|y|w|z].dest instructions * Compiler: Implement V[MIN|MAX]{[x|y|z|w]}.dest instructions * Compiler: Implement V[ABS]{[x|y|z|w]}.dest instructions * Cleanup review feedback before adding tests and docs * Tests: Added missing emitter tests * tests/compiler: Comprehensively test all new instructions * docs: Add documentation for newly supported operations * Remove unused vector-h function * Address review feedback
This commit is contained in:
parent
2436a8a541
commit
65206823ef
11
.editorconfig
Normal file
11
.editorconfig
Normal file
|
@ -0,0 +1,11 @@
|
|||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
||||
|
||||
# https://editorconfig-specification.readthedocs.io/
|
||||
# https://docs.microsoft.com/en-us/visualstudio/ide/cpp-editorconfig-properties?view=vs-2019
|
||||
[*.{c++,cc,cpp,cxx,h,h++,hh,hpp,hxx,inl,ipp,tlh,tli}]
|
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -7,6 +7,8 @@ cmake-build-debug/*
|
|||
build/*
|
||||
decompiler_out/*
|
||||
logs/*
|
||||
.vscode/settings.json
|
||||
|
||||
|
||||
# for Nix
|
||||
/result*
|
||||
|
|
|
@ -14,6 +14,13 @@
|
|||
"name" : "Run Tests - Verbose",
|
||||
"args" : ["--gtest_brief=0"]
|
||||
},
|
||||
{
|
||||
"type" : "default",
|
||||
"project" : "CMakeLists.txt",
|
||||
"projectTarget" : "goalc-test.exe (bin\\goalc-test.exe)",
|
||||
"name" : "Run Draft Tests - Verbose",
|
||||
"args" : ["--gtest_brief=0", "--gtest_filter=\"*Draft*\""]
|
||||
},
|
||||
{
|
||||
"type" : "default",
|
||||
"project" : "CMakeLists.txt",
|
||||
|
@ -39,6 +46,13 @@
|
|||
"project" : "CMakeLists.txt",
|
||||
"projectTarget" : "decompiler.exe (bin\\decompiler.exe)",
|
||||
"name" : "Build Decompiler"
|
||||
},
|
||||
{
|
||||
"type" : "default",
|
||||
"project" : "CMakeLists.txt",
|
||||
"projectTarget" : "decompiler.exe (bin\\decompiler.exe)",
|
||||
"name" : "Run Decompiler - Jak 1",
|
||||
"args" : [ "${workspaceRoot}/decompiler/config/jak1_ntsc_black_label.jsonc", "${workspaceRoot}/iso_data/jak1", "${workspaceRoot}/decompiler_out/jak1"]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
6
Taskfile.yml
Normal file
6
Taskfile.yml
Normal file
|
@ -0,0 +1,6 @@
|
|||
version: '3'
|
||||
|
||||
tasks:
|
||||
format:
|
||||
cmds:
|
||||
- python ./third-party/run-clang-format/run-clang-format.py -r common decompiler game goalc test -i
|
|
@ -1097,4 +1097,4 @@ void TypeOfElement::update_from_stack(const Env& env,
|
|||
result->push_back(this);
|
||||
}
|
||||
|
||||
} // namespace decompiler
|
||||
} // namespace decompiler
|
||||
|
|
|
@ -1284,7 +1284,14 @@ Move between two registers. The `dst` should be a register (either `rlet` or `le
|
|||
- `gpr` to `fpr` (only moves 32-bits, uses `movd`)
|
||||
- `fpr` to `gpr` (only moves 32-bits, upper 32-bits are zero, uses `movd`)
|
||||
This code generation is identical to using a `(set! dst src)` form.
|
||||
|
||||
|
||||
## `.nop.vf`
|
||||
```lisp
|
||||
(.nop.vf)
|
||||
```
|
||||
|
||||
Inserts a `FNOP` assembly instruction, which is fundamentally the same as a `NOP`.
|
||||
|
||||
## `.lvf`
|
||||
```lisp
|
||||
(.lvf dst-reg src-loc [:color #t|#f])
|
||||
|
@ -1307,9 +1314,33 @@ Store a vector float. Works similarly to the `lvf` form, but there is no optimiz
|
|||
|
||||
## Three operand vector float operations.
|
||||
```lisp
|
||||
(.<op-name>.vf dst src0 src1 [:color #t|#f])
|
||||
(.<op-name>[<broadcast-element>].vf dst src0 src1 [:color #t|#f] [:mask #b<0-15>])
|
||||
```
|
||||
All the three operand forms work similarly. You can do something like `(.add.vf vf1 vf2 vf3)`. All operations use the similarly named `v<op-name>ps` instruction, xmm128 VEX encoding. We support `xor`, `sub`, and `add` so far.
|
||||
All the three operand forms work similarly. You can do something like `(.add.vf vf1 vf2 vf3)`. All operations use the similarly named `v<op-name>ps` instruction, xmm128 VEX encoding. We support the following `op-name`s:
|
||||
- `xor`
|
||||
- `add`
|
||||
- `sub`
|
||||
- `mul`
|
||||
- `min`
|
||||
- `max`
|
||||
|
||||
An optional `:mask` value can be provided as a binary number between 0-15 (inclusive). This determines _which_ of the resulting elements will be committed to the destination vector. For example, `:mask #b1011` means that the `w`, `y` and `x` results will be committed. Note that the components are defined left-to-right which may be a little counter-intuitive -- `w` is the left-most, `x` is the right-most. This aligns with the PS2's VU implementation.
|
||||
|
||||
Additionally, all of these operations support defining a single `broadcast-element`. This can be one of the 4 vector components `x|y|z|w`. For example, `(vaddx.xyzw vf10, vf20, vf30)` translates into:
|
||||
|
||||
```cpp
|
||||
vf10[x] = vf20[x] + vf30[x]
|
||||
vf10[y] = vf20[y] + vf30[x]
|
||||
vf10[z] = vf20[z] + vf30[x]
|
||||
vf10[w] = vf20[w] + vf30[x]
|
||||
```
|
||||
|
||||
## `.abs.vf`
|
||||
```lisp
|
||||
(.abs.vf dst src [:color #t|#f] [:mask #b<0-15>])
|
||||
```
|
||||
|
||||
Calculates the absolute value of each element of the `src` vector and stores the result in the `dst` vector.
|
||||
|
||||
## `.blend.vf`
|
||||
```lisp
|
||||
|
|
|
@ -418,6 +418,14 @@
|
|||
:flag-assert #x90000000c
|
||||
)
|
||||
|
||||
;; Set the x, y, z and w fields of the vector `v` to `xv`, `yv`, `zv` and `wv`.
;; Expands to a `begin` containing four `set!` forms, one per field.
;; NOTE(review): `v` is spliced into all four `set!` forms, so an argument
;; expression with side effects would be evaluated four times - confirm callers
;; only pass plain variables or field accesses.
(defmacro set-vector! (v xv yv zv wv)
|
||||
`(begin
|
||||
(set! (-> ,v x) ,xv)
|
||||
(set! (-> ,v y) ,yv)
|
||||
(set! (-> ,v z) ,zv)
|
||||
(set! (-> ,v w) ,wv))
|
||||
)
|
||||
|
||||
(defun vector-dot ((a vector) (b vector))
|
||||
"Take the dot product of two vectors.
|
||||
Only does the x, y, z components.
|
||||
|
@ -475,7 +483,7 @@
|
|||
; add
|
||||
(.add.vf vf1 vf2 vf3)
|
||||
; set w = 0
|
||||
(.blend.vf vf1 vf1 vf0 #b1000)
|
||||
(.blend.vf vf1 vf1 vf0 :mask #b1000)
|
||||
; store
|
||||
(.svf dst vf1)
|
||||
)
|
||||
|
@ -497,7 +505,7 @@
|
|||
; subtract
|
||||
(.sub.vf vf1 vf2 vf3)
|
||||
; set w = 0
|
||||
(.blend.vf vf1 vf1 vf0 #b1000)
|
||||
(.blend.vf vf1 vf1 vf0 :mask #b1000)
|
||||
; store
|
||||
(.svf dst vf1)
|
||||
)
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "common/goos/Interpreter.h"
|
||||
#include "goalc/compiler/IR.h"
|
||||
#include "goalc/debugger/Debugger.h"
|
||||
#include "goalc/emitter/Register.h"
|
||||
#include "CompilerSettings.h"
|
||||
#include "third-party/fmt/core.h"
|
||||
#include "third-party/fmt/color.h"
|
||||
|
@ -68,6 +69,7 @@ class Compiler {
|
|||
/*!
 * Shared compile routine used by the three-operand vector float assembly forms.
 * kind selects which IR_VFMath3Asm operation is emitted. When broadcastElement is not
 * VF_ELEMENT::NONE, that single element of the second source is splatted into a temporary
 * register and the temporary is used as the second operand for every lane.
 */
Val* compile_asm_vf_math3(const goos::Object& form,
|
||||
const goos::Object& rest,
|
||||
IR_VFMath3Asm::Kind kind,
|
||||
emitter::Register::VF_ELEMENT broadcastElement,
|
||||
Env* env);
|
||||
|
||||
Val* get_field_of_structure(const StructureType* type,
|
||||
|
@ -287,11 +289,44 @@ class Compiler {
|
|||
Val* compile_asm_load_sym(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_jr(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_mov(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
|
||||
Val* compile_asm_nop_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_lvf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_svf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_xor_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
|
||||
Val* compile_asm_max_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_maxx_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_maxy_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_maxz_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_maxw_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
|
||||
Val* compile_asm_min_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_minx_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_miny_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_minz_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_minw_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
|
||||
Val* compile_asm_sub_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_subx_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_suby_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_subz_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_subw_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
|
||||
Val* compile_asm_add_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_addx_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_addy_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_addz_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_addw_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
|
||||
Val* compile_asm_mul_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_mulx_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_muly_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_mulz_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
Val* compile_asm_mulw_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
|
||||
Val* compile_asm_abs_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
|
||||
Val* compile_asm_blend_vf(const goos::Object& form, const goos::Object& rest, Env* env);
|
||||
|
||||
// Atoms
|
||||
|
|
|
@ -48,6 +48,10 @@ RegVal* Env::make_fpr(const TypeSpec& ts) {
|
|||
return make_ireg(coerce_to_reg_type(ts), RegClass::FLOAT);
|
||||
}
|
||||
|
||||
/*!
 * Make a register value in the VECTOR_FLOAT register class for the given type.
 * The type is first passed through coerce_to_reg_type, mirroring make_fpr.
 */
RegVal* Env::make_vfr(const TypeSpec& ts) {
  const auto reg_type = coerce_to_reg_type(ts);
  return make_ireg(reg_type, RegClass::VECTOR_FLOAT);
}
|
||||
|
||||
std::unordered_map<std::string, Label>& Env::get_label_map() {
|
||||
return parent()->get_label_map();
|
||||
}
|
||||
|
|
|
@ -38,6 +38,7 @@ class Env {
|
|||
virtual std::unordered_map<std::string, Label>& get_label_map();
|
||||
RegVal* make_gpr(const TypeSpec& ts);
|
||||
RegVal* make_fpr(const TypeSpec& ts);
|
||||
RegVal* make_vfr(const TypeSpec& ts);
|
||||
virtual ~Env() = default;
|
||||
Env* parent() { return m_parent; }
|
||||
|
||||
|
|
|
@ -1058,6 +1058,27 @@ void IR_AsmRet::do_codegen(emitter::ObjectGenerator* gen,
|
|||
gen->add_instr(IGen::ret(), irec);
|
||||
}
|
||||
|
||||
///////////////////////
|
||||
// AsmFNop
|
||||
///////////////////////
|
||||
|
||||
IR_AsmFNop::IR_AsmFNop() : IR_Asm(false) {}
|
||||
|
||||
std::string IR_AsmFNop::print() {
|
||||
return ".nop.vf";
|
||||
}
|
||||
|
||||
RegAllocInstr IR_AsmFNop::to_rai() {
|
||||
return {};
|
||||
}
|
||||
|
||||
void IR_AsmFNop::do_codegen(emitter::ObjectGenerator* gen,
|
||||
const AllocationResult& allocs,
|
||||
emitter::IR_Record irec) {
|
||||
(void)allocs;
|
||||
gen->add_instr(IGen::nop_vf(), irec);
|
||||
}
|
||||
|
||||
///////////////////////
|
||||
// AsmPush
|
||||
///////////////////////
|
||||
|
@ -1282,19 +1303,31 @@ IR_VFMath3Asm::IR_VFMath3Asm(bool use_color,
|
|||
: IR_Asm(use_color), m_dst(dst), m_src1(src1), m_src2(src2), m_kind(kind) {}
|
||||
|
||||
std::string IR_VFMath3Asm::print() {
|
||||
std::string function = "";
|
||||
switch (m_kind) {
|
||||
case Kind::XOR:
|
||||
return fmt::format(".xor.vf{} {}, {}, {}", get_color_suffix_string(), m_dst->print(),
|
||||
m_src1->print(), m_src2->print());
|
||||
function = ".xor.vf";
|
||||
break;
|
||||
case Kind::SUB:
|
||||
return fmt::format(".sub.vf{} {}, {}, {}", get_color_suffix_string(), m_dst->print(),
|
||||
m_src1->print(), m_src2->print());
|
||||
function = ".sub.vf";
|
||||
break;
|
||||
case Kind::ADD:
|
||||
return fmt::format(".add.vf{} {}, {}, {}", get_color_suffix_string(), m_dst->print(),
|
||||
m_src1->print(), m_src2->print());
|
||||
function = ".add.vf";
|
||||
break;
|
||||
case Kind::MUL:
|
||||
function = ".mul.vf";
|
||||
break;
|
||||
case Kind::MAX:
|
||||
function = ".max.vf";
|
||||
break;
|
||||
case Kind::MIN:
|
||||
function = ".min.vf";
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
return fmt::format("{}{} {}, {}, {}", function, get_color_suffix_string(), m_dst->print(),
|
||||
m_src1->print(), m_src2->print());
|
||||
}
|
||||
|
||||
RegAllocInstr IR_VFMath3Asm::to_rai() {
|
||||
|
@ -1324,6 +1357,15 @@ void IR_VFMath3Asm::do_codegen(emitter::ObjectGenerator* gen,
|
|||
case Kind::ADD:
|
||||
gen->add_instr(IGen::add_vf(dst, src1, src2), irec);
|
||||
break;
|
||||
case Kind::MUL:
|
||||
gen->add_instr(IGen::mul_vf(dst, src1, src2), irec);
|
||||
break;
|
||||
case Kind::MAX:
|
||||
gen->add_instr(IGen::max_vf(dst, src1, src2), irec);
|
||||
break;
|
||||
case Kind::MIN:
|
||||
gen->add_instr(IGen::min_vf(dst, src1, src2), irec);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
@ -1358,4 +1400,32 @@ void IR_BlendVF::do_codegen(emitter::ObjectGenerator* gen,
|
|||
auto src1 = get_reg_asm(m_src1, allocs, irec, m_use_coloring);
|
||||
auto src2 = get_reg_asm(m_src2, allocs, irec, m_use_coloring);
|
||||
gen->add_instr(IGen::blend_vf(dst, src1, src2, m_mask), irec);
|
||||
}
|
||||
}
|
||||
|
||||
IR_SplatVF::IR_SplatVF(bool use_color,
|
||||
const RegVal* dst,
|
||||
const RegVal* src,
|
||||
const emitter::Register::VF_ELEMENT element)
|
||||
: IR_Asm(use_color), m_dst(dst), m_src(src), m_element(element) {}
|
||||
|
||||
std::string IR_SplatVF::print() {
|
||||
return fmt::format(".splat.vf{} {}, {}, {}", get_color_suffix_string(), m_dst->print(),
|
||||
m_src->print(), m_element);
|
||||
}
|
||||
|
||||
RegAllocInstr IR_SplatVF::to_rai() {
|
||||
RegAllocInstr rai;
|
||||
if (m_use_coloring) {
|
||||
rai.write.push_back(m_dst->ireg());
|
||||
rai.read.push_back(m_src->ireg());
|
||||
}
|
||||
return rai;
|
||||
}
|
||||
|
||||
void IR_SplatVF::do_codegen(emitter::ObjectGenerator* gen,
|
||||
const AllocationResult& allocs,
|
||||
emitter::IR_Record irec) {
|
||||
auto dst = get_reg_asm(m_dst, allocs, irec, m_use_coloring);
|
||||
auto src = get_reg_asm(m_src, allocs, irec, m_use_coloring);
|
||||
gen->add_instr(IGen::splat_vf(dst, src, m_element), irec);
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "goalc/regalloc/allocate.h"
|
||||
#include "Val.h"
|
||||
#include "goalc/emitter/ObjectGenerator.h"
|
||||
#include "goalc/emitter/Register.h"
|
||||
|
||||
class IR {
|
||||
public:
|
||||
|
@ -452,6 +453,16 @@ class IR_AsmAdd : public IR_Asm {
|
|||
const RegVal* m_src = nullptr;
|
||||
};
|
||||
|
||||
class IR_AsmFNop : public IR_Asm {
|
||||
public:
|
||||
IR_AsmFNop();
|
||||
std::string print() override;
|
||||
RegAllocInstr to_rai() override;
|
||||
void do_codegen(emitter::ObjectGenerator* gen,
|
||||
const AllocationResult& allocs,
|
||||
emitter::IR_Record irec) override;
|
||||
};
|
||||
|
||||
class IR_GetSymbolValueAsm : public IR_Asm {
|
||||
public:
|
||||
IR_GetSymbolValueAsm(bool use_coloring, const RegVal* dest, std::string sym_name, bool sext);
|
||||
|
@ -496,7 +507,7 @@ class IR_RegSetAsm : public IR_Asm {
|
|||
|
||||
class IR_VFMath3Asm : public IR_Asm {
|
||||
public:
|
||||
enum class Kind { XOR, SUB, ADD };
|
||||
enum class Kind { XOR, SUB, ADD, MUL, MAX, MIN };
|
||||
IR_VFMath3Asm(bool use_color,
|
||||
const RegVal* dst,
|
||||
const RegVal* src1,
|
||||
|
@ -530,4 +541,22 @@ class IR_BlendVF : public IR_Asm {
|
|||
const RegVal* m_src2 = nullptr;
|
||||
u8 m_mask = 0xff;
|
||||
};
|
||||
|
||||
class IR_SplatVF : public IR_Asm {
|
||||
public:
|
||||
IR_SplatVF(bool use_color,
|
||||
const RegVal* dst,
|
||||
const RegVal* src1,
|
||||
const emitter::Register::VF_ELEMENT element);
|
||||
std::string print() override;
|
||||
RegAllocInstr to_rai() override;
|
||||
void do_codegen(emitter::ObjectGenerator* gen,
|
||||
const AllocationResult& allocs,
|
||||
emitter::IR_Record irec) override;
|
||||
|
||||
protected:
|
||||
const RegVal* m_dst = nullptr;
|
||||
const RegVal* m_src = nullptr;
|
||||
const emitter::Register::VF_ELEMENT m_element = emitter::Register::VF_ELEMENT::NONE;
|
||||
};
|
||||
#endif // JAK_IR_H
|
||||
|
|
|
@ -242,6 +242,14 @@ Val* Compiler::compile_asm_mov(const goos::Object& form, const goos::Object& res
|
|||
return get_none();
|
||||
}
|
||||
|
||||
/*!
 * Compile (.nop.vf). The form takes no positional and no keyword arguments and emits a
 * single IR_AsmFNop.
 */
Val* Compiler::compile_asm_nop_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  auto parsed = get_va(form, rest);
  // Empty lists for both unnamed and named arguments: anything supplied is rejected.
  va_check(form, parsed, {}, {});

  env->emit_ir<IR_AsmFNop>();
  return get_none();
}
|
||||
|
||||
/*!
|
||||
* Load a vector float from memory. Does an aligned load.
|
||||
*/
|
||||
|
@ -312,20 +320,198 @@ Val* Compiler::compile_asm_svf(const goos::Object& form, const goos::Object& res
|
|||
}
|
||||
|
||||
/*!
 * Compile (.xor.vf dst src0 src1 ...): element-wise XOR with no broadcast element.
 * Only the five-argument call is kept; the older four-argument call no longer matches the
 * declaration of compile_asm_vf_math3 and made this one unreachable.
 */
Val* Compiler::compile_asm_xor_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::XOR,
                              emitter::Register::VF_ELEMENT::NONE, env);
}
|
||||
|
||||
// The .max[x|y|z|w].vf family. Each form forwards to compile_asm_vf_math3 with Kind::MAX and
// differs only in which element of the second source (if any) is broadcast.

Val* Compiler::compile_asm_max_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::NONE;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, broadcast, env);
}

Val* Compiler::compile_asm_maxx_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::X;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, broadcast, env);
}

Val* Compiler::compile_asm_maxy_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::Y;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, broadcast, env);
}

Val* Compiler::compile_asm_maxz_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::Z;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, broadcast, env);
}

Val* Compiler::compile_asm_maxw_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::W;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MAX, broadcast, env);
}
|
||||
|
||||
// The .min[x|y|z|w].vf family. Each form forwards to compile_asm_vf_math3 with Kind::MIN and
// differs only in which element of the second source (if any) is broadcast.

Val* Compiler::compile_asm_min_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::NONE;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, broadcast, env);
}

Val* Compiler::compile_asm_minx_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::X;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, broadcast, env);
}

Val* Compiler::compile_asm_miny_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::Y;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, broadcast, env);
}

Val* Compiler::compile_asm_minz_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::Z;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, broadcast, env);
}

Val* Compiler::compile_asm_minw_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::W;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MIN, broadcast, env);
}
|
||||
|
||||
/*!
 * Compile (.sub.vf dst src0 src1 ...): element-wise subtraction with no broadcast element.
 * Only the five-argument call is kept; the older four-argument call no longer matches the
 * declaration of compile_asm_vf_math3 and made this one unreachable.
 */
Val* Compiler::compile_asm_sub_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB,
                              emitter::Register::VF_ELEMENT::NONE, env);
}
|
||||
|
||||
// The broadcasting .sub[x|y|z|w].vf forms. Each forwards to compile_asm_vf_math3 with
// Kind::SUB and names the element of the second source to broadcast.

Val* Compiler::compile_asm_subx_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::X;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB, broadcast, env);
}

Val* Compiler::compile_asm_suby_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::Y;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB, broadcast, env);
}

Val* Compiler::compile_asm_subz_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::Z;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB, broadcast, env);
}

Val* Compiler::compile_asm_subw_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::W;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::SUB, broadcast, env);
}
|
||||
|
||||
/*!
 * Compile (.add.vf dst src0 src1 ...): element-wise addition with no broadcast element.
 * Only the five-argument call is kept; the older four-argument call no longer matches the
 * declaration of compile_asm_vf_math3 and made this one unreachable.
 */
Val* Compiler::compile_asm_add_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD,
                              emitter::Register::VF_ELEMENT::NONE, env);
}
|
||||
|
||||
// The broadcasting .add[x|y|z|w].vf forms. Each forwards to compile_asm_vf_math3 with
// Kind::ADD and names the element of the second source to broadcast.

Val* Compiler::compile_asm_addx_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::X;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD, broadcast, env);
}

Val* Compiler::compile_asm_addy_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::Y;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD, broadcast, env);
}

Val* Compiler::compile_asm_addz_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::Z;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD, broadcast, env);
}

Val* Compiler::compile_asm_addw_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::W;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::ADD, broadcast, env);
}
|
||||
|
||||
// The .mul[x|y|z|w].vf family. Each form forwards to compile_asm_vf_math3 with Kind::MUL and
// differs only in which element of the second source (if any) is broadcast.

Val* Compiler::compile_asm_mul_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::NONE;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, broadcast, env);
}

Val* Compiler::compile_asm_mulx_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::X;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, broadcast, env);
}

Val* Compiler::compile_asm_muly_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::Y;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, broadcast, env);
}

Val* Compiler::compile_asm_mulz_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::Z;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, broadcast, env);
}

Val* Compiler::compile_asm_mulw_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  const auto broadcast = emitter::Register::VF_ELEMENT::W;
  return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::MUL, broadcast, env);
}
|
||||
|
||||
/*!
 * Compile (.abs.vf dst src [:color #t|#f] [:mask #b<0-15>]).
 *
 * Writes the element-wise absolute value of src into dst. Only the elements selected by
 * :mask are written; the default mask 0b1111 writes all four.
 *
 * Both operands must be vector float registers and dst must be settable; otherwise a
 * compiler error is thrown. A :mask outside 0-15 is also a compiler error.
 */
Val* Compiler::compile_asm_abs_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
  auto args = get_va(form, rest);
  va_check(
      form, args, {{}, {}},
      {{"color", {false, goos::ObjectType::SYMBOL}}, {"mask", {false, goos::ObjectType::INTEGER}}});
  bool color = true;
  if (args.has_named("color")) {
    color = get_true_or_false(form, args.named.at("color"));
  }

  auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env);
  if (!dest->settable() || dest->ireg().reg_class != RegClass::VECTOR_FLOAT) {
    throw_compiler_error(form,
                         "Invalid destination register for a vector float abs form. Got a {}.",
                         dest->print());
  }

  auto src = compile_error_guard(args.unnamed.at(1), env)->to_reg(env);
  if (src->ireg().reg_class != RegClass::VECTOR_FLOAT) {
    throw_compiler_error(form, "Invalid source register for a vector float abs form. Got a {}.",
                         src->print());
  }

  u8 mask = 0b1111;
  if (args.has_named("mask")) {
    // Range-check at full integer width BEFORE narrowing to u8. Assigning straight into a u8
    // would wrap values such as 256 or 271 to 0 or 15 and let them through unnoticed.
    const int64_t requested_mask = args.named.at("mask").as_int();
    if (requested_mask < 0 || requested_mask > 15) {
      throw_compiler_error(
          form, "The value {} is out of range for a destination mask (0-15 inclusive).",
          requested_mask);
    }
    mask = static_cast<u8>(requested_mask);
  }

  // There is no single instruction ABS on AVX, so there are a number of ways to do it manually,
  // this is one of them. For example, assume the original vec = <1, -2, -3, 4>

  // First we clear a temporary register by XOR'ing it with itself.
  auto temp_reg = env->make_vfr(dest->type());
  env->emit_ir<IR_VFMath3Asm>(color, temp_reg, temp_reg, temp_reg, IR_VFMath3Asm::Kind::XOR);

  // Next, negate the source by subtracting it from zero. The zeroed temporary is reused as the
  // destination, so no second register is needed:
  // <0, 0, 0, 0> - <1, -2, -3, 4> = <-1, 2, 3, -4>
  env->emit_ir<IR_VFMath3Asm>(color, temp_reg, temp_reg, src, IR_VFMath3Asm::Kind::SUB);

  // Finally, take the element-wise maximum of the original value and its negation:
  // MAX_OF(<1, -2, -3, 4>, <-1, 2, 3, -4>) = <1, 2, 3, 4>
  if (mask == 0b1111) {  // If the entire destination is to be copied, we can optimize out the blend
    env->emit_ir<IR_VFMath3Asm>(color, dest, src, temp_reg, IR_VFMath3Asm::Kind::MAX);
  } else {
    env->emit_ir<IR_VFMath3Asm>(color, temp_reg, src, temp_reg, IR_VFMath3Asm::Kind::MAX);

    // Blend the result back into the destination register using the mask
    env->emit_ir<IR_BlendVF>(color, dest, dest, temp_reg, mask);
  }

  return get_none();
}
|
||||
|
||||
Val* Compiler::compile_asm_blend_vf(const goos::Object& form, const goos::Object& rest, Env* env) {
|
||||
auto args = get_va(form, rest);
|
||||
va_check(form, args, {{}, {}, {}, {}}, {{"color", {false, goos::ObjectType::SYMBOL}}});
|
||||
va_check(
|
||||
form, args, {{}, {}, {}},
|
||||
{{"color", {false, goos::ObjectType::SYMBOL}}, {"mask", {false, goos::ObjectType::INTEGER}}});
|
||||
bool color = true;
|
||||
if (args.has_named("color")) {
|
||||
color = get_true_or_false(form, args.named.at("color"));
|
||||
|
@ -352,17 +538,15 @@ Val* Compiler::compile_asm_blend_vf(const goos::Object& form, const goos::Object
|
|||
src2->print());
|
||||
}
|
||||
|
||||
int64_t mask;
|
||||
if (!try_getting_constant_integer(args.unnamed.at(3), &mask, env)) {
|
||||
throw_compiler_error(form,
|
||||
"The value {} is invalid for a blend mask, it could not be evaluated as a "
|
||||
"constant integer.",
|
||||
args.unnamed.at(3).print());
|
||||
u8 mask = 0b1111;
|
||||
if (args.has_named("mask")) {
|
||||
mask = args.named.at("mask").as_int();
|
||||
if (mask > 15) {
|
||||
throw_compiler_error(form, "The value {} is out of range for a blend mask (0-15 inclusive).",
|
||||
mask);
|
||||
}
|
||||
}
|
||||
|
||||
if (mask < 0 || mask > 15) {
|
||||
throw_compiler_error(form, "The value {} is out of range for a blend mask.", mask);
|
||||
}
|
||||
env->emit_ir<IR_BlendVF>(color, dest, src1, src2, mask);
|
||||
return get_none();
|
||||
}
|
||||
|
@ -370,9 +554,12 @@ Val* Compiler::compile_asm_blend_vf(const goos::Object& form, const goos::Object
|
|||
Val* Compiler::compile_asm_vf_math3(const goos::Object& form,
|
||||
const goos::Object& rest,
|
||||
IR_VFMath3Asm::Kind kind,
|
||||
emitter::Register::VF_ELEMENT broadcastElement,
|
||||
Env* env) {
|
||||
auto args = get_va(form, rest);
|
||||
va_check(form, args, {{}, {}, {}}, {{"color", {false, goos::ObjectType::SYMBOL}}});
|
||||
va_check(
|
||||
form, args, {{}, {}, {}},
|
||||
{{"color", {false, goos::ObjectType::SYMBOL}}, {"mask", {false, goos::ObjectType::INTEGER}}});
|
||||
bool color = true;
|
||||
if (args.has_named("color")) {
|
||||
color = get_true_or_false(form, args.named.at("color"));
|
||||
|
@ -399,7 +586,46 @@ Val* Compiler::compile_asm_vf_math3(const goos::Object& form,
|
|||
src2->print());
|
||||
}
|
||||
|
||||
env->emit_ir<IR_VFMath3Asm>(color, dest, src1, src2, kind);
|
||||
u8 mask = 0b1111;
|
||||
if (args.has_named("mask")) {
|
||||
mask = args.named.at("mask").as_int();
|
||||
if (mask > 15) {
|
||||
throw_compiler_error(form, "The value {} is out of range for a blend mask (0-15 inclusive).",
|
||||
mask);
|
||||
}
|
||||
}
|
||||
|
||||
// If there is a broadcast register, splat that float across the entire src2 register before
|
||||
// performing the operation For example vaddx.xyzw vf10, vf20, vf30
|
||||
// vf10[x] = vf20[x] + vf30[x]
|
||||
// vf10[y] = vf20[y] + vf30[x]
|
||||
// vf10[z] = vf20[z] + vf30[x]
|
||||
// vf10[w] = vf20[w] + vf30[x]
|
||||
if (broadcastElement != emitter::Register::VF_ELEMENT::NONE) {
|
||||
auto temp_reg = env->make_vfr(dest->type());
|
||||
env->emit_ir<IR_SplatVF>(color, temp_reg, src2, broadcastElement);
|
||||
|
||||
// If the entire destination is to be copied, we can optimize out the blend
|
||||
if (mask == 0b1111) {
|
||||
env->emit_ir<IR_VFMath3Asm>(color, dest, src1, temp_reg, kind);
|
||||
} else {
|
||||
// Perform the arithmetic operation on the two vectors into a temporary register
|
||||
env->emit_ir<IR_VFMath3Asm>(color, temp_reg, src1, temp_reg, kind);
|
||||
// Blend the result back into the destination register using the mask
|
||||
env->emit_ir<IR_BlendVF>(color, dest, dest, temp_reg, mask);
|
||||
}
|
||||
} else {
|
||||
// If the entire destination is to be copied, we can optimize out the blend
|
||||
if (mask == 0b1111) {
|
||||
env->emit_ir<IR_VFMath3Asm>(color, dest, src1, src2, kind);
|
||||
} else {
|
||||
auto temp_reg = env->make_vfr(dest->type());
|
||||
// Perform the arithmetic operation on the two vectors into a temporary register
|
||||
env->emit_ir<IR_VFMath3Asm>(color, temp_reg, src1, src2, kind);
|
||||
// Blend the result back into the destination register using the mask
|
||||
env->emit_ir<IR_BlendVF>(color, dest, dest, temp_reg, mask);
|
||||
}
|
||||
}
|
||||
|
||||
return get_none();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@ static const std::unordered_map<
|
|||
std::string,
|
||||
Val* (Compiler::*)(const goos::Object& form, const goos::Object& rest, Env* env)>
|
||||
goal_forms = {
|
||||
// inline asm
|
||||
// INLINE ASM
|
||||
{".ret", &Compiler::compile_asm_ret},
|
||||
{".push", &Compiler::compile_asm_push},
|
||||
{".pop", &Compiler::compile_asm_pop},
|
||||
|
@ -23,11 +23,44 @@ static const std::unordered_map<
|
|||
{".add", &Compiler::compile_asm_add},
|
||||
{".load-sym", &Compiler::compile_asm_load_sym},
|
||||
{".mov", &Compiler::compile_asm_mov},
|
||||
|
||||
// INLINE ASM - VECTOR FLOAT OPERATIONS
|
||||
{".nop.vf", &Compiler::compile_asm_nop_vf},
|
||||
{".lvf", &Compiler::compile_asm_lvf},
|
||||
{".svf", &Compiler::compile_asm_svf},
|
||||
{".xor.vf", &Compiler::compile_asm_xor_vf},
|
||||
|
||||
{".max.vf", &Compiler::compile_asm_max_vf},
|
||||
{".maxx.vf", &Compiler::compile_asm_maxx_vf},
|
||||
{".maxy.vf", &Compiler::compile_asm_maxy_vf},
|
||||
{".maxz.vf", &Compiler::compile_asm_maxz_vf},
|
||||
{".maxw.vf", &Compiler::compile_asm_maxw_vf},
|
||||
|
||||
{".min.vf", &Compiler::compile_asm_min_vf},
|
||||
{".minx.vf", &Compiler::compile_asm_minx_vf},
|
||||
{".miny.vf", &Compiler::compile_asm_miny_vf},
|
||||
{".minz.vf", &Compiler::compile_asm_minz_vf},
|
||||
{".minw.vf", &Compiler::compile_asm_minw_vf},
|
||||
|
||||
{".sub.vf", &Compiler::compile_asm_sub_vf},
|
||||
{".subx.vf", &Compiler::compile_asm_subx_vf},
|
||||
{".suby.vf", &Compiler::compile_asm_suby_vf},
|
||||
{".subz.vf", &Compiler::compile_asm_subz_vf},
|
||||
{".subw.vf", &Compiler::compile_asm_subw_vf},
|
||||
|
||||
{".add.vf", &Compiler::compile_asm_add_vf},
|
||||
{".addx.vf", &Compiler::compile_asm_addx_vf},
|
||||
{".addy.vf", &Compiler::compile_asm_addy_vf},
|
||||
{".addz.vf", &Compiler::compile_asm_addz_vf},
|
||||
{".addw.vf", &Compiler::compile_asm_addw_vf},
|
||||
|
||||
{".mul.vf", &Compiler::compile_asm_mul_vf},
|
||||
{".mulx.vf", &Compiler::compile_asm_mulx_vf},
|
||||
{".muly.vf", &Compiler::compile_asm_muly_vf},
|
||||
{".mulz.vf", &Compiler::compile_asm_mulz_vf},
|
||||
{".mulw.vf", &Compiler::compile_asm_mulw_vf},
|
||||
|
||||
{".abs.vf", &Compiler::compile_asm_abs_vf},
|
||||
{".blend.vf", &Compiler::compile_asm_blend_vf},
|
||||
|
||||
// BLOCK FORMS
|
||||
|
|
|
@ -2009,6 +2009,13 @@ class IGen {
|
|||
return instr;
|
||||
}
|
||||
|
||||
/*
 Builds the instruction emitted for `.nop.vf`: the two-byte x87 FNOP (D9 D0).
*/
static Instruction nop_vf() {
  Instruction fnop(0xd9);  // first opcode byte
  fnop.set_op2(0xd0);      // second opcode byte
  return fnop;
}
|
||||
|
||||
// eventually...
|
||||
// sqrt
|
||||
// rsqrt
|
||||
|
@ -2153,16 +2160,7 @@ class IGen {
|
|||
return instr;
|
||||
}
|
||||
|
||||
// todo, rip relative loads and stores.
|
||||
|
||||
static Instruction mul_vf(Register dst, Register src1, Register src2) {
|
||||
assert(dst.is_xmm());
|
||||
assert(src1.is_xmm());
|
||||
assert(src2.is_xmm());
|
||||
Instruction instr(0x59);
|
||||
instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id());
|
||||
return instr;
|
||||
}
|
||||
// TODO - rip relative loads and stores.
|
||||
|
||||
static Instruction shuffle_vf(Register dst, Register src, u8 dx, u8 dy, u8 dz, u8 dw) {
|
||||
assert(dst.is_xmm());
|
||||
|
@ -2172,12 +2170,7 @@ class IGen {
|
|||
assert(dz < 4);
|
||||
assert(dw < 4);
|
||||
u8 imm = dx + (dy << 2) + (dz << 4) + (dw << 6);
|
||||
// we use the AVX "VEX" encoding here. This is a three-operand form, but we just set both source
|
||||
// to the same register. It seems like this is one byte longer but is faster maybe?
|
||||
Instruction instr(0xc6);
|
||||
instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, src.hw_id());
|
||||
instr.set(Imm(1, imm));
|
||||
return instr;
|
||||
return swizzle_vf(dst, src, imm);
|
||||
|
||||
// SSE encoding version:
|
||||
// Instruction instr(0x0f);
|
||||
|
@ -2187,11 +2180,68 @@ class IGen {
|
|||
// return instr;
|
||||
}
|
||||
|
||||
/*
|
||||
Generic Swizzle (re-arrangment of packed FPs) operation, the control bytes are quite involved.
|
||||
Here's a brief run-down:
|
||||
- 8-bits / 4 groups of 2 bits
|
||||
- Each group is used to determine which element in `src` gets copied to `dst`'s respective
|
||||
element.
|
||||
- Right to Left, the first 2-bit group controls which `dst` element, gets copied to `src`'s
|
||||
most-significant byte (left-most) and so on. GROUP OPTIONS
|
||||
- 00b - Copy the least-significant element
|
||||
- 01b - Copy the second element (from the right)
|
||||
- 10b - Copy the third element (from the right)
|
||||
- 11b - Copy the most significant element
|
||||
Examples
|
||||
; xmm1 = (1.5, 2.5, 3.5, 4.5)
|
||||
SHUFPS xmm1, xmm1, 0xff ; Copy the most significant element to all positions
|
||||
(1.5, 1.5, 1.5, 1.5) SHUFPS xmm1, xmm1, 0x39 ; Rotate right (4.5, 1.5, 2.5, 3.5)
|
||||
*/
|
||||
static Instruction swizzle_vf(Register dst, Register src, u8 controlBytes) {
|
||||
assert(dst.is_xmm());
|
||||
assert(src.is_xmm());
|
||||
Instruction instr(0xC6); // VSHUFPS
|
||||
|
||||
// we use the AVX "VEX" encoding here. This is a three-operand form,
|
||||
// but we just set both source
|
||||
// to the same register. It seems like this is one byte longer but is faster maybe?
|
||||
instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, src.hw_id());
|
||||
instr.set(Imm(1, controlBytes));
|
||||
return instr;
|
||||
}
|
||||
|
||||
/*
|
||||
Splats a single element in 'src' to all elements in 'dst'
|
||||
For example (pseudocode):
|
||||
xmm1 = (1.5, 2.5, 3.5, 4.5)
|
||||
xmm2 = (1, 2, 3, 4)
|
||||
splat_vf(xmm1, xmm2, XMM_ELEMENT::X);
|
||||
xmm1 = (4, 4, 4, 4)
|
||||
*/
|
||||
static Instruction splat_vf(Register dst, Register src, Register::VF_ELEMENT element) {
|
||||
switch (element) {
|
||||
case Register::VF_ELEMENT::X: // Least significant element
|
||||
return swizzle_vf(dst, src, 0b00000000);
|
||||
break;
|
||||
case Register::VF_ELEMENT::Y:
|
||||
return swizzle_vf(dst, src, 0b01010101);
|
||||
break;
|
||||
case Register::VF_ELEMENT::Z:
|
||||
return swizzle_vf(dst, src, 0b10101010);
|
||||
break;
|
||||
case Register::VF_ELEMENT::W: // Most significant element
|
||||
return swizzle_vf(dst, src, 0b11111111);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 Builds a VEX-encoded VXORPS (opcode 0x57) with 'dst' as the destination and 'src1' / 'src2'
 as the two sources. All three registers must be XMM registers.
*/
static Instruction xor_vf(Register dst, Register src1, Register src2) {
  assert(dst.is_xmm());
  assert(src1.is_xmm());
  assert(src2.is_xmm());
  // Declared exactly once: a second `Instruction instr(0x57);` would be a redeclaration.
  Instruction instr(0x57);  // VXORPS
  instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id());
  return instr;
}
|
||||
|
@ -2200,7 +2250,7 @@ class IGen {
|
|||
assert(dst.is_xmm());
|
||||
assert(src1.is_xmm());
|
||||
assert(src2.is_xmm());
|
||||
Instruction instr(0x5c);
|
||||
Instruction instr(0x5c); // VSUBPS
|
||||
instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id());
|
||||
return instr;
|
||||
}
|
||||
|
@ -2209,7 +2259,34 @@ class IGen {
|
|||
assert(dst.is_xmm());
|
||||
assert(src1.is_xmm());
|
||||
assert(src2.is_xmm());
|
||||
Instruction instr(0x58);
|
||||
Instruction instr(0x58); // VADDPS
|
||||
instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id());
|
||||
return instr;
|
||||
}
|
||||
|
||||
/*
 Builds a VEX-encoded VMULPS (opcode 0x59) with 'dst' as the destination and 'src1' / 'src2'
 as the two sources. All three registers must be XMM registers.
*/
static Instruction mul_vf(Register dst, Register src1, Register src2) {
  assert(dst.is_xmm());
  assert(src1.is_xmm());
  assert(src2.is_xmm());

  Instruction vmulps(0x59);  // VMULPS
  vmulps.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id());
  return vmulps;
}
|
||||
|
||||
/*
 Builds a VEX-encoded VMAXPS (opcode 0x5F) with 'dst' as the destination and 'src1' / 'src2'
 as the two sources. All three registers must be XMM registers.
*/
static Instruction max_vf(Register dst, Register src1, Register src2) {
  assert(dst.is_xmm());
  assert(src1.is_xmm());
  assert(src2.is_xmm());

  Instruction vmaxps(0x5F);  // VMAXPS
  vmaxps.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id());
  return vmaxps;
}
|
||||
|
||||
/*
 Builds a VEX-encoded VMINPS (opcode 0x5D) with 'dst' as the destination and 'src1' / 'src2'
 as the two sources. All three registers must be XMM registers.
*/
static Instruction min_vf(Register dst, Register src1, Register src2) {
  assert(dst.is_xmm());
  assert(src1.is_xmm());
  assert(src2.is_xmm());

  Instruction vminps(0x5D);  // VMINPS
  vminps.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F, src1.hw_id());
  return vminps;
}
|
||||
|
@ -2219,7 +2296,7 @@ class IGen {
|
|||
assert(dst.is_xmm());
|
||||
assert(src1.is_xmm());
|
||||
assert(src2.is_xmm());
|
||||
Instruction instr(0x0c);
|
||||
Instruction instr(0x0c); // VBLENDPS
|
||||
instr.set_vex_modrm_and_rex(dst.hw_id(), src2.hw_id(), VEX3::LeadingBytes::P_0F_3A,
|
||||
src1.hw_id(), false, VexPrefix::P_66);
|
||||
instr.set(Imm(1, mask));
|
||||
|
|
|
@ -96,6 +96,13 @@ class Register {
|
|||
|
||||
std::string print() const;
|
||||
|
||||
/*
|
||||
Our XMM registers hold 4 packed single-precision floating-point values
|
||||
In the order (from left->right a.k.a most significant to least significant):
|
||||
W | Z | Y | X
|
||||
*/
|
||||
enum class VF_ELEMENT { X, Y, Z, W, NONE };
|
||||
|
||||
private:
|
||||
s8 m_id = -1;
|
||||
};
|
||||
|
|
2
scripts/search-decomp/.gitignore
vendored
Normal file
2
scripts/search-decomp/.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
*.txt
|
||||
*.md
|
72
scripts/search-decomp/grep.py
Normal file
72
scripts/search-decomp/grep.py
Normal file
|
@ -0,0 +1,72 @@
|
|||
"""Recursively search a directory for a list of terms and write a usage report.

Each non-blank line of the search term file is one term. A term containing `{DEST}` is
expanded into one term per VU destination-field combination. Terms are lower-cased before
searching, and a match requires the term to be followed by whitespace.
"""
import re
import os
import argparse
import time

# Every non-empty, ordered subset of the VU destination fields x/y/z/w (15 in total).
VU_DEST_COMBINATIONS = ["x", "xy", "xz", "xw", "xyz", "xyw", "xzw", "xyzw",
                        "y", "yz", "yw", "yzw", "z", "zw", "w"]

parser = argparse.ArgumentParser('pygrep')
parser.add_argument('-d', '--directory', type=str, required=True, help='root directory to recursively search')
parser.add_argument('-s', '--search_term_file', type=str, required=True, help='search term file')
parser.add_argument('-o', '--output_file', type=str, required=True, help='output file name')
args = parser.parse_args()

summary_results = {}
results = {}
search_terms = []


def register_term(token):
  """Queue `token` for searching unless an equal (case-insensitive) term is already queued."""
  key = token.lower()
  if key in results:
    # A duplicate line would be searched twice and double its own counts.
    return
  search_terms.append({
    "term": token
  })
  summary_results[key] = 0
  results[key] = []


with open(args.search_term_file, "r") as f:
  print("Initializing Search Term File")
  for line in f:
    token = line.strip()
    if not token:
      # A blank line would become an empty term that matches any whitespace.
      continue
    # VU INSTRUCTION ADDITION. Appends all combinations of `dest` to replace `{DEST}`
    if "{DEST}" in token:
      for combination in VU_DEST_COMBINATIONS:
        register_term(token.replace("{DEST}", combination))
    else:
      register_term(token)

print("Searching for {} tokens...".format(len(search_terms)))

totalTimeStart = time.time()
for index, search_term in enumerate(search_terms):
  start = time.time()
  term = search_term["term"].lower()
  print("[{:.2f}%] - Searching for - {}...".format((index/len(search_terms) * 100), term), end="")
  # Raw string: "\s" in a normal string literal is an invalid escape sequence.
  pattern = re.compile(re.escape(term) + r"\s+")
  for path, _, files in os.walk(args.directory):
    for fn in files:
      filepath = os.path.join(path, fn)
      # errors="replace" keeps one undecodable file from aborting the whole search.
      with open(filepath, errors="replace") as handle:
        # Line numbers are reported 1-based, as grep and editors do.
        for lineno, line in enumerate(handle, start=1):
          if pattern.search(line):
            result = "{}:{}:{}".format(filepath,
                                       lineno,
                                       line)
            summary_results[term] = summary_results[term] + 1
            results[term].append(result.strip())
  print("Took {:.2f} seconds, Found - {} occurences.".format(time.time() - start, summary_results[term]))
print("Took {} seconds in total".format(time.time() - totalTimeStart))

# Opening with "w" truncates an existing report, so no explicit removal is needed.
with open(args.output_file, "w") as f:
  print("Outputting Report")
  f.write("USAGE SUMMARY\n")
  # Most frequently used terms first.
  for key in sorted(summary_results, key=summary_results.get, reverse=True):
    f.write("{} - {}\n".format(key, summary_results[key]))
  f.write("\nOCCURENCES\n")
  for key, value in results.items():
    f.write("{}\n".format(key))
    for occurence in value:
      f.write("- {}\n".format(occurence))
|
122
scripts/search-decomp/vu-fp-instructions.txt
Normal file
122
scripts/search-decomp/vu-fp-instructions.txt
Normal file
|
@ -0,0 +1,122 @@
|
|||
VABS.{DEST}
|
||||
VADD.{DEST}
|
||||
VADDA.{DEST}
|
||||
VADDAi.{DEST}
|
||||
VADDAq.{DEST}
|
||||
VADDAw.{DEST}
|
||||
VADDAx.{DEST}
|
||||
VADDAy.{DEST}
|
||||
VADDAz.{DEST}
|
||||
VADDi.{DEST}
|
||||
VADDq.{DEST}
|
||||
VADDw.{DEST}
|
||||
VADDx.{DEST}
|
||||
VADDy.{DEST}
|
||||
VADDz.{DEST}
|
||||
VCLIP.xyz
|
||||
VDIVx Q
|
||||
VFTOI0.{DEST}
|
||||
VFTOI12.{DEST}
|
||||
VFTOI15.{DEST}
|
||||
VFTOI4.{DEST}
|
||||
VIADD
|
||||
VIADDI
|
||||
VIAND
|
||||
VILWR.{DEST}
|
||||
VIOR
|
||||
VISUB
|
||||
VISWR.{DEST}
|
||||
VITOF0.{DEST}
|
||||
VITOF12.{DEST}
|
||||
VITOF15.{DEST}
|
||||
VITOF4.{DEST}
|
||||
VLQD.{DEST}
|
||||
VLQI.{DEST}
|
||||
VMADD.{DEST}
|
||||
VMADDA.{DEST}
|
||||
VMADDA.{DEST}
|
||||
VMADDAi.{DEST}
|
||||
VMADDAi.{DEST}
|
||||
VMADDAq.{DEST}
|
||||
VMADDAq.{DEST}
|
||||
VMADDAw.{DEST}
|
||||
VMADDAx.{DEST}
|
||||
VMADDAy.{DEST}
|
||||
VMADDAz.{DEST}
|
||||
VMADDi.{DEST}
|
||||
VMADDq.{DEST}
|
||||
VMADDw.{DEST}
|
||||
VMADDx.{DEST}
|
||||
VMADDy.{DEST}
|
||||
VMADDz.{DEST}
|
||||
VMAX.{DEST}
|
||||
VMAXi.{DEST}
|
||||
VMAXw.{DEST}
|
||||
VMAXx.{DEST}
|
||||
VMAXy.{DEST}
|
||||
VMAXz.{DEST}
|
||||
VMFIR
|
||||
VMINI.{DEST}
|
||||
VMINI.{DEST}
|
||||
VMINIi.{DEST}
|
||||
VMINIw.{DEST}
|
||||
VMINIx.{DEST}
|
||||
VMINIy.{DEST}
|
||||
VMINIz.{DEST}
|
||||
VMOVE.{DEST}
|
||||
VMR32.{DEST}
|
||||
VMSUB.{DEST}
|
||||
VMSUBA.{DEST}
|
||||
VMSUBAi.{DEST}
|
||||
VMSUBAq.{DEST}
|
||||
VMSUBAw.{DEST}
|
||||
VMSUBAx.{DEST}
|
||||
VMSUBAy.{DEST}
|
||||
VMSUBAz.{DEST}
|
||||
VMSUBi.{DEST}
|
||||
VMSUBq.{DEST}
|
||||
VMSUBw.{DEST}
|
||||
VMSUBx.{DEST}
|
||||
VMSUBy.{DEST}
|
||||
VMSUBz.{DEST}
|
||||
VMTIRx
|
||||
VMUL.{DEST}
|
||||
VMULA.{DEST}
|
||||
VMULAi.{DEST}
|
||||
VMULAq.{DEST}
|
||||
VMULAw.{DEST}
|
||||
VMULAx.{DEST}
|
||||
VMULAy.{DEST}
|
||||
VMULAz.{DEST}
|
||||
VMULi.{DEST}
|
||||
VMULq.{DEST}
|
||||
VMULw.{DEST}
|
||||
VMULx.{DEST}
|
||||
VMULy.{DEST}
|
||||
VMULz.{DEST}
|
||||
VNOP
|
||||
VOPMSUB.xyz
|
||||
VOPMULA.xyz
|
||||
VRGET.{DEST}
|
||||
VRINIT
|
||||
VRNEXT.{DEST}
|
||||
VRSQRT Q
|
||||
VRXORw
|
||||
VSQD.{DEST}
|
||||
VSQI.{DEST}
|
||||
VSQRTy
|
||||
VSUB.{DEST}
|
||||
VSUBA.{DEST}
|
||||
VSUBAi.{DEST}
|
||||
VSUBAq.{DEST}
|
||||
VSUBAw.{DEST}
|
||||
VSUBAx.{DEST}
|
||||
VSUBAy.{DEST}
|
||||
VSUBAz.{DEST}
|
||||
VSUBi.{DEST}
|
||||
VSUBq.{DEST}
|
||||
VSUBw.{DEST}
|
||||
VSUBx.{DEST}
|
||||
VSUBy.{DEST}
|
||||
VSUBz.{DEST}
|
||||
VWAIT Q
|
|
@ -4,7 +4,6 @@ include(${CMAKE_CURRENT_LIST_DIR}/goalc/CMakeLists.txt)
|
|||
|
||||
add_executable(goalc-test
|
||||
${CMAKE_CURRENT_LIST_DIR}/test_main.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/test_test.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/test_reader.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/test_goos.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/test_listener_deci2.cpp
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
# TODO - probably a more cmakey way to do this
|
||||
|
||||
set(GOALC_TEST_CASES
|
||||
${CMAKE_CURRENT_LIST_DIR}/all_goalc_template_tests.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/test_debugger.cpp
|
||||
|
|
|
@ -34,6 +34,14 @@ std::string escaped_string(const std::string& in) {
|
|||
return result;
|
||||
}
|
||||
|
||||
std::string CompilerTestRunner::test_file_name(std::string templateStr) {
|
||||
const ::testing::TestInfo* const test_info =
|
||||
::testing::UnitTest::GetInstance()->current_test_info();
|
||||
std::string outFile = fmt::format(templateStr, test_info->name());
|
||||
std::replace(outFile.begin(), outFile.end(), '/', '_');
|
||||
return outFile;
|
||||
}
|
||||
|
||||
void CompilerTestRunner::run_static_test(inja::Environment& env,
|
||||
std::string& testCategory,
|
||||
const std::string& test_file,
|
||||
|
|
|
@ -23,6 +23,8 @@ struct CompilerTestRunner {
|
|||
|
||||
std::vector<Test> tests;
|
||||
|
||||
std::string test_file_name(std::string templateStr);
|
||||
|
||||
void run_static_test(inja::Environment& env,
|
||||
std::string& testCategory,
|
||||
const std::string& test_file,
|
||||
|
|
|
@ -12,10 +12,12 @@
|
|||
(set! (-> vector-1 z) 30.0)
|
||||
(set! (-> vector-1 w) 40.1)
|
||||
|
||||
(.nop.vf)
|
||||
(vector-! vector-2 vector-1 vector-0)
|
||||
(.nop.vf)
|
||||
; 9 + 18 + 27 = 54.0000
|
||||
(format #t "~f~%" (+ (-> vector-2 x) (-> vector-2 y) (-> vector-2 z) (-> vector-2 w)))
|
||||
)
|
||||
)
|
||||
|
||||
(test-basic-vector-math)
|
||||
(test-basic-vector-math)
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
(define my-vector (new 'global 'vector))
|
||||
(rlet ((vf1 :class vf :reset-here #t))
|
||||
(.nop.vf)
|
||||
(.lvf vf1 (new 'static 'vector :x 1.0 :y 1.2 :z 1.5 :w 1.6))
|
||||
(.nop.vf)
|
||||
(.svf my-vector vf1)
|
||||
)
|
||||
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
(defun test-vector-math ()
|
||||
(let ((vector-in-1 (new 'stack 'vector))
|
||||
{% if twoOperands %}(vector-in-2 (new 'stack 'vector)){% endif %}
|
||||
(vector-out (new 'stack 'vector)))
|
||||
|
||||
(set-vector! vector-in-1 {{ v1x }} {{ v1y }} {{ v1z }} {{ v1w }})
|
||||
{% if twoOperands %}(set-vector! vector-in-2 {{ v2x }} {{ v2y }} {{ v2z }} {{ v2w }}){% endif %}
|
||||
(set-vector! vector-out {{ destx }} {{ desty }} {{ destz }} {{ destw }})
|
||||
|
||||
(rlet ((vf1 :class vf :reset-here #t)
|
||||
{% if twoOperands %}(vf2 :class vf :reset-here #t){% endif %}
|
||||
(vf3 :class vf :reset-here #t))
|
||||
|
||||
(.lvf vf1 vector-in-1)
|
||||
{% if twoOperands %}(.lvf vf2 vector-in-2){% endif %}
|
||||
(.lvf vf3 vector-out)
|
||||
|
||||
|
||||
{% if twoOperands %}({{ operation }} vf3 vf1 vf2{% if destinationMask %} :mask #b{{ destinationMask }}{% endif %}){% else %}({{ operation }} vf3 vf1{% if destinationMask %} :mask #b{{ destinationMask }}{% endif %}){% endif %}
|
||||
|
||||
(.svf vector-out vf3))
|
||||
|
||||
(format #t "(~f, ~f, ~f, ~f)~%" (-> vector-out x) (-> vector-out y) (-> vector-out z) (-> vector-out w))))
|
||||
|
||||
(test-vector-math)
|
|
@ -9,7 +9,9 @@
|
|||
(rlet ((vf1 :class vf :reset-here #t)
|
||||
(vf2 :class vf :reg xmm1 :reset-here #t))
|
||||
(.lvf vf1 vector-0)
|
||||
(.nop.vf)
|
||||
(.mov vf2 vf1)
|
||||
(.nop.vf)
|
||||
(.svf (-> vector-1 vector 0) vf2)
|
||||
)
|
||||
|
||||
|
|
|
@ -19,12 +19,9 @@
|
|||
#include <iostream>
|
||||
#include <random>
|
||||
#include <filesystem>
|
||||
#include <regex>
|
||||
|
||||
struct WithGameParam {
|
||||
// TODO - Not Needed Yet
|
||||
};
|
||||
|
||||
class WithGameTests : public testing::TestWithParam<WithGameParam> {
|
||||
class WithGameTests : public ::testing::Test {
|
||||
public:
|
||||
static void SetUpTestSuite() {
|
||||
try {
|
||||
|
@ -345,6 +342,224 @@ TEST_F(WithGameTests, StaticBoxedArray) {
|
|||
{"4 asdf \"test\" (a b) 0 object 12 12\n0\n"});
|
||||
}
|
||||
|
||||
// VECTOR FLOAT TESTS
|
||||
|
||||
struct VectorFloatRegister {
|
||||
float x = 0;
|
||||
float y = 0;
|
||||
float z = 0;
|
||||
float w = 0;
|
||||
|
||||
void setJson(nlohmann::json& data, std::string vectorKey) {
|
||||
data[fmt::format("{}x", vectorKey)] = x;
|
||||
data[fmt::format("{}y", vectorKey)] = y;
|
||||
data[fmt::format("{}z", vectorKey)] = z;
|
||||
data[fmt::format("{}w", vectorKey)] = w;
|
||||
}
|
||||
|
||||
float getBroadcastElement(emitter::Register::VF_ELEMENT bc, float defValue) {
|
||||
switch (bc) {
|
||||
case emitter::Register::VF_ELEMENT::X:
|
||||
return x;
|
||||
case emitter::Register::VF_ELEMENT::Y:
|
||||
return y;
|
||||
case emitter::Register::VF_ELEMENT::Z:
|
||||
return z;
|
||||
case emitter::Register::VF_ELEMENT::W:
|
||||
return w;
|
||||
default:
|
||||
return defValue;
|
||||
}
|
||||
}
|
||||
|
||||
std::string toGOALFormat() {
|
||||
std::string answer = fmt::format("({:.4f}, {:.4f}, {:.4f}, {:.4f})", x, y, z, w);
|
||||
// {fmt} formats negative 0 as "-0.000", just going to flip any negative zeros to positives as I
|
||||
// don't think is an OpenGOAL issue
|
||||
return std::regex_replace(answer, std::regex("-0.0000"), "0.0000");
|
||||
}
|
||||
};
|
||||
|
||||
struct VectorFloatTestCase {
|
||||
VectorFloatRegister input1 = {1.5, -1.5, 0.0, 100.5};
|
||||
VectorFloatRegister input2 = {-5.5, -0.0, 10.0, 7.5};
|
||||
VectorFloatRegister dest = {11, 22, 33, 44};
|
||||
|
||||
int destinationMask = -1;
|
||||
emitter::Register::VF_ELEMENT bc = emitter::Register::VF_ELEMENT::NONE;
|
||||
std::function<float(float, float)> operation;
|
||||
|
||||
VectorFloatRegister getExpectedResult() {
|
||||
VectorFloatRegister expectedResult;
|
||||
expectedResult.x = destinationMask & 0b0001
|
||||
? operation(input1.x, input2.getBroadcastElement(bc, input2.x))
|
||||
: dest.x;
|
||||
expectedResult.y = destinationMask & 0b0010
|
||||
? operation(input1.y, input2.getBroadcastElement(bc, input2.y))
|
||||
: dest.y;
|
||||
expectedResult.z = destinationMask & 0b0100
|
||||
? operation(input1.z, input2.getBroadcastElement(bc, input2.z))
|
||||
: dest.z;
|
||||
expectedResult.w = destinationMask & 0b1000
|
||||
? operation(input1.w, input2.getBroadcastElement(bc, input2.w))
|
||||
: dest.w;
|
||||
return expectedResult;
|
||||
}
|
||||
|
||||
std::string getOperationBroadcast() {
|
||||
switch (bc) {
|
||||
case emitter::Register::VF_ELEMENT::X:
|
||||
return "x";
|
||||
case emitter::Register::VF_ELEMENT::Y:
|
||||
return "y";
|
||||
case emitter::Register::VF_ELEMENT::Z:
|
||||
return "z";
|
||||
case emitter::Register::VF_ELEMENT::W:
|
||||
return "w";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
void setJson(nlohmann::json& data, std::string func, bool twoOperands = true) {
|
||||
input1.setJson(data, "v1");
|
||||
data["twoOperands"] = twoOperands;
|
||||
if (twoOperands) {
|
||||
input2.setJson(data, "v2");
|
||||
}
|
||||
dest.setJson(data, "dest");
|
||||
data["operation"] = fmt::format(func);
|
||||
if (destinationMask == -1) {
|
||||
data["destinationMask"] = false;
|
||||
} else {
|
||||
data["destinationMask"] = fmt::format("{:b}", destinationMask);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<VectorFloatTestCase> vectorMathTestCaseGen() {
|
||||
std::string test = fmt::format("{:.4f}", -0.0);
|
||||
|
||||
std::vector<VectorFloatTestCase> cases = {};
|
||||
for (int i = 0; i <= 15; i++) {
|
||||
VectorFloatTestCase testCase = VectorFloatTestCase();
|
||||
testCase.destinationMask = i;
|
||||
cases.push_back(testCase);
|
||||
// Re-add each case with each broadcast varient
|
||||
for (int j = 0; j < 4; j++) {
|
||||
VectorFloatTestCase testCaseBC = VectorFloatTestCase();
|
||||
testCaseBC.destinationMask = i;
|
||||
testCaseBC.bc = static_cast<emitter::Register::VF_ELEMENT>(j);
|
||||
cases.push_back(testCaseBC);
|
||||
}
|
||||
}
|
||||
return cases;
|
||||
}
|
||||
|
||||
class VectorFloatParameterizedTestFixtureWithRunner
|
||||
: public WithGameTests,
|
||||
public ::testing::WithParamInterface<VectorFloatTestCase> {
|
||||
protected:
|
||||
std::string templateFile = "test-vector-math.template.gc";
|
||||
};
|
||||
|
||||
// NOTE - an excellent article -
|
||||
// https://www.sandordargo.com/blog/2019/04/24/parameterized-testing-with-gtest
|
||||
|
||||
// .add{x|y|z|w}.vf for the mask / broadcast combination supplied by the fixture.
TEST_P(VectorFloatParameterizedTestFixtureWithRunner, VF_ADD_XYZW_DEST) {
  VectorFloatTestCase testCase = GetParam();
  testCase.operation = [](float lhs, float rhs) { return lhs + rhs; };
  const std::string broadcast = testCase.getOperationBroadcast();

  // Render the template for this operation into a per-test generated file.
  nlohmann::json data;
  testCase.setJson(data, fmt::format(".add{}.vf", broadcast));
  std::string outFile =
      runner.test_file_name(fmt::format("vector-math-add{}-{{}}.generated.gc", broadcast));
  env.write(templateFile, data, outFile);

  const std::string expected = fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat());
  runner.run_test(testCategory, outFile, {expected});
}
|
||||
|
||||
// .sub{x|y|z|w}.vf for the mask / broadcast combination supplied by the fixture.
TEST_P(VectorFloatParameterizedTestFixtureWithRunner, VF_SUB_XYZW_DEST) {
  VectorFloatTestCase testCase = GetParam();
  testCase.operation = [](float lhs, float rhs) { return lhs - rhs; };
  const std::string broadcast = testCase.getOperationBroadcast();

  // Render the template for this operation into a per-test generated file.
  nlohmann::json data;
  testCase.setJson(data, fmt::format(".sub{}.vf", broadcast));
  std::string outFile =
      runner.test_file_name(fmt::format("vector-math-sub{}-{{}}.generated.gc", broadcast));
  env.write(templateFile, data, outFile);

  const std::string expected = fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat());
  runner.run_test(testCategory, outFile, {expected});
}
|
||||
|
||||
// .mul{x|y|z|w}.vf for the mask / broadcast combination supplied by the fixture.
TEST_P(VectorFloatParameterizedTestFixtureWithRunner, VF_MUL_XYZW_DEST) {
  VectorFloatTestCase testCase = GetParam();
  testCase.operation = [](float lhs, float rhs) { return lhs * rhs; };
  const std::string broadcast = testCase.getOperationBroadcast();

  // Render the template for this operation into a per-test generated file.
  nlohmann::json data;
  testCase.setJson(data, fmt::format(".mul{}.vf", broadcast));
  std::string outFile =
      runner.test_file_name(fmt::format("vector-math-mul{}-{{}}.generated.gc", broadcast));
  env.write(templateFile, data, outFile);

  const std::string expected = fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat());
  runner.run_test(testCategory, outFile, {expected});
}
|
||||
|
||||
// .min{x|y|z|w}.vf for the mask / broadcast combination supplied by the fixture.
TEST_P(VectorFloatParameterizedTestFixtureWithRunner, VF_MIN_XYZW_DEST) {
  VectorFloatTestCase testCase = GetParam();
  testCase.operation = [](float lhs, float rhs) { return fmin(lhs, rhs); };
  const std::string broadcast = testCase.getOperationBroadcast();

  // Render the template for this operation into a per-test generated file.
  nlohmann::json data;
  testCase.setJson(data, fmt::format(".min{}.vf", broadcast));
  std::string outFile =
      runner.test_file_name(fmt::format("vector-math-min{}-{{}}.generated.gc", broadcast));
  env.write(templateFile, data, outFile);

  const std::string expected = fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat());
  runner.run_test(testCategory, outFile, {expected});
}
|
||||
|
||||
// .max{x|y|z|w}.vf for the mask / broadcast combination supplied by the fixture.
TEST_P(VectorFloatParameterizedTestFixtureWithRunner, VF_MAX_XYZW_DEST) {
  VectorFloatTestCase testCase = GetParam();
  testCase.operation = [](float lhs, float rhs) { return fmax(lhs, rhs); };
  const std::string broadcast = testCase.getOperationBroadcast();

  // Render the template for this operation into a per-test generated file.
  nlohmann::json data;
  testCase.setJson(data, fmt::format(".max{}.vf", broadcast));
  std::string outFile =
      runner.test_file_name(fmt::format("vector-math-max{}-{{}}.generated.gc", broadcast));
  env.write(templateFile, data, outFile);

  const std::string expected = fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat());
  runner.run_test(testCategory, outFile, {expected});
}
|
||||
|
||||
// TODO - This test runs more often than the rest, should probably be split into it's own fixture
|
||||
// (broadcasting ignored!)
|
||||
TEST_P(VectorFloatParameterizedTestFixtureWithRunner, VF_ABS_DEST) {
|
||||
VectorFloatTestCase testCase = GetParam();
|
||||
testCase.operation = [](float x, float y) {
|
||||
// Avoid compiler warnings for unused variable, making a varient that accepts a lambda with only
|
||||
// 1 float is just unnecessary complexity
|
||||
y = 0;
|
||||
return fabs(x);
|
||||
};
|
||||
|
||||
nlohmann::json data;
|
||||
testCase.setJson(data, ".abs.vf", false);
|
||||
|
||||
std::string outFile = runner.test_file_name("vector-math-abs-{}.generated.gc");
|
||||
env.write(templateFile, data, outFile);
|
||||
runner.run_test(testCategory, outFile,
|
||||
{fmt::format("{}\n0\n", testCase.getExpectedResult().toGOALFormat())});
|
||||
}
|
||||
|
||||
// Runs every TEST_P of the fixture once per case produced by vectorMathTestCaseGen().
INSTANTIATE_TEST_SUITE_P(WithGameTests_VectorFloatTests,
                         VectorFloatParameterizedTestFixtureWithRunner,
                         ::testing::ValuesIn(vectorMathTestCaseGen()));
|
||||
|
||||
// Runs the static test source test-vf-load-and-store.gc and checks its printed output.
TEST_F(WithGameTests, VFLoadAndStore) {
  const std::string sourceFile = "test-vf-load-and-store.gc";
  runner.run_static_test(env, testCategory, sourceFile, {"2.0000\n0\n"});
}
|
||||
|
@ -380,4 +595,4 @@ TEST(TypeConsistency, TypeConsistency) {
|
|||
compiler.enable_throw_on_redefines();
|
||||
compiler.run_test_no_load("test/goalc/source_templates/with_game/test-build-game.gc");
|
||||
compiler.run_test_no_load("decompiler/config/all-types.gc");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,13 @@
|
|||
|
||||
using namespace emitter;
|
||||
|
||||
// IGen::nop_vf() must encode as the two bytes D9 D0.
TEST(EmitterAVX, VF_NOP) {
  CodeTester tester;
  tester.init_code_buffer(1024);
  tester.emit(IGen::nop_vf());

  const std::string expectedHex = "D9D0";
  EXPECT_EQ(tester.dump_to_hex_string(true), expectedHex);
}
|
||||
|
||||
TEST(EmitterAVX, MOV_VF) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(10000);
|
||||
|
@ -137,6 +144,46 @@ TEST(EmitterAVX, ShuffleVF) {
|
|||
EXPECT_EQ(tester.dump_to_hex_string(true), "C5D8C6DC6DC4C108C6DE6DC558C6EC6DC44108C6EE6D");
|
||||
}
|
||||
|
||||
// Splat of element X (control byte 0x00) for each pairing of a low (xmm3 / xmm4) and a
// high (xmm13 / xmm14) register as dst / src.
TEST(EmitterAVX, SplatVF_X) {
  const auto element = Register::VF_ELEMENT::X;
  CodeTester tester;
  tester.init_code_buffer(1024);

  tester.emit(IGen::splat_vf(XMM0 + 3, XMM0 + 4, element));
  tester.emit(IGen::splat_vf(XMM0 + 3, XMM0 + 14, element));
  tester.emit(IGen::splat_vf(XMM0 + 13, XMM0 + 4, element));
  tester.emit(IGen::splat_vf(XMM0 + 13, XMM0 + 14, element));

  const std::string expectedHex = "C5D8C6DC00C4C108C6DE00C558C6EC00C44108C6EE00";
  EXPECT_EQ(tester.dump_to_hex_string(true), expectedHex);
}
|
||||
|
||||
// Splat of element Y (control byte 0x55) for each pairing of a low (xmm3 / xmm4) and a
// high (xmm13 / xmm14) register as dst / src.
TEST(EmitterAVX, SplatVF_Y) {
  const auto element = Register::VF_ELEMENT::Y;
  CodeTester tester;
  tester.init_code_buffer(1024);

  tester.emit(IGen::splat_vf(XMM0 + 3, XMM0 + 4, element));
  tester.emit(IGen::splat_vf(XMM0 + 3, XMM0 + 14, element));
  tester.emit(IGen::splat_vf(XMM0 + 13, XMM0 + 4, element));
  tester.emit(IGen::splat_vf(XMM0 + 13, XMM0 + 14, element));

  const std::string expectedHex = "C5D8C6DC55C4C108C6DE55C558C6EC55C44108C6EE55";
  EXPECT_EQ(tester.dump_to_hex_string(true), expectedHex);
}
|
||||
|
||||
// Splat of element Z (control byte 0xAA) for each pairing of a low (xmm3 / xmm4) and a
// high (xmm13 / xmm14) register as dst / src.
TEST(EmitterAVX, SplatVF_Z) {
  const auto element = Register::VF_ELEMENT::Z;
  CodeTester tester;
  tester.init_code_buffer(1024);

  tester.emit(IGen::splat_vf(XMM0 + 3, XMM0 + 4, element));
  tester.emit(IGen::splat_vf(XMM0 + 3, XMM0 + 14, element));
  tester.emit(IGen::splat_vf(XMM0 + 13, XMM0 + 4, element));
  tester.emit(IGen::splat_vf(XMM0 + 13, XMM0 + 14, element));

  const std::string expectedHex = "C5D8C6DCAAC4C108C6DEAAC558C6ECAAC44108C6EEAA";
  EXPECT_EQ(tester.dump_to_hex_string(true), expectedHex);
}
|
||||
|
||||
// Splat of element W (control byte 0xFF) for each pairing of a low (xmm3 / xmm4) and a
// high (xmm13 / xmm14) register as dst / src.
TEST(EmitterAVX, SplatVF_W) {
  const auto element = Register::VF_ELEMENT::W;
  CodeTester tester;
  tester.init_code_buffer(1024);

  tester.emit(IGen::splat_vf(XMM0 + 3, XMM0 + 4, element));
  tester.emit(IGen::splat_vf(XMM0 + 3, XMM0 + 14, element));
  tester.emit(IGen::splat_vf(XMM0 + 13, XMM0 + 4, element));
  tester.emit(IGen::splat_vf(XMM0 + 13, XMM0 + 14, element));

  const std::string expectedHex = "C5D8C6DCFFC4C108C6DEFFC558C6ECFFC44108C6EEFF";
  EXPECT_EQ(tester.dump_to_hex_string(true), expectedHex);
}
|
||||
|
||||
TEST(EmitterAVX, XorVF) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(1024);
|
||||
|
@ -185,6 +232,38 @@ TEST(EmitterAVX, AddVF) {
|
|||
"C5E058DBC4C16058DDC59058DBC4C11058DDC56058EBC4416058EDC51058EBC4411058ED");
|
||||
}
|
||||
|
||||
// max_vf for every combination of a low (xmm3) and a high (xmm13) register as dst, src1
// and src2. src2 varies fastest, then src1, then dst.
TEST(EmitterAVX, MaxVF) {
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int registerOffsets[] = {3, 13};
  for (int dst : registerOffsets) {
    for (int src1 : registerOffsets) {
      for (int src2 : registerOffsets) {
        tester.emit(IGen::max_vf(XMM0 + dst, XMM0 + src1, XMM0 + src2));
      }
    }
  }

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "C5E05FDBC4C1605FDDC5905FDBC4C1105FDDC5605FEBC441605FEDC5105FEBC441105FED");
}
|
||||
|
||||
// min_vf for every combination of a low (xmm3) and a high (xmm13) register as dst, src1
// and src2. src2 varies fastest, then src1, then dst.
TEST(EmitterAVX, MinVF) {
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int registerOffsets[] = {3, 13};
  for (int dst : registerOffsets) {
    for (int src1 : registerOffsets) {
      for (int src2 : registerOffsets) {
        tester.emit(IGen::min_vf(XMM0 + dst, XMM0 + src1, XMM0 + src2));
      }
    }
  }

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "C5E05DDBC4C1605DDDC5905DDBC4C1105DDDC5605DEBC441605DEDC5105DEBC441105DED");
}
|
||||
|
||||
TEST(EmitterAVX, BlendVF) {
|
||||
CodeTester tester;
|
||||
tester.init_code_buffer(1024);
|
||||
|
|
|
@ -4,6 +4,17 @@
|
|||
|
||||
#include <filesystem>
|
||||
|
||||
// Running subsets of tests, see:
|
||||
// -
|
||||
// https://github.com/google/googletest/blob/620659ed92829a88ee34134c782bf5b5aa5a0a0c/googletest/docs/advanced.md#running-a-subset-of-the-tests
|
||||
// This can be set via:
|
||||
// - --gtest_filter="" CLI arg
|
||||
// - 'GTEST_FILTER' environment variable,
|
||||
// - or in code, by adding `::testing::GTEST_FLAG(filter) = "Test_Cases1*";` below
|
||||
//
|
||||
// I've set things up so VS has a run configuration that runs all tests with "Draft" in the name
|
||||
// to make it easier to test a subset of tests
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
|
||||
|
|
|
@ -1,6 +0,0 @@
|
|||
#include "gtest/gtest.h"
|
||||
|
||||
TEST(test, test) {
|
||||
EXPECT_TRUE(true);
|
||||
EXPECT_FALSE(false);
|
||||
}
|
Loading…
Reference in a new issue