[Compiler] Improve spills and register backups (#175)

* xmm spill

* improve getting stack variables

* improve symbol getting

* update changelog
This commit is contained in:
water111 2020-12-31 15:59:11 -05:00 committed by GitHub
parent a80b331c27
commit c8d382b35c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 535 additions and 121 deletions

View file

@ -98,4 +98,9 @@
- The listener now uses message IDs to more robustly handle the situation where a response message arrives extremely late, or where some sent messages are skipped.
- Fixed bug where references to the debug segment using RIP-relative links were not set to zero by the linker when the debug segment isn't loaded.
- The `rlet` form now supports 128-bit vector float registers with the `vf` register class.
- Added support for "vector float" assembly operations, including `lvf`, `svf`, `xor`, `sub`, `add`, and `blend`.
- Added support for "vector float" assembly operations, including `lvf`, `svf`, `xor`, `sub`, `add`, and `blend`.
- Added the ability to spill floating point variables to the stack if there aren't enough registers.
- Improved backup and restore of xmm registers.
- Fixed an off-by-one in move eliminator (previous version was correct, but did not generate as good code). Complicated functions are 2 to 10% smaller.
- Improved getting a stack address.
- Improved getting the value of `#f`, `#t`, and `()`.

View file

@ -9,6 +9,7 @@
#include <thread>
#include "common/common_types.h"
#include "common/util/Timer.h"
#include "game/sce/libscf.h"
#include "kboot.h"
#include "kmachine.h"
@ -146,6 +147,7 @@ void KernelCheckAndDispatch() {
// dispatch the kernel
//(**kernel_dispatcher)();
Timer kernel_dispatch_timer;
if (MasterUseKernel) {
// use the GOAL kernel.
call_goal_on_stack(Ptr<Function>(kernel_dispatcher->value), goal_stack, s7.offset,
@ -164,6 +166,11 @@ void KernelCheckAndDispatch() {
}
}
auto time_ms = kernel_dispatch_timer.getMs();
if (time_ms > 3) {
printf("Kernel dispatch time: %.3f ms\n", time_ms);
}
ClearPending();
// if the listener function changed, it means the kernel ran it, so we should notify compiler.

View file

@ -5,3 +5,4 @@
;; name in dgo: gravity-h
;; dgos: GAME, ENGINE
;; This file generates no code.

View file

@ -318,8 +318,6 @@
len)
)
(defmethod asize-of pair ((obj pair))
"Get the asize of a pair"
(the-as int (-> pair size))

View file

@ -62,6 +62,8 @@ void CodeGenerator::do_function(FunctionEnv* env, int f_idx) {
* Generates prologues / epilogues.
*/
void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
bool use_new_xmms = true;
auto f_rec = m_gen.get_existing_function_record(f_idx);
// todo, extra alignment settings
@ -71,13 +73,44 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
// compute how much stack we will use
int stack_offset = 0;
// back up xmms (currently not aligned)
// count how many xmm's we have to backup
int n_xmm_backups = 0;
for (auto& saved_reg : allocs.used_saved_regs) {
if (saved_reg.is_xmm()) {
m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm8s(RSP, XMM_SIZE), InstructionInfo::PROLOGUE);
m_gen.add_instr_no_ir(f_rec, IGen::store128_gpr64_xmm128(RSP, saved_reg),
n_xmm_backups++;
}
}
// only for new xmms. if n == 0, we don't use this at all.
int xmm_backup_stack_offset = 8 + XMM_SIZE * n_xmm_backups;
if (use_new_xmms) {
if (n_xmm_backups > 0) {
// offset the stack
stack_offset += xmm_backup_stack_offset;
m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm(RSP, xmm_backup_stack_offset),
InstructionInfo::PROLOGUE);
stack_offset += XMM_SIZE;
// back up xmms
int i = 0;
for (auto& saved_reg : allocs.used_saved_regs) {
if (saved_reg.is_xmm()) {
int offset = i * XMM_SIZE;
m_gen.add_instr_no_ir(f_rec, IGen::store128_xmm128_reg_offset(RSP, saved_reg, offset),
InstructionInfo::PROLOGUE);
i++;
}
}
}
} else {
// back up xmms (currently not aligned)
for (auto& saved_reg : allocs.used_saved_regs) {
if (saved_reg.is_xmm()) {
m_gen.add_instr_no_ir(f_rec, IGen::sub_gpr64_imm8s(RSP, XMM_SIZE),
InstructionInfo::PROLOGUE);
m_gen.add_instr_no_ir(f_rec, IGen::store128_gpr64_xmm128(RSP, saved_reg),
InstructionInfo::PROLOGUE);
stack_offset += XMM_SIZE;
}
}
}
@ -132,10 +165,16 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
auto& bonus = allocs.stack_ops.at(ir_idx);
for (auto& op : bonus.ops) {
if (op.load) {
if (op.reg.is_gpr()) {
if (op.reg.is_gpr() && op.reg_class == RegClass::GPR_64) {
// todo, s8 or 0 offset if possible?
m_gen.add_instr(IGen::load64_gpr64_plus_s32(
op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, RSP),
i_rec);
} else if (op.reg.is_xmm() && op.reg_class == RegClass::FLOAT) {
// load xmm32 off of the stack
m_gen.add_instr(IGen::load_reg_offset_xmm32(
op.reg, RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
i_rec);
} else {
assert(false);
}
@ -148,10 +187,16 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
// store things back on the stack if needed.
for (auto& op : bonus.ops) {
if (op.store) {
if (op.reg.is_gpr()) {
if (op.reg.is_gpr() && op.reg_class == RegClass::GPR_64) {
// todo, s8 or 0 offset if possible?
m_gen.add_instr(IGen::store64_gpr64_plus_s32(
RSP, allocs.get_slot_for_spill(op.slot) * GPR_SIZE, op.reg),
i_rec);
} else if (op.reg.is_xmm() && op.reg_class == RegClass::FLOAT) {
// store xmm32 on the stack
m_gen.add_instr(IGen::store_reg_offset_xmm32(
RSP, op.reg, allocs.get_slot_for_spill(op.slot) * GPR_SIZE),
i_rec);
} else {
assert(false);
}
@ -180,12 +225,31 @@ void CodeGenerator::do_goal_function(FunctionEnv* env, int f_idx) {
}
}
for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) {
auto& saved_reg = allocs.used_saved_regs.at(i);
if (saved_reg.is_xmm()) {
m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_gpr64(saved_reg, RSP),
if (use_new_xmms) {
if (n_xmm_backups > 0) {
int j = n_xmm_backups;
for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) {
auto& saved_reg = allocs.used_saved_regs.at(i);
if (saved_reg.is_xmm()) {
j--;
int offset = j * XMM_SIZE;
m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_reg_offset(saved_reg, RSP, offset),
InstructionInfo::EPILOGUE);
}
}
assert(j == 0);
m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm(RSP, xmm_backup_stack_offset),
InstructionInfo::EPILOGUE);
m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm8s(RSP, XMM_SIZE), InstructionInfo::EPILOGUE);
}
} else {
for (int i = int(allocs.used_saved_regs.size()); i-- > 0;) {
auto& saved_reg = allocs.used_saved_regs.at(i);
if (saved_reg.is_xmm()) {
m_gen.add_instr_no_ir(f_rec, IGen::load128_xmm128_gpr64(saved_reg, RSP),
InstructionInfo::EPILOGUE);
m_gen.add_instr_no_ir(f_rec, IGen::add_gpr64_imm8s(RSP, XMM_SIZE),
InstructionInfo::EPILOGUE);
}
}
}

View file

@ -2,6 +2,7 @@
#include "IR.h"
#include "goalc/emitter/IGen.h"
#include "third-party/fmt/core.h"
#include "common/symbols.h"
using namespace emitter;
@ -202,10 +203,19 @@ void IR_LoadSymbolPointer::do_codegen(emitter::ObjectGenerator* gen,
const AllocationResult& allocs,
emitter::IR_Record irec) {
auto dest_reg = get_reg(m_dest, allocs, irec);
// todo, could be single lea opcode
gen->add_instr(IGen::mov_gpr64_gpr64(dest_reg, gRegInfo.get_st_reg()), irec);
auto add = gen->add_instr(IGen::add_gpr64_imm32s(dest_reg, 0x0afecafe), irec);
gen->link_instruction_symbol_ptr(add, m_name);
if (m_name == "#f") {
static_assert(FIX_SYM_FALSE == 0, "false symbol location");
gen->add_instr(IGen::mov_gpr64_gpr64(dest_reg, gRegInfo.get_st_reg()), irec);
} else if (m_name == "#t") {
gen->add_instr(IGen::lea_reg_plus_off8(dest_reg, gRegInfo.get_st_reg(), FIX_SYM_TRUE), irec);
} else if (m_name == "_empty_") {
gen->add_instr(IGen::lea_reg_plus_off8(dest_reg, gRegInfo.get_st_reg(), FIX_SYM_EMPTY_PAIR),
irec);
} else {
auto instr =
gen->add_instr(IGen::lea_reg_plus_off32(dest_reg, gRegInfo.get_st_reg(), 0x0afecafe), irec);
gen->link_instruction_symbol_ptr(instr, m_name);
}
}
/////////////////////
@ -1002,12 +1012,15 @@ void IR_GetStackAddr::do_codegen(emitter::ObjectGenerator* gen,
auto dest_reg = get_reg(m_dest, allocs, irec);
int offset = GPR_SIZE * allocs.get_slot_for_var(m_slot);
// dest = offset
load_constant(offset, gen, irec, dest_reg);
// dest = offset + RSP
gen->add_instr(IGen::add_gpr64_gpr64(dest_reg, RSP), irec);
// dest = offset + RSP - offset
gen->add_instr(IGen::sub_gpr64_gpr64(dest_reg, gRegInfo.get_offset_reg()), irec);
if (offset == 0) {
gen->add_instr(IGen::mov_gpr64_gpr64(dest_reg, RSP), irec);
gen->add_instr(IGen::sub_gpr64_gpr64(dest_reg, gRegInfo.get_offset_reg()), irec);
} else {
// dest = offset + RSP
gen->add_instr(IGen::lea_reg_plus_off(dest_reg, RSP, offset), irec);
// dest = offset + RSP - offset
gen->add_instr(IGen::sub_gpr64_gpr64(dest_reg, gRegInfo.get_offset_reg()), irec);
}
}
///////////////////////

View file

@ -966,6 +966,62 @@ class IGen {
return instr;
}
/*!
 * Encode "lea dest, [base + offset]" (opcode 0x8d, REX.W set) with a 4-byte offset.
 * Both registers must be GPRs and offset must fit in a signed 32-bit value.
 */
static Instruction lea_reg_plus_off32(Register dest, Register base, s64 offset) {
  assert(dest.is_gpr());
  assert(base.is_gpr());
  assert(offset >= INT32_MIN && offset <= INT32_MAX);

  constexpr uint8_t kLeaOpcode = 0x8d;
  constexpr uint8_t kModDisp32 = 2;  // modrm.mod: register + 32-bit displacement

  Instruction lea(kLeaOpcode);
  lea.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), kModDisp32, base.hw_id(), true);
  lea.set(Imm(4, offset));  // the 4 offset bytes
  return lea;
}
/*!
 * Encode "lea dest, [base + offset]" (opcode 0x8d, REX.W set) with a 1-byte offset.
 * Both registers must be GPRs and offset must fit in a signed 8-bit value.
 */
static Instruction lea_reg_plus_off8(Register dest, Register base, s64 offset) {
  assert(dest.is_gpr());
  assert(base.is_gpr());
  assert(offset >= INT8_MIN && offset <= INT8_MAX);

  constexpr uint8_t kLeaOpcode = 0x8d;
  constexpr uint8_t kModDisp8 = 1;  // modrm.mod: register + 8-bit displacement

  Instruction lea(kLeaOpcode);
  lea.set_modrm_rex_sib_for_reg_reg_disp(dest.hw_id(), kModDisp8, base.hw_id(), true);
  lea.set(Imm(1, offset));  // the single offset byte
  return lea;
}
/*!
 * Encode "lea dest, [base + offset]", picking the shortest encoding that holds offset:
 * the 1-byte form when offset fits in a signed byte, otherwise the 4-byte form.
 * offset must fit in a signed 32-bit value.
 */
static Instruction lea_reg_plus_off(Register dest, Register base, s64 offset) {
  if (offset >= INT8_MIN && offset <= INT8_MAX) {
    return lea_reg_plus_off8(dest, base, offset);
  }
  // Range-check here rather than in a trailing "else { assert(false); }" so that every path
  // returns a value, even when asserts are compiled out.
  assert(offset >= INT32_MIN && offset <= INT32_MAX);
  return lea_reg_plus_off32(dest, base, offset);
}
/*!
 * Store the low 32 bits of an xmm register to [base + offset] (movss, f3 0f 11) using a
 * 4-byte offset. offset must fit in a signed 32-bit value.
 */
static Instruction store32_xmm32_gpr64_plus_s32(Register base, Register xmm_value, s64 offset) {
  assert(xmm_value.is_xmm());
  assert(base.is_gpr());
  assert(offset >= INT32_MIN && offset <= INT32_MAX);

  constexpr uint8_t kPrefix = 0xf3;
  constexpr uint8_t kEscape = 0x0f;
  constexpr uint8_t kStoreOpcode = 0x11;
  constexpr uint8_t kModDisp32 = 2;  // modrm.mod: register + 32-bit displacement

  // The f3 prefix is given as the first opcode byte; swap_op0_rex() must stay the last step.
  Instruction movss(kPrefix);
  movss.set_op2(kEscape);
  movss.set_op3(kStoreOpcode);
  movss.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), kModDisp32, base.hw_id(), false);
  movss.set(Imm(4, offset));
  movss.swap_op0_rex();
  return movss;
}
/*!
 * Store the low 32 bits of an xmm register to [base + offset] (movss, f3 0f 11) using a
 * 1-byte offset. offset must fit in a signed 8-bit value.
 */
static Instruction store32_xmm32_gpr64_plus_s8(Register base, Register xmm_value, s64 offset) {
  assert(xmm_value.is_xmm());
  assert(base.is_gpr());
  assert(offset >= INT8_MIN && offset <= INT8_MAX);

  constexpr uint8_t kPrefix = 0xf3;
  constexpr uint8_t kEscape = 0x0f;
  constexpr uint8_t kStoreOpcode = 0x11;
  constexpr uint8_t kModDisp8 = 1;  // modrm.mod: register + 8-bit displacement

  // The f3 prefix is given as the first opcode byte; swap_op0_rex() must stay the last step.
  Instruction movss(kPrefix);
  movss.set_op2(kEscape);
  movss.set_op3(kStoreOpcode);
  movss.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), kModDisp8, base.hw_id(), false);
  movss.set(Imm(1, offset));
  movss.swap_op0_rex();
  return movss;
}
static Instruction load32_xmm32_gpr64_plus_gpr64_plus_s32(Register xmm_dest,
Register addr1,
Register addr2,
@ -985,6 +1041,32 @@ class IGen {
return instr;
}
/*!
 * Load 32 bits from [base + offset] into an xmm register (movss, f3 0f 10) using a
 * 4-byte offset. offset must fit in a signed 32-bit value.
 */
static Instruction load32_xmm32_gpr64_plus_s32(Register xmm_dest, Register base, s64 offset) {
  assert(xmm_dest.is_xmm());
  assert(base.is_gpr());
  assert(offset >= INT32_MIN && offset <= INT32_MAX);

  constexpr uint8_t kPrefix = 0xf3;
  constexpr uint8_t kEscape = 0x0f;
  constexpr uint8_t kLoadOpcode = 0x10;
  constexpr uint8_t kModDisp32 = 2;  // modrm.mod: register + 32-bit displacement

  // The f3 prefix is given as the first opcode byte; swap_op0_rex() must stay the last step.
  Instruction movss(kPrefix);
  movss.set_op2(kEscape);
  movss.set_op3(kLoadOpcode);
  movss.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), kModDisp32, base.hw_id(), false);
  movss.set(Imm(4, offset));
  movss.swap_op0_rex();
  return movss;
}
/*!
 * Load 32 bits from [base + offset] into an xmm register (movss, f3 0f 10) using a
 * 1-byte offset. offset must fit in a signed 8-bit value.
 */
static Instruction load32_xmm32_gpr64_plus_s8(Register xmm_dest, Register base, s64 offset) {
  assert(xmm_dest.is_xmm());
  assert(base.is_gpr());
  assert(offset >= INT8_MIN && offset <= INT8_MAX);

  constexpr uint8_t kPrefix = 0xf3;
  constexpr uint8_t kEscape = 0x0f;
  constexpr uint8_t kLoadOpcode = 0x10;
  constexpr uint8_t kModDisp8 = 1;  // modrm.mod: register + 8-bit displacement

  // The f3 prefix is given as the first opcode byte; swap_op0_rex() must stay the last step.
  Instruction movss(kPrefix);
  movss.set_op2(kEscape);
  movss.set_op3(kLoadOpcode);
  movss.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), kModDisp8, base.hw_id(), false);
  movss.set(Imm(1, offset));
  movss.swap_op0_rex();
  return movss;
}
static Instruction load_goal_xmm32(Register xmm_dest, Register addr, Register off, s64 offset) {
if (offset == 0) {
return load32_xmm32_gpr64_plus_gpr64(xmm_dest, addr, off);
@ -1009,6 +1091,30 @@ class IGen {
}
}
/*!
 * Store the low 32 bits of xmm_value to [base + offset], picking the shortest encoding that
 * holds offset: the 1-byte form when it fits in a signed byte, otherwise the 4-byte form.
 * offset must fit in a signed 32-bit value.
 */
static Instruction store_reg_offset_xmm32(Register base, Register xmm_value, s64 offset) {
  assert(base.is_gpr());
  assert(xmm_value.is_xmm());
  if (offset >= INT8_MIN && offset <= INT8_MAX) {
    return store32_xmm32_gpr64_plus_s8(base, xmm_value, offset);
  }
  // Range-check here rather than in a trailing "else { assert(false); }" so that every path
  // returns a value, even when asserts are compiled out.
  assert(offset >= INT32_MIN && offset <= INT32_MAX);
  return store32_xmm32_gpr64_plus_s32(base, xmm_value, offset);
}
/*!
 * Load 32 bits from [base + offset] into xmm_dest, picking the shortest encoding that holds
 * offset: the 1-byte form when it fits in a signed byte, otherwise the 4-byte form.
 * offset must fit in a signed 32-bit value.
 */
static Instruction load_reg_offset_xmm32(Register xmm_dest, Register base, s64 offset) {
  assert(base.is_gpr());
  assert(xmm_dest.is_xmm());
  if (offset >= INT8_MIN && offset <= INT8_MAX) {
    return load32_xmm32_gpr64_plus_s8(xmm_dest, base, offset);
  }
  // Range-check here rather than in a trailing "else { assert(false); }" so that every path
  // returns a value, even when asserts are compiled out.
  assert(offset >= INT32_MIN && offset <= INT32_MAX);
  return load32_xmm32_gpr64_plus_s32(xmm_dest, base, offset);
}
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// LOADS n' STORES - XMM128
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -1019,8 +1125,8 @@ class IGen {
static Instruction store128_gpr64_xmm128(Register gpr_addr, Register xmm_value) {
assert(gpr_addr.is_gpr());
assert(xmm_value.is_xmm());
// Instruction instr(0x66);
Instruction instr(0xf3);
Instruction instr(0x66);
// Instruction instr(0xf3);
instr.set_op2(0x0f);
instr.set_op3(0x7f);
instr.set_modrm_and_rex_for_reg_addr(xmm_value.hw_id(), gpr_addr.hw_id(), false);
@ -1028,11 +1134,39 @@ class IGen {
return instr;
}
/*!
 * Store a full 128-bit xmm register to [gpr_addr + offset] (movdqa, 66 0f 7f) using a
 * 4-byte offset. offset must fit in a signed 32-bit value.
 * Note: movdqa is the aligned form; the effective address is expected to be 16-byte aligned.
 */
static Instruction store128_gpr64_xmm128_s32(Register gpr_addr, Register xmm_value, s64 offset) {
  assert(gpr_addr.is_gpr());
  assert(xmm_value.is_xmm());
  assert(offset >= INT32_MIN && offset <= INT32_MAX);

  constexpr uint8_t kPrefix = 0x66;  // 0xf3 here would select the unaligned form instead
  constexpr uint8_t kEscape = 0x0f;
  constexpr uint8_t kStoreOpcode = 0x7f;
  constexpr uint8_t kModDisp32 = 2;  // modrm.mod: register + 32-bit displacement

  // The 66 prefix is given as the first opcode byte; swap_op0_rex() must stay the last step.
  Instruction movdqa(kPrefix);
  movdqa.set_op2(kEscape);
  movdqa.set_op3(kStoreOpcode);
  movdqa.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), kModDisp32, gpr_addr.hw_id(),
                                            false);
  movdqa.set(Imm(4, offset));
  movdqa.swap_op0_rex();
  return movdqa;
}
/*!
 * Store a full 128-bit xmm register to [gpr_addr + offset] (movdqa, 66 0f 7f) using a
 * 1-byte offset. offset must fit in a signed 8-bit value.
 * Note: movdqa is the aligned form; the effective address is expected to be 16-byte aligned.
 */
static Instruction store128_gpr64_xmm128_s8(Register gpr_addr, Register xmm_value, s64 offset) {
  assert(gpr_addr.is_gpr());
  assert(xmm_value.is_xmm());
  assert(offset >= INT8_MIN && offset <= INT8_MAX);

  constexpr uint8_t kPrefix = 0x66;  // 0xf3 here would select the unaligned form instead
  constexpr uint8_t kEscape = 0x0f;
  constexpr uint8_t kStoreOpcode = 0x7f;
  constexpr uint8_t kModDisp8 = 1;  // modrm.mod: register + 8-bit displacement

  // The 66 prefix is given as the first opcode byte; swap_op0_rex() must stay the last step.
  Instruction movdqa(kPrefix);
  movdqa.set_op2(kEscape);
  movdqa.set_op3(kStoreOpcode);
  movdqa.set_modrm_rex_sib_for_reg_reg_disp(xmm_value.hw_id(), kModDisp8, gpr_addr.hw_id(),
                                            false);
  movdqa.set(Imm(1, offset));
  movdqa.swap_op0_rex();
  return movdqa;
}
static Instruction load128_xmm128_gpr64(Register xmm_dest, Register gpr_addr) {
assert(gpr_addr.is_gpr());
assert(xmm_dest.is_xmm());
// Instruction instr(0x66);
Instruction instr(0xf3);
Instruction instr(0x66);
// Instruction instr(0xf3);
instr.set_op2(0x0f);
instr.set_op3(0x6f);
instr.set_modrm_and_rex_for_reg_addr(xmm_dest.hw_id(), gpr_addr.hw_id(), false);
@ -1040,6 +1174,58 @@ class IGen {
return instr;
}
/*!
 * Load a full 128-bit xmm register from [gpr_addr + offset] (movdqa, 66 0f 6f) using a
 * 4-byte offset. offset must fit in a signed 32-bit value.
 * Note: movdqa is the aligned form; the effective address is expected to be 16-byte aligned.
 */
static Instruction load128_xmm128_gpr64_s32(Register xmm_dest, Register gpr_addr, s64 offset) {
  assert(gpr_addr.is_gpr());
  assert(xmm_dest.is_xmm());
  assert(offset >= INT32_MIN && offset <= INT32_MAX);

  constexpr uint8_t kPrefix = 0x66;  // 0xf3 here would select the unaligned form instead
  constexpr uint8_t kEscape = 0x0f;
  constexpr uint8_t kLoadOpcode = 0x6f;
  constexpr uint8_t kModDisp32 = 2;  // modrm.mod: register + 32-bit displacement

  // The 66 prefix is given as the first opcode byte; swap_op0_rex() must stay the last step.
  Instruction movdqa(kPrefix);
  movdqa.set_op2(kEscape);
  movdqa.set_op3(kLoadOpcode);
  movdqa.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), kModDisp32, gpr_addr.hw_id(),
                                            false);
  movdqa.set(Imm(4, offset));
  movdqa.swap_op0_rex();
  return movdqa;
}
/*!
 * Load a full 128-bit xmm register from [gpr_addr + offset] (movdqa, 66 0f 6f) using a
 * 1-byte offset. offset must fit in a signed 8-bit value.
 * Note: movdqa is the aligned form; the effective address is expected to be 16-byte aligned.
 */
static Instruction load128_xmm128_gpr64_s8(Register xmm_dest, Register gpr_addr, s64 offset) {
  assert(gpr_addr.is_gpr());
  assert(xmm_dest.is_xmm());
  assert(offset >= INT8_MIN && offset <= INT8_MAX);

  constexpr uint8_t kPrefix = 0x66;  // 0xf3 here would select the unaligned form instead
  constexpr uint8_t kEscape = 0x0f;
  constexpr uint8_t kLoadOpcode = 0x6f;
  constexpr uint8_t kModDisp8 = 1;  // modrm.mod: register + 8-bit displacement

  // The 66 prefix is given as the first opcode byte; swap_op0_rex() must stay the last step.
  Instruction movdqa(kPrefix);
  movdqa.set_op2(kEscape);
  movdqa.set_op3(kLoadOpcode);
  movdqa.set_modrm_rex_sib_for_reg_reg_disp(xmm_dest.hw_id(), kModDisp8, gpr_addr.hw_id(),
                                            false);
  movdqa.set(Imm(1, offset));
  movdqa.swap_op0_rex();
  return movdqa;
}
/*!
 * Load a 128-bit xmm register from [base + offset], picking the shortest encoding:
 * no offset bytes when offset is 0, the 1-byte form when it fits in a signed byte, and the
 * 4-byte form otherwise. offset must fit in a signed 32-bit value.
 */
static Instruction load128_xmm128_reg_offset(Register xmm_dest, Register base, s64 offset) {
  if (offset == 0) {
    return load128_xmm128_gpr64(xmm_dest, base);
  }
  if (offset >= INT8_MIN && offset <= INT8_MAX) {
    return load128_xmm128_gpr64_s8(xmm_dest, base, offset);
  }
  // Range-check here rather than in a trailing "else { assert(false); }" so that every path
  // returns a value, even when asserts are compiled out.
  assert(offset >= INT32_MIN && offset <= INT32_MAX);
  return load128_xmm128_gpr64_s32(xmm_dest, base, offset);
}
/*!
 * Store a 128-bit xmm register to [base + offset], picking the shortest encoding:
 * no offset bytes when offset is 0, the 1-byte form when it fits in a signed byte, and the
 * 4-byte form otherwise. offset must fit in a signed 32-bit value.
 */
static Instruction store128_xmm128_reg_offset(Register base, Register xmm_val, s64 offset) {
  if (offset == 0) {
    return store128_gpr64_xmm128(base, xmm_val);
  }
  if (offset >= INT8_MIN && offset <= INT8_MAX) {
    return store128_gpr64_xmm128_s8(base, xmm_val, offset);
  }
  // Range-check here rather than in a trailing "else { assert(false); }" so that every path
  // returns a value, even when asserts are compiled out.
  assert(offset >= INT32_MIN && offset <= INT32_MAX);
  return store128_gpr64_xmm128_s32(base, xmm_val, offset);
}
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
// RIP loads and stores
//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -1231,7 +1417,7 @@ class IGen {
assert(dst_reg.is_gpr());
assert(src_reg.is_gpr());
Instruction instr(0x8b);
instr.set_modrm_rex_sib_for_reg_reg_disp32(dst_reg.hw_id(), 2, src_reg.hw_id(), true);
instr.set_modrm_rex_sib_for_reg_reg_disp(dst_reg.hw_id(), 2, src_reg.hw_id(), true);
instr.set_disp(Imm(4, offset));
return instr;
}
@ -1243,7 +1429,7 @@ class IGen {
assert(addr.is_gpr());
assert(value.is_gpr());
Instruction instr(0x89);
instr.set_modrm_rex_sib_for_reg_reg_disp32(value.hw_id(), 2, addr.hw_id(), true);
instr.set_modrm_rex_sib_for_reg_reg_disp(value.hw_id(), 2, addr.hw_id(), true);
instr.set_disp(Imm(4, offset));
return instr;
}

View file

@ -839,9 +839,9 @@ struct Instruction {
}
/*!
* Set up modrm and rex for the commonly used 32-bit immediate displacement indexing mode.
* Set up modrm and rex for the commonly used immediate displacement indexing mode.
*/
void set_modrm_rex_sib_for_reg_reg_disp32(uint8_t reg, uint8_t mod, uint8_t rm, bool rex_w) {
void set_modrm_rex_sib_for_reg_reg_disp(uint8_t reg, uint8_t mod, uint8_t rm, bool rex_w) {
ModRM modrm;
bool rex_r = false;

View file

@ -435,12 +435,12 @@ bool can_var_be_assigned(int var,
if (move_eliminator) {
if (enable_fancy_coloring) {
if (lr.dies_next_at_instr(instr) && other_lr.becomes_live_at_instr(instr) &&
in.instructions.at(instr).is_move) {
(allow_read_write_same_reg || in.instructions.at(instr).is_move)) {
allowed_by_move_eliminator = true;
}
if (lr.becomes_live_at_instr(instr) && other_lr.dies_next_at_instr(instr) &&
in.instructions.at(instr).is_move) {
(allow_read_write_same_reg || in.instructions.at(instr).is_move)) {
allowed_by_move_eliminator = true;
}
} else {
@ -456,7 +456,7 @@ bool can_var_be_assigned(int var,
}
if (!allowed_by_move_eliminator) {
if (debug_trace >= 2) {
if (debug_trace >= 1) {
printf("at idx %d, %s conflicts\n", instr, other_lr.print_assignment().c_str());
}
@ -470,7 +470,7 @@ bool can_var_be_assigned(int var,
for (int instr = lr.min + 1; instr <= lr.max - 1; instr++) {
for (auto clobber : in.instructions.at(instr).clobber) {
if (ass.occupies_reg(clobber)) {
if (debug_trace >= 2) {
if (debug_trace >= 1) {
printf("at idx %d clobber\n", instr);
}
@ -482,7 +482,7 @@ bool can_var_be_assigned(int var,
for (int instr = lr.min; instr <= lr.max; instr++) {
for (auto exclusive : in.instructions.at(instr).exclude) {
if (ass.occupies_reg(exclusive)) {
if (debug_trace >= 2) {
if (debug_trace >= 1) {
printf("at idx %d exclusive conflict\n", instr);
}
@ -495,7 +495,7 @@ bool can_var_be_assigned(int var,
for (int instr = lr.min; instr <= lr.max; instr++) {
if (lr.has_constraint && lr.assignment.at(instr - lr.min).is_assigned()) {
if (!(ass.occupies_same_reg(lr.assignment.at(instr - lr.min)))) {
if (debug_trace >= 2) {
if (debug_trace >= 1) {
printf("at idx %d self bad (%s) (%s)\n", instr,
lr.assignment.at(instr - lr.min).to_string().c_str(), ass.to_string().c_str());
}
@ -526,12 +526,12 @@ bool assignment_ok_at(int var,
if (move_eliminator) {
if (enable_fancy_coloring) {
if (lr.dies_next_at_instr(idx) && other_lr.becomes_live_at_instr(idx) &&
in.instructions.at(idx).is_move) {
(allow_read_write_same_reg || in.instructions.at(idx).is_move)) {
allowed_by_move_eliminator = true;
}
if (lr.becomes_live_at_instr(idx) && other_lr.dies_next_at_instr(idx) &&
in.instructions.at(idx).is_move) {
(allow_read_write_same_reg || in.instructions.at(idx).is_move)) {
allowed_by_move_eliminator = true;
}
} else {
@ -668,6 +668,7 @@ bool try_spill_coloring(int var, RegAllocCache* cache, const AllocationInput& in
for (int instr = lr.min; instr <= lr.max; instr++) {
// bonus_instructions.at(instr).clear();
StackOp::Op bonus;
bonus.reg_class = cache->iregs.at(var).reg_class;
// we may have a constraint in here
auto& current_assignment = lr.assignment.at(instr - lr.min);
@ -821,15 +822,15 @@ bool do_allocation_for_var(int var,
auto& first_instr = in.instructions.at(lr.min);
auto& last_instr = in.instructions.at(lr.max);
if (first_instr.is_move) {
auto& possible_coloring = cache->live_ranges.at(first_instr.read.front().id).get(lr.min);
if (!colored && last_instr.is_move) {
auto& possible_coloring = cache->live_ranges.at(last_instr.write.front().id).get(lr.max);
if (possible_coloring.is_assigned() && in_vec(all_reg_order, possible_coloring.reg)) {
colored = try_assignment_for_var(var, possible_coloring, cache, in, debug_trace);
}
}
if (!colored && last_instr.is_move) {
auto& possible_coloring = cache->live_ranges.at(last_instr.write.front().id).get(lr.max);
if (!colored && first_instr.is_move) {
auto& possible_coloring = cache->live_ranges.at(first_instr.read.front().id).get(lr.min);
if (possible_coloring.is_assigned() && in_vec(all_reg_order, possible_coloring.reg)) {
colored = try_assignment_for_var(var, possible_coloring, cache, in, debug_trace);
}

View file

@ -12,6 +12,7 @@ struct StackOp {
struct Op {
int slot = -1;
emitter::Register reg;
RegClass reg_class = RegClass::INVALID;
bool load = false; // load from reg before instruction?
bool store = false; // store into reg after instruction?
};
@ -47,6 +48,8 @@ constexpr bool enable_fancy_coloring = true;
// will attempt to allocate in a way to reduce the number of moves.
constexpr bool move_eliminator = true;
constexpr bool allow_read_write_same_reg = true;
// Indication of where a variable is live and what assignment it has at each point in the range.
struct LiveInfo {
public:

View file

@ -0,0 +1,52 @@
;; Test for spilling floating point variables to the stack.
;; force-xmm-spill binds 22 float locals (1.0 through 22.0) and then adds them all in a
;; single expression, so all 22 are needed at the same time.
;; NOTE(review): presumably 22 is more than the xmm registers available to the allocator,
;; which is what forces a spill - confirm against the register allocator settings.
(defun force-xmm-spill ()
(let ((v00 1.0)
(v01 2.0)
(v02 3.0)
(v03 4.0)
(v04 5.0)
(v05 6.0)
(v06 7.0)
(v07 8.0)
(v08 9.0)
(v09 10.0)
(v10 11.0)
(v11 12.0)
(v12 13.0)
(v13 14.0)
(v14 15.0)
(v15 16.0)
(v16 17.0)
(v17 18.0)
(v18 19.0)
(v19 20.0)
(v20 21.0)
(v21 22.0)
)
;; 1 + 2 + ... + 22 = 253
(+ v00
v01
v02
v03
v04
v05
v06
v07
v08
v09
v10
v11
v12
v13
v14
v15
v16
v17
v18
v19
v20
v21
)
)
)
;; print the sum as a float followed by a newline
(format #t "~f~%" (force-xmm-spill))
0

View file

@ -357,6 +357,10 @@ TEST_F(WithGameTests, VFLoadStatic) {
runner.run_static_test(env, testCategory, "test-load-static-vector.gc", {"5.3000\n0\n"});
}
// Runs test-xmm-spill.gc and expects the output "253.0000" followed by "0".
TEST_F(WithGameTests, XMMSpill) {
runner.run_static_test(env, testCategory, "test-xmm-spill.gc", {"253.0000\n0\n"});
}
TEST(TypeConsistency, TypeConsistency) {
Compiler compiler;
compiler.enable_throw_on_redefines();

View file

@ -57,68 +57,68 @@ TEST(CodeTester, xmm_store_128) {
// movdqa [r14], xmm3
// movdqa [rbx], xmm14
// movdqa [r14], xmm13
// tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM3));
// tester.emit(IGen::store128_gpr64_xmm128(R14, XMM3));
// tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM14));
// tester.emit(IGen::store128_gpr64_xmm128(R14, XMM13));
// EXPECT_EQ(tester.dump_to_hex_string(),
// "66 0f 7f 1b 66 41 0f 7f 1e 66 44 0f 7f 33 66 45 0f 7f 2e");
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(RSP, XMM1));
// EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 7f 0c 24"); // requires SIB byte.
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(R12, XMM13));
// EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 7f 2c 24"); // requires SIB byte and REX
// byte
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM1));
// EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 7f 4d 00");
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM11));
// EXPECT_EQ(tester.dump_to_hex_string(), "66 44 0f 7f 5d 00");
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(R13, XMM2));
// EXPECT_EQ(tester.dump_to_hex_string(), "66 41 0f 7f 55 00");
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(R13, XMM12));
// EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 7f 65 00");
tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM3));
tester.emit(IGen::store128_gpr64_xmm128(R14, XMM3));
tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM14));
tester.emit(IGen::store128_gpr64_xmm128(R14, XMM13));
EXPECT_EQ(tester.dump_to_hex_string(),
"f3 0f 7f 1b f3 41 0f 7f 1e f3 44 0f 7f 33 f3 45 0f 7f 2e");
"66 0f 7f 1b 66 41 0f 7f 1e 66 44 0f 7f 33 66 45 0f 7f 2e");
tester.clear();
tester.emit(IGen::store128_gpr64_xmm128(RSP, XMM1));
EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 7f 0c 24"); // requires SIB byte.
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 7f 0c 24"); // requires SIB byte.
tester.clear();
tester.emit(IGen::store128_gpr64_xmm128(R12, XMM13));
EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 7f 2c 24"); // requires SIB byte and REX byte
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 7f 2c 24"); // requires SIB byte and REX byte
tester.clear();
tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM1));
EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 7f 4d 00");
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 7f 4d 00");
tester.clear();
tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM11));
EXPECT_EQ(tester.dump_to_hex_string(), "f3 44 0f 7f 5d 00");
EXPECT_EQ(tester.dump_to_hex_string(), "66 44 0f 7f 5d 00");
tester.clear();
tester.emit(IGen::store128_gpr64_xmm128(R13, XMM2));
EXPECT_EQ(tester.dump_to_hex_string(), "f3 41 0f 7f 55 00");
EXPECT_EQ(tester.dump_to_hex_string(), "66 41 0f 7f 55 00");
tester.clear();
tester.emit(IGen::store128_gpr64_xmm128(R13, XMM12));
EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 7f 65 00");
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 7f 65 00");
// tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM3));
// tester.emit(IGen::store128_gpr64_xmm128(R14, XMM3));
// tester.emit(IGen::store128_gpr64_xmm128(RBX, XMM14));
// tester.emit(IGen::store128_gpr64_xmm128(R14, XMM13));
// EXPECT_EQ(tester.dump_to_hex_string(),
// "f3 0f 7f 1b f3 41 0f 7f 1e f3 44 0f 7f 33 f3 45 0f 7f 2e");
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(RSP, XMM1));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 7f 0c 24"); // requires SIB byte.
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(R12, XMM13));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 7f 2c 24"); // requires SIB byte and REX
// byte
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM1));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 7f 4d 00");
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(RBP, XMM11));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 44 0f 7f 5d 00");
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(R13, XMM2));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 41 0f 7f 55 00");
//
// tester.clear();
// tester.emit(IGen::store128_gpr64_xmm128(R13, XMM12));
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 7f 65 00");
}
TEST(CodeTester, sub_gpr64_imm8) {
@ -147,67 +147,67 @@ TEST(CodeTester, xmm_load_128) {
CodeTester tester;
tester.init_code_buffer(256);
tester.emit(IGen::load128_xmm128_gpr64(XMM3, RBX));
tester.emit(IGen::load128_xmm128_gpr64(XMM3, R14));
tester.emit(IGen::load128_xmm128_gpr64(XMM14, RBX));
tester.emit(IGen::load128_xmm128_gpr64(XMM13, R14));
EXPECT_EQ(tester.dump_to_hex_string(),
"f3 0f 6f 1b f3 41 0f 6f 1e f3 44 0f 6f 33 f3 45 0f 6f 2e");
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM1, RSP));
EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 6f 0c 24"); // requires SIB byte.
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM13, R12));
EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 6f 2c 24"); // requires SIB byte and REX byte
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM1, RBP));
EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 6f 4d 00");
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM11, RBP));
EXPECT_EQ(tester.dump_to_hex_string(), "f3 44 0f 6f 5d 00");
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM2, R13));
EXPECT_EQ(tester.dump_to_hex_string(), "f3 41 0f 6f 55 00");
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM12, R13));
EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 6f 65 00");
// tester.emit(IGen::load128_xmm128_gpr64(XMM3, RBX));
// tester.emit(IGen::load128_xmm128_gpr64(XMM3, R14));
// tester.emit(IGen::load128_xmm128_gpr64(XMM14, RBX));
// tester.emit(IGen::load128_xmm128_gpr64(XMM13, R14));
// EXPECT_EQ(tester.dump_to_hex_string(),
// "66 0f 6f 1b 66 41 0f 6f 1e 66 44 0f 6f 33 66 45 0f 6f 2e");
// "f3 0f 6f 1b f3 41 0f 6f 1e f3 44 0f 6f 33 f3 45 0f 6f 2e");
//
// tester.clear();
// tester.emit(IGen::load128_xmm128_gpr64(XMM1, RSP));
// EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 6f 0c 24"); // requires SIB byte.
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 6f 0c 24"); // requires SIB byte.
//
// tester.clear();
// tester.emit(IGen::load128_xmm128_gpr64(XMM13, R12));
// EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 6f 2c 24"); // requires SIB byte and REX
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 6f 2c 24"); // requires SIB byte and REX
// byte
//
// tester.clear();
// tester.emit(IGen::load128_xmm128_gpr64(XMM1, RBP));
// EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 6f 4d 00");
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 0f 6f 4d 00");
//
// tester.clear();
// tester.emit(IGen::load128_xmm128_gpr64(XMM11, RBP));
// EXPECT_EQ(tester.dump_to_hex_string(), "66 44 0f 6f 5d 00");
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 44 0f 6f 5d 00");
//
// tester.clear();
// tester.emit(IGen::load128_xmm128_gpr64(XMM2, R13));
// EXPECT_EQ(tester.dump_to_hex_string(), "66 41 0f 6f 55 00");
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 41 0f 6f 55 00");
//
// tester.clear();
// tester.emit(IGen::load128_xmm128_gpr64(XMM12, R13));
// EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 6f 65 00");
// EXPECT_EQ(tester.dump_to_hex_string(), "f3 45 0f 6f 65 00");
tester.emit(IGen::load128_xmm128_gpr64(XMM3, RBX));
tester.emit(IGen::load128_xmm128_gpr64(XMM3, R14));
tester.emit(IGen::load128_xmm128_gpr64(XMM14, RBX));
tester.emit(IGen::load128_xmm128_gpr64(XMM13, R14));
EXPECT_EQ(tester.dump_to_hex_string(),
"66 0f 6f 1b 66 41 0f 6f 1e 66 44 0f 6f 33 66 45 0f 6f 2e");
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM1, RSP));
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 6f 0c 24"); // requires SIB byte.
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM13, R12));
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 6f 2c 24"); // requires SIB byte and REX byte
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM1, RBP));
EXPECT_EQ(tester.dump_to_hex_string(), "66 0f 6f 4d 00");
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM11, RBP));
EXPECT_EQ(tester.dump_to_hex_string(), "66 44 0f 6f 5d 00");
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM2, R13));
EXPECT_EQ(tester.dump_to_hex_string(), "66 41 0f 6f 55 00");
tester.clear();
tester.emit(IGen::load128_xmm128_gpr64(XMM12, R13));
EXPECT_EQ(tester.dump_to_hex_string(), "66 45 0f 6f 65 00");
}
TEST(CodeTester, push_pop_xmms) {

View file

@ -3802,3 +3802,83 @@ TEST(EmitterSlow, xmm32_move) {
}
}
}
TEST(Emitter, LEA) {
  // lea_reg_plus_off with an offset that fits in one byte (-3) and one that needs four
  // bytes (-300), for each combination of low/high destination and base registers.
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int small_offset = -3;
  const int large_offset = -300;

  tester.emit(IGen::lea_reg_plus_off(RDI, RSP, small_offset));
  tester.emit(IGen::lea_reg_plus_off(RDI, R12, small_offset));
  tester.emit(IGen::lea_reg_plus_off(R13, RSP, small_offset));
  tester.emit(IGen::lea_reg_plus_off(R13, R12, small_offset));

  tester.emit(IGen::lea_reg_plus_off(RDI, RSP, large_offset));
  tester.emit(IGen::lea_reg_plus_off(RDI, R12, large_offset));
  tester.emit(IGen::lea_reg_plus_off(R13, RSP, large_offset));
  tester.emit(IGen::lea_reg_plus_off(R13, R12, large_offset));

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "488D7C24FD"          // lea rdi, [rsp - 3]
            "498D7C24FD"          // lea rdi, [r12 - 3]
            "4C8D6C24FD"          // lea r13, [rsp - 3]
            "4D8D6C24FD"          // lea r13, [r12 - 3]
            "488DBC24D4FEFFFF"    // lea rdi, [rsp - 300]
            "498DBC24D4FEFFFF"    // lea rdi, [r12 - 300]
            "4C8DAC24D4FEFFFF"    // lea r13, [rsp - 300]
            "4D8DAC24D4FEFFFF");  // lea r13, [r12 - 300]
}
TEST(EmitterXMM, StackLoad32) {
  // 32-bit float loads from the stack with a 4-byte offset, for a low (xmm3) and a high
  // (xmm13) destination register.
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int stack_offset = -1234;
  tester.emit(IGen::load32_xmm32_gpr64_plus_s32(XMM0 + 3, RSP, stack_offset));
  tester.emit(IGen::load32_xmm32_gpr64_plus_s32(XMM0 + 13, RSP, stack_offset));

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "F30F109C242EFBFFFF"      // movss xmm3, [rsp - 1234]
            "F3440F10AC242EFBFFFF");  // movss xmm13, [rsp - 1234]
}
TEST(EmitterXMM, StackLoad8) {
  // 32-bit float loads from the stack with a 1-byte offset, for a low (xmm3) and a high
  // (xmm13) destination register.
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int stack_offset = -12;
  tester.emit(IGen::load32_xmm32_gpr64_plus_s8(XMM0 + 3, RSP, stack_offset));
  tester.emit(IGen::load32_xmm32_gpr64_plus_s8(XMM0 + 13, RSP, stack_offset));

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "F30F105C24F4"      // movss xmm3, [rsp - 12]
            "F3440F106C24F4");  // movss xmm13, [rsp - 12]
}
TEST(EmitterXMM, StackLoadFull32) {
  // 128-bit loads from the stack with a 4-byte offset, for a low (xmm3) and a high (xmm13)
  // destination register. Only the encoding is checked; nothing is executed.
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int stack_offset = -1234;
  tester.emit(IGen::load128_xmm128_gpr64_s32(XMM0 + 3, RSP, stack_offset));
  tester.emit(IGen::load128_xmm128_gpr64_s32(XMM0 + 13, RSP, stack_offset));

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "660F6F9C242EFBFFFF"      // movdqa xmm3, [rsp - 1234]
            "66440F6FAC242EFBFFFF");  // movdqa xmm13, [rsp - 1234]
}
TEST(EmitterXMM, StackLoadFull8) {
  // 128-bit loads from the stack with a 1-byte offset, for a low (xmm3) and a high (xmm13)
  // destination register. Only the encoding is checked; nothing is executed.
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int stack_offset = -12;
  tester.emit(IGen::load128_xmm128_gpr64_s8(XMM0 + 3, RSP, stack_offset));
  tester.emit(IGen::load128_xmm128_gpr64_s8(XMM0 + 13, RSP, stack_offset));

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "660F6F5C24F4"      // movdqa xmm3, [rsp - 12]
            "66440F6F6C24F4");  // movdqa xmm13, [rsp - 12]
}
TEST(EmitterXMM, StackStore32) {
  // 32-bit float stores to the stack with a 4-byte offset, for a low (xmm3) and a high
  // (xmm13) source register.
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int stack_offset = -1234;
  tester.emit(IGen::store32_xmm32_gpr64_plus_s32(RSP, XMM0 + 3, stack_offset));
  tester.emit(IGen::store32_xmm32_gpr64_plus_s32(RSP, XMM0 + 13, stack_offset));

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "F30F119C242EFBFFFF"      // movss [rsp - 1234], xmm3
            "F3440F11AC242EFBFFFF");  // movss [rsp - 1234], xmm13
}
TEST(EmitterXMM, StackStore8) {
  // 32-bit float stores to the stack with a 1-byte offset, for a low (xmm3) and a high
  // (xmm13) source register.
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int stack_offset = -12;
  tester.emit(IGen::store32_xmm32_gpr64_plus_s8(RSP, XMM0 + 3, stack_offset));
  tester.emit(IGen::store32_xmm32_gpr64_plus_s8(RSP, XMM0 + 13, stack_offset));

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "F30F115C24F4"      // movss [rsp - 12], xmm3
            "F3440F116C24F4");  // movss [rsp - 12], xmm13
}
TEST(EmitterXMM, StackStoreFull32) {
  // 128-bit stores to the stack with a 4-byte offset, for a low (xmm3) and a high (xmm13)
  // source register. Only the encoding is checked; nothing is executed.
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int stack_offset = -1234;
  tester.emit(IGen::store128_gpr64_xmm128_s32(RSP, XMM0 + 3, stack_offset));
  tester.emit(IGen::store128_gpr64_xmm128_s32(RSP, XMM0 + 13, stack_offset));

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "660F7F9C242EFBFFFF"      // movdqa [rsp - 1234], xmm3
            "66440F7FAC242EFBFFFF");  // movdqa [rsp - 1234], xmm13
}
TEST(EmitterXMM, StackStoreFull8) {
  // 128-bit stores to the stack with a 1-byte offset, for a low (xmm3) and a high (xmm13)
  // source register. Only the encoding is checked; nothing is executed.
  CodeTester tester;
  tester.init_code_buffer(1024);

  const int stack_offset = -12;
  tester.emit(IGen::store128_gpr64_xmm128_s8(RSP, XMM0 + 3, stack_offset));
  tester.emit(IGen::store128_gpr64_xmm128_s8(RSP, XMM0 + 13, stack_offset));

  EXPECT_EQ(tester.dump_to_hex_string(true),
            "660F7F5C24F4"      // movdqa [rsp - 12], xmm3
            "66440F7F6C24F4");  // movdqa [rsp - 12], xmm13
}

View file

@ -208,4 +208,4 @@ TEST(EmitterAVX, RIP) {
tester.emit(IGen::loadvf_rip_plus_s32(XMM0 + 3, -123));
tester.emit(IGen::loadvf_rip_plus_s32(XMM0 + 13, -123));
EXPECT_EQ(tester.dump_to_hex_string(true), "C5F8281D85FFFFFFC578282D85FFFFFF");
}
}