diff -Nru luajit-2.1.20231006/.relver luajit-2.1.20231117/.relver
--- luajit-2.1.20231006/.relver 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/.relver 2023-11-17 07:29:25.000000000 +0000
@@ -1 +1 @@
-1696562864
+1700206165
diff -Nru luajit-2.1.20231006/debian/changelog luajit-2.1.20231117/debian/changelog
--- luajit-2.1.20231006/debian/changelog 2023-10-16 18:00:07.000000000 +0000
+++ luajit-2.1.20231117/debian/changelog 2023-11-17 11:00:07.000000000 +0000
@@ -1,7 +1,7 @@
-luajit (2.1.20231006-3myguard1~jammy) jammy; urgency=medium
+luajit (2.1.20231117-3myguard1~jammy) jammy; urgency=medium
* luajit2 from openresty
* See for more http://deb.myguard.nl
- -- Thijs Eilander Mon, 16 Oct 2023 20:00:07 +0200
+ -- Thijs Eilander Fri, 17 Nov 2023 12:00:07 +0100
diff -Nru luajit-2.1.20231006/doc/running.html luajit-2.1.20231117/doc/running.html
--- luajit-2.1.20231006/doc/running.html 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/doc/running.html 2023-11-17 07:29:25.000000000 +0000
@@ -120,7 +120,8 @@
- c — C source file, exported bytecode data.
-- h — C header file, static bytecode data.
+- cc — C++ source file, exported bytecode data.
+- h — C/C++ header file, static bytecode data.
- obj or o — Object file, exported bytecode data
(OS- and architecture-specific).
- raw or any other extension — Raw bytecode file (portable).
diff -Nru luajit-2.1.20231006/dynasm/dasm_arm64.lua luajit-2.1.20231117/dynasm/dasm_arm64.lua
--- luajit-2.1.20231006/dynasm/dasm_arm64.lua 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/dynasm/dasm_arm64.lua 2023-11-17 07:29:25.000000000 +0000
@@ -549,7 +549,7 @@
local function parse_load_pair(params, nparams, n, op)
if params[n+2] then werror("too many operands") end
local pn, p2 = params[n], params[n+1]
- local scale = shr(op, 30) == 0 and 2 or 3
+ local scale = 2 + shr(op, 31 - band(shr(op, 26), 1))
local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
if not p1 then
if not p2 then
@@ -806,8 +806,8 @@
["ldrsw_*"] = "98000000DxB|b8800000DxL",
-- NOTE: ldur etc. are handled by ldr et al.
- ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
- ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
+ ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP|ac000000DAqP",
+ ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP|ac400000DAqP",
["ldpsw_*"] = "68400000DAxP",
-- Branches.
@@ -942,7 +942,7 @@
werror("bad register type")
end
parse_reg_type = false
- elseif p == "x" or p == "w" or p == "d" or p == "s" then
+ elseif p == "x" or p == "w" or p == "d" or p == "s" or p == "q" then
if parse_reg_type ~= p then
werror("register size mismatch")
end
diff -Nru luajit-2.1.20231006/src/host/buildvm_peobj.c luajit-2.1.20231117/src/host/buildvm_peobj.c
--- luajit-2.1.20231006/src/host/buildvm_peobj.c 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/host/buildvm_peobj.c 2023-11-17 07:29:25.000000000 +0000
@@ -373,11 +373,12 @@
/* Unwind codes for .text section with handler. */
p = uwc;
+ CADD_FP(192); /* +2 */
CSAVE_REGS(19, 28, 176); /* +5*2 */
CSAVE_FREGS(8, 15, 96); /* +4*2 */
CSAVE_FPLR(192); /* +1 */
CALLOC_S(208); /* +1 */
- CEND_ALIGN; /* +1 +3 -> 24 */
+ CEND_ALIGN; /* +1 +1 -> 24 */
u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2);
owrite(ctx, &u32, 4);
diff -Nru luajit-2.1.20231006/src/jit/bcsave.lua luajit-2.1.20231117/src/jit/bcsave.lua
--- luajit-2.1.20231006/src/jit/bcsave.lua 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/jit/bcsave.lua 2023-11-17 07:29:25.000000000 +0000
@@ -39,7 +39,7 @@
-- Stop handling options.
- Use stdin as input and/or stdout as output.
-File types: c h obj o raw (default)
+File types: c cc h obj o raw (default)
]]
os.exit(1)
end
@@ -82,7 +82,7 @@
------------------------------------------------------------------------------
local map_type = {
- raw = "raw", c = "c", h = "h", o = "obj", obj = "obj",
+ raw = "raw", c = "c", cc = "c", h = "h", o = "obj", obj = "obj",
}
local map_arch = {
diff -Nru luajit-2.1.20231006/src/jit/dis_arm64.lua luajit-2.1.20231117/src/jit/dis_arm64.lua
--- luajit-2.1.20231006/src/jit/dis_arm64.lua 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/jit/dis_arm64.lua 2023-11-17 07:29:25.000000000 +0000
@@ -948,7 +948,7 @@
elseif p == "U" then
local rn = map_regs.x[band(rshift(op, 5), 31)]
local sz = band(rshift(op, 30), 3)
- local imm12 = lshift(arshift(lshift(op, 10), 20), sz)
+ local imm12 = lshift(rshift(lshift(op, 10), 20), sz)
if imm12 ~= 0 then
x = "["..rn..", #"..imm12.."]"
else
@@ -985,8 +985,7 @@
x = x.."]"
end
elseif p == "P" then
- local opcv, sh = rshift(op, 26), 2
- if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end
+ local sh = 2 + rshift(op, 31 - band(rshift(op, 26), 1))
local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
local rn = map_regs.x[band(rshift(op, 5), 31)]
local ind = band(rshift(op, 23), 3)
diff -Nru luajit-2.1.20231006/src/lj_arch.h luajit-2.1.20231117/src/lj_arch.h
--- luajit-2.1.20231006/src/lj_arch.h 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_arch.h 2023-11-17 07:29:25.000000000 +0000
@@ -538,15 +538,6 @@
#error "No support for ILP32 model on ARM64"
#undef LJ_TARGET_ARM64
#endif
-#elif LJ_TARGET_PPC
-#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
-#error "No support for little-endian PPC32"
-#undef LJ_TARGET_PPC
-#endif
-#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
-#error "No support for PPC/e500, use LuaJIT 2.0"
-#undef LJ_TARGET_PPC
-#endif
#elif LJ_TARGET_MIPS32
#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
#error "Only o32 ABI supported for MIPS32"
@@ -706,6 +697,10 @@
#endif
#endif
+#if LUAJIT_TARGET == LUAJIT_ARCH_PPC && LJ_ARCH_ENDIAN == LUAJIT_LE
+#define LJ_NO_UNWIND 0
+#define LJ_UNWIND_EXT 0
+#else
#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5
#define LJ_NO_UNWIND 1
#endif
@@ -715,6 +710,7 @@
#else
#define LJ_UNWIND_EXT 0
#endif
+#endif //#if LUAJIT_TARGET == LUAJIT_ARCH_PPC && LJ_ARCH_ENDIAN == LUAJIT_LE
#if LJ_UNWIND_EXT && LJ_HASJIT && !LJ_TARGET_ARM && !(LJ_ABI_WIN && LJ_TARGET_X86)
#define LJ_UNWIND_JIT 1
diff -Nru luajit-2.1.20231006/src/lj_asm_arm.h luajit-2.1.20231117/src/lj_asm_arm.h
--- luajit-2.1.20231006/src/lj_asm_arm.h 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_asm_arm.h 2023-11-17 07:29:25.000000000 +0000
@@ -969,24 +969,32 @@
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
- if (irref_isk(ir->op1)) {
+ int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+ if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, ARMI_LDR, dest, v);
} else {
- Reg uv = ra_scratch(as, RSET_GPR);
- Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
- if (ir->o == IR_UREFC) {
- asm_guardcc(as, CC_NE);
+ if (guarded) {
+ asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP);
- emit_opk(as, ARMI_ADD, dest, uv,
+ }
+ if (ir->o == IR_UREFC)
+ emit_opk(as, ARMI_ADD, dest, dest,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
- emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+ else
+ emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(GCupval, v));
+ if (guarded)
+ emit_lso(as, ARMI_LDRB, RID_TMP, dest,
+ (int32_t)offsetof(GCupval, closed));
+ if (irref_isk(ir->op1)) {
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
+ int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
+ emit_loadi(as, dest, k);
} else {
- emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
+ emit_lso(as, ARMI_LDR, dest, ra_alloc1(as, ir->op1, RSET_GPR),
+ (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
- emit_lso(as, ARMI_LDR, uv, func,
- (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
}
@@ -2255,7 +2263,7 @@
}
if (nslots > as->evenspill) /* Leave room for args in stack slots. */
as->evenspill = nslots;
- return REGSP_HINT(RID_RET);
+ return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
}
static void asm_setup_target(ASMState *as)
diff -Nru luajit-2.1.20231006/src/lj_asm_arm64.h luajit-2.1.20231117/src/lj_asm_arm64.h
--- luajit-2.1.20231006/src/lj_asm_arm64.h 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_asm_arm64.h 2023-11-17 07:29:25.000000000 +0000
@@ -960,22 +960,30 @@
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
- if (irref_isk(ir->op1)) {
+ int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+ if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, A64I_LDRx, dest, v);
} else {
- if (ir->o == IR_UREFC) {
- asm_guardcnb(as, A64I_CBZ, RID_TMP);
+ if (guarded)
+ asm_guardcnb(as, ir->o == IR_UREFC ? A64I_CBZ : A64I_CBNZ, RID_TMP);
+ if (ir->o == IR_UREFC)
emit_opk(as, A64I_ADDx, dest, dest,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
+ else
+ emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
+ if (guarded)
emit_lso(as, A64I_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
+ if (irref_isk(ir->op1)) {
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
+ uint64_t k = gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
+ emit_loadu64(as, dest, k);
} else {
- emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
+ emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
+ (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
}
- emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
- (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
}
}
@@ -2040,7 +2048,7 @@
as->evenspill = nslots;
}
#endif
- return REGSP_HINT(RID_RET);
+ return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
}
static void asm_setup_target(ASMState *as)
diff -Nru luajit-2.1.20231006/src/lj_asm_mips.h luajit-2.1.20231117/src/lj_asm_mips.h
--- luajit-2.1.20231006/src/lj_asm_mips.h 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_asm_mips.h 2023-11-17 07:29:25.000000000 +0000
@@ -1207,22 +1207,29 @@
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
- if (irref_isk(ir->op1)) {
+ int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+ if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
} else {
- Reg uv = ra_scratch(as, RSET_GPR);
- Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
- if (ir->o == IR_UREFC) {
- asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
- emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
- emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+ if (guarded)
+ asm_guard(as, ir->o == IR_UREFC ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
+ if (ir->o == IR_UREFC)
+ emit_tsi(as, MIPSI_AADDIU, dest, dest, (int32_t)offsetof(GCupval, tv));
+ else
+ emit_tsi(as, MIPSI_AL, dest, dest, (int32_t)offsetof(GCupval, v));
+ if (guarded)
+ emit_tsi(as, MIPSI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
+ if (irref_isk(ir->op1)) {
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
+ GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
+ emit_loada(as, dest, o);
} else {
- emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v));
+ emit_tsi(as, MIPSI_AL, dest, ra_alloc1(as, ir->op1, RSET_GPR),
+ (int32_t)offsetof(GCfuncL, uvptr) +
+ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
- emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
- (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}
diff -Nru luajit-2.1.20231006/src/lj_asm_ppc.h luajit-2.1.20231117/src/lj_asm_ppc.h
--- luajit-2.1.20231006/src/lj_asm_ppc.h 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_asm_ppc.h 2023-11-17 07:29:25.000000000 +0000
@@ -840,23 +840,30 @@
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
- if (irref_isk(ir->op1)) {
+ int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+ if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
} else {
- Reg uv = ra_scratch(as, RSET_GPR);
- Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
- if (ir->o == IR_UREFC) {
- asm_guardcc(as, CC_NE);
+ if (guarded) {
+ asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
- emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
- emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+ }
+ if (ir->o == IR_UREFC)
+ emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
+ else
+ emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v));
+ if (guarded)
+ emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
+ if (irref_isk(ir->op1)) {
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
+ int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
+ emit_loadi(as, dest, k);
} else {
- emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
+ emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR),
+ (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
- emit_tai(as, PPCI_LWZ, uv, func,
- (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
}
}
diff -Nru luajit-2.1.20231006/src/lj_asm_x86.h luajit-2.1.20231117/src/lj_asm_x86.h
--- luajit-2.1.20231006/src/lj_asm_x86.h 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_asm_x86.h 2023-11-17 07:29:25.000000000 +0000
@@ -109,7 +109,7 @@
/* Check if there's no conflicting instruction between curins and ref.
** Also avoid fusing loads if there are multiple references.
*/
-static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
+static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
{
IRIns *ir = as->ir;
IRRef i = as->curins;
@@ -118,7 +118,9 @@
while (--i > ref) {
if (ir[i].o == conflict)
return 0; /* Conflict found. */
- else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref))
+ else if ((check & 1) && (ir[i].o == IR_NEWREF || ir[i].o == IR_CALLS))
+ return 0;
+ else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
return 0;
}
return 1; /* Ok, no conflict. */
@@ -134,7 +136,7 @@
lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
/* We can avoid the FLOAD of t->array for colocated arrays. */
if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
- !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
+ !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) {
as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */
return irb->op1; /* Table obj. */
}
@@ -456,7 +458,7 @@
RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
if (ir->o == IR_SLOAD) {
if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
- noconflict(as, ref, IR_RETF, 0) &&
+ noconflict(as, ref, IR_RETF, 2) &&
!(LJ_GC64 && irt_isaddr(ir->t))) {
as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
@@ -467,12 +469,12 @@
} else if (ir->o == IR_FLOAD) {
/* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
- noconflict(as, ref, IR_FSTORE, 0)) {
+ noconflict(as, ref, IR_FSTORE, 2)) {
asm_fusefref(as, ir, xallow);
return RID_MRM;
}
} else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
- if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
+ if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) &&
!(LJ_GC64 && irt_isaddr(ir->t))) {
asm_fuseahuref(as, ir->op1, xallow);
return RID_MRM;
@@ -482,7 +484,7 @@
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
*/
if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
- noconflict(as, ref, IR_XSTORE, 0)) {
+ noconflict(as, ref, IR_XSTORE, 2)) {
asm_fusexref(as, ir->op1, xallow);
return RID_MRM;
}
@@ -815,6 +817,7 @@
emit_rr(as, XO_UCOMISD, left, tmp);
emit_rr(as, XO_CVTSI2SD, tmp, dest);
emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
+ checkmclim(as);
emit_rr(as, XO_CVTTSD2SI, dest, left);
/* Can't fuse since left is needed twice. */
}
@@ -857,6 +860,7 @@
emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
emit_rma(as, XO_MOVSD, bias, k);
+ checkmclim(as);
emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
return;
} else { /* Integer to FP conversion. */
@@ -1173,6 +1177,7 @@
asm_guardcc(as, CC_E);
else
emit_sjcc(as, CC_E, l_end);
+ checkmclim(as);
if (irt_isnum(kt)) {
if (isk) {
/* Assumes -0.0 is already canonicalized to +0.0. */
@@ -1232,7 +1237,6 @@
#endif
}
emit_sfixup(as, l_loop);
- checkmclim(as);
#if LJ_GC64
if (!isk && irt_isaddr(kt)) {
emit_rr(as, XO_OR, tmp|REX_64, key);
@@ -1259,6 +1263,7 @@
emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
emit_shifti(as, XOg_ROL, tmp, HASH_ROT3);
emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
+ checkmclim(as);
emit_shifti(as, XOg_ROL, dest, HASH_ROT2);
emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
emit_shifti(as, XOg_ROL, dest, HASH_ROT1);
@@ -1276,7 +1281,6 @@
} else {
emit_rr(as, XO_MOV, tmp, key);
#if LJ_GC64
- checkmclim(as);
emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
if ((as->flags & JIT_F_BMI2)) {
emit_i8(as, 32);
@@ -1373,24 +1377,31 @@
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
- if (irref_isk(ir->op1)) {
+ int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
+ if (irref_isk(ir->op1) && !guarded) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_rma(as, XO_MOV, dest|REX_GC64, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
- Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
- if (ir->o == IR_UREFC) {
+ if (ir->o == IR_UREFC)
emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
- asm_guardcc(as, CC_NE);
- emit_i8(as, 1);
+ else
+ emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
+ if (guarded) {
+ asm_guardcc(as, ir->o == IR_UREFC ? CC_E : CC_NE);
+ emit_i8(as, 0);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
+ }
+ if (irref_isk(ir->op1)) {
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
+ GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]);
+ emit_loada(as, uv, o);
} else {
- emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
+ emit_rmro(as, XO_MOV, uv|REX_GC64, ra_alloc1(as, ir->op1, RSET_GPR),
+ (int32_t)offsetof(GCfuncL, uvptr) +
+ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
- emit_rmro(as, XO_MOV, uv|REX_GC64, func,
- (int32_t)offsetof(GCfuncL, uvptr) +
- (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}
@@ -1547,6 +1558,7 @@
if (irt_islightud(ir->t)) {
Reg dest = asm_load_lightud64(as, ir, 1);
if (ra_hasreg(dest)) {
+ checkmclim(as);
asm_fuseahuref(as, ir->op1, RSET_GPR);
if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
@@ -1594,6 +1606,7 @@
if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
"bad load type %d", irt_type(ir->t));
+ checkmclim(as);
#if LJ_GC64
emit_u32(as, LJ_TISNUM << 15);
#else
diff -Nru luajit-2.1.20231006/src/lj_carith.c luajit-2.1.20231117/src/lj_carith.c
--- luajit-2.1.20231006/src/lj_carith.c 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_carith.c 2023-11-17 07:29:25.000000000 +0000
@@ -44,9 +44,13 @@
p = (uint8_t *)cdata_getptr(p, ct->size);
if (ctype_isref(ct->info)) ct = ctype_rawchild(cts, ct);
} else if (ctype_isfunc(ct->info)) {
+ CTypeID id0 = i ? ctype_typeid(cts, ca->ct[0]) : 0;
p = (uint8_t *)*(void **)p;
ct = ctype_get(cts,
lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|id), CTSIZE_PTR));
+ if (i) { /* cts->tab may have been reallocated. */
+ ca->ct[0] = ctype_get(cts, id0);
+ }
}
if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
ca->ct[i] = ct;
diff -Nru luajit-2.1.20231006/src/lj_ccall.c luajit-2.1.20231117/src/lj_ccall.c
--- luajit-2.1.20231006/src/lj_ccall.c 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_ccall.c 2023-11-17 07:29:25.000000000 +0000
@@ -1393,7 +1393,7 @@
ct = ctype_rawchild(cts, ct);
}
if (ctype_isfunc(ct->info)) {
- CCallState cc;
+ CCallState cc = {0};
int gcsteps, ret;
cc.func = (void (*)(void))cdata_getptr(cdataptr(cd), sz);
gcsteps = ccall_set_args(L, cts, ct, &cc);
diff -Nru luajit-2.1.20231006/src/lj_ccall.h luajit-2.1.20231117/src/lj_ccall.h
--- luajit-2.1.20231006/src/lj_ccall.h 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_ccall.h 2023-11-17 07:29:25.000000000 +0000
@@ -181,6 +181,7 @@
(CCALL_NARG_GPR > CCALL_NRET_GPR ? CCALL_NARG_GPR : CCALL_NRET_GPR)
#define CCALL_NUM_FPR \
(CCALL_NARG_FPR > CCALL_NRET_FPR ? CCALL_NARG_FPR : CCALL_NRET_FPR)
+#define CCALL_MAXSTACK 32
/* Check against constants in lj_ctype.h. */
LJ_STATIC_ASSERT(CCALL_NUM_GPR <= CCALL_MAX_GPR);
diff -Nru luajit-2.1.20231006/src/lj_cparse.c luajit-2.1.20231117/src/lj_cparse.c
--- luajit-2.1.20231006/src/lj_cparse.c 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_cparse.c 2023-11-17 07:29:25.000000000 +0000
@@ -1766,9 +1766,11 @@
cp_check(cp, '(');
if (cp->tok == CTOK_IDENT) {
if (cp_str_is(cp->str, "push")) {
- if (cp->curpack < CPARSE_MAX_PACKSTACK) {
+ if (cp->curpack < CPARSE_MAX_PACKSTACK-1) {
cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack];
cp->curpack++;
+ } else {
+ cp_errmsg(cp, cp->tok, LJ_ERR_XLEVELS);
}
} else if (cp_str_is(cp->str, "pop")) {
if (cp->curpack > 0) cp->curpack--;
diff -Nru luajit-2.1.20231006/src/lj_ctype.h luajit-2.1.20231117/src/lj_ctype.h
--- luajit-2.1.20231006/src/lj_ctype.h 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_ctype.h 2023-11-17 07:29:25.000000000 +0000
@@ -276,6 +276,8 @@
#define CTTYDEFP(_)
#endif
+#define CTF_LONG_IF8 (CTF_LONG * (sizeof(long) == 8))
+
/* Common types. */
#define CTTYDEF(_) \
_(NONE, 0, CT_ATTRIB, CTATTRIB(CTA_BAD)) \
@@ -289,8 +291,8 @@
_(UINT16, 2, CT_NUM, CTF_UNSIGNED|CTALIGN(1)) \
_(INT32, 4, CT_NUM, CTALIGN(2)) \
_(UINT32, 4, CT_NUM, CTF_UNSIGNED|CTALIGN(2)) \
- _(INT64, 8, CT_NUM, CTF_LONG|CTALIGN(3)) \
- _(UINT64, 8, CT_NUM, CTF_UNSIGNED|CTF_LONG|CTALIGN(3)) \
+ _(INT64, 8, CT_NUM, CTF_LONG_IF8|CTALIGN(3)) \
+ _(UINT64, 8, CT_NUM, CTF_UNSIGNED|CTF_LONG_IF8|CTALIGN(3)) \
_(FLOAT, 4, CT_NUM, CTF_FP|CTALIGN(2)) \
_(DOUBLE, 8, CT_NUM, CTF_FP|CTALIGN(3)) \
_(COMPLEX_FLOAT, 8, CT_ARRAY, CTF_COMPLEX|CTALIGN(2)|CTID_FLOAT) \
diff -Nru luajit-2.1.20231006/src/lj_def.h luajit-2.1.20231117/src/lj_def.h
--- luajit-2.1.20231006/src/lj_def.h 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_def.h 2023-11-17 07:29:25.000000000 +0000
@@ -267,12 +267,8 @@
#else
unsigned char _BitScanForward(unsigned long *, unsigned long);
unsigned char _BitScanReverse(unsigned long *, unsigned long);
-unsigned char _BitScanForward64(unsigned long *, uint64_t);
-unsigned char _BitScanReverse64(unsigned long *, uint64_t);
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
-#pragma intrinsic(_BitScanForward64)
-#pragma intrinsic(_BitScanReverse64)
static LJ_AINLINE uint32_t lj_ffs(uint32_t x)
{
@@ -284,6 +280,12 @@
unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r;
}
+#if defined(_M_X64) || defined(_M_ARM64)
+unsigned char _BitScanForward64(unsigned long *, uint64_t);
+unsigned char _BitScanReverse64(unsigned long *, uint64_t);
+#pragma intrinsic(_BitScanForward64)
+#pragma intrinsic(_BitScanReverse64)
+
static LJ_AINLINE uint32_t lj_ffs64(uint64_t x)
{
unsigned long r; _BitScanForward64(&r, x); return (uint32_t)r;
@@ -294,6 +296,7 @@
unsigned long r; _BitScanReverse64(&r, x); return (uint32_t)r;
}
#endif
+#endif
unsigned long _byteswap_ulong(unsigned long);
uint64_t _byteswap_uint64(uint64_t);
diff -Nru luajit-2.1.20231006/src/lj_emit_arm64.h luajit-2.1.20231117/src/lj_emit_arm64.h
--- luajit-2.1.20231006/src/lj_emit_arm64.h 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_emit_arm64.h 2023-11-17 07:29:25.000000000 +0000
@@ -193,6 +193,32 @@
return 0; /* Failed. */
}
+#define glofs(as, k) \
+ ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
+#define mcpofs(as, k) \
+ ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
+#define checkmcpofs(as, k) \
+ (A64F_S_OK(mcpofs(as, k)>>2, 19))
+
+/* Try to form a const as ADR or ADRP or ADRP + ADD. */
+static int emit_kadrp(ASMState *as, Reg rd, uint64_t k)
+{
+ A64Ins ai = A64I_ADR;
+ int64_t ofs = mcpofs(as, k);
+ if (!A64F_S_OK((uint64_t)ofs, 21)) {
+ uint64_t kpage = k & ~0xfffull;
+ MCode *adrp = as->mcp - 1 - (k != kpage);
+ ofs = (int64_t)(kpage - ((uint64_t)adrp & ~0xfffull)) >> 12;
+ if (!A64F_S_OK(ofs, 21))
+ return 0; /* Failed. */
+ if (k != kpage)
+ emit_dn(as, (A64I_ADDx^A64I_K12)|A64F_U12(k - kpage), rd, rd);
+ ai = A64I_ADRP;
+ }
+ emit_d(as, ai|(((uint32_t)ofs&3)<<29)|A64F_S19(ofs>>2), rd);
+ return 1;
+}
+
static void emit_loadk(ASMState *as, Reg rd, uint64_t u64)
{
int zeros = 0, ones = 0, neg, lshift = 0;
@@ -213,6 +239,9 @@
if (emit_kdelta(as, rd, u64, is64)) {
return;
}
+ if (emit_kadrp(as, rd, u64)) { /* Either 1 or 2 ins. */
+ return;
+ }
}
if (neg) {
u64 = ~u64;
@@ -240,13 +269,6 @@
/* Load a 64 bit constant into a GPR. */
#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i)
-#define glofs(as, k) \
- ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
-#define mcpofs(as, k) \
- ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
-#define checkmcpofs(as, k) \
- (A64F_S_OK(mcpofs(as, k)>>2, 19))
-
static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
/* Get/set from constant pointer. */
diff -Nru luajit-2.1.20231006/src/lj_opt_fold.c luajit-2.1.20231117/src/lj_opt_fold.c
--- luajit-2.1.20231006/src/lj_opt_fold.c 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_opt_fold.c 2023-11-17 07:29:25.000000000 +0000
@@ -2134,8 +2134,26 @@
LJFOLD(ALEN any any)
LJFOLDX(lj_opt_fwd_alen)
+/* Try to merge UREFO/UREFC into referenced instruction. */
+static TRef merge_uref(jit_State *J, IRRef ref, IRIns* ir)
+{
+ if (ir->o == IR_UREFO && irt_isguard(ir->t)) {
+ /* Might be pointing to some other coroutine's stack.
+ ** And GC might shrink said stack, thereby repointing the upvalue.
+ ** GC might even collect said coroutine, thereby closing the upvalue.
+ */
+ if (gcstep_barrier(J, ref))
+ return EMITFOLD; /* So cannot merge. */
+ /* Current fins wants a check, but ir doesn't have one. */
+ if ((irt_t(fins->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC) &&
+ irt_type(ir->t) == IRT_IGC)
+ ir->t.irt += IRT_PGC-IRT_IGC; /* So install a check. */
+ }
+ return ref; /* Not a TRef, but the caller doesn't care. */
+}
+
/* Upvalue refs are really loads, but there are no corresponding stores.
-** So CSE is ok for them, except for UREFO across a GC step (see below).
+** So CSE is ok for them, except for guarded UREFO across a GC step.
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
@@ -2153,9 +2171,7 @@
if (irref_isk(ir->op1)) {
GCfunc *fn2 = ir_kfunc(IR(ir->op1));
if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
- if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
- break;
- return ref;
+ return merge_uref(J, ref, ir);
}
}
ref = ir->prev;
@@ -2164,6 +2180,24 @@
return EMITFOLD;
}
+/* Custom CSE for UREFO. */
+LJFOLD(UREFO any any)
+LJFOLDF(cse_urefo)
+{
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
+ IRRef ref = J->chain[IR_UREFO];
+ IRRef lim = fins->op1;
+ IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
+ while (ref > lim) {
+ IRIns *ir = IR(ref);
+ if (ir->op12 == op12)
+ return merge_uref(J, ref, ir);
+ ref = ir->prev;
+ }
+ }
+ return EMITFOLD;
+}
+
LJFOLD(HREFK any any)
LJFOLDX(lj_opt_fwd_hrefk)
@@ -2384,14 +2418,9 @@
/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
-**
-** The same logic applies to open upvalue references, because a stack
-** may be resized during a GC step (not the current stack, but maybe that
-** of a coroutine).
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
-LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
TRef tr = lj_opt_cse(J);
diff -Nru luajit-2.1.20231006/src/lj_opt_mem.c luajit-2.1.20231117/src/lj_opt_mem.c
--- luajit-2.1.20231006/src/lj_opt_mem.c 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_opt_mem.c 2023-11-17 07:29:25.000000000 +0000
@@ -466,18 +466,23 @@
*/
static AliasRet aa_uref(IRIns *refa, IRIns *refb)
{
- if (refa->o != refb->o)
- return ALIAS_NO; /* Different UREFx type. */
if (refa->op1 == refb->op1) { /* Same function. */
if (refa->op2 == refb->op2)
return ALIAS_MUST; /* Same function, same upvalue idx. */
else
return ALIAS_NO; /* Same function, different upvalue idx. */
} else { /* Different functions, check disambiguation hash values. */
- if (((refa->op2 ^ refb->op2) & 0xff))
+ if (((refa->op2 ^ refb->op2) & 0xff)) {
return ALIAS_NO; /* Upvalues with different hash values cannot alias. */
- else
- return ALIAS_MAY; /* No conclusion can be drawn for same hash value. */
+ } else if (refa->o != refb->o) {
+ /* Different UREFx type, but need to confirm the UREFO really is open. */
+ if (irt_type(refa->t) == IRT_IGC) refa->t.irt += IRT_PGC-IRT_IGC;
+ else if (irt_type(refb->t) == IRT_IGC) refb->t.irt += IRT_PGC-IRT_IGC;
+ return ALIAS_NO;
+ } else {
+ /* No conclusion can be drawn for same hash value and same UREFx type. */
+ return ALIAS_MAY;
+ }
}
}
diff -Nru luajit-2.1.20231006/src/lj_parse.c luajit-2.1.20231117/src/lj_parse.c
--- luajit-2.1.20231006/src/lj_parse.c 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_parse.c 2023-11-17 07:29:25.000000000 +0000
@@ -1752,7 +1752,8 @@
expr(ls, &val);
if (expr_isk(&key) && key.k != VKNIL &&
(key.k == VKSTR || expr_isk_nojump(&val))) {
- TValue k, *v;
+ TValue k = {0};
+ TValue *v;
if (!t) { /* Create template table on demand. */
BCReg kidx;
t = lj_tab_new(fs->L, needarr ? narr : 0, hsize2hbits(nhash));
diff -Nru luajit-2.1.20231006/src/lj_record.c luajit-2.1.20231117/src/lj_record.c
--- luajit-2.1.20231006/src/lj_record.c 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_record.c 2023-11-17 07:29:25.000000000 +0000
@@ -976,6 +976,7 @@
emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
J->retdepth++;
J->needsnap = 1;
+ J->scev.idx = REF_NIL;
lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return");
/* Shift result slots up and clear the slots of the new frame below. */
memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
@@ -1772,12 +1773,12 @@
/* Note: this effectively limits LJ_MAX_UPVAL to 127. */
uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
if (!uvp->closed) {
- uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv));
/* In current stack? */
if (uvval(uvp) >= tvref(J->L->stack) &&
uvval(uvp) < tvref(J->L->maxstack)) {
int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
if (slot >= 0) { /* Aliases an SSA slot? */
+ uref = tref_ref(emitir(IRT(IR_UREFO, IRT_PGC), fn, uv));
emitir(IRTG(IR_EQ, IRT_PGC),
REF_BASE,
emitir(IRT(IR_ADD, IRT_PGC), uref,
@@ -1792,12 +1793,21 @@
}
}
}
+ /* IR_UREFO+IRT_IGC is not checked for open-ness at runtime.
+ ** Always marked as a guard, since it might get promoted to IRT_PGC later.
+ */
+ uref = emitir(IRTG(IR_UREFO, tref_isgcv(val) ? IRT_PGC : IRT_IGC), fn, uv);
+ uref = tref_ref(uref);
emitir(IRTG(IR_UGT, IRT_PGC),
emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8));
} else {
+ /* If fn is constant, then so is the GCupval*, and the upvalue cannot
+ ** transition back to open, so no guard is required in this case.
+ */
+ IRType t = (tref_isk(fn) ? 0 : IRT_GUARD) | IRT_PGC;
+ uref = tref_ref(emitir(IRT(IR_UREFC, t), fn, uv));
needbarrier = 1;
- uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv));
}
if (val == 0) { /* Upvalue load */
IRType t = itype2irt(uvval(uvp));
diff -Nru luajit-2.1.20231006/src/lj_state.c luajit-2.1.20231117/src/lj_state.c
--- luajit-2.1.20231006/src/lj_state.c 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_state.c 2023-11-17 07:29:25.000000000 +0000
@@ -350,8 +350,11 @@
lj_assertG(L != mainthread(g), "free of main thread");
if (obj2gco(L) == gcref(g->cur_L))
setgcrefnull(g->cur_L);
- lj_func_closeuv(L, tvref(L->stack));
- lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues");
+ if (gcref(L->openupval) != NULL) {
+ lj_func_closeuv(L, tvref(L->stack));
+ lj_trace_abort(g); /* For aa_uref soundness. */
+ lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues");
+ }
lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
lj_mem_freet(g, L);
}
diff -Nru luajit-2.1.20231006/src/lj_target_arm64.h luajit-2.1.20231117/src/lj_target_arm64.h
--- luajit-2.1.20231006/src/lj_target_arm64.h 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/lj_target_arm64.h 2023-11-17 07:29:25.000000000 +0000
@@ -234,6 +234,8 @@
A64I_MOVZx = 0xd2800000,
A64I_MOVNw = 0x12800000,
A64I_MOVNx = 0x92800000,
+ A64I_ADR = 0x10000000,
+ A64I_ADRP = 0x90000000,
A64I_LDRB = 0x39400000,
A64I_LDRH = 0x79400000,
diff -Nru luajit-2.1.20231006/src/vm_arm.dasc luajit-2.1.20231117/src/vm_arm.dasc
--- luajit-2.1.20231006/src/vm_arm.dasc 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/vm_arm.dasc 2023-11-17 07:29:25.000000000 +0000
@@ -1195,8 +1195,11 @@
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc pcall
+ | ldr RB, L->maxstack
+ | add INS, BASE, NARGS8:RC
| ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)]
| cmp NARGS8:RC, #8
+ | cmphs RB, INS
| blo ->fff_fallback
| tst RA, #HOOK_ACTIVE // Remember active hook before pcall.
| mov RB, BASE
@@ -1207,7 +1210,11 @@
| b ->vm_call_dispatch
|
|.ffunc_2 xpcall
+ | ldr RB, L->maxstack
+ | add INS, BASE, NARGS8:RC
| ldrb RA, [DISPATCH, #DISPATCH_GL(hookmask)]
+ | cmp RB, INS
+ | blo ->fff_fallback
| checkfunc CARG4, ->fff_fallback // Traceback must be a function.
| mov RB, BASE
| strd CARG12, [BASE, #8] // Swap function and traceback.
diff -Nru luajit-2.1.20231006/src/vm_arm64.dasc luajit-2.1.20231117/src/vm_arm64.dasc
--- luajit-2.1.20231006/src/vm_arm64.dasc 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/vm_arm64.dasc 2023-11-17 07:29:25.000000000 +0000
@@ -291,8 +291,17 @@
| blo target
|.endmacro
|
+|.macro init_constants
+| movn TISNIL, #0
+| movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
+| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+|.endmacro
+|
|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro
|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro
+|.macro mov_nil, reg; mov reg, TISNIL; .endmacro
+|.macro cmp_nil, reg; cmp reg, TISNIL; .endmacro
+|.macro add_TISNUM, dst, src; add dst, src, TISNUM; .endmacro
|
#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field))
|
@@ -430,6 +439,7 @@
|
|->vm_unwind_c: // Unwind C stack, return from vm_pcall.
| // (void *cframe, int errcode)
+ | add fp, CARG1, # SAVE_FP_LR_
| mov sp, CARG1
| mov CRET1, CARG2
| ldr L, SAVE_L
@@ -441,11 +451,10 @@
|
|->vm_unwind_ff: // Unwind C stack, return from ff pcall.
| // (void *cframe)
- | and sp, CARG1, #CFRAME_RAWMASK
+ | add fp, CARG1, # SAVE_FP_LR_
+ | mov sp, CARG1
| ldr L, SAVE_L
- | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
- | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
- | movn TISNIL, #0
+ | init_constants
| ldr GL, L->glref // Setup pointer to global state.
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
| mov RC, #16 // 2 results: false + error message.
@@ -510,11 +519,9 @@
| str L, GL->cur_L
| mov RA, BASE
| ldp BASE, CARG1, L->base
- | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
- | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+ | init_constants
| ldr PC, [BASE, FRAME_PC]
| strb wzr, L->status
- | movn TISNIL, #0
| sub RC, CARG1, BASE
| ands CARG1, PC, #FRAME_TYPE
| add RC, RC, #8
@@ -550,10 +557,8 @@
|3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
| str L, GL->cur_L
| ldp RB, CARG1, L->base // RB = old base (for vmeta_call).
- | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
- | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
| add PC, PC, BASE
- | movn TISNIL, #0
+ | init_constants
| sub PC, PC, RB // PC = frame delta + frame type
| sub NARGS8:RC, CARG1, BASE
| st_vmstate ST_INTERP
@@ -662,7 +667,7 @@
| b >1
|
|->vmeta_tgetb: // RB = table, RC = index
- | add RC, RC, TISNUM
+ | add_TISNUM RC, RC
| add CARG2, BASE, RB, lsl #3
| add CARG3, sp, TMPDofs
| str RC, TMPD
@@ -697,7 +702,7 @@
| sxtw CARG2, TMP1w
| bl extern lj_tab_getinth // (GCtab *t, int32_t key)
| // Returns cTValue * or NULL.
- | mov TMP0, TISNIL
+ | mov_nil TMP0
| cbz CRET1, ->BC_TGETR_Z
| ldr TMP0, [CRET1]
| b ->BC_TGETR_Z
@@ -720,7 +725,7 @@
| b >1
|
|->vmeta_tsetb: // RB = table, RC = index
- | add RC, RC, TISNUM
+ | add_TISNUM RC, RC
| add CARG2, BASE, RB, lsl #3
| add CARG3, sp, TMPDofs
| str RC, TMPD
@@ -1034,7 +1039,7 @@
|1: // Field metatable must be at same offset for GCtab and GCudata!
| ldr TAB:RB, TAB:CARG1->metatable
|2:
- | mov CARG1, TISNIL
+ | mov_nil CARG1
| ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
| cbz TAB:RB, ->fff_restv
| ldr TMP1w, TAB:RB->hmask
@@ -1056,7 +1061,7 @@
| movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48
| b ->fff_restv
|5:
- | cmp TMP0, TISNIL
+ | cmp_nil TMP0
| bne ->fff_restv
| b <4
|
@@ -1156,8 +1161,8 @@
| cbnz TAB:CARG2, ->fff_fallback
#endif
| mov RC, #(3+1)*8
- | stp CARG1, TISNIL, [BASE, #-8]
- | str CFUNC:CARG4, [BASE, #-16]
+ | stp CFUNC:CARG4, CARG1, [BASE, #-16]
+ | str TISNIL, [BASE]
| b ->fff_res
|
|.ffunc_2 ipairs_aux
@@ -1169,14 +1174,14 @@
| add CARG2w, CARG2w, #1
| cmp CARG2w, TMP1w
| ldr PC, [BASE, FRAME_PC]
- | add TMP2, CARG2, TISNUM
+ | add_TISNUM TMP2, CARG2
| mov RC, #(0+1)*8
| str TMP2, [BASE, #-16]
| bhs >2 // Not in array part?
| ldr TMP0, [CARG3, CARG2, lsl #3]
|1:
| mov TMP1, #(2+1)*8
- | cmp TMP0, TISNIL
+ | cmp_nil TMP0
| str TMP0, [BASE, #-8]
| csel RC, RC, TMP1, eq
| b ->fff_res
@@ -1199,13 +1204,17 @@
| cbnz TAB:CARG2, ->fff_fallback
#endif
| mov RC, #(3+1)*8
- | stp CARG1, TISNUM, [BASE, #-8]
- | str CFUNC:CARG4, [BASE, #-16]
+ | stp CFUNC:CARG4, CARG1, [BASE, #-16]
+ | str TISNUM, [BASE]
| b ->fff_res
|
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc pcall
+ | ldr TMP1, L->maxstack
+ | add TMP2, BASE, NARGS8:RC
+ | cmp TMP1, TMP2
+ | blo ->fff_fallback
| cmp NARGS8:RC, #8
| ldrb TMP0w, GL->hookmask
| blo ->fff_fallback
@@ -1225,6 +1234,10 @@
| b ->vm_call_dispatch
|
|.ffunc xpcall
+ | ldr TMP1, L->maxstack
+ | add TMP2, BASE, NARGS8:RC
+ | cmp TMP1, TMP2
+ | blo ->fff_fallback
| ldp CARG1, CARG2, [BASE]
| ldrb TMP0w, GL->hookmask
| subs NARGS8:TMP1, NARGS8:RC, #16
@@ -1390,7 +1403,7 @@
| eor CARG2w, CARG1w, CARG1w, asr #31
| movz CARG3, #0x41e0, lsl #48 // 2^31.
| subs CARG1w, CARG2w, CARG1w, asr #31
- | add CARG1, CARG1, TISNUM
+ | add_TISNUM CARG1, CARG1
| csel CARG1, CARG1, CARG3, pl
| // Fallthrough.
|
@@ -1481,7 +1494,7 @@
| ldr PC, [BASE, FRAME_PC]
| str d0, [BASE, #-16]
| mov RC, #(2+1)*8
- | add CARG2, CARG2, TISNUM
+ | add_TISNUM CARG2, CARG2
| str CARG2, [BASE, #-8]
| b ->fff_res
|
@@ -1547,7 +1560,7 @@
| bne ->fff_fallback
| ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end).
| ldr CARG3w, STR:CARG1->len
- | add TMP0, TMP0, TISNUM
+ | add_TISNUM TMP0, TMP0
| str TMP0, [BASE, #-16]
| mov RC, #(0+1)*8
| cbz CARG3, ->fff_res
@@ -1693,17 +1706,17 @@
|.ffunc_bit tobit
| mov TMP0w, CARG1w
|9: // Label reused by .ffunc_bit_op users.
- | add CARG1, TMP0, TISNUM
+ | add_TISNUM CARG1, TMP0
| b ->fff_restv
|
|.ffunc_bit bswap
| rev TMP0w, CARG1w
- | add CARG1, TMP0, TISNUM
+ | add_TISNUM CARG1, TMP0
| b ->fff_restv
|
|.ffunc_bit bnot
| mvn TMP0w, CARG1w
- | add CARG1, TMP0, TISNUM
+ | add_TISNUM CARG1, TMP0
| b ->fff_restv
|
|.macro .ffunc_bit_sh, name, ins, shmod
@@ -1724,7 +1737,7 @@
| checkint CARG1, ->vm_tobit_fb
|2:
| ins TMP0w, CARG1w, TMP1w
- | add CARG1, TMP0, TISNUM
+ | add_TISNUM CARG1, TMP0
| b ->fff_restv
|.endmacro
|
@@ -1913,8 +1926,7 @@
| and CARG3, CARG3, #LJ_GCVMASK
| beq >2
|1: // Move results down.
- | ldr CARG1, [RA]
- | add RA, RA, #8
+ | ldr CARG1, [RA], #8
| subs RB, RB, #8
| str CARG1, [BASE, RC, lsl #3]
| add RC, RC, #1
@@ -2029,9 +2041,7 @@
|.if JIT
| ldr L, SAVE_L
|1:
- | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
- | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
- | movn TISNIL, #0
+ | init_constants
| cmn CARG1w, #LUA_ERRERR
| bhs >9 // Check for error from exit.
| ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
@@ -2210,9 +2220,7 @@
| bl extern lj_ccallback_enter // (CTState *cts, void *cf)
| // Returns lua_State *.
| ldp BASE, RC, L:CRET1->base
- | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
- | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
- | movn TISNIL, #0
+ | init_constants
| mov L, CRET1
| ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
| sub RC, RC, BASE
@@ -2591,7 +2599,7 @@
| bne >5
| negs TMP0w, TMP0w
| movz CARG3, #0x41e0, lsl #48 // 2^31.
- | add TMP0, TMP0, TISNUM
+ | add_TISNUM TMP0, TMP0
| csel TMP0, TMP0, CARG3, vc
|5:
| str TMP0, [BASE, RA, lsl #3]
@@ -2606,7 +2614,7 @@
| bne >2
| ldr CARG1w, STR:CARG1->len
|1:
- | add CARG1, CARG1, TISNUM
+ | add_TISNUM CARG1, CARG1
| str CARG1, [BASE, RA, lsl #3]
| ins_next
|
@@ -2714,7 +2722,7 @@
| intins CARG1w, CARG1w, CARG2w
| ins_arithfallback bvs
|.endif
- | add CARG1, CARG1, TISNUM
+ | add_TISNUM CARG1, CARG1
| str CARG1, [BASE, RA, lsl #3]
|4:
| ins_next
@@ -2807,7 +2815,7 @@
case BC_KSHORT:
| // RA = dst, RC = int16_literal
| sxth RCw, RCw
- | add TMP0, RC, TISNUM
+ | add_TISNUM TMP0, RC
| str TMP0, [BASE, RA, lsl #3]
| ins_next
break;
@@ -3030,7 +3038,7 @@
| cmp TMP1w, CARG1w // In array part?
| bhs ->vmeta_tgetv
| ldr TMP0, [CARG3]
- | cmp TMP0, TISNIL
+ | cmp_nil TMP0
| beq >5
|1:
| str TMP0, [BASE, RA, lsl #3]
@@ -3073,7 +3081,7 @@
| ldr NODE:CARG3, NODE:CARG3->next
| cmp CARG1, CARG4
| bne >4
- | cmp TMP0, TISNIL
+ | cmp_nil TMP0
| beq >5
|3:
| str TMP0, [BASE, RA, lsl #3]
@@ -3082,7 +3090,7 @@
|4: // Follow hash chain.
| cbnz NODE:CARG3, <1
| // End of hash chain: key not found, nil result.
- | mov TMP0, TISNIL
+ | mov_nil TMP0
|
|5: // Check for __index if table value is nil.
| ldr TAB:CARG1, TAB:CARG2->metatable
@@ -3103,7 +3111,7 @@
| cmp RCw, CARG1w // In array part?
| bhs ->vmeta_tgetb
| ldr TMP0, [CARG3]
- | cmp TMP0, TISNIL
+ | cmp_nil TMP0
| beq >5
|1:
| str TMP0, [BASE, RA, lsl #3]
@@ -3150,7 +3158,7 @@
| ldr TMP1, [CARG3]
| ldr TMP0, [BASE, RA, lsl #3]
| ldrb TMP2w, TAB:CARG2->marked
- | cmp TMP1, TISNIL // Previous value is nil?
+ | cmp_nil TMP1 // Previous value is nil?
| beq >5
|1:
| str TMP0, [CARG3]
@@ -3202,7 +3210,7 @@
| cmp CARG1, CARG4
| bne >5
| ldr TMP0, [BASE, RA, lsl #3]
- | cmp TMP1, TISNIL // Previous value is nil?
+ | cmp_nil TMP1 // Previous value is nil?
| beq >4
|2:
| str TMP0, NODE:CARG3->val
@@ -3261,7 +3269,7 @@
| ldr TMP1, [CARG3]
| ldr TMP0, [BASE, RA, lsl #3]
| ldrb TMP2w, TAB:CARG2->marked
- | cmp TMP1, TISNIL // Previous value is nil?
+ | cmp_nil TMP1 // Previous value is nil?
| beq >5
|1:
| str TMP0, [CARG3]
@@ -3360,9 +3368,8 @@
|->BC_CALL_Z:
| mov RB, BASE // Save old BASE for vmeta_call.
| add BASE, BASE, RA, lsl #3
- | ldr CARG3, [BASE]
+ | ldr CARG3, [BASE], #16
| sub NARGS8:RC, NARGS8:RC, #8
- | add BASE, BASE, #16
| checkfunc CARG3, ->vmeta_call
| ins_call
break;
@@ -3378,9 +3385,8 @@
| // RA = base, (RB = 0,) RC = (nargs+1)*8
|->BC_CALLT1_Z:
| add RA, BASE, RA, lsl #3
- | ldr TMP1, [RA]
+ | ldr TMP1, [RA], #16
| sub NARGS8:RC, NARGS8:RC, #8
- | add RA, RA, #16
| checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt
| ldr PC, [BASE, FRAME_PC]
|->BC_CALLT2_Z:
@@ -3460,10 +3466,10 @@
| add CARG3, CARG2, CARG1, lsl #3
| bhs >5 // Index points after array part?
| ldr TMP0, [CARG3]
- | cmp TMP0, TISNIL
+ | cmp_nil TMP0
| cinc CARG1, CARG1, eq // Skip holes in array part.
| beq <1
- | add CARG1, CARG1, TISNUM
+ | add_TISNUM CARG1, CARG1
| stp CARG1, TMP0, [RA]
| add CARG1, CARG1, #1
|3:
@@ -3481,7 +3487,7 @@
| add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8
| bhi <4
| ldp TMP0, CARG1, NODE:CARG3->val
- | cmp TMP0, TISNIL
+ | cmp_nil TMP0
| add RC, RC, #1
| beq <6 // Skip holes in hash part.
| stp CARG1, TMP0, [RA]
@@ -3499,8 +3505,8 @@
| checkfunc CFUNC:CARG1, >5
| asr TMP0, TAB:CARG3, #47
| ldrb TMP1w, CFUNC:CARG1->ffid
- | cmn TMP0, #-LJ_TTAB
- | ccmp CARG4, TISNIL, #0, eq
+ | cmp_nil CARG4
+ | ccmn TMP0, #-LJ_TTAB, #0, eq
| ccmp TMP1w, #FF_next_N, #0, eq
| bne >5
| mov TMP0w, #0xfffe7fff // LJ_KEYINDEX
@@ -3540,51 +3546,51 @@
| and RC, RC, #255
| // RA = base, RB = (nresults+1), RC = numparams
| ldr TMP1, [BASE, FRAME_PC]
- | add RC, BASE, RC, lsl #3
- | add RA, BASE, RA, lsl #3
- | add RC, RC, #FRAME_VARG
- | add TMP2, RA, RB, lsl #3
- | sub RC, RC, TMP1 // RC = vbase
- | // Note: RC may now be even _above_ BASE if nargs was < numparams.
+ | add TMP0, BASE, RC, lsl #3
+ | add RC, BASE, RA, lsl #3 // RC = destination
+ | add TMP0, TMP0, #FRAME_VARG
+ | add TMP2, RC, RB, lsl #3
+ | sub RA, TMP0, TMP1 // RA = vbase
+ | // Note: RA may now be even _above_ BASE if nargs was < numparams.
| sub TMP3, BASE, #16 // TMP3 = vtop
| cbz RB, >5
| sub TMP2, TMP2, #16
|1: // Copy vararg slots to destination slots.
- | cmp RC, TMP3
- | ldr TMP0, [RC], #8
- | csel TMP0, TMP0, TISNIL, lo
- | cmp RA, TMP2
- | str TMP0, [RA], #8
+ | cmp RA, TMP3
+ | ldr TMP0, [RA], #8
+ | csinv TMP0, TMP0, xzr, lo // TISNIL = ~xzr
+ | cmp RC, TMP2
+ | str TMP0, [RC], #8
| blo <1
|2:
| ins_next
|
|5: // Copy all varargs.
| ldr TMP0, L->maxstack
- | subs TMP2, TMP3, RC
+ | subs TMP2, TMP3, RA
| csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8
| add RB, RB, #8
- | add TMP1, RA, TMP2
+ | add TMP1, RC, TMP2
| str RBw, SAVE_MULTRES
| ble <2 // Nothing to copy.
| cmp TMP1, TMP0
| bhi >7
|6:
- | ldr TMP0, [RC], #8
- | str TMP0, [RA], #8
- | cmp RC, TMP3
+ | ldr TMP0, [RA], #8
+ | str TMP0, [RC], #8
+ | cmp RA, TMP3
| blo <6
| b <2
|
|7: // Grow stack for varargs.
| lsr CARG2, TMP2, #3
- | stp BASE, RA, L->base
+ | stp BASE, RC, L->base
| mov CARG1, L
- | sub RC, RC, BASE // Need delta, because BASE may change.
+ | sub RA, RA, BASE // Need delta, because BASE may change.
| str PC, SAVE_PC
| bl extern lj_state_growstack // (lua_State *L, int n)
- | ldp BASE, RA, L->base
- | add RC, BASE, RC
+ | ldp BASE, RC, L->base
+ | add RA, BASE, RA
| sub TMP3, BASE, #16
| b <6
break;
@@ -3728,7 +3734,7 @@
} else {
| adds CARG1w, CARG1w, CARG3w
| bvs >2
- | add TMP0, CARG1, TISNUM
+ | add_TISNUM TMP0, CARG1
| tbnz CARG3w, #31, >4
| cmp CARG1w, CARG2w
}
@@ -3807,7 +3813,7 @@
| // RA = base, RC = target
| ldr CARG1, [BASE, RA, lsl #3]
| add TMP1, BASE, RA, lsl #3
- | cmp CARG1, TISNIL
+ | cmp_nil CARG1
| beq >1 // Stop if iterator returned nil.
if (op == BC_JITERL) {
| str CARG1, [TMP1, #-8]
diff -Nru luajit-2.1.20231006/src/vm_mips.dasc luajit-2.1.20231117/src/vm_mips.dasc
--- luajit-2.1.20231006/src/vm_mips.dasc 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/vm_mips.dasc 2023-11-17 07:29:25.000000000 +0000
@@ -1374,9 +1374,13 @@
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc pcall
+ | lw TMP1, L->maxstack
+ | addu TMP2, BASE, NARGS8:RC
| lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
| beqz NARGS8:RC, ->fff_fallback
- | move TMP2, BASE
+ |. sltu AT, TMP1, TMP2
+ | bnez AT, ->fff_fallback
+ |. move TMP2, BASE
| addiu BASE, BASE, 8
| // Remember active hook before pcall.
| srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
@@ -1386,8 +1390,12 @@
|. addiu NARGS8:RC, NARGS8:RC, -8
|
|.ffunc xpcall
+ | lw TMP1, L->maxstack
+ | addu TMP2, BASE, NARGS8:RC
| sltiu AT, NARGS8:RC, 16
| lw CARG4, 8+HI(BASE)
+ | sltu TMP1, TMP1, TMP2
+ | or AT, AT, TMP1
| bnez AT, ->fff_fallback
|. lw CARG3, 8+LO(BASE)
| lw CARG1, LO(BASE)
diff -Nru luajit-2.1.20231006/src/vm_mips64.dasc luajit-2.1.20231117/src/vm_mips64.dasc
--- luajit-2.1.20231006/src/vm_mips64.dasc 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/vm_mips64.dasc 2023-11-17 07:29:25.000000000 +0000
@@ -1415,8 +1415,12 @@
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc pcall
+ | ld TMP1, L->maxstack
+ | daddu TMP2, BASE, NARGS8:RC
+ | sltu AT, TMP1, TMP2
+ | bnez AT, ->fff_fallback
+ |. lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
| daddiu NARGS8:RC, NARGS8:RC, -8
- | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
| bltz NARGS8:RC, ->fff_fallback
|. move TMP2, BASE
| daddiu BASE, BASE, 16
@@ -1437,8 +1441,12 @@
|. nop
|
|.ffunc xpcall
+ | ld TMP1, L->maxstack
+ | daddu TMP2, BASE, NARGS8:RC
+ | sltu AT, TMP1, TMP2
+ | bnez AT, ->fff_fallback
+ |. ld CARG1, 0(BASE)
| daddiu NARGS8:TMP0, NARGS8:RC, -16
- | ld CARG1, 0(BASE)
| ld CARG2, 8(BASE)
| bltz NARGS8:TMP0, ->fff_fallback
|. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
diff -Nru luajit-2.1.20231006/src/vm_ppc.dasc luajit-2.1.20231117/src/vm_ppc.dasc
--- luajit-2.1.20231006/src/vm_ppc.dasc 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/vm_ppc.dasc 2023-11-17 07:29:25.000000000 +0000
@@ -977,8 +977,11 @@
|.if FFI
| cmplwi TMP0, 1
|.endif
- | lwz PC, -16(RB) // Restore PC from [cont|PC].
- | subi TMP2, RD, 8
+ |// Load PC via FRAME_CONTPC instead of a hard-coded -16 offset (fixes a segfault).
+ | lwz PC, FRAME_CONTPC(RB) // Restore PC from [cont|PC].
+ | addi BASEP4, BASE, 4
+ | addi TMP2, RD, WORD_HI-8
+ | lwz TMP1, LFUNC:TMP1->pc
| stwx TISNIL, RA, TMP2 // Ensure one valid arg.
|.if P64
| ld TMP3, 0(DISPATCH)
@@ -986,7 +989,9 @@
|.if FFI
| ble >1
|.endif
- | lwz TMP1, LFUNC:TMP1->pc
+ |.if P64
+ | add TMP0, TMP0, TMP3
+ |.endif
| lwz KBASE, PC2PROTO(k)(TMP1)
| // BASE = base, RA = resultptr, RB = meta base
| mtctr TMP0
@@ -1715,14 +1720,23 @@
|
|//-- Base library: iterators -------------------------------------------
|
- |.ffunc_1 next
- | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
- | checktab CARG3
+ |.ffunc next
+ | cmplwi NARGS8:RC, 8
+ | lwz TAB:CARG1, WORD_LO(BASE)
+ | blt ->fff_fallback
+ |.if ENDIAN_LE
+ | add TMP1, BASE, NARGS8:RC
+ | stw TISNIL, WORD_HI(TMP1) // Set missing 2nd arg to nil.
+ |.else
+ | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
+ |.endif
| lwz PC, FRAME_PC(BASE)
- | bne ->fff_fallback
+ | stp BASE, L->base // Add frame since C call can throw.
+ | stp BASE, L->top // Dummy frame length is ok.
| la CARG2, 8(BASE)
| la CARG3, -8(BASE)
- | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
+ | stw PC, SAVE_PC
+ | bl extern lj_tab_next // (GCtab *t, cTValue *key,TValue *o)
| // Returns 1=found, 0=end, -1=error.
| cmpwi CRET1, 0
| la RA, -8(BASE)
@@ -1891,8 +1905,12 @@
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc pcall
+ | lwz TMP1, L->maxstack
+ | add TMP2, BASE, NARGS8:RC
| cmplwi NARGS8:RC, 8
| lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
+ | cmplw cr1, TMP1, TMP2
+ | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| blt ->fff_fallback
| mr TMP2, BASE
| la BASE, 8(BASE)
@@ -1903,14 +1921,19 @@
| b ->vm_call_dispatch
|
|.ffunc xpcall
+ | lwz TMP1, L->maxstack
+ | add TMP2, BASE, NARGS8:RC
| cmplwi NARGS8:RC, 16
| lwz CARG3, 8+WORD_HI(BASE)
+ | cmplw cr1, TMP1, TMP2
|.if FPU
| lfd FARG2, 8(BASE)
+ | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| lfd FARG1, 0(BASE)
|.else
| lwz CARG1, 0(BASE)
| lwz CARG2, 4(BASE)
+ | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| lwz CARG4, 12(BASE)
|.endif
| blt ->fff_fallback
@@ -3539,7 +3562,7 @@
| .endif
| cmpwi cr1, CARG3, 0
| mr TMP2, sp
- | addic. CARG2, CARG2, -4
+ | addic. CARG2, CARG2, -PSIZE
| .if GPR64
| stdux sp, sp, TMP1
| .else
@@ -5680,10 +5703,10 @@
| crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
| add TMP3, PC, TMP0
| bne cr0, >5
- | lus TMP1, (LJ_KEYINDEX >> 16)
- | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff)
- | stw ZERO, -4(RA) // Initialize control var.
- | stw TMP1, -8(RA)
+ | lus TMP1, 0xfffe // LJ_KEYINDEX >> 16
+ | ori TMP1, TMP1, 0x7fff // LJ_KEYINDEX & 0xffff
+ | stw ZERO, WORD_LO-8(RA) // Initialize control var.
+ | stw TMP1, WORD_HI-8(RA)
| addis PC, TMP3, -(BCBIAS_J*4 >> 16)
|1:
| ins_next
diff -Nru luajit-2.1.20231006/src/vm_x64.dasc luajit-2.1.20231117/src/vm_x64.dasc
--- luajit-2.1.20231006/src/vm_x64.dasc 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/vm_x64.dasc 2023-11-17 07:29:25.000000000 +0000
@@ -1463,6 +1463,9 @@
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc_1 pcall
+ | mov L:RB, SAVE_L
+ | lea RA, [BASE+NARGS:RD*8]
+ | cmp RA, L:RB->maxstack; ja ->fff_fallback
| lea RA, [BASE+16]
| sub NARGS:RDd, 1
| mov PCd, 16+FRAME_PCALL
@@ -1481,6 +1484,9 @@
| jmp ->vm_call_dispatch
|
|.ffunc_2 xpcall
+ | mov L:RB, SAVE_L
+ | lea RA, [BASE+NARGS:RD*8]
+ | cmp RA, L:RB->maxstack; ja ->fff_fallback
| mov LFUNC:RA, [BASE+8]
| checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
| mov LFUNC:RB, [BASE] // Swap function and traceback.
diff -Nru luajit-2.1.20231006/src/vm_x86.dasc luajit-2.1.20231117/src/vm_x86.dasc
--- luajit-2.1.20231006/src/vm_x86.dasc 2023-10-06 03:27:44.000000000 +0000
+++ luajit-2.1.20231117/src/vm_x86.dasc 2023-11-17 07:29:25.000000000 +0000
@@ -1369,7 +1369,7 @@
| mov LFUNC:RB, [RA-8]
| add NARGS:RD, 1
| // This is fragile. L->base must not move, KBASE must always be defined.
- |.if x64
+ |.if X64
| cmp KBASEa, rdx // Continue with CALLT if flag set.
|.else
| cmp KBASE, BASE // Continue with CALLT if flag set.
@@ -1793,6 +1793,9 @@
|//-- Base library: catch errors ----------------------------------------
|
|.ffunc_1 pcall
+ | mov L:RB, SAVE_L
+ | lea RA, [BASE+NARGS:RD*8]
+ | cmp RA, L:RB->maxstack; ja ->fff_fallback
| lea RA, [BASE+8]
| sub NARGS:RD, 1
| mov PC, 8+FRAME_PCALL
@@ -1804,6 +1807,9 @@
| jmp ->vm_call_dispatch
|
|.ffunc_2 xpcall
+ | mov L:RB, SAVE_L
+ | lea RA, [BASE+NARGS:RD*8]
+ | cmp RA, L:RB->maxstack; ja ->fff_fallback
| cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback
| mov RB, [BASE+4] // Swap function and traceback.
| mov [BASE+12], RB