From 47b4fcd961df5af7fc77a5d12102548143171c78 Mon Sep 17 00:00:00 2001 From: Marco Heisig Date: Tue, 28 Mar 2023 15:26:45 +0200 Subject: [PATCH] Fix SIMD shuffle instructions on SSE2 and AVX. This commit also fixes bug #2012990, because the shuffle was used in sb-simd-sse2:f64.2-values, which was used to implement the SSE2 horizontal operations. --- contrib/sb-simd/code/define-fake-vops.lisp | 2 +- contrib/sb-simd/code/instruction-sets/avx.lisp | 4 ++-- contrib/sb-simd/code/instruction-sets/sse2.lisp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/contrib/sb-simd/code/define-fake-vops.lisp b/contrib/sb-simd/code/define-fake-vops.lisp index 4432791c4..be7206500 100644 --- a/contrib/sb-simd/code/define-fake-vops.lisp +++ b/contrib/sb-simd/code/define-fake-vops.lisp @@ -420,7 +420,7 @@ (define-fake-vop f64.2-values (x) (values (%f64!-from-p128 x) - (%f64!-from-p128 (%f64.2-shuffle x 1)))) + (%f64!-from-p128 (%f64.2-shuffle x x 1)))) (define-fake-vop f64.2-broadcast (x) (let ((v (%f64.2!-from-f64 x))) diff --git a/contrib/sb-simd/code/instruction-sets/avx.lisp b/contrib/sb-simd/code/instruction-sets/avx.lisp index aa422c56c..e30d4dba4 100644 --- a/contrib/sb-simd/code/instruction-sets/avx.lisp +++ b/contrib/sb-simd/code/instruction-sets/avx.lisp @@ -229,7 +229,7 @@ (f64.2-movemask #:vmovmskpd (u2) (f64.2) :cost 1) (f64.2-%round #:vroundpd (f64.2) (f64.2 imm3) :cost 2) (f64.2-permute #:vpermilpd (f64.2) (f64.2 imm2) :cost 1) - (f64.2-shuffle #:vshufpd (f64.2) (f64.2 f64.2 imm1) :cost 1) + (f64.2-shuffle #:vshufpd (f64.2) (f64.2 f64.2 imm2) :cost 1) (f64.2-movemask #:vmovmskpd (u2) (f64.2) :cost 1) ;; f32.8 (f32.8-from-s32.8 #:vcvtdq2ps (f32.8) (s32.8) :cost 5) @@ -329,7 +329,7 @@ (f64.4-%round #:vroundpd (f64.4) (f64.4 imm3) :cost 2) (f64.4-permute #:vpermilpd (f64.4) (f64.4 imm4) :cost 1) (f64.4-permute128 #:vperm2f128 (f64.4) (f64.4 f64.4 imm8) :cost 1) - (f64.4-shuffle #:vshufpd (f64.4) (f64.4 f64.4 imm2) :cost 1) + (f64.4-shuffle #:vshufpd (f64.4) (f64.4 f64.4 imm4) :cost 1) (f64.4-reverse #:vpermilpd (f64.4) (f64.4) :cost 2 :encoding :fake-vop) (f64.2-from-f64.4 #:vextractf128 (f64.2) (f64.4 imm1) :cost 1) (f64.4-insert-f64.2 #:vinsertf128 (f64.4) (f64.4 f64.2 imm1) :cost 1) diff --git a/contrib/sb-simd/code/instruction-sets/sse2.lisp b/contrib/sb-simd/code/instruction-sets/sse2.lisp index 3984ec3d4..362d6269f 100644 --- a/contrib/sb-simd/code/instruction-sets/sse2.lisp +++ b/contrib/sb-simd/code/instruction-sets/sse2.lisp @@ -114,7 +114,7 @@ (f64.2-sqrt #:sqrtpd (f64.2) (f64.2) :cost 20) (f64.2-unpackhi #:unpckhpd (f64.2) (f64.2 f64.2) :cost 1 :encoding :sse) (f64.2-unpacklo #:unpcklpd (f64.2) (f64.2 f64.2) :cost 1 :encoding :sse) - (f64.2-shuffle #:shufpd (f64.2) (f64.2 imm2) :cost 1) + (f64.2-shuffle #:shufpd (f64.2) (f64.2 f64.2 imm2) :cost 1 :encoding :sse) (f64.2-movemask #:movmskpd (u2) (f64.2) :cost 1) ;; u8.16 (u8.16!-from-u8 #:movq (u8.16) (u8) :cost 1) -- 2.25.1