Skip to content

Commit 728fa07

Browse files
authored
[40.0] Backport Cranelift: x64: fix incorrect load-sinking in copysign operator. (#12437)
* Cranelift: x64: do not incorrectly widen loads sunk into `fcopysign`. The implementation of the `fcopysign` operator uses vector bitwise AND instructions on the floating-point/vector registers containing the inputs to the operator. This is a reasonable implementation as the instruction set does not have scalar (single-lane) bitwise operators. However, when load-sinking automatically kicks in for an operand to an `andps`, it can turn a 64-bit load (`f64.load`) into a 128-bit load incorrectly. This load-widening can cause out-of-bounds accesses where they were not expected. When dynamic bounds checks are enabled, we compile assuming the correct load-operator width is codegen'd; a too-wide load could read beyond the checked bound, either into unmapped memory (crashing the process) or, worse, valid data outside the sandbox. In the case of `fcopysign` the result of that read is not directly available, because it will go into the high (unused) lane, but the out-of-bounds read itself is a problem. Thanks to louismerlin for reporting! * Re-bless Cranelift filetests. * Update release notes.
1 parent bb9ef67 commit 728fa07

File tree

5 files changed

+109
-30
lines changed

5 files changed

+109
-30
lines changed

RELEASES.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
## 40.0.3
2+
3+
Released 2026-01-26.
4+
5+
### Fixed
6+
7+
* Fixed a bug in lowering of `f64.copysign` on x86-64 whereby when combined
8+
with an `f64.load`, the resulting machine code could read 16 bytes rather
9+
than 8 bytes. This could result in a segfault when Wasmtime is configured
10+
without signals-based traps.
11+
12+
--------------------------------------------------------------------------------
13+
114
## 40.0.2
215

316
Released 2026-01-14.

cranelift/codegen/src/isa/x64/lower.isle

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4276,13 +4276,17 @@
42764276
;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
42774277

42784278
(rule (lower (has_type $F32 (fcopysign a @ (value_type $F32) b)))
4279-
(let ((sign_bit Xmm (imm $F32 0x80000000)))
4279+
(let ((sign_bit Xmm (imm $F32 0x80000000))
4280+
(a Xmm a) ;; force into reg so we don't sink a 128-bit load.
4281+
(b Xmm b))
42804282
(x64_orps
42814283
(x64_andnps sign_bit a)
42824284
(x64_andps sign_bit b))))
42834285

42844286
(rule (lower (has_type $F64 (fcopysign a @ (value_type $F64) b)))
4285-
(let ((sign_bit Xmm (imm $F64 0x8000000000000000)))
4287+
(let ((sign_bit Xmm (imm $F64 0x8000000000000000))
4288+
(a Xmm a) ;; force into reg so we don't sink a 128-bit load.
4289+
(b Xmm b))
42864290
(x64_orpd
42874291
(x64_andnpd sign_bit a)
42884292
(x64_andpd sign_bit b))))

cranelift/filetests/filetests/isa/x64/simd-bitwise-avx.clif

Lines changed: 18 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,14 @@ block0(v0: i64):
3939
; pushq %rbp
4040
; movq %rsp, %rbp
4141
; block0:
42-
; movl $0x80000000, %eax
43-
; vmovd %eax, %xmm4
44-
; vandnps (%rip), %xmm4, %xmm6
45-
; vandps (%rdi), %xmm4, %xmm0
46-
; vorps %xmm0, %xmm6, %xmm0
42+
; uninit %xmm0
43+
; vxorps %xmm0, %xmm0, %xmm2
44+
; vmovss (%rdi), %xmm1
45+
; movl $0x80000000, %r8d
46+
; vmovd %r8d, %xmm7
47+
; vandnps %xmm2, %xmm7, %xmm2
48+
; vandps %xmm1, %xmm7, %xmm3
49+
; vorps %xmm3, %xmm2, %xmm0
4750
; movq %rbp, %rsp
4851
; popq %rbp
4952
; retq
@@ -53,29 +56,16 @@ block0(v0: i64):
5356
; pushq %rbp
5457
; movq %rsp, %rbp
5558
; block1: ; offset 0x4
56-
; movl $0x80000000, %eax
57-
; vmovd %eax, %xmm4
58-
; vandnps 0x1b(%rip), %xmm4, %xmm6
59-
; vandps (%rdi), %xmm4, %xmm0
60-
; vorps %xmm0, %xmm6, %xmm0
61-
; movq %rbp, %rsp
62-
; popq %rbp
63-
; retq
64-
; addb %al, (%rax)
65-
; addb %al, (%rax)
66-
; addb %al, (%rax)
67-
; addb %al, (%rax)
68-
; addb %al, (%rax)
69-
; addb %al, (%rax)
70-
; addb %al, (%rax)
71-
; addb %al, (%rax)
72-
; addb %al, (%rax)
73-
; addb %al, (%rax)
74-
; addb %al, (%rax)
75-
; addb %al, (%rax)
76-
; addb %al, (%rax)
77-
; addb %al, (%rax)
78-
; addb %al, (%rax)
59+
; vxorps %xmm0, %xmm0, %xmm2
60+
; vmovss (%rdi), %xmm1
61+
; movl $0x80000000, %r8d
62+
; vmovd %r8d, %xmm7
63+
; vandnps %xmm2, %xmm7, %xmm2
64+
; vandps %xmm1, %xmm7, %xmm3
65+
; vorps %xmm3, %xmm2, %xmm0
66+
; movq %rbp, %rsp
67+
; popq %rbp
68+
; retq
7969

8070
function %bor_f32x4(f32x4, f32x4) -> f32x4 {
8171
block0(v0: f32x4, v1: f32x4):

tests/disas/f64-copysign.wat

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
;;! target = "x86_64"
2+
;;! test = "compile"
3+
;;!flags = "-Ccranelift-has-avx"
4+
5+
;; This would previously segfault or trap on x86-64 because the
6+
;; `f64.copysign` sunk the `f64.load` and widened it to 128 bits
7+
;; incorrectly.
8+
9+
(module
10+
;; Define a linear memory with 1 page (64KiB)
11+
(memory 1)
12+
(export "f" (func 0))
13+
(func (result i32)
14+
;; Push i32 constant 0 (destination address for the store)
15+
i32.const 0
16+
;; Push f64 constant 0.0 (sign source for copysign)
17+
f64.const 0
18+
;; Push i32 constant 32 (base address for the load)
19+
i32.const 32
20+
;; Load f64 from memory at address (32 + 65491), with align=1
21+
f64.load offset=65491 align=1
22+
;; Apply copysign: take magnitude from loaded f64 and sign from 0.0
23+
f64.copysign
24+
;; Store f64 to memory at address 0, with align=1
25+
f64.store align=1
26+
;; Return 0.
27+
i32.const 0
28+
)
29+
)
30+
;; wasm[0]::function[0]:
31+
;; pushq %rbp
32+
;; movq %rsp, %rbp
33+
;; movq 0x38(%rdi), %rcx
34+
;; vmovsd 0xfff3(%rcx), %xmm4
35+
;; vxorpd %xmm3, %xmm3, %xmm5
36+
;; movabsq $9223372036854775808, %r11
37+
;; vmovq %r11, %xmm2
38+
;; vandnpd %xmm5, %xmm2, %xmm5
39+
;; vandpd %xmm4, %xmm2, %xmm6
40+
;; vorpd %xmm6, %xmm5, %xmm0
41+
;; vmovsd %xmm0, (%rcx)
42+
;; xorl %eax, %eax
43+
;; movq %rbp, %rsp
44+
;; popq %rbp
45+
;; retq
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
;; This would previously segfault or trap on x86-64 because the
2+
;; `f64.copysign` sunk the `f64.load` and widened it to 128 bits
3+
;; incorrectly.
4+
5+
(module
6+
;; Define a linear memory with 1 page (64KiB)
7+
(memory 1)
8+
(export "f" (func 0))
9+
(func (result i32)
10+
;; Push i32 constant 0 (destination address for the store)
11+
i32.const 0
12+
;; Push f64 constant 0.0 (sign source for copysign)
13+
f64.const 0
14+
;; Push i32 constant 32 (base address for the load)
15+
i32.const 32
16+
;; Load f64 from memory at address (32 + 65491), with align=1
17+
f64.load offset=65491 align=1
18+
;; Apply copysign: take magnitude from loaded f64 and sign from 0.0
19+
f64.copysign
20+
;; Store f64 to memory at address 0, with align=1
21+
f64.store align=1
22+
;; Return 0.
23+
i32.const 0
24+
)
25+
)
26+
27+
(assert_return (invoke "f") (i32.const 0))

0 commit comments

Comments
 (0)