79 lines
3.0 KiB
LLVM
79 lines
3.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -march=hexagon < %s | FileCheck %s
|
|
|
|
; In this testcase, when loads were moved close to users, they were actually
|
|
; moved right before the consuming stores. This was after the store group
|
|
; was moved, so the loads and stores ended up being interleaved. This violated
|
|
; the assumption in store realigning that all loads were available before the
|
|
; first store, causing some code that depends on the loads to be inserted
|
|
; before the loads it uses.
|
|
; Just make sure that this compiles ok.
|
|
|
|
; Function Attrs: nounwind
|
|
; f0 is a vector copy loop used as the regression input for this test.
;   %a0 - source pointer (read through the loads in %b2)
;   %a1 - destination pointer (written through the stores in %b2)
;   %a2 - only compared against zero in %b0; zero means return immediately
; Each pass through %b2 loads four consecutive <16 x i32> vectors from the
; source, stores them to the matching destination slots, and advances both
; pointers by four vectors. %b2 branches back to itself unconditionally, so
; the IR has no loop exit once the loop has been entered.
; The FileCheck assertions that follow the define line are autogenerated (see
; the NOTE at the top of the file); regenerate them with the script instead of
; editing them by hand.
define void @f0(i16* noalias nocapture readonly %a0, i16* noalias nocapture %a1, i32 %a2) #0 {
|
|
; CHECK-LABEL: f0:
|
|
; CHECK: // %bb.0: // %b0
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: p0 = cmp.eq(r2,#0)
|
|
; CHECK-NEXT: if (p0.new) jumpr:nt r31
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: .p2align 4
|
|
; CHECK-NEXT: .LBB0_1: // %b2
|
|
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: v0.cur = vmem(r0+#0)
|
|
; CHECK-NEXT: vmem(r1+#0) = v0
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: v29.cur = vmem(r0+#1)
|
|
; CHECK-NEXT: vmem(r1+#1) = v29
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: v30.cur = vmem(r0+#2)
|
|
; CHECK-NEXT: vmem(r1+#2) = v30
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: r0 = add(r0,#256)
|
|
; CHECK-NEXT: r1 = add(r1,#256)
|
|
; CHECK-NEXT: v31.cur = vmem(r0+#3)
|
|
; CHECK-NEXT: vmem(r1+#3) = v31
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: jump .LBB0_1
|
|
; CHECK-NEXT: }
|
|
; Entry: skip the loop entirely when %a2 is zero.
b0:
|
|
%v0 = icmp eq i32 %a2, 0
|
|
br i1 %v0, label %b3, label %b1
|
|
|
|
; Preheader: reinterpret both i16 pointers as pointers to <16 x i32> vectors
; (16 x 4 bytes = 64 bytes each). %v1 is the destination, %v2 the source.
b1: ; preds = %b0
|
|
%v1 = bitcast i16* %a1 to <16 x i32>*
|
|
%v2 = bitcast i16* %a0 to <16 x i32>*
|
|
br label %b2
|
|
|
|
; Loop body. %v3 is the current destination vector pointer and %v4 the current
; source vector pointer; each starts from the preheader value and is replaced
; by the pointer advanced by four vectors (%v16 / %v11) on the back edge.
b2: ; preds = %b2, %b1
|
|
%v3 = phi <16 x i32>* [ %v16, %b2 ], [ %v1, %b1 ]
|
|
%v4 = phi <16 x i32>* [ %v11, %b2 ], [ %v2, %b1 ]
|
|
; Load source slots 0..3. In the IR all four loads precede the first store.
%v5 = getelementptr inbounds <16 x i32>, <16 x i32>* %v4, i32 1
|
|
%v6 = load <16 x i32>, <16 x i32>* %v4, align 64
|
|
%v7 = getelementptr inbounds <16 x i32>, <16 x i32>* %v4, i32 2
|
|
%v8 = load <16 x i32>, <16 x i32>* %v5, align 64
|
|
%v9 = getelementptr inbounds <16 x i32>, <16 x i32>* %v4, i32 3
|
|
%v10 = load <16 x i32>, <16 x i32>* %v7, align 64
|
|
; %v11 = source pointer advanced by four vectors; feeds the %v4 phi.
%v11 = getelementptr inbounds <16 x i32>, <16 x i32>* %v4, i32 4
|
|
%v12 = load <16 x i32>, <16 x i32>* %v9, align 64
|
|
; Store the four loaded vectors to destination slots 0..3, in the same order.
%v13 = getelementptr inbounds <16 x i32>, <16 x i32>* %v3, i32 1
|
|
store <16 x i32> %v6, <16 x i32>* %v3, align 64
|
|
%v14 = getelementptr inbounds <16 x i32>, <16 x i32>* %v3, i32 2
|
|
store <16 x i32> %v8, <16 x i32>* %v13, align 64
|
|
%v15 = getelementptr inbounds <16 x i32>, <16 x i32>* %v3, i32 3
|
|
store <16 x i32> %v10, <16 x i32>* %v14, align 64
|
|
; %v16 = destination pointer advanced by four vectors; feeds the %v3 phi.
%v16 = getelementptr inbounds <16 x i32>, <16 x i32>* %v3, i32 4
|
|
store <16 x i32> %v12, <16 x i32>* %v15, align 64
|
|
; Unconditional back edge: there is no exit test in this loop.
br label %b2
|
|
|
|
; Reached only from %b0, when %a2 is zero.
b3: ; preds = %b0
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0, applied to @f0: nounwind plus the target feature string
; "+hvxv65,+hvx-length64b" (presumably HVX v65 with 64-byte vectors, which
; would match the 64-byte <16 x i32> accesses above - confirm against the
; Hexagon target feature list).
attributes #0 = { nounwind "target-features"="+hvxv65,+hvx-length64b" }
|