; RUN: llc -mtriple=amdgcn -amdgpu-set-wave-priority=true -o - %s | \
; RUN: FileCheck %s

; no_setprio: pixel shader whose whole body is one <2 x float> fadd; it issues
; no buffer load. The expectations below require that no s_setprio instruction
; appears between the function label and the shader-part epilog marker.
; CHECK-LABEL: no_setprio:
; CHECK-NOT: s_setprio
; CHECK: ; return to shader part epilog
define amdgpu_ps <2 x float> @no_setprio(<2 x float> %a, <2 x float> %b) "amdgpu-wave-priority-threshold"="1" {
  %s = fadd <2 x float> %a, %b
  ret <2 x float> %s
}

; vmem_in_exit_block: single-block shader that performs one struct buffer load
; and then an fadd on the loaded value. Expectations: s_setprio 3 is emitted
; before buffer_load_dwordx2, and s_setprio 0 is the very next line after the
; load.
; CHECK-LABEL: vmem_in_exit_block:
; CHECK: s_setprio 3
; CHECK: buffer_load_dwordx2
; CHECK-NEXT: s_setprio 0
; CHECK: ; return to shader part epilog
define amdgpu_ps <2 x float> @vmem_in_exit_block(<4 x i32> inreg %p, <2 x float> %x) "amdgpu-wave-priority-threshold"="2" {
  %v = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 0, i32 0)
  %s = fadd <2 x float> %v, %x
  ret <2 x float> %s
}

; branch: the entry block branches on %i == 0. Block %a returns a constant zero
; vector; block %b performs a buffer load followed by an fadd.
; Expectations:
;   - s_setprio 3 is emitted before the conditional branch;
;   - in %b, s_setprio 0 is the line directly after buffer_load_dwordx2;
;   - in %a, s_setprio 0 is the line directly after the block label;
;   - both blocks finish with an s_branch to the same label.
; CHECK-LABEL: branch:
; CHECK: s_setprio 3
; CHECK: s_cbranch_scc0 [[A:.*]]
; CHECK: {{.*}}: ; %b
; CHECK: buffer_load_dwordx2
; CHECK-NEXT: s_setprio 0
; CHECK: s_branch [[EXIT:.*]]
; CHECK: [[A]]: ; %a
; CHECK-NEXT: s_setprio 0
; CHECK: s_branch [[EXIT]]
; CHECK-NEXT: [[EXIT]]:
define amdgpu_ps <2 x float> @branch(<4 x i32> inreg %p, i32 inreg %i, <2 x float> %x) "amdgpu-wave-priority-threshold"="2" {
  %cond = icmp eq i32 %i, 0
  br i1 %cond, label %a, label %b

a:
  ret <2 x float> <float 0.0, float 0.0>

b:
  %v = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 0, i32 0)
  %s = fadd <2 x float> %v, %x
  ret <2 x float> %s
}

; setprio_follows_setprio: the entry block loads %v1 and branches to %a when
; %i != 0, otherwise to %c. Block %a issues a second load, compares its two
; elements and branches to %b or %c. Block %b returns the second load's value;
; block %c returns %v1 + %v1.
; Expectations:
;   - s_setprio 3 precedes the first load;
;   - in %a, s_setprio 0 is the line directly after the second load;
;   - %b contains no s_setprio before its s_branch;
;   - %c starts with s_setprio 0 directly after its label.
; CHECK-LABEL: setprio_follows_setprio:
; CHECK: s_setprio 3
; CHECK: buffer_load_dwordx2
; CHECK: s_cbranch_scc1 [[C:.*]]
; CHECK: {{.*}}: ; %a
; CHECK: buffer_load_dwordx2
; CHECK-NEXT: s_setprio 0
; CHECK: s_cbranch_vccnz [[C]]
; CHECK: {{.*}}: ; %b
; CHECK-NOT: s_setprio
; CHECK: s_branch [[EXIT:.*]]
; CHECK: [[C]]: ; %c
; CHECK-NEXT: s_setprio 0
; CHECK: s_branch [[EXIT]]
; CHECK: [[EXIT]]:
define amdgpu_ps <2 x float> @setprio_follows_setprio(<4 x i32> inreg %p, i32 inreg %i) "amdgpu-wave-priority-threshold"="3" {
entry:
  %v1 = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 0, i32 0)
  %cond1 = icmp ne i32 %i, 0
  br i1 %cond1, label %a, label %c

a:
  %v2 = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 1, i32 0)
  %v20 = extractelement <2 x float> %v2, i32 0
  %v21 = extractelement <2 x float> %v2, i32 1
  %cond2 = fcmp ult float %v20, %v21
  br i1 %cond2, label %b, label %c

b:
  ret <2 x float> %v2

c:
  %v4 = fadd <2 x float> %v1, %v1
  ret <2 x float> %v4
}

; loop: %entry jumps straight into %loop, which repeats while the incremented
; counter %i2 is unsigned-less-than 5. Each iteration loads with %i as the
; first i32 argument of the intrinsic and adds the result to a running sum;
; %exit returns that sum.
; Expectations:
;   - s_setprio 3 is emitted in %entry and no other s_setprio appears before
;     the loop label;
;   - no s_setprio appears inside the loop, before or after the load;
;   - %exit directly follows the loop's back-branch and its first instruction
;     is s_setprio 0.
; CHECK-LABEL: loop:
; CHECK: {{.*}}: ; %entry
; CHECK: s_setprio 3
; CHECK-NOT: s_setprio
; CHECK: [[LOOP:.*]]: ; %loop
; CHECK-NOT: s_setprio
; CHECK: buffer_load_dwordx2
; CHECK-NOT: s_setprio
; CHECK: s_cbranch_scc1 [[LOOP]]
; CHECK-NEXT: {{.*}}: ; %exit
; CHECK-NEXT: s_setprio 0
define amdgpu_ps <2 x float> @loop(<4 x i32> inreg %p) "amdgpu-wave-priority-threshold"="2" {
entry:
  br label %loop

loop:
  %i = phi i32 [0, %entry], [%i2, %loop]
  %sum = phi <2 x float> [<float 0.0, float 0.0>, %entry], [%sum2, %loop]

  %i2 = add i32 %i, 1

  %v = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 %i, i32 0, i32 0, i32 0)
  %sum2 = fadd <2 x float> %sum, %v

  %cond = icmp ult i32 %i2, 5
  br i1 %cond, label %loop, label %exit

exit:
  ret <2 x float> %sum2
}

; edge_split: the entry block loads %v and then either enters a counted loop
; (when %x != 0) or goes to %another_load, which issues a second buffer load.
; The loop multiplies a running product by %v until the incremented counter
; reaches 5; %exit returns the product.
; Expectations:
;   - s_setprio 3 precedes the first load, and no s_setprio appears between
;     that load and the conditional branch;
;   - the block labelled %loop.preheader begins with s_setprio 0;
;   - the loop body contains no s_setprio;
;   - the %exit label is present and no s_setprio appears between it and the
;     s_branch to the return label;
;   - in %another_load, s_setprio 0 is the line directly after the second load.
; CHECK-LABEL: edge_split:
; CHECK: s_setprio 3
; CHECK: buffer_load_dwordx2
; CHECK-NOT: s_setprio
; CHECK: s_cbranch_scc1 [[ANOTHER_LOAD:.*]]
; CHECK: {{.*}}: ; %loop.preheader
; CHECK-NEXT: s_setprio 0
; CHECK: [[LOOP:.*]]: ; %loop
; CHECK-NOT: s_setprio
; CHECK: s_cbranch_scc1 [[LOOP]]
; CHECK: {{.*}}: ; %exit
; CHECK-NOT: s_setprio
; CHECK: s_branch [[RET:.*]]
; CHECK: [[ANOTHER_LOAD]]: ; %another_load
; CHECK: buffer_load_dwordx2
; CHECK-NEXT: s_setprio 0
; CHECK: s_branch [[RET]]
; CHECK: [[RET]]:
define amdgpu_ps <2 x float> @edge_split(<4 x i32> inreg %p, i32 inreg %x) "amdgpu-wave-priority-threshold"="2" {
entry:
  %v = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 0, i32 0)
  %cond = icmp ne i32 %x, 0
  br i1 %cond, label %loop, label %another_load

loop:
  %i = phi i32 [0, %entry], [%i2, %loop]
  %mul = phi <2 x float> [%v, %entry], [%mul2, %loop]

  %i2 = add i32 %i, 1
  %mul2 = fmul <2 x float> %mul, %v

  %cond2 = icmp ult i32 %i2, 5
  br i1 %cond2, label %loop, label %exit

exit:
  ret <2 x float> %mul2

another_load:
  %v2 = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 1, i32 0)
  %sum = fadd <2 x float> %v, %v2
  ret <2 x float> %sum
}

; valu_insts_threshold: the entry block loads %v, applies two <2 x float>
; fadds, then branches on %i == 0. Block %a returns the sum; block %b issues a
; second load and subtracts it. The threshold attribute is "4".
; Expectations:
;   - s_setprio 3 precedes the first load and s_setprio 0 is the line directly
;     after it;
;   - the two vector fadds show up as four v_add_f32_e32 instructions;
;   - in %b, buffer_load_dwordx2 is the line directly after the block label,
;     so nothing (including an s_setprio) sits in front of that load;
;   - both blocks finish with an s_branch to the same label.
; CHECK-LABEL: valu_insts_threshold:
; CHECK: s_setprio 3
; CHECK: buffer_load_dwordx2
; CHECK-NEXT: s_setprio 0
; CHECK-COUNT-4: v_add_f32_e32
; CHECK: s_cbranch_scc0 [[A:.*]]
; CHECK: {{.*}}: ; %b
; CHECK-NEXT: buffer_load_dwordx2
; CHECK: s_branch [[END:.*]]
; CHECK: [[A]]: ; %a
; CHECK: s_branch [[END]]
; CHECK: [[END]]:
define amdgpu_ps <2 x float> @valu_insts_threshold(<4 x i32> inreg %p, i32 inreg %i) "amdgpu-wave-priority-threshold"="4" {
  %v = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 0, i32 0)
  %add = fadd <2 x float> %v, %v
  %add2 = fadd <2 x float> %add, %add

  %cond = icmp eq i32 %i, 0
  br i1 %cond, label %a, label %b

a:
  ret <2 x float> %add2

b:
  %v2 = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 1, i32 0, i32 0)
  %sub = fsub <2 x float> %add2, %v2
  ret <2 x float> %sub
}

; valu_insts_threshold2: same IR body as @valu_insts_threshold, but with the
; threshold attribute raised to "5". The expectation is that no s_setprio is
; emitted anywhere before the end-of-function marker.
; CHECK-LABEL: valu_insts_threshold2:
; CHECK-NOT: s_setprio
; CHECK: ; -- End function
define amdgpu_ps <2 x float> @valu_insts_threshold2(<4 x i32> inreg %p, i32 inreg %i) "amdgpu-wave-priority-threshold"="5" {
  %v = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 0, i32 0, i32 0)
  %add = fadd <2 x float> %v, %v
  %add2 = fadd <2 x float> %add, %add

  %cond = icmp eq i32 %i, 0
  br i1 %cond, label %a, label %b

a:
  ret <2 x float> %add2

b:
  %v2 = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %p, i32 0, i32 1, i32 0, i32 0)
  %sub = fsub <2 x float> %add2, %v2
  ret <2 x float> %sub
}

; Declaration of the struct buffer-load intrinsic called by the test functions
; in this file; it returns <2 x float>.
declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) nounwind