439 lines
18 KiB
LLVM
439 lines
18 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn-amdhsa -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s
|
|
; RUN: opt -S -si-annotate-control-flow -mtriple=amdgcn-amdhsa -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI-OPT %s
|
|
|
|
define hidden void @widget() {
|
|
; GCN-LABEL: widget:
|
|
; GCN: ; %bb.0: ; %bb
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
|
|
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
|
; GCN-NEXT: s_mov_b64 exec, s[16:17]
|
|
; GCN-NEXT: v_writelane_b32 v40, s33, 16
|
|
; GCN-NEXT: s_mov_b32 s33, s32
|
|
; GCN-NEXT: s_addk_i32 s32, 0x400
|
|
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
|
|
; GCN-NEXT: v_writelane_b32 v40, s30, 0
|
|
; GCN-NEXT: v_writelane_b32 v40, s31, 1
|
|
; GCN-NEXT: v_writelane_b32 v40, s34, 2
|
|
; GCN-NEXT: v_writelane_b32 v40, s35, 3
|
|
; GCN-NEXT: v_writelane_b32 v40, s36, 4
|
|
; GCN-NEXT: v_writelane_b32 v40, s37, 5
|
|
; GCN-NEXT: v_writelane_b32 v40, s38, 6
|
|
; GCN-NEXT: v_writelane_b32 v40, s39, 7
|
|
; GCN-NEXT: v_writelane_b32 v40, s40, 8
|
|
; GCN-NEXT: v_writelane_b32 v40, s41, 9
|
|
; GCN-NEXT: v_writelane_b32 v40, s42, 10
|
|
; GCN-NEXT: v_writelane_b32 v40, s43, 11
|
|
; GCN-NEXT: v_writelane_b32 v40, s44, 12
|
|
; GCN-NEXT: v_writelane_b32 v40, s45, 13
|
|
; GCN-NEXT: v_writelane_b32 v40, s46, 14
|
|
; GCN-NEXT: v_writelane_b32 v40, s47, 15
|
|
; GCN-NEXT: v_mov_b32_e32 v41, v31
|
|
; GCN-NEXT: s_mov_b32 s42, s15
|
|
; GCN-NEXT: s_mov_b32 s43, s14
|
|
; GCN-NEXT: s_mov_b32 s44, s13
|
|
; GCN-NEXT: s_mov_b32 s45, s12
|
|
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
|
|
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
|
|
; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
|
|
; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
|
|
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
|
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-NEXT: flat_load_dword v0, v[0:1]
|
|
; GCN-NEXT: s_mov_b64 s[4:5], 0
|
|
; GCN-NEXT: s_mov_b64 s[8:9], -1
|
|
; GCN-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0
|
|
; GCN-NEXT: s_mov_b64 s[46:47], 0
|
|
; GCN-NEXT: s_mov_b64 s[6:7], 0
|
|
; GCN-NEXT: s_cbranch_vccz .LBB0_9
|
|
; GCN-NEXT: ; %bb.1: ; %Flow
|
|
; GCN-NEXT: s_andn2_b64 vcc, exec, s[8:9]
|
|
; GCN-NEXT: s_cbranch_vccz .LBB0_10
|
|
; GCN-NEXT: .LBB0_2: ; %Flow1
|
|
; GCN-NEXT: s_andn2_b64 vcc, exec, s[6:7]
|
|
; GCN-NEXT: s_cbranch_vccnz .LBB0_4
|
|
; GCN-NEXT: .LBB0_3: ; %bb9
|
|
; GCN-NEXT: s_getpc_b64 s[16:17]
|
|
; GCN-NEXT: s_add_u32 s16, s16, wibble@rel32@lo+4
|
|
; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12
|
|
; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
|
|
; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
|
|
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
|
|
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
|
|
; GCN-NEXT: s_mov_b32 s12, s45
|
|
; GCN-NEXT: s_mov_b32 s13, s44
|
|
; GCN-NEXT: s_mov_b32 s14, s43
|
|
; GCN-NEXT: s_mov_b32 s15, s42
|
|
; GCN-NEXT: v_mov_b32_e32 v31, v41
|
|
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
|
|
; GCN-NEXT: s_mov_b64 s[4:5], 0
|
|
; GCN-NEXT: s_andn2_b64 s[6:7], s[46:47], exec
|
|
; GCN-NEXT: s_and_b64 s[8:9], vcc, exec
|
|
; GCN-NEXT: s_or_b64 s[46:47], s[6:7], s[8:9]
|
|
; GCN-NEXT: .LBB0_4: ; %Flow2
|
|
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[46:47]
|
|
; GCN-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
|
|
; GCN-NEXT: s_cbranch_execz .LBB0_6
|
|
; GCN-NEXT: ; %bb.5: ; %bb12
|
|
; GCN-NEXT: v_mov_b32_e32 v2, 0
|
|
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
|
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-NEXT: flat_store_dword v[0:1], v2
|
|
; GCN-NEXT: .LBB0_6: ; %Flow3
|
|
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
|
|
; GCN-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
|
; GCN-NEXT: s_cbranch_vccnz .LBB0_8
|
|
; GCN-NEXT: ; %bb.7: ; %bb7
|
|
; GCN-NEXT: s_getpc_b64 s[16:17]
|
|
; GCN-NEXT: s_add_u32 s16, s16, wibble@rel32@lo+4
|
|
; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12
|
|
; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
|
|
; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
|
|
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
|
|
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
|
|
; GCN-NEXT: s_mov_b32 s12, s45
|
|
; GCN-NEXT: s_mov_b32 s13, s44
|
|
; GCN-NEXT: s_mov_b32 s14, s43
|
|
; GCN-NEXT: s_mov_b32 s15, s42
|
|
; GCN-NEXT: v_mov_b32_e32 v31, v41
|
|
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
|
|
; GCN-NEXT: .LBB0_8: ; %UnifiedReturnBlock
|
|
; GCN-NEXT: v_readlane_b32 s47, v40, 15
|
|
; GCN-NEXT: v_readlane_b32 s46, v40, 14
|
|
; GCN-NEXT: v_readlane_b32 s45, v40, 13
|
|
; GCN-NEXT: v_readlane_b32 s44, v40, 12
|
|
; GCN-NEXT: v_readlane_b32 s43, v40, 11
|
|
; GCN-NEXT: v_readlane_b32 s42, v40, 10
|
|
; GCN-NEXT: v_readlane_b32 s41, v40, 9
|
|
; GCN-NEXT: v_readlane_b32 s40, v40, 8
|
|
; GCN-NEXT: v_readlane_b32 s39, v40, 7
|
|
; GCN-NEXT: v_readlane_b32 s38, v40, 6
|
|
; GCN-NEXT: v_readlane_b32 s37, v40, 5
|
|
; GCN-NEXT: v_readlane_b32 s36, v40, 4
|
|
; GCN-NEXT: v_readlane_b32 s35, v40, 3
|
|
; GCN-NEXT: v_readlane_b32 s34, v40, 2
|
|
; GCN-NEXT: v_readlane_b32 s31, v40, 1
|
|
; GCN-NEXT: v_readlane_b32 s30, v40, 0
|
|
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
|
|
; GCN-NEXT: s_addk_i32 s32, 0xfc00
|
|
; GCN-NEXT: v_readlane_b32 s33, v40, 16
|
|
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
|
|
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
|
; GCN-NEXT: s_mov_b64 exec, s[4:5]
|
|
; GCN-NEXT: s_waitcnt vmcnt(0)
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
; GCN-NEXT: .LBB0_9: ; %bb2
|
|
; GCN-NEXT: v_cmp_eq_u32_e64 s[46:47], 21, v0
|
|
; GCN-NEXT: v_cmp_ne_u32_e64 s[6:7], 21, v0
|
|
; GCN-NEXT: s_mov_b64 vcc, exec
|
|
; GCN-NEXT: s_cbranch_execnz .LBB0_2
|
|
; GCN-NEXT: .LBB0_10: ; %bb4
|
|
; GCN-NEXT: s_mov_b64 s[4:5], -1
|
|
; GCN-NEXT: v_cmp_ne_u32_e64 s[6:7], 9, v0
|
|
; GCN-NEXT: s_andn2_b64 vcc, exec, s[6:7]
|
|
; GCN-NEXT: s_cbranch_vccz .LBB0_3
|
|
; GCN-NEXT: s_branch .LBB0_4
|
|
; SI-OPT-LABEL: @widget(
|
|
; SI-OPT-NEXT: bb:
|
|
; SI-OPT-NEXT: [[TMP:%.*]] = load i32, i32 addrspace(1)* null, align 16
|
|
; SI-OPT-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP]], 21
|
|
; SI-OPT-NEXT: br i1 [[TMP1]], label [[BB4:%.*]], label [[BB2:%.*]]
|
|
; SI-OPT: bb2:
|
|
; SI-OPT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP]], 21
|
|
; SI-OPT-NEXT: br i1 [[TMP3]], label [[BB12:%.*]], label [[BB9:%.*]]
|
|
; SI-OPT: bb4:
|
|
; SI-OPT-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP]], 9
|
|
; SI-OPT-NEXT: br i1 [[TMP5]], label [[BB7:%.*]], label [[BB9]]
|
|
; SI-OPT: bb6:
|
|
; SI-OPT-NEXT: ret void
|
|
; SI-OPT: bb7:
|
|
; SI-OPT-NEXT: [[TMP8:%.*]] = call float @wibble()
|
|
; SI-OPT-NEXT: ret void
|
|
; SI-OPT: bb9:
|
|
; SI-OPT-NEXT: [[TMP10:%.*]] = call float @wibble()
|
|
; SI-OPT-NEXT: [[TMP11:%.*]] = fcmp nsz ogt float [[TMP10]], 0.000000e+00
|
|
; SI-OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP11]])
|
|
; SI-OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
|
|
; SI-OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
|
|
; SI-OPT-NEXT: br i1 [[TMP1]], label [[BB6:%.*]], label [[BB9_BB12_CRIT_EDGE:%.*]]
|
|
; SI-OPT: bb9.bb12_crit_edge:
|
|
; SI-OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
|
|
; SI-OPT-NEXT: br label [[BB12]]
|
|
; SI-OPT: bb12:
|
|
; SI-OPT-NEXT: store float 0.000000e+00, float addrspace(1)* null, align 8
|
|
; SI-OPT-NEXT: ret void
|
|
bb:
|
|
%tmp = load i32, i32 addrspace(1)* null, align 16
|
|
%tmp1 = icmp slt i32 %tmp, 21
|
|
br i1 %tmp1, label %bb4, label %bb2
|
|
|
|
bb2: ; preds = %bb
|
|
%tmp3 = icmp eq i32 %tmp, 21
|
|
br i1 %tmp3, label %bb12, label %bb9
|
|
|
|
bb4: ; preds = %bb
|
|
%tmp5 = icmp eq i32 %tmp, 9
|
|
br i1 %tmp5, label %bb7, label %bb9
|
|
|
|
bb6: ; preds = %bb9
|
|
ret void
|
|
|
|
bb7: ; preds = %bb4
|
|
%tmp8 = call float @wibble()
|
|
ret void
|
|
|
|
bb9: ; preds = %bb4, %bb2
|
|
%tmp10 = call float @wibble()
|
|
%tmp11 = fcmp nsz ogt float %tmp10, 0.000000e+00
|
|
br i1 %tmp11, label %bb6, label %bb12
|
|
|
|
bb12: ; preds = %bb9, %bb2
|
|
store float 0.000000e+00, float addrspace(1)* null, align 8
|
|
ret void
|
|
}
|
|
|
|
declare hidden float @wibble() local_unnamed_addr
|
|
|
|
|
|
define hidden void @blam() {
|
|
; SI-OPT-LABEL: @blam(
|
|
; SI-OPT-NEXT: bb:
|
|
; SI-OPT-NEXT: [[TMP:%.*]] = load float, float* null, align 16
|
|
; SI-OPT-NEXT: br label [[BB2:%.*]]
|
|
; SI-OPT: bb1:
|
|
; SI-OPT-NEXT: br label [[BB2]]
|
|
; SI-OPT: bb2:
|
|
; SI-OPT-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
|
|
; SI-OPT-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* null, i32 [[TID]]
|
|
; SI-OPT-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 16
|
|
; SI-OPT-NEXT: store float 0.000000e+00, float addrspace(5)* null, align 8
|
|
; SI-OPT-NEXT: br label [[BB4:%.*]]
|
|
; SI-OPT: bb4:
|
|
; SI-OPT-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP3]], 3
|
|
; SI-OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP5]])
|
|
; SI-OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
|
|
; SI-OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
|
|
; SI-OPT-NEXT: br i1 [[TMP1]], label [[BB8:%.*]], label [[BB6:%.*]]
|
|
; SI-OPT: bb6:
|
|
; SI-OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
|
|
; SI-OPT-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP3]], 3
|
|
; SI-OPT-NEXT: br i1 [[TMP7]], label [[BB11:%.*]], label [[BB1:%.*]]
|
|
; SI-OPT: bb8:
|
|
; SI-OPT-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP3]], 1
|
|
; SI-OPT-NEXT: [[TMP3:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP9]])
|
|
; SI-OPT-NEXT: [[TMP4:%.*]] = extractvalue { i1, i64 } [[TMP3]], 0
|
|
; SI-OPT-NEXT: [[TMP5:%.*]] = extractvalue { i1, i64 } [[TMP3]], 1
|
|
; SI-OPT-NEXT: br i1 [[TMP4]], label [[BB10:%.*]], label [[BB8_BB1_CRIT_EDGE:%.*]]
|
|
; SI-OPT: bb8.bb1_crit_edge:
|
|
; SI-OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP5]])
|
|
; SI-OPT-NEXT: br label [[BB1]]
|
|
; SI-OPT: bb10:
|
|
; SI-OPT-NEXT: store float 0x7FF8000000000000, float addrspace(5)* null, align 16
|
|
; SI-OPT-NEXT: br label [[BB18:%.*]]
|
|
; SI-OPT: bb11:
|
|
; SI-OPT-NEXT: [[TMP12:%.*]] = call float @spam()
|
|
; SI-OPT-NEXT: [[TMP13:%.*]] = fcmp nsz oeq float [[TMP12]], 0.000000e+00
|
|
; SI-OPT-NEXT: [[TMP6:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP13]])
|
|
; SI-OPT-NEXT: [[TMP7:%.*]] = extractvalue { i1, i64 } [[TMP6]], 0
|
|
; SI-OPT-NEXT: [[TMP8:%.*]] = extractvalue { i1, i64 } [[TMP6]], 1
|
|
; SI-OPT-NEXT: br i1 [[TMP7]], label [[BB2]], label [[BB14:%.*]]
|
|
; SI-OPT: bb14:
|
|
; SI-OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]])
|
|
; SI-OPT-NEXT: [[TMP15:%.*]] = fcmp nsz oeq float [[TMP]], 0.000000e+00
|
|
; SI-OPT-NEXT: [[TMP9:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]])
|
|
; SI-OPT-NEXT: [[TMP10:%.*]] = extractvalue { i1, i64 } [[TMP9]], 0
|
|
; SI-OPT-NEXT: [[TMP11:%.*]] = extractvalue { i1, i64 } [[TMP9]], 1
|
|
; SI-OPT-NEXT: br i1 [[TMP10]], label [[BB17:%.*]], label [[BB16:%.*]]
|
|
; SI-OPT: bb16:
|
|
; SI-OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP11]])
|
|
; SI-OPT-NEXT: store float 0x7FF8000000000000, float addrspace(5)* null, align 16
|
|
; SI-OPT-NEXT: br label [[BB17]]
|
|
; SI-OPT: bb17:
|
|
; SI-OPT-NEXT: store float [[TMP]], float addrspace(5)* null, align 16
|
|
; SI-OPT-NEXT: br label [[BB18]]
|
|
; SI-OPT: bb18:
|
|
; SI-OPT-NEXT: store float 0x7FF8000000000000, float addrspace(5)* null, align 4
|
|
; SI-OPT-NEXT: br label [[BB2]]
|
|
;
|
|
; GCN-LABEL: blam:
|
|
; GCN: ; %bb.0: ; %bb
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
|
|
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
|
|
; GCN-NEXT: s_mov_b64 exec, s[16:17]
|
|
; GCN-NEXT: v_writelane_b32 v40, s33, 18
|
|
; GCN-NEXT: s_mov_b32 s33, s32
|
|
; GCN-NEXT: s_addk_i32 s32, 0x800
|
|
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
|
|
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
|
|
; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
|
|
; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
|
|
; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill
|
|
; GCN-NEXT: v_writelane_b32 v40, s30, 0
|
|
; GCN-NEXT: v_writelane_b32 v40, s31, 1
|
|
; GCN-NEXT: v_writelane_b32 v40, s34, 2
|
|
; GCN-NEXT: v_writelane_b32 v40, s35, 3
|
|
; GCN-NEXT: v_writelane_b32 v40, s36, 4
|
|
; GCN-NEXT: v_writelane_b32 v40, s37, 5
|
|
; GCN-NEXT: v_writelane_b32 v40, s38, 6
|
|
; GCN-NEXT: v_writelane_b32 v40, s39, 7
|
|
; GCN-NEXT: v_writelane_b32 v40, s40, 8
|
|
; GCN-NEXT: v_writelane_b32 v40, s41, 9
|
|
; GCN-NEXT: v_writelane_b32 v40, s42, 10
|
|
; GCN-NEXT: v_writelane_b32 v40, s43, 11
|
|
; GCN-NEXT: v_writelane_b32 v40, s44, 12
|
|
; GCN-NEXT: v_writelane_b32 v40, s45, 13
|
|
; GCN-NEXT: v_writelane_b32 v40, s46, 14
|
|
; GCN-NEXT: v_writelane_b32 v40, s47, 15
|
|
; GCN-NEXT: v_writelane_b32 v40, s48, 16
|
|
; GCN-NEXT: v_writelane_b32 v40, s49, 17
|
|
; GCN-NEXT: v_mov_b32_e32 v41, v31
|
|
; GCN-NEXT: s_mov_b32 s44, s15
|
|
; GCN-NEXT: s_mov_b32 s45, s14
|
|
; GCN-NEXT: s_mov_b32 s46, s13
|
|
; GCN-NEXT: s_mov_b32 s47, s12
|
|
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
|
|
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
|
|
; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
|
|
; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
|
|
; GCN-NEXT: s_mov_b64 s[4:5], 0
|
|
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
|
; GCN-NEXT: v_mov_b32_e32 v1, 0
|
|
; GCN-NEXT: v_and_b32_e32 v2, 0x3ff, v41
|
|
; GCN-NEXT: v_mov_b32_e32 v43, 0
|
|
; GCN-NEXT: flat_load_dword v44, v[0:1]
|
|
; GCN-NEXT: v_mov_b32_e32 v45, 0x7fc00000
|
|
; GCN-NEXT: s_getpc_b64 s[48:49]
|
|
; GCN-NEXT: s_add_u32 s48, s48, spam@rel32@lo+4
|
|
; GCN-NEXT: s_addc_u32 s49, s49, spam@rel32@hi+12
|
|
; GCN-NEXT: v_lshlrev_b32_e32 v42, 2, v2
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_cmp_eq_f32_e64 s[42:43], 0, v44
|
|
; GCN-NEXT: s_branch .LBB1_3
|
|
; GCN-NEXT: .LBB1_1: ; %bb10
|
|
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
|
|
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
|
|
; GCN-NEXT: buffer_store_dword v45, off, s[0:3], 0
|
|
; GCN-NEXT: .LBB1_2: ; %bb18
|
|
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
|
|
; GCN-NEXT: buffer_store_dword v45, off, s[0:3], 0
|
|
; GCN-NEXT: s_mov_b64 s[4:5], 0
|
|
; GCN-NEXT: .LBB1_3: ; %bb2
|
|
; GCN-NEXT: ; =>This Loop Header: Depth=1
|
|
; GCN-NEXT: ; Child Loop BB1_4 Depth 2
|
|
; GCN-NEXT: s_mov_b64 s[6:7], 0
|
|
; GCN-NEXT: .LBB1_4: ; %bb2
|
|
; GCN-NEXT: ; Parent Loop BB1_3 Depth=1
|
|
; GCN-NEXT: ; => This Inner Loop Header: Depth=2
|
|
; GCN-NEXT: flat_load_dword v0, v[42:43]
|
|
; GCN-NEXT: buffer_store_dword v43, off, s[0:3], 0
|
|
; GCN-NEXT: s_waitcnt vmcnt(1)
|
|
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 3, v0
|
|
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
|
|
; GCN-NEXT: s_cbranch_execz .LBB1_6
|
|
; GCN-NEXT: ; %bb.5: ; %bb8
|
|
; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2
|
|
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
|
|
; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
|
|
; GCN-NEXT: s_mov_b64 s[4:5], 0
|
|
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
|
|
; GCN-NEXT: s_cbranch_execnz .LBB1_4
|
|
; GCN-NEXT: s_branch .LBB1_1
|
|
; GCN-NEXT: .LBB1_6: ; %bb6
|
|
; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2
|
|
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
|
|
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
|
|
; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
|
; GCN-NEXT: s_mov_b64 s[6:7], 0
|
|
; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
|
; GCN-NEXT: s_cbranch_execnz .LBB1_4
|
|
; GCN-NEXT: ; %bb.7: ; %bb11
|
|
; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2
|
|
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
|
; GCN-NEXT: s_mov_b64 s[4:5], s[40:41]
|
|
; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
|
|
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
|
|
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
|
|
; GCN-NEXT: s_mov_b32 s12, s47
|
|
; GCN-NEXT: s_mov_b32 s13, s46
|
|
; GCN-NEXT: s_mov_b32 s14, s45
|
|
; GCN-NEXT: s_mov_b32 s15, s44
|
|
; GCN-NEXT: v_mov_b32_e32 v31, v41
|
|
; GCN-NEXT: s_swappc_b64 s[30:31], s[48:49]
|
|
; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
|
|
; GCN-NEXT: s_mov_b64 s[4:5], 0
|
|
; GCN-NEXT: s_mov_b64 s[6:7], 0
|
|
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
|
|
; GCN-NEXT: s_cbranch_execnz .LBB1_4
|
|
; GCN-NEXT: ; %bb.8: ; %bb14
|
|
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
|
|
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
|
|
; GCN-NEXT: s_and_saveexec_b64 s[4:5], s[42:43]
|
|
; GCN-NEXT: s_cbranch_execnz .LBB1_10
|
|
; GCN-NEXT: ; %bb.9: ; %bb16
|
|
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
|
|
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
|
|
; GCN-NEXT: buffer_store_dword v45, off, s[0:3], 0
|
|
; GCN-NEXT: .LBB1_10: ; %bb17
|
|
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
|
|
; GCN-NEXT: buffer_store_dword v44, off, s[0:3], 0
|
|
; GCN-NEXT: s_branch .LBB1_2
|
|
bb:
|
|
%tmp = load float, float* null, align 16
|
|
br label %bb2
|
|
|
|
bb1: ; preds = %bb8, %bb6
|
|
br label %bb2
|
|
|
|
bb2:
|
|
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
|
%gep = getelementptr inbounds i32, i32 addrspace(1)* null, i32 %tid
|
|
%tmp3 = load i32, i32 addrspace(1)* %gep, align 16
|
|
store float 0.000000e+00, float addrspace(5)* null, align 8
|
|
br label %bb4
|
|
|
|
bb4: ; preds = %bb2
|
|
%tmp5 = icmp slt i32 %tmp3, 3
|
|
br i1 %tmp5, label %bb8, label %bb6
|
|
|
|
bb6: ; preds = %bb4
|
|
%tmp7 = icmp eq i32 %tmp3, 3
|
|
br i1 %tmp7, label %bb11, label %bb1
|
|
|
|
bb8: ; preds = %bb4
|
|
%tmp9 = icmp eq i32 %tmp3, 1
|
|
br i1 %tmp9, label %bb10, label %bb1
|
|
|
|
bb10: ; preds = %bb8
|
|
store float 0x7FF8000000000000, float addrspace(5)* null, align 16
|
|
br label %bb18
|
|
|
|
bb11: ; preds = %bb6
|
|
%tmp12 = call float @spam()
|
|
%tmp13 = fcmp nsz oeq float %tmp12, 0.000000e+00
|
|
br i1 %tmp13, label %bb2, label %bb14
|
|
|
|
bb14: ; preds = %bb11
|
|
%tmp15 = fcmp nsz oeq float %tmp, 0.000000e+00
|
|
br i1 %tmp15, label %bb17, label %bb16
|
|
|
|
bb16: ; preds = %bb14
|
|
store float 0x7FF8000000000000, float addrspace(5)* null, align 16
|
|
br label %bb17
|
|
|
|
bb17: ; preds = %bb16, %bb14
|
|
store float %tmp, float addrspace(5)* null, align 16
|
|
br label %bb18
|
|
|
|
bb18: ; preds = %bb17, %bb10
|
|
store float 0x7FF8000000000000, float addrspace(5)* null, align 4
|
|
br label %bb2
|
|
}
|
|
|
|
declare i32 @llvm.amdgcn.workitem.id.x()
|
|
|
|
declare hidden float @spam()
|