932 lines
70 KiB
LLVM
932 lines
70 KiB
LLVM
;; #pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable
|
|
;; #pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
|
;; #pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
|
;;
|
|
;; kernel void testNonUniformBroadcastChars()
|
|
;; {
|
|
;; char16 v = 0;
|
|
;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0);
|
|
;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0);
|
|
;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0);
|
|
;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0);
|
|
;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0);
|
|
;; v = sub_group_non_uniform_broadcast(v, 0);
|
|
;; v.s0 = sub_group_broadcast_first(v.s0);
|
|
;; }
|
|
;;
|
|
;; kernel void testNonUniformBroadcastUChars()
|
|
;; {
|
|
;; uchar16 v = 0;
|
|
;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0);
|
|
;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0);
|
|
;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0);
|
|
;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0);
|
|
;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0);
|
|
;; v = sub_group_non_uniform_broadcast(v, 0);
|
|
;; v.s0 = sub_group_broadcast_first(v.s0);
|
|
;; }
|
|
;;
|
|
;; kernel void testNonUniformBroadcastShorts()
|
|
;; {
|
|
;; short16 v = 0;
|
|
;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0);
|
|
;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0);
|
|
;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0);
|
|
;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0);
|
|
;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0);
|
|
;; v = sub_group_non_uniform_broadcast(v, 0);
|
|
;; v.s0 = sub_group_broadcast_first(v.s0);
|
|
;; }
|
|
;;
|
|
;; kernel void testNonUniformBroadcastUShorts()
|
|
;; {
|
|
;; ushort16 v = 0;
|
|
;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0);
|
|
;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0);
|
|
;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0);
|
|
;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0);
|
|
;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0);
|
|
;; v = sub_group_non_uniform_broadcast(v, 0);
|
|
;; v.s0 = sub_group_broadcast_first(v.s0);
|
|
;; }
|
|
;;
|
|
;; kernel void testNonUniformBroadcastInts()
|
|
;; {
|
|
;; int16 v = 0;
|
|
;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0);
|
|
;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0);
|
|
;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0);
|
|
;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0);
|
|
;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0);
|
|
;; v = sub_group_non_uniform_broadcast(v, 0);
|
|
;; v.s0 = sub_group_broadcast_first(v.s0);
|
|
;; }
|
|
;;
|
|
;; kernel void testNonUniformBroadcastUInts()
|
|
;; {
|
|
;; uint16 v = 0;
|
|
;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0);
|
|
;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0);
|
|
;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0);
|
|
;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0);
|
|
;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0);
|
|
;; v = sub_group_non_uniform_broadcast(v, 0);
|
|
;; v.s0 = sub_group_broadcast_first(v.s0);
|
|
;; }
|
|
;;
|
|
;; kernel void testNonUniformBroadcastLongs()
|
|
;; {
|
|
;; long16 v = 0;
|
|
;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0);
|
|
;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0);
|
|
;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0);
|
|
;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0);
|
|
;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0);
|
|
;; v = sub_group_non_uniform_broadcast(v, 0);
|
|
;; v.s0 = sub_group_broadcast_first(v.s0);
|
|
;; }
|
|
;;
|
|
;; kernel void testNonUniformBroadcastULongs()
|
|
;; {
|
|
;; ulong16 v = 0;
|
|
;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0);
|
|
;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0);
|
|
;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0);
|
|
;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0);
|
|
;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0);
|
|
;; v = sub_group_non_uniform_broadcast(v, 0);
|
|
;; v.s0 = sub_group_broadcast_first(v.s0);
|
|
;; }
|
|
;;
|
|
;; kernel void testNonUniformBroadcastFloats()
|
|
;; {
|
|
;; float16 v = 0;
|
|
;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0);
|
|
;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0);
|
|
;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0);
|
|
;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0);
|
|
;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0);
|
|
;; v = sub_group_non_uniform_broadcast(v, 0);
|
|
;; v.s0 = sub_group_broadcast_first(v.s0);
|
|
;; }
|
|
;;
|
|
;; kernel void testNonUniformBroadcastHalfs()
|
|
;; {
|
|
;; half16 v = 0;
|
|
;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0);
|
|
;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0);
|
|
;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0);
|
|
;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0);
|
|
;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0);
|
|
;; v = sub_group_non_uniform_broadcast(v, 0);
|
|
;; v.s0 = sub_group_broadcast_first(v.s0);
|
|
;; }
|
|
;;
|
|
;; kernel void testNonUniformBroadcastDoubles()
|
|
;; {
|
|
;; double16 v = 0;
|
|
;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0);
|
|
;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0);
|
|
;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0);
|
|
;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0);
|
|
;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0);
|
|
;; v = sub_group_non_uniform_broadcast(v, 0);
|
|
;; v.s0 = sub_group_broadcast_first(v.s0);
|
|
;; }
|
|
;;
|
|
;; kernel void testBallotOperations(global uint* dst)
|
|
;; {
|
|
;; uint4 v = sub_group_ballot(0);
|
|
;; dst[0] = sub_group_inverse_ballot(v);
|
|
;; dst[1] = sub_group_ballot_bit_extract(v, 0);
|
|
;; dst[2] = sub_group_ballot_bit_count(v);
|
|
;; dst[3] = sub_group_ballot_inclusive_scan(v);
|
|
;; dst[4] = sub_group_ballot_exclusive_scan(v);
|
|
;; dst[5] = sub_group_ballot_find_lsb(v);
|
|
;; dst[6] = sub_group_ballot_find_msb(v);
|
|
;; }
|
|
;;
|
|
;; kernel void testSubgroupMasks(global uint4* dst)
|
|
;; {
|
|
;; dst[0] = get_sub_group_eq_mask();
|
|
;; dst[1] = get_sub_group_ge_mask();
|
|
;; dst[2] = get_sub_group_gt_mask();
|
|
;; dst[3] = get_sub_group_le_mask();
|
|
;; dst[4] = get_sub_group_lt_mask();
|
|
;; }
|
|
|
|
; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
|
|
|
|
; CHECK-SPIRV-DAG: OpCapability GroupNonUniformBallot
|
|
|
|
; CHECK-SPIRV-DAG: OpDecorate %[[#eqMask:]] BuiltIn SubgroupEqMask
|
|
; CHECK-SPIRV-DAG: OpDecorate %[[#geMask:]] BuiltIn SubgroupGeMask
|
|
; CHECK-SPIRV-DAG: OpDecorate %[[#gtMask:]] BuiltIn SubgroupGtMask
|
|
; CHECK-SPIRV-DAG: OpDecorate %[[#leMask:]] BuiltIn SubgroupLeMask
|
|
; CHECK-SPIRV-DAG: OpDecorate %[[#ltMask:]] BuiltIn SubgroupLtMask
|
|
|
|
; CHECK-SPIRV-DAG: %[[#bool:]] = OpTypeBool
|
|
; CHECK-SPIRV-DAG: %[[#char:]] = OpTypeInt 8 0
|
|
; CHECK-SPIRV-DAG: %[[#short:]] = OpTypeInt 16 0
|
|
; CHECK-SPIRV-DAG: %[[#int:]] = OpTypeInt 32 0
|
|
; CHECK-SPIRV-DAG: %[[#long:]] = OpTypeInt 64 0
|
|
; CHECK-SPIRV-DAG: %[[#half:]] = OpTypeFloat 16
|
|
; CHECK-SPIRV-DAG: %[[#float:]] = OpTypeFloat 32
|
|
; CHECK-SPIRV-DAG: %[[#double:]] = OpTypeFloat 64
|
|
|
|
; CHECK-SPIRV-DAG: %[[#char2:]] = OpTypeVector %[[#char]] 2
|
|
; CHECK-SPIRV-DAG: %[[#char3:]] = OpTypeVector %[[#char]] 3
|
|
; CHECK-SPIRV-DAG: %[[#char4:]] = OpTypeVector %[[#char]] 4
|
|
; CHECK-SPIRV-DAG: %[[#char8:]] = OpTypeVector %[[#char]] 8
|
|
; CHECK-SPIRV-DAG: %[[#char16:]] = OpTypeVector %[[#char]] 16
|
|
|
|
; CHECK-SPIRV-DAG: %[[#short2:]] = OpTypeVector %[[#short]] 2
|
|
; CHECK-SPIRV-DAG: %[[#short3:]] = OpTypeVector %[[#short]] 3
|
|
; CHECK-SPIRV-DAG: %[[#short4:]] = OpTypeVector %[[#short]] 4
|
|
; CHECK-SPIRV-DAG: %[[#short8:]] = OpTypeVector %[[#short]] 8
|
|
; CHECK-SPIRV-DAG: %[[#short16:]] = OpTypeVector %[[#short]] 16
|
|
|
|
; CHECK-SPIRV-DAG: %[[#int2:]] = OpTypeVector %[[#int]] 2
|
|
; CHECK-SPIRV-DAG: %[[#int3:]] = OpTypeVector %[[#int]] 3
|
|
; CHECK-SPIRV-DAG: %[[#int4:]] = OpTypeVector %[[#int]] 4
|
|
; CHECK-SPIRV-DAG: %[[#int8:]] = OpTypeVector %[[#int]] 8
|
|
; CHECK-SPIRV-DAG: %[[#int16:]] = OpTypeVector %[[#int]] 16
|
|
|
|
; CHECK-SPIRV-DAG: %[[#long2:]] = OpTypeVector %[[#long]] 2
|
|
; CHECK-SPIRV-DAG: %[[#long3:]] = OpTypeVector %[[#long]] 3
|
|
; CHECK-SPIRV-DAG: %[[#long4:]] = OpTypeVector %[[#long]] 4
|
|
; CHECK-SPIRV-DAG: %[[#long8:]] = OpTypeVector %[[#long]] 8
|
|
; CHECK-SPIRV-DAG: %[[#long16:]] = OpTypeVector %[[#long]] 16
|
|
|
|
; CHECK-SPIRV-DAG: %[[#float2:]] = OpTypeVector %[[#float]] 2
|
|
; CHECK-SPIRV-DAG: %[[#float3:]] = OpTypeVector %[[#float]] 3
|
|
; CHECK-SPIRV-DAG: %[[#float4:]] = OpTypeVector %[[#float]] 4
|
|
; CHECK-SPIRV-DAG: %[[#float8:]] = OpTypeVector %[[#float]] 8
|
|
; CHECK-SPIRV-DAG: %[[#float16:]] = OpTypeVector %[[#float]] 16
|
|
|
|
; CHECK-SPIRV-DAG: %[[#half2:]] = OpTypeVector %[[#half]] 2
|
|
; CHECK-SPIRV-DAG: %[[#half3:]] = OpTypeVector %[[#half]] 3
|
|
; CHECK-SPIRV-DAG: %[[#half4:]] = OpTypeVector %[[#half]] 4
|
|
; CHECK-SPIRV-DAG: %[[#half8:]] = OpTypeVector %[[#half]] 8
|
|
; CHECK-SPIRV-DAG: %[[#half16:]] = OpTypeVector %[[#half]] 16
|
|
|
|
; CHECK-SPIRV-DAG: %[[#double2:]] = OpTypeVector %[[#double]] 2
|
|
; CHECK-SPIRV-DAG: %[[#double3:]] = OpTypeVector %[[#double]] 3
|
|
; CHECK-SPIRV-DAG: %[[#double4:]] = OpTypeVector %[[#double]] 4
|
|
; CHECK-SPIRV-DAG: %[[#double8:]] = OpTypeVector %[[#double]] 8
|
|
; CHECK-SPIRV-DAG: %[[#double16:]] = OpTypeVector %[[#double]] 16
|
|
|
|
; CHECK-SPIRV-DAG: %[[#false:]] = OpConstantFalse %[[#bool]]
|
|
; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3
|
|
; CHECK-SPIRV-DAG: %[[#char_0:]] = OpConstant %[[#char]] 0
|
|
; CHECK-SPIRV-DAG: %[[#short_0:]] = OpConstant %[[#short]] 0
|
|
; CHECK-SPIRV-DAG: %[[#int_0:]] = OpConstant %[[#int]] 0
|
|
; CHECK-SPIRV-DAG: %[[#long_0:]] = OpConstantNull %[[#long]]
|
|
; CHECK-SPIRV-DAG: %[[#half_0:]] = OpConstant %[[#half]] 0
|
|
; CHECK-SPIRV-DAG: %[[#float_0:]] = OpConstant %[[#float]] 0
|
|
; CHECK-SPIRV-DAG: %[[#double_0:]] = OpConstant %[[#double]] 0
|
|
|
|
; CHECK-SPIRV: OpFunction
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#char2_0:]] = OpVectorShuffle %[[#char2]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char2]] %[[#ScopeSubgroup]] %[[#char2_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#char3_0:]] = OpVectorShuffle %[[#char3]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char3]] %[[#ScopeSubgroup]] %[[#char3_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#char4_0:]] = OpVectorShuffle %[[#char4]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char4]] %[[#ScopeSubgroup]] %[[#char4_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#char8_0:]] = OpVectorShuffle %[[#char8]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char8]] %[[#ScopeSubgroup]] %[[#char8_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#char16]]
|
|
; CHECK-SPIRV: %[[#char16_0:]] = OpVectorShuffle %[[#char16]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char16]] %[[#ScopeSubgroup]] %[[#char16_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#char_value:]] = OpCompositeExtract %[[#char]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#char]] %[[#ScopeSubgroup]] %[[#char_value]]
|
|
; CHECK-SPIRV: OpFunctionEnd
|
|
|
|
define dso_local spir_kernel void @testNonUniformBroadcastChars() local_unnamed_addr {
|
|
%1 = tail call spir_func signext i8 @_Z31sub_group_non_uniform_broadcastcj(i8 signext 0, i32 0)
|
|
%2 = insertelement <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %1, i64 0
|
|
%3 = shufflevector <16 x i8> %2, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = tail call spir_func <2 x i8> @_Z31sub_group_non_uniform_broadcastDv2_cj(<2 x i8> %3, i32 0)
|
|
%5 = shufflevector <2 x i8> %4, <2 x i8> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%6 = shufflevector <16 x i8> %5, <16 x i8> %2, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%7 = shufflevector <16 x i8> %6, <16 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
%8 = tail call spir_func <3 x i8> @_Z31sub_group_non_uniform_broadcastDv3_cj(<3 x i8> %7, i32 0)
|
|
%9 = shufflevector <3 x i8> %8, <3 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%10 = shufflevector <16 x i8> %9, <16 x i8> %6, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%11 = shufflevector <16 x i8> %10, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%12 = tail call spir_func <4 x i8> @_Z31sub_group_non_uniform_broadcastDv4_cj(<4 x i8> %11, i32 0)
|
|
%13 = shufflevector <4 x i8> %12, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%14 = shufflevector <16 x i8> %13, <16 x i8> %10, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%15 = shufflevector <16 x i8> %14, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%16 = tail call spir_func <8 x i8> @_Z31sub_group_non_uniform_broadcastDv8_cj(<8 x i8> %15, i32 0)
|
|
%17 = shufflevector <8 x i8> %16, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%18 = shufflevector <16 x i8> %17, <16 x i8> %14, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%19 = tail call spir_func <16 x i8> @_Z31sub_group_non_uniform_broadcastDv16_cj(<16 x i8> %18, i32 0)
|
|
%20 = extractelement <16 x i8> %19, i64 0
|
|
%21 = tail call spir_func signext i8 @_Z25sub_group_broadcast_firstc(i8 signext %20)
|
|
ret void
|
|
}
|
|
|
|
declare dso_local spir_func signext i8 @_Z31sub_group_non_uniform_broadcastcj(i8 signext, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <2 x i8> @_Z31sub_group_non_uniform_broadcastDv2_cj(<2 x i8>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <3 x i8> @_Z31sub_group_non_uniform_broadcastDv3_cj(<3 x i8>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x i8> @_Z31sub_group_non_uniform_broadcastDv4_cj(<4 x i8>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <8 x i8> @_Z31sub_group_non_uniform_broadcastDv8_cj(<8 x i8>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <16 x i8> @_Z31sub_group_non_uniform_broadcastDv16_cj(<16 x i8>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func signext i8 @_Z25sub_group_broadcast_firstc(i8 signext) local_unnamed_addr
|
|
|
|
; CHECK-SPIRV: OpFunction
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#char2_0:]] = OpVectorShuffle %[[#char2]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char2]] %[[#ScopeSubgroup]] %[[#char2_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#char3_0:]] = OpVectorShuffle %[[#char3]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char3]] %[[#ScopeSubgroup]] %[[#char3_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#char4_0:]] = OpVectorShuffle %[[#char4]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char4]] %[[#ScopeSubgroup]] %[[#char4_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#char8_0:]] = OpVectorShuffle %[[#char8]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char8]] %[[#ScopeSubgroup]] %[[#char8_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#char16]]
|
|
; CHECK-SPIRV: %[[#char16_0:]] = OpVectorShuffle %[[#char16]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char16]] %[[#ScopeSubgroup]] %[[#char16_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#char_value:]] = OpCompositeExtract %[[#char]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#char]] %[[#ScopeSubgroup]] %[[#char_value]]
|
|
; CHECK-SPIRV: OpFunctionEnd
|
|
|
|
define dso_local spir_kernel void @testNonUniformBroadcastUChars() local_unnamed_addr {
|
|
%1 = tail call spir_func zeroext i8 @_Z31sub_group_non_uniform_broadcasthj(i8 zeroext 0, i32 0)
|
|
%2 = insertelement <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %1, i64 0
|
|
%3 = shufflevector <16 x i8> %2, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = tail call spir_func <2 x i8> @_Z31sub_group_non_uniform_broadcastDv2_hj(<2 x i8> %3, i32 0)
|
|
%5 = shufflevector <2 x i8> %4, <2 x i8> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%6 = shufflevector <16 x i8> %5, <16 x i8> %2, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%7 = shufflevector <16 x i8> %6, <16 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
%8 = tail call spir_func <3 x i8> @_Z31sub_group_non_uniform_broadcastDv3_hj(<3 x i8> %7, i32 0)
|
|
%9 = shufflevector <3 x i8> %8, <3 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%10 = shufflevector <16 x i8> %9, <16 x i8> %6, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%11 = shufflevector <16 x i8> %10, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%12 = tail call spir_func <4 x i8> @_Z31sub_group_non_uniform_broadcastDv4_hj(<4 x i8> %11, i32 0)
|
|
%13 = shufflevector <4 x i8> %12, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%14 = shufflevector <16 x i8> %13, <16 x i8> %10, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%15 = shufflevector <16 x i8> %14, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%16 = tail call spir_func <8 x i8> @_Z31sub_group_non_uniform_broadcastDv8_hj(<8 x i8> %15, i32 0)
|
|
%17 = shufflevector <8 x i8> %16, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%18 = shufflevector <16 x i8> %17, <16 x i8> %14, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%19 = tail call spir_func <16 x i8> @_Z31sub_group_non_uniform_broadcastDv16_hj(<16 x i8> %18, i32 0)
|
|
%20 = extractelement <16 x i8> %19, i64 0
|
|
%21 = tail call spir_func zeroext i8 @_Z25sub_group_broadcast_firsth(i8 zeroext %20)
|
|
ret void
|
|
}
|
|
|
|
declare dso_local spir_func zeroext i8 @_Z31sub_group_non_uniform_broadcasthj(i8 zeroext, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <2 x i8> @_Z31sub_group_non_uniform_broadcastDv2_hj(<2 x i8>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <3 x i8> @_Z31sub_group_non_uniform_broadcastDv3_hj(<3 x i8>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x i8> @_Z31sub_group_non_uniform_broadcastDv4_hj(<4 x i8>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <8 x i8> @_Z31sub_group_non_uniform_broadcastDv8_hj(<8 x i8>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <16 x i8> @_Z31sub_group_non_uniform_broadcastDv16_hj(<16 x i8>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func zeroext i8 @_Z25sub_group_broadcast_firsth(i8 zeroext) local_unnamed_addr
|
|
|
|
; CHECK-SPIRV: OpFunction
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#short2_0:]] = OpVectorShuffle %[[#short2]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short2]] %[[#ScopeSubgroup]] %[[#short2_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#short3_0:]] = OpVectorShuffle %[[#short3]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short3]] %[[#ScopeSubgroup]] %[[#short3_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#short4_0:]] = OpVectorShuffle %[[#short4]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short4]] %[[#ScopeSubgroup]] %[[#short4_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#short8_0:]] = OpVectorShuffle %[[#short8]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short8]] %[[#ScopeSubgroup]] %[[#short8_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#short16]]
|
|
; CHECK-SPIRV: %[[#short16_0:]] = OpVectorShuffle %[[#short16]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short16]] %[[#ScopeSubgroup]] %[[#short16_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#short_value:]] = OpCompositeExtract %[[#short]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#short]] %[[#ScopeSubgroup]] %[[#short_value]]
|
|
; CHECK-SPIRV: OpFunctionEnd
|
|
|
|
define dso_local spir_kernel void @testNonUniformBroadcastShorts() local_unnamed_addr {
|
|
%1 = tail call spir_func signext i16 @_Z31sub_group_non_uniform_broadcastsj(i16 signext 0, i32 0)
|
|
%2 = insertelement <16 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %1, i64 0
|
|
%3 = shufflevector <16 x i16> %2, <16 x i16> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = tail call spir_func <2 x i16> @_Z31sub_group_non_uniform_broadcastDv2_sj(<2 x i16> %3, i32 0)
|
|
%5 = shufflevector <2 x i16> %4, <2 x i16> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%6 = shufflevector <16 x i16> %5, <16 x i16> %2, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%7 = shufflevector <16 x i16> %6, <16 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
%8 = tail call spir_func <3 x i16> @_Z31sub_group_non_uniform_broadcastDv3_sj(<3 x i16> %7, i32 0)
|
|
%9 = shufflevector <3 x i16> %8, <3 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%10 = shufflevector <16 x i16> %9, <16 x i16> %6, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%11 = shufflevector <16 x i16> %10, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%12 = tail call spir_func <4 x i16> @_Z31sub_group_non_uniform_broadcastDv4_sj(<4 x i16> %11, i32 0)
|
|
%13 = shufflevector <4 x i16> %12, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%14 = shufflevector <16 x i16> %13, <16 x i16> %10, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%15 = shufflevector <16 x i16> %14, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%16 = tail call spir_func <8 x i16> @_Z31sub_group_non_uniform_broadcastDv8_sj(<8 x i16> %15, i32 0)
|
|
%17 = shufflevector <8 x i16> %16, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%18 = shufflevector <16 x i16> %17, <16 x i16> %14, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%19 = tail call spir_func <16 x i16> @_Z31sub_group_non_uniform_broadcastDv16_sj(<16 x i16> %18, i32 0)
|
|
%20 = extractelement <16 x i16> %19, i64 0
|
|
%21 = tail call spir_func signext i16 @_Z25sub_group_broadcast_firsts(i16 signext %20)
|
|
ret void
|
|
}
|
|
|
|
declare dso_local spir_func signext i16 @_Z31sub_group_non_uniform_broadcastsj(i16 signext, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <2 x i16> @_Z31sub_group_non_uniform_broadcastDv2_sj(<2 x i16>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <3 x i16> @_Z31sub_group_non_uniform_broadcastDv3_sj(<3 x i16>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x i16> @_Z31sub_group_non_uniform_broadcastDv4_sj(<4 x i16>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <8 x i16> @_Z31sub_group_non_uniform_broadcastDv8_sj(<8 x i16>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <16 x i16> @_Z31sub_group_non_uniform_broadcastDv16_sj(<16 x i16>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func signext i16 @_Z25sub_group_broadcast_firsts(i16 signext) local_unnamed_addr
|
|
|
|
; CHECK-SPIRV: OpFunction
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#short2_0:]] = OpVectorShuffle %[[#short2]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short2]] %[[#ScopeSubgroup]] %[[#short2_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#short3_0:]] = OpVectorShuffle %[[#short3]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short3]] %[[#ScopeSubgroup]] %[[#short3_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#short4_0:]] = OpVectorShuffle %[[#short4]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short4]] %[[#ScopeSubgroup]] %[[#short4_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#short8_0:]] = OpVectorShuffle %[[#short8]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short8]] %[[#ScopeSubgroup]] %[[#short8_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#short16]]
|
|
; CHECK-SPIRV: %[[#short16_0:]] = OpVectorShuffle %[[#short16]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short16]] %[[#ScopeSubgroup]] %[[#short16_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#short_value:]] = OpCompositeExtract %[[#short]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#short]] %[[#ScopeSubgroup]] %[[#short_value]]
|
|
; CHECK-SPIRV: OpFunctionEnd
|
|
|
|
define dso_local spir_kernel void @testNonUniformBroadcastUShorts() local_unnamed_addr {
|
|
%1 = tail call spir_func zeroext i16 @_Z31sub_group_non_uniform_broadcasttj(i16 zeroext 0, i32 0)
|
|
%2 = insertelement <16 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %1, i64 0
|
|
%3 = shufflevector <16 x i16> %2, <16 x i16> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = tail call spir_func <2 x i16> @_Z31sub_group_non_uniform_broadcastDv2_tj(<2 x i16> %3, i32 0)
|
|
%5 = shufflevector <2 x i16> %4, <2 x i16> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%6 = shufflevector <16 x i16> %5, <16 x i16> %2, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%7 = shufflevector <16 x i16> %6, <16 x i16> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
%8 = tail call spir_func <3 x i16> @_Z31sub_group_non_uniform_broadcastDv3_tj(<3 x i16> %7, i32 0)
|
|
%9 = shufflevector <3 x i16> %8, <3 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%10 = shufflevector <16 x i16> %9, <16 x i16> %6, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%11 = shufflevector <16 x i16> %10, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%12 = tail call spir_func <4 x i16> @_Z31sub_group_non_uniform_broadcastDv4_tj(<4 x i16> %11, i32 0)
|
|
%13 = shufflevector <4 x i16> %12, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%14 = shufflevector <16 x i16> %13, <16 x i16> %10, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%15 = shufflevector <16 x i16> %14, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%16 = tail call spir_func <8 x i16> @_Z31sub_group_non_uniform_broadcastDv8_tj(<8 x i16> %15, i32 0)
|
|
%17 = shufflevector <8 x i16> %16, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%18 = shufflevector <16 x i16> %17, <16 x i16> %14, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%19 = tail call spir_func <16 x i16> @_Z31sub_group_non_uniform_broadcastDv16_tj(<16 x i16> %18, i32 0)
|
|
%20 = extractelement <16 x i16> %19, i64 0
|
|
%21 = tail call spir_func zeroext i16 @_Z25sub_group_broadcast_firstt(i16 zeroext %20)
|
|
ret void
|
|
}
|
|
|
|
declare dso_local spir_func zeroext i16 @_Z31sub_group_non_uniform_broadcasttj(i16 zeroext, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <2 x i16> @_Z31sub_group_non_uniform_broadcastDv2_tj(<2 x i16>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <3 x i16> @_Z31sub_group_non_uniform_broadcastDv3_tj(<3 x i16>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x i16> @_Z31sub_group_non_uniform_broadcastDv4_tj(<4 x i16>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <8 x i16> @_Z31sub_group_non_uniform_broadcastDv8_tj(<8 x i16>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <16 x i16> @_Z31sub_group_non_uniform_broadcastDv16_tj(<16 x i16>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func zeroext i16 @_Z25sub_group_broadcast_firstt(i16 zeroext) local_unnamed_addr
|
|
|
|
; CHECK-SPIRV: OpFunction
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#int2_0:]] = OpVectorShuffle %[[#int2]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int2]] %[[#ScopeSubgroup]] %[[#int2_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#int3_0:]] = OpVectorShuffle %[[#int3]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int3]] %[[#ScopeSubgroup]] %[[#int3_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#int4_0:]] = OpVectorShuffle %[[#int4]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int4]] %[[#ScopeSubgroup]] %[[#int4_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#int8_0:]] = OpVectorShuffle %[[#int8]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int8]] %[[#ScopeSubgroup]] %[[#int8_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#int16]]
|
|
; CHECK-SPIRV: %[[#int16_0:]] = OpVectorShuffle %[[#int16]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int16]] %[[#ScopeSubgroup]] %[[#int16_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#int_value:]] = OpCompositeExtract %[[#int]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#int]] %[[#ScopeSubgroup]] %[[#int_value]]
|
|
; CHECK-SPIRV: OpFunctionEnd
|
|
|
|
define dso_local spir_kernel void @testNonUniformBroadcastInts() local_unnamed_addr {
|
|
%1 = tail call spir_func i32 @_Z31sub_group_non_uniform_broadcastij(i32 0, i32 0)
|
|
%2 = insertelement <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %1, i64 0
|
|
%3 = shufflevector <16 x i32> %2, <16 x i32> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = tail call spir_func <2 x i32> @_Z31sub_group_non_uniform_broadcastDv2_ij(<2 x i32> %3, i32 0)
|
|
%5 = shufflevector <2 x i32> %4, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%6 = shufflevector <16 x i32> %5, <16 x i32> %2, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%7 = shufflevector <16 x i32> %6, <16 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
%8 = tail call spir_func <3 x i32> @_Z31sub_group_non_uniform_broadcastDv3_ij(<3 x i32> %7, i32 0)
|
|
%9 = shufflevector <3 x i32> %8, <3 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%10 = shufflevector <16 x i32> %9, <16 x i32> %6, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%11 = shufflevector <16 x i32> %10, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%12 = tail call spir_func <4 x i32> @_Z31sub_group_non_uniform_broadcastDv4_ij(<4 x i32> %11, i32 0)
|
|
%13 = shufflevector <4 x i32> %12, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%14 = shufflevector <16 x i32> %13, <16 x i32> %10, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%15 = shufflevector <16 x i32> %14, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%16 = tail call spir_func <8 x i32> @_Z31sub_group_non_uniform_broadcastDv8_ij(<8 x i32> %15, i32 0)
|
|
%17 = shufflevector <8 x i32> %16, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%18 = shufflevector <16 x i32> %17, <16 x i32> %14, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%19 = tail call spir_func <16 x i32> @_Z31sub_group_non_uniform_broadcastDv16_ij(<16 x i32> %18, i32 0)
|
|
%20 = extractelement <16 x i32> %19, i64 0
|
|
%21 = tail call spir_func i32 @_Z25sub_group_broadcast_firsti(i32 %20)
|
|
ret void
|
|
}
|
|
|
|
declare dso_local spir_func i32 @_Z31sub_group_non_uniform_broadcastij(i32, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <2 x i32> @_Z31sub_group_non_uniform_broadcastDv2_ij(<2 x i32>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <3 x i32> @_Z31sub_group_non_uniform_broadcastDv3_ij(<3 x i32>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x i32> @_Z31sub_group_non_uniform_broadcastDv4_ij(<4 x i32>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <8 x i32> @_Z31sub_group_non_uniform_broadcastDv8_ij(<8 x i32>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <16 x i32> @_Z31sub_group_non_uniform_broadcastDv16_ij(<16 x i32>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func i32 @_Z25sub_group_broadcast_firsti(i32) local_unnamed_addr
|
|
|
|
; CHECK-SPIRV: OpFunction
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#int2_0:]] = OpVectorShuffle %[[#int2]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int2]] %[[#ScopeSubgroup]] %[[#int2_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#int3_0:]] = OpVectorShuffle %[[#int3]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int3]] %[[#ScopeSubgroup]] %[[#int3_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#int4_0:]] = OpVectorShuffle %[[#int4]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int4]] %[[#ScopeSubgroup]] %[[#int4_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#int8_0:]] = OpVectorShuffle %[[#int8]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int8]] %[[#ScopeSubgroup]] %[[#int8_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#int16]]
|
|
; CHECK-SPIRV: %[[#int16_0:]] = OpVectorShuffle %[[#int16]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int16]] %[[#ScopeSubgroup]] %[[#int16_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#int_value:]] = OpCompositeExtract %[[#int]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#int]] %[[#ScopeSubgroup]] %[[#int_value]]
|
|
; CHECK-SPIRV: OpFunctionEnd
|
|
|
|
define dso_local spir_kernel void @testNonUniformBroadcastUInts() local_unnamed_addr {
|
|
%1 = tail call spir_func i32 @_Z31sub_group_non_uniform_broadcastjj(i32 0, i32 0)
|
|
%2 = insertelement <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %1, i64 0
|
|
%3 = shufflevector <16 x i32> %2, <16 x i32> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = tail call spir_func <2 x i32> @_Z31sub_group_non_uniform_broadcastDv2_jj(<2 x i32> %3, i32 0)
|
|
%5 = shufflevector <2 x i32> %4, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%6 = shufflevector <16 x i32> %5, <16 x i32> %2, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%7 = shufflevector <16 x i32> %6, <16 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
%8 = tail call spir_func <3 x i32> @_Z31sub_group_non_uniform_broadcastDv3_jj(<3 x i32> %7, i32 0)
|
|
%9 = shufflevector <3 x i32> %8, <3 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%10 = shufflevector <16 x i32> %9, <16 x i32> %6, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%11 = shufflevector <16 x i32> %10, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%12 = tail call spir_func <4 x i32> @_Z31sub_group_non_uniform_broadcastDv4_jj(<4 x i32> %11, i32 0)
|
|
%13 = shufflevector <4 x i32> %12, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%14 = shufflevector <16 x i32> %13, <16 x i32> %10, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%15 = shufflevector <16 x i32> %14, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%16 = tail call spir_func <8 x i32> @_Z31sub_group_non_uniform_broadcastDv8_jj(<8 x i32> %15, i32 0)
|
|
%17 = shufflevector <8 x i32> %16, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%18 = shufflevector <16 x i32> %17, <16 x i32> %14, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%19 = tail call spir_func <16 x i32> @_Z31sub_group_non_uniform_broadcastDv16_jj(<16 x i32> %18, i32 0)
|
|
%20 = extractelement <16 x i32> %19, i64 0
|
|
%21 = tail call spir_func i32 @_Z25sub_group_broadcast_firstj(i32 %20)
|
|
ret void
|
|
}
|
|
|
|
declare dso_local spir_func i32 @_Z31sub_group_non_uniform_broadcastjj(i32, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <2 x i32> @_Z31sub_group_non_uniform_broadcastDv2_jj(<2 x i32>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <3 x i32> @_Z31sub_group_non_uniform_broadcastDv3_jj(<3 x i32>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x i32> @_Z31sub_group_non_uniform_broadcastDv4_jj(<4 x i32>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <8 x i32> @_Z31sub_group_non_uniform_broadcastDv8_jj(<8 x i32>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <16 x i32> @_Z31sub_group_non_uniform_broadcastDv16_jj(<16 x i32>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func i32 @_Z25sub_group_broadcast_firstj(i32) local_unnamed_addr
|
|
|
|
; CHECK-SPIRV: OpFunction
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#long2_0:]] = OpVectorShuffle %[[#long2]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long2]] %[[#ScopeSubgroup]] %[[#long2_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#long3_0:]] = OpVectorShuffle %[[#long3]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long3]] %[[#ScopeSubgroup]] %[[#long3_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#long4_0:]] = OpVectorShuffle %[[#long4]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long4]] %[[#ScopeSubgroup]] %[[#long4_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#long8_0:]] = OpVectorShuffle %[[#long8]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long8]] %[[#ScopeSubgroup]] %[[#long8_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#long16]]
|
|
; CHECK-SPIRV: %[[#long16_0:]] = OpVectorShuffle %[[#long16]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long16]] %[[#ScopeSubgroup]] %[[#long16_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#long_value:]] = OpCompositeExtract %[[#long]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#long]] %[[#ScopeSubgroup]] %[[#long_value]]
|
|
; CHECK-SPIRV: OpFunctionEnd
|
|
|
|
define dso_local spir_kernel void @testNonUniformBroadcastLongs() local_unnamed_addr {
|
|
%1 = tail call spir_func i64 @_Z31sub_group_non_uniform_broadcastlj(i64 0, i32 0)
|
|
%2 = insertelement <16 x i64> <i64 undef, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 %1, i64 0
|
|
%3 = shufflevector <16 x i64> %2, <16 x i64> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = tail call spir_func <2 x i64> @_Z31sub_group_non_uniform_broadcastDv2_lj(<2 x i64> %3, i32 0)
|
|
%5 = shufflevector <2 x i64> %4, <2 x i64> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%6 = shufflevector <16 x i64> %5, <16 x i64> %2, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%7 = shufflevector <16 x i64> %6, <16 x i64> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
%8 = tail call spir_func <3 x i64> @_Z31sub_group_non_uniform_broadcastDv3_lj(<3 x i64> %7, i32 0)
|
|
%9 = shufflevector <3 x i64> %8, <3 x i64> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%10 = shufflevector <16 x i64> %9, <16 x i64> %6, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%11 = shufflevector <16 x i64> %10, <16 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%12 = tail call spir_func <4 x i64> @_Z31sub_group_non_uniform_broadcastDv4_lj(<4 x i64> %11, i32 0)
|
|
%13 = shufflevector <4 x i64> %12, <4 x i64> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%14 = shufflevector <16 x i64> %13, <16 x i64> %10, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%15 = shufflevector <16 x i64> %14, <16 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%16 = tail call spir_func <8 x i64> @_Z31sub_group_non_uniform_broadcastDv8_lj(<8 x i64> %15, i32 0)
|
|
%17 = shufflevector <8 x i64> %16, <8 x i64> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%18 = shufflevector <16 x i64> %17, <16 x i64> %14, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%19 = tail call spir_func <16 x i64> @_Z31sub_group_non_uniform_broadcastDv16_lj(<16 x i64> %18, i32 0)
|
|
%20 = extractelement <16 x i64> %19, i64 0
|
|
%21 = tail call spir_func i64 @_Z25sub_group_broadcast_firstl(i64 %20)
|
|
ret void
|
|
}
|
|
|
|
declare dso_local spir_func i64 @_Z31sub_group_non_uniform_broadcastlj(i64, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <2 x i64> @_Z31sub_group_non_uniform_broadcastDv2_lj(<2 x i64>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <3 x i64> @_Z31sub_group_non_uniform_broadcastDv3_lj(<3 x i64>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x i64> @_Z31sub_group_non_uniform_broadcastDv4_lj(<4 x i64>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <8 x i64> @_Z31sub_group_non_uniform_broadcastDv8_lj(<8 x i64>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <16 x i64> @_Z31sub_group_non_uniform_broadcastDv16_lj(<16 x i64>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func i64 @_Z25sub_group_broadcast_firstl(i64) local_unnamed_addr
|
|
|
|
; CHECK-SPIRV: OpFunction
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#long2_0:]] = OpVectorShuffle %[[#long2]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long2]] %[[#ScopeSubgroup]] %[[#long2_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#long3_0:]] = OpVectorShuffle %[[#long3]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long3]] %[[#ScopeSubgroup]] %[[#long3_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#long4_0:]] = OpVectorShuffle %[[#long4]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long4]] %[[#ScopeSubgroup]] %[[#long4_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#long8_0:]] = OpVectorShuffle %[[#long8]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long8]] %[[#ScopeSubgroup]] %[[#long8_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#long16]]
|
|
; CHECK-SPIRV: %[[#long16_0:]] = OpVectorShuffle %[[#long16]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long16]] %[[#ScopeSubgroup]] %[[#long16_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#long_value:]] = OpCompositeExtract %[[#long]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#long]] %[[#ScopeSubgroup]] %[[#long_value]]
|
|
; CHECK-SPIRV: OpFunctionEnd
|
|
|
|
define dso_local spir_kernel void @testNonUniformBroadcastULongs() local_unnamed_addr {
|
|
%1 = tail call spir_func i64 @_Z31sub_group_non_uniform_broadcastmj(i64 0, i32 0)
|
|
%2 = insertelement <16 x i64> <i64 undef, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 %1, i64 0
|
|
%3 = shufflevector <16 x i64> %2, <16 x i64> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = tail call spir_func <2 x i64> @_Z31sub_group_non_uniform_broadcastDv2_mj(<2 x i64> %3, i32 0)
|
|
%5 = shufflevector <2 x i64> %4, <2 x i64> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%6 = shufflevector <16 x i64> %5, <16 x i64> %2, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%7 = shufflevector <16 x i64> %6, <16 x i64> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
%8 = tail call spir_func <3 x i64> @_Z31sub_group_non_uniform_broadcastDv3_mj(<3 x i64> %7, i32 0)
|
|
%9 = shufflevector <3 x i64> %8, <3 x i64> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%10 = shufflevector <16 x i64> %9, <16 x i64> %6, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%11 = shufflevector <16 x i64> %10, <16 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%12 = tail call spir_func <4 x i64> @_Z31sub_group_non_uniform_broadcastDv4_mj(<4 x i64> %11, i32 0)
|
|
%13 = shufflevector <4 x i64> %12, <4 x i64> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%14 = shufflevector <16 x i64> %13, <16 x i64> %10, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%15 = shufflevector <16 x i64> %14, <16 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%16 = tail call spir_func <8 x i64> @_Z31sub_group_non_uniform_broadcastDv8_mj(<8 x i64> %15, i32 0)
|
|
%17 = shufflevector <8 x i64> %16, <8 x i64> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%18 = shufflevector <16 x i64> %17, <16 x i64> %14, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%19 = tail call spir_func <16 x i64> @_Z31sub_group_non_uniform_broadcastDv16_mj(<16 x i64> %18, i32 0)
|
|
%20 = extractelement <16 x i64> %19, i64 0
|
|
%21 = tail call spir_func i64 @_Z25sub_group_broadcast_firstm(i64 %20)
|
|
ret void
|
|
}
|
|
|
|
declare dso_local spir_func i64 @_Z31sub_group_non_uniform_broadcastmj(i64, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <2 x i64> @_Z31sub_group_non_uniform_broadcastDv2_mj(<2 x i64>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <3 x i64> @_Z31sub_group_non_uniform_broadcastDv3_mj(<3 x i64>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x i64> @_Z31sub_group_non_uniform_broadcastDv4_mj(<4 x i64>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <8 x i64> @_Z31sub_group_non_uniform_broadcastDv8_mj(<8 x i64>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <16 x i64> @_Z31sub_group_non_uniform_broadcastDv16_mj(<16 x i64>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func i64 @_Z25sub_group_broadcast_firstm(i64) local_unnamed_addr
|
|
|
|
; CHECK-SPIRV: OpFunction
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float]] %[[#ScopeSubgroup]] %[[#float_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#float2_0:]] = OpVectorShuffle %[[#float2]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float2]] %[[#ScopeSubgroup]] %[[#float2_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#float3_0:]] = OpVectorShuffle %[[#float3]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float3]] %[[#ScopeSubgroup]] %[[#float3_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#float4_0:]] = OpVectorShuffle %[[#float4]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float4]] %[[#ScopeSubgroup]] %[[#float4_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#float8_0:]] = OpVectorShuffle %[[#float8]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float8]] %[[#ScopeSubgroup]] %[[#float8_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#float16]]
|
|
; CHECK-SPIRV: %[[#float16_0:]] = OpVectorShuffle %[[#float16]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float16]] %[[#ScopeSubgroup]] %[[#float16_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#float_value:]] = OpCompositeExtract %[[#float]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#float]] %[[#ScopeSubgroup]] %[[#float_value]]
|
|
; CHECK-SPIRV: OpFunctionEnd
|
|
|
|
define dso_local spir_kernel void @testNonUniformBroadcastFloats() local_unnamed_addr {
|
|
%1 = tail call spir_func float @_Z31sub_group_non_uniform_broadcastfj(float 0.000000e+00, i32 0)
|
|
%2 = insertelement <16 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %1, i64 0
|
|
%3 = shufflevector <16 x float> %2, <16 x float> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = tail call spir_func <2 x float> @_Z31sub_group_non_uniform_broadcastDv2_fj(<2 x float> %3, i32 0)
|
|
%5 = shufflevector <2 x float> %4, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%6 = shufflevector <16 x float> %5, <16 x float> %2, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%7 = shufflevector <16 x float> %6, <16 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
%8 = tail call spir_func <3 x float> @_Z31sub_group_non_uniform_broadcastDv3_fj(<3 x float> %7, i32 0)
|
|
%9 = shufflevector <3 x float> %8, <3 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%10 = shufflevector <16 x float> %9, <16 x float> %6, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%11 = shufflevector <16 x float> %10, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%12 = tail call spir_func <4 x float> @_Z31sub_group_non_uniform_broadcastDv4_fj(<4 x float> %11, i32 0)
|
|
%13 = shufflevector <4 x float> %12, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%14 = shufflevector <16 x float> %13, <16 x float> %10, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%15 = shufflevector <16 x float> %14, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%16 = tail call spir_func <8 x float> @_Z31sub_group_non_uniform_broadcastDv8_fj(<8 x float> %15, i32 0)
|
|
%17 = shufflevector <8 x float> %16, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%18 = shufflevector <16 x float> %17, <16 x float> %14, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%19 = tail call spir_func <16 x float> @_Z31sub_group_non_uniform_broadcastDv16_fj(<16 x float> %18, i32 0)
|
|
%20 = extractelement <16 x float> %19, i64 0
|
|
%21 = tail call spir_func float @_Z25sub_group_broadcast_firstf(float %20)
|
|
ret void
|
|
}
|
|
|
|
declare dso_local spir_func float @_Z31sub_group_non_uniform_broadcastfj(float, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <2 x float> @_Z31sub_group_non_uniform_broadcastDv2_fj(<2 x float>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <3 x float> @_Z31sub_group_non_uniform_broadcastDv3_fj(<3 x float>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x float> @_Z31sub_group_non_uniform_broadcastDv4_fj(<4 x float>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <8 x float> @_Z31sub_group_non_uniform_broadcastDv8_fj(<8 x float>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <16 x float> @_Z31sub_group_non_uniform_broadcastDv16_fj(<16 x float>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func float @_Z25sub_group_broadcast_firstf(float) local_unnamed_addr
|
|
|
|
; CHECK-SPIRV: OpFunction
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half]] %[[#ScopeSubgroup]] %[[#half_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#half2_0:]] = OpVectorShuffle %[[#half2]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half2]] %[[#ScopeSubgroup]] %[[#half2_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#half3_0:]] = OpVectorShuffle %[[#half3]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half3]] %[[#ScopeSubgroup]] %[[#half3_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#half4_0:]] = OpVectorShuffle %[[#half4]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half4]] %[[#ScopeSubgroup]] %[[#half4_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#half8_0:]] = OpVectorShuffle %[[#half8]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half8]] %[[#ScopeSubgroup]] %[[#half8_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#half16]]
|
|
; CHECK-SPIRV: %[[#half16_0:]] = OpVectorShuffle %[[#half16]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half16]] %[[#ScopeSubgroup]] %[[#half16_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#half_value:]] = OpCompositeExtract %[[#half]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#half]] %[[#ScopeSubgroup]] %[[#half_value]]
|
|
; CHECK-SPIRV: OpFunctionEnd
|
|
|
|
define dso_local spir_kernel void @testNonUniformBroadcastHalfs() local_unnamed_addr {
|
|
%1 = tail call spir_func half @_Z31sub_group_non_uniform_broadcastDhj(half 0xH0000, i32 0)
|
|
%2 = insertelement <16 x half> <half undef, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000>, half %1, i64 0
|
|
%3 = shufflevector <16 x half> %2, <16 x half> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = tail call spir_func <2 x half> @_Z31sub_group_non_uniform_broadcastDv2_Dhj(<2 x half> %3, i32 0)
|
|
%5 = shufflevector <2 x half> %4, <2 x half> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%6 = shufflevector <16 x half> %5, <16 x half> %2, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%7 = shufflevector <16 x half> %6, <16 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
%8 = tail call spir_func <3 x half> @_Z31sub_group_non_uniform_broadcastDv3_Dhj(<3 x half> %7, i32 0)
|
|
%9 = shufflevector <3 x half> %8, <3 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%10 = shufflevector <16 x half> %9, <16 x half> %6, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%11 = shufflevector <16 x half> %10, <16 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%12 = tail call spir_func <4 x half> @_Z31sub_group_non_uniform_broadcastDv4_Dhj(<4 x half> %11, i32 0)
|
|
%13 = shufflevector <4 x half> %12, <4 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%14 = shufflevector <16 x half> %13, <16 x half> %10, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%15 = shufflevector <16 x half> %14, <16 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%16 = tail call spir_func <8 x half> @_Z31sub_group_non_uniform_broadcastDv8_Dhj(<8 x half> %15, i32 0)
|
|
%17 = shufflevector <8 x half> %16, <8 x half> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%18 = shufflevector <16 x half> %17, <16 x half> %14, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%19 = tail call spir_func <16 x half> @_Z31sub_group_non_uniform_broadcastDv16_Dhj(<16 x half> %18, i32 0)
|
|
%20 = extractelement <16 x half> %19, i64 0
|
|
%21 = tail call spir_func half @_Z25sub_group_broadcast_firstDh(half %20)
|
|
ret void
|
|
}
|
|
|
|
declare dso_local spir_func half @_Z31sub_group_non_uniform_broadcastDhj(half, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <2 x half> @_Z31sub_group_non_uniform_broadcastDv2_Dhj(<2 x half>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <3 x half> @_Z31sub_group_non_uniform_broadcastDv3_Dhj(<3 x half>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x half> @_Z31sub_group_non_uniform_broadcastDv4_Dhj(<4 x half>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <8 x half> @_Z31sub_group_non_uniform_broadcastDv8_Dhj(<8 x half>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <16 x half> @_Z31sub_group_non_uniform_broadcastDv16_Dhj(<16 x half>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func half @_Z25sub_group_broadcast_firstDh(half) local_unnamed_addr
|
|
|
|
; CHECK-SPIRV: OpFunction
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double]] %[[#ScopeSubgroup]] %[[#double_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#double2_0:]] = OpVectorShuffle %[[#double2]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double2]] %[[#ScopeSubgroup]] %[[#double2_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#double3_0:]] = OpVectorShuffle %[[#double3]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double3]] %[[#ScopeSubgroup]] %[[#double3_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#double4_0:]] = OpVectorShuffle %[[#double4]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double4]] %[[#ScopeSubgroup]] %[[#double4_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#double8_0:]] = OpVectorShuffle %[[#double8]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double8]] %[[#ScopeSubgroup]] %[[#double8_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#double16]]
|
|
; CHECK-SPIRV: %[[#double16_0:]] = OpVectorShuffle %[[#double16]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double16]] %[[#ScopeSubgroup]] %[[#double16_0]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#double_value:]] = OpCompositeExtract %[[#double]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#double]] %[[#ScopeSubgroup]] %[[#double_value]]
|
|
; CHECK-SPIRV: OpFunctionEnd
|
|
|
|
define dso_local spir_kernel void @testNonUniformBroadcastDoubles() local_unnamed_addr {
|
|
%1 = tail call spir_func double @_Z31sub_group_non_uniform_broadcastdj(double 0.000000e+00, i32 0)
|
|
%2 = insertelement <16 x double> <double undef, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, double %1, i64 0
|
|
%3 = shufflevector <16 x double> %2, <16 x double> undef, <2 x i32> <i32 0, i32 1>
|
|
%4 = tail call spir_func <2 x double> @_Z31sub_group_non_uniform_broadcastDv2_dj(<2 x double> %3, i32 0)
|
|
%5 = shufflevector <2 x double> %4, <2 x double> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%6 = shufflevector <16 x double> %5, <16 x double> %2, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%7 = shufflevector <16 x double> %6, <16 x double> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
|
%8 = tail call spir_func <3 x double> @_Z31sub_group_non_uniform_broadcastDv3_dj(<3 x double> %7, i32 0)
|
|
%9 = shufflevector <3 x double> %8, <3 x double> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%10 = shufflevector <16 x double> %9, <16 x double> %6, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%11 = shufflevector <16 x double> %10, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
%12 = tail call spir_func <4 x double> @_Z31sub_group_non_uniform_broadcastDv4_dj(<4 x double> %11, i32 0)
|
|
%13 = shufflevector <4 x double> %12, <4 x double> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%14 = shufflevector <16 x double> %13, <16 x double> %10, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%15 = shufflevector <16 x double> %14, <16 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
%16 = tail call spir_func <8 x double> @_Z31sub_group_non_uniform_broadcastDv8_dj(<8 x double> %15, i32 0)
|
|
%17 = shufflevector <8 x double> %16, <8 x double> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
%18 = shufflevector <16 x double> %17, <16 x double> %14, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
|
%19 = tail call spir_func <16 x double> @_Z31sub_group_non_uniform_broadcastDv16_dj(<16 x double> %18, i32 0)
|
|
%20 = extractelement <16 x double> %19, i64 0
|
|
%21 = tail call spir_func double @_Z25sub_group_broadcast_firstd(double %20)
|
|
ret void
|
|
}
|
|
|
|
declare dso_local spir_func double @_Z31sub_group_non_uniform_broadcastdj(double, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <2 x double> @_Z31sub_group_non_uniform_broadcastDv2_dj(<2 x double>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <3 x double> @_Z31sub_group_non_uniform_broadcastDv3_dj(<3 x double>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x double> @_Z31sub_group_non_uniform_broadcastDv4_dj(<4 x double>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <8 x double> @_Z31sub_group_non_uniform_broadcastDv8_dj(<8 x double>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func <16 x double> @_Z31sub_group_non_uniform_broadcastDv16_dj(<16 x double>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func double @_Z25sub_group_broadcast_firstd(double) local_unnamed_addr
|
|
|
|
; CHECK-SPIRV: OpFunction
|
|
; CHECK-SPIRV: %[[#ballot:]] = OpGroupNonUniformBallot %[[#int4]] %[[#ScopeSubgroup]] %[[#false]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformInverseBallot %[[#bool]] %[[#ScopeSubgroup]] %[[#ballot]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitExtract %[[#bool]] %[[#ScopeSubgroup]] %[[#ballot]] %[[#int_0]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#ballot]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#ballot]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#ballot]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotFindLSB %[[#int]] %[[#ScopeSubgroup]] %[[#ballot]]
|
|
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotFindMSB %[[#int]] %[[#ScopeSubgroup]] %[[#ballot]]
|
|
; CHECK-SPIRV: OpFunctionEnd
|
|
|
|
define dso_local spir_kernel void @testBallotOperations(i32 addrspace(1)* nocapture) local_unnamed_addr {
|
|
%2 = tail call spir_func <4 x i32> @_Z16sub_group_balloti(i32 0)
|
|
%3 = tail call spir_func i32 @_Z24sub_group_inverse_ballotDv4_j(<4 x i32> %2)
|
|
store i32 %3, i32 addrspace(1)* %0, align 4
|
|
%4 = tail call spir_func i32 @_Z28sub_group_ballot_bit_extractDv4_jj(<4 x i32> %2, i32 0)
|
|
%5 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
|
|
store i32 %4, i32 addrspace(1)* %5, align 4
|
|
%6 = tail call spir_func i32 @_Z26sub_group_ballot_bit_countDv4_j(<4 x i32> %2)
|
|
%7 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
|
|
store i32 %6, i32 addrspace(1)* %7, align 4
|
|
%8 = tail call spir_func i32 @_Z31sub_group_ballot_inclusive_scanDv4_j(<4 x i32> %2)
|
|
%9 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3
|
|
store i32 %8, i32 addrspace(1)* %9, align 4
|
|
%10 = tail call spir_func i32 @_Z31sub_group_ballot_exclusive_scanDv4_j(<4 x i32> %2)
|
|
%11 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 4
|
|
store i32 %10, i32 addrspace(1)* %11, align 4
|
|
%12 = tail call spir_func i32 @_Z25sub_group_ballot_find_lsbDv4_j(<4 x i32> %2)
|
|
%13 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 5
|
|
store i32 %12, i32 addrspace(1)* %13, align 4
|
|
%14 = tail call spir_func i32 @_Z25sub_group_ballot_find_msbDv4_j(<4 x i32> %2)
|
|
%15 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 6
|
|
store i32 %14, i32 addrspace(1)* %15, align 4
|
|
ret void
|
|
}
|
|
|
|
declare dso_local spir_func <4 x i32> @_Z16sub_group_balloti(i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func i32 @_Z24sub_group_inverse_ballotDv4_j(<4 x i32>) local_unnamed_addr
|
|
|
|
declare dso_local spir_func i32 @_Z28sub_group_ballot_bit_extractDv4_jj(<4 x i32>, i32) local_unnamed_addr
|
|
|
|
declare dso_local spir_func i32 @_Z26sub_group_ballot_bit_countDv4_j(<4 x i32>) local_unnamed_addr
|
|
|
|
declare dso_local spir_func i32 @_Z31sub_group_ballot_inclusive_scanDv4_j(<4 x i32>) local_unnamed_addr
|
|
|
|
declare dso_local spir_func i32 @_Z31sub_group_ballot_exclusive_scanDv4_j(<4 x i32>) local_unnamed_addr
|
|
|
|
declare dso_local spir_func i32 @_Z25sub_group_ballot_find_lsbDv4_j(<4 x i32>) local_unnamed_addr
|
|
|
|
declare dso_local spir_func i32 @_Z25sub_group_ballot_find_msbDv4_j(<4 x i32>) local_unnamed_addr
|
|
|
|
; CHECK-SPIRV: OpFunction
|
|
; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#eqMask]]
|
|
; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#geMask]]
|
|
; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#gtMask]]
|
|
; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#leMask]]
|
|
; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#ltMask]]
|
|
; CHECK-SPIRV: OpFunctionEnd
|
|
|
|
define dso_local spir_kernel void @testSubgroupMasks(<4 x i32> addrspace(1)* nocapture) local_unnamed_addr {
|
|
%2 = tail call spir_func <4 x i32> @_Z21get_sub_group_eq_maskv()
|
|
store <4 x i32> %2, <4 x i32> addrspace(1)* %0, align 16
|
|
%3 = tail call spir_func <4 x i32> @_Z21get_sub_group_ge_maskv()
|
|
%4 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %0, i64 1
|
|
store <4 x i32> %3, <4 x i32> addrspace(1)* %4, align 16
|
|
%5 = tail call spir_func <4 x i32> @_Z21get_sub_group_gt_maskv()
|
|
%6 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %0, i64 2
|
|
store <4 x i32> %5, <4 x i32> addrspace(1)* %6, align 16
|
|
%7 = tail call spir_func <4 x i32> @_Z21get_sub_group_le_maskv()
|
|
%8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %0, i64 3
|
|
store <4 x i32> %7, <4 x i32> addrspace(1)* %8, align 16
|
|
%9 = tail call spir_func <4 x i32> @_Z21get_sub_group_lt_maskv()
|
|
%10 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %0, i64 4
|
|
store <4 x i32> %9, <4 x i32> addrspace(1)* %10, align 16
|
|
ret void
|
|
}
|
|
|
|
declare dso_local spir_func <4 x i32> @_Z21get_sub_group_eq_maskv() local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x i32> @_Z21get_sub_group_ge_maskv() local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x i32> @_Z21get_sub_group_gt_maskv() local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x i32> @_Z21get_sub_group_le_maskv() local_unnamed_addr
|
|
|
|
declare dso_local spir_func <4 x i32> @_Z21get_sub_group_lt_maskv() local_unnamed_addr
|