; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck -check-prefix=CHECK-P9 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-postra-bias-addi=false < %s |\
; RUN:   FileCheck -check-prefix=CHECK-P9-NO-HEURISTIC %s

; Packed layout of @scalars: 16 bytes, then one double at byte offset 16,
; then 152 more bytes (176 bytes in total). @test reads the double in field 1.
%_type_of_scalars = type <{ [16 x i8], double, [152 x i8] }>
; Packed single-double element types; @test uses them to index %.x and %.a.
%_elem_type_of_x = type <{ double }>
%_elem_type_of_a = type <{ double }>

; Zero-initialized common global, 16-byte aligned, holding the scale factor
; that @test loads and broadcasts.
@scalars = common dso_local local_unnamed_addr global %_type_of_scalars zeroinitializer, align 16

; Codegen test (llc, -mcpu=pwr9) for a pre-vectorized scaling loop.
;
; Arguments:
;   %.x - destination array (noalias); stores go through %x_rvo_based_addr_3.
;   %.a - source array; loads go through %a_rvo_based_addr_5.
;   %.n - pointer to the i64 element count; the loop bound is that value
;         rounded down to a multiple of 32 (%n.vec).
;
; Each trip of %vector.body advances %index by 32 elements. The body contains
; eight <4 x double> loads and eight multiplies by the broadcast scalar, but
; only the products %9, %10 and %16 are stored. The values %wide.load20
; through %wide.load24, the products %11 through %15 and the addresses %18
; through %22 have no users, so the expected assembly holds just six lxv and
; six stxv instructions (displacements 0/16, 32/48 and 224/240).
;
; Both check-prefix groups below currently record the same instruction
; sequence; the second llc invocation differs only by passing
; -ppc-postra-bias-addi=false. In both, the pointer bump for the source
; register sits between the multiplies and the stores, and the bump for the
; destination register follows the last store.
define dso_local void @test(ptr noalias %.x, ptr %.a, ptr noalias %.n) {
; CHECK-P9-LABEL: test:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: ld 5, 0(5)
; CHECK-P9-NEXT: addis 6, 2, scalars@toc@ha
; CHECK-P9-NEXT: addi 6, 6, scalars@toc@l
; CHECK-P9-NEXT: rldicr 5, 5, 0, 58
; CHECK-P9-NEXT: addi 6, 6, 16
; CHECK-P9-NEXT: addi 5, 5, -32
; CHECK-P9-NEXT: lxvdsx 0, 0, 6
; CHECK-P9-NEXT: rldicl 5, 5, 59, 5
; CHECK-P9-NEXT: addi 5, 5, 1
; CHECK-P9-NEXT: mtctr 5
; CHECK-P9-NEXT: .p2align 4
; CHECK-P9-NEXT: .LBB0_1: # %vector.body
; CHECK-P9-NEXT: #
; CHECK-P9-NEXT: lxv 1, 16(4)
; CHECK-P9-NEXT: lxv 2, 0(4)
; CHECK-P9-NEXT: lxv 3, 48(4)
; CHECK-P9-NEXT: lxv 4, 32(4)
; CHECK-P9-NEXT: xvmuldp 2, 2, 0
; CHECK-P9-NEXT: lxv 5, 240(4)
; CHECK-P9-NEXT: lxv 6, 224(4)
; CHECK-P9-NEXT: xvmuldp 1, 1, 0
; CHECK-P9-NEXT: xvmuldp 4, 4, 0
; CHECK-P9-NEXT: xvmuldp 3, 3, 0
; CHECK-P9-NEXT: xvmuldp 6, 6, 0
; CHECK-P9-NEXT: xvmuldp 5, 5, 0
; CHECK-P9-NEXT: addi 4, 4, 256
; CHECK-P9-NEXT: stxv 1, 16(3)
; CHECK-P9-NEXT: stxv 2, 0(3)
; CHECK-P9-NEXT: stxv 3, 48(3)
; CHECK-P9-NEXT: stxv 4, 32(3)
; CHECK-P9-NEXT: stxv 5, 240(3)
; CHECK-P9-NEXT: stxv 6, 224(3)
; CHECK-P9-NEXT: addi 3, 3, 256
; CHECK-P9-NEXT: bdnz .LBB0_1
; CHECK-P9-NEXT: # %bb.2: # %return.block
; CHECK-P9-NEXT: blr
;
; CHECK-P9-NO-HEURISTIC-LABEL: test:
; CHECK-P9-NO-HEURISTIC: # %bb.0: # %entry
; CHECK-P9-NO-HEURISTIC-NEXT: ld 5, 0(5)
; CHECK-P9-NO-HEURISTIC-NEXT: addis 6, 2, scalars@toc@ha
; CHECK-P9-NO-HEURISTIC-NEXT: addi 6, 6, scalars@toc@l
; CHECK-P9-NO-HEURISTIC-NEXT: rldicr 5, 5, 0, 58
; CHECK-P9-NO-HEURISTIC-NEXT: addi 6, 6, 16
; CHECK-P9-NO-HEURISTIC-NEXT: addi 5, 5, -32
; CHECK-P9-NO-HEURISTIC-NEXT: lxvdsx 0, 0, 6
; CHECK-P9-NO-HEURISTIC-NEXT: rldicl 5, 5, 59, 5
; CHECK-P9-NO-HEURISTIC-NEXT: addi 5, 5, 1
; CHECK-P9-NO-HEURISTIC-NEXT: mtctr 5
; CHECK-P9-NO-HEURISTIC-NEXT: .p2align 4
; CHECK-P9-NO-HEURISTIC-NEXT: .LBB0_1: # %vector.body
; CHECK-P9-NO-HEURISTIC-NEXT: #
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 1, 16(4)
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 2, 0(4)
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 3, 48(4)
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 4, 32(4)
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 2, 2, 0
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 5, 240(4)
; CHECK-P9-NO-HEURISTIC-NEXT: lxv 6, 224(4)
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 1, 1, 0
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 4, 4, 0
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 3, 3, 0
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 6, 6, 0
; CHECK-P9-NO-HEURISTIC-NEXT: xvmuldp 5, 5, 0
; CHECK-P9-NO-HEURISTIC-NEXT: addi 4, 4, 256
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 1, 16(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 2, 0(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 3, 48(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 4, 32(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 5, 240(3)
; CHECK-P9-NO-HEURISTIC-NEXT: stxv 6, 224(3)
; CHECK-P9-NO-HEURISTIC-NEXT: addi 3, 3, 256
; CHECK-P9-NO-HEURISTIC-NEXT: bdnz .LBB0_1
; CHECK-P9-NO-HEURISTIC-NEXT: # %bb.2: # %return.block
; CHECK-P9-NO-HEURISTIC-NEXT: blr
entry:
  ; Both array bases are biased by -1 element. The loop indexes them with
  ; (%index | 1), and %index is always a multiple of 32, so the bias cancels
  ; and the expected assembly addresses start at displacement 0.
  %x_rvo_based_addr_3 = getelementptr inbounds [0 x %_elem_type_of_x], ptr %.x, i64 0, i64 -1
  %a_rvo_based_addr_5 = getelementptr inbounds [0 x %_elem_type_of_a], ptr %.a, i64 0, i64 -1
  %_val_n_ = load i64, ptr %.n, align 8
  ; Scale factor: the double stored at byte offset 16 of @scalars.
  %_val_c1_ = load double, ptr getelementptr inbounds (%_type_of_scalars, ptr @scalars, i64 0, i32 1), align 16
  ; Loop bound: the element count rounded down to a multiple of 32.
  %n.vec = and i64 %_val_n_, -32
  ; Broadcast the scale factor to every lane of a <4 x double>.
  %broadcast.splatinsert26 = insertelement <4 x double> undef, double %_val_c1_, i32 0
  %broadcast.splat27 = shufflevector <4 x double> %broadcast.splatinsert26, <4 x double> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %offset.idx = or i64 %index, 1
  ; %0 = destination address, %1 = source address for this iteration.
  %0 = getelementptr %_elem_type_of_x, ptr %x_rvo_based_addr_3, i64 %offset.idx, i32 0
  %1 = getelementptr %_elem_type_of_a, ptr %a_rvo_based_addr_5, i64 %offset.idx, i32 0
  ; Eight consecutive <4 x double> loads covering 32 source elements.
  %wide.load = load <4 x double>, ptr %1, align 8
  %2 = getelementptr double, ptr %1, i64 4
  %wide.load19 = load <4 x double>, ptr %2, align 8
  %3 = getelementptr double, ptr %1, i64 8
  %wide.load20 = load <4 x double>, ptr %3, align 8
  %4 = getelementptr double, ptr %1, i64 12
  %wide.load21 = load <4 x double>, ptr %4, align 8
  %5 = getelementptr double, ptr %1, i64 16
  %wide.load22 = load <4 x double>, ptr %5, align 8
  %6 = getelementptr double, ptr %1, i64 20
  %wide.load23 = load <4 x double>, ptr %6, align 8
  %7 = getelementptr double, ptr %1, i64 24
  %wide.load24 = load <4 x double>, ptr %7, align 8
  %8 = getelementptr double, ptr %1, i64 28
  %wide.load25 = load <4 x double>, ptr %8, align 8
  ; Multiply every loaded vector by the broadcast scale factor.
  %9 = fmul fast <4 x double> %wide.load, %broadcast.splat27
  %10 = fmul fast <4 x double> %wide.load19, %broadcast.splat27
  %11 = fmul fast <4 x double> %wide.load20, %broadcast.splat27
  %12 = fmul fast <4 x double> %wide.load21, %broadcast.splat27
  %13 = fmul fast <4 x double> %wide.load22, %broadcast.splat27
  %14 = fmul fast <4 x double> %wide.load23, %broadcast.splat27
  %15 = fmul fast <4 x double> %wide.load24, %broadcast.splat27
  %16 = fmul fast <4 x double> %wide.load25, %broadcast.splat27
  ; Only the first, second and last products are written back; the
  ; addresses %18 through %22 are computed but never used.
  store <4 x double> %9, ptr %0, align 8
  %17 = getelementptr double, ptr %0, i64 4
  store <4 x double> %10, ptr %17, align 8
  %18 = getelementptr double, ptr %0, i64 8
  %19 = getelementptr double, ptr %0, i64 12
  %20 = getelementptr double, ptr %0, i64 16
  %21 = getelementptr double, ptr %0, i64 20
  %22 = getelementptr double, ptr %0, i64 24
  %23 = getelementptr double, ptr %0, i64 28
  store <4 x double> %16, ptr %23, align 8
  ; Advance by 32 elements and exit once the rounded-down bound is reached.
  %index.next = add i64 %index, 32
  %cm = icmp eq i64 %index.next, %n.vec
  br i1 %cm, label %return.block, label %vector.body

return.block:
  ret void
}