; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64le < %s | \
; RUN:   FileCheck %s --check-prefix=PWR10LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64 < %s | \
; RUN:   FileCheck %s --check-prefix=PWR10BE

;;
;; Vectors of f32
;;
; fmin reduction of <2 x float> with no fast-math flags on the call.
; The expected code converts both lanes with xscvspdpn and combines them with
; a single scalar xsmindp.
define dso_local float @v2f32(<2 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxswapd vs0, v2
; PWR9LE-NEXT:    xxsldwi vs1, v2, v2, 3
; PWR9LE-NEXT:    xscvspdpn f0, vs0
; PWR9LE-NEXT:    xscvspdpn f1, vs1
; PWR9LE-NEXT:    xsmindp f1, f1, f0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v2f32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT:    xscvspdpn f0, v2
; PWR9BE-NEXT:    xscvspdpn f1, vs1
; PWR9BE-NEXT:    xsmindp f1, f0, f1
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v2f32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxswapd vs0, v2
; PWR10LE-NEXT:    xxsldwi vs1, v2, v2, 3
; PWR10LE-NEXT:    xscvspdpn f0, vs0
; PWR10LE-NEXT:    xscvspdpn f1, vs1
; PWR10LE-NEXT:    xsmindp f1, f1, f0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v2f32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT:    xscvspdpn f0, v2
; PWR10BE-NEXT:    xscvspdpn f1, vs1
; PWR10BE-NEXT:    xsmindp f1, f0, f1
; PWR10BE-NEXT:    blr
entry:
  %0 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a)
  ret float %0
}

; fmin reduction of <2 x float> with the `fast` flag on the call.
; The expected code performs the min in vector form (xxspltw + xvminsp) and
; converts only the result lane with xscvspdpn.
define dso_local float @v2f32_fast(<2 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f32_fast:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxspltw vs0, v2, 2
; PWR9LE-NEXT:    xvminsp vs0, v2, vs0
; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT:    xscvspdpn f1, vs0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v2f32_fast:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxspltw vs0, v2, 1
; PWR9BE-NEXT:    xvminsp vs0, v2, vs0
; PWR9BE-NEXT:    xscvspdpn f1, vs0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v2f32_fast:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxspltw vs0, v2, 2
; PWR10LE-NEXT:    xvminsp vs0, v2, vs0
; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT:    xscvspdpn f1, vs0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v2f32_fast:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxspltw vs0, v2, 1
; PWR10BE-NEXT:    xvminsp vs0, v2, vs0
; PWR10BE-NEXT:    xscvspdpn f1, vs0
; PWR10BE-NEXT:    blr
entry:
  %0 = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a)
  ret float %0
}

; fmin reduction of <4 x float> with no fast-math flags on the call.
; The expected code extracts and converts each of the four lanes
; (xxsldwi/xxswapd + xscvspdpn) and chains three scalar xsmindp operations.
define dso_local float @v4f32(<4 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxsldwi vs2, v2, v2, 3
; PWR9LE-NEXT:    xxswapd vs3, v2
; PWR9LE-NEXT:    xscvspdpn f0, v2
; PWR9LE-NEXT:    xscvspdpn f2, vs2
; PWR9LE-NEXT:    xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT:    xscvspdpn f3, vs3
; PWR9LE-NEXT:    xscvspdpn f1, vs1
; PWR9LE-NEXT:    xsmindp f2, f2, f3
; PWR9LE-NEXT:    xsmindp f1, f2, f1
; PWR9LE-NEXT:    xsmindp f1, f1, f0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v4f32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxsldwi vs2, v2, v2, 1
; PWR9BE-NEXT:    xxswapd vs1, v2
; PWR9BE-NEXT:    xscvspdpn f3, v2
; PWR9BE-NEXT:    xscvspdpn f2, vs2
; PWR9BE-NEXT:    xxsldwi vs0, v2, v2, 3
; PWR9BE-NEXT:    xscvspdpn f1, vs1
; PWR9BE-NEXT:    xscvspdpn f0, vs0
; PWR9BE-NEXT:    xsmindp f2, f3, f2
; PWR9BE-NEXT:    xsmindp f1, f2, f1
; PWR9BE-NEXT:    xsmindp f1, f1, f0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v4f32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxsldwi vs2, v2, v2, 3
; PWR10LE-NEXT:    xxswapd vs3, v2
; PWR10LE-NEXT:    xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT:    xscvspdpn f0, v2
; PWR10LE-NEXT:    xscvspdpn f2, vs2
; PWR10LE-NEXT:    xscvspdpn f3, vs3
; PWR10LE-NEXT:    xscvspdpn f1, vs1
; PWR10LE-NEXT:    xsmindp f2, f2, f3
; PWR10LE-NEXT:    xsmindp f1, f2, f1
; PWR10LE-NEXT:    xsmindp f1, f1, f0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v4f32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxsldwi vs2, v2, v2, 1
; PWR10BE-NEXT:    xxswapd vs1, v2
; PWR10BE-NEXT:    xscvspdpn f3, v2
; PWR10BE-NEXT:    xxsldwi vs0, v2, v2, 3
; PWR10BE-NEXT:    xscvspdpn f2, vs2
; PWR10BE-NEXT:    xscvspdpn f1, vs1
; PWR10BE-NEXT:    xscvspdpn f0, vs0
; PWR10BE-NEXT:    xsmindp f2, f3, f2
; PWR10BE-NEXT:    xsmindp f1, f2, f1
; PWR10BE-NEXT:    xsmindp f1, f1, f0
; PWR10BE-NEXT:    blr
entry:
  %0 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
  ret float %0
}

; fmin reduction of <4 x float> with the `fast` flag on the call.
; The expected code halves the vector twice with xvminsp (after xxswapd and
; xxspltw shuffles) and converts only the result lane with xscvspdpn.
define dso_local float @v4f32_fast(<4 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f32_fast:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxswapd v3, v2
; PWR9LE-NEXT:    xvminsp vs0, v2, v3
; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
; PWR9LE-NEXT:    xvminsp vs0, vs0, vs1
; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT:    xscvspdpn f1, vs0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v4f32_fast:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxswapd v3, v2
; PWR9BE-NEXT:    xvminsp vs0, v2, v3
; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
; PWR9BE-NEXT:    xvminsp vs0, vs0, vs1
; PWR9BE-NEXT:    xscvspdpn f1, vs0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v4f32_fast:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxswapd v3, v2
; PWR10LE-NEXT:    xvminsp vs0, v2, v3
; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
; PWR10LE-NEXT:    xvminsp vs0, vs0, vs1
; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT:    xscvspdpn f1, vs0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v4f32_fast:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxswapd v3, v2
; PWR10BE-NEXT:    xvminsp vs0, v2, v3
; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
; PWR10BE-NEXT:    xvminsp vs0, vs0, vs1
; PWR10BE-NEXT:    xscvspdpn f1, vs0
; PWR10BE-NEXT:    blr
entry:
  %0 = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
  ret float %0
}

; fmin reduction of <8 x float> with no fast-math flags on the call.
; The expected code first folds the two input registers with one xvminsp, then
; reduces the remaining four lanes one at a time with xscvspdpn/xsmindp.
define dso_local float @v8f32(<8 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xvminsp vs0, v2, v3
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
; PWR9LE-NEXT:    xscvspdpn f1, vs1
; PWR9LE-NEXT:    xscvspdpn f2, vs2
; PWR9LE-NEXT:    xsmindp f1, f2, f1
; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
; PWR9LE-NEXT:    xscvspdpn f0, vs0
; PWR9LE-NEXT:    xscvspdpn f2, vs2
; PWR9LE-NEXT:    xsmindp f1, f1, f2
; PWR9LE-NEXT:    xsmindp f1, f1, f0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v8f32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xvminsp vs0, v2, v3
; PWR9BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
; PWR9BE-NEXT:    xscvspdpn f1, vs0
; PWR9BE-NEXT:    xscvspdpn f2, vs2
; PWR9BE-NEXT:    xsmindp f1, f1, f2
; PWR9BE-NEXT:    xxswapd vs2, vs0
; PWR9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR9BE-NEXT:    xscvspdpn f2, vs2
; PWR9BE-NEXT:    xscvspdpn f0, vs0
; PWR9BE-NEXT:    xsmindp f1, f1, f2
; PWR9BE-NEXT:    xsmindp f1, f1, f0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v8f32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xvminsp vs0, v2, v3
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
; PWR10LE-NEXT:    xscvspdpn f1, vs1
; PWR10LE-NEXT:    xscvspdpn f2, vs2
; PWR10LE-NEXT:    xsmindp f1, f2, f1
; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
; PWR10LE-NEXT:    xscvspdpn f0, vs0
; PWR10LE-NEXT:    xscvspdpn f2, vs2
; PWR10LE-NEXT:    xsmindp f1, f1, f2
; PWR10LE-NEXT:    xsmindp f1, f1, f0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v8f32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xvminsp vs0, v2, v3
; PWR10BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
; PWR10BE-NEXT:    xscvspdpn f1, vs0
; PWR10BE-NEXT:    xscvspdpn f2, vs2
; PWR10BE-NEXT:    xsmindp f1, f1, f2
; PWR10BE-NEXT:    xxswapd vs2, vs0
; PWR10BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR10BE-NEXT:    xscvspdpn f2, vs2
; PWR10BE-NEXT:    xscvspdpn f0, vs0
; PWR10BE-NEXT:    xsmindp f1, f1, f2
; PWR10BE-NEXT:    xsmindp f1, f1, f0
; PWR10BE-NEXT:    blr
entry:
  %0 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a)
  ret float %0
}

; fmin reduction of <8 x float> with the `fast` flag on the call.
; The expected code keeps the whole reduction in vector form (xvminsp with
; xxswapd/xxspltw shuffles) and converts only the result lane with xscvspdpn.
define dso_local float @v8f32_fast(<8 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f32_fast:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xvminsp vs0, v2, v3
; PWR9LE-NEXT:    xxswapd v2, vs0
; PWR9LE-NEXT:    xvminsp vs0, vs0, v2
; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
; PWR9LE-NEXT:    xvminsp vs0, vs0, vs1
; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT:    xscvspdpn f1, vs0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v8f32_fast:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xvminsp vs0, v2, v3
; PWR9BE-NEXT:    xxswapd v2, vs0
; PWR9BE-NEXT:    xvminsp vs0, vs0, v2
; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
; PWR9BE-NEXT:    xvminsp vs0, vs0, vs1
; PWR9BE-NEXT:    xscvspdpn f1, vs0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v8f32_fast:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xvminsp vs0, v2, v3
; PWR10LE-NEXT:    xxswapd v2, vs0
; PWR10LE-NEXT:    xvminsp vs0, vs0, v2
; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
; PWR10LE-NEXT:    xvminsp vs0, vs0, vs1
; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT:    xscvspdpn f1, vs0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v8f32_fast:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xvminsp vs0, v2, v3
; PWR10BE-NEXT:    xxswapd v2, vs0
; PWR10BE-NEXT:    xvminsp vs0, vs0, v2
; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
; PWR10BE-NEXT:    xvminsp vs0, vs0, vs1
; PWR10BE-NEXT:    xscvspdpn f1, vs0
; PWR10BE-NEXT:    blr
entry:
  %0 = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a)
  ret float %0
}

; fmin reduction of <16 x float> with no fast-math flags on the call.
; The expected code folds the four input registers with three xvminsp, then
; reduces the remaining four lanes one at a time with xscvspdpn/xsmindp.
define dso_local float @v16f32(<16 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xvminsp vs0, v3, v5
; PWR9LE-NEXT:    xvminsp vs1, v2, v4
; PWR9LE-NEXT:    xvminsp vs0, vs1, vs0
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
; PWR9LE-NEXT:    xscvspdpn f1, vs1
; PWR9LE-NEXT:    xscvspdpn f2, vs2
; PWR9LE-NEXT:    xsmindp f1, f2, f1
; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
; PWR9LE-NEXT:    xscvspdpn f0, vs0
; PWR9LE-NEXT:    xscvspdpn f2, vs2
; PWR9LE-NEXT:    xsmindp f1, f1, f2
; PWR9LE-NEXT:    xsmindp f1, f1, f0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16f32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xvminsp vs0, v3, v5
; PWR9BE-NEXT:    xvminsp vs1, v2, v4
; PWR9BE-NEXT:    xvminsp vs0, vs1, vs0
; PWR9BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
; PWR9BE-NEXT:    xscvspdpn f1, vs0
; PWR9BE-NEXT:    xscvspdpn f2, vs2
; PWR9BE-NEXT:    xsmindp f1, f1, f2
; PWR9BE-NEXT:    xxswapd vs2, vs0
; PWR9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR9BE-NEXT:    xscvspdpn f2, vs2
; PWR9BE-NEXT:    xscvspdpn f0, vs0
; PWR9BE-NEXT:    xsmindp f1, f1, f2
; PWR9BE-NEXT:    xsmindp f1, f1, f0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16f32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xvminsp vs0, v3, v5
; PWR10LE-NEXT:    xvminsp vs1, v2, v4
; PWR10LE-NEXT:    xvminsp vs0, vs1, vs0
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
; PWR10LE-NEXT:    xscvspdpn f1, vs1
; PWR10LE-NEXT:    xscvspdpn f2, vs2
; PWR10LE-NEXT:    xsmindp f1, f2, f1
; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
; PWR10LE-NEXT:    xscvspdpn f0, vs0
; PWR10LE-NEXT:    xscvspdpn f2, vs2
; PWR10LE-NEXT:    xsmindp f1, f1, f2
; PWR10LE-NEXT:    xsmindp f1, f1, f0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16f32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xvminsp vs0, v3, v5
; PWR10BE-NEXT:    xvminsp vs1, v2, v4
; PWR10BE-NEXT:    xvminsp vs0, vs1, vs0
; PWR10BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
; PWR10BE-NEXT:    xscvspdpn f1, vs0
; PWR10BE-NEXT:    xscvspdpn f2, vs2
; PWR10BE-NEXT:    xsmindp f1, f1, f2
; PWR10BE-NEXT:    xxswapd vs2, vs0
; PWR10BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR10BE-NEXT:    xscvspdpn f2, vs2
; PWR10BE-NEXT:    xscvspdpn f0, vs0
; PWR10BE-NEXT:    xsmindp f1, f1, f2
; PWR10BE-NEXT:    xsmindp f1, f1, f0
; PWR10BE-NEXT:    blr
entry:
  %0 = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a)
  ret float %0
}

; fmin reduction of <16 x float> with the `fast` flag on the call.
; The expected code keeps the whole reduction in vector form (xvminsp with
; xxswapd/xxspltw shuffles) and converts only the result lane with xscvspdpn.
define dso_local float @v16f32_fast(<16 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32_fast:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xvminsp vs0, v3, v5
; PWR9LE-NEXT:    xvminsp vs1, v2, v4
; PWR9LE-NEXT:    xvminsp vs0, vs1, vs0
; PWR9LE-NEXT:    xxswapd v2, vs0
; PWR9LE-NEXT:    xvminsp vs0, vs0, v2
; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
; PWR9LE-NEXT:    xvminsp vs0, vs0, vs1
; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT:    xscvspdpn f1, vs0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16f32_fast:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xvminsp vs0, v3, v5
; PWR9BE-NEXT:    xvminsp vs1, v2, v4
; PWR9BE-NEXT:    xvminsp vs0, vs1, vs0
; PWR9BE-NEXT:    xxswapd v2, vs0
; PWR9BE-NEXT:    xvminsp vs0, vs0, v2
; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
; PWR9BE-NEXT:    xvminsp vs0, vs0, vs1
; PWR9BE-NEXT:    xscvspdpn f1, vs0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16f32_fast:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xvminsp vs0, v3, v5
; PWR10LE-NEXT:    xvminsp vs1, v2, v4
; PWR10LE-NEXT:    xvminsp vs0, vs1, vs0
; PWR10LE-NEXT:    xxswapd v2, vs0
; PWR10LE-NEXT:    xvminsp vs0, vs0, v2
; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
; PWR10LE-NEXT:    xvminsp vs0, vs0, vs1
; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT:    xscvspdpn f1, vs0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16f32_fast:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xvminsp vs0, v3, v5
; PWR10BE-NEXT:    xvminsp vs1, v2, v4
; PWR10BE-NEXT:    xvminsp vs0, vs1, vs0
; PWR10BE-NEXT:    xxswapd v2, vs0
; PWR10BE-NEXT:    xvminsp vs0, vs0, v2
; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
; PWR10BE-NEXT:    xvminsp vs0, vs0, vs1
; PWR10BE-NEXT:    xscvspdpn f1, vs0
; PWR10BE-NEXT:    blr
entry:
  %0 = call fast float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a)
  ret float %0
}

; fmin reduction of <32 x float> with no fast-math flags on the call.
; The expected code folds the eight input registers with seven xvminsp, then
; reduces the remaining four lanes one at a time with xscvspdpn/xsmindp.
define dso_local float @v32f32(<32 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32f32:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xvminsp vs0, v5, v9
; PWR9LE-NEXT:    xvminsp vs1, v3, v7
; PWR9LE-NEXT:    xvminsp vs2, v2, v6
; PWR9LE-NEXT:    xvminsp vs0, vs1, vs0
; PWR9LE-NEXT:    xvminsp vs1, v4, v8
; PWR9LE-NEXT:    xvminsp vs1, vs2, vs1
; PWR9LE-NEXT:    xvminsp vs0, vs1, vs0
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
; PWR9LE-NEXT:    xscvspdpn f1, vs1
; PWR9LE-NEXT:    xscvspdpn f2, vs2
; PWR9LE-NEXT:    xsmindp f1, f2, f1
; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
; PWR9LE-NEXT:    xscvspdpn f0, vs0
; PWR9LE-NEXT:    xscvspdpn f2, vs2
; PWR9LE-NEXT:    xsmindp f1, f1, f2
; PWR9LE-NEXT:    xsmindp f1, f1, f0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v32f32:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xvminsp vs0, v5, v9
; PWR9BE-NEXT:    xvminsp vs1, v3, v7
; PWR9BE-NEXT:    xvminsp vs2, v2, v6
; PWR9BE-NEXT:    xvminsp vs0, vs1, vs0
; PWR9BE-NEXT:    xvminsp vs1, v4, v8
; PWR9BE-NEXT:    xvminsp vs1, vs2, vs1
; PWR9BE-NEXT:    xvminsp vs0, vs1, vs0
; PWR9BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
; PWR9BE-NEXT:    xscvspdpn f1, vs0
; PWR9BE-NEXT:    xscvspdpn f2, vs2
; PWR9BE-NEXT:    xsmindp f1, f1, f2
; PWR9BE-NEXT:    xxswapd vs2, vs0
; PWR9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR9BE-NEXT:    xscvspdpn f2, vs2
; PWR9BE-NEXT:    xscvspdpn f0, vs0
; PWR9BE-NEXT:    xsmindp f1, f1, f2
; PWR9BE-NEXT:    xsmindp f1, f1, f0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v32f32:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xvminsp vs0, v5, v9
; PWR10LE-NEXT:    xvminsp vs1, v3, v7
; PWR10LE-NEXT:    xvminsp vs2, v2, v6
; PWR10LE-NEXT:    xvminsp vs0, vs1, vs0
; PWR10LE-NEXT:    xvminsp vs1, v4, v8
; PWR10LE-NEXT:    xvminsp vs1, vs2, vs1
; PWR10LE-NEXT:    xvminsp vs0, vs1, vs0
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
; PWR10LE-NEXT:    xscvspdpn f1, vs1
; PWR10LE-NEXT:    xscvspdpn f2, vs2
; PWR10LE-NEXT:    xsmindp f1, f2, f1
; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
; PWR10LE-NEXT:    xscvspdpn f0, vs0
; PWR10LE-NEXT:    xscvspdpn f2, vs2
; PWR10LE-NEXT:    xsmindp f1, f1, f2
; PWR10LE-NEXT:    xsmindp f1, f1, f0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v32f32:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xvminsp vs0, v5, v9
; PWR10BE-NEXT:    xvminsp vs1, v3, v7
; PWR10BE-NEXT:    xvminsp vs2, v2, v6
; PWR10BE-NEXT:    xvminsp vs0, vs1, vs0
; PWR10BE-NEXT:    xvminsp vs1, v4, v8
; PWR10BE-NEXT:    xvminsp vs1, vs2, vs1
; PWR10BE-NEXT:    xvminsp vs0, vs1, vs0
; PWR10BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
; PWR10BE-NEXT:    xscvspdpn f1, vs0
; PWR10BE-NEXT:    xscvspdpn f2, vs2
; PWR10BE-NEXT:    xsmindp f1, f1, f2
; PWR10BE-NEXT:    xxswapd vs2, vs0
; PWR10BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR10BE-NEXT:    xscvspdpn f2, vs2
; PWR10BE-NEXT:    xscvspdpn f0, vs0
; PWR10BE-NEXT:    xsmindp f1, f1, f2
; PWR10BE-NEXT:    xsmindp f1, f1, f0
; PWR10BE-NEXT:    blr
entry:
  %0 = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> %a)
  ret float %0
}

; fmin reduction of <32 x float> with the `fast` flag on the call.
; The expected code keeps the whole reduction in vector form (xvminsp with
; xxswapd/xxspltw shuffles) and converts only the result lane with xscvspdpn.
define dso_local float @v32f32_fast(<32 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32f32_fast:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xvminsp vs0, v4, v8
; PWR9LE-NEXT:    xvminsp vs1, v2, v6
; PWR9LE-NEXT:    xvminsp vs2, v5, v9
; PWR9LE-NEXT:    xvminsp vs3, v3, v7
; PWR9LE-NEXT:    xvminsp vs2, vs3, vs2
; PWR9LE-NEXT:    xvminsp vs0, vs1, vs0
; PWR9LE-NEXT:    xvminsp vs0, vs0, vs2
; PWR9LE-NEXT:    xxswapd v2, vs0
; PWR9LE-NEXT:    xvminsp vs0, vs0, v2
; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
; PWR9LE-NEXT:    xvminsp vs0, vs0, vs1
; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT:    xscvspdpn f1, vs0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v32f32_fast:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xvminsp vs0, v4, v8
; PWR9BE-NEXT:    xvminsp vs1, v2, v6
; PWR9BE-NEXT:    xvminsp vs2, v5, v9
; PWR9BE-NEXT:    xvminsp vs3, v3, v7
; PWR9BE-NEXT:    xvminsp vs2, vs3, vs2
; PWR9BE-NEXT:    xvminsp vs0, vs1, vs0
; PWR9BE-NEXT:    xvminsp vs0, vs0, vs2
; PWR9BE-NEXT:    xxswapd v2, vs0
; PWR9BE-NEXT:    xvminsp vs0, vs0, v2
; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
; PWR9BE-NEXT:    xvminsp vs0, vs0, vs1
; PWR9BE-NEXT:    xscvspdpn f1, vs0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v32f32_fast:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xvminsp vs0, v4, v8
; PWR10LE-NEXT:    xvminsp vs1, v2, v6
; PWR10LE-NEXT:    xvminsp vs2, v5, v9
; PWR10LE-NEXT:    xvminsp vs3, v3, v7
; PWR10LE-NEXT:    xvminsp vs2, vs3, vs2
; PWR10LE-NEXT:    xvminsp vs0, vs1, vs0
; PWR10LE-NEXT:    xvminsp vs0, vs0, vs2
; PWR10LE-NEXT:    xxswapd v2, vs0
; PWR10LE-NEXT:    xvminsp vs0, vs0, v2
; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
; PWR10LE-NEXT:    xvminsp vs0, vs0, vs1
; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT:    xscvspdpn f1, vs0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v32f32_fast:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xvminsp vs0, v4, v8
; PWR10BE-NEXT:    xvminsp vs1, v2, v6
; PWR10BE-NEXT:    xvminsp vs2, v5, v9
; PWR10BE-NEXT:    xvminsp vs3, v3, v7
; PWR10BE-NEXT:    xvminsp vs2, vs3, vs2
; PWR10BE-NEXT:    xvminsp vs0, vs1, vs0
; PWR10BE-NEXT:    xvminsp vs0, vs0, vs2
; PWR10BE-NEXT:    xxswapd v2, vs0
; PWR10BE-NEXT:    xvminsp vs0, vs0, v2
; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
; PWR10BE-NEXT:    xvminsp vs0, vs0, vs1
; PWR10BE-NEXT:    xscvspdpn f1, vs0
; PWR10BE-NEXT:    blr
entry:
  %0 = call fast float @llvm.vector.reduce.fmin.v32f32(<32 x float> %a)
  ret float %0
}

; Declarations of the f32 fmin reduction intrinsics called by the tests above,
; one per tested vector width.
declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>) #0
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) #0
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>) #0
declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>) #0
declare float @llvm.vector.reduce.fmin.v32f32(<32 x float>) #0

;;
;; Vectors of f64
;;
; fmin reduction of <2 x double> with no fast-math flags on the call.
; The expected code swaps the doublewords with xxswapd and combines the two
; lanes with one scalar xsmindp.
define dso_local double @v2f64(<2 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxswapd vs0, v2
; PWR9LE-NEXT:    xsmindp f1, f0, v2
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v2f64:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxswapd vs0, v2
; PWR9BE-NEXT:    xsmindp f1, v2, f0
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v2f64:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxswapd vs0, v2
; PWR10LE-NEXT:    xsmindp f1, f0, v2
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v2f64:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxswapd vs0, v2
; PWR10BE-NEXT:    xsmindp f1, v2, f0
; PWR10BE-NEXT:    blr
entry:
  %0 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
  ret double %0
}

; fmin reduction of <2 x double> with the `fast` flag on the call.
; The expected code uses the vector xvmindp against an xxswapd-swapped copy;
; the little-endian variants need one more xxswapd to place the result in f1.
define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64_fast:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xxswapd vs0, v2
; PWR9LE-NEXT:    xvmindp vs0, v2, vs0
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v2f64_fast:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xxswapd vs0, v2
; PWR9BE-NEXT:    xvmindp vs1, v2, vs0
; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v2f64_fast:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xxswapd vs0, v2
; PWR10LE-NEXT:    xvmindp vs0, v2, vs0
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v2f64_fast:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xxswapd vs0, v2
; PWR10BE-NEXT:    xvmindp vs1, v2, vs0
; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT:    blr
entry:
  %0 = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
  ret double %0
}

; fmin reduction of <4 x double> with no fast-math flags on the call.
; The expected code folds the two input registers with xvmindp, then combines
; the remaining two lanes with xxswapd + scalar xsmindp.
define dso_local double @v4f64(<4 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f64:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xvmindp vs0, v2, v3
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    xsmindp f1, f1, f0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v4f64:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xvmindp vs0, v2, v3
; PWR9BE-NEXT:    xxswapd vs1, vs0
; PWR9BE-NEXT:    xsmindp f1, f0, f1
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v4f64:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xvmindp vs0, v2, v3
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    xsmindp f1, f1, f0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v4f64:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xvmindp vs0, v2, v3
; PWR10BE-NEXT:    xxswapd vs1, vs0
; PWR10BE-NEXT:    xsmindp f1, f0, f1
; PWR10BE-NEXT:    blr
entry:
  %0 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a)
  ret double %0
}

; fmin reduction of <4 x double> with the `fast` flag on the call.
; The expected code stays in vector form: xvmindp across the two registers,
; then xvmindp against an xxswapd-swapped copy of the partial result.
define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f64_fast:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xvmindp vs0, v2, v3
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    xvmindp vs0, vs0, vs1
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v4f64_fast:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xvmindp vs0, v2, v3
; PWR9BE-NEXT:    xxswapd vs1, vs0
; PWR9BE-NEXT:    xvmindp vs1, vs0, vs1
; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v4f64_fast:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xvmindp vs0, v2, v3
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    xvmindp vs0, vs0, vs1
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v4f64_fast:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xvmindp vs0, v2, v3
; PWR10BE-NEXT:    xxswapd vs1, vs0
; PWR10BE-NEXT:    xvmindp vs1, vs0, vs1
; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT:    blr
entry:
  %0 = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a)
  ret double %0
}

; fmin reduction of <8 x double> with no fast-math flags on the call.
; The expected code folds the four input registers with three xvmindp, then
; combines the remaining two lanes with xxswapd + scalar xsmindp.
define dso_local double @v8f64(<8 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xvmindp vs0, v3, v5
; PWR9LE-NEXT:    xvmindp vs1, v2, v4
; PWR9LE-NEXT:    xvmindp vs0, vs1, vs0
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    xsmindp f1, f1, f0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v8f64:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xvmindp vs0, v3, v5
; PWR9BE-NEXT:    xvmindp vs1, v2, v4
; PWR9BE-NEXT:    xvmindp vs0, vs1, vs0
; PWR9BE-NEXT:    xxswapd vs1, vs0
; PWR9BE-NEXT:    xsmindp f1, f0, f1
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v8f64:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xvmindp vs0, v3, v5
; PWR10LE-NEXT:    xvmindp vs1, v2, v4
; PWR10LE-NEXT:    xvmindp vs0, vs1, vs0
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    xsmindp f1, f1, f0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v8f64:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xvmindp vs0, v3, v5
; PWR10BE-NEXT:    xvmindp vs1, v2, v4
; PWR10BE-NEXT:    xvmindp vs0, vs1, vs0
; PWR10BE-NEXT:    xxswapd vs1, vs0
; PWR10BE-NEXT:    xsmindp f1, f0, f1
; PWR10BE-NEXT:    blr
entry:
  %0 = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a)
  ret double %0
}

; fmin reduction of <8 x double> with the `fast` flag on the call.
; The expected code stays in vector form: xvmindp across the four registers,
; then xvmindp against an xxswapd-swapped copy of the partial result.
define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f64_fast:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xvmindp vs0, v3, v5
; PWR9LE-NEXT:    xvmindp vs1, v2, v4
; PWR9LE-NEXT:    xvmindp vs0, vs1, vs0
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    xvmindp vs0, vs0, vs1
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v8f64_fast:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xvmindp vs0, v3, v5
; PWR9BE-NEXT:    xvmindp vs1, v2, v4
; PWR9BE-NEXT:    xvmindp vs0, vs1, vs0
; PWR9BE-NEXT:    xxswapd vs1, vs0
; PWR9BE-NEXT:    xvmindp vs1, vs0, vs1
; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v8f64_fast:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xvmindp vs0, v3, v5
; PWR10LE-NEXT:    xvmindp vs1, v2, v4
; PWR10LE-NEXT:    xvmindp vs0, vs1, vs0
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    xvmindp vs0, vs0, vs1
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v8f64_fast:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xvmindp vs0, v3, v5
; PWR10BE-NEXT:    xvmindp vs1, v2, v4
; PWR10BE-NEXT:    xvmindp vs0, vs1, vs0
; PWR10BE-NEXT:    xxswapd vs1, vs0
; PWR10BE-NEXT:    xvmindp vs1, vs0, vs1
; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT:    blr
entry:
  %0 = call fast double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a)
  ret double %0
}

; fmin reduction of <16 x double> with no fast-math flags on the call.
; The expected code folds the eight input registers with seven xvmindp, then
; combines the remaining two lanes with xxswapd + scalar xsmindp.
define dso_local double @v16f64(<16 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xvmindp vs0, v5, v9
; PWR9LE-NEXT:    xvmindp vs1, v3, v7
; PWR9LE-NEXT:    xvmindp vs2, v2, v6
; PWR9LE-NEXT:    xvmindp vs0, vs1, vs0
; PWR9LE-NEXT:    xvmindp vs1, v4, v8
; PWR9LE-NEXT:    xvmindp vs1, vs2, vs1
; PWR9LE-NEXT:    xvmindp vs0, vs1, vs0
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    xsmindp f1, f1, f0
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16f64:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xvmindp vs0, v5, v9
; PWR9BE-NEXT:    xvmindp vs1, v3, v7
; PWR9BE-NEXT:    xvmindp vs2, v2, v6
; PWR9BE-NEXT:    xvmindp vs0, vs1, vs0
; PWR9BE-NEXT:    xvmindp vs1, v4, v8
; PWR9BE-NEXT:    xvmindp vs1, vs2, vs1
; PWR9BE-NEXT:    xvmindp vs0, vs1, vs0
; PWR9BE-NEXT:    xxswapd vs1, vs0
; PWR9BE-NEXT:    xsmindp f1, f0, f1
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16f64:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xvmindp vs0, v5, v9
; PWR10LE-NEXT:    xvmindp vs1, v3, v7
; PWR10LE-NEXT:    xvmindp vs2, v2, v6
; PWR10LE-NEXT:    xvmindp vs0, vs1, vs0
; PWR10LE-NEXT:    xvmindp vs1, v4, v8
; PWR10LE-NEXT:    xvmindp vs1, vs2, vs1
; PWR10LE-NEXT:    xvmindp vs0, vs1, vs0
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    xsmindp f1, f1, f0
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16f64:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xvmindp vs0, v5, v9
; PWR10BE-NEXT:    xvmindp vs1, v3, v7
; PWR10BE-NEXT:    xvmindp vs2, v2, v6
; PWR10BE-NEXT:    xvmindp vs0, vs1, vs0
; PWR10BE-NEXT:    xvmindp vs1, v4, v8
; PWR10BE-NEXT:    xvmindp vs1, vs2, vs1
; PWR10BE-NEXT:    xvmindp vs0, vs1, vs0
; PWR10BE-NEXT:    xxswapd vs1, vs0
; PWR10BE-NEXT:    xsmindp f1, f0, f1
; PWR10BE-NEXT:    blr
entry:
  %0 = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a)
  ret double %0
}

; fmin reduction of <16 x double> with the `fast` flag on the call.
; The expected code stays in vector form: xvmindp across the eight registers,
; then xvmindp against an xxswapd-swapped copy of the partial result.
define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f64_fast:
; PWR9LE:       # %bb.0: # %entry
; PWR9LE-NEXT:    xvmindp vs0, v4, v8
; PWR9LE-NEXT:    xvmindp vs1, v2, v6
; PWR9LE-NEXT:    xvmindp vs2, v5, v9
; PWR9LE-NEXT:    xvmindp vs3, v3, v7
; PWR9LE-NEXT:    xvmindp vs2, vs3, vs2
; PWR9LE-NEXT:    xvmindp vs0, vs1, vs0
; PWR9LE-NEXT:    xvmindp vs0, vs0, vs2
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    xvmindp vs0, vs0, vs1
; PWR9LE-NEXT:    xxswapd vs1, vs0
; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT:    blr
;
; PWR9BE-LABEL: v16f64_fast:
; PWR9BE:       # %bb.0: # %entry
; PWR9BE-NEXT:    xvmindp vs0, v4, v8
; PWR9BE-NEXT:    xvmindp vs1, v2, v6
; PWR9BE-NEXT:    xvmindp vs2, v5, v9
; PWR9BE-NEXT:    xvmindp vs3, v3, v7
; PWR9BE-NEXT:    xvmindp vs2, vs3, vs2
; PWR9BE-NEXT:    xvmindp vs0, vs1, vs0
; PWR9BE-NEXT:    xvmindp vs0, vs0, vs2
; PWR9BE-NEXT:    xxswapd vs1, vs0
; PWR9BE-NEXT:    xvmindp vs1, vs0, vs1
; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT:    blr
;
; PWR10LE-LABEL: v16f64_fast:
; PWR10LE:       # %bb.0: # %entry
; PWR10LE-NEXT:    xvmindp vs0, v4, v8
; PWR10LE-NEXT:    xvmindp vs1, v2, v6
; PWR10LE-NEXT:    xvmindp vs2, v5, v9
; PWR10LE-NEXT:    xvmindp vs3, v3, v7
; PWR10LE-NEXT:    xvmindp vs2, vs3, vs2
; PWR10LE-NEXT:    xvmindp vs0, vs1, vs0
; PWR10LE-NEXT:    xvmindp vs0, vs0, vs2
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    xvmindp vs0, vs0, vs1
; PWR10LE-NEXT:    xxswapd vs1, vs0
; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT:    blr
;
; PWR10BE-LABEL: v16f64_fast:
; PWR10BE:       # %bb.0: # %entry
; PWR10BE-NEXT:    xvmindp vs0, v4, v8
; PWR10BE-NEXT:    xvmindp vs1, v2, v6
; PWR10BE-NEXT:    xvmindp vs2, v5, v9
; PWR10BE-NEXT:    xvmindp vs3, v3, v7
; PWR10BE-NEXT:    xvmindp vs2, vs3, vs2
; PWR10BE-NEXT:    xvmindp vs0, vs1, vs0
; PWR10BE-NEXT:    xvmindp vs0, vs0, vs2
; PWR10BE-NEXT:    xxswapd vs1, vs0
; PWR10BE-NEXT:    xvmindp vs1, vs0, vs1
; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT:    blr
entry:
  %0 = call fast double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a)
  ret double %0
}

; v32f64 reduces its <32 x double> argument to a single double by calling
; llvm.vector.reduce.fmin.v32f64 with no fast-math flags on the call.
; The assertions cover the PWR9LE, PWR9BE, PWR10LE and PWR10BE prefixes.
; Each prefix expects four lxv loads from r1-relative offsets (224 to 272
; for the LE prefixes, 240 to 288 for the BE prefixes), fifteen xvmindp
; combines, then an xxswapd and a final scalar xsmindp. The xsmindp operand
; order is f1, f0 for LE and f0, f1 for BE. The PWR10 prefixes expect the
; loads interleaved with the first combines instead of grouped up front.
; NOTE(review) the lxv operands are presumably the part of the argument
; that is passed in memory rather than in vector registers - confirm.
; The check lines are generated output. Regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define dso_local double @v32f64(<32 x double> %a) local_unnamed_addr #0 {
|
|
; PWR9LE-LABEL: v32f64:
|
|
; PWR9LE: # %bb.0: # %entry
|
|
; PWR9LE-NEXT: lxv vs3, 272(r1)
|
|
; PWR9LE-NEXT: lxv vs2, 240(r1)
|
|
; PWR9LE-NEXT: xvmindp vs4, v5, v13
|
|
; PWR9LE-NEXT: lxv vs1, 256(r1)
|
|
; PWR9LE-NEXT: lxv vs0, 224(r1)
|
|
; PWR9LE-NEXT: xvmindp vs3, v9, vs3
|
|
; PWR9LE-NEXT: xvmindp vs2, v7, vs2
|
|
; PWR9LE-NEXT: xvmindp vs1, v8, vs1
|
|
; PWR9LE-NEXT: xvmindp vs0, v6, vs0
|
|
; PWR9LE-NEXT: xvmindp vs3, vs4, vs3
|
|
; PWR9LE-NEXT: xvmindp vs4, v3, v11
|
|
; PWR9LE-NEXT: xvmindp vs2, vs4, vs2
|
|
; PWR9LE-NEXT: xvmindp vs2, vs2, vs3
|
|
; PWR9LE-NEXT: xvmindp vs3, v4, v12
|
|
; PWR9LE-NEXT: xvmindp vs1, vs3, vs1
|
|
; PWR9LE-NEXT: xvmindp vs3, v2, v10
|
|
; PWR9LE-NEXT: xvmindp vs0, vs3, vs0
|
|
; PWR9LE-NEXT: xvmindp vs0, vs0, vs1
|
|
; PWR9LE-NEXT: xvmindp vs0, vs0, vs2
|
|
; PWR9LE-NEXT: xxswapd vs1, vs0
|
|
; PWR9LE-NEXT: xsmindp f1, f1, f0
|
|
; PWR9LE-NEXT: blr
|
|
;
|
|
; PWR9BE-LABEL: v32f64:
|
|
; PWR9BE: # %bb.0: # %entry
|
|
; PWR9BE-NEXT: lxv vs3, 288(r1)
|
|
; PWR9BE-NEXT: lxv vs2, 256(r1)
|
|
; PWR9BE-NEXT: xvmindp vs4, v5, v13
|
|
; PWR9BE-NEXT: lxv vs1, 272(r1)
|
|
; PWR9BE-NEXT: lxv vs0, 240(r1)
|
|
; PWR9BE-NEXT: xvmindp vs3, v9, vs3
|
|
; PWR9BE-NEXT: xvmindp vs2, v7, vs2
|
|
; PWR9BE-NEXT: xvmindp vs1, v8, vs1
|
|
; PWR9BE-NEXT: xvmindp vs0, v6, vs0
|
|
; PWR9BE-NEXT: xvmindp vs3, vs4, vs3
|
|
; PWR9BE-NEXT: xvmindp vs4, v3, v11
|
|
; PWR9BE-NEXT: xvmindp vs2, vs4, vs2
|
|
; PWR9BE-NEXT: xvmindp vs2, vs2, vs3
|
|
; PWR9BE-NEXT: xvmindp vs3, v4, v12
|
|
; PWR9BE-NEXT: xvmindp vs1, vs3, vs1
|
|
; PWR9BE-NEXT: xvmindp vs3, v2, v10
|
|
; PWR9BE-NEXT: xvmindp vs0, vs3, vs0
|
|
; PWR9BE-NEXT: xvmindp vs0, vs0, vs1
|
|
; PWR9BE-NEXT: xvmindp vs0, vs0, vs2
|
|
; PWR9BE-NEXT: xxswapd vs1, vs0
|
|
; PWR9BE-NEXT: xsmindp f1, f0, f1
|
|
; PWR9BE-NEXT: blr
|
|
;
|
|
; PWR10LE-LABEL: v32f64:
|
|
; PWR10LE: # %bb.0: # %entry
|
|
; PWR10LE-NEXT: lxv vs3, 272(r1)
|
|
; PWR10LE-NEXT: lxv vs2, 240(r1)
|
|
; PWR10LE-NEXT: xvmindp vs4, v5, v13
|
|
; PWR10LE-NEXT: xvmindp vs3, v9, vs3
|
|
; PWR10LE-NEXT: lxv vs1, 256(r1)
|
|
; PWR10LE-NEXT: xvmindp vs2, v7, vs2
|
|
; PWR10LE-NEXT: lxv vs0, 224(r1)
|
|
; PWR10LE-NEXT: xvmindp vs1, v8, vs1
|
|
; PWR10LE-NEXT: xvmindp vs0, v6, vs0
|
|
; PWR10LE-NEXT: xvmindp vs3, vs4, vs3
|
|
; PWR10LE-NEXT: xvmindp vs4, v3, v11
|
|
; PWR10LE-NEXT: xvmindp vs2, vs4, vs2
|
|
; PWR10LE-NEXT: xvmindp vs2, vs2, vs3
|
|
; PWR10LE-NEXT: xvmindp vs3, v4, v12
|
|
; PWR10LE-NEXT: xvmindp vs1, vs3, vs1
|
|
; PWR10LE-NEXT: xvmindp vs3, v2, v10
|
|
; PWR10LE-NEXT: xvmindp vs0, vs3, vs0
|
|
; PWR10LE-NEXT: xvmindp vs0, vs0, vs1
|
|
; PWR10LE-NEXT: xvmindp vs0, vs0, vs2
|
|
; PWR10LE-NEXT: xxswapd vs1, vs0
|
|
; PWR10LE-NEXT: xsmindp f1, f1, f0
|
|
; PWR10LE-NEXT: blr
|
|
;
|
|
; PWR10BE-LABEL: v32f64:
|
|
; PWR10BE: # %bb.0: # %entry
|
|
; PWR10BE-NEXT: lxv vs3, 288(r1)
|
|
; PWR10BE-NEXT: lxv vs2, 256(r1)
|
|
; PWR10BE-NEXT: xvmindp vs4, v5, v13
|
|
; PWR10BE-NEXT: xvmindp vs3, v9, vs3
|
|
; PWR10BE-NEXT: lxv vs1, 272(r1)
|
|
; PWR10BE-NEXT: xvmindp vs2, v7, vs2
|
|
; PWR10BE-NEXT: lxv vs0, 240(r1)
|
|
; PWR10BE-NEXT: xvmindp vs1, v8, vs1
|
|
; PWR10BE-NEXT: xvmindp vs0, v6, vs0
|
|
; PWR10BE-NEXT: xvmindp vs3, vs4, vs3
|
|
; PWR10BE-NEXT: xvmindp vs4, v3, v11
|
|
; PWR10BE-NEXT: xvmindp vs2, vs4, vs2
|
|
; PWR10BE-NEXT: xvmindp vs2, vs2, vs3
|
|
; PWR10BE-NEXT: xvmindp vs3, v4, v12
|
|
; PWR10BE-NEXT: xvmindp vs1, vs3, vs1
|
|
; PWR10BE-NEXT: xvmindp vs3, v2, v10
|
|
; PWR10BE-NEXT: xvmindp vs0, vs3, vs0
|
|
; PWR10BE-NEXT: xvmindp vs0, vs0, vs1
|
|
; PWR10BE-NEXT: xvmindp vs0, vs0, vs2
|
|
; PWR10BE-NEXT: xxswapd vs1, vs0
|
|
; PWR10BE-NEXT: xsmindp f1, f0, f1
|
|
; PWR10BE-NEXT: blr
|
|
entry:
|
|
%0 = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %a)
|
|
ret double %0
|
|
}
|
|
|
|
; v32f64_fast reduces its <32 x double> argument to a single double by
; calling llvm.vector.reduce.fmin.v32f64 with the `fast` flag on the call.
; The assertions cover the PWR9LE, PWR9BE, PWR10LE and PWR10BE prefixes.
; Each prefix expects four lxv loads from r1-relative offsets (224 to 272
; for the LE prefixes, 240 to 288 for the BE prefixes), fifteen xvmindp
; combines, then an xxswapd and one more xvmindp instead of a scalar
; xsmindp. The two LE prefixes additionally expect a trailing xxswapd
; before the blr. The PWR10 prefixes expect two of the loads placed after
; the first five combines.
; NOTE(review) the lxv operands are presumably the part of the argument
; that is passed in memory rather than in vector registers - confirm.
; The check lines are generated output. Regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
|
|
; PWR9LE-LABEL: v32f64_fast:
|
|
; PWR9LE: # %bb.0: # %entry
|
|
; PWR9LE-NEXT: lxv vs0, 256(r1)
|
|
; PWR9LE-NEXT: lxv vs1, 224(r1)
|
|
; PWR9LE-NEXT: lxv vs2, 272(r1)
|
|
; PWR9LE-NEXT: lxv vs3, 240(r1)
|
|
; PWR9LE-NEXT: xvmindp vs4, v3, v11
|
|
; PWR9LE-NEXT: xvmindp vs5, v5, v13
|
|
; PWR9LE-NEXT: xvmindp vs6, v2, v10
|
|
; PWR9LE-NEXT: xvmindp vs7, v4, v12
|
|
; PWR9LE-NEXT: xvmindp vs3, v7, vs3
|
|
; PWR9LE-NEXT: xvmindp vs2, v9, vs2
|
|
; PWR9LE-NEXT: xvmindp vs1, v6, vs1
|
|
; PWR9LE-NEXT: xvmindp vs0, v8, vs0
|
|
; PWR9LE-NEXT: xvmindp vs0, vs7, vs0
|
|
; PWR9LE-NEXT: xvmindp vs1, vs6, vs1
|
|
; PWR9LE-NEXT: xvmindp vs2, vs5, vs2
|
|
; PWR9LE-NEXT: xvmindp vs3, vs4, vs3
|
|
; PWR9LE-NEXT: xvmindp vs2, vs3, vs2
|
|
; PWR9LE-NEXT: xvmindp vs0, vs1, vs0
|
|
; PWR9LE-NEXT: xvmindp vs0, vs0, vs2
|
|
; PWR9LE-NEXT: xxswapd vs1, vs0
|
|
; PWR9LE-NEXT: xvmindp vs0, vs0, vs1
|
|
; PWR9LE-NEXT: xxswapd vs1, vs0
|
|
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
|
|
; PWR9LE-NEXT: blr
|
|
;
|
|
; PWR9BE-LABEL: v32f64_fast:
|
|
; PWR9BE: # %bb.0: # %entry
|
|
; PWR9BE-NEXT: lxv vs0, 272(r1)
|
|
; PWR9BE-NEXT: lxv vs1, 240(r1)
|
|
; PWR9BE-NEXT: lxv vs2, 288(r1)
|
|
; PWR9BE-NEXT: lxv vs3, 256(r1)
|
|
; PWR9BE-NEXT: xvmindp vs4, v3, v11
|
|
; PWR9BE-NEXT: xvmindp vs5, v5, v13
|
|
; PWR9BE-NEXT: xvmindp vs6, v2, v10
|
|
; PWR9BE-NEXT: xvmindp vs7, v4, v12
|
|
; PWR9BE-NEXT: xvmindp vs3, v7, vs3
|
|
; PWR9BE-NEXT: xvmindp vs2, v9, vs2
|
|
; PWR9BE-NEXT: xvmindp vs1, v6, vs1
|
|
; PWR9BE-NEXT: xvmindp vs0, v8, vs0
|
|
; PWR9BE-NEXT: xvmindp vs0, vs7, vs0
|
|
; PWR9BE-NEXT: xvmindp vs1, vs6, vs1
|
|
; PWR9BE-NEXT: xvmindp vs2, vs5, vs2
|
|
; PWR9BE-NEXT: xvmindp vs3, vs4, vs3
|
|
; PWR9BE-NEXT: xvmindp vs2, vs3, vs2
|
|
; PWR9BE-NEXT: xvmindp vs0, vs1, vs0
|
|
; PWR9BE-NEXT: xvmindp vs0, vs0, vs2
|
|
; PWR9BE-NEXT: xxswapd vs1, vs0
|
|
; PWR9BE-NEXT: xvmindp vs1, vs0, vs1
|
|
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
|
|
; PWR9BE-NEXT: blr
|
|
;
|
|
; PWR10LE-LABEL: v32f64_fast:
|
|
; PWR10LE: # %bb.0: # %entry
|
|
; PWR10LE-NEXT: lxv vs0, 256(r1)
|
|
; PWR10LE-NEXT: lxv vs1, 224(r1)
|
|
; PWR10LE-NEXT: xvmindp vs4, v3, v11
|
|
; PWR10LE-NEXT: xvmindp vs5, v5, v13
|
|
; PWR10LE-NEXT: xvmindp vs6, v2, v10
|
|
; PWR10LE-NEXT: xvmindp vs7, v4, v12
|
|
; PWR10LE-NEXT: xvmindp vs1, v6, vs1
|
|
; PWR10LE-NEXT: lxv vs2, 272(r1)
|
|
; PWR10LE-NEXT: lxv vs3, 240(r1)
|
|
; PWR10LE-NEXT: xvmindp vs3, v7, vs3
|
|
; PWR10LE-NEXT: xvmindp vs2, v9, vs2
|
|
; PWR10LE-NEXT: xvmindp vs0, v8, vs0
|
|
; PWR10LE-NEXT: xvmindp vs0, vs7, vs0
|
|
; PWR10LE-NEXT: xvmindp vs1, vs6, vs1
|
|
; PWR10LE-NEXT: xvmindp vs2, vs5, vs2
|
|
; PWR10LE-NEXT: xvmindp vs3, vs4, vs3
|
|
; PWR10LE-NEXT: xvmindp vs2, vs3, vs2
|
|
; PWR10LE-NEXT: xvmindp vs0, vs1, vs0
|
|
; PWR10LE-NEXT: xvmindp vs0, vs0, vs2
|
|
; PWR10LE-NEXT: xxswapd vs1, vs0
|
|
; PWR10LE-NEXT: xvmindp vs0, vs0, vs1
|
|
; PWR10LE-NEXT: xxswapd vs1, vs0
|
|
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
|
|
; PWR10LE-NEXT: blr
|
|
;
|
|
; PWR10BE-LABEL: v32f64_fast:
|
|
; PWR10BE: # %bb.0: # %entry
|
|
; PWR10BE-NEXT: lxv vs0, 272(r1)
|
|
; PWR10BE-NEXT: lxv vs1, 240(r1)
|
|
; PWR10BE-NEXT: xvmindp vs4, v3, v11
|
|
; PWR10BE-NEXT: xvmindp vs5, v5, v13
|
|
; PWR10BE-NEXT: xvmindp vs6, v2, v10
|
|
; PWR10BE-NEXT: xvmindp vs7, v4, v12
|
|
; PWR10BE-NEXT: xvmindp vs1, v6, vs1
|
|
; PWR10BE-NEXT: lxv vs2, 288(r1)
|
|
; PWR10BE-NEXT: lxv vs3, 256(r1)
|
|
; PWR10BE-NEXT: xvmindp vs3, v7, vs3
|
|
; PWR10BE-NEXT: xvmindp vs2, v9, vs2
|
|
; PWR10BE-NEXT: xvmindp vs0, v8, vs0
|
|
; PWR10BE-NEXT: xvmindp vs0, vs7, vs0
|
|
; PWR10BE-NEXT: xvmindp vs1, vs6, vs1
|
|
; PWR10BE-NEXT: xvmindp vs2, vs5, vs2
|
|
; PWR10BE-NEXT: xvmindp vs3, vs4, vs3
|
|
; PWR10BE-NEXT: xvmindp vs2, vs3, vs2
|
|
; PWR10BE-NEXT: xvmindp vs0, vs1, vs0
|
|
; PWR10BE-NEXT: xvmindp vs0, vs0, vs2
|
|
; PWR10BE-NEXT: xxswapd vs1, vs0
|
|
; PWR10BE-NEXT: xvmindp vs1, vs0, vs1
|
|
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
|
|
; PWR10BE-NEXT: blr
|
|
entry:
|
|
%0 = call fast double @llvm.vector.reduce.fmin.v32f64(<32 x double> %a)
|
|
ret double %0
|
|
}
|
|
|
|
; Declarations of the llvm.vector.reduce.fmin intrinsic for double element
; vectors of 2, 4, 8, 16 and 32 lanes. Each takes one vector operand,
; returns a scalar double, and is tagged with attribute group #0.
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) #0
|
|
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>) #0
|
|
declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>) #0
|
|
declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>) #0
|
|
declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>) #0
|
|
|
|
; Attribute group #0, referenced by the function definitions and intrinsic
; declarations in this file, contains only nounwind.
attributes #0 = { nounwind }
|