266 lines
20 KiB
LLVM
266 lines
20 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -O3 -S | FileCheck %s
|
|
; RUN: opt < %s -passes="default<O3>" -S | FileCheck %s
|
|
|
|
; Test that IR is optimal after vectorization/unrolling/CSE/canonicalization.
|
|
; In particular, there should be no fdivs inside loops because that is expensive.
|
|
|
|
; TODO: There is a CSE opportunity to reduce the hoisted fdivs after vectorization/unrolling.
|
|
; PR46115 - https://llvm.org/PR46115
|
|
|
|
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-apple-macosx10.15.0"
|
|
|
|
define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
|
|
; CHECK-LABEL: @vdiv(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
|
|
; CHECK: for.body.preheader:
|
|
; CHECK-NEXT: [[X4:%.*]] = ptrtoint ptr [[X:%.*]] to i64
|
|
; CHECK-NEXT: [[Y5:%.*]] = ptrtoint ptr [[Y:%.*]] to i64
|
|
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 16
|
|
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[X4]], [[Y5]]
|
|
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128
|
|
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]]
|
|
; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_BODY_PREHEADER15:%.*]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967280
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT9]], <4 x double> poison, <4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT11]], <4 x double> poison, <4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT13]], <4 x double> poison, <4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -16
|
|
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
|
|
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP3]], 1
|
|
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP1]], 16
|
|
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]]
|
|
; CHECK: vector.ph.new:
|
|
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP3]], -2
|
|
; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT10]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT12]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT14]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT10]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT12]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT14]]
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP13]], align 8, !tbaa [[TBAA3:![0-9]+]]
|
|
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP13]], i64 4
|
|
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP15]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[TMP13]], i64 8
|
|
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x double>, ptr [[TMP17]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP13]], i64 12
|
|
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x double>, ptr [[TMP19]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP21:%.*]] = fmul fast <4 x double> [[WIDE_LOAD]], [[TMP5]]
|
|
; CHECK-NEXT: [[TMP22:%.*]] = fmul fast <4 x double> [[WIDE_LOAD6]], [[TMP6]]
|
|
; CHECK-NEXT: [[TMP23:%.*]] = fmul fast <4 x double> [[WIDE_LOAD7]], [[TMP7]]
|
|
; CHECK-NEXT: [[TMP24:%.*]] = fmul fast <4 x double> [[WIDE_LOAD8]], [[TMP8]]
|
|
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]]
|
|
; CHECK-NEXT: store <4 x double> [[TMP21]], ptr [[TMP25]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i64 4
|
|
; CHECK-NEXT: store <4 x double> [[TMP22]], ptr [[TMP27]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i64 8
|
|
; CHECK-NEXT: store <4 x double> [[TMP23]], ptr [[TMP29]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i64 12
|
|
; CHECK-NEXT: store <4 x double> [[TMP24]], ptr [[TMP31]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 16
|
|
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDEX_NEXT]]
|
|
; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x double>, ptr [[TMP33]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 4
|
|
; CHECK-NEXT: [[WIDE_LOAD6_1:%.*]] = load <4 x double>, ptr [[TMP35]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 8
|
|
; CHECK-NEXT: [[WIDE_LOAD7_1:%.*]] = load <4 x double>, ptr [[TMP37]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 12
|
|
; CHECK-NEXT: [[WIDE_LOAD8_1:%.*]] = load <4 x double>, ptr [[TMP39]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP41:%.*]] = fmul fast <4 x double> [[WIDE_LOAD_1]], [[TMP9]]
|
|
; CHECK-NEXT: [[TMP42:%.*]] = fmul fast <4 x double> [[WIDE_LOAD6_1]], [[TMP10]]
|
|
; CHECK-NEXT: [[TMP43:%.*]] = fmul fast <4 x double> [[WIDE_LOAD7_1]], [[TMP11]]
|
|
; CHECK-NEXT: [[TMP44:%.*]] = fmul fast <4 x double> [[WIDE_LOAD8_1]], [[TMP12]]
|
|
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX_NEXT]]
|
|
; CHECK-NEXT: store <4 x double> [[TMP41]], ptr [[TMP45]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds double, ptr [[TMP45]], i64 4
|
|
; CHECK-NEXT: store <4 x double> [[TMP42]], ptr [[TMP47]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds double, ptr [[TMP45]], i64 8
|
|
; CHECK-NEXT: store <4 x double> [[TMP43]], ptr [[TMP49]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds double, ptr [[TMP45]], i64 12
|
|
; CHECK-NEXT: store <4 x double> [[TMP44]], ptr [[TMP51]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[INDEX_NEXT_1]] = add nuw i64 [[INDEX]], 32
|
|
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
|
|
; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
|
|
; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; CHECK: middle.block.unr-lcssa:
|
|
; CHECK-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT_1]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
|
|
; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY_EPIL:%.*]]
|
|
; CHECK: vector.body.epil:
|
|
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDEX_UNR]]
|
|
; CHECK-NEXT: [[WIDE_LOAD_EPIL:%.*]] = load <4 x double>, ptr [[TMP53]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds double, ptr [[TMP53]], i64 4
|
|
; CHECK-NEXT: [[WIDE_LOAD6_EPIL:%.*]] = load <4 x double>, ptr [[TMP55]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds double, ptr [[TMP53]], i64 8
|
|
; CHECK-NEXT: [[WIDE_LOAD7_EPIL:%.*]] = load <4 x double>, ptr [[TMP57]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds double, ptr [[TMP53]], i64 12
|
|
; CHECK-NEXT: [[WIDE_LOAD8_EPIL:%.*]] = load <4 x double>, ptr [[TMP59]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP61:%.*]] = fdiv fast <4 x double> [[WIDE_LOAD_EPIL]], [[BROADCAST_SPLAT]]
|
|
; CHECK-NEXT: [[TMP62:%.*]] = fdiv fast <4 x double> [[WIDE_LOAD6_EPIL]], [[BROADCAST_SPLAT10]]
|
|
; CHECK-NEXT: [[TMP63:%.*]] = fdiv fast <4 x double> [[WIDE_LOAD7_EPIL]], [[BROADCAST_SPLAT12]]
|
|
; CHECK-NEXT: [[TMP64:%.*]] = fdiv fast <4 x double> [[WIDE_LOAD8_EPIL]], [[BROADCAST_SPLAT14]]
|
|
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX_UNR]]
|
|
; CHECK-NEXT: store <4 x double> [[TMP61]], ptr [[TMP65]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds double, ptr [[TMP65]], i64 4
|
|
; CHECK-NEXT: store <4 x double> [[TMP62]], ptr [[TMP67]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds double, ptr [[TMP65]], i64 8
|
|
; CHECK-NEXT: store <4 x double> [[TMP63]], ptr [[TMP69]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds double, ptr [[TMP65]], i64 12
|
|
; CHECK-NEXT: store <4 x double> [[TMP64]], ptr [[TMP71]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: br label [[MIDDLE_BLOCK]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER15]]
|
|
; CHECK: for.body.preheader15:
|
|
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: [[TMP73:%.*]] = xor i64 [[INDVARS_IV_PH]], -1
|
|
; CHECK-NEXT: [[TMP74:%.*]] = add nsw i64 [[TMP73]], [[WIDE_TRIP_COUNT]]
|
|
; CHECK-NEXT: [[XTRAITER16:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 7
|
|
; CHECK-NEXT: [[LCMP_MOD17_NOT:%.*]] = icmp eq i64 [[XTRAITER16]], 0
|
|
; CHECK-NEXT: br i1 [[LCMP_MOD17_NOT]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], label [[FOR_BODY_PROL_PREHEADER:%.*]]
|
|
; CHECK: for.body.prol.preheader:
|
|
; CHECK-NEXT: [[TMP75:%.*]] = fdiv fast double 1.000000e+00, [[A]]
|
|
; CHECK-NEXT: br label [[FOR_BODY_PROL:%.*]]
|
|
; CHECK: for.body.prol:
|
|
; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ]
|
|
; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], [[FOR_BODY_PROL]] ], [ 0, [[FOR_BODY_PROL_PREHEADER]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_PROL]]
|
|
; CHECK-NEXT: [[T0_PROL:%.*]] = load double, ptr [[ARRAYIDX_PROL]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP76:%.*]] = fmul fast double [[T0_PROL]], [[TMP75]]
|
|
; CHECK-NEXT: [[ARRAYIDX2_PROL:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_PROL]]
|
|
; CHECK-NEXT: store double [[TMP76]], ptr [[ARRAYIDX2_PROL]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
|
|
; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
|
|
; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER16]]
|
|
; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label [[FOR_BODY_PROL_LOOPEXIT]], label [[FOR_BODY_PROL]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; CHECK: for.body.prol.loopexit:
|
|
; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER15]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]
|
|
; CHECK-NEXT: [[TMP77:%.*]] = icmp ult i64 [[TMP74]], 7
|
|
; CHECK-NEXT: br i1 [[TMP77]], label [[FOR_END]], label [[FOR_BODY_PREHEADER15_NEW:%.*]]
|
|
; CHECK: for.body.preheader15.new:
|
|
; CHECK-NEXT: [[TMP78:%.*]] = fdiv fast double 1.000000e+00, [[A]]
|
|
; CHECK-NEXT: [[TMP79:%.*]] = fdiv fast double 1.000000e+00, [[A]]
|
|
; CHECK-NEXT: [[TMP80:%.*]] = fdiv fast double 1.000000e+00, [[A]]
|
|
; CHECK-NEXT: [[TMP81:%.*]] = fdiv fast double 1.000000e+00, [[A]]
|
|
; CHECK-NEXT: [[TMP82:%.*]] = fdiv fast double 1.000000e+00, [[A]]
|
|
; CHECK-NEXT: [[TMP83:%.*]] = fdiv fast double 1.000000e+00, [[A]]
|
|
; CHECK-NEXT: [[TMP84:%.*]] = fdiv fast double 1.000000e+00, [[A]]
|
|
; CHECK-NEXT: [[TMP85:%.*]] = fdiv fast double 1.000000e+00, [[A]]
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER15_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[T0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP86:%.*]] = fmul fast double [[T0]], [[TMP78]]
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: store double [[TMP86]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT]]
|
|
; CHECK-NEXT: [[T0_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP87:%.*]] = fmul fast double [[T0_1]], [[TMP79]]
|
|
; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT]]
|
|
; CHECK-NEXT: store double [[TMP87]], ptr [[ARRAYIDX2_1]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
|
|
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_1]]
|
|
; CHECK-NEXT: [[T0_2:%.*]] = load double, ptr [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP88:%.*]] = fmul fast double [[T0_2]], [[TMP80]]
|
|
; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_1]]
|
|
; CHECK-NEXT: store double [[TMP88]], ptr [[ARRAYIDX2_2]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
|
|
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_2]]
|
|
; CHECK-NEXT: [[T0_3:%.*]] = load double, ptr [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP89:%.*]] = fmul fast double [[T0_3]], [[TMP81]]
|
|
; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_2]]
|
|
; CHECK-NEXT: store double [[TMP89]], ptr [[ARRAYIDX2_3]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4
|
|
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_3]]
|
|
; CHECK-NEXT: [[T0_4:%.*]] = load double, ptr [[ARRAYIDX_4]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP90:%.*]] = fmul fast double [[T0_4]], [[TMP82]]
|
|
; CHECK-NEXT: [[ARRAYIDX2_4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_3]]
|
|
; CHECK-NEXT: store double [[TMP90]], ptr [[ARRAYIDX2_4]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5
|
|
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_4]]
|
|
; CHECK-NEXT: [[T0_5:%.*]] = load double, ptr [[ARRAYIDX_5]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP91:%.*]] = fmul fast double [[T0_5]], [[TMP83]]
|
|
; CHECK-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_4]]
|
|
; CHECK-NEXT: store double [[TMP91]], ptr [[ARRAYIDX2_5]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6
|
|
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_5]]
|
|
; CHECK-NEXT: [[T0_6:%.*]] = load double, ptr [[ARRAYIDX_6]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP92:%.*]] = fmul fast double [[T0_6]], [[TMP84]]
|
|
; CHECK-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_5]]
|
|
; CHECK-NEXT: store double [[TMP92]], ptr [[ARRAYIDX2_6]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7
|
|
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_6]]
|
|
; CHECK-NEXT: [[T0_7:%.*]] = load double, ptr [[ARRAYIDX_7]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[TMP93:%.*]] = fmul fast double [[T0_7]], [[TMP85]]
|
|
; CHECK-NEXT: [[ARRAYIDX2_7:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV_NEXT_6]]
|
|
; CHECK-NEXT: store double [[TMP93]], ptr [[ARRAYIDX2_7]], align 8, !tbaa [[TBAA3]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8
|
|
; CHECK-NEXT: [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[WIDE_TRIP_COUNT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%div = fdiv fast double 1.0, %a
|
|
br label %for.cond
|
|
|
|
for.cond:
|
|
%n.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
|
|
%cmp = icmp slt i32 %n.0, %N
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
|
|
for.cond.cleanup:
|
|
br label %for.end
|
|
|
|
for.body:
|
|
%idxprom = sext i32 %n.0 to i64
|
|
%arrayidx = getelementptr inbounds double, ptr %y, i64 %idxprom
|
|
%t0 = load double, ptr %arrayidx, align 8, !tbaa !3
|
|
%mul = fmul fast double %t0, %div
|
|
%idxprom1 = sext i32 %n.0 to i64
|
|
%arrayidx2 = getelementptr inbounds double, ptr %x, i64 %idxprom1
|
|
store double %mul, ptr %arrayidx2, align 8, !tbaa !3
|
|
br label %for.inc
|
|
|
|
for.inc:
|
|
%inc = add nsw i32 %n.0, 1
|
|
br label %for.cond
|
|
|
|
for.end:
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="skylake-avx512" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "unsafe-fp-math"="true" "use-soft-float"="false" }
|
|
|
|
!llvm.module.flags = !{!0, !1}
|
|
!llvm.ident = !{!2}
|
|
|
|
!0 = !{i32 1, !"wchar_size", i32 4}
|
|
!1 = !{i32 7, !"PIC Level", i32 2}
|
|
!2 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git 45ebe38ffc40bb7221fc587bfb4481cf7f53ebbc)"}
|
|
!3 = !{!4, !4, i64 0}
|
|
!4 = !{!"double", !5, i64 0}
|
|
!5 = !{!"omnipotent char", !6, i64 0}
|
|
!6 = !{!"Simple C/C++ TBAA"}
|
|
|