; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple armv8---eabi -mattr=+aes,+fix-cortex-a57-aes-1742098 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-FIX-NOSCHED

; These CPUs should have the fix enabled by default. They use different
; FileCheck prefixes because some instructions are scheduled differently.
;
; RUN: llc -mtriple armv8---eabi -mcpu=cortex-a57 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-CORTEX-FIX
; RUN: llc -mtriple armv8---eabi -mcpu=cortex-a72 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-CORTEX-FIX

; This checks that adding `+fix-cortex-a57-aes-1742098` causes `vorr` to be
; inserted wherever the compiler cannot prove that either input to the first aes
; instruction in a fused aes pair was set by 64-bit Neon register writes or
; 128-bit Neon register writes. All other register writes are unsafe, and
; require a `vorr` to protect the AES input.
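;
; As an illustrative sketch only (mirroring the aese_via_call1 checks
; below, not an extra test): a value returned in q0 by a call has an
; unknown setter, so the pass is expected to emit a full-width copy of
; the register before the fused aese/aesmc pair:
;
;   bl      get_input
;   vorr    q0, q0, q0    @ inserted 128-bit write makes q0 safe
;   aese.8  q0, q8
;   aesmc.8 q8, q0
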
declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>)
declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>)

declare arm_aapcs_vfpcc <16 x i8> @get_input() local_unnamed_addr
declare arm_aapcs_vfpcc <16 x i8> @get_inputf16(half) local_unnamed_addr
declare arm_aapcs_vfpcc <16 x i8> @get_inputf32(float) local_unnamed_addr

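; No vorr is expected here: both aese inputs are produced by full Neon
; register writes (a vmov.i32 of the whole q register and a vld1 load).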
define arm_aapcs_vfpcc void @aese_zero(<16 x i8>* %0) nounwind {
; CHECK-FIX-LABEL: aese_zero:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vmov.i32 q9, #0x0
; CHECK-FIX-NEXT: aese.8 q9, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
  %2 = load <16 x i8>, <16 x i8>* %0, align 8
  %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> zeroinitializer, <16 x i8> %2)
  %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
  store <16 x i8> %4, <16 x i8>* %0, align 8
  ret void
}

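; In the via_call tests the q0 input comes back from a call, so its setter
; cannot be proven safe and a protecting vorr is expected after each bl.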
define arm_aapcs_vfpcc void @aese_via_call1(<16 x i8>* %0) nounwind {
; CHECK-FIX-LABEL: aese_via_call1:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_input
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q0, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q0
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
  %2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
  %3 = load <16 x i8>, <16 x i8>* %0, align 8
  %4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %2, <16 x i8> %3)
  %5 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %4)
  store <16 x i8> %5, <16 x i8>* %0, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_via_call2(half %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aese_via_call2:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_inputf16
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q0, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q0
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
  %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
  %4 = load <16 x i8>, <16 x i8>* %1, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  store <16 x i8> %6, <16 x i8>* %1, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_via_call3(float %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aese_via_call3:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_inputf32
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q0, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q0
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
  %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
  %4 = load <16 x i8>, <16 x i8>* %1, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  store <16 x i8> %6, <16 x i8>* %1, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aese_once_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-FIX-NEXT: aese.8 q9, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %3 = load <16 x i8>, <16 x i8>* %1, align 8
  %4 = load <16 x i8>, <16 x i8>* %0, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  store <16 x i8> %6, <16 x i8>* %1, align 8
  ret void
}

define arm_aapcs_vfpcc <16 x i8> @aese_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aese_once_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aese.8 q1, q0
; CHECK-FIX-NEXT: aesmc.8 q0, q1
; CHECK-FIX-NEXT: bx lr
  %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
  %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
  ret <16 x i8> %4
}

define arm_aapcs_vfpcc void @aese_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aese_twice_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-FIX-NEXT: aese.8 q9, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-FIX-NEXT: aese.8 q8, q9
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %3 = load <16 x i8>, <16 x i8>* %1, align 8
  %4 = load <16 x i8>, <16 x i8>* %0, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  store <16 x i8> %6, <16 x i8>* %1, align 8
  %7 = load <16 x i8>, <16 x i8>* %0, align 8
  %8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
  %9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
  store <16 x i8> %9, <16 x i8>* %1, align 8
  ret void
}

define arm_aapcs_vfpcc <16 x i8> @aese_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aese_twice_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aese.8 q1, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q0, q8
; CHECK-FIX-NEXT: bx lr
  %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
  %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %4, <16 x i8> %0)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  ret <16 x i8> %6
}

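; In the pointer-based loop both aese operands are reloaded with vld1 on
; every iteration, so no vorr is needed; in the value-based loop that
; follows, the incoming arguments are protected once, before the loop.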
define arm_aapcs_vfpcc void @aese_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: bxeq lr
; CHECK-FIX-NOSCHED-NEXT: .LBB8_1: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d18, d19}, [r2]
; CHECK-FIX-NOSCHED-NEXT: aese.8 q9, q8
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: bne .LBB8_1
; CHECK-FIX-NOSCHED-NEXT: @ %bb.2:
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_loop_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: bxeq lr
; CHECK-CORTEX-FIX-NEXT: .LBB8_1: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d18, d19}, [r2]
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: bne .LBB8_1
; CHECK-CORTEX-FIX-NEXT: @ %bb.2:
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = icmp eq i32 %0, 0
  br i1 %4, label %5, label %6

5:
  ret void

6:
  %7 = phi i32 [ %12, %6 ], [ 0, %3 ]
  %8 = load <16 x i8>, <16 x i8>* %2, align 8
  %9 = load <16 x i8>, <16 x i8>* %1, align 8
  %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %9)
  %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
  store <16 x i8> %11, <16 x i8>* %2, align 8
  %12 = add nuw i32 %7, 1
  %13 = icmp eq i32 %12, %0
  br i1 %13, label %5, label %6
}

define arm_aapcs_vfpcc <16 x i8> @aese_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
; CHECK-FIX-LABEL: aese_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB9_2
; CHECK-FIX-NEXT: .LBB9_1: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aese.8 q1, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesmc.8 q1, q1
; CHECK-FIX-NEXT: bne .LBB9_1
; CHECK-FIX-NEXT: .LBB9_2:
; CHECK-FIX-NEXT: vorr q0, q1, q1
; CHECK-FIX-NEXT: bx lr
  %4 = icmp eq i32 %0, 0
  br i1 %4, label %5, label %7

5:
  %6 = phi <16 x i8> [ %2, %3 ], [ %11, %7 ]
  ret <16 x i8> %6

7:
  %8 = phi i32 [ %12, %7 ], [ 0, %3 ]
  %9 = phi <16 x i8> [ %11, %7 ], [ %2, %3 ]
  %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %1)
  %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
  %12 = add nuw i32 %8, 1
  %13 = icmp eq i32 %12, %0
  br i1 %13, label %5, label %7
}

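; In the set* tests below a lane of each operand is rewritten from a GPR.
; The operand loaded with vld1 is already set by a full-width Neon write,
; so only the incoming q0 argument, whose setter is unknown, gets a vorr.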
define arm_aapcs_vfpcc void @aese_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set8_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrb r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.8 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.8 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set8_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrb r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.8 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.8 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = load i8, i8* %0, align 1
  %5 = load <16 x i8>, <16 x i8>* %2, align 8
  %6 = insertelement <16 x i8> %5, i8 %4, i64 0
  %7 = insertelement <16 x i8> %1, i8 %4, i64 0
  %8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
  %9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
  store <16 x i8> %9, <16 x i8>* %2, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set8_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.8 d0[0], r0
; CHECK-FIX-NEXT: vmov.8 d16[0], r0
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %4 = load <16 x i8>, <16 x i8>* %2, align 8
  %5 = insertelement <16 x i8> %4, i8 %0, i64 0
  %6 = insertelement <16 x i8> %1, i8 %0, i64 0
  %7 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %5, <16 x i8> %6)
  %8 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %7)
  store <16 x i8> %8, <16 x i8>* %2, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB12_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.8 {d16[0]}, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB12_3
; CHECK-FIX-NEXT: b .LBB12_4
; CHECK-FIX-NEXT: .LBB12_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB12_4
; CHECK-FIX-NEXT: .LBB12_3:
; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r1]
; CHECK-FIX-NEXT: .LBB12_4:
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  br i1 %0, label %5, label %9

5:
  %6 = load i8, i8* %1, align 1
  %7 = load <16 x i8>, <16 x i8>* %3, align 8
  %8 = insertelement <16 x i8> %7, i8 %6, i64 0
  br label %11

9:
  %10 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %11

11:
  %12 = phi <16 x i8> [ %8, %5 ], [ %10, %9 ]
  br i1 %0, label %13, label %16

13:
  %14 = load i8, i8* %1, align 1
  %15 = insertelement <16 x i8> %2, i8 %14, i64 0
  br label %16

16:
  %17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %17)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  store <16 x i8> %19, <16 x i8>* %3, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB13_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.8 d16[0], r1
; CHECK-FIX-NEXT: .LBB13_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB13_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: .LBB13_4: @ %select.end1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load <16 x i8>, <16 x i8>* %3, align 8
  %6 = insertelement <16 x i8> %5, i8 %1, i64 0
  %7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
  %8 = insertelement <16 x i8> %2, i8 %1, i64 0
  %9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
  %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %9)
  %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
  store <16 x i8> %11, <16 x i8>* %3, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrb r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strb r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB14_1:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB14_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB14_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load i8, i8* %1, align 1
  %6 = insertelement <16 x i8> %2, i8 %5, i64 0
  %7 = getelementptr inbounds <16 x i8>, <16 x i8>* %3, i32 0, i32 0
  store i8 %5, i8* %7, align 8
  %8 = icmp eq i32 %0, 0
  br i1 %8, label %12, label %9

9:
  %10 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %13

11:
  store <16 x i8> %17, <16 x i8>* %3, align 8
  br label %12

12:
  ret void

13:
  %14 = phi <16 x i8> [ %10, %9 ], [ %17, %13 ]
  %15 = phi i32 [ 0, %9 ], [ %18, %13 ]
  %16 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %14, <16 x i8> %6)
  %17 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %16)
  %18 = add nuw i32 %15, 1
  %19 = icmp eq i32 %18, %0
  br i1 %19, label %11, label %13
}

define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB15_1:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB15_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vmov.8 d16[0], r1
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB15_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %10, label %6

6:
  %7 = insertelement <16 x i8> %2, i8 %1, i64 0
  %8 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %11

9:
  store <16 x i8> %16, <16 x i8>* %3, align 8
  br label %10

10:
  ret void

11:
  %12 = phi <16 x i8> [ %8, %6 ], [ %16, %11 ]
  %13 = phi i32 [ 0, %6 ], [ %17, %11 ]
  %14 = insertelement <16 x i8> %12, i8 %1, i64 0
  %15 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %14, <16 x i8> %7)
  %16 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %15)
  %17 = add nuw i32 %13, 1
  %18 = icmp eq i32 %17, %0
  br i1 %18, label %9, label %11
}

define arm_aapcs_vfpcc void @aese_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set16_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = load i16, i16* %0, align 2
  %5 = bitcast <16 x i8>* %2 to <8 x i16>*
  %6 = load <8 x i16>, <8 x i16>* %5, align 8
  %7 = insertelement <8 x i16> %6, i16 %4, i64 0
  %8 = bitcast <8 x i16> %7 to <16 x i8>
  %9 = bitcast <16 x i8> %1 to <8 x i16>
  %10 = insertelement <8 x i16> %9, i16 %4, i64 0
  %11 = bitcast <8 x i16> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
  store <16 x i8> %13, <16 x i8>* %2, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set16_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NEXT: vmov.16 d16[0], r0
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %4 = bitcast <16 x i8>* %2 to <8 x i16>*
  %5 = load <8 x i16>, <8 x i16>* %4, align 8
  %6 = insertelement <8 x i16> %5, i16 %0, i64 0
  %7 = bitcast <8 x i16> %6 to <16 x i8>
  %8 = bitcast <16 x i8> %1 to <8 x i16>
  %9 = insertelement <8 x i16> %8, i16 %0, i64 0
  %10 = bitcast <8 x i16> %9 to <16 x i8>
  %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
  %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
  store <16 x i8> %12, <16 x i8>* %2, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB18_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB18_3
; CHECK-FIX-NEXT: b .LBB18_4
; CHECK-FIX-NEXT: .LBB18_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB18_4
; CHECK-FIX-NEXT: .LBB18_3:
; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r1:16]
; CHECK-FIX-NEXT: .LBB18_4:
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  br i1 %0, label %5, label %10

5:
  %6 = load i16, i16* %1, align 2
  %7 = bitcast <16 x i8>* %3 to <8 x i16>*
  %8 = load <8 x i16>, <8 x i16>* %7, align 8
  %9 = insertelement <8 x i16> %8, i16 %6, i64 0
  br label %13

10:
  %11 = bitcast <16 x i8>* %3 to <8 x i16>*
  %12 = load <8 x i16>, <8 x i16>* %11, align 8
  br label %13

13:
  %14 = phi <8 x i16> [ %9, %5 ], [ %12, %10 ]
  br i1 %0, label %15, label %19

15:
  %16 = load i16, i16* %1, align 2
  %17 = bitcast <16 x i8> %2 to <8 x i16>
  %18 = insertelement <8 x i16> %17, i16 %16, i64 0
  br label %21

19:
  %20 = bitcast <16 x i8> %2 to <8 x i16>
  br label %21

21:
  %22 = phi <8 x i16> [ %18, %15 ], [ %20, %19 ]
  %23 = bitcast <8 x i16> %14 to <16 x i8>
  %24 = bitcast <8 x i16> %22 to <16 x i8>
  %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
  %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
  store <16 x i8> %26, <16 x i8>* %3, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB19_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.16 d16[0], r1
; CHECK-FIX-NEXT: .LBB19_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB19_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: .LBB19_4: @ %select.end1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = bitcast <16 x i8>* %3 to <8 x i16>*
  %6 = load <8 x i16>, <8 x i16>* %5, align 8
  %7 = insertelement <8 x i16> %6, i16 %1, i64 0
  %8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
  %9 = bitcast <16 x i8> %2 to <8 x i16>
  %10 = insertelement <8 x i16> %9, i16 %1, i64 0
  %11 = select i1 %0, <8 x i16> %10, <8 x i16> %9
  %12 = bitcast <8 x i16> %8 to <16 x i8>
  %13 = bitcast <8 x i16> %11 to <16 x i8>
  %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
  %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
  store <16 x i8> %15, <16 x i8>* %3, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrh r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strh r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB20_1:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB20_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB20_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load i16, i16* %1, align 2
  %6 = bitcast <16 x i8> %2 to <8 x i16>
  %7 = insertelement <8 x i16> %6, i16 %5, i64 0
  %8 = bitcast <8 x i16> %7 to <16 x i8>
  %9 = bitcast <16 x i8>* %3 to i16*
  store i16 %5, i16* %9, align 8
  %10 = icmp eq i32 %0, 0
  br i1 %10, label %14, label %11

11:
  %12 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %15

13:
  store <16 x i8> %19, <16 x i8>* %3, align 8
  br label %14

14:
  ret void

15:
  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  %20 = add nuw i32 %17, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %13, label %15
}

define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB21_1:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: .LBB21_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: vmov.16 d16[0], r1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bne .LBB21_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %12, label %6

6:
  %7 = bitcast <16 x i8> %2 to <8 x i16>
  %8 = insertelement <8 x i16> %7, i16 %1, i64 0
  %9 = bitcast <8 x i16> %8 to <16 x i8>
  %10 = bitcast <16 x i8>* %3 to <8 x i16>*
  %11 = bitcast <16 x i8>* %3 to i16*
  br label %13

12:
  ret void

13:
  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
  %15 = load <8 x i16>, <8 x i16>* %10, align 8
  %16 = insertelement <8 x i16> %15, i16 %1, i64 0
  %17 = bitcast <8 x i16> %16 to <16 x i8>
  store i16 %1, i16* %11, align 8
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  store <16 x i8> %19, <16 x i8>* %3, align 8
  %20 = add nuw i32 %14, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %12, label %13
}

define arm_aapcs_vfpcc void @aese_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set32_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set32_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldr r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = load i32, i32* %0, align 4
  %5 = bitcast <16 x i8>* %2 to <4 x i32>*
  %6 = load <4 x i32>, <4 x i32>* %5, align 8
  %7 = insertelement <4 x i32> %6, i32 %4, i64 0
  %8 = bitcast <4 x i32> %7 to <16 x i8>
  %9 = bitcast <16 x i8> %1 to <4 x i32>
  %10 = insertelement <4 x i32> %9, i32 %4, i64 0
  %11 = bitcast <4 x i32> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
  store <16 x i8> %13, <16 x i8>* %2, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set32_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %4 = bitcast <16 x i8>* %2 to <4 x i32>*
  %5 = load <4 x i32>, <4 x i32>* %4, align 8
  %6 = insertelement <4 x i32> %5, i32 %0, i64 0
  %7 = bitcast <4 x i32> %6 to <16 x i8>
  %8 = bitcast <16 x i8> %1 to <4 x i32>
  %9 = insertelement <4 x i32> %8, i32 %0, i64 0
  %10 = bitcast <4 x i32> %9 to <16 x i8>
  %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
  %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
  store <16 x i8> %12, <16 x i8>* %2, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB24_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB24_3
; CHECK-FIX-NEXT: b .LBB24_4
; CHECK-FIX-NEXT: .LBB24_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB24_4
; CHECK-FIX-NEXT: .LBB24_3:
; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
; CHECK-FIX-NEXT: .LBB24_4:
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  br i1 %0, label %5, label %10

5:
  %6 = load i32, i32* %1, align 4
  %7 = bitcast <16 x i8>* %3 to <4 x i32>*
  %8 = load <4 x i32>, <4 x i32>* %7, align 8
  %9 = insertelement <4 x i32> %8, i32 %6, i64 0
  br label %13

10:
  %11 = bitcast <16 x i8>* %3 to <4 x i32>*
  %12 = load <4 x i32>, <4 x i32>* %11, align 8
  br label %13

13:
  %14 = phi <4 x i32> [ %9, %5 ], [ %12, %10 ]
  br i1 %0, label %15, label %19

15:
  %16 = load i32, i32* %1, align 4
  %17 = bitcast <16 x i8> %2 to <4 x i32>
  %18 = insertelement <4 x i32> %17, i32 %16, i64 0
  br label %21

19:
  %20 = bitcast <16 x i8> %2 to <4 x i32>
  br label %21

21:
  %22 = phi <4 x i32> [ %18, %15 ], [ %20, %19 ]
  %23 = bitcast <4 x i32> %14 to <16 x i8>
  %24 = bitcast <4 x i32> %22 to <16 x i8>
  %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
  %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
  store <16 x i8> %26, <16 x i8>* %3, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB25_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.32 d16[0], r1
; CHECK-FIX-NEXT: .LBB25_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB25_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.32 d0[0], r1
; CHECK-FIX-NEXT: .LBB25_4: @ %select.end1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = bitcast <16 x i8>* %3 to <4 x i32>*
  %6 = load <4 x i32>, <4 x i32>* %5, align 8
  %7 = insertelement <4 x i32> %6, i32 %1, i64 0
  %8 = select i1 %0, <4 x i32> %7, <4 x i32> %6
  %9 = bitcast <16 x i8> %2 to <4 x i32>
  %10 = insertelement <4 x i32> %9, i32 %1, i64 0
  %11 = select i1 %0, <4 x i32> %10, <4 x i32> %9
  %12 = bitcast <4 x i32> %8 to <16 x i8>
  %13 = bitcast <4 x i32> %11 to <16 x i8>
  %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
  %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
  store <16 x i8> %15, <16 x i8>* %3, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldr r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: str r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB26_1:
; CHECK-FIX-NEXT: vmov.32 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB26_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB26_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load i32, i32* %1, align 4
  %6 = bitcast <16 x i8> %2 to <4 x i32>
  %7 = insertelement <4 x i32> %6, i32 %5, i64 0
  %8 = bitcast <4 x i32> %7 to <16 x i8>
  %9 = bitcast <16 x i8>* %3 to i32*
  store i32 %5, i32* %9, align 8
  %10 = icmp eq i32 %0, 0
  br i1 %10, label %14, label %11

11:
  %12 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %15

13:
  store <16 x i8> %19, <16 x i8>* %3, align 8
  br label %14

14:
  ret void

15:
  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  %20 = add nuw i32 %17, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %13, label %15
}

define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB27_1:
; CHECK-FIX-NEXT: vmov.32 d0[0], r1
; CHECK-FIX-NEXT: .LBB27_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: vmov.32 d16[0], r1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bne .LBB27_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %12, label %6

6:
  %7 = bitcast <16 x i8> %2 to <4 x i32>
  %8 = insertelement <4 x i32> %7, i32 %1, i64 0
  %9 = bitcast <4 x i32> %8 to <16 x i8>
  %10 = bitcast <16 x i8>* %3 to <4 x i32>*
  %11 = bitcast <16 x i8>* %3 to i32*
  br label %13

12:
  ret void

13:
  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
  %15 = load <4 x i32>, <4 x i32>* %10, align 8
  %16 = insertelement <4 x i32> %15, i32 %1, i64 0
  %17 = bitcast <4 x i32> %16 to <16 x i8>
  store i32 %1, i32* %11, align 8
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  store <16 x i8> %19, <16 x i8>* %3, align 8
  %20 = add nuw i32 %14, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %12, label %13
}

define arm_aapcs_vfpcc void @aese_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set64_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vorr d16, d0, d0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set64_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0]
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: vorr d16, d0, d0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = load i64, i64* %0, align 8
  %5 = bitcast <16 x i8>* %2 to <2 x i64>*
  %6 = load <2 x i64>, <2 x i64>* %5, align 8
  %7 = insertelement <2 x i64> %6, i64 %4, i64 0
  %8 = bitcast <2 x i64> %7 to <16 x i8>
  %9 = bitcast <16 x i8> %1 to <2 x i64>
  %10 = insertelement <2 x i64> %9, i64 %4, i64 0
  %11 = bitcast <2 x i64> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
  store <16 x i8> %13, <16 x i8>* %2, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set64_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NEXT: vmov.32 d0[1], r1
; CHECK-FIX-NEXT: vmov.32 d16[1], r1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %4 = bitcast <16 x i8>* %2 to <2 x i64>*
  %5 = load <2 x i64>, <2 x i64>* %4, align 8
  %6 = insertelement <2 x i64> %5, i64 %0, i64 0
  %7 = bitcast <2 x i64> %6 to <16 x i8>
  %8 = bitcast <16 x i8> %1 to <2 x i64>
  %9 = insertelement <2 x i64> %8, i64 %0, i64 0
  %10 = bitcast <2 x i64> %9 to <16 x i8>
  %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
  %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
  store <16 x i8> %12, <16 x i8>* %2, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set64_cond_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB30_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: vldr d16, [r1]
; CHECK-FIX-NOSCHED-NEXT: b .LBB30_3
; CHECK-FIX-NOSCHED-NEXT: .LBB30_2:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: .LBB30_3:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1]
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set64_cond_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: beq .LBB30_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
; CHECK-CORTEX-FIX-NEXT: vldr d18, [r1]
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: vorr d16, d18, d18
; CHECK-CORTEX-FIX-NEXT: b .LBB30_3
; CHECK-CORTEX-FIX-NEXT: .LBB30_2:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: .LBB30_3:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vldrne d0, [r1]
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: bx lr
  br i1 %0, label %5, label %10

5:
  %6 = load i64, i64* %1, align 8
  %7 = bitcast <16 x i8>* %3 to <2 x i64>*
  %8 = load <2 x i64>, <2 x i64>* %7, align 8
  %9 = insertelement <2 x i64> %8, i64 %6, i64 0
  br label %13

10:
  %11 = bitcast <16 x i8>* %3 to <2 x i64>*
  %12 = load <2 x i64>, <2 x i64>* %11, align 8
  br label %13

13:
  %14 = phi <2 x i64> [ %9, %5 ], [ %12, %10 ]
  br i1 %0, label %15, label %19

15:
  %16 = load i64, i64* %1, align 8
  %17 = bitcast <16 x i8> %2 to <2 x i64>
  %18 = insertelement <2 x i64> %17, i64 %16, i64 0
  br label %21

19:
  %20 = bitcast <16 x i8> %2 to <2 x i64>
  br label %21

21:
  %22 = phi <2 x i64> [ %18, %15 ], [ %20, %19 ]
  %23 = bitcast <2 x i64> %14 to <16 x i8>
  %24 = bitcast <2 x i64> %22 to <16 x i8>
  %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
  %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
  store <16 x i8> %26, <16 x i8>* %3, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set64_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldr r1, [sp]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: beq .LBB31_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.32 d16[0], r2
; CHECK-FIX-NEXT: vmov.32 d16[1], r3
; CHECK-FIX-NEXT: .LBB31_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB31_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.32 d0[0], r2
; CHECK-FIX-NEXT: vmov.32 d0[1], r3
; CHECK-FIX-NEXT: .LBB31_4: @ %select.end1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %5 = bitcast <16 x i8>* %3 to <2 x i64>*
  %6 = load <2 x i64>, <2 x i64>* %5, align 8
  %7 = insertelement <2 x i64> %6, i64 %1, i64 0
  %8 = select i1 %0, <2 x i64> %7, <2 x i64> %6
  %9 = bitcast <16 x i8> %2 to <2 x i64>
  %10 = insertelement <2 x i64> %9, i64 %1, i64 0
  %11 = select i1 %0, <2 x i64> %10, <2 x i64> %9
  %12 = bitcast <2 x i64> %8 to <16 x i8>
  %13 = bitcast <2 x i64> %11 to <16 x i8>
  %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
  %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
  store <16 x i8> %15, <16 x i8>* %3, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set64_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: ldrd r4, r5, [r1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: strd r4, r5, [r2]
; CHECK-FIX-NOSCHED-NEXT: beq .LBB32_4
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vmov d0, r4, r5
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: .LBB32_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: bne .LBB32_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: .LBB32_4:
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aese_set64_loop_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT: ldrd r4, r5, [r1]
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: strd r4, r5, [r2]
; CHECK-CORTEX-FIX-NEXT: popeq {r4, r5, r11, pc}
; CHECK-CORTEX-FIX-NEXT: .LBB32_1:
; CHECK-CORTEX-FIX-NEXT: vmov d0, r4, r5
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: .LBB32_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: bne .LBB32_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r11, pc}
  %5 = load i64, i64* %1, align 8
  %6 = bitcast <16 x i8> %2 to <2 x i64>
  %7 = insertelement <2 x i64> %6, i64 %5, i64 0
  %8 = bitcast <2 x i64> %7 to <16 x i8>
  %9 = bitcast <16 x i8>* %3 to i64*
  store i64 %5, i64* %9, align 8
  %10 = icmp eq i32 %0, 0
  br i1 %10, label %14, label %11

11:
  %12 = load <16 x i8>, <16 x i8>* %3, align 8
  br label %15

13:
  store <16 x i8> %19, <16 x i8>* %3, align 8
  br label %14

14:
  ret void

15:
  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  %20 = add nuw i32 %17, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %13, label %15
}

define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set64_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB33_1:
; CHECK-FIX-NEXT: vmov.32 d0[0], r2
; CHECK-FIX-NEXT: ldr r1, [sp]
; CHECK-FIX-NEXT: vmov.32 d0[1], r3
; CHECK-FIX-NEXT: .LBB33_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: vmov.32 d16[0], r2
; CHECK-FIX-NEXT: vmov.32 d16[1], r3
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bne .LBB33_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %12, label %6

6:
  %7 = bitcast <16 x i8> %2 to <2 x i64>
  %8 = insertelement <2 x i64> %7, i64 %1, i64 0
  %9 = bitcast <2 x i64> %8 to <16 x i8>
  %10 = bitcast <16 x i8>* %3 to <2 x i64>*
  %11 = bitcast <16 x i8>* %3 to i64*
  br label %13

12:
  ret void

13:
  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
  %15 = load <2 x i64>, <2 x i64>* %10, align 8
  %16 = insertelement <2 x i64> %15, i64 %1, i64 0
  %17 = bitcast <2 x i64> %16 to <16 x i8>
  store i64 %1, i64* %11, align 8
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  store <16 x i8> %19, <16 x i8>* %3, align 8
  %20 = add nuw i32 %14, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %12, label %13
}

define arm_aapcs_vfpcc void @aese_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_setf16_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = bitcast half* %0 to i16*
  %5 = load i16, i16* %4, align 2
  %6 = bitcast <16 x i8>* %2 to <8 x i16>*
  %7 = load <8 x i16>, <8 x i16>* %6, align 8
  %8 = insertelement <8 x i16> %7, i16 %5, i64 0
  %9 = bitcast <8 x i16> %8 to <16 x i8>
  %10 = bitcast <16 x i8> %1 to <8 x i16>
  %11 = insertelement <8 x i16> %10, i16 %5, i64 0
  %12 = bitcast <8 x i16> %11 to <16 x i8>
  %13 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %12)
  %14 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %13)
  store <16 x i8> %14, <16 x i8>* %2, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_setf16_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vmov r1, s0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vmov.16 d2[0], r1
; CHECK-FIX-NEXT: vmov.16 d16[0], r1
; CHECK-FIX-NEXT: aese.8 q8, q1
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
  %4 = bitcast <16 x i8>* %2 to <8 x i16>*
  %5 = load <8 x i16>, <8 x i16>* %4, align 8
  %6 = bitcast half %0 to i16
  %7 = insertelement <8 x i16> %5, i16 %6, i64 0
  %8 = bitcast <8 x i16> %7 to <16 x i8>
  %9 = bitcast <16 x i8> %1 to <8 x i16>
  %10 = insertelement <8 x i16> %9, i16 %6, i64 0
  %11 = bitcast <8 x i16> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
  store <16 x i8> %13, <16 x i8>* %2, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, lr}
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB36_3
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: vorr q9, q8, q8
; CHECK-FIX-NOSCHED-NEXT: vmov lr, r12, d17
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d18[0]}, [r1:16]
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r4, d18[0]
; CHECK-FIX-NOSCHED-NEXT: vmov s6, lr
; CHECK-FIX-NOSCHED-NEXT: lsr r5, r3, #16
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s8, s6
; CHECK-FIX-NOSCHED-NEXT: vmov s6, r3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s12, s6
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r12, #16
; CHECK-FIX-NOSCHED-NEXT: lsr lr, lr, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s4, r12
; CHECK-FIX-NOSCHED-NEXT: vmov s10, lr
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s4, s4
; CHECK-FIX-NOSCHED-NEXT: vmov s14, r5
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s10, s10
; CHECK-FIX-NOSCHED-NEXT: vmov s6, r4
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s14, s14
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s5, s6
; CHECK-FIX-NOSCHED-NEXT: vmov s6, r3
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r4, #16
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s6, s6
; CHECK-FIX-NOSCHED-NEXT: vmov s7, r3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s7, s7
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: bne .LBB36_4
; CHECK-FIX-NOSCHED-NEXT: .LBB36_2:
; CHECK-FIX-NOSCHED-NEXT: vmov r0, r1, d0
; CHECK-FIX-NOSCHED-NEXT: vmov r3, r7, d1
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r1
; CHECK-FIX-NOSCHED-NEXT: lsr r1, r1, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s0, r7
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s1
; CHECK-FIX-NOSCHED-NEXT: vmov s2, r3
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r0
; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s3, r3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s11, s1
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r7
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-FIX-NOSCHED-NEXT: vmov s13, r1
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-FIX-NOSCHED-NEXT: lsr r0, r0, #16
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s1, s1
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s3, s3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s13, s13
; CHECK-FIX-NOSCHED-NEXT: b .LBB36_5
; CHECK-FIX-NOSCHED-NEXT: .LBB36_3:
; CHECK-FIX-NOSCHED-NEXT: ldrh r5, [r2, #10]
; CHECK-FIX-NOSCHED-NEXT: ldrh r12, [r2, #6]
; CHECK-FIX-NOSCHED-NEXT: ldrh r6, [r2, #2]
; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r2, #14]
; CHECK-FIX-NOSCHED-NEXT: vmov s8, r5
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #12]
; CHECK-FIX-NOSCHED-NEXT: vmov s12, r12
; CHECK-FIX-NOSCHED-NEXT: ldrh r4, [r2, #8]
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r6
; CHECK-FIX-NOSCHED-NEXT: ldrh lr, [r2, #4]
; CHECK-FIX-NOSCHED-NEXT: vmov s4, r7
; CHECK-FIX-NOSCHED-NEXT: ldrh r8, [r2]
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s6, s4
; CHECK-FIX-NOSCHED-NEXT: vmov s4, r3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s10, s8
; CHECK-FIX-NOSCHED-NEXT: vmov s8, r4
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s14, s12
; CHECK-FIX-NOSCHED-NEXT: vmov s12, lr
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s7, s5
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r8
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s4, s4
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s8, s8
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s12, s12
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s5, s5
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB36_2
; CHECK-FIX-NOSCHED-NEXT: .LBB36_4:
; CHECK-FIX-NOSCHED-NEXT: vorr q8, q0, q0
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d0[1]
; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d16[0]}, [r1:16]
; CHECK-FIX-NOSCHED-NEXT: vmov r0, r1, d1
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r7, d16[0]
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r3
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s1
; CHECK-FIX-NOSCHED-NEXT: vmov s13, r3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s13, s13
; CHECK-FIX-NOSCHED-NEXT: vmov s0, r1
; CHECK-FIX-NOSCHED-NEXT: lsr r1, r1, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s2, r0
; CHECK-FIX-NOSCHED-NEXT: lsr r0, r0, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s3, r0
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s3, s3
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r7
; CHECK-FIX-NOSCHED-NEXT: lsr r0, r7, #16
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s11, s1
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r1
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s1, s1
; CHECK-FIX-NOSCHED-NEXT: .LBB36_5:
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s5, s5
; CHECK-FIX-NOSCHED-NEXT: vmov s15, r0
; CHECK-FIX-NOSCHED-NEXT: vmov r0, s5
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s5, s7
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s5
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s15, s15
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s5, s15
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s11, s11
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s5
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s12
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s5, s9
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s8, s8
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s2, s2
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r1, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s11
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r3, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s12
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s14
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov r0, s12
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s13
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s12
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s5
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r1, r3, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r1
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s8
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s8, s10
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s8
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r3, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s2
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s2, s3
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s2
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s2, s4
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r3, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s0
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s1
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov r0, s0
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s6
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s2
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r3, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, pc}
;
; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, lr}
; CHECK-CORTEX-FIX-NEXT: .vsave {d8, d9}
|
|
; CHECK-CORTEX-FIX-NEXT: vpush {d8, d9}
|
|
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
|
|
; CHECK-CORTEX-FIX-NEXT: beq .LBB36_3
|
|
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
|
|
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
|
|
; CHECK-CORTEX-FIX-NEXT: vorr q9, q8, q8
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r5, r6, d17
|
|
; CHECK-CORTEX-FIX-NEXT: vld1.16 {d18[0]}, [r1:16]
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r7, r5, #16
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s4, r6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s6, r5
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r8, r3, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s8, r3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s12, r4
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s5, r7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s9, r8
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s10, s4
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s4, s6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 lr, d18[0]
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s6, s5
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s7, s8
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s14, s12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s5, s9
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r12, lr, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s11, lr
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s13, r12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s12, s11
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s8, s13
|
|
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
|
|
; CHECK-CORTEX-FIX-NEXT: bne .LBB36_4
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB36_2:
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r6, r5, d1
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r0, r1, d0
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r7, r1, #16
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r3, r5, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s2, r6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s0, r5
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s3, r1
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s9, r0
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r12, r0, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s13, r3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s15, r4
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s16, r7
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s1, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s11, s3
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s3, s9
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s0, r12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s2, s2
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s9, s15
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s13, s13
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s15, s16
|
|
; CHECK-CORTEX-FIX-NEXT: b .LBB36_5
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB36_3:
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r12, [r2]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh lr, [r2, #2]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r8, [r2, #4]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r5, [r2, #6]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r4, [r2, #8]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #10]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r7, [r2, #12]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r6, [r2, #14]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s5, r5
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s7, r8
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s4, r6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s6, r7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s8, r3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s12, r4
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s9, lr
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s11, r12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s14, s4
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s10, s6
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s6, s8
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s4, s12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s5, s5
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s7, s7
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s8, s9
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s12, s11
|
|
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
|
|
; CHECK-CORTEX-FIX-NEXT: beq .LBB36_2
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB36_4:
|
|
; CHECK-CORTEX-FIX-NEXT: vorr q8, q0, q0
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d0[1]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r6, r5, d1
|
|
; CHECK-CORTEX-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r1, r5, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s2, r6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s0, r5
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r7, r3, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s3, r3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s9, r1
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s15, r4
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s16, r7
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s1, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s2, s2
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 r0, d16[0]
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s13, s9
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s9, s15
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s11, s3
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s15, s16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s18, r0
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r12, r0, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s0, r12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s3, s18
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB36_5:
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s10, s10
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s14, s14
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s7, s7
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s6, s6
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s4, s4
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s8, s8
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s12, s12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s0, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s2, s2
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s9, s9
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s3, s3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r0, s10
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r1, s14
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s10, s5
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s14, s1
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s1, s13
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s5, s11
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s11, s15
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r5, s6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r4, s8
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s0, s0
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r12, r0, r1, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r1, s7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r3, s10
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r7, s1
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r6, s11
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r0, s9
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt lr, r1, r3, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r3, s14
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r1, s0
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r7, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r7, s5
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r7, r6, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r6, s4
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r5, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r5, s12
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r4, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r4, s2
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r5
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], lr
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r12
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r0, r4, r0, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r4, s3
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r4, r1, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r1
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r0
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r3
|
|
; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
|
|
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
|
|
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
|
|
; CHECK-CORTEX-FIX-NEXT: vpop {d8, d9}
|
|
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, pc}
|
|
br i1 %0, label %5, label %12
|
|
|
|
5:
|
|
%6 = bitcast half* %1 to i16*
|
|
%7 = load i16, i16* %6, align 2
|
|
%8 = bitcast <16 x i8>* %3 to <8 x i16>*
|
|
%9 = load <8 x i16>, <8 x i16>* %8, align 8
|
|
%10 = insertelement <8 x i16> %9, i16 %7, i64 0
|
|
%11 = bitcast <8 x i16> %10 to <8 x half>
|
|
br label %15
|
|
|
|
12:
|
|
%13 = bitcast <16 x i8>* %3 to <8 x half>*
|
|
%14 = load <8 x half>, <8 x half>* %13, align 8
|
|
br label %15
|
|
|
|
15:
|
|
%16 = phi <8 x half> [ %11, %5 ], [ %14, %12 ]
|
|
br i1 %0, label %17, label %23
|
|
|
|
17:
|
|
%18 = bitcast half* %1 to i16*
|
|
%19 = load i16, i16* %18, align 2
|
|
%20 = bitcast <16 x i8> %2 to <8 x i16>
|
|
%21 = insertelement <8 x i16> %20, i16 %19, i64 0
|
|
%22 = bitcast <8 x i16> %21 to <8 x half>
|
|
br label %25
|
|
|
|
23:
|
|
%24 = bitcast <16 x i8> %2 to <8 x half>
|
|
br label %25
|
|
|
|
25:
|
|
%26 = phi <8 x half> [ %22, %17 ], [ %24, %23 ]
|
|
%27 = bitcast <8 x half> %16 to <16 x i8>
|
|
%28 = bitcast <8 x half> %26 to <16 x i8>
|
|
%29 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %27, <16 x i8> %28)
|
|
%30 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %29)
|
|
store <16 x i8> %30, <16 x i8>* %3, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
|
|
; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_val:
|
|
; CHECK-FIX-NOSCHED: @ %bb.0:
|
|
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r11, lr}
|
|
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r11, lr}
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
|
|
; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_3
|
|
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s9
|
|
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov lr, r12, d17
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r2
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s2, lr
|
|
; CHECK-FIX-NOSCHED-NEXT: lsr lr, lr, #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s8, s2
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s2, r3
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r2, d16[0]
|
|
; CHECK-FIX-NOSCHED-NEXT: lsr r4, r3, #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s12, s2
|
|
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r12, #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s0, r12
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s10, lr
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s0, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s14, r4
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s10, s10
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s14, s14
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s2, r2
|
|
; CHECK-FIX-NOSCHED-NEXT: lsr r2, r2, #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s3, r2
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s1, s2
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s2, r3
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s3, s3
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s2, s2
|
|
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
|
|
; CHECK-FIX-NOSCHED-NEXT: bne .LBB37_4
|
|
; CHECK-FIX-NOSCHED-NEXT: .LBB37_2:
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r0, r2, d2
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r3, r7, d3
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r2
|
|
; CHECK-FIX-NOSCHED-NEXT: lsr r2, r2, #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s4, r7
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s5
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s6, r3
|
|
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r0
|
|
; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s7, r3
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s11, s5
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r7
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s4, s4
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s13, r2
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s6, s6
|
|
; CHECK-FIX-NOSCHED-NEXT: lsr r0, r0, #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s5, s5
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s7, s7
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s13, s13
|
|
; CHECK-FIX-NOSCHED-NEXT: b .LBB37_5
|
|
; CHECK-FIX-NOSCHED-NEXT: .LBB37_3:
|
|
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #10]
|
|
; CHECK-FIX-NOSCHED-NEXT: ldrh r12, [r1, #6]
|
|
; CHECK-FIX-NOSCHED-NEXT: ldrh r5, [r1, #2]
|
|
; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1, #14]
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s8, r3
|
|
; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #12]
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s12, r12
|
|
; CHECK-FIX-NOSCHED-NEXT: ldrh r4, [r1, #8]
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r5
|
|
; CHECK-FIX-NOSCHED-NEXT: ldrh lr, [r1, #4]
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s0, r7
|
|
; CHECK-FIX-NOSCHED-NEXT: ldrh r6, [r1]
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s2, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s0, r2
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s10, s8
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s8, r4
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s14, s12
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s12, lr
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s3, s1
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r6
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s0, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s8, s8
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s12, s12
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s1, s1
|
|
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
|
|
; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_2
|
|
; CHECK-FIX-NOSCHED-NEXT: .LBB37_4:
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s9, s9
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r0, r2, d3
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r7, s9
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d2[1]
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d2[0], r7
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r7, d2[0]
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r3
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s4, r2
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s5
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s6, r0
|
|
; CHECK-FIX-NOSCHED-NEXT: lsr r0, r0, #16
|
|
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
|
|
; CHECK-FIX-NOSCHED-NEXT: lsr r2, r2, #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s7, r0
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s4, s4
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s13, r3
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s6, s6
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s7, s7
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s13, s13
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r7
|
|
; CHECK-FIX-NOSCHED-NEXT: lsr r0, r7, #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s11, s5
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r2
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s5, s5
|
|
; CHECK-FIX-NOSCHED-NEXT: .LBB37_5:
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s1, s1
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s15, r0
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r0, s1
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s1, s3
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s1
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s15, s15
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s1, s15
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s11, s11
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s1
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s12
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s1, s9
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s8, s8
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s6, s6
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s4, s4
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r2, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s11
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r3, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s12
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s14
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r0, s12
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s13
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s12
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s1
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r2, r2, r3, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r2
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s8
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s8, s10
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s8
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r3, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s6
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s6, s7
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s6
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r3, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s4
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s4, s5
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r0, s4
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s2
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r3, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q9
|
|
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
|
|
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r11, pc}
|
|
;
|
|
; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_val:
|
|
; CHECK-CORTEX-FIX: @ %bb.0:
|
|
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r11, lr}
|
|
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r11, lr}
|
|
; CHECK-CORTEX-FIX-NEXT: .vsave {d8, d9}
|
|
; CHECK-CORTEX-FIX-NEXT: vpush {d8, d9}
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s9, s0
|
|
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
|
|
; CHECK-CORTEX-FIX-NEXT: beq .LBB37_3
|
|
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s0, s9
|
|
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r2, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r2
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r4, r5, d17
|
|
; CHECK-CORTEX-FIX-NEXT: lsr lr, r3, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s8, r3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s11, lr
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r6, r4, #16
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r7, r5, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s0, r5
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s2, r4
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s12, r7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s1, r6
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s3, s8
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 r2, d16[0]
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s10, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s0, s2
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s2, s1
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s14, s12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s1, s11
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r12, r2, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s13, r2
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s15, r12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s12, s13
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s8, s15
|
|
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
|
|
; CHECK-CORTEX-FIX-NEXT: bne .LBB37_4
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB37_2:
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r6, r5, d3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r0, r2, d2
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r7, r2, #16
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r3, r5, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s6, r6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s4, r5
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s7, r2
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s9, r0
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r12, r0, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s13, r3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s15, r4
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s16, r7
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s5, s4
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s11, s7
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s7, s9
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s4, r12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s6, s6
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s9, s15
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s13, s13
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s15, s16
|
|
; CHECK-CORTEX-FIX-NEXT: b .LBB37_5
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB37_3:
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r12, [r1]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh lr, [r1, #2]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r7, [r1, #4]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r6, [r1, #6]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r5, [r1, #8]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r4, [r1, #10]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r2, [r1, #12]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #14]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s1, r6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s3, r7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s0, r3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s2, r2
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s8, r4
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s12, r5
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s11, lr
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s13, r12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s14, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s10, s2
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s2, s8
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s0, s12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s1, s1
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s3, s3
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s8, s11
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s12, s13
|
|
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
|
|
; CHECK-CORTEX-FIX-NEXT: beq .LBB37_2
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB37_4:
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s9, s9
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 r2, d2[1]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r0, s9
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r7, r2, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s16, r7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.16 d2[0], r0
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r6, r5, d3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s7, r2
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r3, r5, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s6, r6
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s11, s7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s9, r3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s15, r4
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 r0, d2[0]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s4, r5
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s6, s6
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s13, s9
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s9, s15
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s15, s16
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s5, s4
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s18, r0
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r12, r0, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s4, r12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s7, s18
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB37_5:
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s10, s10
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s14, s14
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s3, s3
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s2, s2
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s0, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s8, s8
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s12, s12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s4, s4
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s6, s6
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s9, s9
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s7, s7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r0, s10
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r2, s14
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s10, s1
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s14, s5
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s1, s13
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s5, s11
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s11, s15
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r5, s2
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r4, s8
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r12, r0, r2, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r2, s3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r3, s10
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r7, s1
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r6, s11
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r0, s9
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt lr, r2, r3, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r3, s14
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r7, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r7, s5
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r7, r6, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r6, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s0, s4
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r5, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r5, s12
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r2, s0
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r4, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r4, s6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r5
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], lr
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r12
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r0, r4, r0, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r4, s7
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r4, r2, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r2
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r0
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r3
|
|
; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
|
|
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
|
|
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-CORTEX-FIX-NEXT: vpop {d8, d9}
|
|
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r11, pc}
|
|
br i1 %0, label %5, label %11
|
|
|
|
5:
|
|
%6 = bitcast <16 x i8>* %3 to <8 x i16>*
|
|
%7 = load <8 x i16>, <8 x i16>* %6, align 8
|
|
%8 = bitcast half %1 to i16
|
|
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
|
|
%10 = bitcast <8 x i16> %9 to <8 x half>
|
|
br label %14
|
|
|
|
11:
|
|
%12 = bitcast <16 x i8>* %3 to <8 x half>*
|
|
%13 = load <8 x half>, <8 x half>* %12, align 8
|
|
br label %14
|
|
|
|
14:
|
|
%15 = phi <8 x half> [ %10, %5 ], [ %13, %11 ]
|
|
br i1 %0, label %16, label %21
|
|
|
|
16:
|
|
%17 = bitcast <16 x i8> %2 to <8 x i16>
|
|
%18 = bitcast half %1 to i16
|
|
%19 = insertelement <8 x i16> %17, i16 %18, i64 0
|
|
%20 = bitcast <8 x i16> %19 to <8 x half>
|
|
br label %23
|
|
|
|
21:
|
|
%22 = bitcast <16 x i8> %2 to <8 x half>
|
|
br label %23
|
|
|
|
23:
|
|
%24 = phi <8 x half> [ %20, %16 ], [ %22, %21 ]
|
|
%25 = bitcast <8 x half> %15 to <16 x i8>
|
|
%26 = bitcast <8 x half> %24 to <16 x i8>
|
|
%27 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %25, <16 x i8> %26)
|
|
%28 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %27)
|
|
store <16 x i8> %28, <16 x i8>* %3, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
|
|
; CHECK-FIX-LABEL: aese_setf16_loop_via_ptr:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: vorr q0, q0, q0
|
|
; CHECK-FIX-NEXT: ldrh r1, [r1]
|
|
; CHECK-FIX-NEXT: cmp r0, #0
|
|
; CHECK-FIX-NEXT: strh r1, [r2]
|
|
; CHECK-FIX-NEXT: bxeq lr
|
|
; CHECK-FIX-NEXT: .LBB38_1:
|
|
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
|
|
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
|
|
; CHECK-FIX-NEXT: .LBB38_2: @ =>This Inner Loop Header: Depth=1
|
|
; CHECK-FIX-NEXT: aese.8 q8, q0
|
|
; CHECK-FIX-NEXT: subs r0, r0, #1
|
|
; CHECK-FIX-NEXT: aesmc.8 q8, q8
|
|
; CHECK-FIX-NEXT: bne .LBB38_2
|
|
; CHECK-FIX-NEXT: @ %bb.3:
|
|
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
|
|
; CHECK-FIX-NEXT: bx lr
|
|
%5 = bitcast half* %1 to i16*
|
|
%6 = load i16, i16* %5, align 2
|
|
%7 = bitcast <16 x i8> %2 to <8 x i16>
|
|
%8 = insertelement <8 x i16> %7, i16 %6, i64 0
|
|
%9 = bitcast <8 x i16> %8 to <16 x i8>
|
|
%10 = bitcast <16 x i8>* %3 to i16*
|
|
store i16 %6, i16* %10, align 8
|
|
%11 = icmp eq i32 %0, 0
|
|
br i1 %11, label %15, label %12
|
|
|
|
12:
|
|
%13 = load <16 x i8>, <16 x i8>* %3, align 8
|
|
br label %16
|
|
|
|
14:
|
|
store <16 x i8> %20, <16 x i8>* %3, align 8
|
|
br label %15
|
|
|
|
15:
|
|
ret void
|
|
|
|
16:
|
|
%17 = phi <16 x i8> [ %13, %12 ], [ %20, %16 ]
|
|
%18 = phi i32 [ 0, %12 ], [ %21, %16 ]
|
|
%19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
|
|
%20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
|
|
%21 = add nuw i32 %18, 1
|
|
%22 = icmp eq i32 %21, %0
|
|
br i1 %22, label %14, label %16
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
|
|
; CHECK-FIX-LABEL: aese_setf16_loop_via_val:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: vorr q1, q1, q1
|
|
; CHECK-FIX-NEXT: cmp r0, #0
|
|
; CHECK-FIX-NEXT: bxeq lr
|
|
; CHECK-FIX-NEXT: .LBB39_1:
|
|
; CHECK-FIX-NEXT: vcvtb.f32.f16 s0, s0
|
|
; CHECK-FIX-NEXT: vcvtb.f16.f32 s0, s0
|
|
; CHECK-FIX-NEXT: vmov r2, s0
|
|
; CHECK-FIX-NEXT: uxth r2, r2
|
|
; CHECK-FIX-NEXT: vmov.16 d2[0], r2
|
|
; CHECK-FIX-NEXT: .LBB39_2: @ =>This Inner Loop Header: Depth=1
|
|
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NEXT: subs r0, r0, #1
|
|
; CHECK-FIX-NEXT: vmov.16 d16[0], r2
|
|
; CHECK-FIX-NEXT: aese.8 q8, q1
|
|
; CHECK-FIX-NEXT: aesmc.8 q8, q8
|
|
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NEXT: bne .LBB39_2
|
|
; CHECK-FIX-NEXT: @ %bb.3:
|
|
; CHECK-FIX-NEXT: bx lr
|
|
%5 = icmp eq i32 %0, 0
|
|
br i1 %5, label %13, label %6
|
|
|
|
6:
|
|
%7 = bitcast <16 x i8> %2 to <8 x i16>
|
|
%8 = bitcast half %1 to i16
|
|
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
|
|
%10 = bitcast <8 x i16> %9 to <16 x i8>
|
|
%11 = bitcast <16 x i8>* %3 to <8 x i16>*
|
|
%12 = bitcast <16 x i8>* %3 to half*
|
|
br label %14
|
|
|
|
13:
|
|
ret void
|
|
|
|
14:
|
|
%15 = phi i32 [ 0, %6 ], [ %21, %14 ]
|
|
%16 = load <8 x i16>, <8 x i16>* %11, align 8
|
|
%17 = insertelement <8 x i16> %16, i16 %8, i64 0
|
|
%18 = bitcast <8 x i16> %17 to <16 x i8>
|
|
store half %1, half* %12, align 8
|
|
%19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %18, <16 x i8> %10)
|
|
%20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
|
|
store <16 x i8> %20, <16 x i8>* %3, align 8
|
|
%21 = add nuw i32 %15, 1
|
|
%22 = icmp eq i32 %21, %0
|
|
br i1 %22, label %13, label %14
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aese_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
|
|
; CHECK-FIX-LABEL: aese_setf32_via_ptr:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: vldr s0, [r0]
|
|
; CHECK-FIX-NEXT: vld1.64 {d2, d3}, [r1]
|
|
; CHECK-FIX-NEXT: vmov.f32 s4, s0
|
|
; CHECK-FIX-NEXT: vorr q1, q1, q1
|
|
; CHECK-FIX-NEXT: vorr q0, q0, q0
|
|
; CHECK-FIX-NEXT: aese.8 q1, q0
|
|
; CHECK-FIX-NEXT: aesmc.8 q8, q1
|
|
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NEXT: bx lr
|
|
%4 = load float, float* %0, align 4
|
|
%5 = bitcast <16 x i8>* %2 to <4 x float>*
|
|
%6 = load <4 x float>, <4 x float>* %5, align 8
|
|
%7 = insertelement <4 x float> %6, float %4, i64 0
|
|
%8 = bitcast <4 x float> %7 to <16 x i8>
|
|
%9 = bitcast <16 x i8> %1 to <4 x float>
|
|
%10 = insertelement <4 x float> %9, float %4, i64 0
|
|
%11 = bitcast <4 x float> %10 to <16 x i8>
|
|
%12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
|
|
%13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
|
|
store <16 x i8> %13, <16 x i8>* %2, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
|
|
; CHECK-FIX-LABEL: aese_setf32_via_val:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: vmov.f32 s4, s0
|
|
; CHECK-FIX-NEXT: vld1.64 {d0, d1}, [r0]
|
|
; CHECK-FIX-NEXT: vmov.f32 s0, s4
|
|
; CHECK-FIX-NEXT: vorr q0, q0, q0
|
|
; CHECK-FIX-NEXT: vorr q1, q1, q1
|
|
; CHECK-FIX-NEXT: aese.8 q0, q1
|
|
; CHECK-FIX-NEXT: aesmc.8 q8, q0
|
|
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
|
|
; CHECK-FIX-NEXT: bx lr
|
|
%4 = bitcast <16 x i8>* %2 to <4 x float>*
|
|
%5 = load <4 x float>, <4 x float>* %4, align 8
|
|
%6 = insertelement <4 x float> %5, float %0, i64 0
|
|
%7 = bitcast <4 x float> %6 to <16 x i8>
|
|
%8 = bitcast <16 x i8> %1 to <4 x float>
|
|
%9 = insertelement <4 x float> %8, float %0, i64 0
|
|
%10 = bitcast <4 x float> %9 to <16 x i8>
|
|
%11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
|
|
%12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
|
|
store <16 x i8> %12, <16 x i8>* %2, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
|
|
; CHECK-FIX-LABEL: aese_setf32_cond_via_ptr:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: vorr q0, q0, q0
|
|
; CHECK-FIX-NEXT: cmp r0, #0
|
|
; CHECK-FIX-NEXT: beq .LBB42_2
|
|
; CHECK-FIX-NEXT: @ %bb.1:
|
|
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
|
|
; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
|
|
; CHECK-FIX-NEXT: cmp r0, #0
|
|
; CHECK-FIX-NEXT: bne .LBB42_3
|
|
; CHECK-FIX-NEXT: b .LBB42_4
|
|
; CHECK-FIX-NEXT: .LBB42_2:
|
|
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
|
|
; CHECK-FIX-NEXT: cmp r0, #0
|
|
; CHECK-FIX-NEXT: beq .LBB42_4
|
|
; CHECK-FIX-NEXT: .LBB42_3:
|
|
; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
|
|
; CHECK-FIX-NEXT: .LBB42_4:
|
|
; CHECK-FIX-NEXT: aese.8 q8, q0
|
|
; CHECK-FIX-NEXT: aesmc.8 q8, q8
|
|
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
|
|
; CHECK-FIX-NEXT: bx lr
|
|
br i1 %0, label %5, label %10
|
|
|
|
5:
|
|
%6 = load float, float* %1, align 4
|
|
%7 = bitcast <16 x i8>* %3 to <4 x float>*
|
|
%8 = load <4 x float>, <4 x float>* %7, align 8
|
|
%9 = insertelement <4 x float> %8, float %6, i64 0
|
|
br label %13
|
|
|
|
10:
|
|
%11 = bitcast <16 x i8>* %3 to <4 x float>*
|
|
%12 = load <4 x float>, <4 x float>* %11, align 8
|
|
br label %13
|
|
|
|
13:
|
|
%14 = phi <4 x float> [ %9, %5 ], [ %12, %10 ]
|
|
br i1 %0, label %15, label %19
|
|
|
|
15:
|
|
%16 = load float, float* %1, align 4
|
|
%17 = bitcast <16 x i8> %2 to <4 x float>
|
|
%18 = insertelement <4 x float> %17, float %16, i64 0
|
|
br label %21
|
|
|
|
19:
|
|
%20 = bitcast <16 x i8> %2 to <4 x float>
|
|
br label %21
|
|
|
|
21:
|
|
%22 = phi <4 x float> [ %18, %15 ], [ %20, %19 ]
|
|
%23 = bitcast <4 x float> %14 to <16 x i8>
|
|
%24 = bitcast <4 x float> %22 to <16 x i8>
|
|
%25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
|
|
%26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
|
|
store <16 x i8> %26, <16 x i8>* %3, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
|
|
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_cond_via_val:
|
|
; CHECK-FIX-NOSCHED: @ %bb.0:
|
|
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
|
|
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
|
|
; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s8, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
|
|
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
|
|
; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s4, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
|
|
; CHECK-FIX-NOSCHED-NEXT: aese.8 q2, q1
|
|
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q2
|
|
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NOSCHED-NEXT: bx lr
|
|
;
|
|
; CHECK-CORTEX-FIX-LABEL: aese_setf32_cond_via_val:
|
|
; CHECK-CORTEX-FIX: @ %bb.0:
|
|
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
|
|
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
|
|
; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s8, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
|
|
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
|
|
; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s4, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
|
|
; CHECK-CORTEX-FIX-NEXT: aese.8 q2, q1
|
|
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q2
|
|
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-CORTEX-FIX-NEXT: bx lr
|
|
%5 = bitcast <16 x i8>* %3 to <4 x float>*
|
|
%6 = load <4 x float>, <4 x float>* %5, align 8
|
|
%7 = insertelement <4 x float> %6, float %1, i64 0
|
|
%8 = select i1 %0, <4 x float> %7, <4 x float> %6
|
|
%9 = bitcast <16 x i8> %2 to <4 x float>
|
|
%10 = insertelement <4 x float> %9, float %1, i64 0
|
|
%11 = select i1 %0, <4 x float> %10, <4 x float> %9
|
|
%12 = bitcast <4 x float> %8 to <16 x i8>
|
|
%13 = bitcast <4 x float> %11 to <16 x i8>
|
|
%14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
|
|
%15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
|
|
store <16 x i8> %15, <16 x i8>* %3, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
|
|
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_ptr:
|
|
; CHECK-FIX-NOSCHED: @ %bb.0:
|
|
; CHECK-FIX-NOSCHED-NEXT: vldr s4, [r1]
|
|
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
|
|
; CHECK-FIX-NOSCHED-NEXT: vstr s4, [r2]
|
|
; CHECK-FIX-NOSCHED-NEXT: bxeq lr
|
|
; CHECK-FIX-NOSCHED-NEXT: .LBB44_1:
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s0, s4
|
|
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
|
|
; CHECK-FIX-NOSCHED-NEXT: .LBB44_2: @ =>This Inner Loop Header: Depth=1
|
|
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
|
|
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
|
|
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
|
|
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
|
|
; CHECK-FIX-NOSCHED-NEXT: bne .LBB44_2
|
|
; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
|
|
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
|
|
; CHECK-FIX-NOSCHED-NEXT: bx lr
|
|
;
|
|
; CHECK-CORTEX-FIX-LABEL: aese_setf32_loop_via_ptr:
|
|
; CHECK-CORTEX-FIX: @ %bb.0:
|
|
; CHECK-CORTEX-FIX-NEXT: vldr s4, [r1]
|
|
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
|
|
; CHECK-CORTEX-FIX-NEXT: vstr s4, [r2]
|
|
; CHECK-CORTEX-FIX-NEXT: bxeq lr
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB44_1:
|
|
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s0, s4
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB44_2: @ =>This Inner Loop Header: Depth=1
|
|
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
|
|
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
|
|
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
|
|
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
|
|
; CHECK-CORTEX-FIX-NEXT: bne .LBB44_2
|
|
; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
|
|
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
|
|
; CHECK-CORTEX-FIX-NEXT: bx lr
|
|
%5 = load float, float* %1, align 4
|
|
%6 = bitcast <16 x i8> %2 to <4 x float>
|
|
%7 = insertelement <4 x float> %6, float %5, i64 0
|
|
%8 = bitcast <4 x float> %7 to <16 x i8>
|
|
%9 = bitcast <16 x i8>* %3 to float*
|
|
store float %5, float* %9, align 8
|
|
%10 = icmp eq i32 %0, 0
|
|
br i1 %10, label %14, label %11
|
|
|
|
11:
|
|
%12 = load <16 x i8>, <16 x i8>* %3, align 8
|
|
br label %15
|
|
|
|
13:
|
|
store <16 x i8> %19, <16 x i8>* %3, align 8
|
|
br label %14
|
|
|
|
14:
|
|
ret void
|
|
|
|
15:
|
|
%16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
|
|
%17 = phi i32 [ 0, %11 ], [ %20, %15 ]
|
|
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
|
|
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
|
|
%20 = add nuw i32 %17, 1
|
|
%21 = icmp eq i32 %20, %0
|
|
br i1 %21, label %13, label %15
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
|
|
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_val:
|
|
; CHECK-FIX-NOSCHED: @ %bb.0:
|
|
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
|
|
; CHECK-FIX-NOSCHED-NEXT: bxeq lr
|
|
; CHECK-FIX-NOSCHED-NEXT: .LBB45_1:
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s4, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: .LBB45_2: @ =>This Inner Loop Header: Depth=1
|
|
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
|
|
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s8, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
|
|
; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
|
|
; CHECK-FIX-NOSCHED-NEXT: aese.8 q2, q1
|
|
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q2
|
|
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NOSCHED-NEXT: bne .LBB45_2
|
|
; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
|
|
; CHECK-FIX-NOSCHED-NEXT: bx lr
|
|
;
|
|
; CHECK-CORTEX-FIX-LABEL: aese_setf32_loop_via_val:
|
|
; CHECK-CORTEX-FIX: @ %bb.0:
|
|
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
|
|
; CHECK-CORTEX-FIX-NEXT: bxeq lr
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB45_1:
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s4, s0
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB45_2: @ =>This Inner Loop Header: Depth=1
|
|
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s8, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
|
|
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
|
|
; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
|
|
; CHECK-CORTEX-FIX-NEXT: aese.8 q2, q1
|
|
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q2
|
|
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-CORTEX-FIX-NEXT: bne .LBB45_2
|
|
; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
|
|
; CHECK-CORTEX-FIX-NEXT: bx lr
|
|
%5 = icmp eq i32 %0, 0
|
|
br i1 %5, label %12, label %6
|
|
|
|
6:
|
|
%7 = bitcast <16 x i8> %2 to <4 x float>
|
|
%8 = insertelement <4 x float> %7, float %1, i64 0
|
|
%9 = bitcast <4 x float> %8 to <16 x i8>
|
|
%10 = bitcast <16 x i8>* %3 to <4 x float>*
|
|
%11 = bitcast <16 x i8>* %3 to float*
|
|
br label %13
|
|
|
|
12:
|
|
ret void
|
|
|
|
13:
|
|
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
|
|
%15 = load <4 x float>, <4 x float>* %10, align 8
|
|
%16 = insertelement <4 x float> %15, float %1, i64 0
|
|
%17 = bitcast <4 x float> %16 to <16 x i8>
|
|
store float %1, float* %11, align 8
|
|
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
|
|
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
|
|
store <16 x i8> %19, <16 x i8>* %3, align 8
|
|
%20 = add nuw i32 %14, 1
|
|
%21 = icmp eq i32 %20, %0
|
|
br i1 %21, label %12, label %13
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aesd_zero(<16 x i8>* %0) nounwind {
|
|
; CHECK-FIX-LABEL: aesd_zero:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
|
|
; CHECK-FIX-NEXT: vmov.i32 q9, #0x0
|
|
; CHECK-FIX-NEXT: aesd.8 q9, q8
|
|
; CHECK-FIX-NEXT: aesimc.8 q8, q9
|
|
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
|
|
; CHECK-FIX-NEXT: bx lr
|
|
%2 = load <16 x i8>, <16 x i8>* %0, align 8
|
|
%3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> zeroinitializer, <16 x i8> %2)
|
|
%4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
|
|
store <16 x i8> %4, <16 x i8>* %0, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aesd_via_call1(<16 x i8>* %0) nounwind {
|
|
; CHECK-FIX-LABEL: aesd_via_call1:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: .save {r4, lr}
|
|
; CHECK-FIX-NEXT: push {r4, lr}
|
|
; CHECK-FIX-NEXT: mov r4, r0
|
|
; CHECK-FIX-NEXT: bl get_input
|
|
; CHECK-FIX-NEXT: vorr q0, q0, q0
|
|
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
|
|
; CHECK-FIX-NEXT: aesd.8 q0, q8
|
|
; CHECK-FIX-NEXT: aesimc.8 q8, q0
|
|
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
|
|
; CHECK-FIX-NEXT: pop {r4, pc}
|
|
%2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
|
|
%3 = load <16 x i8>, <16 x i8>* %0, align 8
|
|
%4 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %2, <16 x i8> %3)
|
|
%5 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %4)
|
|
store <16 x i8> %5, <16 x i8>* %0, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aesd_via_call2(half %0, <16 x i8>* %1) nounwind {
|
|
; CHECK-FIX-LABEL: aesd_via_call2:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: .save {r4, lr}
|
|
; CHECK-FIX-NEXT: push {r4, lr}
|
|
; CHECK-FIX-NEXT: mov r4, r0
|
|
; CHECK-FIX-NEXT: bl get_inputf16
|
|
; CHECK-FIX-NEXT: vorr q0, q0, q0
|
|
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
|
|
; CHECK-FIX-NEXT: aesd.8 q0, q8
|
|
; CHECK-FIX-NEXT: aesimc.8 q8, q0
|
|
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
|
|
; CHECK-FIX-NEXT: pop {r4, pc}
|
|
%3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
|
|
%4 = load <16 x i8>, <16 x i8>* %1, align 8
|
|
%5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
|
|
%6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
|
|
store <16 x i8> %6, <16 x i8>* %1, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aesd_via_call3(float %0, <16 x i8>* %1) nounwind {
|
|
; CHECK-FIX-LABEL: aesd_via_call3:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: .save {r4, lr}
|
|
; CHECK-FIX-NEXT: push {r4, lr}
|
|
; CHECK-FIX-NEXT: mov r4, r0
|
|
; CHECK-FIX-NEXT: bl get_inputf32
|
|
; CHECK-FIX-NEXT: vorr q0, q0, q0
|
|
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
|
|
; CHECK-FIX-NEXT: aesd.8 q0, q8
|
|
; CHECK-FIX-NEXT: aesimc.8 q8, q0
|
|
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
|
|
; CHECK-FIX-NEXT: pop {r4, pc}
|
|
%3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
|
|
%4 = load <16 x i8>, <16 x i8>* %1, align 8
|
|
%5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
|
|
%6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
|
|
store <16 x i8> %6, <16 x i8>* %1, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aesd_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
|
|
; CHECK-FIX-LABEL: aesd_once_via_ptr:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
|
|
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
|
|
; CHECK-FIX-NEXT: aesd.8 q9, q8
|
|
; CHECK-FIX-NEXT: aesimc.8 q8, q9
|
|
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NEXT: bx lr
|
|
%3 = load <16 x i8>, <16 x i8>* %1, align 8
|
|
%4 = load <16 x i8>, <16 x i8>* %0, align 8
|
|
%5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
|
|
%6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
|
|
store <16 x i8> %6, <16 x i8>* %1, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <16 x i8> @aesd_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
|
|
; CHECK-FIX-LABEL: aesd_once_via_val:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: vorr q1, q1, q1
|
|
; CHECK-FIX-NEXT: vorr q0, q0, q0
|
|
; CHECK-FIX-NEXT: aesd.8 q1, q0
|
|
; CHECK-FIX-NEXT: aesimc.8 q0, q1
|
|
; CHECK-FIX-NEXT: bx lr
|
|
%3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
|
|
%4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
|
|
ret <16 x i8> %4
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aesd_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
|
|
; CHECK-FIX-LABEL: aesd_twice_via_ptr:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
|
|
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
|
|
; CHECK-FIX-NEXT: aesd.8 q9, q8
|
|
; CHECK-FIX-NEXT: aesimc.8 q8, q9
|
|
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r0]
|
|
; CHECK-FIX-NEXT: aesd.8 q8, q9
|
|
; CHECK-FIX-NEXT: aesimc.8 q8, q8
|
|
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NEXT: bx lr
|
|
%3 = load <16 x i8>, <16 x i8>* %1, align 8
|
|
%4 = load <16 x i8>, <16 x i8>* %0, align 8
|
|
%5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
|
|
%6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
|
|
store <16 x i8> %6, <16 x i8>* %1, align 8
|
|
%7 = load <16 x i8>, <16 x i8>* %0, align 8
|
|
%8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
|
|
%9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
|
|
store <16 x i8> %9, <16 x i8>* %1, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <16 x i8> @aesd_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
|
|
; CHECK-FIX-LABEL: aesd_twice_via_val:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: vorr q1, q1, q1
|
|
; CHECK-FIX-NEXT: vorr q0, q0, q0
|
|
; CHECK-FIX-NEXT: vorr q0, q0, q0
|
|
; CHECK-FIX-NEXT: aesd.8 q1, q0
|
|
; CHECK-FIX-NEXT: aesimc.8 q8, q1
|
|
; CHECK-FIX-NEXT: aesd.8 q8, q0
|
|
; CHECK-FIX-NEXT: aesimc.8 q0, q8
|
|
; CHECK-FIX-NEXT: bx lr
|
|
%3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
|
|
%4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
|
|
%5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %4, <16 x i8> %0)
|
|
%6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
|
|
ret <16 x i8> %6
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aesd_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>* %2) nounwind {
|
|
; CHECK-FIX-NOSCHED-LABEL: aesd_loop_via_ptr:
|
|
; CHECK-FIX-NOSCHED: @ %bb.0:
|
|
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
|
|
; CHECK-FIX-NOSCHED-NEXT: bxeq lr
|
|
; CHECK-FIX-NOSCHED-NEXT: .LBB54_1: @ =>This Inner Loop Header: Depth=1
|
|
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
|
|
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d18, d19}, [r2]
|
|
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q9, q8
|
|
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q9
|
|
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
|
|
; CHECK-FIX-NOSCHED-NEXT: bne .LBB54_1
|
|
; CHECK-FIX-NOSCHED-NEXT: @ %bb.2:
|
|
; CHECK-FIX-NOSCHED-NEXT: bx lr
|
|
;
|
|
; CHECK-CORTEX-FIX-LABEL: aesd_loop_via_ptr:
|
|
; CHECK-CORTEX-FIX: @ %bb.0:
|
|
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
|
|
; CHECK-CORTEX-FIX-NEXT: bxeq lr
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB54_1: @ =>This Inner Loop Header: Depth=1
|
|
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
|
|
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d18, d19}, [r2]
|
|
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
|
|
; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8
|
|
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9
|
|
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
|
|
; CHECK-CORTEX-FIX-NEXT: bne .LBB54_1
|
|
; CHECK-CORTEX-FIX-NEXT: @ %bb.2:
|
|
; CHECK-CORTEX-FIX-NEXT: bx lr
|
|
%4 = icmp eq i32 %0, 0
|
|
br i1 %4, label %5, label %6
|
|
|
|
5:
|
|
ret void
|
|
|
|
6:
|
|
%7 = phi i32 [ %12, %6 ], [ 0, %3 ]
|
|
%8 = load <16 x i8>, <16 x i8>* %2, align 8
|
|
%9 = load <16 x i8>, <16 x i8>* %1, align 8
|
|
%10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %9)
|
|
%11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
|
|
store <16 x i8> %11, <16 x i8>* %2, align 8
|
|
%12 = add nuw i32 %7, 1
|
|
%13 = icmp eq i32 %12, %0
|
|
br i1 %13, label %5, label %6
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <16 x i8> @aesd_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
|
|
; CHECK-FIX-LABEL: aesd_loop_via_val:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: vorr q1, q1, q1
|
|
; CHECK-FIX-NEXT: vorr q0, q0, q0
|
|
; CHECK-FIX-NEXT: cmp r0, #0
|
|
; CHECK-FIX-NEXT: beq .LBB55_2
|
|
; CHECK-FIX-NEXT: .LBB55_1: @ =>This Inner Loop Header: Depth=1
|
|
; CHECK-FIX-NEXT: aesd.8 q1, q0
|
|
; CHECK-FIX-NEXT: subs r0, r0, #1
|
|
; CHECK-FIX-NEXT: aesimc.8 q1, q1
|
|
; CHECK-FIX-NEXT: bne .LBB55_1
|
|
; CHECK-FIX-NEXT: .LBB55_2:
|
|
; CHECK-FIX-NEXT: vorr q0, q1, q1
|
|
; CHECK-FIX-NEXT: bx lr
|
|
%4 = icmp eq i32 %0, 0
|
|
br i1 %4, label %5, label %7
|
|
|
|
5:
|
|
%6 = phi <16 x i8> [ %2, %3 ], [ %11, %7 ]
|
|
ret <16 x i8> %6
|
|
|
|
7:
|
|
%8 = phi i32 [ %12, %7 ], [ 0, %3 ]
|
|
%9 = phi <16 x i8> [ %11, %7 ], [ %2, %3 ]
|
|
%10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %1)
|
|
%11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
|
|
%12 = add nuw i32 %8, 1
|
|
%13 = icmp eq i32 %12, %0
|
|
br i1 %13, label %5, label %7
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aesd_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
|
|
; CHECK-FIX-NOSCHED-LABEL: aesd_set8_via_ptr:
|
|
; CHECK-FIX-NOSCHED: @ %bb.0:
|
|
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
|
|
; CHECK-FIX-NOSCHED-NEXT: ldrb r0, [r0]
|
|
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.8 d0[0], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.8 d16[0], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
|
|
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
|
|
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NOSCHED-NEXT: bx lr
|
|
;
|
|
; CHECK-CORTEX-FIX-LABEL: aesd_set8_via_ptr:
|
|
; CHECK-CORTEX-FIX: @ %bb.0:
|
|
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
|
|
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrb r0, [r0]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.8 d0[0], r0
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.8 d16[0], r0
|
|
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
|
|
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
|
|
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-CORTEX-FIX-NEXT: bx lr
|
|
%4 = load i8, i8* %0, align 1
|
|
%5 = load <16 x i8>, <16 x i8>* %2, align 8
|
|
%6 = insertelement <16 x i8> %5, i8 %4, i64 0
|
|
%7 = insertelement <16 x i8> %1, i8 %4, i64 0
|
|
%8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
|
|
%9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
|
|
store <16 x i8> %9, <16 x i8>* %2, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
|
|
; CHECK-FIX-LABEL: aesd_set8_via_val:
|
|
; CHECK-FIX: @ %bb.0:
|
|
; CHECK-FIX-NEXT: vorr q0, q0, q0
|
|
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NEXT: vmov.8 d0[0], r0
|
|
; CHECK-FIX-NEXT: vmov.8 d16[0], r0
|
|
; CHECK-FIX-NEXT: aesd.8 q8, q0
|
|
; CHECK-FIX-NEXT: aesimc.8 q8, q8
|
|
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NEXT: bx lr
|
|
%4 = load <16 x i8>, <16 x i8>* %2, align 8
|
|
%5 = insertelement <16 x i8> %4, i8 %0, i64 0
|
|
%6 = insertelement <16 x i8> %1, i8 %0, i64 0
|
|
%7 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %5, <16 x i8> %6)
|
|
%8 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %7)
|
|
store <16 x i8> %8, <16 x i8>* %2, align 8
|
|
ret void
|
|
}
|
|
|
|
define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB58_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.8 {d16[0]}, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB58_3
; CHECK-FIX-NEXT: b .LBB58_4
; CHECK-FIX-NEXT: .LBB58_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB58_4
; CHECK-FIX-NEXT: .LBB58_3:
; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r1]
; CHECK-FIX-NEXT: .LBB58_4:
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
br i1 %0, label %5, label %9

5:
%6 = load i8, i8* %1, align 1
%7 = load <16 x i8>, <16 x i8>* %3, align 8
%8 = insertelement <16 x i8> %7, i8 %6, i64 0
br label %11

9:
%10 = load <16 x i8>, <16 x i8>* %3, align 8
br label %11

11:
%12 = phi <16 x i8> [ %8, %5 ], [ %10, %9 ]
br i1 %0, label %13, label %16

13:
%14 = load i8, i8* %1, align 1
%15 = insertelement <16 x i8> %2, i8 %14, i64 0
br label %16

16:
%17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %17)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB59_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.8 d16[0], r1
; CHECK-FIX-NEXT: .LBB59_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB59_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: .LBB59_4: @ %select.end1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
%5 = load <16 x i8>, <16 x i8>* %3, align 8
%6 = insertelement <16 x i8> %5, i8 %1, i64 0
%7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
%8 = insertelement <16 x i8> %2, i8 %1, i64 0
%9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
%10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %9)
%11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
store <16 x i8> %11, <16 x i8>* %3, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrb r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strb r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB60_1:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB60_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB60_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
%5 = load i8, i8* %1, align 1
%6 = insertelement <16 x i8> %2, i8 %5, i64 0
%7 = getelementptr inbounds <16 x i8>, <16 x i8>* %3, i32 0, i32 0
store i8 %5, i8* %7, align 8
%8 = icmp eq i32 %0, 0
br i1 %8, label %12, label %9

9:
%10 = load <16 x i8>, <16 x i8>* %3, align 8
br label %13

11:
store <16 x i8> %17, <16 x i8>* %3, align 8
br label %12

12:
ret void

13:
%14 = phi <16 x i8> [ %10, %9 ], [ %17, %13 ]
%15 = phi i32 [ 0, %9 ], [ %18, %13 ]
%16 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %14, <16 x i8> %6)
%17 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %16)
%18 = add nuw i32 %15, 1
%19 = icmp eq i32 %18, %0
br i1 %19, label %11, label %13
}

define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB61_1:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB61_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vmov.8 d16[0], r1
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB61_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
%5 = icmp eq i32 %0, 0
br i1 %5, label %10, label %6

6:
%7 = insertelement <16 x i8> %2, i8 %1, i64 0
%8 = load <16 x i8>, <16 x i8>* %3, align 8
br label %11

9:
store <16 x i8> %16, <16 x i8>* %3, align 8
br label %10

10:
ret void

11:
%12 = phi <16 x i8> [ %8, %6 ], [ %16, %11 ]
%13 = phi i32 [ 0, %6 ], [ %17, %11 ]
%14 = insertelement <16 x i8> %12, i8 %1, i64 0
%15 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %14, <16 x i8> %7)
%16 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %15)
%17 = add nuw i32 %13, 1
%18 = icmp eq i32 %17, %0
br i1 %18, label %9, label %11
}

define arm_aapcs_vfpcc void @aesd_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set16_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
%4 = load i16, i16* %0, align 2
%5 = bitcast <16 x i8>* %2 to <8 x i16>*
%6 = load <8 x i16>, <8 x i16>* %5, align 8
%7 = insertelement <8 x i16> %6, i16 %4, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <8 x i16>
%10 = insertelement <8 x i16> %9, i16 %4, i64 0
%11 = bitcast <8 x i16> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set16_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NEXT: vmov.16 d16[0], r0
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
%4 = bitcast <16 x i8>* %2 to <8 x i16>*
%5 = load <8 x i16>, <8 x i16>* %4, align 8
%6 = insertelement <8 x i16> %5, i16 %0, i64 0
%7 = bitcast <8 x i16> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <8 x i16>
%9 = insertelement <8 x i16> %8, i16 %0, i64 0
%10 = bitcast <8 x i16> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
store <16 x i8> %12, <16 x i8>* %2, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB64_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB64_3
; CHECK-FIX-NEXT: b .LBB64_4
; CHECK-FIX-NEXT: .LBB64_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB64_4
; CHECK-FIX-NEXT: .LBB64_3:
; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r1:16]
; CHECK-FIX-NEXT: .LBB64_4:
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
br i1 %0, label %5, label %10

5:
%6 = load i16, i16* %1, align 2
%7 = bitcast <16 x i8>* %3 to <8 x i16>*
%8 = load <8 x i16>, <8 x i16>* %7, align 8
%9 = insertelement <8 x i16> %8, i16 %6, i64 0
br label %13

10:
%11 = bitcast <16 x i8>* %3 to <8 x i16>*
%12 = load <8 x i16>, <8 x i16>* %11, align 8
br label %13

13:
%14 = phi <8 x i16> [ %9, %5 ], [ %12, %10 ]
br i1 %0, label %15, label %19

15:
%16 = load i16, i16* %1, align 2
%17 = bitcast <16 x i8> %2 to <8 x i16>
%18 = insertelement <8 x i16> %17, i16 %16, i64 0
br label %21

19:
%20 = bitcast <16 x i8> %2 to <8 x i16>
br label %21

21:
%22 = phi <8 x i16> [ %18, %15 ], [ %20, %19 ]
%23 = bitcast <8 x i16> %14 to <16 x i8>
%24 = bitcast <8 x i16> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
store <16 x i8> %26, <16 x i8>* %3, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB65_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.16 d16[0], r1
; CHECK-FIX-NEXT: .LBB65_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB65_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: .LBB65_4: @ %select.end1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
%5 = bitcast <16 x i8>* %3 to <8 x i16>*
%6 = load <8 x i16>, <8 x i16>* %5, align 8
%7 = insertelement <8 x i16> %6, i16 %1, i64 0
%8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
%9 = bitcast <16 x i8> %2 to <8 x i16>
%10 = insertelement <8 x i16> %9, i16 %1, i64 0
%11 = select i1 %0, <8 x i16> %10, <8 x i16> %9
%12 = bitcast <8 x i16> %8 to <16 x i8>
%13 = bitcast <8 x i16> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
store <16 x i8> %15, <16 x i8>* %3, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrh r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strh r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB66_1:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB66_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB66_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
%5 = load i16, i16* %1, align 2
%6 = bitcast <16 x i8> %2 to <8 x i16>
%7 = insertelement <8 x i16> %6, i16 %5, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
%9 = bitcast <16 x i8>* %3 to i16*
store i16 %5, i16* %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11

11:
%12 = load <16 x i8>, <16 x i8>* %3, align 8
br label %15

13:
store <16 x i8> %19, <16 x i8>* %3, align 8
br label %14

14:
ret void

15:
%16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
%17 = phi i32 [ 0, %11 ], [ %20, %15 ]
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
%20 = add nuw i32 %17, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %13, label %15
}

define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB67_1:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: .LBB67_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: vmov.16 d16[0], r1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bne .LBB67_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: bx lr
%5 = icmp eq i32 %0, 0
br i1 %5, label %12, label %6

6:
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = insertelement <8 x i16> %7, i16 %1, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to <8 x i16>*
%11 = bitcast <16 x i8>* %3 to i16*
br label %13

12:
ret void

13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
%15 = load <8 x i16>, <8 x i16>* %10, align 8
%16 = insertelement <8 x i16> %15, i16 %1, i64 0
%17 = bitcast <8 x i16> %16 to <16 x i8>
store i16 %1, i16* %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}

define arm_aapcs_vfpcc void @aesd_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set32_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set32_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldr r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
%4 = load i32, i32* %0, align 4
%5 = bitcast <16 x i8>* %2 to <4 x i32>*
%6 = load <4 x i32>, <4 x i32>* %5, align 8
%7 = insertelement <4 x i32> %6, i32 %4, i64 0
%8 = bitcast <4 x i32> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <4 x i32>
%10 = insertelement <4 x i32> %9, i32 %4, i64 0
%11 = bitcast <4 x i32> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set32_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
%4 = bitcast <16 x i8>* %2 to <4 x i32>*
%5 = load <4 x i32>, <4 x i32>* %4, align 8
%6 = insertelement <4 x i32> %5, i32 %0, i64 0
%7 = bitcast <4 x i32> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <4 x i32>
%9 = insertelement <4 x i32> %8, i32 %0, i64 0
%10 = bitcast <4 x i32> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
store <16 x i8> %12, <16 x i8>* %2, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB70_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB70_3
; CHECK-FIX-NEXT: b .LBB70_4
; CHECK-FIX-NEXT: .LBB70_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB70_4
; CHECK-FIX-NEXT: .LBB70_3:
; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
; CHECK-FIX-NEXT: .LBB70_4:
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
br i1 %0, label %5, label %10

5:
%6 = load i32, i32* %1, align 4
%7 = bitcast <16 x i8>* %3 to <4 x i32>*
%8 = load <4 x i32>, <4 x i32>* %7, align 8
%9 = insertelement <4 x i32> %8, i32 %6, i64 0
br label %13

10:
%11 = bitcast <16 x i8>* %3 to <4 x i32>*
%12 = load <4 x i32>, <4 x i32>* %11, align 8
br label %13

13:
%14 = phi <4 x i32> [ %9, %5 ], [ %12, %10 ]
br i1 %0, label %15, label %19

15:
%16 = load i32, i32* %1, align 4
%17 = bitcast <16 x i8> %2 to <4 x i32>
%18 = insertelement <4 x i32> %17, i32 %16, i64 0
br label %21

19:
%20 = bitcast <16 x i8> %2 to <4 x i32>
br label %21

21:
%22 = phi <4 x i32> [ %18, %15 ], [ %20, %19 ]
%23 = bitcast <4 x i32> %14 to <16 x i8>
%24 = bitcast <4 x i32> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
store <16 x i8> %26, <16 x i8>* %3, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB71_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.32 d16[0], r1
; CHECK-FIX-NEXT: .LBB71_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB71_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.32 d0[0], r1
; CHECK-FIX-NEXT: .LBB71_4: @ %select.end1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
%5 = bitcast <16 x i8>* %3 to <4 x i32>*
%6 = load <4 x i32>, <4 x i32>* %5, align 8
%7 = insertelement <4 x i32> %6, i32 %1, i64 0
%8 = select i1 %0, <4 x i32> %7, <4 x i32> %6
%9 = bitcast <16 x i8> %2 to <4 x i32>
%10 = insertelement <4 x i32> %9, i32 %1, i64 0
%11 = select i1 %0, <4 x i32> %10, <4 x i32> %9
%12 = bitcast <4 x i32> %8 to <16 x i8>
%13 = bitcast <4 x i32> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
store <16 x i8> %15, <16 x i8>* %3, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldr r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: str r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB72_1:
; CHECK-FIX-NEXT: vmov.32 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB72_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB72_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
%5 = load i32, i32* %1, align 4
%6 = bitcast <16 x i8> %2 to <4 x i32>
%7 = insertelement <4 x i32> %6, i32 %5, i64 0
%8 = bitcast <4 x i32> %7 to <16 x i8>
%9 = bitcast <16 x i8>* %3 to i32*
store i32 %5, i32* %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11

11:
%12 = load <16 x i8>, <16 x i8>* %3, align 8
br label %15

13:
store <16 x i8> %19, <16 x i8>* %3, align 8
br label %14

14:
ret void

15:
%16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
%17 = phi i32 [ 0, %11 ], [ %20, %15 ]
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
%20 = add nuw i32 %17, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %13, label %15
}

define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB73_1:
; CHECK-FIX-NEXT: vmov.32 d0[0], r1
; CHECK-FIX-NEXT: .LBB73_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: vmov.32 d16[0], r1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bne .LBB73_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: bx lr
%5 = icmp eq i32 %0, 0
br i1 %5, label %12, label %6

6:
%7 = bitcast <16 x i8> %2 to <4 x i32>
%8 = insertelement <4 x i32> %7, i32 %1, i64 0
%9 = bitcast <4 x i32> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to <4 x i32>*
%11 = bitcast <16 x i8>* %3 to i32*
br label %13

12:
ret void

13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
%15 = load <4 x i32>, <4 x i32>* %10, align 8
%16 = insertelement <4 x i32> %15, i32 %1, i64 0
%17 = bitcast <4 x i32> %16 to <16 x i8>
store i32 %1, i32* %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}

define arm_aapcs_vfpcc void @aesd_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vorr d16, d0, d0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set64_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0]
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: vorr d16, d0, d0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
%4 = load i64, i64* %0, align 8
%5 = bitcast <16 x i8>* %2 to <2 x i64>*
%6 = load <2 x i64>, <2 x i64>* %5, align 8
%7 = insertelement <2 x i64> %6, i64 %4, i64 0
%8 = bitcast <2 x i64> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <2 x i64>
%10 = insertelement <2 x i64> %9, i64 %4, i64 0
%11 = bitcast <2 x i64> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set64_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NEXT: vmov.32 d0[1], r1
; CHECK-FIX-NEXT: vmov.32 d16[1], r1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
%4 = bitcast <16 x i8>* %2 to <2 x i64>*
%5 = load <2 x i64>, <2 x i64>* %4, align 8
%6 = insertelement <2 x i64> %5, i64 %0, i64 0
%7 = bitcast <2 x i64> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <2 x i64>
%9 = insertelement <2 x i64> %8, i64 %0, i64 0
%10 = bitcast <2 x i64> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
store <16 x i8> %12, <16 x i8>* %2, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_cond_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB76_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: vldr d16, [r1]
; CHECK-FIX-NOSCHED-NEXT: b .LBB76_3
; CHECK-FIX-NOSCHED-NEXT: .LBB76_2:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: .LBB76_3:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1]
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set64_cond_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: beq .LBB76_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
; CHECK-CORTEX-FIX-NEXT: vldr d18, [r1]
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: vorr d16, d18, d18
; CHECK-CORTEX-FIX-NEXT: b .LBB76_3
; CHECK-CORTEX-FIX-NEXT: .LBB76_2:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: .LBB76_3:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vldrne d0, [r1]
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: bx lr
br i1 %0, label %5, label %10

5:
%6 = load i64, i64* %1, align 8
%7 = bitcast <16 x i8>* %3 to <2 x i64>*
%8 = load <2 x i64>, <2 x i64>* %7, align 8
%9 = insertelement <2 x i64> %8, i64 %6, i64 0
br label %13

10:
%11 = bitcast <16 x i8>* %3 to <2 x i64>*
%12 = load <2 x i64>, <2 x i64>* %11, align 8
br label %13

13:
%14 = phi <2 x i64> [ %9, %5 ], [ %12, %10 ]
br i1 %0, label %15, label %19

15:
%16 = load i64, i64* %1, align 8
%17 = bitcast <16 x i8> %2 to <2 x i64>
%18 = insertelement <2 x i64> %17, i64 %16, i64 0
br label %21

19:
%20 = bitcast <16 x i8> %2 to <2 x i64>
br label %21

21:
%22 = phi <2 x i64> [ %18, %15 ], [ %20, %19 ]
%23 = bitcast <2 x i64> %14 to <16 x i8>
%24 = bitcast <2 x i64> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
store <16 x i8> %26, <16 x i8>* %3, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set64_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldr r1, [sp]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: beq .LBB77_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.32 d16[0], r2
; CHECK-FIX-NEXT: vmov.32 d16[1], r3
; CHECK-FIX-NEXT: .LBB77_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB77_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.32 d0[0], r2
; CHECK-FIX-NEXT: vmov.32 d0[1], r3
; CHECK-FIX-NEXT: .LBB77_4: @ %select.end1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
%5 = bitcast <16 x i8>* %3 to <2 x i64>*
%6 = load <2 x i64>, <2 x i64>* %5, align 8
%7 = insertelement <2 x i64> %6, i64 %1, i64 0
%8 = select i1 %0, <2 x i64> %7, <2 x i64> %6
%9 = bitcast <16 x i8> %2 to <2 x i64>
%10 = insertelement <2 x i64> %9, i64 %1, i64 0
%11 = select i1 %0, <2 x i64> %10, <2 x i64> %9
%12 = bitcast <2 x i64> %8 to <16 x i8>
%13 = bitcast <2 x i64> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
store <16 x i8> %15, <16 x i8>* %3, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: ldrd r4, r5, [r1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: strd r4, r5, [r2]
; CHECK-FIX-NOSCHED-NEXT: beq .LBB78_4
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vmov d0, r4, r5
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: .LBB78_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: bne .LBB78_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: .LBB78_4:
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aesd_set64_loop_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT: ldrd r4, r5, [r1]
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: strd r4, r5, [r2]
; CHECK-CORTEX-FIX-NEXT: popeq {r4, r5, r11, pc}
; CHECK-CORTEX-FIX-NEXT: .LBB78_1:
; CHECK-CORTEX-FIX-NEXT: vmov d0, r4, r5
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: .LBB78_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: bne .LBB78_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r11, pc}
%5 = load i64, i64* %1, align 8
%6 = bitcast <16 x i8> %2 to <2 x i64>
%7 = insertelement <2 x i64> %6, i64 %5, i64 0
%8 = bitcast <2 x i64> %7 to <16 x i8>
%9 = bitcast <16 x i8>* %3 to i64*
store i64 %5, i64* %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11

11:
%12 = load <16 x i8>, <16 x i8>* %3, align 8
br label %15

13:
store <16 x i8> %19, <16 x i8>* %3, align 8
br label %14

14:
ret void

15:
%16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
%17 = phi i32 [ 0, %11 ], [ %20, %15 ]
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
%20 = add nuw i32 %17, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %13, label %15
}

define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set64_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB79_1:
; CHECK-FIX-NEXT: vmov.32 d0[0], r2
; CHECK-FIX-NEXT: ldr r1, [sp]
; CHECK-FIX-NEXT: vmov.32 d0[1], r3
; CHECK-FIX-NEXT: .LBB79_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: vmov.32 d16[0], r2
; CHECK-FIX-NEXT: vmov.32 d16[1], r3
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bne .LBB79_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: bx lr
%5 = icmp eq i32 %0, 0
br i1 %5, label %12, label %6

6:
%7 = bitcast <16 x i8> %2 to <2 x i64>
%8 = insertelement <2 x i64> %7, i64 %1, i64 0
%9 = bitcast <2 x i64> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to <2 x i64>*
%11 = bitcast <16 x i8>* %3 to i64*
br label %13

12:
ret void

13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
%15 = load <2 x i64>, <2 x i64>* %10, align 8
%16 = insertelement <2 x i64> %15, i64 %1, i64 0
%17 = bitcast <2 x i64> %16 to <16 x i8>
store i64 %1, i64* %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}

define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf16_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
%4 = bitcast half* %0 to i16*
%5 = load i16, i16* %4, align 2
%6 = bitcast <16 x i8>* %2 to <8 x i16>*
%7 = load <8 x i16>, <8 x i16>* %6, align 8
%8 = insertelement <8 x i16> %7, i16 %5, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
%10 = bitcast <16 x i8> %1 to <8 x i16>
%11 = insertelement <8 x i16> %10, i16 %5, i64 0
%12 = bitcast <8 x i16> %11 to <16 x i8>
%13 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %12)
%14 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %13)
store <16 x i8> %14, <16 x i8>* %2, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vmov r1, s0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vmov.16 d2[0], r1
; CHECK-FIX-NEXT: vmov.16 d16[0], r1
; CHECK-FIX-NEXT: aesd.8 q8, q1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
%4 = bitcast <16 x i8>* %2 to <8 x i16>*
%5 = load <8 x i16>, <8 x i16>* %4, align 8
%6 = bitcast half %0 to i16
%7 = insertelement <8 x i16> %5, i16 %6, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <8 x i16>
%10 = insertelement <8 x i16> %9, i16 %6, i64 0
%11 = bitcast <8 x i16> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, lr}
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB82_3
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: vorr q9, q8, q8
; CHECK-FIX-NOSCHED-NEXT: vmov lr, r12, d17
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d18[0]}, [r1:16]
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r4, d18[0]
; CHECK-FIX-NOSCHED-NEXT: vmov s6, lr
; CHECK-FIX-NOSCHED-NEXT: lsr r5, r3, #16
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s8, s6
; CHECK-FIX-NOSCHED-NEXT: vmov s6, r3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s12, s6
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r12, #16
; CHECK-FIX-NOSCHED-NEXT: lsr lr, lr, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s4, r12
; CHECK-FIX-NOSCHED-NEXT: vmov s10, lr
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s4, s4
; CHECK-FIX-NOSCHED-NEXT: vmov s14, r5
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s10, s10
; CHECK-FIX-NOSCHED-NEXT: vmov s6, r4
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s14, s14
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s5, s6
; CHECK-FIX-NOSCHED-NEXT: vmov s6, r3
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r4, #16
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s6, s6
; CHECK-FIX-NOSCHED-NEXT: vmov s7, r3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s7, s7
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: bne .LBB82_4
; CHECK-FIX-NOSCHED-NEXT: .LBB82_2:
; CHECK-FIX-NOSCHED-NEXT: vmov r0, r1, d0
; CHECK-FIX-NOSCHED-NEXT: vmov r3, r7, d1
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r1
; CHECK-FIX-NOSCHED-NEXT: lsr r1, r1, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s0, r7
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s1
; CHECK-FIX-NOSCHED-NEXT: vmov s2, r3
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r0
; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s3, r3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s11, s1
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r7
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-FIX-NOSCHED-NEXT: vmov s13, r1
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-FIX-NOSCHED-NEXT: lsr r0, r0, #16
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s1, s1
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s3, s3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s13, s13
; CHECK-FIX-NOSCHED-NEXT: b .LBB82_5
; CHECK-FIX-NOSCHED-NEXT: .LBB82_3:
; CHECK-FIX-NOSCHED-NEXT: ldrh r5, [r2, #10]
; CHECK-FIX-NOSCHED-NEXT: ldrh r12, [r2, #6]
; CHECK-FIX-NOSCHED-NEXT: ldrh r6, [r2, #2]
; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r2, #14]
; CHECK-FIX-NOSCHED-NEXT: vmov s8, r5
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #12]
; CHECK-FIX-NOSCHED-NEXT: vmov s12, r12
; CHECK-FIX-NOSCHED-NEXT: ldrh r4, [r2, #8]
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r6
; CHECK-FIX-NOSCHED-NEXT: ldrh lr, [r2, #4]
; CHECK-FIX-NOSCHED-NEXT: vmov s4, r7
; CHECK-FIX-NOSCHED-NEXT: ldrh r8, [r2]
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s6, s4
; CHECK-FIX-NOSCHED-NEXT: vmov s4, r3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s10, s8
; CHECK-FIX-NOSCHED-NEXT: vmov s8, r4
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s14, s12
; CHECK-FIX-NOSCHED-NEXT: vmov s12, lr
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s7, s5
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r8
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s4, s4
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s8, s8
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s12, s12
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s5, s5
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB82_2
; CHECK-FIX-NOSCHED-NEXT: .LBB82_4:
; CHECK-FIX-NOSCHED-NEXT: vorr q8, q0, q0
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d0[1]
; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d16[0]}, [r1:16]
; CHECK-FIX-NOSCHED-NEXT: vmov r0, r1, d1
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r7, d16[0]
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r3
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s1
; CHECK-FIX-NOSCHED-NEXT: vmov s13, r3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s13, s13
; CHECK-FIX-NOSCHED-NEXT: vmov s0, r1
; CHECK-FIX-NOSCHED-NEXT: lsr r1, r1, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s2, r0
; CHECK-FIX-NOSCHED-NEXT: lsr r0, r0, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s3, r0
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s3, s3
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r7
; CHECK-FIX-NOSCHED-NEXT: lsr r0, r7, #16
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s11, s1
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r1
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s1, s1
; CHECK-FIX-NOSCHED-NEXT: .LBB82_5:
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s5, s5
; CHECK-FIX-NOSCHED-NEXT: vmov s15, r0
; CHECK-FIX-NOSCHED-NEXT: vmov r0, s5
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s5, s7
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s5
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s15, s15
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s5, s15
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s11, s11
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s5
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s12
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s5, s9
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s8, s8
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s2, s2
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r1, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s11
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r3, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s12
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s14
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov r0, s12
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s13
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s12
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s5
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r1, r3, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r1
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s8
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s8, s10
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s8
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r3, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s2
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s2, s3
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s2
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s2, s4
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r3, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s0
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s1
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov r0, s0
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s6
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov r1, s2
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r3, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, pc}
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, lr}
; CHECK-CORTEX-FIX-NEXT: .vsave {d8, d9}
; CHECK-CORTEX-FIX-NEXT: vpush {d8, d9}
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: beq .LBB82_3
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: vorr q9, q8, q8
; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
; CHECK-CORTEX-FIX-NEXT: vmov r5, r6, d17
; CHECK-CORTEX-FIX-NEXT: vld1.16 {d18[0]}, [r1:16]
; CHECK-CORTEX-FIX-NEXT: lsr r7, r5, #16
; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
; CHECK-CORTEX-FIX-NEXT: vmov s4, r6
; CHECK-CORTEX-FIX-NEXT: vmov s6, r5
; CHECK-CORTEX-FIX-NEXT: lsr r8, r3, #16
; CHECK-CORTEX-FIX-NEXT: vmov s8, r3
; CHECK-CORTEX-FIX-NEXT: vmov s12, r4
; CHECK-CORTEX-FIX-NEXT: vmov s5, r7
; CHECK-CORTEX-FIX-NEXT: vmov s9, r8
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s10, s4
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s4, s6
; CHECK-CORTEX-FIX-NEXT: vmov.32 lr, d18[0]
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s6, s5
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s7, s8
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s14, s12
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s5, s9
; CHECK-CORTEX-FIX-NEXT: lsr r12, lr, #16
; CHECK-CORTEX-FIX-NEXT: vmov s11, lr
; CHECK-CORTEX-FIX-NEXT: vmov s13, r12
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s12, s11
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s8, s13
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: bne .LBB82_4
; CHECK-CORTEX-FIX-NEXT: .LBB82_2:
; CHECK-CORTEX-FIX-NEXT: vmov r6, r5, d1
; CHECK-CORTEX-FIX-NEXT: vmov r0, r1, d0
; CHECK-CORTEX-FIX-NEXT: lsr r7, r1, #16
; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
; CHECK-CORTEX-FIX-NEXT: lsr r3, r5, #16
; CHECK-CORTEX-FIX-NEXT: vmov s2, r6
; CHECK-CORTEX-FIX-NEXT: vmov s0, r5
; CHECK-CORTEX-FIX-NEXT: vmov s3, r1
; CHECK-CORTEX-FIX-NEXT: vmov s9, r0
; CHECK-CORTEX-FIX-NEXT: lsr r12, r0, #16
; CHECK-CORTEX-FIX-NEXT: vmov s13, r3
; CHECK-CORTEX-FIX-NEXT: vmov s15, r4
; CHECK-CORTEX-FIX-NEXT: vmov s16, r7
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s1, s0
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s11, s3
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s3, s9
; CHECK-CORTEX-FIX-NEXT: vmov s0, r12
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s9, s15
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s13, s13
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s15, s16
; CHECK-CORTEX-FIX-NEXT: b .LBB82_5
; CHECK-CORTEX-FIX-NEXT: .LBB82_3:
; CHECK-CORTEX-FIX-NEXT: ldrh r12, [r2]
; CHECK-CORTEX-FIX-NEXT: ldrh lr, [r2, #2]
; CHECK-CORTEX-FIX-NEXT: ldrh r8, [r2, #4]
; CHECK-CORTEX-FIX-NEXT: ldrh r5, [r2, #6]
; CHECK-CORTEX-FIX-NEXT: ldrh r4, [r2, #8]
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #10]
; CHECK-CORTEX-FIX-NEXT: ldrh r7, [r2, #12]
; CHECK-CORTEX-FIX-NEXT: ldrh r6, [r2, #14]
; CHECK-CORTEX-FIX-NEXT: vmov s5, r5
; CHECK-CORTEX-FIX-NEXT: vmov s7, r8
; CHECK-CORTEX-FIX-NEXT: vmov s4, r6
; CHECK-CORTEX-FIX-NEXT: vmov s6, r7
; CHECK-CORTEX-FIX-NEXT: vmov s8, r3
; CHECK-CORTEX-FIX-NEXT: vmov s12, r4
; CHECK-CORTEX-FIX-NEXT: vmov s9, lr
; CHECK-CORTEX-FIX-NEXT: vmov s11, r12
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s14, s4
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s10, s6
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s6, s8
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s4, s12
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s5, s5
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s7, s7
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s8, s9
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s12, s11
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: beq .LBB82_2
; CHECK-CORTEX-FIX-NEXT: .LBB82_4:
; CHECK-CORTEX-FIX-NEXT: vorr q8, q0, q0
; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d0[1]
; CHECK-CORTEX-FIX-NEXT: vmov r6, r5, d1
; CHECK-CORTEX-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
; CHECK-CORTEX-FIX-NEXT: lsr r1, r5, #16
; CHECK-CORTEX-FIX-NEXT: vmov s2, r6
; CHECK-CORTEX-FIX-NEXT: vmov s0, r5
; CHECK-CORTEX-FIX-NEXT: lsr r7, r3, #16
; CHECK-CORTEX-FIX-NEXT: vmov s3, r3
; CHECK-CORTEX-FIX-NEXT: vmov s9, r1
; CHECK-CORTEX-FIX-NEXT: vmov s15, r4
; CHECK-CORTEX-FIX-NEXT: vmov s16, r7
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s1, s0
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-CORTEX-FIX-NEXT: vmov.32 r0, d16[0]
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s13, s9
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s9, s15
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s11, s3
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s15, s16
; CHECK-CORTEX-FIX-NEXT: vmov s18, r0
; CHECK-CORTEX-FIX-NEXT: lsr r12, r0, #16
; CHECK-CORTEX-FIX-NEXT: vmov s0, r12
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s3, s18
; CHECK-CORTEX-FIX-NEXT: .LBB82_5:
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s10, s10
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s14, s14
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s7, s7
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s6, s6
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s4, s4
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s8, s8
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s12, s12
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s2, s2
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s9, s9
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s3, s3
; CHECK-CORTEX-FIX-NEXT: vmov r0, s10
; CHECK-CORTEX-FIX-NEXT: vmov r1, s14
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s10, s5
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s14, s1
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s1, s13
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s5, s11
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s11, s15
; CHECK-CORTEX-FIX-NEXT: vmov r5, s6
; CHECK-CORTEX-FIX-NEXT: vmov r4, s8
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-CORTEX-FIX-NEXT: pkhbt r12, r0, r1, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov r1, s7
; CHECK-CORTEX-FIX-NEXT: vmov r3, s10
; CHECK-CORTEX-FIX-NEXT: vmov r7, s1
; CHECK-CORTEX-FIX-NEXT: vmov r6, s11
; CHECK-CORTEX-FIX-NEXT: vmov r0, s9
; CHECK-CORTEX-FIX-NEXT: pkhbt lr, r1, r3, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov r3, s14
; CHECK-CORTEX-FIX-NEXT: vmov r1, s0
; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r7, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov r7, s5
; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r7, r6, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov r6, s4
; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r5, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov r5, s12
; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov r4, s2
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r5
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r6
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], lr
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r12
; CHECK-CORTEX-FIX-NEXT: pkhbt r0, r4, r0, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov r4, s3
; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r4, r1, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r1
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r7
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r3
; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: vpop {d8, d9}
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, pc}
br i1 %0, label %5, label %12

5:
%6 = bitcast half* %1 to i16*
%7 = load i16, i16* %6, align 2
%8 = bitcast <16 x i8>* %3 to <8 x i16>*
%9 = load <8 x i16>, <8 x i16>* %8, align 8
%10 = insertelement <8 x i16> %9, i16 %7, i64 0
%11 = bitcast <8 x i16> %10 to <8 x half>
br label %15

12:
%13 = bitcast <16 x i8>* %3 to <8 x half>*
%14 = load <8 x half>, <8 x half>* %13, align 8
br label %15

15:
%16 = phi <8 x half> [ %11, %5 ], [ %14, %12 ]
br i1 %0, label %17, label %23

17:
%18 = bitcast half* %1 to i16*
%19 = load i16, i16* %18, align 2
%20 = bitcast <16 x i8> %2 to <8 x i16>
%21 = insertelement <8 x i16> %20, i16 %19, i64 0
%22 = bitcast <8 x i16> %21 to <8 x half>
br label %25

23:
%24 = bitcast <16 x i8> %2 to <8 x half>
br label %25

25:
%26 = phi <8 x half> [ %22, %17 ], [ %24, %23 ]
%27 = bitcast <8 x half> %16 to <16 x i8>
%28 = bitcast <8 x half> %26 to <16 x i8>
%29 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %27, <16 x i8> %28)
%30 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %29)
store <16 x i8> %30, <16 x i8>* %3, align 8
ret void
}

define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s0
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB83_3
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s9
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s0
; CHECK-FIX-NOSCHED-NEXT: vmov lr, r12, d17
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r2
; CHECK-FIX-NOSCHED-NEXT: vmov s2, lr
; CHECK-FIX-NOSCHED-NEXT: lsr lr, lr, #16
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s8, s2
; CHECK-FIX-NOSCHED-NEXT: vmov s2, r3
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r2, d16[0]
; CHECK-FIX-NOSCHED-NEXT: lsr r4, r3, #16
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s12, s2
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r12, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s0, r12
; CHECK-FIX-NOSCHED-NEXT: vmov s10, lr
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-FIX-NOSCHED-NEXT: vmov s14, r4
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s10, s10
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s14, s14
; CHECK-FIX-NOSCHED-NEXT: vmov s2, r2
; CHECK-FIX-NOSCHED-NEXT: lsr r2, r2, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s3, r2
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s1, s2
; CHECK-FIX-NOSCHED-NEXT: vmov s2, r3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s3, s3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: bne .LBB83_4
; CHECK-FIX-NOSCHED-NEXT: .LBB83_2:
; CHECK-FIX-NOSCHED-NEXT: vmov r0, r2, d2
; CHECK-FIX-NOSCHED-NEXT: vmov r3, r7, d3
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r2
; CHECK-FIX-NOSCHED-NEXT: lsr r2, r2, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s4, r7
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s5
; CHECK-FIX-NOSCHED-NEXT: vmov s6, r3
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r0
; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s7, r3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s11, s5
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r7
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s4, s4
; CHECK-FIX-NOSCHED-NEXT: vmov s13, r2
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s6, s6
; CHECK-FIX-NOSCHED-NEXT: lsr r0, r0, #16
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s5, s5
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s7, s7
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s13, s13
; CHECK-FIX-NOSCHED-NEXT: b .LBB83_5
; CHECK-FIX-NOSCHED-NEXT: .LBB83_3:
; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #10]
; CHECK-FIX-NOSCHED-NEXT: ldrh r12, [r1, #6]
; CHECK-FIX-NOSCHED-NEXT: ldrh r5, [r1, #2]
; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1, #14]
; CHECK-FIX-NOSCHED-NEXT: vmov s8, r3
; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #12]
; CHECK-FIX-NOSCHED-NEXT: vmov s12, r12
; CHECK-FIX-NOSCHED-NEXT: ldrh r4, [r1, #8]
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r5
; CHECK-FIX-NOSCHED-NEXT: ldrh lr, [r1, #4]
; CHECK-FIX-NOSCHED-NEXT: vmov s0, r7
; CHECK-FIX-NOSCHED-NEXT: ldrh r6, [r1]
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s2, s0
; CHECK-FIX-NOSCHED-NEXT: vmov s0, r2
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s10, s8
; CHECK-FIX-NOSCHED-NEXT: vmov s8, r4
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s14, s12
; CHECK-FIX-NOSCHED-NEXT: vmov s12, lr
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s3, s1
; CHECK-FIX-NOSCHED-NEXT: vmov s1, r6
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s8, s8
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s12, s12
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s1, s1
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB83_2
; CHECK-FIX-NOSCHED-NEXT: .LBB83_4:
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s9, s9
; CHECK-FIX-NOSCHED-NEXT: vmov r0, r2, d3
; CHECK-FIX-NOSCHED-NEXT: vmov r7, s9
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d2[1]
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d2[0], r7
; CHECK-FIX-NOSCHED-NEXT: vmov.32 r7, d2[0]
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r3
; CHECK-FIX-NOSCHED-NEXT: vmov s4, r2
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s9, s5
; CHECK-FIX-NOSCHED-NEXT: vmov s6, r0
; CHECK-FIX-NOSCHED-NEXT: lsr r0, r0, #16
; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
; CHECK-FIX-NOSCHED-NEXT: lsr r2, r2, #16
; CHECK-FIX-NOSCHED-NEXT: vmov s7, r0
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s4, s4
; CHECK-FIX-NOSCHED-NEXT: vmov s13, r3
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s6, s6
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s7, s7
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s13, s13
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r7
; CHECK-FIX-NOSCHED-NEXT: lsr r0, r7, #16
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s11, s5
; CHECK-FIX-NOSCHED-NEXT: vmov s5, r2
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s5, s5
|
|
; CHECK-FIX-NOSCHED-NEXT: .LBB83_5:
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s1, s1
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov s15, r0
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r0, s1
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s1, s3
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s1
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f32.f16 s15, s15
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s1, s15
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s11, s11
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s1
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s12
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s1, s9
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s8, s8
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s6, s6
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s4, s4
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r2, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s11
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r3, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s12
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s14
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r0, s12
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s12, s13
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s12
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s1
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r2, r2, r3, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r2
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s8
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s8, s10
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s8
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r3, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s6
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s6, s7
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s6
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r3, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s4
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s4, s5
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r0, s4
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r2, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: vcvtb.f16.f32 s0, s2
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov r3, s0
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r3, lsl #16
|
|
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
|
|
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q9
|
|
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
|
|
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r11, pc}
|
|
;
|
|
; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_val:
|
|
; CHECK-CORTEX-FIX: @ %bb.0:
|
|
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r11, lr}
|
|
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r11, lr}
|
|
; CHECK-CORTEX-FIX-NEXT: .vsave {d8, d9}
|
|
; CHECK-CORTEX-FIX-NEXT: vpush {d8, d9}
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s9, s0
|
|
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
|
|
; CHECK-CORTEX-FIX-NEXT: beq .LBB83_3
|
|
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s0, s9
|
|
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r2, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r2
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r4, r5, d17
|
|
; CHECK-CORTEX-FIX-NEXT: lsr lr, r3, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s8, r3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s11, lr
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r6, r4, #16
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r7, r5, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s0, r5
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s2, r4
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s12, r7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s1, r6
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s3, s8
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 r2, d16[0]
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s10, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s0, s2
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s2, s1
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s14, s12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s1, s11
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r12, r2, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s13, r2
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s15, r12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s12, s13
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s8, s15
|
|
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
|
|
; CHECK-CORTEX-FIX-NEXT: bne .LBB83_4
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB83_2:
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r6, r5, d3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r0, r2, d2
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r7, r2, #16
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r3, r5, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s6, r6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s4, r5
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s7, r2
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s9, r0
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r12, r0, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s13, r3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s15, r4
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s16, r7
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s5, s4
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s11, s7
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s7, s9
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s4, r12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s6, s6
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s9, s15
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s13, s13
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s15, s16
|
|
; CHECK-CORTEX-FIX-NEXT: b .LBB83_5
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB83_3:
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r12, [r1]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh lr, [r1, #2]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r7, [r1, #4]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r6, [r1, #6]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r5, [r1, #8]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r4, [r1, #10]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r2, [r1, #12]
|
|
; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #14]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s1, r6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s3, r7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s0, r3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s2, r2
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s8, r4
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s12, r5
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s11, lr
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s13, r12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s14, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s10, s2
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s2, s8
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s0, s12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s1, s1
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s3, s3
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s8, s11
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s12, s13
|
|
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
|
|
; CHECK-CORTEX-FIX-NEXT: beq .LBB83_2
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB83_4:
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s9, s9
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 r2, d2[1]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r0, s9
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r7, r2, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s16, r7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.16 d2[0], r0
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r6, r5, d3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s7, r2
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r3, r5, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s6, r6
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s11, s7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s9, r3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s15, r4
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 r0, d2[0]
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s4, r5
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s6, s6
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s13, s9
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s9, s15
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s15, s16
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s5, s4
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s18, r0
|
|
; CHECK-CORTEX-FIX-NEXT: lsr r12, r0, #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov s4, r12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s7, s18
|
|
; CHECK-CORTEX-FIX-NEXT: .LBB83_5:
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s10, s10
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s14, s14
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s3, s3
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s2, s2
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s0, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s8, s8
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s12, s12
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f32.f16 s4, s4
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s6, s6
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s9, s9
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s7, s7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r0, s10
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r2, s14
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s10, s1
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s14, s5
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s1, s13
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s5, s11
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s11, s15
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r5, s2
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r4, s8
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r12, r0, r2, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r2, s3
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r3, s10
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r7, s1
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r6, s11
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r0, s9
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt lr, r2, r3, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r3, s14
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r7, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r7, s5
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r7, r6, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r6, s0
|
|
; CHECK-CORTEX-FIX-NEXT: vcvtb.f16.f32 s0, s4
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r5, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r5, s12
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r2, s0
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r4, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r4, s6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r5
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r6
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], lr
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r12
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r0, r4, r0, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov r4, s7
|
|
; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r4, r2, lsl #16
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r2
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r0
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r7
|
|
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r3
|
|
; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8
|
|
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9
|
|
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
|
|
; CHECK-CORTEX-FIX-NEXT: vpop {d8, d9}
|
|
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r11, pc}
|
|
br i1 %0, label %5, label %11
|
|
|
|
5:
|
|
%6 = bitcast <16 x i8>* %3 to <8 x i16>*
|
|
%7 = load <8 x i16>, <8 x i16>* %6, align 8
|
|
%8 = bitcast half %1 to i16
|
|
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
|
|
%10 = bitcast <8 x i16> %9 to <8 x half>
|
|
br label %14
|
|
|
|
11:
|
|
%12 = bitcast <16 x i8>* %3 to <8 x half>*
|
|
%13 = load <8 x half>, <8 x half>* %12, align 8
|
|
br label %14
|
|
|
|
14:
|
|
%15 = phi <8 x half> [ %10, %5 ], [ %13, %11 ]
|
|
br i1 %0, label %16, label %21
|
|
|
|
16:
|
|
%17 = bitcast <16 x i8> %2 to <8 x i16>
|
|
%18 = bitcast half %1 to i16
|
|
%19 = insertelement <8 x i16> %17, i16 %18, i64 0
|
|
%20 = bitcast <8 x i16> %19 to <8 x half>
|
|
br label %23
|
|
|
|
21:
|
|
%22 = bitcast <16 x i8> %2 to <8 x half>
|
|
br label %23
|
|
|
|
23:
|
|
%24 = phi <8 x half> [ %20, %16 ], [ %22, %21 ]
|
|
%25 = bitcast <8 x half> %15 to <16 x i8>
|
|
%26 = bitcast <8 x half> %24 to <16 x i8>
|
|
%27 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %25, <16 x i8> %26)
|
|
%28 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %27)
|
|
store <16 x i8> %28, <16 x i8>* %3, align 8
|
|
ret void
|
|
}
|
|
|
|
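; The half is inserted into the key once, before the loop, and the vorr
; protecting q0 is likewise emitted outside the loop.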
define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrh r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strh r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB84_1:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB84_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB84_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
%5 = bitcast half* %1 to i16*
%6 = load i16, i16* %5, align 2
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = insertelement <8 x i16> %7, i16 %6, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to i16*
store i16 %6, i16* %10, align 8
%11 = icmp eq i32 %0, 0
br i1 %11, label %15, label %12

12:
%13 = load <16 x i8>, <16 x i8>* %3, align 8
br label %16

14:
store <16 x i8> %20, <16 x i8>* %3, align 8
br label %15

15:
ret void

16:
%17 = phi <16 x i8> [ %13, %12 ], [ %20, %16 ]
%18 = phi i32 [ 0, %12 ], [ %21, %16 ]
%19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19)
%21 = add nuw i32 %18, 1
%22 = icmp eq i32 %21, %0
br i1 %22, label %14, label %16
}

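; Here the data register is rebuilt from memory each iteration, while the vorr
; protecting the key in q1 stays in the entry block.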
define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB85_1:
; CHECK-FIX-NEXT: vcvtb.f32.f16 s0, s0
; CHECK-FIX-NEXT: vcvtb.f16.f32 s0, s0
; CHECK-FIX-NEXT: vmov r2, s0
; CHECK-FIX-NEXT: uxth r2, r2
; CHECK-FIX-NEXT: vmov.16 d2[0], r2
; CHECK-FIX-NEXT: .LBB85_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: vmov.16 d16[0], r2
; CHECK-FIX-NEXT: aesd.8 q8, q1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bne .LBB85_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: bx lr
%5 = icmp eq i32 %0, 0
br i1 %5, label %13, label %6

6:
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = bitcast half %1 to i16
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
%10 = bitcast <8 x i16> %9 to <16 x i8>
%11 = bitcast <16 x i8>* %3 to <8 x i16>*
%12 = bitcast <16 x i8>* %3 to half*
br label %14

13:
ret void

14:
%15 = phi i32 [ 0, %6 ], [ %21, %14 ]
%16 = load <8 x i16>, <8 x i16>* %11, align 8
%17 = insertelement <8 x i16> %16, i16 %8, i64 0
%18 = bitcast <8 x i16> %17 to <16 x i8>
store half %1, half* %12, align 8
%19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %18, <16 x i8> %10)
%20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19)
store <16 x i8> %20, <16 x i8>* %3, align 8
%21 = add nuw i32 %15, 1
%22 = icmp eq i32 %21, %0
br i1 %22, label %13, label %14
}

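; A float loaded through a pointer is inserted into lane 0 of both aesd
; operands, so both q0 and q1 are protected with a vorr.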
define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_setf32_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vldr s0, [r0]
; CHECK-FIX-NEXT: vld1.64 {d2, d3}, [r1]
; CHECK-FIX-NEXT: vmov.f32 s4, s0
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aesd.8 q1, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q1
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
%4 = load float, float* %0, align 4
%5 = bitcast <16 x i8>* %2 to <4 x float>*
%6 = load <4 x float>, <4 x float>* %5, align 8
%7 = insertelement <4 x float> %6, float %4, i64 0
%8 = bitcast <4 x float> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <4 x float>
%10 = insertelement <4 x float> %9, float %4, i64 0
%11 = bitcast <4 x float> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}

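; As above, with the float passed by value; both aesd operands again get a
; protecting vorr.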
define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_setf32_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vmov.f32 s4, s0
; CHECK-FIX-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-FIX-NEXT: vmov.f32 s0, s4
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: aesd.8 q0, q1
; CHECK-FIX-NEXT: aesimc.8 q8, q0
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
%4 = bitcast <16 x i8>* %2 to <4 x float>*
%5 = load <4 x float>, <4 x float>* %4, align 8
%6 = insertelement <4 x float> %5, float %0, i64 0
%7 = bitcast <4 x float> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <4 x float>
%9 = insertelement <4 x float> %8, float %0, i64 0
%10 = bitcast <4 x float> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
store <16 x i8> %12, <16 x i8>* %2, align 8
ret void
}

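; The conditional lane updates are done with vld1.32 element loads, which need
; no extra protection; only the incoming q0 argument gets a vorr at entry.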
define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB88_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB88_3
; CHECK-FIX-NEXT: b .LBB88_4
; CHECK-FIX-NEXT: .LBB88_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB88_4
; CHECK-FIX-NEXT: .LBB88_3:
; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
; CHECK-FIX-NEXT: .LBB88_4:
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
br i1 %0, label %5, label %10

5:
%6 = load float, float* %1, align 4
%7 = bitcast <16 x i8>* %3 to <4 x float>*
%8 = load <4 x float>, <4 x float>* %7, align 8
%9 = insertelement <4 x float> %8, float %6, i64 0
br label %13

10:
%11 = bitcast <16 x i8>* %3 to <4 x float>*
%12 = load <4 x float>, <4 x float>* %11, align 8
br label %13

13:
%14 = phi <4 x float> [ %9, %5 ], [ %12, %10 ]
br i1 %0, label %15, label %19

15:
%16 = load float, float* %1, align 4
%17 = bitcast <16 x i8> %2 to <4 x float>
%18 = insertelement <4 x float> %17, float %16, i64 0
br label %21

19:
%20 = bitcast <16 x i8> %2 to <4 x float>
br label %21

21:
%22 = phi <4 x float> [ %18, %15 ], [ %20, %19 ]
%23 = bitcast <4 x float> %14 to <16 x i8>
%24 = bitcast <4 x float> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
store <16 x i8> %26, <16 x i8>* %3, align 8
ret void
}

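; Both operands may be written by a scalar vmovne.f32, so both q1 and q2 are
; protected with a vorr before the aesd.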
define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_cond_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s8, s0
; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s4, s0
; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf32_cond_via_val:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s8, s0
; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s4, s0
; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT: aesd.8 q2, q1
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
%5 = bitcast <16 x i8>* %3 to <4 x float>*
%6 = load <4 x float>, <4 x float>* %5, align 8
%7 = insertelement <4 x float> %6, float %1, i64 0
%8 = select i1 %0, <4 x float> %7, <4 x float> %6
%9 = bitcast <16 x i8> %2 to <4 x float>
%10 = insertelement <4 x float> %9, float %1, i64 0
%11 = select i1 %0, <4 x float> %10, <4 x float> %9
%12 = bitcast <4 x float> %8 to <16 x i8>
%13 = bitcast <4 x float> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
store <16 x i8> %15, <16 x i8>* %3, align 8
ret void
}

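; The key is built with a scalar vmov.f32, and its protecting vorr lands inside
; the loop, ahead of each fused aesd/aesimc pair.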
define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vldr s4, [r1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vstr s4, [r2]
; CHECK-FIX-NOSCHED-NEXT: bxeq lr
; CHECK-FIX-NOSCHED-NEXT: .LBB90_1:
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s0, s4
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: .LBB90_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: bne .LBB90_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf32_loop_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vldr s4, [r1]
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vstr s4, [r2]
; CHECK-CORTEX-FIX-NEXT: bxeq lr
; CHECK-CORTEX-FIX-NEXT: .LBB90_1:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s0, s4
; CHECK-CORTEX-FIX-NEXT: .LBB90_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: bne .LBB90_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: bx lr
%5 = load float, float* %1, align 4
%6 = bitcast <16 x i8> %2 to <4 x float>
%7 = insertelement <4 x float> %6, float %5, i64 0
%8 = bitcast <4 x float> %7 to <16 x i8>
%9 = bitcast <16 x i8>* %3 to float*
store float %5, float* %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11

11:
%12 = load <16 x i8>, <16 x i8>* %3, align 8
br label %15

13:
store <16 x i8> %19, <16 x i8>* %3, align 8
br label %14

14:
ret void

15:
%16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
%17 = phi i32 [ 0, %11 ], [ %20, %15 ]
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
%20 = add nuw i32 %17, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %13, label %15
}

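; Both aesd operands see scalar vmov.f32 writes, so both q1 and q2 are
; protected with vorrs inside the loop.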
define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: bxeq lr
; CHECK-FIX-NOSCHED-NEXT: .LBB91_1:
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s4, s0
; CHECK-FIX-NOSCHED-NEXT: .LBB91_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s8, s0
; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bne .LBB91_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf32_loop_via_val:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: bxeq lr
; CHECK-CORTEX-FIX-NEXT: .LBB91_1:
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s4, s0
; CHECK-CORTEX-FIX-NEXT: .LBB91_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s8, s0
; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT: aesd.8 q2, q1
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bne .LBB91_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
; CHECK-CORTEX-FIX-NEXT: bx lr
%5 = icmp eq i32 %0, 0
br i1 %5, label %12, label %6

6:
%7 = bitcast <16 x i8> %2 to <4 x float>
%8 = insertelement <4 x float> %7, float %1, i64 0
%9 = bitcast <4 x float> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to <4 x float>*
%11 = bitcast <16 x i8>* %3 to float*
br label %13

12:
ret void

13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
%15 = load <4 x float>, <4 x float>* %10, align 8
%16 = insertelement <4 x float> %15, float %1, i64 0
%17 = bitcast <4 x float> %16 to <16 x i8>
store float %1, float* %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}

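; The key comes from the constant pool via a 128-bit vld1.64, so no vorr is
; needed on either input.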
define arm_aapcs_vfpcc void @aese_constantisland(<16 x i8>* %0) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_constantisland:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NOSCHED-NEXT: adr r1, .LCPI92_0
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d18, d19}, [r1:128]
; CHECK-FIX-NOSCHED-NEXT: aese.8 q9, q8
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NOSCHED-NEXT: bx lr
; CHECK-FIX-NOSCHED-NEXT: .p2align 4
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: .LCPI92_0:
; CHECK-FIX-NOSCHED-NEXT: .byte 0 @ 0x0
; CHECK-FIX-NOSCHED-NEXT: .byte 1 @ 0x1
; CHECK-FIX-NOSCHED-NEXT: .byte 2 @ 0x2
; CHECK-FIX-NOSCHED-NEXT: .byte 3 @ 0x3
; CHECK-FIX-NOSCHED-NEXT: .byte 4 @ 0x4
; CHECK-FIX-NOSCHED-NEXT: .byte 5 @ 0x5
; CHECK-FIX-NOSCHED-NEXT: .byte 6 @ 0x6
; CHECK-FIX-NOSCHED-NEXT: .byte 7 @ 0x7
; CHECK-FIX-NOSCHED-NEXT: .byte 8 @ 0x8
; CHECK-FIX-NOSCHED-NEXT: .byte 9 @ 0x9
; CHECK-FIX-NOSCHED-NEXT: .byte 10 @ 0xa
; CHECK-FIX-NOSCHED-NEXT: .byte 11 @ 0xb
; CHECK-FIX-NOSCHED-NEXT: .byte 12 @ 0xc
; CHECK-FIX-NOSCHED-NEXT: .byte 13 @ 0xd
; CHECK-FIX-NOSCHED-NEXT: .byte 14 @ 0xe
; CHECK-FIX-NOSCHED-NEXT: .byte 15 @ 0xf
;
; CHECK-CORTEX-FIX-LABEL: aese_constantisland:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: adr r1, .LCPI92_0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d18, d19}, [r1:128]
; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-CORTEX-FIX-NEXT: bx lr
; CHECK-CORTEX-FIX-NEXT: .p2align 4
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
; CHECK-CORTEX-FIX-NEXT: .LCPI92_0:
; CHECK-CORTEX-FIX-NEXT: .byte 0 @ 0x0
; CHECK-CORTEX-FIX-NEXT: .byte 1 @ 0x1
; CHECK-CORTEX-FIX-NEXT: .byte 2 @ 0x2
; CHECK-CORTEX-FIX-NEXT: .byte 3 @ 0x3
; CHECK-CORTEX-FIX-NEXT: .byte 4 @ 0x4
; CHECK-CORTEX-FIX-NEXT: .byte 5 @ 0x5
; CHECK-CORTEX-FIX-NEXT: .byte 6 @ 0x6
; CHECK-CORTEX-FIX-NEXT: .byte 7 @ 0x7
; CHECK-CORTEX-FIX-NEXT: .byte 8 @ 0x8
; CHECK-CORTEX-FIX-NEXT: .byte 9 @ 0x9
; CHECK-CORTEX-FIX-NEXT: .byte 10 @ 0xa
; CHECK-CORTEX-FIX-NEXT: .byte 11 @ 0xb
; CHECK-CORTEX-FIX-NEXT: .byte 12 @ 0xc
; CHECK-CORTEX-FIX-NEXT: .byte 13 @ 0xd
; CHECK-CORTEX-FIX-NEXT: .byte 14 @ 0xe
; CHECK-CORTEX-FIX-NEXT: .byte 15 @ 0xf
%2 = load <16 x i8>, <16 x i8>* %0, align 8
%3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> %2)
%4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
store <16 x i8> %4, <16 x i8>* %0, align 8
ret void
}