41 lines
3.0 KiB
LLVM
41 lines
3.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -verify-machineinstrs -stop-after tileconfig | FileCheck %s
|
|
|
|
; Test the tile register is allocated in a seperate pass.
|
|
|
|
; @foo keeps three AMX tiles live at the same time: three tilezero results are
; all operands of a single tdpbssd call whose result is never used. The i16
; return value is simply %row + %col; the i32 argument %t is not referenced.
;
; The autogenerated assertions below describe the MIR that the llc invocation at
; the top of this file is expected to print:
;   * $edx and $esi are copied into virtual registers %12 and %13;
;   * a 64-byte stack slot is zeroed with a single 512-bit store;
;   * the immediate 1 is written at offset 0, %12.sub_16bit at offsets 16, 18
;     and 20, and %13.sub_8bit at offsets 48, 49 and 50 of that slot;
;   * PLDTILECFGV loads the slot before the three PTILEZEROV and the PTDPBSSDV;
;   * the sum is formed with LEA64_32r and returned in $ax.
; NOTE(review): the offsets look like the AMX tile-configuration layout (palette
; byte, per-tile column widths, per-tile row counts) - confirm against the
; Intel documentation before relying on that reading.
;
; Do not edit the assertion lines by hand; regenerate them with the script named
; in the NOTE at the top of the file.
define i16 @foo(i32 noundef %t, i16 %row, i16 %col) nounwind {
  ; CHECK-LABEL: name: foo
  ; CHECK: bb.0.entry:
  ; CHECK-NEXT: liveins: $esi, $edx
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: undef %12.sub_32bit:gr64_nosp = COPY $edx
  ; CHECK-NEXT: undef %13.sub_32bit:gr64_with_sub_8bit = COPY $esi
  ; CHECK-NEXT: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0
  ; CHECK-NEXT: VMOVUPSZmr %stack.0, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.0, align 4)
  ; CHECK-NEXT: MOV8mi %stack.0, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.0, align 4)
  ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 20, $noreg, %12.sub_16bit :: (store (s512) into %stack.0 + 20, align 4)
  ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 50, $noreg, %13.sub_8bit :: (store (s512) into %stack.0 + 50, align 2, basealign 4)
  ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 18, $noreg, %12.sub_16bit :: (store (s512) into %stack.0 + 18, align 2, basealign 4)
  ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 49, $noreg, %13.sub_8bit :: (store (s512) into %stack.0 + 49, align 1, basealign 4)
  ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 16, $noreg, %12.sub_16bit :: (store (s512) into %stack.0 + 16, align 4)
  ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 48, $noreg, %13.sub_8bit :: (store (s512) into %stack.0 + 48, align 4)
  ; CHECK-NEXT: PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.0, align 4)
  ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV %13.sub_16bit, %12.sub_16bit
  ; CHECK-NEXT: [[PTILEZEROV1:%[0-9]+]]:tile = PTILEZEROV %13.sub_16bit, %12.sub_16bit
  ; CHECK-NEXT: [[PTILEZEROV2:%[0-9]+]]:tile = PTILEZEROV %13.sub_16bit, %12.sub_16bit
  ; CHECK-NEXT: dead [[PTILEZEROV2]]:tile = PTDPBSSDV %13.sub_16bit, %12.sub_16bit, %12.sub_16bit, [[PTILEZEROV2]], [[PTILEZEROV]], [[PTILEZEROV1]]
  ; CHECK-NEXT: [[LEA64_32r:%[0-9]+]]:gr32 = LEA64_32r %13, 1, %12, 0, $noreg
  ; CHECK-NEXT: $ax = COPY [[LEA64_32r]].sub_16bit
  ; CHECK-NEXT: RET 0, killed $ax
entry:
  ; Three tiles created with identical (%row, %col) operands.
  %0 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col)
  %1 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col)
  %2 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col)
  ; All three tiles feed one call, so they are live simultaneously; %3 itself
  ; has no users.
  %3 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col, i16 %col, x86_amx %2, x86_amx %0, x86_amx %1)
  ; The scalar result is independent of the tile values.
  %4 = add i16 %row, %col
  ret i16 %4
}
|
|
|
|
; Takes two i16 operands and returns an x86_amx value; @foo passes (%row, %col).
; NOTE(review): presumably yields a zero-filled tile of that shape - confirm
; against the LLVM AMX intrinsic definitions.
declare x86_amx @llvm.x86.tilezero.internal(i16, i16)
|
|
; Takes three i16 operands followed by three x86_amx operands and returns an
; x86_amx value; @foo passes (%row, %col, %col) and its three tiles.
; NOTE(review): presumably the first tile operand is the accumulator of a
; signed-byte dot product - confirm against the LLVM AMX intrinsic definitions.
declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
|