|
@@ -2,6 +2,7 @@
|
|
|
--- |
|
|
|
define amdgpu_kernel void @func0() #0 { ret void }
|
|
|
define amdgpu_kernel void @func1() #0 { ret void }
|
|
|
+ define amdgpu_kernel void @splitHoist() #0 { ret void }
|
|
|
|
|
|
attributes #0 = { "amdgpu-num-sgpr"="12" }
|
|
|
...
|
|
@@ -62,3 +63,43 @@ body: |
|
|
|
S_NOP 0, implicit %0.sub0
|
|
|
S_NOP 0, implicit %0.sub2
|
|
|
...
|
|
|
+---
|
|
|
+# Check that hoisting copies out of loops works. The main goal is that the
|
|
|
+# compiler does not crash when it hoists a subreg copy sequence.
|
|
|
+# CHECK-LABEL: name: splitHoist
|
|
|
+# CHECK: S_NOP 0, implicit-def %sgpr0
|
|
|
+# CHECK: S_NOP 0, implicit-def %sgpr3
|
|
|
+# CHECK-NEXT: SI_SPILL_S128_SAVE
|
|
|
+name: splitHoist
|
|
|
+tracksRegLiveness: true
|
|
|
+body: |
|
|
|
+ bb.0:
|
|
|
+ successors: %bb.1, %bb.2
|
|
|
+ S_NOP 0, implicit-def undef %0.sub0 : sreg_128
|
|
|
+ S_NOP 0, implicit-def %0.sub3 : sreg_128
|
|
|
+
|
|
|
+ S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc
|
|
|
+ S_BRANCH %bb.2
|
|
|
+
|
|
|
+ bb.1:
|
|
|
+ successors: %bb.1, %bb.3
|
|
|
+ S_NOP 0, implicit %0.sub0
|
|
|
+
|
|
|
+ ; Clobber sgpr0-sgpr11: 12 registers, matching amdgpu-num-sgpr=12; presumably so %0 cannot stay in SGPRs here
|
|
|
+ S_NOP 0, implicit-def dead %sgpr0, implicit-def dead %sgpr1, implicit-def dead %sgpr2, implicit-def dead %sgpr3, implicit-def dead %sgpr4, implicit-def dead %sgpr5, implicit-def dead %sgpr6, implicit-def dead %sgpr7, implicit-def dead %sgpr8, implicit-def dead %sgpr9, implicit-def dead %sgpr10, implicit-def dead %sgpr11
|
|
|
+
|
|
|
+ S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc
|
|
|
+ S_BRANCH %bb.3
|
|
|
+
|
|
|
+ bb.2:
|
|
|
+ successors: %bb.3
|
|
|
+ ; Clobber sgpr0-sgpr11 on the non-loop path as well (same 12 registers as in bb.1)
|
|
|
+ S_NOP 0, implicit-def dead %sgpr0, implicit-def dead %sgpr1, implicit-def dead %sgpr2, implicit-def dead %sgpr3, implicit-def dead %sgpr4, implicit-def dead %sgpr5, implicit-def dead %sgpr6, implicit-def dead %sgpr7, implicit-def dead %sgpr8, implicit-def dead %sgpr9, implicit-def dead %sgpr10, implicit-def dead %sgpr11
|
|
|
+ S_BRANCH %bb.3
|
|
|
+
|
|
|
+ bb.3:
|
|
|
+ S_NOP 0, implicit %0.sub0
|
|
|
+ S_NOP 0, implicit %0.sub3
|
|
|
+ S_NOP 0, implicit %0.sub0
|
|
|
+ S_NOP 0, implicit %0.sub3
|
|
|
+...
|