; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-linux-gnu -o - -global-isel %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI

;; Check that the LLVM AArch64 backend can handle arrays of
;; structs, and vice versa, when they are passed directly in IR.
;; (This layering is something clang would normally simplify.)
;;
;; Some of these examples are not ABI compliant, and they are not
;; meant to be. For instance, according to the ABI an aggregate
;; with more than 4 members must go in memory. That restriction
;; is applied earlier in the compilation process, so here we do
;; see 8-member types in registers.
;;
;; When we have more than 8 members we simply run out of registers
;; and that's what produces the 8 limit here.

;; Plain arrays

;; Return a zero-length array of double. There is nothing to return, so the
;; expected output for both instruction selectors is a bare `ret`.
define [ 0 x double ] @array_0() {
; CHECK-LABEL: array_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  ret [ 0 x double ] zeroinitializer
}

;; Return a one-element array of double. The single zero element is expected
;; to be materialised in d0.
define [ 1 x double ] @array_1() {
; CHECK-LABEL: array_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    ret
  ret [ 1 x double ] zeroinitializer
}

;; Return an eight-element array of double. Each element is expected in its
;; own register, d0 through d7, with no memory traffic.
define [ 8 x double ] @array_8() {
; CHECK-LABEL: array_8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    movi d2, #0000000000000000
; CHECK-NEXT:    movi d3, #0000000000000000
; CHECK-NEXT:    movi d4, #0000000000000000
; CHECK-NEXT:    movi d5, #0000000000000000
; CHECK-NEXT:    movi d6, #0000000000000000
; CHECK-NEXT:    movi d7, #0000000000000000
; CHECK-NEXT:    ret
  ret [ 8 x double ] zeroinitializer
}

;; More than 8 items do not fit in registers, so the value is returned in
;; memory: the expected output zeroes 72 bytes (9 x 8) through the pointer
;; held in x8. SelectionDAG uses q-register pair stores plus one xzr store;
;; GlobalISel uses xzr stores only.

define [ 9 x double ] @array_9() {
; CHECK-SD-LABEL: array_9:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
; CHECK-SD-NEXT:    str xzr, [x8, #64]
; CHECK-SD-NEXT:    stp q0, q0, [x8]
; CHECK-SD-NEXT:    stp q0, q0, [x8, #32]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: array_9:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    stp xzr, xzr, [x8]
; CHECK-GI-NEXT:    stp xzr, xzr, [x8, #16]
; CHECK-GI-NEXT:    stp xzr, xzr, [x8, #32]
; CHECK-GI-NEXT:    stp xzr, xzr, [x8, #48]
; CHECK-GI-NEXT:    str xzr, [x8, #64]
; CHECK-GI-NEXT:    ret
  ret [ 9 x double ] zeroinitializer
}

;; A struct with no fields. Returning it won't use any registers; these
;; cases only check that the backend copes with empty aggregates.
%T_STRUCT_0M = type { }

;; Return the empty struct directly: the expected output is a bare `ret`.
define %T_STRUCT_0M @struct_zero_fields() {
; CHECK-LABEL: struct_zero_fields:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  ret %T_STRUCT_0M zeroinitializer
}

;; Return a one-element array of the empty struct. As with the bare empty
;; struct, the expected output is only `ret`.
define [ 1 x %T_STRUCT_0M ] @array_of_struct_zero_fields() {
; CHECK-LABEL: array_of_struct_zero_fields:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  ret [ 1 x %T_STRUCT_0M ] zeroinitializer
}

;; Return a two-element array of the empty struct; still nothing to return,
;; so the expected output is only `ret`.
;; NOTE(review): the name says "in_struct", but the returned type is a plain
;; array with no enclosing struct -- confirm whether a wrapper was intended.
define [ 2 x %T_STRUCT_0M ] @array_of_struct_zero_fields_in_struct() {
; CHECK-LABEL: array_of_struct_zero_fields_in_struct:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  ret [ 2 x %T_STRUCT_0M ] zeroinitializer
}

;; A struct with a single i32 field.
%T_STRUCT_1M = type { i32 }

;; Return the one-field struct directly: the i32 is expected in w0.
define %T_STRUCT_1M @struct_one_field() {
; CHECK-LABEL: struct_one_field:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
  ret %T_STRUCT_1M zeroinitializer
}

;; Wrapping the one-field struct in a one-element array does not change the
;; expected output: the single i32 is still returned in w0.
define [ 1 x %T_STRUCT_1M ] @array_of_struct_one_field() {
; CHECK-LABEL: array_of_struct_one_field:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
  ret [ 1 x %T_STRUCT_1M ] zeroinitializer
}

;; This one will be a reg block: two one-field structs give two i32 values,
;; which are expected in the consecutive registers w0 and w1.
define [ 2 x %T_STRUCT_1M ] @array_of_struct_one_field_2() {
; CHECK-LABEL: array_of_struct_one_field_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    mov w1, wzr
; CHECK-NEXT:    ret
  ret [ 2 x %T_STRUCT_1M ] zeroinitializer
}

;; Different types for each field (one double, one i32), so the value will
;; not be put in a reg block.
%T_STRUCT_DIFFM = type { double, i32 }

;; Return the mixed struct directly: the double is expected in d0 and the
;; i32 in w0.
define %T_STRUCT_DIFFM @struct_different_field_types() {
; CHECK-LABEL: struct_different_field_types:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
  ret %T_STRUCT_DIFFM zeroinitializer
}

;; A one-element array of the mixed struct gives the same expected output as
;; the bare struct: double in d0, i32 in w0.
define [ 1 x %T_STRUCT_DIFFM ] @array_of_struct_different_field_types() {
; CHECK-LABEL: array_of_struct_different_field_types:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
  ret [ 1 x %T_STRUCT_DIFFM ] zeroinitializer
}

;; Two mixed structs: the two doubles are expected in d0/d1 and the two i32
;; values in w0/w1, i.e. each field takes the next register of its own kind.
define [ 2 x %T_STRUCT_DIFFM ] @array_of_struct_different_field_types_2() {
; CHECK-LABEL: array_of_struct_different_field_types_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    mov w1, wzr
; CHECK-NEXT:    ret
  ret [ 2 x %T_STRUCT_DIFFM ] zeroinitializer
}

;; Each field is the same type (double), so the value can be put in a reg
;; block.
%T_STRUCT_SAMEM = type { double, double }

;; This isn't a block as such; we just allocate two consecutive registers.
;; The two doubles are expected in d0 and d1.
define %T_STRUCT_SAMEM @struct_same_field_types() {
; CHECK-LABEL: struct_same_field_types:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    ret
  ret %T_STRUCT_SAMEM zeroinitializer
}

;; A one-element array of the two-double struct: same expected output as the
;; bare struct, with the doubles in d0 and d1.
define [ 1 x %T_STRUCT_SAMEM ] @array_of_struct_same_field_types() {
; CHECK-LABEL: array_of_struct_same_field_types:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    ret
  ret [ 1 x %T_STRUCT_SAMEM ] zeroinitializer
}

;; Two two-double structs give four doubles, expected in d0 through d3.
define [ 2 x %T_STRUCT_SAMEM ] @array_of_struct_same_field_types_2() {
; CHECK-LABEL: array_of_struct_same_field_types_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    movi d2, #0000000000000000
; CHECK-NEXT:    movi d3, #0000000000000000
; CHECK-NEXT:    ret
  ret [ 2 x %T_STRUCT_SAMEM ] zeroinitializer
}

;; Same field type but integer this time. Put into x registers instead.
%T_STRUCT_SAMEM_INT = type { i64, i64 }

;; Return the two-i64 struct directly: the fields are expected in x0 and x1.
define %T_STRUCT_SAMEM_INT @struct_same_field_types_int() {
; CHECK-LABEL: struct_same_field_types_int:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x0, xzr
; CHECK-NEXT:    mov x1, xzr
; CHECK-NEXT:    ret
  ret %T_STRUCT_SAMEM_INT zeroinitializer
}

;; A one-element array of the two-i64 struct: same expected output as the
;; bare struct, with the fields in x0 and x1.
define [ 1 x %T_STRUCT_SAMEM_INT ] @array_of_struct_same_field_types_int() {
; CHECK-LABEL: array_of_struct_same_field_types_int:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x0, xzr
; CHECK-NEXT:    mov x1, xzr
; CHECK-NEXT:    ret
  ret [ 1 x %T_STRUCT_SAMEM_INT ] zeroinitializer
}

;; Two two-i64 structs give four i64 values, expected in x0 through x3.
define [ 2 x %T_STRUCT_SAMEM_INT ] @array_of_struct_same_field_types_int_2() {
; CHECK-LABEL: array_of_struct_same_field_types_int_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x0, xzr
; CHECK-NEXT:    mov x1, xzr
; CHECK-NEXT:    mov x2, xzr
; CHECK-NEXT:    mov x3, xzr
; CHECK-NEXT:    ret
  ret [ 2 x %T_STRUCT_SAMEM_INT ] zeroinitializer
}

;; An aggregate of more than 8 items must go in memory.
;; 4x2 struct fields = 8 items, so it still goes in a block: the expected
;; output places the eight doubles in d0 through d7.

define [ 4 x %T_STRUCT_SAMEM ] @array_of_struct_8_fields() {
; CHECK-LABEL: array_of_struct_8_fields:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    movi d2, #0000000000000000
; CHECK-NEXT:    movi d3, #0000000000000000
; CHECK-NEXT:    movi d4, #0000000000000000
; CHECK-NEXT:    movi d5, #0000000000000000
; CHECK-NEXT:    movi d6, #0000000000000000
; CHECK-NEXT:    movi d7, #0000000000000000
; CHECK-NEXT:    ret
  ret [ 4 x %T_STRUCT_SAMEM ] zeroinitializer
}

;; 5x2 fields = 10, so it is returned in memory: the expected output zeroes
;; 80 bytes through the pointer in x8. SelectionDAG uses q-register stores;
;; GlobalISel uses five xzr pair stores.

define [ 5 x %T_STRUCT_SAMEM ] @array_of_struct_in_memory() {
; CHECK-SD-LABEL: array_of_struct_in_memory:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
; CHECK-SD-NEXT:    stp q0, q0, [x8, #16]
; CHECK-SD-NEXT:    stp q0, q0, [x8, #48]
; CHECK-SD-NEXT:    str q0, [x8]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: array_of_struct_in_memory:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    stp xzr, xzr, [x8]
; CHECK-GI-NEXT:    stp xzr, xzr, [x8, #16]
; CHECK-GI-NEXT:    stp xzr, xzr, [x8, #32]
; CHECK-GI-NEXT:    stp xzr, xzr, [x8, #48]
; CHECK-GI-NEXT:    stp xzr, xzr, [x8, #64]
; CHECK-GI-NEXT:    ret
  ret [ 5 x %T_STRUCT_SAMEM ] zeroinitializer
}

;; A struct whose only field is an array of two doubles.
%T_STRUCT_ARRAYM = type { [ 2 x double ]};

;; Return the struct directly: the two array elements are expected in d0
;; and d1.
define %T_STRUCT_ARRAYM @struct_array_field() {
; CHECK-LABEL: struct_array_field:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    ret
  ret %T_STRUCT_ARRAYM zeroinitializer
}

;; Array -> struct -> array nesting with one outer element: the two doubles
;; are still expected in d0 and d1.
define [ 1 x %T_STRUCT_ARRAYM ] @array_of_struct_array_field() {
; CHECK-LABEL: array_of_struct_array_field:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    ret
  ret [ 1 x %T_STRUCT_ARRAYM ] zeroinitializer
}

;; Two outer elements of the array-in-struct type give four doubles,
;; expected in d0 through d3.
define [ 2 x %T_STRUCT_ARRAYM ] @array_of_struct_array_field_2() {
; CHECK-LABEL: array_of_struct_array_field_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    movi d2, #0000000000000000
; CHECK-NEXT:    movi d3, #0000000000000000
; CHECK-NEXT:    ret
  ret [ 2 x %T_STRUCT_ARRAYM ] zeroinitializer
}

;; All non-aggregate fields must have the same type, all through the
;; overall aggregate. This is false here because of the i32.
%T_NESTED_STRUCT_DIFFM = type {
  [ 1 x { { double, double } } ],
  [ 1 x { { double, i32 } } ]
};

;; Return the nested mixed struct directly. Flattened, it holds three doubles
;; and one i32: the doubles are expected in d0-d2 and the i32 in w0.
define %T_NESTED_STRUCT_DIFFM @struct_nested_different_field_types() {
; CHECK-LABEL: struct_nested_different_field_types:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    movi d2, #0000000000000000
; CHECK-NEXT:    ret
  ret %T_NESTED_STRUCT_DIFFM zeroinitializer
}

;; A one-element array of the nested mixed struct: same expected output as
;; the bare struct, with three doubles in d0-d2 and the i32 in w0.
define [ 1 x %T_NESTED_STRUCT_DIFFM ] @array_of_struct_nested_different_field_types() {
; CHECK-LABEL: array_of_struct_nested_different_field_types:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    movi d2, #0000000000000000
; CHECK-NEXT:    ret
  ret [ 1 x %T_NESTED_STRUCT_DIFFM ] zeroinitializer
}

;; Two nested mixed structs: six doubles and two i32 values in total. The
;; doubles are expected in d0-d5 and the i32 values in w0 and w1.
define [ 2 x %T_NESTED_STRUCT_DIFFM ] @array_of_struct_nested_different_field_types_2() {
; CHECK-LABEL: array_of_struct_nested_different_field_types_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    movi d2, #0000000000000000
; CHECK-NEXT:    movi d3, #0000000000000000
; CHECK-NEXT:    mov w1, wzr
; CHECK-NEXT:    movi d4, #0000000000000000
; CHECK-NEXT:    movi d5, #0000000000000000
; CHECK-NEXT:    ret
  ret [ 2 x %T_NESTED_STRUCT_DIFFM ] zeroinitializer
}

;; All fields here are the same type, more nesting to stress the recursive walk.
;; Flattened, the type holds 1 + (2 x 2) = 5 doubles.
%T_NESTED_STRUCT_SAMEM = type {
  { { double} },
  { [ 2 x { double, double } ] }
};

;; Return the nested all-double struct directly: the five doubles are
;; expected in d0 through d4.
define %T_NESTED_STRUCT_SAMEM @struct_nested_same_field_types() {
; CHECK-LABEL: struct_nested_same_field_types:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    movi d2, #0000000000000000
; CHECK-NEXT:    movi d3, #0000000000000000
; CHECK-NEXT:    movi d4, #0000000000000000
; CHECK-NEXT:    ret
  ret %T_NESTED_STRUCT_SAMEM zeroinitializer
}

;; A one-element array of the nested all-double struct: same expected output
;; as the bare struct, five doubles in d0 through d4.
define [ 1 x %T_NESTED_STRUCT_SAMEM ] @array_of_struct_nested_same_field_types() {
; CHECK-LABEL: array_of_struct_nested_same_field_types:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    movi d2, #0000000000000000
; CHECK-NEXT:    movi d3, #0000000000000000
; CHECK-NEXT:    movi d4, #0000000000000000
; CHECK-NEXT:    ret
  ret [ 1 x %T_NESTED_STRUCT_SAMEM ] zeroinitializer
}

;; 2 x (1 + (2 x 2)) = 10 so this is returned in memory: the expected output
;; zeroes 80 bytes through the pointer in x8 (q-register stores for
;; SelectionDAG, xzr pair stores for GlobalISel).
define [ 2 x %T_NESTED_STRUCT_SAMEM ] @array_of_struct_nested_same_field_types_2() {
; CHECK-SD-LABEL: array_of_struct_nested_same_field_types_2:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
; CHECK-SD-NEXT:    stp q0, q0, [x8, #16]
; CHECK-SD-NEXT:    stp q0, q0, [x8, #48]
; CHECK-SD-NEXT:    str q0, [x8]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: array_of_struct_nested_same_field_types_2:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    stp xzr, xzr, [x8]
; CHECK-GI-NEXT:    stp xzr, xzr, [x8, #16]
; CHECK-GI-NEXT:    stp xzr, xzr, [x8, #32]
; CHECK-GI-NEXT:    stp xzr, xzr, [x8, #48]
; CHECK-GI-NEXT:    stp xzr, xzr, [x8, #64]
; CHECK-GI-NEXT:    ret
  ret [ 2 x %T_NESTED_STRUCT_SAMEM ] zeroinitializer
}

;; Check combinations of call, return and argument passing

;; 2 x (1 + 2) = 6 doubles, all the same type: small enough to travel in
;; registers.
%T_IN_BLOCK = type [ 2 x { double, { double, double } } ]

;; Return a zeroed %T_IN_BLOCK: the six doubles are expected in d0-d5.
define %T_IN_BLOCK @return_in_block() {
; CHECK-LABEL: return_in_block:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    movi d2, #0000000000000000
; CHECK-NEXT:    movi d3, #0000000000000000
; CHECK-NEXT:    movi d4, #0000000000000000
; CHECK-NEXT:    movi d5, #0000000000000000
; CHECK-NEXT:    ret
  ret %T_IN_BLOCK zeroinitializer
}

;; Global used as the source/destination for the %T_IN_BLOCK call tests.
@in_block_store = dso_local global %T_IN_BLOCK zeroinitializer, align 8

;; Call @return_in_block and store its result to @in_block_store. The
;; expected output reads the returned value from d0-d5 after the call and
;; writes all six doubles to the global; x30 is saved and restored around
;; the call.
define void @caller_in_block() {
; CHECK-SD-LABEL: caller_in_block:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    bl return_in_block
; CHECK-SD-NEXT:    adrp x8, in_block_store
; CHECK-SD-NEXT:    add x8, x8, :lo12:in_block_store
; CHECK-SD-NEXT:    stp d0, d1, [x8]
; CHECK-SD-NEXT:    stp d2, d3, [x8, #16]
; CHECK-SD-NEXT:    stp d4, d5, [x8, #32]
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: caller_in_block:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    bl return_in_block
; CHECK-GI-NEXT:    adrp x8, in_block_store
; CHECK-GI-NEXT:    str d0, [x8, :lo12:in_block_store]
; CHECK-GI-NEXT:    adrp x8, in_block_store
; CHECK-GI-NEXT:    add x8, x8, :lo12:in_block_store
; CHECK-GI-NEXT:    stp d1, d2, [x8, #8]
; CHECK-GI-NEXT:    stp d3, d4, [x8, #24]
; CHECK-GI-NEXT:    str d5, [x8, #40]
; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-GI-NEXT:    ret
  %1 = call %T_IN_BLOCK @return_in_block()
  store %T_IN_BLOCK %1, ptr @in_block_store
  ret void
}

;; Receive a %T_IN_BLOCK argument and store it to @in_block_store. The
;; expected output takes the six doubles from the incoming registers d0-d5
;; and writes them to the global, with no stack accesses.
define void @callee_in_block(%T_IN_BLOCK %a) {
; CHECK-SD-LABEL: callee_in_block:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, in_block_store
; CHECK-SD-NEXT:    add x8, x8, :lo12:in_block_store
; CHECK-SD-NEXT:    stp d4, d5, [x8, #32]
; CHECK-SD-NEXT:    stp d2, d3, [x8, #16]
; CHECK-SD-NEXT:    stp d0, d1, [x8]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: callee_in_block:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, in_block_store
; CHECK-GI-NEXT:    str d0, [x8, :lo12:in_block_store]
; CHECK-GI-NEXT:    adrp x8, in_block_store
; CHECK-GI-NEXT:    add x8, x8, :lo12:in_block_store
; CHECK-GI-NEXT:    stp d1, d2, [x8, #8]
; CHECK-GI-NEXT:    stp d3, d4, [x8, #24]
; CHECK-GI-NEXT:    str d5, [x8, #40]
; CHECK-GI-NEXT:    ret
  store %T_IN_BLOCK %a, ptr @in_block_store
  ret void
}

;; Load a %T_IN_BLOCK from @in_block_store and pass it to @callee_in_block.
;; The expected output loads the six doubles into d0-d5 before the call;
;; x30 is saved and restored around the call.
define void @argument_in_block() {
; CHECK-SD-LABEL: argument_in_block:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    adrp x8, in_block_store
; CHECK-SD-NEXT:    add x8, x8, :lo12:in_block_store
; CHECK-SD-NEXT:    ldp d4, d5, [x8, #32]
; CHECK-SD-NEXT:    ldp d2, d3, [x8, #16]
; CHECK-SD-NEXT:    ldp d0, d1, [x8]
; CHECK-SD-NEXT:    bl callee_in_block
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: argument_in_block:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    adrp x9, in_block_store
; CHECK-GI-NEXT:    add x9, x9, :lo12:in_block_store
; CHECK-GI-NEXT:    adrp x8, in_block_store
; CHECK-GI-NEXT:    ldp d1, d2, [x9, #8]
; CHECK-GI-NEXT:    ldr d0, [x8, :lo12:in_block_store]
; CHECK-GI-NEXT:    ldp d3, d4, [x9, #24]
; CHECK-GI-NEXT:    ldr d5, [x9, #40]
; CHECK-GI-NEXT:    bl callee_in_block
; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-GI-NEXT:    ret
  %1 = load %T_IN_BLOCK, ptr @in_block_store
  call void @callee_in_block(%T_IN_BLOCK %1)
  ret void
}

;; 3 x (1 + 2) = 9 doubles (72 bytes): one more than the 8 that fit in
;; registers, so values of this type travel through memory.
%T_IN_MEMORY = type [ 3 x { double, { double, double } } ]

;; Return a zeroed %T_IN_MEMORY: the expected output zeroes 72 bytes through
;; the pointer in x8.
define %T_IN_MEMORY @return_in_memory() {
; CHECK-SD-LABEL: return_in_memory:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
; CHECK-SD-NEXT:    str xzr, [x8, #64]
; CHECK-SD-NEXT:    stp q0, q0, [x8]
; CHECK-SD-NEXT:    stp q0, q0, [x8, #32]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: return_in_memory:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    stp xzr, xzr, [x8]
; CHECK-GI-NEXT:    stp xzr, xzr, [x8, #16]
; CHECK-GI-NEXT:    stp xzr, xzr, [x8, #32]
; CHECK-GI-NEXT:    stp xzr, xzr, [x8, #48]
; CHECK-GI-NEXT:    str xzr, [x8, #64]
; CHECK-GI-NEXT:    ret
  ret %T_IN_MEMORY zeroinitializer
}

;; Global used as the source/destination for the %T_IN_MEMORY call tests.
@in_memory_store = dso_local global %T_IN_MEMORY zeroinitializer, align 8

;; Call @return_in_memory and store its result to @in_memory_store. The
;; expected output reserves a 96-byte frame, passes sp+8 in x8 as the
;; location for the returned value, and after the call copies the 72 bytes
;; from that stack area to the global.
define void @caller_in_memory() {
; CHECK-SD-LABEL: caller_in_memory:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #96
; CHECK-SD-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    add x8, sp, #8
; CHECK-SD-NEXT:    bl return_in_memory
; CHECK-SD-NEXT:    ldur q0, [sp, #24]
; CHECK-SD-NEXT:    ldur q1, [sp, #8]
; CHECK-SD-NEXT:    adrp x8, in_memory_store
; CHECK-SD-NEXT:    add x8, x8, :lo12:in_memory_store
; CHECK-SD-NEXT:    ldr d2, [sp, #72]
; CHECK-SD-NEXT:    ldur q3, [sp, #56]
; CHECK-SD-NEXT:    ldur q4, [sp, #40]
; CHECK-SD-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-SD-NEXT:    stp q1, q0, [x8]
; CHECK-SD-NEXT:    str d2, [x8, #64]
; CHECK-SD-NEXT:    stp q4, q3, [x8, #32]
; CHECK-SD-NEXT:    add sp, sp, #96
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: caller_in_memory:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #96
; CHECK-GI-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 96
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    add x8, sp, #8
; CHECK-GI-NEXT:    bl return_in_memory
; CHECK-GI-NEXT:    ldp x8, x9, [sp, #8]
; CHECK-GI-NEXT:    adrp x10, in_memory_store
; CHECK-GI-NEXT:    ldp x11, x12, [sp, #24]
; CHECK-GI-NEXT:    ldp x13, x14, [sp, #40]
; CHECK-GI-NEXT:    ldp x15, x16, [sp, #56]
; CHECK-GI-NEXT:    ldp x17, x30, [sp, #72] // 8-byte Folded Reload
; CHECK-GI-NEXT:    str x8, [x10, :lo12:in_memory_store]
; CHECK-GI-NEXT:    adrp x8, in_memory_store
; CHECK-GI-NEXT:    add x8, x8, :lo12:in_memory_store
; CHECK-GI-NEXT:    stp x9, x11, [x8, #8]
; CHECK-GI-NEXT:    stp x12, x13, [x8, #24]
; CHECK-GI-NEXT:    stp x14, x15, [x8, #40]
; CHECK-GI-NEXT:    stp x16, x17, [x8, #56]
; CHECK-GI-NEXT:    add sp, sp, #96
; CHECK-GI-NEXT:    ret
  %1 = call %T_IN_MEMORY @return_in_memory()
  store %T_IN_MEMORY %1, ptr @in_memory_store
  ret void
}

;; Receive a %T_IN_MEMORY argument and store it to @in_memory_store. The
;; expected output reads the 72-byte argument from the incoming stack area
;; at [sp] .. [sp, #64] and copies it to the global.
define void @callee_in_memory(%T_IN_MEMORY %a) {
; CHECK-SD-LABEL: callee_in_memory:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldp q1, q2, [sp, #32]
; CHECK-SD-NEXT:    adrp x8, in_memory_store
; CHECK-SD-NEXT:    add x8, x8, :lo12:in_memory_store
; CHECK-SD-NEXT:    ldr d0, [sp, #64]
; CHECK-SD-NEXT:    str d0, [x8, #64]
; CHECK-SD-NEXT:    str q2, [x8, #48]
; CHECK-SD-NEXT:    ldp q2, q0, [sp]
; CHECK-SD-NEXT:    stp q0, q1, [x8, #16]
; CHECK-SD-NEXT:    str q2, [x8]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: callee_in_memory:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldp x8, x9, [sp]
; CHECK-GI-NEXT:    adrp x10, in_memory_store
; CHECK-GI-NEXT:    ldp x11, x12, [sp, #16]
; CHECK-GI-NEXT:    str x8, [x10, :lo12:in_memory_store]
; CHECK-GI-NEXT:    adrp x8, in_memory_store
; CHECK-GI-NEXT:    add x8, x8, :lo12:in_memory_store
; CHECK-GI-NEXT:    stp x9, x11, [x8, #8]
; CHECK-GI-NEXT:    ldp x9, x10, [sp, #32]
; CHECK-GI-NEXT:    stp x12, x9, [x8, #24]
; CHECK-GI-NEXT:    ldp x9, x11, [sp, #48]
; CHECK-GI-NEXT:    str x10, [x8, #40]
; CHECK-GI-NEXT:    ldr x10, [sp, #64]
; CHECK-GI-NEXT:    stp x9, x11, [x8, #48]
; CHECK-GI-NEXT:    str x10, [x8, #64]
; CHECK-GI-NEXT:    ret
  store %T_IN_MEMORY %a, ptr @in_memory_store
  ret void
}

;; Load a %T_IN_MEMORY from @in_memory_store and pass it to
;; @callee_in_memory. The expected output reserves a 96-byte frame, copies
;; the 72-byte value from the global to [sp] .. [sp, #64], and then makes
;; the call.
define void @argument_in_memory() {
; CHECK-SD-LABEL: argument_in_memory:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #96
; CHECK-SD-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    adrp x8, in_memory_store
; CHECK-SD-NEXT:    add x8, x8, :lo12:in_memory_store
; CHECK-SD-NEXT:    ldp q0, q1, [x8]
; CHECK-SD-NEXT:    ldr d4, [x8, #64]
; CHECK-SD-NEXT:    ldp q2, q3, [x8, #32]
; CHECK-SD-NEXT:    str d4, [sp, #64]
; CHECK-SD-NEXT:    stp q0, q1, [sp]
; CHECK-SD-NEXT:    stp q2, q3, [sp, #32]
; CHECK-SD-NEXT:    bl callee_in_memory
; CHECK-SD-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #96
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: argument_in_memory:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #96
; CHECK-GI-NEXT:    str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 96
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    adrp x9, in_memory_store
; CHECK-GI-NEXT:    add x9, x9, :lo12:in_memory_store
; CHECK-GI-NEXT:    adrp x8, in_memory_store
; CHECK-GI-NEXT:    ldp x10, x11, [x9, #8]
; CHECK-GI-NEXT:    ldr x8, [x8, :lo12:in_memory_store]
; CHECK-GI-NEXT:    ldp x12, x13, [x9, #24]
; CHECK-GI-NEXT:    ldp x14, x15, [x9, #40]
; CHECK-GI-NEXT:    ldp x16, x9, [x9, #56]
; CHECK-GI-NEXT:    stp x8, x10, [sp]
; CHECK-GI-NEXT:    stp x11, x12, [sp, #16]
; CHECK-GI-NEXT:    stp x13, x14, [sp, #32]
; CHECK-GI-NEXT:    stp x15, x16, [sp, #48]
; CHECK-GI-NEXT:    str x9, [sp, #64]
; CHECK-GI-NEXT:    bl callee_in_memory
; CHECK-GI-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #96
; CHECK-GI-NEXT:    ret
  %1 = load %T_IN_MEMORY, ptr @in_memory_store
  call void @callee_in_memory(%T_IN_MEMORY %1)
  ret void
}

;; Two elements of { double, { i32 } }: the leaf types differ, so (per the
;; naming used in this file) the value is not treated as a reg block.
%T_NO_BLOCK = type [ 2 x { double, { i32 } } ]

;; Return a zeroed %T_NO_BLOCK: the doubles are expected in d0/d1 and the
;; i32 values in w0/w1.
define %T_NO_BLOCK @return_no_block() {
; CHECK-LABEL: return_no_block:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0000000000000000
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    mov w1, wzr
; CHECK-NEXT:    ret
  ret %T_NO_BLOCK zeroinitializer
}

;; Global used as the source/destination for the %T_NO_BLOCK call tests.
@no_block_store = dso_local global %T_NO_BLOCK zeroinitializer, align 8

;; Call @return_no_block and store its result to @no_block_store. The
;; expected output reads the result from d0, w0, d1 and w1 after the call
;; and stores the fields at offsets 0, 8, 16 and 24 of the global.
define void @caller_no_block() {
; CHECK-SD-LABEL: caller_no_block:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    bl return_no_block
; CHECK-SD-NEXT:    adrp x8, no_block_store
; CHECK-SD-NEXT:    add x8, x8, :lo12:no_block_store
; CHECK-SD-NEXT:    str d0, [x8]
; CHECK-SD-NEXT:    str w0, [x8, #8]
; CHECK-SD-NEXT:    str d1, [x8, #16]
; CHECK-SD-NEXT:    str w1, [x8, #24]
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: caller_no_block:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    bl return_no_block
; CHECK-GI-NEXT:    adrp x8, no_block_store
; CHECK-GI-NEXT:    str d0, [x8, :lo12:no_block_store]
; CHECK-GI-NEXT:    adrp x8, no_block_store
; CHECK-GI-NEXT:    add x8, x8, :lo12:no_block_store
; CHECK-GI-NEXT:    str w0, [x8, #8]
; CHECK-GI-NEXT:    str d1, [x8, #16]
; CHECK-GI-NEXT:    str w1, [x8, #24]
; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-GI-NEXT:    ret
  %1 = call %T_NO_BLOCK @return_no_block()
  store %T_NO_BLOCK %1, ptr @no_block_store
  ret void
}

;; Receive a %T_NO_BLOCK argument and store it to @no_block_store. The
;; expected output takes the fields from the incoming registers d0, w0, d1
;; and w1 and writes them to the global, with no stack accesses.
define void @callee_no_block(%T_NO_BLOCK %a) {
; CHECK-SD-LABEL: callee_no_block:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, no_block_store
; CHECK-SD-NEXT:    add x8, x8, :lo12:no_block_store
; CHECK-SD-NEXT:    str w1, [x8, #24]
; CHECK-SD-NEXT:    str d1, [x8, #16]
; CHECK-SD-NEXT:    str w0, [x8, #8]
; CHECK-SD-NEXT:    str d0, [x8]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: callee_no_block:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, no_block_store
; CHECK-GI-NEXT:    str d0, [x8, :lo12:no_block_store]
; CHECK-GI-NEXT:    adrp x8, no_block_store
; CHECK-GI-NEXT:    add x8, x8, :lo12:no_block_store
; CHECK-GI-NEXT:    str w0, [x8, #8]
; CHECK-GI-NEXT:    str d1, [x8, #16]
; CHECK-GI-NEXT:    str w1, [x8, #24]
; CHECK-GI-NEXT:    ret
  store %T_NO_BLOCK %a, ptr @no_block_store
  ret void
}

;; Load a %T_NO_BLOCK from @no_block_store and pass it to @callee_no_block.
;; The expected output loads the fields into d0, w0, d1 and w1 before the
;; call; x30 is saved and restored around the call.
define void @argument_no_block() {
; CHECK-SD-LABEL: argument_no_block:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    adrp x8, no_block_store
; CHECK-SD-NEXT:    add x8, x8, :lo12:no_block_store
; CHECK-SD-NEXT:    ldr w1, [x8, #24]
; CHECK-SD-NEXT:    ldr d1, [x8, #16]
; CHECK-SD-NEXT:    ldr w0, [x8, #8]
; CHECK-SD-NEXT:    ldr d0, [x8]
; CHECK-SD-NEXT:    bl callee_no_block
; CHECK-SD-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: argument_no_block:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    adrp x8, no_block_store
; CHECK-GI-NEXT:    adrp x9, no_block_store
; CHECK-GI-NEXT:    add x9, x9, :lo12:no_block_store
; CHECK-GI-NEXT:    ldr d0, [x8, :lo12:no_block_store]
; CHECK-GI-NEXT:    ldr w0, [x9, #8]
; CHECK-GI-NEXT:    ldr d1, [x9, #16]
; CHECK-GI-NEXT:    ldr w1, [x9, #24]
; CHECK-GI-NEXT:    bl callee_no_block
; CHECK-GI-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-GI-NEXT:    ret
  %1 = load %T_NO_BLOCK, ptr @no_block_store
  call void @callee_no_block(%T_NO_BLOCK %1)
  ret void
}
