; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s

; Extract elements 15 and 31 of a <32 x i8> vector and insert them back into
; the same vector at indices 1 and 17 respectively.
; The expected assembly below sets up a frame, stores $xr0 to the stack at
; $sp+32, and reloads the byte at $sp+63 (32 + 31) for element 31, while
; element 15 is read directly with vpickve2gr.b.
define <32 x i8> @insert_extract_v32i8(<32 x i8> %a) nounwind {
; CHECK-LABEL: insert_extract_v32i8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi.d $sp, $sp, -96
; CHECK-NEXT:    st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT:    st.d $fp, $sp, 80 # 8-byte Folded Spill
; CHECK-NEXT:    addi.d $fp, $sp, 96
; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 15
; CHECK-NEXT:    xvst $xr0, $sp, 32
; CHECK-NEXT:    ld.b $a1, $sp, 63
; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 1
; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 1
; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT:    addi.d $sp, $fp, -96
; CHECK-NEXT:    ld.d $fp, $sp, 80 # 8-byte Folded Reload
; CHECK-NEXT:    ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 96
; CHECK-NEXT:    ret
entry:
  ; Both extracts read the original, unmodified %a.
  %b_lo = extractelement <32 x i8> %a, i32 15
  %b_hi = extractelement <32 x i8> %a, i32 31
  ; The inserts are chained: %c carries the index-1 write into %d.
  %c = insertelement <32 x i8> %a, i8 %b_lo, i32 1
  %d = insertelement <32 x i8> %c, i8 %b_hi, i32 17
  ret <32 x i8> %d
}

; Extract elements 7 and 15 of a <16 x i16> vector and insert them back into
; the same vector at indices 1 and 9 respectively.
; The expected assembly below sets up a frame, stores $xr0 to the stack at
; $sp+32, and reloads the halfword at $sp+62 (32 + 15 * 2) for element 15,
; while element 7 is read directly with vpickve2gr.h.
define <16 x i16> @insert_extract_v16i16(<16 x i16> %a) nounwind {
; CHECK-LABEL: insert_extract_v16i16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi.d $sp, $sp, -96
; CHECK-NEXT:    st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT:    st.d $fp, $sp, 80 # 8-byte Folded Spill
; CHECK-NEXT:    addi.d $fp, $sp, 96
; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 7
; CHECK-NEXT:    xvst $xr0, $sp, 32
; CHECK-NEXT:    ld.h $a1, $sp, 62
; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 1
; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 1
; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT:    addi.d $sp, $fp, -96
; CHECK-NEXT:    ld.d $fp, $sp, 80 # 8-byte Folded Reload
; CHECK-NEXT:    ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT:    addi.d $sp, $sp, 96
; CHECK-NEXT:    ret
entry:
  ; Both extracts read the original, unmodified %a.
  %b_lo = extractelement <16 x i16> %a, i32 7
  %b_hi = extractelement <16 x i16> %a, i32 15
  ; The inserts are chained: %c carries the index-1 write into %d.
  %c = insertelement <16 x i16> %a, i16 %b_lo, i32 1
  %d = insertelement <16 x i16> %c, i16 %b_hi, i32 9
  ret <16 x i16> %d
}

; Extract elements 3 and 7 of a <8 x i32> vector and insert them back into
; the same vector at indices 1 and 5 respectively.
; The expected assembly uses xvpickve2gr.w / xvinsgr2vr.w on $xr0 directly,
; with no stack frame or memory access.
define <8 x i32> @insert_extract_v8i32(<8 x i32> %a) nounwind {
; CHECK-LABEL: insert_extract_v8i32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvpickve2gr.w $a0, $xr0, 3
; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 7
; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 1
; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a1, 5
; CHECK-NEXT:    ret
entry:
  ; Both extracts read the original, unmodified %a.
  %b_lo = extractelement <8 x i32> %a, i32 3
  %b_hi = extractelement <8 x i32> %a, i32 7
  ; The inserts are chained: %c carries the index-1 write into %d.
  %c = insertelement <8 x i32> %a, i32 %b_lo, i32 1
  %d = insertelement <8 x i32> %c, i32 %b_hi, i32 5
  ret <8 x i32> %d
}

; Extract elements 3 and 7 of a <8 x float> vector and insert them back into
; the same vector at indices 1 and 5 respectively.
; The expected assembly moves each extracted value GPR -> FPR (movgr2fr.w)
; and then FPR -> GPR (movfr2gr.s) before the xvinsgr2vr.w insert.
; NOTE(review): that round trip looks redundant next to the i32 variant of
; this test; presumably a codegen improvement opportunity - confirm.
define <8 x float> @insert_extract_v8f32(<8 x float> %a) nounwind {
; CHECK-LABEL: insert_extract_v8f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvpickve2gr.w $a0, $xr0, 3
; CHECK-NEXT:    movgr2fr.w $fa1, $a0
; CHECK-NEXT:    xvpickve2gr.w $a0, $xr0, 7
; CHECK-NEXT:    movgr2fr.w $fa2, $a0
; CHECK-NEXT:    movfr2gr.s $a0, $fa1
; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 1
; CHECK-NEXT:    movfr2gr.s $a0, $fa2
; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 5
; CHECK-NEXT:    ret
entry:
  ; Both extracts read the original, unmodified %a.
  %b_lo = extractelement <8 x float> %a, i32 3
  %b_hi = extractelement <8 x float> %a, i32 7
  ; The inserts are chained: %c carries the index-1 write into %d.
  %c = insertelement <8 x float> %a, float %b_lo, i32 1
  %d = insertelement <8 x float> %c, float %b_hi, i32 5
  ret <8 x float> %d
}

; Extract elements 1 and 3 of a <4 x i64> vector and insert them back into
; the same vector at indices 0 and 2 respectively.
; The expected assembly uses xvpickve2gr.d / xvinsgr2vr.d on $xr0 directly,
; with no stack frame or memory access.
define <4 x i64> @insert_extract_v4i64(<4 x i64> %a) nounwind {
; CHECK-LABEL: insert_extract_v4i64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT:    xvpickve2gr.d $a1, $xr0, 3
; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a0, 0
; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a1, 2
; CHECK-NEXT:    ret
entry:
  ; Both extracts read the original, unmodified %a.
  %b_lo = extractelement <4 x i64> %a, i32 1
  %b_hi = extractelement <4 x i64> %a, i32 3
  ; The inserts are chained: %c carries the index-0 write into %d.
  %c = insertelement <4 x i64> %a, i64 %b_lo, i32 0
  %d = insertelement <4 x i64> %c, i64 %b_hi, i32 2
  ret <4 x i64> %d
}

; Extract elements 1 and 3 of a <4 x double> vector and insert them back into
; the same vector at indices 0 and 2 respectively.
; The expected assembly moves each extracted value GPR -> FPR (movgr2fr.d)
; and then FPR -> GPR (movfr2gr.d) before the xvinsgr2vr.d insert.
; NOTE(review): that round trip looks redundant next to the i64 variant of
; this test; presumably a codegen improvement opportunity - confirm.
define <4 x double> @insert_extract_v4f64(<4 x double> %a) nounwind {
; CHECK-LABEL: insert_extract_v4f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT:    movgr2fr.d $fa1, $a0
; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 3
; CHECK-NEXT:    movgr2fr.d $fa2, $a0
; CHECK-NEXT:    movfr2gr.d $a0, $fa1
; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a0, 0
; CHECK-NEXT:    movfr2gr.d $a0, $fa2
; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a0, 2
; CHECK-NEXT:    ret
entry:
  ; Both extracts read the original, unmodified %a.
  %b_lo = extractelement <4 x double> %a, i32 1
  %b_hi = extractelement <4 x double> %a, i32 3
  ; The inserts are chained: %c carries the index-0 write into %d.
  %c = insertelement <4 x double> %a, double %b_lo, i32 0
  %d = insertelement <4 x double> %c, double %b_hi, i32 2
  ret <4 x double> %d
}
