;; Machine description for AArch64 SVE2.
;; Copyright (C) 2019-2020 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; The file is organised into the following sections (search for the full
;; line):
;;
;; == Moves
;; ---- Non-temporal gather loads
;; ---- Non-temporal scatter stores
;;
;; == Uniform binary arithmetic
;; ---- [INT] Multiplication
;; ---- [INT] Scaled high-part multiplication
;; ---- [INT] General binary arithmetic that maps to unspecs
;; ---- [INT] Saturating binary arithmetic
;; ---- [INT] Saturating left shifts
;;
;; == Uniform ternary arithmetic
;; ---- [INT] General ternary arithmetic that maps to unspecs
;; ---- [INT] Multiply-and-accumulate operations
;; ---- [INT] Binary logic operations with rotation
;; ---- [INT] Ternary logic operations
;; ---- [INT] Shift-and-accumulate operations
;; ---- [INT] Shift-and-insert operations
;; ---- [INT] Sum of absolute differences
;;
;; == Extending arithmetic
;; ---- [INT] Wide binary arithmetic
;; ---- [INT] Long binary arithmetic
;; ---- [INT] Long left shifts
;; ---- [INT] Long binary arithmetic with accumulation
;; ---- [FP] Long multiplication with accumulation
;;
;; == Narrowing arithmetic
;; ---- [INT] Narrowing unary arithmetic
;; ---- [INT] Narrowing binary arithmetic
;; ---- [INT] Narrowing right shifts
;;
;; == Pairwise arithmetic
;; ---- [INT] Pairwise arithmetic
;; ---- [FP] Pairwise arithmetic
;; ---- [INT] Pairwise arithmetic with accumulation
;;
;; == Complex arithmetic
;; ---- [INT] Complex binary operations
;; ---- [INT] Complex ternary operations
;; ---- [INT] Complex dot product
;;
;; == Conversions
;; ---- [FP<-FP] Widening conversions
;; ---- [FP<-FP] Narrowing conversions
;;
;; == Other arithmetic
;; ---- [INT] Reciprocal approximation
;; ---- [INT<-FP] Base-2 logarithm
;; ---- [INT] Polynomial multiplication
;;
;; == Permutation
;; ---- [INT,FP] General permutes
;; ---- [INT] Optional bit-permute extensions
;;
;; == General
;; ---- Check for aliases between pointers
;; ---- Histogram processing
;; ---- String matching
;;
;; == Cryptographic extensions
;; ---- Optional AES extensions
;; ---- Optional SHA-3 extensions
;; ---- Optional SM4 extensions
;; =========================================================================
;; == Moves
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- Non-temporal gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LDNT1B
;; - LDNT1D
;; - LDNT1H
;; - LDNT1W
;; -------------------------------------------------------------------------
;; Non-extending loads.
(define_insn "@aarch64_gather_ldnt<mode>"
[(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w, w")
(unspec:SVE_FULL_SD
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
(match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")
(mem:BLK (scratch))]
UNSPEC_LDNT1_GATHER))]
"TARGET_SVE2"
"@
ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>]
ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2]"
)
;; Extending loads.
(define_insn_and_rewrite "@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w")
(unspec:SVE_FULL_SDI
[(match_operand:<SVE_FULL_SDI:VPRED> 4 "general_operand" "UplDnm, UplDnm")
(ANY_EXTEND:SVE_FULL_SDI
(unspec:SVE_PARTIAL_I
[(match_operand:<SVE_FULL_SDI:VPRED> 1 "register_operand" "Upl, Upl")
(match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
(match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 3 "register_operand" "w, w")
(mem:BLK (scratch))]
UNSPEC_LDNT1_GATHER))]
UNSPEC_PRED_X))]
"TARGET_SVE2
&& (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
"@
ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>]
ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2]"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<SVE_FULL_SDI:VPRED>mode);
}
)
;; -------------------------------------------------------------------------
;; ---- Non-temporal scatter stores
;; -------------------------------------------------------------------------
;; Includes scatter forms of:
;; - STNT1B
;; - STNT1D
;; - STNT1H
;; - STNT1W
;; -------------------------------------------------------------------------
;; Non-truncating stores.
(define_insn "@aarch64_scatter_stnt<mode>"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:<VPRED> 0 "register_operand" "Upl, Upl")
(match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
(match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w")
(match_operand:SVE_FULL_SD 3 "register_operand" "w, w")]
UNSPEC_STNT1_SCATTER))]
"TARGET_SVE"
"@
stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>]
stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>, %1]"
)
;; Truncating stores.
(define_insn "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:<SVE_FULL_SDI:VPRED> 0 "register_operand" "Upl, Upl")
(match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
(match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 2 "register_operand" "w, w")
(truncate:SVE_PARTIAL_I
(match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))]
UNSPEC_STNT1_SCATTER))]
"TARGET_SVE2
&& (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
"@
stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>]
stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>, %1]"
)
;; =========================================================================
;; == Uniform binary arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [INT] Multiplication
;; -------------------------------------------------------------------------
;; Includes the lane forms of:
;; - MUL
;; -------------------------------------------------------------------------
(define_insn "@aarch64_mul_lane_<mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
(mult:SVE_FULL_HSDI
(unspec:SVE_FULL_HSDI
[(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)
(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))]
"TARGET_SVE2"
"mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)
;; -------------------------------------------------------------------------
;; ---- [INT] Scaled high-part multiplication
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------
;; Unpredicated integer multiply-high-with-(round-and-)scale.
(define_expand "<su>mulh<r>s<mode>3"
[(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
(unspec:SVE_FULL_BHSI
[(match_dup 3)
(unspec:SVE_FULL_BHSI
[(match_operand:SVE_FULL_BHSI 1 "register_operand")
(match_operand:SVE_FULL_BHSI 2 "register_operand")]
MULHRS)]
UNSPEC_PRED_X))]
"TARGET_SVE2"
{
operands[3] = aarch64_ptrue_reg (<VPRED>mode);
rtx prod_b = gen_reg_rtx (<VWIDE>mode);
rtx prod_t = gen_reg_rtx (<VWIDE>mode);
emit_insn (gen_aarch64_sve_<su>mullb<Vwide> (prod_b, operands[1],
operands[2]));
emit_insn (gen_aarch64_sve_<su>mullt<Vwide> (prod_t, operands[1],
operands[2]));
rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
emit_insn (gen_aarch64_sve_<r>shrnb<Vwide> (operands[0], prod_b, shift));
emit_insn (gen_aarch64_sve_<r>shrnt<Vwide> (operands[0], operands[0],
prod_t, shift));
DONE;
}
)
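;; A scalar sketch of what the signed, rounding variant above computes
;; per 16-bit element (illustrative only; the unsigned and non-rounding
;; variants and other element sizes follow the same shape):
;;
;;   int16_t smulhrs (int16_t a, int16_t b)
;;   {
;;     int32_t prod = (int32_t) a * b;                /* SMULLB/SMULLT */
;;     return (int16_t) ((prod + (1 << 14)) >> 15);   /* RSHRNB/RSHRNT by 15 */
;;   }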
;; -------------------------------------------------------------------------
;; ---- [INT] General binary arithmetic that maps to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - SHADD
;; - SHSUB
;; - SHSUBR
;; - SQRSHL
;; - SQRSHLR
;; - SRHADD
;; - SRSHL
;; - SRSHLR
;; - SUQADD
;; - UHADD
;; - UHSUB
;; - UHSUBR
;; - UQRSHL
;; - UQRSHLR
;; - URHADD
;; - URSHL
;; - URSHLR
;; - USQADD
;; -------------------------------------------------------------------------
;; Integer average (floor).
(define_expand "<u>avg<mode>3_floor"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(match_dup 3)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand")]
HADD)]
UNSPEC_PRED_X))]
"TARGET_SVE2"
{
operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
}
)
;; Integer average (rounding).
(define_expand "<u>avg<mode>3_ceil"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(match_dup 3)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand")]
RHADD)]
UNSPEC_PRED_X))]
"TARGET_SVE2"
{
operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
}
)
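;; A scalar sketch of the operations these expanders map to, for signed
;; bytes and assuming an arithmetic right shift (the intermediate sum is
;; evaluated in a wider type, so it cannot overflow; the unsigned forms
;; are analogous):
;;
;;   int shadd  (int8_t a, int8_t b) { return ((int) a + b) >> 1; }      /* floor */
;;   int srhadd (int8_t a, int8_t b) { return ((int) a + b + 1) >> 1; }  /* ceil */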
;; The immediate form of SQADD acts as an immediate form of SUQADD
;; over its full range. In contrast to the ss_plus pattern, we do
;; not need to treat byte immediates specially. E.g.:
;;
;; SQADD Z0.B, Z0.B, #128
;;
;; is equivalent to:
;;
;; MOV Z1.B, #128
;; SUQADD Z0.B, P0/M, Z0.B, Z1.B
;;
;; even though it's not equivalent to:
;;
;; MOV Z1.B, #128
;; SQADD Z0.B, P0/M, Z0.B, Z1.B // Saturating subtraction of 128
(define_insn "@aarch64_sve_suqadd<mode>_const"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
(match_operand:SVE_FULL_I 2 "aarch64_sve_arith_immediate")]
UNSPEC_SUQADD))]
"TARGET_SVE2"
"@
sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2
movprfx\t%0, %1\;sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2"
[(set_attr "movprfx" "*,yes")]
)
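;; A worked example of the distinction above, using a scalar model of
;; signed byte saturation and a lane value of 100:
;;
;;   static int8_t ssat8 (int v) { return v > 127 ? 127 : v < -128 ? -128 : v; }
;;
;;   ssat8 (100 + 128) == 127   /* SQADD #128 / SUQADD: operand read as unsigned */
;;   ssat8 (100 - 128) == -28   /* SQADD (vectors): 0x80 read as signed -128 */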
;; General predicated binary arithmetic. All operations handled here
;; are commutative or have a reversed form.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w")
(match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w")]
SVE2_COND_INT_BINARY_REV)]
UNSPEC_PRED_X))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
[(set_attr "movprfx" "*,*,yes")]
)
;; Predicated binary arithmetic with merging.
(define_expand "@cond_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand")
(unspec:SVE_FULL_I
[(match_dup 5)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand")
(match_operand:SVE_FULL_I 3 "register_operand")]
SVE2_COND_INT_BINARY)]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE2"
{
operands[5] = CONSTM1_RTX (<MODE>mode);
}
)
;; Predicated binary arithmetic, merging with the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_I
[(match_operand 4)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
(match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
SVE2_COND_INT_BINARY)]
UNSPEC_PRED_X)
(match_dup 2)]
UNSPEC_SEL))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; Predicated binary arithmetic, merging with the second input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_I
[(match_operand 4)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
(match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
SVE2_COND_INT_BINARY_REV)]
UNSPEC_PRED_X)
(match_dup 3)]
UNSPEC_SEL))]
"TARGET_SVE2"
"@
<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; Predicated binary operations, merging with an independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
(unspec:SVE_FULL_I
[(match_operand 5)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
(match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w")]
SVE2_COND_INT_BINARY_REV)]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
UNSPEC_SEL))]
"TARGET_SVE2
&& !rtx_equal_p (operands[2], operands[4])
&& !rtx_equal_p (operands[3], operands[4])"
"@
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
#"
"&& 1"
{
if (reload_completed
&& register_operand (operands[4], <MODE>mode)
&& !rtx_equal_p (operands[0], operands[4]))
{
emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
operands[4], operands[1]));
operands[4] = operands[2] = operands[0];
}
else if (!CONSTANT_P (operands[5]))
operands[5] = CONSTM1_RTX (<VPRED>mode);
else
FAIL;
}
[(set_attr "movprfx" "yes")]
)
;; Predicated binary operations with no reverse form, merging with zero.
;; At present we don't generate these patterns via a cond_* optab,
;; so there's no correctness requirement to handle merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_I
[(match_operand 5)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
(match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
SVE2_COND_INT_BINARY_NOREV)]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
UNSPEC_SEL))]
"TARGET_SVE2"
"@
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
"&& !CONSTANT_P (operands[5])"
{
operands[5] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "yes")]
)
;; -------------------------------------------------------------------------
;; ---- [INT] Saturating binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SQDMULH
;; - SQRDMULH
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 1 "register_operand" "w")
(match_operand:SVE_FULL_I 2 "register_operand" "w")]
SVE2_INT_BINARY))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
(unspec:SVE_FULL_HSDI
[(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
(unspec:SVE_FULL_HSDI
[(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
SVE2_INT_BINARY_LANE))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)
;; -------------------------------------------------------------------------
;; ---- [INT] Saturating left shifts
;; -------------------------------------------------------------------------
;; Includes:
;; - SQSHL
;; - SQSHLR
;; - UQSHL
;; - UQSHLR
;; -------------------------------------------------------------------------
;; Predicated left shifts.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w")
(match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w")]
SVE2_COND_INT_SHIFT)]
UNSPEC_PRED_X))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
[(set_attr "movprfx" "*,*,*,yes,yes")]
)
;; Predicated left shifts with merging.
(define_expand "@cond_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand")
(unspec:SVE_FULL_I
[(match_dup 5)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand")
(match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
SVE2_COND_INT_SHIFT)]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 4 "register_operand")]
UNSPEC_SEL))]
"TARGET_SVE2"
{
operands[5] = CONSTM1_RTX (<VPRED>mode);
}
)
;; Predicated left shifts, merging with the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
(unspec:SVE_FULL_I
[(match_operand 4)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w")
(match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, D<lr>, w")]
SVE2_COND_INT_SHIFT)]
UNSPEC_PRED_X)
(match_dup 2)]
UNSPEC_SEL))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,*,yes,yes")]
)
;; Predicated left shifts, merging with the second input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_I
[(match_operand 4)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
(match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
SVE2_COND_INT_SHIFT)]
UNSPEC_PRED_X)
(match_dup 3)]
UNSPEC_SEL))]
"TARGET_SVE2"
"@
<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
movprfx\t%0, %3\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; Predicated left shifts, merging with an independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, &w, &w, ?&w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl")
(unspec:SVE_FULL_I
[(match_operand 5)
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w, w, w, w, w")
(match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w, D<lr>, w, D<lr>, w")]
SVE2_COND_INT_SHIFT)]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, Dz, 0, 0, w, w")]
UNSPEC_SEL))]
"TARGET_SVE2
&& !rtx_equal_p (operands[2], operands[4])
&& (CONSTANT_P (operands[4]) || !rtx_equal_p (operands[3], operands[4]))"
"@
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
#
#"
"&& 1"
{
if (reload_completed
&& register_operand (operands[4], <MODE>mode)
&& !rtx_equal_p (operands[0], operands[4]))
{
emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
operands[4], operands[1]));
operands[4] = operands[2] = operands[0];
}
else if (!CONSTANT_P (operands[5]))
operands[5] = CONSTM1_RTX (<VPRED>mode);
else
FAIL;
}
[(set_attr "movprfx" "yes")]
)
;; =========================================================================
;; == Uniform ternary arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [INT] General ternary arithmetic that maps to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - ADCLB
;; - ADCLT
;; - EORBT
;; - EORTB
;; - SBCLB
;; - SBCLT
;; - SQRDMLAH
;; - SQRDMLSH
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
(match_operand:SVE_FULL_I 3 "register_operand" "w, w")
(match_operand:SVE_FULL_I 1 "register_operand" "0, w")]
SVE2_INT_TERNARY))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
[(set_attr "movprfx" "*,yes")]
)
(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_HSDI
[(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w")
(unspec:SVE_FULL_HSDI
[(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)
(match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")]
SVE2_INT_TERNARY_LANE))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
[(set_attr "movprfx" "*,yes")]
)
;; -------------------------------------------------------------------------
;; ---- [INT] Multiply-and-accumulate operations
;; -------------------------------------------------------------------------
;; Includes the lane forms of:
;; - MLA
;; - MLS
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_add_mul_lane_<mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
(plus:SVE_FULL_HSDI
(mult:SVE_FULL_HSDI
(unspec:SVE_FULL_HSDI
[(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))
(match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
"TARGET_SVE2"
"@
mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
movprfx\t%0, %1\;mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
[(set_attr "movprfx" "*,yes")]
)
(define_insn "@aarch64_sve_sub_mul_lane_<mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
(minus:SVE_FULL_HSDI
(match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
(mult:SVE_FULL_HSDI
(unspec:SVE_FULL_HSDI
[(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))))]
"TARGET_SVE2"
"@
mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
movprfx\t%0, %1\;mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
[(set_attr "movprfx" "*,yes")]
)
;; -------------------------------------------------------------------------
;; ---- [INT] Binary logic operations with rotation
;; -------------------------------------------------------------------------
;; Includes:
;; - XAR
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve2_xar<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(rotatert:SVE_FULL_I
(xor:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand" "%0, w")
(match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
(match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
"TARGET_SVE2"
"@
xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
[(set_attr "movprfx" "*,yes")]
)
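;; A scalar sketch of the per-element operation for 32-bit elements
;; (illustrative only; the rotate amount is in the range 1-32):
;;
;;   uint32_t xar32 (uint32_t a, uint32_t b, unsigned imm)
;;   {
;;     uint32_t x = a ^ b;
;;     unsigned r = imm & 31;                      /* a rotate by 32 is the identity */
;;     return (x >> r) | (x << ((32 - r) & 31));
;;   }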
;; -------------------------------------------------------------------------
;; ---- [INT] Ternary logic operations
;; -------------------------------------------------------------------------
;; Includes:
;; - BCAX
;; - BSL
;; - BSL1N
;; - BSL2N
;; - EOR3
;; - NBSL
;; -------------------------------------------------------------------------
;; Unpredicated exclusive OR of AND.
(define_expand "@aarch64_sve2_bcax<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(xor:SVE_FULL_I
(and:SVE_FULL_I
(unspec:SVE_FULL_I
[(match_dup 4)
(not:SVE_FULL_I
(match_operand:SVE_FULL_I 3 "register_operand"))]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 2 "register_operand"))
(match_operand:SVE_FULL_I 1 "register_operand")))]
"TARGET_SVE2"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
)
(define_insn_and_rewrite "*aarch64_sve2_bcax<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(xor:SVE_FULL_I
(and:SVE_FULL_I
(unspec:SVE_FULL_I
[(match_operand 4)
(not:SVE_FULL_I
(match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
(match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
"TARGET_SVE2"
"@
bcax\t%0.d, %0.d, %2.d, %3.d
movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
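;; As a scalar sketch, the pattern above computes
;; operand 1 ^ (operand 2 & ~operand 3):
;;
;;   uint64_t bcax (uint64_t a, uint64_t b, uint64_t c) { return a ^ (b & ~c); }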
;; Unpredicated 3-way exclusive OR.
(define_insn "@aarch64_sve2_eor3<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
(xor:SVE_FULL_I
(xor:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand" "0, w, w, w")
(match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w"))
(match_operand:SVE_FULL_I 3 "register_operand" "w, w, 0, w")))]
"TARGET_SVE2"
"@
eor3\t%0.d, %0.d, %2.d, %3.d
eor3\t%0.d, %0.d, %1.d, %3.d
eor3\t%0.d, %0.d, %1.d, %2.d
movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d"
[(set_attr "movprfx" "*,*,*,yes")]
)
;; Use NBSL for vector NOR.
(define_insn_and_rewrite "*aarch64_sve2_nor<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand 3)
(and:SVE_FULL_I
(not:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand" "%0, w"))
(not:SVE_FULL_I
(match_operand:SVE_FULL_I 2 "register_operand" "w, w")))]
UNSPEC_PRED_X))]
"TARGET_SVE2"
"@
nbsl\t%0.d, %0.d, %2.d, %0.d
movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d"
"&& !CONSTANT_P (operands[3])"
{
operands[3] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; Use NBSL for vector NAND.
(define_insn_and_rewrite "*aarch64_sve2_nand<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand 3)
(ior:SVE_FULL_I
(not:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand" "%0, w"))
(not:SVE_FULL_I
(match_operand:SVE_FULL_I 2 "register_operand" "w, w")))]
UNSPEC_PRED_X))]
"TARGET_SVE2"
"@
nbsl\t%0.d, %0.d, %2.d, %2.d
movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d"
"&& !CONSTANT_P (operands[3])"
{
operands[3] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; Unpredicated bitwise select.
;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(xor:SVE_FULL_I
(and:SVE_FULL_I
(xor:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand"))
(match_operand:SVE_FULL_I 3 "register_operand"))
(match_dup 2)))]
"TARGET_SVE2"
)
(define_insn "*aarch64_sve2_bsl<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(xor:SVE_FULL_I
(and:SVE_FULL_I
(xor:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
(match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
(match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
(match_dup BSL_DUP)))]
"TARGET_SVE2"
"@
bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
[(set_attr "movprfx" "*,yes")]
)
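;; A scalar check of the identity above, applied bitwise to one element:
;;
;;   uint64_t bsl (uint64_t mov, uint64_t dup, uint64_t sel)
;;   {
;;     return ((mov ^ dup) & sel) ^ dup;   /* == (sel & mov) | (~sel & dup) */
;;   }
;;
;; Each bit of SEL that is set selects the MOV bit and each clear bit
;; selects the DUP bit; the NBSL, BSL1N and BSL2N identities below
;; follow from the same per-bit reasoning.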
;; Unpredicated bitwise inverted select.
;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
(define_expand "@aarch64_sve2_nbsl<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(match_dup 4)
(not:SVE_FULL_I
(xor:SVE_FULL_I
(and:SVE_FULL_I
(xor:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand"))
(match_operand:SVE_FULL_I 3 "register_operand"))
(match_dup 2)))]
UNSPEC_PRED_X))]
"TARGET_SVE2"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
)
(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand 4)
(not:SVE_FULL_I
(xor:SVE_FULL_I
(and:SVE_FULL_I
(xor:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
(match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
(match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
(match_dup BSL_DUP)))]
UNSPEC_PRED_X))]
"TARGET_SVE2"
"@
nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; Unpredicated bitwise select with inverted first operand.
;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl1n<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(xor:SVE_FULL_I
(and:SVE_FULL_I
(unspec:SVE_FULL_I
[(match_dup 4)
(not:SVE_FULL_I
(xor:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand")))]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 3 "register_operand"))
(match_dup 2)))]
"TARGET_SVE2"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
)
(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(xor:SVE_FULL_I
(and:SVE_FULL_I
(unspec:SVE_FULL_I
[(match_operand 4)
(not:SVE_FULL_I
(xor:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
(match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w")))]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
(match_dup BSL_DUP)))]
"TARGET_SVE2"
"@
bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; Unpredicated bitwise select with inverted second operand.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
(define_expand "@aarch64_sve2_bsl2n<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(ior:SVE_FULL_I
(and:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 3 "register_operand"))
(unspec:SVE_FULL_I
[(match_dup 4)
(and:SVE_FULL_I
(not:SVE_FULL_I
(match_operand:SVE_FULL_I 2 "register_operand"))
(not:SVE_FULL_I
(match_dup 3)))]
UNSPEC_PRED_X)))]
"TARGET_SVE2"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
)
(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(ior:SVE_FULL_I
(and:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
(match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
(unspec:SVE_FULL_I
[(match_operand 4)
(and:SVE_FULL_I
(not:SVE_FULL_I
(match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
(not:SVE_FULL_I
(match_dup BSL_DUP)))]
UNSPEC_PRED_X)))]
"TARGET_SVE2"
"@
bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; Unpredicated bitwise select with inverted second operand, alternative form.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))
(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(ior:SVE_FULL_I
(and:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
(match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
(unspec:SVE_FULL_I
[(match_operand 4)
(and:SVE_FULL_I
(not:SVE_FULL_I
(match_dup BSL_DUP))
(not:SVE_FULL_I
(match_operand:SVE_FULL_I 3 "register_operand" "w, w")))]
UNSPEC_PRED_X)))]
"TARGET_SVE2"
"@
bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; -------------------------------------------------------------------------
;; ---- [INT] Shift-and-accumulate operations
;; -------------------------------------------------------------------------
;; Includes:
;; - SRSRA
;; - SSRA
;; - URSRA
;; - USRA
;; -------------------------------------------------------------------------
;; Provide the natural unpredicated interface for SSRA and USRA.
(define_expand "@aarch64_sve_add_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(plus:SVE_FULL_I
(unspec:SVE_FULL_I
[(match_dup 4)
(SHIFTRT:SVE_FULL_I
(match_operand:SVE_FULL_I 2 "register_operand")
(match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 1 "register_operand")))]
"TARGET_SVE2"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
)
;; Pattern-match SSRA and USRA as a predicated operation whose predicate
;; isn't needed.
(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(plus:SVE_FULL_I
(unspec:SVE_FULL_I
[(match_operand 4)
(SHIFTRT:SVE_FULL_I
(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
(match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
UNSPEC_PRED_X)
(match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
"TARGET_SVE2"
"@
<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; SRSRA and URSRA.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(plus:SVE_FULL_I
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
(match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
VRSHR_N)
(match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
"TARGET_SVE2"
"@
<sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3
movprfx\t%0, %1\;<sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
[(set_attr "movprfx" "*,yes")]
)
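;; A scalar sketch of the accumulating shifts for unsigned 32-bit
;; elements and shift amounts in the range 1-31 (SSRA and SRSRA are the
;; same with arithmetic shifts):
;;
;;   uint32_t usra  (uint32_t acc, uint32_t x, unsigned n) { return acc + (x >> n); }
;;   uint32_t ursra (uint32_t acc, uint32_t x, unsigned n)
;;   {
;;     return acc + ((x + (1u << (n - 1))) >> n);
;;   }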
;; -------------------------------------------------------------------------
;; ---- [INT] Shift-and-insert operations
;; -------------------------------------------------------------------------
;; Includes:
;; - SLI
;; - SRI
;; -------------------------------------------------------------------------
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 1 "register_operand" "0")
(match_operand:SVE_FULL_I 2 "register_operand" "w")
(match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
SVE2_INT_SHIFT_INSERT))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, #%3"
)
;; -------------------------------------------------------------------------
;; ---- [INT] Sum of absolute differences
;; -------------------------------------------------------------------------
;; Includes:
;; - SABA
;; - UABA
;; -------------------------------------------------------------------------
;; Provide the natural unpredicated interface for SABA and UABA.
(define_expand "@aarch64_sve2_<su>aba<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(plus:SVE_FULL_I
(minus:SVE_FULL_I
(unspec:SVE_FULL_I
[(match_dup 4)
(USMAX:SVE_FULL_I
(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
(match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
UNSPEC_PRED_X)
(unspec:SVE_FULL_I
[(match_dup 4)
(<max_opp>:SVE_FULL_I
(match_dup 2)
(match_dup 3))]
UNSPEC_PRED_X))
(match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
"TARGET_SVE2"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
)
;; Pattern-match SABA and UABA as an absolute-difference-and-accumulate
;; operation whose predicates aren't needed.
(define_insn "*aarch64_sve2_<su>aba<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(plus:SVE_FULL_I
(minus:SVE_FULL_I
(unspec:SVE_FULL_I
[(match_operand 4)
(USMAX:SVE_FULL_I
(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
(match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
UNSPEC_PRED_X)
(unspec:SVE_FULL_I
[(match_operand 5)
(<max_opp>:SVE_FULL_I
(match_dup 2)
(match_dup 3))]
UNSPEC_PRED_X))
(match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
"TARGET_SVE2"
"@
<su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
movprfx\t%0, %1\;<su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
[(set_attr "movprfx" "*,yes")]
)
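;; A scalar sketch of the per-element operation for unsigned bytes,
;; written as max - min to match the RTL above:
;;
;;   uint8_t uaba (uint8_t acc, uint8_t a, uint8_t b)
;;   {
;;     uint8_t max = a > b ? a : b;
;;     uint8_t min = a > b ? b : a;
;;     return acc + (max - min);   /* acc + |a - b|, wrapping on overflow */
;;   }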
;; =========================================================================
;; == Extending arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [INT] Wide binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SADDWB
;; - SADDWT
;; - SSUBWB
;; - SSUBWT
;; - UADDWB
;; - UADDWT
;; - USUBWB
;; - USUBWT
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
(unspec:SVE_FULL_HSDI
[(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
(match_operand:<VNARROW> 2 "register_operand" "w")]
SVE2_INT_BINARY_WIDE))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Ventype>"
)
;; -------------------------------------------------------------------------
;; ---- [INT] Long binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SABDLB
;; - SABDLT
;; - SADDLB
;; - SADDLBT
;; - SADDLT
;; - SMULLB
;; - SMULLT
;; - SQDMULLB
;; - SQDMULLT
;; - SSUBLB
;; - SSUBLBT
;; - SSUBLT
;; - SSUBLTB
;; - UABDLB
;; - UABDLT
;; - UADDLB
;; - UADDLT
;; - UMULLB
;; - UMULLT
;; - USUBLB
;; - USUBLT
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
(unspec:SVE_FULL_HSDI
[(match_operand:<VNARROW> 1 "register_operand" "w")
(match_operand:<VNARROW> 2 "register_operand" "w")]
SVE2_INT_BINARY_LONG))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
)
(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
(unspec:SVE_FULL_SDI
[(match_operand:<VNARROW> 1 "register_operand" "w")
(unspec:<VNARROW>
[(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
SVE2_INT_BINARY_LONG_LANE))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
)
;; -------------------------------------------------------------------------
;; ---- [INT] Long left shifts
;; -------------------------------------------------------------------------
;; Includes:
;; - SSHLLB
;; - SSHLLT
;; - USHLLB
;; - USHLLT
;; -------------------------------------------------------------------------
;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
(unspec:SVE_FULL_HSDI
[(match_operand:<VNARROW> 1 "register_operand" "w")
(match_operand:DI 2 "const_int_operand")]
SVE2_INT_SHIFT_IMM_LONG))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, #%2"
)
;; -------------------------------------------------------------------------
;; ---- [INT] Long binary arithmetic with accumulation
;; -------------------------------------------------------------------------
;; Includes:
;; - SABALB
;; - SABALT
;; - SMLALB
;; - SMLALT
;; - SMLSLB
;; - SMLSLT
;; - SQDMLALB
;; - SQDMLALBT
;; - SQDMLALT
;; - SQDMLSLB
;; - SQDMLSLBT
;; - SQDMLSLT
;; - UABALB
;; - UABALT
;; - UMLALB
;; - UMLALT
;; - UMLSLB
;; - UMLSLT
;; -------------------------------------------------------------------------
;; Non-saturating MLA operations.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
(plus:SVE_FULL_HSDI
(unspec:SVE_FULL_HSDI
[(match_operand:<VNARROW> 2 "register_operand" "w, w")
(match_operand:<VNARROW> 3 "register_operand" "w, w")]
SVE2_INT_ADD_BINARY_LONG)
(match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
"TARGET_SVE2"
"@
<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
[(set_attr "movprfx" "*,yes")]
)
;; Non-saturating MLA operations with lane select.
(define_insn "@aarch64_sve_add_<sve_int_op>_lane_<mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
(plus:SVE_FULL_SDI
(unspec:SVE_FULL_SDI
[(match_operand:<VNARROW> 2 "register_operand" "w, w")
(unspec:<VNARROW>
[(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
SVE2_INT_ADD_BINARY_LONG_LANE)
(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
"TARGET_SVE2"
"@
<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
[(set_attr "movprfx" "*,yes")]
)
;; Saturating MLA operations.
(define_insn "@aarch64_sve_qadd_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
(ss_plus:SVE_FULL_HSDI
(unspec:SVE_FULL_HSDI
[(match_operand:<VNARROW> 2 "register_operand" "w, w")
(match_operand:<VNARROW> 3 "register_operand" "w, w")]
SVE2_INT_QADD_BINARY_LONG)
(match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
"TARGET_SVE2"
"@
<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
[(set_attr "movprfx" "*,yes")]
)
;; Saturating MLA operations with lane select.
(define_insn "@aarch64_sve_qadd_<sve_int_op>_lane_<mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
(ss_plus:SVE_FULL_SDI
(unspec:SVE_FULL_SDI
[(match_operand:<VNARROW> 2 "register_operand" "w, w")
(unspec:<VNARROW>
[(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
SVE2_INT_QADD_BINARY_LONG_LANE)
(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
"TARGET_SVE2"
"@
<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
[(set_attr "movprfx" "*,yes")]
)
;; Non-saturating MLS operations.
(define_insn "@aarch64_sve_sub_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
(minus:SVE_FULL_HSDI
(match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
(unspec:SVE_FULL_HSDI
[(match_operand:<VNARROW> 2 "register_operand" "w, w")
(match_operand:<VNARROW> 3 "register_operand" "w, w")]
SVE2_INT_SUB_BINARY_LONG)))]
"TARGET_SVE2"
"@
<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
[(set_attr "movprfx" "*,yes")]
)
;; Non-saturating MLS operations with lane select.
(define_insn "@aarch64_sve_sub_<sve_int_op>_lane_<mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
(minus:SVE_FULL_SDI
(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
(unspec:SVE_FULL_SDI
[(match_operand:<VNARROW> 2 "register_operand" "w, w")
(unspec:<VNARROW>
[(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
SVE2_INT_SUB_BINARY_LONG_LANE)))]
"TARGET_SVE2"
"@
<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
[(set_attr "movprfx" "*,yes")]
)
;; Saturating MLS operations.
(define_insn "@aarch64_sve_qsub_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
(ss_minus:SVE_FULL_HSDI
(match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
(unspec:SVE_FULL_HSDI
[(match_operand:<VNARROW> 2 "register_operand" "w, w")
(match_operand:<VNARROW> 3 "register_operand" "w, w")]
SVE2_INT_QSUB_BINARY_LONG)))]
"TARGET_SVE2"
"@
<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
[(set_attr "movprfx" "*,yes")]
)
;; Saturating MLS operations with lane select.
(define_insn "@aarch64_sve_qsub_<sve_int_op>_lane_<mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
(ss_minus:SVE_FULL_SDI
(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
(unspec:SVE_FULL_SDI
[(match_operand:<VNARROW> 2 "register_operand" "w, w")
(unspec:<VNARROW>
[(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
SVE2_INT_QSUB_BINARY_LONG_LANE)))]
"TARGET_SVE2"
"@
<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
[(set_attr "movprfx" "*,yes")]
)
;; -------------------------------------------------------------------------
;; ---- [FP] Long multiplication with accumulation
;; -------------------------------------------------------------------------
;; Includes:
;; - FMLALB
;; - FMLALT
;; - FMLSLB
;; - FMLSLT
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_fp_op><mode>"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
(unspec:VNx4SF_ONLY
[(match_operand:<VNARROW> 1 "register_operand" "w, w")
(match_operand:<VNARROW> 2 "register_operand" "w, w")
(match_operand:VNx4SF_ONLY 3 "register_operand" "0, w")]
SVE2_FP_TERNARY_LONG))]
"TARGET_SVE2"
"@
<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>
movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
[(set_attr "movprfx" "*,yes")]
)
(define_insn "@aarch64_<sve_fp_op>_lane_<mode>"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
(unspec:VNx4SF_ONLY
[(match_operand:<VNARROW> 1 "register_operand" "w, w")
(unspec:<VNARROW>
[(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)
(match_operand:VNx4SF_ONLY 4 "register_operand" "0, w")]
SVE2_FP_TERNARY_LONG_LANE))]
"TARGET_SVE2"
"@
<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]
movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
[(set_attr "movprfx" "*,yes")]
)
;; =========================================================================
;; == Narrowing arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing unary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SQXTNB
;; - SQXTNT
;; - SQXTUNB
;; - SQXTUNT
;; - UQXTNB
;; - UQXTNT
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
(unspec:<VNARROW>
[(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")]
SVE2_INT_UNARY_NARROWB))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Ventype>, %1.<Vetype>"
)
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
(unspec:<VNARROW>
[(match_operand:<VNARROW> 1 "register_operand" "0")
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
SVE2_INT_UNARY_NARROWT))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Ventype>, %2.<Vetype>"
)
;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - ADDHNB
;; - ADDHNT
;; - RADDHNB
;; - RADDHNT
;; - RSUBHNB
;; - RSUBHNT
;; - SUBHNB
;; - SUBHNT
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
(unspec:<VNARROW>
[(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
SVE2_INT_BINARY_NARROWB))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
)
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
(unspec:<VNARROW>
[(match_operand:<VNARROW> 1 "register_operand" "0")
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
(match_operand:SVE_FULL_HSDI 3 "register_operand" "w")]
SVE2_INT_BINARY_NARROWT))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
)
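;; A scalar sketch for 32-bit inputs narrowed to 16 bits: ADDHNB keeps
;; the high half of the sum, RADDHNB rounds before taking it, and the
;; SUBHN forms subtract instead of adding:
;;
;;   uint16_t addhnb  (uint32_t a, uint32_t b) { return (a + b) >> 16; }
;;   uint16_t raddhnb (uint32_t a, uint32_t b) { return (a + b + (1u << 15)) >> 16; }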
;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing right shifts
;; -------------------------------------------------------------------------
;; Includes:
;; - RSHRNB
;; - RSHRNT
;; - SHRNB
;; - SHRNT
;; - SQRSHRNB
;; - SQRSHRNT
;; - SQRSHRUNB
;; - SQRSHRUNT
;; - SQSHRNB
;; - SQSHRNT
;; - SQSHRUNB
;; - SQSHRUNT
;; - UQRSHRNB
;; - UQRSHRNT
;; - UQSHRNB
;; - UQSHRNT
;; -------------------------------------------------------------------------
;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
(unspec:<VNARROW>
[(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
(match_operand:DI 2 "const_int_operand")]
SVE2_INT_SHIFT_IMM_NARROWB))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, #%2"
)
;; The immediate range is enforced before generating the instruction.
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
(unspec:<VNARROW>
[(match_operand:<VNARROW> 1 "register_operand" "0")
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
(match_operand:DI 3 "const_int_operand")]
SVE2_INT_SHIFT_IMM_NARROWT))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, #%3"
)
;; =========================================================================
;; == Pairwise arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [INT] Pairwise arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - ADDP
;; - SMAXP
;; - SMINP
;; - UMAXP
;; - UMINP
;; -------------------------------------------------------------------------
(define_insn "@aarch64_pred_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
(match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
SVE2_INT_BINARY_PAIR))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
[(set_attr "movprfx" "*,yes")]
)
;; -------------------------------------------------------------------------
;; ---- [FP] Pairwise arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - FADDP
;; - FMAXP
;; - FMAXNMP
;; - FMINP
;; - FMINNMP
;; -------------------------------------------------------------------------
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(match_operand:SVE_FULL_F 2 "register_operand" "0, w")
(match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
SVE2_FP_BINARY_PAIR))]
"TARGET_SVE2"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
[(set_attr "movprfx" "*,yes")]
)
;; -------------------------------------------------------------------------
;; ---- [INT] Pairwise arithmetic with accumulation
;; -------------------------------------------------------------------------
;; Includes:
;; - SADALP
;; - UADALP
;; -------------------------------------------------------------------------
;; Predicated pairwise add-and-accumulate-long with merging.
(define_expand "@cond_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
(unspec:SVE_FULL_HSDI
[(match_operand:<VPRED> 1 "register_operand")
(unspec:SVE_FULL_HSDI
[(match_dup 1)
(match_operand:SVE_FULL_HSDI 2 "register_operand")
(match_operand:<VNARROW> 3 "register_operand")]
SVE2_INT_BINARY_PAIR_LONG)
(match_operand:SVE_FULL_HSDI 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE2"
{
/* Only target code is aware of these operations, so we don't need
to handle the fully-general case. */
gcc_assert (rtx_equal_p (operands[2], operands[4])
|| CONSTANT_P (operands[4]));
})
;; Predicated pairwise add-and-accumulate-long, merging with
;; the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_HSDI
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_HSDI
[(match_operand 4)
(match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
(match_operand:<VNARROW> 3 "register_operand" "w, w")]
SVE2_INT_BINARY_PAIR_LONG)
(match_dup 2)]
UNSPEC_SEL))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; Predicated pairwise add-and-accumulate-long, merging with zero.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w")
(unspec:SVE_FULL_HSDI
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_FULL_HSDI
[(match_operand 5)
(match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
(match_operand:<VNARROW> 3 "register_operand" "w, w")]
SVE2_INT_BINARY_PAIR_LONG)
(match_operand:SVE_FULL_HSDI 4 "aarch64_simd_imm_zero")]
UNSPEC_SEL))]
"TARGET_SVE2"
"@
movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
"&& !CONSTANT_P (operands[5])"
{
operands[5] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "yes")]
)
;; =========================================================================
;; == Complex arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [INT] Complex binary operations
;; -------------------------------------------------------------------------
;; Includes:
;; - CADD
;; - SQCADD
;; -------------------------------------------------------------------------
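;; CADD and SQCADD treat each even/odd element pair as a complex value
;; (real, imag) and add the second operand rotated by the immediate angle.
;; A rough sketch for rotation #90 (an illustration, not the ISA pseudocode):
;;
;;   res.re = a.re - b.im;
;;   res.im = a.im + b.re;
;;
;; Rotation #270 flips both signs on the b terms, and SQCADD additionally
;; saturates the results.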
(define_insn "@aarch64_sve_<optab><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
(match_operand:SVE_FULL_I 2 "register_operand" "w, w")]
SVE2_INT_CADD))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>
movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>"
[(set_attr "movprfx" "*,yes")]
)
;; -------------------------------------------------------------------------
;; ---- [INT] Complex ternary operations
;; -------------------------------------------------------------------------
;; Includes:
;; - CMLA
;; - SQRDCMLA
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<optab><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
(match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
SVE2_INT_CMLA))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>
movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>"
[(set_attr "movprfx" "*,yes")]
)
(define_insn "@aarch64_<optab>_lane_<mode>"
[(set (match_operand:SVE_FULL_HSI 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_HSI
[(match_operand:SVE_FULL_HSI 1 "register_operand" "0, w")
(match_operand:SVE_FULL_HSI 2 "register_operand" "w, w")
(unspec:SVE_FULL_HSI
[(match_operand:SVE_FULL_HSI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
SVE2_INT_CMLA))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>
movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>"
[(set_attr "movprfx" "*,yes")]
)
;; -------------------------------------------------------------------------
;; ---- [INT] Complex dot product
;; -------------------------------------------------------------------------
;; Includes:
;; - CDOT
;; -------------------------------------------------------------------------
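;; CDOT accumulates rotated complex products of quarter-width element pairs
;; into each accumulator element.  A minimal ACLE-level usage sketch,
;; assuming the svcdot intrinsic name (which is not defined in this file):
;;
;;   #include <arm_sve.h>
;;   svint32_t f (svint32_t acc, svint8_t a, svint8_t b) {
;;     return svcdot_s32 (acc, a, b, 90);   /* CDOT ..., #90 */
;;   }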
(define_insn "@aarch64_sve_<optab><mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_SDI
[(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
(match_operand:<VSI2QI> 2 "register_operand" "w, w")
(match_operand:<VSI2QI> 3 "register_operand" "w, w")]
SVE2_INT_CDOT))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>
movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>"
[(set_attr "movprfx" "*,yes")]
)
(define_insn "@aarch64_<optab>_lane_<mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
(unspec:SVE_FULL_SDI
[(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
(match_operand:<VSI2QI> 2 "register_operand" "w, w")
(unspec:<VSI2QI>
[(match_operand:<VSI2QI> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
SVE2_INT_CDOT))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>
movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>"
[(set_attr "movprfx" "*,yes")]
)
;; =========================================================================
;; == Conversions
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Widening conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVTLT
;; -------------------------------------------------------------------------
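;; FCVTLT widens the odd-numbered ("top") elements of the narrower source.
;; A rough per-lane sketch (illustration only):
;;
;;   for (i = 0; i < n_wide; i++)
;;     if (pg[i]) d[i] = (wide_float) s[2*i + 1];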
;; Predicated convert long top.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
[(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
(unspec:SVE_FULL_SDF
[(match_operand:<VPRED> 1 "register_operand" "Upl")
(match_operand:SI 3 "aarch64_sve_gp_strictness")
(match_operand:<VNARROW> 2 "register_operand" "0")]
SVE2_COND_FP_UNARY_LONG))]
"TARGET_SVE2"
"<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Ventype>"
)
;; Predicated convert long top with merging.
(define_expand "@cond_<sve_fp_op><mode>"
[(set (match_operand:SVE_FULL_SDF 0 "register_operand")
(unspec:SVE_FULL_SDF
[(match_operand:<VPRED> 1 "register_operand")
(unspec:SVE_FULL_SDF
[(match_dup 1)
(const_int SVE_STRICT_GP)
(match_operand:<VNARROW> 2 "register_operand")]
SVE2_COND_FP_UNARY_LONG)
(match_operand:SVE_FULL_SDF 3 "register_operand")]
UNSPEC_SEL))]
"TARGET_SVE2"
)
;; These instructions do not take MOVPRFX.
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_relaxed"
[(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
(unspec:SVE_FULL_SDF
[(match_operand:<VPRED> 1 "register_operand" "Upl")
(unspec:SVE_FULL_SDF
[(match_operand 4)
(const_int SVE_RELAXED_GP)
(match_operand:<VNARROW> 2 "register_operand" "w")]
SVE2_COND_FP_UNARY_LONG)
(match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
UNSPEC_SEL))]
"TARGET_SVE2"
"<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
"&& !rtx_equal_p (operands[1], operands[4])"
{
operands[4] = copy_rtx (operands[1]);
}
)
(define_insn "*cond_<sve_fp_op><mode>_strict"
[(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
(unspec:SVE_FULL_SDF
[(match_operand:<VPRED> 1 "register_operand" "Upl")
(unspec:SVE_FULL_SDF
[(match_dup 1)
(const_int SVE_STRICT_GP)
(match_operand:<VNARROW> 2 "register_operand" "w")]
SVE2_COND_FP_UNARY_LONG)
(match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
UNSPEC_SEL))]
"TARGET_SVE2"
"<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
)
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Narrowing conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVTNT
;; - FCVTX
;; - FCVTXNT
;; -------------------------------------------------------------------------
;; Predicated FCVTNT. This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_cvtnt<mode>"
[(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
(unspec:SVE_FULL_HSF
[(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
(const_int SVE_STRICT_GP)
(match_operand:SVE_FULL_HSF 1 "register_operand" "0")
(match_operand:<VWIDE> 3 "register_operand" "w")]
UNSPEC_COND_FCVTNT))]
"TARGET_SVE2"
"fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
)
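;; As a rough per-lane sketch of the FCVTNT operation above (illustration
;; only), the narrowed results land in the odd-numbered ("top") elements
;; while the even elements keep the values supplied by operand 1:
;;
;;   for (i = 0; i < n_wide; i++)
;;     if (pg[i]) d[2*i + 1] = (narrow_float) s[i];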
;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
;; it supports MOVPRFX).
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
(unspec:VNx4SF_ONLY
[(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl")
(match_operand:SI 3 "aarch64_sve_gp_strictness")
(match_operand:<VWIDE> 2 "register_operand" "0, w")]
SVE2_COND_FP_UNARY_NARROWB))]
"TARGET_SVE2"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
[(set_attr "movprfx" "*,yes")]
)
;; Predicated FCVTX with merging.
(define_expand "@cond_<sve_fp_op><mode>"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand")
(unspec:VNx4SF_ONLY
[(match_operand:<VWIDE_PRED> 1 "register_operand")
(unspec:VNx4SF_ONLY
[(match_dup 1)
(const_int SVE_STRICT_GP)
(match_operand:<VWIDE> 2 "register_operand")]
SVE2_COND_FP_UNARY_NARROWB)
(match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE2"
)
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any_relaxed"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
(unspec:VNx4SF_ONLY
[(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
(unspec:VNx4SF_ONLY
[(match_operand 4)
(const_int SVE_RELAXED_GP)
(match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
SVE2_COND_FP_UNARY_NARROWB)
(match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
UNSPEC_SEL))]
"TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
"&& !rtx_equal_p (operands[1], operands[4])"
{
operands[4] = copy_rtx (operands[1]);
}
[(set_attr "movprfx" "*,yes,yes")]
)
(define_insn "*cond_<sve_fp_op><mode>_any_strict"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
(unspec:VNx4SF_ONLY
[(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
(unspec:VNx4SF_ONLY
[(match_dup 1)
(const_int SVE_STRICT_GP)
(match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
SVE2_COND_FP_UNARY_NARROWB)
(match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
UNSPEC_SEL))]
"TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
[(set_attr "movprfx" "*,yes,yes")]
)
;; Predicated FCVTXNT. This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve2_cvtxnt<mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
(unspec:<VNARROW>
[(match_operand:<VPRED> 2 "register_operand" "Upl")
(const_int SVE_STRICT_GP)
(match_operand:<VNARROW> 1 "register_operand" "0")
(match_operand:VNx2DF_ONLY 3 "register_operand" "w")]
UNSPEC_COND_FCVTXNT))]
"TARGET_SVE2"
"fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
)
;; =========================================================================
;; == Other arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [INT] Reciprocal approximation
;; -------------------------------------------------------------------------
;; Includes:
;; - URECPE
;; - URSQRTE
;; -------------------------------------------------------------------------
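;; A minimal ACLE-level usage sketch, assuming the svrecpe intrinsic name
;; (which is not defined in this file):
;;
;;   #include <arm_sve.h>
;;   svuint32_t f (svbool_t pg, svuint32_t x) {
;;     return svrecpe_u32_z (pg, x);   /* URECPE */
;;   }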
;; Predicated integer unary operations.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
[(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
(unspec:VNx4SI_ONLY
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:VNx4SI_ONLY
[(match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")]
SVE2_U32_UNARY)]
UNSPEC_PRED_X))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
[(set_attr "movprfx" "*,yes")]
)
;; Predicated integer unary operations with merging.
(define_expand "@cond_<sve_int_op><mode>"
[(set (match_operand:VNx4SI_ONLY 0 "register_operand")
(unspec:VNx4SI_ONLY
[(match_operand:<VPRED> 1 "register_operand")
(unspec:VNx4SI_ONLY
[(match_dup 4)
(unspec:VNx4SI_ONLY
[(match_operand:VNx4SI_ONLY 2 "register_operand")]
SVE2_U32_UNARY)]
UNSPEC_PRED_X)
(match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE2"
{
operands[4] = CONSTM1_RTX (<MODE>mode);
}
)
(define_insn_and_rewrite "*cond_<sve_int_op><mode>"
[(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w, ?&w")
(unspec:VNx4SI_ONLY
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
(unspec:VNx4SI_ONLY
[(match_operand 4)
(unspec:VNx4SI_ONLY
[(match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
SVE2_U32_UNARY)]
UNSPEC_PRED_X)
(match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
UNSPEC_SEL))]
"TARGET_SVE2"
"@
<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes,yes")]
)
;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Base-2 logarithm
;; -------------------------------------------------------------------------
;; Includes:
;; - FLOGB
;; -------------------------------------------------------------------------
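;; FLOGB returns the signed base-2 exponent of each active element as an
;; integer, for example (illustration only):
;;
;;   flogb (8.0f)  == 3
;;   flogb (0.25f) == -2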
;; Predicated FLOGB.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w, ?&w")
(unspec:<V_INT_EQUIV>
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(match_operand:SI 3 "aarch64_sve_gp_strictness")
(match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
SVE2_COND_INT_UNARY_FP))]
"TARGET_SVE2"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
[(set_attr "movprfx" "*,yes")]
)
;; Predicated FLOGB with merging.
(define_expand "@cond_<sve_fp_op><mode>"
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
(unspec:<V_INT_EQUIV>
[(match_operand:<VPRED> 1 "register_operand")
(unspec:<V_INT_EQUIV>
[(match_dup 1)
(const_int SVE_STRICT_GP)
(match_operand:SVE_FULL_F 2 "register_operand")]
SVE2_COND_INT_UNARY_FP)
(match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE2"
)
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
(unspec:<V_INT_EQUIV>
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
(unspec:<V_INT_EQUIV>
[(match_operand 4)
(const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
SVE2_COND_INT_UNARY_FP)
(match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
UNSPEC_SEL))]
"TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
"&& !rtx_equal_p (operands[1], operands[4])"
{
operands[4] = copy_rtx (operands[1]);
}
[(set_attr "movprfx" "*,yes,yes")]
)
(define_insn "*cond_<sve_fp_op><mode>_strict"
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
(unspec:<V_INT_EQUIV>
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
(unspec:<V_INT_EQUIV>
[(match_dup 1)
(const_int SVE_STRICT_GP)
(match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
SVE2_COND_INT_UNARY_FP)
(match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
UNSPEC_SEL))]
"TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
[(set_attr "movprfx" "*,yes,yes")]
)
;; -------------------------------------------------------------------------
;; ---- [INT] Polynomial multiplication
;; -------------------------------------------------------------------------
;; Includes:
;; - PMUL
;; - PMULLB
;; - PMULLT
;; -------------------------------------------------------------------------
;; Uniform PMUL.
(define_insn "@aarch64_sve2_pmul<mode>"
[(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
(unspec:VNx16QI_ONLY
[(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
(match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
UNSPEC_PMUL))]
"TARGET_SVE2"
"pmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
;; Extending PMUL, with the results modeled as wider vectors.
;; This representation is only possible for .H and .D, not .Q.
(define_insn "@aarch64_sve_<optab><mode>"
[(set (match_operand:SVE_FULL_HDI 0 "register_operand" "=w")
(unspec:SVE_FULL_HDI
[(match_operand:<VNARROW> 1 "register_operand" "w")
(match_operand:<VNARROW> 2 "register_operand" "w")]
SVE2_PMULL))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
)
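;; PMULLB forms the carry-less (polynomial) product of each pair of
;; even-numbered source elements and PMULLT that of each pair of
;; odd-numbered elements.  For the wide-vector form above, a rough per-lane
;; sketch (illustration only) is:
;;
;;   wide[i] = clmul (a[2*i], b[2*i]);           /* PMULLB */
;;   wide[i] = clmul (a[2*i + 1], b[2*i + 1]);   /* PMULLT */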
;; Extending PMUL, with the results modeled as pairs of values.
;; This representation works for .H, .D and .Q, with .Q requiring
;; the AES extension. (This is enforced by the mode iterator.)
(define_insn "@aarch64_sve_<optab><mode>"
[(set (match_operand:SVE2_PMULL_PAIR_I 0 "register_operand" "=w")
(unspec:SVE2_PMULL_PAIR_I
[(match_operand:SVE2_PMULL_PAIR_I 1 "register_operand" "w")
(match_operand:SVE2_PMULL_PAIR_I 2 "register_operand" "w")]
SVE2_PMULL_PAIR))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
)
;; =========================================================================
;; == Permutation
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [INT,FP] General permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - TBL (vector pair form)
;; - TBX
;; -------------------------------------------------------------------------
;; TBL on a pair of data vectors.
(define_insn "@aarch64_sve2_tbl2<mode>"
[(set (match_operand:SVE_FULL 0 "register_operand" "=w")
(unspec:SVE_FULL
[(match_operand:<VDOUBLE> 1 "register_operand" "w")
(match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
UNSPEC_TBL2))]
"TARGET_SVE2"
"tbl\t%0.<Vetype>, %1, %2.<Vetype>"
)
;; TBX. These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve2_tbx<mode>"
[(set (match_operand:SVE_FULL 0 "register_operand" "=w")
(unspec:SVE_FULL
[(match_operand:SVE_FULL 1 "register_operand" "0")
(match_operand:SVE_FULL 2 "register_operand" "w")
(match_operand:<V_INT_EQUIV> 3 "register_operand" "w")]
UNSPEC_TBX))]
"TARGET_SVE2"
"tbx\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
)
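;; A rough per-lane sketch of the two patterns above (illustration only),
;; where n is the number of elements per vector:
;;
;;   tbl2: d[i] = (idx[i] < 2*n) ? concat (src0, src1)[idx[i]] : 0;
;;   tbx:  d[i] = (idx[i] < n)   ? src[idx[i]]                 : d[i];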
;; -------------------------------------------------------------------------
;; ---- [INT] Optional bit-permute extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - BDEP
;; - BEXT
;; - BGRP
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
(unspec:SVE_FULL_I
[(match_operand:SVE_FULL_I 1 "register_operand" "w")
(match_operand:SVE_FULL_I 2 "register_operand" "w")]
SVE2_INT_BITPERM))]
"TARGET_SVE2_BITPERM"
"<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
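;; Within each element, BEXT gathers the bits of operand 1 that are selected
;; by the mask in operand 2 into the low bits of the result (PEXT-style),
;; BDEP performs the inverse scatter (PDEP-style), and BGRP packs the
;; selected bits at the bottom and the remaining bits at the top.  (A sketch
;; of the intent rather than the architectural pseudocode.)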
;; =========================================================================
;; == General
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- Check for aliases between pointers
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic: WHILERW and WHILEWR are
;; defined in aarch64-sve.md instead.
;; -------------------------------------------------------------------------
;; Use WHILERW and WHILEWR to accelerate alias checks. This is only
;; possible if the accesses we're checking are exactly the same size
;; as an SVE vector.
(define_expand "check_<raw_war>_ptrs<mode>"
[(match_operand:GPI 0 "register_operand")
(unspec:VNx16BI
[(match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "register_operand")
(match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand")
(match_operand:GPI 4 "const_int_operand")]
SVE2_WHILE_PTR)]
"TARGET_SVE2"
{
/* Use the widest predicate mode we can. */
unsigned int align = INTVAL (operands[4]);
if (align > 8)
align = 8;
machine_mode pred_mode = aarch64_sve_pred_mode (align).require ();
/* Emit a WHILERW or WHILEWR, setting the condition codes based on
the result. */
emit_insn (gen_while_ptest
(<SVE2_WHILE_PTR:unspec>, <MODE>mode, pred_mode,
gen_rtx_SCRATCH (pred_mode), operands[1], operands[2],
CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode)));
/* Set operand 0 to true if the last bit of the predicate result is set,
i.e. if all elements are free of dependencies. */
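/* (An LTU comparison on CC_NZC corresponds to the SVE "LAST" condition:
the C flag is clear when the last element of the result is active.)  */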
rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
rtx cmp = gen_rtx_LTU (<MODE>mode, cc_reg, const0_rtx);
emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg));
DONE;
})
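;; For 32-bit elements the expansion above is roughly equivalent to the
;; following sequence (an illustrative sketch, not the exact output):
;;
;;   whilerw  p0.s, x0, x1    // or whilewr for the write-after-read check
;;   cset     w0, cc
;;
;; so that operand 0 is 1 only if even the last element is free of
;; dependencies.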
;; -------------------------------------------------------------------------
;; ---- Histogram processing
;; -------------------------------------------------------------------------
;; Includes:
;; - HISTCNT
;; - HISTSEG
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve2_histcnt<mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
(unspec:SVE_FULL_SDI
[(match_operand:<VPRED> 1 "register_operand" "Upl")
(match_operand:SVE_FULL_SDI 2 "register_operand" "w")
(match_operand:SVE_FULL_SDI 3 "register_operand" "w")]
UNSPEC_HISTCNT))]
"TARGET_SVE2"
"histcnt\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
(define_insn "@aarch64_sve2_histseg<mode>"
[(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
(unspec:VNx16QI_ONLY
[(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
(match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
UNSPEC_HISTSEG))]
"TARGET_SVE2"
"histseg\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
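;; A minimal ACLE-level usage sketch, assuming the svhistcnt intrinsic name
;; (which is not defined in this file):
;;
;;   #include <arm_sve.h>
;;   svuint32_t f (svbool_t pg, svint32_t a, svint32_t b) {
;;     return svhistcnt_s32_z (pg, a, b);   /* HISTCNT */
;;   }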
;; -------------------------------------------------------------------------
;; ---- String matching
;; -------------------------------------------------------------------------
;; Includes:
;; - MATCH
;; - NMATCH
;; -------------------------------------------------------------------------
;; Predicated string matching.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
(unspec:<VPRED>
[(match_operand:<VPRED> 1 "register_operand" "Upl")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
[(match_operand:SVE_FULL_BHI 3 "register_operand" "w")
(match_operand:SVE_FULL_BHI 4 "register_operand" "w")]
SVE2_MATCH)]
UNSPEC_PRED_Z))
(clobber (reg:CC_NZC CC_REGNUM))]
"TARGET_SVE2"
"<sve_int_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)
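;; As a rough per-lane sketch of MATCH (illustration only): the result bit
;; for each active element of operand 3 is set if that element equals any
;; element within the corresponding 128-bit segment of operand 4; NMATCH
;; sets the bit when there is no such match.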
;; Predicated string matching in which both the flag and predicate results
;; are interesting.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:VNx16BI 1 "register_operand" "Upl")
(match_operand 4)
(match_operand:SI 5 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
[(match_operand 6)
(match_operand:SI 7 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
[(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
(match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
SVE2_MATCH)]
UNSPEC_PRED_Z)]
UNSPEC_PTEST))
(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
(unspec:<VPRED>
[(match_dup 6)
(match_dup 7)
(unspec:<VPRED>
[(match_dup 2)
(match_dup 3)]
SVE2_MATCH)]
UNSPEC_PRED_Z))]
"TARGET_SVE2
&& aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
"<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
"&& !rtx_equal_p (operands[4], operands[6])"
{
operands[6] = copy_rtx (operands[4]);
operands[7] = operands[5];
}
)
;; Predicated string matching in which only the flags result is interesting.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_ptest"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:VNx16BI 1 "register_operand" "Upl")
(match_operand 4)
(match_operand:SI 5 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
[(match_operand 6)
(match_operand:SI 7 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
[(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
(match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
SVE2_MATCH)]
UNSPEC_PRED_Z)]
UNSPEC_PTEST))
(clobber (match_scratch:<VPRED> 0 "=Upa"))]
"TARGET_SVE2
&& aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
"<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
"&& !rtx_equal_p (operands[4], operands[6])"
{
operands[6] = copy_rtx (operands[4]);
operands[7] = operands[5];
}
)
;; =========================================================================
;; == Cryptographic extensions
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- Optional AES extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - AESD
;; - AESE
;; - AESIMC
;; - AESMC
;; -------------------------------------------------------------------------
;; AESD and AESE.
(define_insn "aarch64_sve2_aes<aes_op>"
[(set (match_operand:VNx16QI 0 "register_operand" "=w")
(unspec:VNx16QI
[(xor:VNx16QI
(match_operand:VNx16QI 1 "register_operand" "%0")
(match_operand:VNx16QI 2 "register_operand" "w"))]
CRYPTO_AES))]
"TARGET_SVE2_AES"
"aes<aes_op>\t%0.b, %0.b, %2.b"
[(set_attr "type" "crypto_aese")]
)
;; AESMC and AESIMC. These instructions do not take MOVPRFX.
(define_insn "aarch64_sve2_aes<aesmc_op>"
[(set (match_operand:VNx16QI 0 "register_operand" "=w")
(unspec:VNx16QI
[(match_operand:VNx16QI 1 "register_operand" "0")]
CRYPTO_AESMC))]
"TARGET_SVE2_AES"
"aes<aesmc_op>\t%0.b, %0.b"
[(set_attr "type" "crypto_aesmc")]
)
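;; An ACLE-level sketch of one full round, assuming the svaese/svaesmc
;; intrinsic names (which are not defined in this file):
;;
;;   #include <arm_sve.h>
;;   svuint8_t aes_round (svuint8_t state, svuint8_t key) {
;;     return svaesmc_u8 (svaese_u8 (state, key));
;;   }
;;
;; With AESE/AESMC fusion enabled this maps onto the fused pattern below.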
;; When AESE/AESMC and AESD/AESIMC fusion is enabled, we really want
;; to keep the two together and enforce the register dependency without
;; scheduling or register allocation messing up the order or introducing
;; moves in between. Mash the two together during combine.
(define_insn "*aarch64_sve2_aese_fused"
[(set (match_operand:VNx16QI 0 "register_operand" "=w")
(unspec:VNx16QI
[(unspec:VNx16QI
[(xor:VNx16QI
(match_operand:VNx16QI 1 "register_operand" "%0")
(match_operand:VNx16QI 2 "register_operand" "w"))]
UNSPEC_AESE)]
UNSPEC_AESMC))]
"TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
"aese\t%0.b, %0.b, %2.b\;aesmc\t%0.b, %0.b"
[(set_attr "type" "crypto_aese")
(set_attr "length" "8")]
)
(define_insn "*aarch64_sve2_aesd_fused"
[(set (match_operand:VNx16QI 0 "register_operand" "=w")
(unspec:VNx16QI
[(unspec:VNx16QI
[(xor:VNx16QI
(match_operand:VNx16QI 1 "register_operand" "%0")
(match_operand:VNx16QI 2 "register_operand" "w"))]
UNSPEC_AESD)]
UNSPEC_AESIMC))]
"TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
"aesd\t%0.b, %0.b, %2.b\;aesimc\t%0.b, %0.b"
[(set_attr "type" "crypto_aese")
(set_attr "length" "8")]
)
;; -------------------------------------------------------------------------
;; ---- Optional SHA-3 extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - RAX1
;; -------------------------------------------------------------------------
(define_insn "aarch64_sve2_rax1"
[(set (match_operand:VNx2DI 0 "register_operand" "=w")
(xor:VNx2DI
(rotate:VNx2DI
(match_operand:VNx2DI 2 "register_operand" "w")
(const_int 1))
(match_operand:VNx2DI 1 "register_operand" "w")))]
"TARGET_SVE2_SHA3"
"rax1\t%0.d, %1.d, %2.d"
[(set_attr "type" "crypto_sha3")]
)
;; -------------------------------------------------------------------------
;; ---- Optional SM4 extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - SM4E
;; - SM4EKEY
;; -------------------------------------------------------------------------
;; These instructions do not take MOVPRFX.
(define_insn "aarch64_sve2_sm4e"
[(set (match_operand:VNx4SI 0 "register_operand" "=w")
(unspec:VNx4SI
[(match_operand:VNx4SI 1 "register_operand" "0")
(match_operand:VNx4SI 2 "register_operand" "w")]
UNSPEC_SM4E))]
"TARGET_SVE2_SM4"
"sm4e\t%0.s, %0.s, %2.s"
[(set_attr "type" "crypto_sm4")]
)
(define_insn "aarch64_sve2_sm4ekey"
[(set (match_operand:VNx4SI 0 "register_operand" "=w")
(unspec:VNx4SI
[(match_operand:VNx4SI 1 "register_operand" "w")
(match_operand:VNx4SI 2 "register_operand" "w")]
UNSPEC_SM4EKEY))]
"TARGET_SVE2_SM4"
"sm4ekey\t%0.s, %1.s, %2.s"
[(set_attr "type" "crypto_sm4")]
)