/* Machine description for AArch64 architecture.
Copyright (C) 2009-2020 Free Software Foundation, Inc.
Contributed by ARM Ltd.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
/* Important note about Carry generation in AArch64.
Unlike some architectures, the C flag generated by a subtract
operation, or a simple compare operation is set to 1 if the result
does not overflow in an unsigned sense. That is, if there is no
borrow needed from a higher word. That means that overflow from
addition will set C, but overflow from a subtraction will clear C.
We use CC_Cmode to represent detection of overflow from addition as
CCmode is used for 'normal' compare (subtraction) operations. For
ADC, the representation becomes more complex still, since we cannot
use the normal idiom of comparing the result to one of the input
operands; instead we use CC_ADCmode to represent this case. */
/* Condition-code modes.  Each CC_MODE line declares one mode.  Modes
   that restrict which condition flags carry meaning say so inline;
   CC_C and CC_ADC are the addition-overflow modes described in the
   carry note at the top of this file.  */
CC_MODE (CCFP); /* NOTE(review): presumably floating-point compares; confirm
against the users of this mode. */
CC_MODE (CCFPE); /* NOTE(review): presumably the FP compare variant that
signals on unordered operands; confirm. */
CC_MODE (CC_SWP); /* NOTE(review): presumably a compare whose operands were
swapped; confirm. */
CC_MODE (CC_NZC); /* Only N, Z and C bits of condition flags are valid.
(Used with SVE predicate tests.) */
CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */
CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */
CC_MODE (CC_C); /* C represents unsigned overflow of a simple addition. */
CC_MODE (CC_ADC); /* Unsigned overflow from an ADC (add with carry). */
CC_MODE (CC_V); /* Only V bit of condition flags is valid. */
/* Half-precision floating point for __fp16.  The mode is declared with
   a size of 2 and a format argument of 0; the actual format,
   ieee_half_format, is installed by the ADJUST_FLOAT_FORMAT line.  */
FLOAT_MODE (HF, 2, 0);
ADJUST_FLOAT_FORMAT (HF, &ieee_half_format);
/* Vector modes.  */
/* Boolean vector modes VNx16BI, VNx8BI, VNx4BI and VNx2BI.
   NOTE(review): these look like the SVE predicate modes, and the last
   two VECTOR_BOOL_MODE arguments look like (unit count, byte size);
   confirm against genmodes.  */
VECTOR_BOOL_MODE (VNx16BI, 16, 2);
VECTOR_BOOL_MODE (VNx8BI, 8, 2);
VECTOR_BOOL_MODE (VNx4BI, 4, 2);
VECTOR_BOOL_MODE (VNx2BI, 2, 2);
/* The real number of units scales with aarch64_sve_vg: VNx<N>BI gets
   aarch64_sve_vg * N / 2 units.  */
ADJUST_NUNITS (VNx16BI, aarch64_sve_vg * 8);
ADJUST_NUNITS (VNx8BI, aarch64_sve_vg * 4);
ADJUST_NUNITS (VNx4BI, aarch64_sve_vg * 2);
ADJUST_NUNITS (VNx2BI, aarch64_sve_vg);
/* All four boolean vector modes share an alignment of 2.  */
ADJUST_ALIGNMENT (VNx16BI, 2);
ADJUST_ALIGNMENT (VNx8BI, 2);
ADJUST_ALIGNMENT (VNx4BI, 2);
ADJUST_ALIGNMENT (VNx2BI, 2);
/* Bfloat16 modes.  As with HF, the mode is declared with a size of 2
   and a format argument of 0, and the format (arm_bfloat_half_format)
   is installed separately by ADJUST_FLOAT_FORMAT.  */
FLOAT_MODE (BF, 2, 0);
ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format);
/* Fixed-length vector modes.  The VECTOR_MODES lines request every
   integer or floating-point vector mode of the given total size (8 or
   16); the two VECTOR_MODE lines request one specific mode each, given
   as (class, element mode, number of elements).  The trailing comments
   name the modes each line is expected to provide.  */
VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */
VECTOR_MODES (FLOAT, 8); /* V2SF. */
VECTOR_MODES (FLOAT, 16); /* V4SF V2DF. */
VECTOR_MODE (FLOAT, DF, 1); /* V1DF. */
VECTOR_MODE (FLOAT, HF, 2); /* V2HF. */
/* Oct Int: 256-bit integer mode needed for 32-byte vector arguments. */
INT_MODE (OI, 32);
/* Opaque integer modes for 3 or 4 Neon q-registers / 6 or 8 Neon d-registers
(2 d-regs = 1 q-reg = TImode).  CI is the 48-byte (3 q-register) mode and
XI the 64-byte (4 q-register) mode. */
INT_MODE (CI, 48);
INT_MODE (XI, 64);
/* Define SVE modes for NVECS vectors.  VB, VH, VS and VD are the prefixes
   for 8-bit, 16-bit, 32-bit and 64-bit elements respectively.  It isn't
   strictly necessary to set the alignment here, since the default would
   be clamped to BIGGEST_ALIGNMENT anyhow, but it seems clearer.

   For each invocation the macro:
   - requests the integer and floating-point VNx vector modes of
     16 * NVECS bytes;
   - sets the unit count of every QI/HI/SI/DI/BF/HF/SF/DF mode to
     aarch64_sve_vg * NVECS scaled by 8, 4, 2 or 1 according to the
     element width;
   - sets the alignment of each of those modes to 16.

   NVECS is parenthesized at every use so that an expression argument
   (for example "1 + 1") scales the size and the unit counts as a whole
   instead of being split by operator precedence.  The expansion already
   ends in a semicolon, so invocations are written without one.  */
#define SVE_MODES(NVECS, VB, VH, VS, VD) \
  VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * (NVECS), 0); \
  VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * (NVECS), 0); \
  \
  ADJUST_NUNITS (VB##QI, aarch64_sve_vg * (NVECS) * 8); \
  ADJUST_NUNITS (VH##HI, aarch64_sve_vg * (NVECS) * 4); \
  ADJUST_NUNITS (VS##SI, aarch64_sve_vg * (NVECS) * 2); \
  ADJUST_NUNITS (VD##DI, aarch64_sve_vg * (NVECS)); \
  ADJUST_NUNITS (VH##BF, aarch64_sve_vg * (NVECS) * 4); \
  ADJUST_NUNITS (VH##HF, aarch64_sve_vg * (NVECS) * 4); \
  ADJUST_NUNITS (VS##SF, aarch64_sve_vg * (NVECS) * 2); \
  ADJUST_NUNITS (VD##DF, aarch64_sve_vg * (NVECS)); \
  \
  ADJUST_ALIGNMENT (VB##QI, 16); \
  ADJUST_ALIGNMENT (VH##HI, 16); \
  ADJUST_ALIGNMENT (VS##SI, 16); \
  ADJUST_ALIGNMENT (VD##DI, 16); \
  ADJUST_ALIGNMENT (VH##BF, 16); \
  ADJUST_ALIGNMENT (VH##HF, 16); \
  ADJUST_ALIGNMENT (VS##SF, 16); \
  ADJUST_ALIGNMENT (VD##DF, 16);
/* Give SVE vectors the names normally used for 256-bit vectors.
The actual number depends on command-line flags.
The first argument is the number of vectors in the tuple; the remaining
four are the mode-name prefixes for 8-bit, 16-bit, 32-bit and 64-bit
elements, e.g. the second line covers VNx32QI, VNx16HI, VNx8SI and VNx4DI.
No trailing semicolons: the macro expansion supplies its own. */
SVE_MODES (1, VNx16, VNx8, VNx4, VNx2)
SVE_MODES (2, VNx32, VNx16, VNx8, VNx4)
SVE_MODES (3, VNx48, VNx24, VNx12, VNx6)
SVE_MODES (4, VNx64, VNx32, VNx16, VNx8)
/* Partial SVE vectors:
VNx2QI VNx4QI VNx8QI
VNx2HI VNx4HI
VNx2SI
VNx2HF VNx4HF
VNx2SF
In memory they occupy contiguous locations, in the same way as fixed-length
vectors. E.g. VNx8QImode is half the size of VNx16QImode.
Passing 1 as the final argument ensures that the modes come after all
other modes in the GET_MODE_WIDER chain, so that we never pick them
in preference to a full vector mode.
NOTE(review): BF is declared with FLOAT_MODE earlier in this file, so the
two FLOAT lines below presumably also create VNx2BF and VNx4BF, yet
nothing here adjusts their unit count or alignment; confirm whether that
is intended. */
VECTOR_MODES_WITH_PREFIX (VNx, INT, 2, 1);
VECTOR_MODES_WITH_PREFIX (VNx, INT, 4, 1);
VECTOR_MODES_WITH_PREFIX (VNx, INT, 8, 1);
VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 4, 1);
VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 8, 1);
/* Unit counts: VNx2* modes have aarch64_sve_vg units, VNx4* modes twice
that and VNx8QI four times that. */
ADJUST_NUNITS (VNx2QI, aarch64_sve_vg);
ADJUST_NUNITS (VNx2HI, aarch64_sve_vg);
ADJUST_NUNITS (VNx2SI, aarch64_sve_vg);
ADJUST_NUNITS (VNx2HF, aarch64_sve_vg);
ADJUST_NUNITS (VNx2SF, aarch64_sve_vg);
ADJUST_NUNITS (VNx4QI, aarch64_sve_vg * 2);
ADJUST_NUNITS (VNx4HI, aarch64_sve_vg * 2);
ADJUST_NUNITS (VNx4HF, aarch64_sve_vg * 2);
ADJUST_NUNITS (VNx8QI, aarch64_sve_vg * 4);
/* Alignments: 1 for the QI modes, 2 for the HI and HF modes, 4 for the
SI and SF modes (presumably the element size in bytes). */
ADJUST_ALIGNMENT (VNx2QI, 1);
ADJUST_ALIGNMENT (VNx4QI, 1);
ADJUST_ALIGNMENT (VNx8QI, 1);
ADJUST_ALIGNMENT (VNx2HI, 2);
ADJUST_ALIGNMENT (VNx4HI, 2);
ADJUST_ALIGNMENT (VNx2HF, 2);
ADJUST_ALIGNMENT (VNx4HF, 2);
ADJUST_ALIGNMENT (VNx2SI, 4);
ADJUST_ALIGNMENT (VNx2SF, 4);
/* Quad float: 128-bit floating mode for long doubles.  Unlike HF and BF,
the format (ieee_quad_format) is given directly in the FLOAT_MODE line. */
FLOAT_MODE (TF, 16, ieee_quad_format);
/* Upper bound on the bit size of any mode: a 4-tuple of SVE vectors at
the maximum -msve-vector-bits= setting.  This bounds only the
compile-time sizes of modes; it is not a limit on the runtime sizes,
since VL-agnostic code must work with arbitrary vector lengths. */
#define MAX_BITSIZE_MODE_ANY_MODE (4 * 2048)
/* Number of coefficients used for polynomial (runtime-variable) sizes.
Coefficient 1 is multiplied by the number of 128-bit chunks in an
SVE vector (referred to as "VQ") minus one.
NOTE(review): coefficient 0 is presumably the constant term, i.e. the
value when VQ is 1; confirm. */
#define NUM_POLY_INT_COEFFS 2