;; define_peephole2 optimization patterns of Andes NDS32 cpu for GNU compiler
;; Copyright (C) 2012-2020 Free Software Foundation, Inc.
;; Contributed by Andes Technology Corporation.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
;; License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; Use define_peephole2 to handle possible target-specific optimization.
;; ------------------------------------------------------------------------
;; Try to utilize 16-bit instructions by swapping operands where possible.
;; ------------------------------------------------------------------------
;; Try to turn a register-register add into add45.
;; After reload, when the destination is the same hard register as the
;; second addend (and differs from the first addend) and lies in
;; MIDDLE_REGS, commute the addends so that the destination register
;; becomes the first source operand.
(define_peephole2
  [(set (match_operand:QIHISI 0 "register_operand" "")
        (plus:QIHISI
          (match_operand:QIHISI 1 "register_operand" "")
          (match_operand:QIHISI 2 "register_operand" "")))]
  "reload_completed
   && TARGET_16_BIT
   && REGNO (operands[2]) == REGNO (operands[0])
   && REGNO (operands[1]) != REGNO (operands[0])
   && TEST_HARD_REG_BIT (reg_class_contents[MIDDLE_REGS],
                         REGNO (operands[0]))"
  [(set (match_dup 0)
        (plus:QIHISI (match_dup 2) (match_dup 1)))])
;; Try to turn xor/ior/and/mult into xor33/ior33/and33/mult33.
;; The operator is whatever nds32_have_33_inst_operator accepts.  After
;; reload, when the destination is the same hard register as the second
;; source (and differs from the first source), and both the destination
;; and the first source lie in LOW_REGS, swap the two sources so that the
;; destination register comes first.
(define_peephole2
  [(set (match_operand:SI 0 "register_operand" "")
        (match_operator:SI 1 "nds32_have_33_inst_operator"
          [(match_operand:SI 2 "register_operand" "")
           (match_operand:SI 3 "register_operand" "")]))]
  "reload_completed
   && TARGET_16_BIT
   && REGNO (operands[3]) == REGNO (operands[0])
   && REGNO (operands[2]) != REGNO (operands[0])
   && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS],
                         REGNO (operands[0]))
   && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS],
                         REGNO (operands[2]))"
  [(set (match_dup 0)
        (match_op_dup 1 [(match_dup 3) (match_dup 2)]))])
;; Emit a single movd44 for two consecutive SImode register moves.
;; Requirements checked below: 16-bit instructions are enabled and the
;; ISA is not V2; the first move's destination and source are both
;; even-numbered GPRs; and the second move copies the next-numbered
;; source register into the next-numbered destination register.
(define_peephole
  [(set (match_operand:SI 0 "register_operand" "")
        (match_operand:SI 1 "register_operand" ""))
   (set (match_operand:SI 2 "register_operand" "")
        (match_operand:SI 3 "register_operand" ""))]
  "TARGET_16_BIT
   && !TARGET_ISA_V2
   && NDS32_IS_GPR_REGNUM (REGNO (operands[0]))
   && NDS32_IS_GPR_REGNUM (REGNO (operands[1]))
   && REGNO (operands[0]) % 2 == 0
   && REGNO (operands[1]) % 2 == 0
   && REGNO (operands[2]) == REGNO (operands[0]) + 1
   && REGNO (operands[3]) == REGNO (operands[1]) + 1"
  "movd44\t%0, %1"
  [(set_attr "type" "alu")
   (set_attr "length" "2")])
;; Merge two fcpyss into one fcpysd (even-numbered register moved first).
;; The first SFmode move is even <- even and the second is odd <- odd,
;; where each odd register is the one numbered directly after the
;; corresponding even register.  The pair is replaced by one DFmode move
;; between the two even-numbered registers.
(define_peephole2
  [(set (match_operand:SF 0 "float_even_register_operand" "")
        (match_operand:SF 1 "float_even_register_operand" ""))
   (set (match_operand:SF 2 "float_odd_register_operand" "")
        (match_operand:SF 3 "float_odd_register_operand" ""))]
  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
   && REGNO (operands[0]) + 1 == REGNO (operands[2])
   && REGNO (operands[1]) + 1 == REGNO (operands[3])"
  [(set (match_dup 4) (match_dup 5))]
{
  /* Build DFmode registers starting at the even-numbered destination
     and source of the first move.  */
  const unsigned int dest_regno = REGNO (operands[0]);
  const unsigned int src_regno = REGNO (operands[1]);

  operands[4] = gen_rtx_REG (DFmode, dest_regno);
  operands[5] = gen_rtx_REG (DFmode, src_regno);
})
;; Merge two SFmode moves into one DFmode move (odd-numbered register
;; moved first).  The first move is odd <- odd and the second is
;; even <- even, where each even register is the one numbered directly
;; before the corresponding odd register.  The pair is replaced by one
;; DFmode move between the two even-numbered registers.
(define_peephole2
  [(set (match_operand:SF 0 "float_odd_register_operand" "")
        (match_operand:SF 1 "float_odd_register_operand" ""))
   (set (match_operand:SF 2 "float_even_register_operand" "")
        (match_operand:SF 3 "float_even_register_operand" ""))]
  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
   && REGNO (operands[2]) + 1 == REGNO (operands[0])
   && REGNO (operands[3]) + 1 == REGNO (operands[1])"
  [(set (match_dup 4) (match_dup 5))]
{
  /* Build DFmode registers starting at the even-numbered destination
     and source of the second move.  */
  const unsigned int dest_regno = REGNO (operands[2]);
  const unsigned int src_regno = REGNO (operands[3]);

  operands[4] = gen_rtx_REG (DFmode, dest_regno);
  operands[5] = gen_rtx_REG (DFmode, src_regno);
})
;; ------------------------------------------------------------------------
;; GCC prefers [u]divmodsi3 over [u]divsi3 even when the remainder is
;; unused, so we use splits to drop the mod operation and lower register
;; pressure.
;;
;; Signed case: when the insn carries a REG_UNUSED note for the remainder
;; register and pseudos can still be created, keep only the division.
(define_split
  [(set (match_operand:SI 0 "register_operand")
        (div:SI
          (match_operand:SI 1 "register_operand")
          (match_operand:SI 2 "register_operand")))
   (set (match_operand:SI 3 "register_operand")
        (mod:SI (match_dup 1) (match_dup 2)))]
  "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL
   && can_create_pseudo_p ()"
  [(set (match_dup 0)
        (div:SI (match_dup 1) (match_dup 2)))])
;; Unsigned division with remainder: when the insn carries a REG_UNUSED
;; note for the remainder register and pseudos can still be created,
;; keep only the unsigned division.
(define_split
  [(set (match_operand:SI 0 "register_operand")
        (udiv:SI
          (match_operand:SI 1 "register_operand")
          (match_operand:SI 2 "register_operand")))
   (set (match_operand:SI 3 "register_operand")
        (umod:SI (match_dup 1) (match_dup 2)))]
  "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL
   && can_create_pseudo_p ()"
  [(set (match_dup 0)
        (udiv:SI (match_dup 1) (match_dup 2)))])
;; Replace a widening signed SI x SI -> DI multiply by smulsi3_highpart
;; when the DSP extension is present and one word register of the result
;; is dead after the insn.  The register tested is REGNO + 1 under
;; WORDS_BIG_ENDIAN and REGNO otherwise (presumably the word holding the
;; low part of the product -- confirm against the register layout).
(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
        (mult:DI
          (sign_extend:DI (match_operand:SI 1 "register_operand"))
          (sign_extend:DI (match_operand:SI 2 "register_operand"))))]
  "NDS32_EXT_DSP_P ()
   && peep2_regno_dead_p (1,
                          REGNO (operands[0]) + (WORDS_BIG_ENDIAN ? 1 : 0))"
  [(const_int 1)]
{
  /* Only the high part of the DImode destination is written.  */
  rtx hi_dest = nds32_di_high_part_subreg (operands[0]);

  emit_insn (gen_smulsi3_highpart (hi_dest, operands[1], operands[2]));
  DONE;
})
;; Narrow a DImode register move to an SImode move when the insn carries
;; a REG_UNUSED note for either REGNO (operands[0]) or the register
;; numbered directly after it.
(define_split
  [(set (match_operand:DI 0 "nds32_general_register_operand" "")
        (match_operand:DI 1 "nds32_general_register_operand" ""))]
  "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) != NULL
   || find_regno_note (insn, REG_UNUSED, REGNO (operands[0]) + 1) != NULL"
  [(set (match_dup 0) (match_dup 1))]
{
  /* If the register numbered REGNO (operands[0]) is the unused one, move
     the SImode piece at byte offset 4; otherwise move the piece at byte
     offset 0.  The same offset is applied to source and destination.  */
  const bool first_reg_unused
    = find_regno_note (curr_insn, REG_UNUSED,
                       REGNO (operands[0])) != NULL_RTX;
  const HOST_WIDE_INT offset = first_reg_unused ? 4 : 0;

  operands[0] = simplify_gen_subreg (SImode, operands[0], DImode, offset);
  operands[1] = simplify_gen_subreg (SImode, operands[1], DImode, offset);
})