Training courses

Kernel and Embedded Linux

Bootlin training courses

Embedded Linux, kernel,
Yocto Project, Buildroot, real-time,
graphics, boot time, debugging...

Bootlin logo

Elixir Cross Referencer

;; Pipeline description for Motorola PowerPC 8540 processor.
;;   Copyright (C) 2003-2020 Free Software Foundation, Inc.
;;
;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Three automata are declared here.  Every define_cpu_unit in this
;; file names one of them as its second operand.
(define_automaton "ppc8540_most,ppc8540_long,ppc8540_retire")
;; Two decode slots, both in the ppc8540_most automaton.
(define_cpu_unit "ppc8540_decode_0,ppc8540_decode_1" "ppc8540_most")

;; Two issue slots, both in the ppc8540_most automaton.
;;
;; We don't simulate the general issue queue (GIC).  If we have an SU
;; insn and then an SU1 insn, they cannot be issued on the same cycle
;; (although an SU1 insn and then an SU insn can be issued) because the
;; SU insn will go to SU1 from the GIC0 entry.  Fortunately, the first
;; cycle multipass insn scheduling will find the situation and issue
;; the SU1 insn and then the SU insn.
(define_cpu_unit "ppc8540_issue_0,ppc8540_issue_1"   "ppc8540_most")

;; Two retire slots, kept in their own automaton (ppc8540_retire).
;;
;; We could describe completion buffer slots in combination with the
;; retirement units and the order of completion, but the resulting
;; automaton would behave in the same way because we cannot describe
;; the real latency time while taking in-order completion into account.
;; Actually we could define the real latency time by querying reserved
;; automaton units, but the current scheduler uses the latency time
;; before issuing insns and making any reservations.
;;
;; So our description is aimed at achieving an insn schedule in which
;; the insns would not wait in the completion buffer.
(define_cpu_unit "ppc8540_retire_0,ppc8540_retire_1" "ppc8540_retire")

;; Branch unit: a single unit in the ppc8540_most automaton.  In this
;; file it is reserved by the branch and CR-logical reservations.
(define_cpu_unit "ppc8540_bu" "ppc8540_most")

;; SU: stage 0 of the two units SU0 and SU1.  Only stage 0 is
;; described; both units are in the ppc8540_most automaton.
(define_cpu_unit "ppc8540_su0_stage0,ppc8540_su1_stage0" "ppc8540_most")

;; MU: four pipeline stages, stage0 through stage3, all in the
;; ppc8540_most automaton.
;;
;; We could describe here MU subunits for float multiply, float add
;; etc.  But the resulting automaton would behave the same way as the
;; single pipeline described below because MU can start only one insn
;; per cycle.  Actually we could simplify the automaton further by not
;; describing stages 1-3; the resulting automata would be the same.
(define_cpu_unit "ppc8540_mu_stage0,ppc8540_mu_stage1" "ppc8540_most")
(define_cpu_unit "ppc8540_mu_stage2,ppc8540_mu_stage3" "ppc8540_most")

;; The following unit is used to describe non-pipelined division.  It
;; is the only unit placed in the ppc8540_long automaton; the divide
;; reservations in this file hold it for many consecutive cycles.
(define_cpu_unit "ppc8540_mu_div" "ppc8540_long")

;; Here we simplify the LSU description by not describing its stages:
;; the load/store unit is a single unit in the ppc8540_most automaton.
(define_cpu_unit "ppc8540_lsu" "ppc8540_most")

;; The following units are used to make the automata deterministic.
;; Each present_* unit is declared in the same automaton as the *_0
;; unit whose name it carries (decode_0, issue_0, retire_0 and
;; su0_stage0), and is tied to that unit by a presence_set in this file.
(define_cpu_unit "present_ppc8540_decode_0" "ppc8540_most")
(define_cpu_unit "present_ppc8540_issue_0" "ppc8540_most")
(define_cpu_unit "present_ppc8540_retire_0" "ppc8540_retire")
(define_cpu_unit "present_ppc8540_su0_stage0" "ppc8540_most")

;; The following presence sets make the automata deterministic when
;; the ndfa option is used.  Each one states that the present_* unit
;; (first operand) may be reserved only while the corresponding *_0
;; unit (second operand) is reserved.
(presence_set "present_ppc8540_decode_0" "ppc8540_decode_0")
(presence_set "present_ppc8540_issue_0" "ppc8540_issue_0")
(presence_set "present_ppc8540_retire_0" "ppc8540_retire_0")
(presence_set "present_ppc8540_su0_stage0" "ppc8540_su0_stage0")

;; Some useful abbreviations.
;;
;; Each abbreviation picks one of a pair of units.  In the reservation
;; syntax "+" binds tighter than "|", so every string below reads as
;;   unit_0 | (unit_1 + present_unit_0)
;; i.e. the second unit is taken together with the matching present_*
;; unit.  The present_* units are constrained by the presence sets
;; declared in this file, so the second alternative is usable only
;; while the first unit of the pair is already reserved.

;; One of the two decode slots.
(define_reservation "ppc8540_decode"
    "ppc8540_decode_0|ppc8540_decode_1+present_ppc8540_decode_0")
;; One of the two issue slots.
(define_reservation "ppc8540_issue"
    "ppc8540_issue_0|ppc8540_issue_1+present_ppc8540_issue_0")
;; One of the two retire slots.
(define_reservation "ppc8540_retire"
   "ppc8540_retire_0|ppc8540_retire_1+present_ppc8540_retire_0")
;; Stage 0 of either SU0 or SU1.
(define_reservation "ppc8540_su_stage0"
   "ppc8540_su0_stage0|ppc8540_su1_stage0+present_ppc8540_su0_stage0")

;; Simple SU insns.  Default latency is 1.  Matches the listed "type"
;; values when "cpu" is ppc8540 or ppc8548.  A decode slot is reserved
;; on the first cycle; on the next cycle an issue slot, stage 0 of
;; either SU and a retire slot are reserved together.
(define_insn_reservation "ppc8540_su" 1
  (and (eq_attr "type" "integer,add,logical,insert,cmp,\
                        shift,trap,cntlz,exts,isel")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire")

;; Insns of type "two".  Default latency is 1.  After the decode cycle,
;; the issue + SU stage 0 + retire combination is reserved on each of
;; two consecutive cycles.
(define_insn_reservation "ppc8540_two" 1
  (and (eq_attr "type" "two")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire,\
   ppc8540_issue+ppc8540_su_stage0+ppc8540_retire")

;; Insns of type "three".  Default latency is 1.  After the decode
;; cycle, the issue + SU stage 0 + retire combination is reserved on
;; each of three consecutive cycles.
(define_insn_reservation "ppc8540_three" 1
  (and (eq_attr "type" "three")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire,\
   ppc8540_issue+ppc8540_su_stage0+ppc8540_retire,\
   ppc8540_issue+ppc8540_su_stage0+ppc8540_retire")

;; Branch.  Actually this latency time is not used by the scheduler.
;; Covers types jmpreg, branch and isync.  Decode, the branch unit and
;; retire are reserved on three successive cycles; no issue slot is
;; reserved.
(define_insn_reservation "ppc8540_branch" 1
  (and (eq_attr "type" "jmpreg,branch,isync")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_bu,ppc8540_retire")

;; Multiply.  Default latency is 4.  After decode, the insn takes an
;; issue slot together with MU stage 0, then moves through MU stages 1,
;; 2 and 3 on the following cycles; a retire slot is reserved together
;; with stage 3.
(define_insn_reservation "ppc8540_multiply" 4
  (and (eq_attr "type" "mul")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0,ppc8540_mu_stage1,\
   ppc8540_mu_stage2,ppc8540_mu_stage3+ppc8540_retire")

;; Divide.  We use the average latency time here.  We omit reserving a
;; retire unit because the resulting automata would be huge.  We ignore
;; reservation of mu_stage3 here because we use the average latency
;; time (MU stages 1 and 2 are likewise not reserved).
;;
;; After decode, ppc8540_mu_div is reserved together with an issue slot
;; and MU stage 0, then held alone for 13 more cycles: 14 cycles in
;; total, equal to the default latency.
(define_insn_reservation "ppc8540_divide" 14
  (and (eq_attr "type" "div")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0+ppc8540_mu_div,\
   ppc8540_mu_div*13")

;; CR logical.  Default latency is 1.  Uses the same reservation as
;; branches: decode, then the branch unit, then retire, with no issue
;; slot reserved.
(define_insn_reservation "ppc8540_cr_logical" 1
  (and (eq_attr "type" "cr_logical")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_bu,ppc8540_retire")

;; Mfcr.  Default latency is 1.  Unlike the simple SU insns, this names
;; ppc8540_su1_stage0 directly instead of the ppc8540_su_stage0
;; abbreviation, so only SU1 can be reserved.
(define_insn_reservation "ppc8540_mfcr" 1
  (and (eq_attr "type" "mfcr")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire")

;; Mtcrf (insn type "mtcr").  Default latency is 1.  Names
;; ppc8540_su1_stage0 directly, so only SU1 can be reserved.
(define_insn_reservation "ppc8540_mtcrf" 1
  (and (eq_attr "type" "mtcr")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire")

;; Mtjmpr and mfjmpr.  Default latency is 1.  Same reservation as the
;; simple SU insns: decode, then issue + stage 0 of either SU + retire.
(define_insn_reservation "ppc8540_mtjmpr" 1
  (and (eq_attr "type" "mtjmpr,mfjmpr")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire")

;; Loads (types load, load_l and sync).  Default latency is 3.  After
;; decode, an issue slot and the LSU are reserved together; the next
;; cycle reserves no unit ("nothing"); a retire slot is reserved on the
;; cycle after that.
(define_insn_reservation "ppc8540_load" 3
  (and (eq_attr "type" "load,load_l,sync")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_lsu,nothing,ppc8540_retire")

;; Stores (types store and store_c).  Default latency is 3.  The
;; reservation is identical to the one used for loads: decode, then
;; issue + LSU, one cycle with no unit reserved, then retire.
(define_insn_reservation "ppc8540_store" 3
  (and (eq_attr "type" "store,store_c")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_lsu,nothing,ppc8540_retire")

;; Simple FP (type fpsimple).  Default latency is 1.  Reserved like a
;; simple SU insn: decode, then issue + stage 0 of either SU + retire.
(define_insn_reservation "ppc8540_simple_float" 1
  (and (eq_attr "type" "fpsimple")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire")

;; FP (type fp).  Default latency is 4.  Uses the four-stage MU
;; pipeline: issue together with MU stage 0, then stages 1, 2 and 3 on
;; the following cycles, with a retire slot reserved alongside stage 3.
(define_insn_reservation "ppc8540_float" 4
  (and (eq_attr "type" "fp")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0,ppc8540_mu_stage1,\
   ppc8540_mu_stage2,ppc8540_mu_stage3+ppc8540_retire")

;; Float divides (type vecfdiv).  We omit reserving a retire unit and
;; mu_stage3 because the resulting automata would be huge (MU stages 1
;; and 2 are likewise not reserved).
;;
;; After decode, ppc8540_mu_div is reserved together with an issue slot
;; and MU stage 0, then held alone for 28 more cycles: 29 cycles in
;; total, equal to the default latency.
(define_insn_reservation "ppc8540_float_vector_divide" 29
  (and (eq_attr "type" "vecfdiv")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0+ppc8540_mu_div,\
   ppc8540_mu_div*28")

;; Simple vector (types vecsimple, veclogical and vecmove).  Default
;; latency is 1.  Names ppc8540_su1_stage0 directly, so only SU1 can be
;; reserved.
(define_insn_reservation "ppc8540_simple_vector" 1
  (and (eq_attr "type" "vecsimple,veclogical,vecmove")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire")

;; Simple vector compare (type veccmpsimple).  Default latency is 1.
;; Uses the ppc8540_su_stage0 abbreviation, so stage 0 of either SU can
;; be reserved.
(define_insn_reservation "ppc8540_simple_vector_compare" 1
  (and (eq_attr "type" "veccmpsimple")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_su_stage0+ppc8540_retire")

;; Vector compare (types veccmp and veccmpfx).  Default latency is 1.
;; Names ppc8540_su1_stage0 directly, so only SU1 can be reserved.
(define_insn_reservation "ppc8540_vector_compare" 1
  (and (eq_attr "type" "veccmp,veccmpfx")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire")

;; evsplatfi evsplati
;; Insns of type vecperm.  Default latency is 1.  Names
;; ppc8540_su1_stage0 directly, so only SU1 can be reserved.
(define_insn_reservation "ppc8540_vector_perm" 1
  (and (eq_attr "type" "vecperm")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_su1_stage0+ppc8540_retire")

;; Vector float (type vecfloat).  Default latency is 4.  Uses the
;; four-stage MU pipeline: issue together with MU stage 0, then stages
;; 1, 2 and 3, with a retire slot reserved alongside stage 3.
(define_insn_reservation "ppc8540_float_vector" 4
  (and (eq_attr "type" "vecfloat")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0,ppc8540_mu_stage1,\
   ppc8540_mu_stage2,ppc8540_mu_stage3+ppc8540_retire")

;; Vector divides (type vecdiv): use the average latency.  We omit
;; reserving a retire unit because the resulting automata would be
;; huge.  We ignore reservation of mu_stage3 here because we use the
;; average latency time (MU stages 1 and 2 are likewise not reserved).
;;
;; After decode, ppc8540_mu_div is reserved together with an issue slot
;; and MU stage 0, then held alone for 13 more cycles: 14 cycles in
;; total, equal to the default latency.
(define_insn_reservation "ppc8540_vector_divide" 14
  (and (eq_attr "type" "vecdiv")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0+ppc8540_mu_div,\
   ppc8540_mu_div*13")

;; Complex vector (type veccomplex).  Default latency is 4.  Uses the
;; four-stage MU pipeline: issue together with MU stage 0, then stages
;; 1, 2 and 3, with a retire slot reserved alongside stage 3.
(define_insn_reservation "ppc8540_complex_vector" 4
  (and (eq_attr "type" "veccomplex")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_mu_stage0,ppc8540_mu_stage1,\
   ppc8540_mu_stage2,ppc8540_mu_stage3+ppc8540_retire")

;; Vector load (type vecload).  Default latency is 3.  After decode, an
;; issue slot and the LSU are reserved together; the next cycle
;; reserves no unit ("nothing"); a retire slot is reserved on the cycle
;; after that.
(define_insn_reservation "ppc8540_vector_load" 3
  (and (eq_attr "type" "vecload")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_lsu,nothing,ppc8540_retire")

;; Vector store (type vecstore).  Default latency is 3.  The
;; reservation is identical to the one used for vector loads: decode,
;; then issue + LSU, one cycle with no unit reserved, then retire.
(define_insn_reservation "ppc8540_vector_store" 3
  (and (eq_attr "type" "vecstore")
       (eq_attr "cpu" "ppc8540,ppc8548"))
  "ppc8540_decode,ppc8540_issue+ppc8540_lsu,nothing,ppc8540_retire")