diff -u gcc-5-5.5.0/debian/changelog gcc-5-5.5.0/debian/changelog --- gcc-5-5.5.0/debian/changelog +++ gcc-5-5.5.0/debian/changelog @@ -1,3 +1,9 @@ +gcc-5 (5.5.0-1ubuntu1+thunderx2.2) artful; urgency=medium + + * Add thunderx2 support. LP: #1727540. + + -- dann frazier Thu, 26 Oct 2017 10:06:39 -0600 + gcc-5 (5.5.0-1ubuntu1) artful; urgency=medium * Merge with Debian; remaining changes: diff -u gcc-5-5.5.0/debian/rules.patch gcc-5-5.5.0/debian/rules.patch --- gcc-5-5.5.0/debian/rules.patch +++ gcc-5-5.5.0/debian/rules.patch @@ -118,6 +118,10 @@ pr71183 \ sanitizer-Fix-a-crash-in-SizeClassAllocator32-with-a \ asan-Enable-48-bit-VMA-support-on-aarch64 \ + $(if $(with_linaro_branch),vulcan-to-thunderx2t99) \ + $(if $(with_linaro_branch),thunderx2t99-improve-cost-table) \ + $(if $(with_linaro_branch),thunderx2t99-scheduler) \ + $(if $(with_linaro_branch),thunderx2t99p1) # this is still needed on powerpc, e.g. firefox and insighttoolkit4 will ftbfs. ifneq (,$(filter $(DEB_TARGET_ARCH),powerpc)) only in patch2: unchanged: --- gcc-5-5.5.0.orig/debian/patches/thunderx2t99-improve-cost-table.diff +++ gcc-5-5.5.0/debian/patches/thunderx2t99-improve-cost-table.diff @@ -0,0 +1,22 @@ +Description: (thunderx2t99_addrcost_table): Improve cost table. 
+ Backported to gcc-5 by dann frazier +Author: Andrew Pinski +Origin: https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=244851 +Bug-Ubuntu: https://bugs.launchpad.net/bugs/1727540 +Applied-Upstream: svn://gcc.gnu.org/svn/gcc/trunk@244851 + +--- a/src/gcc/config/aarch64/aarch64.c 2017-10-19 19:13:38.407610061 -0400 ++++ b/src/gcc/config/aarch64/aarch64.c 2017-10-20 15:37:03.712619817 -0400 +@@ -270,9 +270,9 @@ static const struct cpu_addrcost_table x + static const struct cpu_addrcost_table thunderx2t99_addrcost_table = + { + { +- 0, /* hi */ +- 0, /* si */ +- 0, /* di */ ++ 1, /* hi */ ++ 1, /* si */ ++ 1, /* di */ + 2, /* ti */ + }, + 0, /* pre_modify */ only in patch2: unchanged: --- gcc-5-5.5.0.orig/debian/patches/thunderx2t99-scheduler.diff +++ gcc-5-5.5.0/debian/patches/thunderx2t99-scheduler.diff @@ -0,0 +1,485 @@ +Description: Add scheduler for Thunderx2t99 + * config/aarch64/aarch64-cores.def: Change the scheduler + to Thunderx2t99. + * config/aarch64/aarch64.md: Include thunderx2t99.md. + * config/aarch64/thunderx2t99.md: New file. + . + Backported to gcc-5 by dann frazier +Author: Julian Brown +Author: Naveen H.S +Author: Virendra Pathak +Origin: https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=245203 +Bug-Ubuntu: https://bugs.launchpad.net/bugs/1727540 +Applied-Upstream: svn://gcc.gnu.org/svn/gcc/trunk@245203 + +--- a/src/gcc/config/aarch64/aarch64-cores.def 2017-10-20 17:19:39.650846958 -0400 ++++ b/src/gcc/config/aarch64/aarch64-cores.def 2017-10-20 18:10:47.523130202 -0400 +@@ -51,8 +51,8 @@ AARCH64_CORE("xgene1", xgene1, x + + /* V8.1 Architecture Processors. 
*/ + +-AARCH64_CORE("thunderx2t99", thunderx2t99, cortexa57, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, "0x42", "0x516") +-AARCH64_CORE("vulcan", vulcan, cortexa57, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, "0x42", "0x516") ++AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, "0x42", "0x516") ++AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, "0x42", "0x516") + + /* V8 big.LITTLE implementations. */ + +diff -urpN a/src/gcc/config/aarch64/aarch64.md b/src/gcc/config/aarch64/aarch64.md +--- a/src/gcc/config/aarch64/aarch64.md 2017-10-20 17:04:34.804898636 -0400 ++++ b/src/gcc/config/aarch64/aarch64.md 2017-10-20 18:11:08.241347156 -0400 +@@ -208,6 +208,7 @@ + (include "../arm/cortex-a57.md") + (include "thunderx.md") + (include "../arm/xgene1.md") ++(include "thunderx2t99.md") + + ;; ------------------------------------------------------------------- + ;; Jumps and other miscellaneous insns +diff -urpN a/src/gcc/config/aarch64/thunderx2t99.md b/src/gcc/config/aarch64/thunderx2t99.md +--- a/src/gcc/config/aarch64/thunderx2t99.md 1969-12-31 19:00:00.000000000 -0500 ++++ b/src/gcc/config/aarch64/thunderx2t99.md 2017-10-20 18:11:52.526064571 -0400 +@@ -0,0 +1,443 @@ ++;; Cavium ThunderX 2 CN99xx pipeline description ++;; Copyright (C) 2016-2017 Free Software Foundation, Inc. ++;; ++;; Contributed by Cavium, Broadcom and Mentor Embedded. ++ ++;; This file is part of GCC. ++ ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++ ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++;; GNU General Public License for more details. ++ ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++ ++(define_automaton "thunderx2t99, thunderx2t99_advsimd, thunderx2t99_ldst") ++(define_automaton "thunderx2t99_mult") ++ ++(define_cpu_unit "thunderx2t99_i0" "thunderx2t99") ++(define_cpu_unit "thunderx2t99_i1" "thunderx2t99") ++(define_cpu_unit "thunderx2t99_i2" "thunderx2t99") ++ ++(define_cpu_unit "thunderx2t99_ls0" "thunderx2t99_ldst") ++(define_cpu_unit "thunderx2t99_ls1" "thunderx2t99_ldst") ++(define_cpu_unit "thunderx2t99_sd" "thunderx2t99_ldst") ++ ++; Pseudo-units for multiply pipeline. ++ ++(define_cpu_unit "thunderx2t99_i1m1" "thunderx2t99_mult") ++(define_cpu_unit "thunderx2t99_i1m2" "thunderx2t99_mult") ++(define_cpu_unit "thunderx2t99_i1m3" "thunderx2t99_mult") ++ ++; Pseudo-units for load delay (assuming dcache hit). ++ ++(define_cpu_unit "thunderx2t99_ls0d1" "thunderx2t99_ldst") ++(define_cpu_unit "thunderx2t99_ls0d2" "thunderx2t99_ldst") ++(define_cpu_unit "thunderx2t99_ls0d3" "thunderx2t99_ldst") ++ ++(define_cpu_unit "thunderx2t99_ls1d1" "thunderx2t99_ldst") ++(define_cpu_unit "thunderx2t99_ls1d2" "thunderx2t99_ldst") ++(define_cpu_unit "thunderx2t99_ls1d3" "thunderx2t99_ldst") ++ ++; Make some aliases for f0/f1. ++(define_cpu_unit "thunderx2t99_f0" "thunderx2t99_advsimd") ++(define_cpu_unit "thunderx2t99_f1" "thunderx2t99_advsimd") ++ ++(define_reservation "thunderx2t99_i012" "thunderx2t99_i0|thunderx2t99_i1|thunderx2t99_i2") ++(define_reservation "thunderx2t99_ls01" "thunderx2t99_ls0|thunderx2t99_ls1") ++(define_reservation "thunderx2t99_f01" "thunderx2t99_f0|thunderx2t99_f1") ++ ++(define_reservation "thunderx2t99_ls_both" "thunderx2t99_ls0+thunderx2t99_ls1") ++ ++; A load with delay in the ls0/ls1 pipes. 
++(define_reservation "thunderx2t99_l0delay" "thunderx2t99_ls0,\ ++ thunderx2t99_ls0d1,thunderx2t99_ls0d2,\ ++ thunderx2t99_ls0d3") ++(define_reservation "thunderx2t99_l1delay" "thunderx2t99_ls1,\ ++ thunderx2t99_ls1d1,thunderx2t99_ls1d2,\ ++ thunderx2t99_ls1d3") ++(define_reservation "thunderx2t99_l01delay" "thunderx2t99_l0delay|thunderx2t99_l1delay") ++ ++;; Branch and call instructions. ++ ++(define_insn_reservation "thunderx2t99_branch" 1 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "call,branch")) ++ "thunderx2t99_i2") ++ ++;; Integer arithmetic/logic instructions. ++ ++; Plain register moves are handled by renaming, and don't create any uops. ++ ++(define_insn_reservation "thunderx2t99_regmove" 0 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "mov_reg")) ++ "nothing") ++ ++(define_insn_reservation "thunderx2t99_alu_basic" 1 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "alu_imm,alu_sreg,alus_imm,alus_sreg,\ ++ adc_reg,adc_imm,adcs_reg,adcs_imm,\ ++ logic_reg,logic_imm,logics_reg,logics_imm,\ ++ csel,adr,mov_imm,shift_reg,shift_imm,bfm,\ ++ rbit,rev,extend,rotate_imm")) ++ "thunderx2t99_i012") ++ ++(define_insn_reservation "thunderx2t99_alu_shift" 2 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "alu_shift_imm,alu_ext,alu_shift_reg,\ ++ alus_shift_imm,alus_ext,alus_shift_reg,\ ++ logic_shift_imm,logics_shift_reg")) ++ "thunderx2t99_i012,thunderx2t99_i012") ++ ++(define_insn_reservation "thunderx2t99_div" 13 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "sdiv,udiv")) ++ "thunderx2t99_i1*3") ++ ++(define_insn_reservation "thunderx2t99_madd" 5 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "mla,smlal,umlal")) ++ "thunderx2t99_i1,thunderx2t99_i1m1,thunderx2t99_i1m2,thunderx2t99_i1m3,\ ++ thunderx2t99_i012") ++ ++; NOTE: smull, umull are used for "high part" multiplies too. 
++(define_insn_reservation "thunderx2t99_mul" 4 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "mul,smull,umull")) ++ "thunderx2t99_i1,thunderx2t99_i1m1,thunderx2t99_i1m2,thunderx2t99_i1m3") ++ ++(define_insn_reservation "thunderx2t99_countbits" 3 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "clz")) ++ "thunderx2t99_i1") ++ ++;; Integer loads and stores. ++ ++(define_insn_reservation "thunderx2t99_load_basic" 4 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "load1")) ++ "thunderx2t99_ls01") ++ ++(define_insn_reservation "thunderx2t99_loadpair" 5 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "load2")) ++ "thunderx2t99_i012,thunderx2t99_ls01") ++ ++(define_insn_reservation "thunderx2t99_store_basic" 1 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "store1")) ++ "thunderx2t99_ls01,thunderx2t99_sd") ++ ++(define_insn_reservation "thunderx2t99_storepair_basic" 1 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "store2")) ++ "thunderx2t99_ls01,thunderx2t99_sd") ++ ++;; FP data processing instructions. 
++ ++(define_insn_reservation "thunderx2t99_fp_simple" 5 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "ffariths,ffarithd,f_minmaxs,f_minmaxd")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_fp_addsub" 6 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "fadds,faddd")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_fp_cmp" 5 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "fcmps,fcmpd")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_fp_divsqrt_s" 16 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "fdivs,fsqrts")) ++ "thunderx2t99_f0*3|thunderx2t99_f1*3") ++ ++(define_insn_reservation "thunderx2t99_fp_divsqrt_d" 23 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "fdivd,fsqrtd")) ++ "thunderx2t99_f0*5|thunderx2t99_f1*5") ++ ++(define_insn_reservation "thunderx2t99_fp_mul_mac" 6 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "fmuls,fmuld,fmacs,fmacd")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_frint" 7 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "f_rints,f_rintd")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_fcsel" 4 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "fcsel")) ++ "thunderx2t99_f01") ++ ++;; FP miscellaneous instructions. ++ ++(define_insn_reservation "thunderx2t99_fp_cvt" 7 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "f_cvtf2i,f_cvt,f_cvti2f")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_fp_mov" 4 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "fconsts,fconstd,fmov,f_mrc")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_fp_mov_to_gen" 5 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "f_mcr")) ++ "thunderx2t99_f01") ++ ++;; FP loads and stores. 
++ ++(define_insn_reservation "thunderx2t99_fp_load_basic" 4 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "f_loads,f_loadd")) ++ "thunderx2t99_ls01") ++ ++(define_insn_reservation "thunderx2t99_fp_loadpair_basic" 4 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_load1_2reg")) ++ "thunderx2t99_ls01*2") ++ ++(define_insn_reservation "thunderx2t99_fp_store_basic" 1 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "f_stores,f_stored")) ++ "thunderx2t99_ls01,thunderx2t99_sd") ++ ++(define_insn_reservation "thunderx2t99_fp_storepair_basic" 1 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_store1_2reg")) ++ "thunderx2t99_ls01,(thunderx2t99_ls01+thunderx2t99_sd),thunderx2t99_sd") ++ ++;; ASIMD integer instructions. ++ ++(define_insn_reservation "thunderx2t99_asimd_int" 7 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_abd,neon_abd_q,\ ++ neon_arith_acc,neon_arith_acc_q,\ ++ neon_abs,neon_abs_q,\ ++ neon_add,neon_add_q,\ ++ neon_neg,neon_neg_q,\ ++ neon_add_long,neon_add_widen,\ ++ neon_add_halve,neon_add_halve_q,\ ++ neon_sub_long,neon_sub_widen,\ ++ neon_sub_halve,neon_sub_halve_q,\ ++ neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\ ++ neon_qabs,neon_qabs_q,\ ++ neon_qadd,neon_qadd_q,\ ++ neon_qneg,neon_qneg_q,\ ++ neon_qsub,neon_qsub_q,\ ++ neon_minmax,neon_minmax_q,\ ++ neon_reduc_minmax,neon_reduc_minmax_q,\ ++ neon_mul_b,neon_mul_h,neon_mul_s,\ ++ neon_mul_b_q,neon_mul_h_q,neon_mul_s_q,\ ++ neon_sat_mul_b,neon_sat_mul_h,neon_sat_mul_s,\ ++ neon_sat_mul_b_q,neon_sat_mul_h_q,neon_sat_mul_s_q,\ ++ neon_mla_b,neon_mla_h,neon_mla_s,\ ++ neon_mla_b_q,neon_mla_h_q,neon_mla_s_q,\ ++ neon_mul_b_long,neon_mul_h_long,\ ++ neon_mul_s_long,neon_mul_d_long,\ ++ neon_sat_mul_b_long,neon_sat_mul_h_long,\ ++ neon_sat_mul_s_long,\ ++ neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\ ++ neon_sat_mla_b_long,neon_sat_mla_h_long,\ ++ neon_sat_mla_s_long,\ ++ neon_shift_acc,neon_shift_acc_q,\ ++ 
neon_shift_imm,neon_shift_imm_q,\ ++ neon_shift_reg,neon_shift_reg_q,\ ++ neon_shift_imm_long,neon_shift_imm_narrow_q,\ ++ neon_sat_shift_imm,neon_sat_shift_imm_q,\ ++ neon_sat_shift_reg,neon_sat_shift_reg_q,\ ++ neon_sat_shift_imm_narrow_q")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_reduc_add" 5 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_reduc_add,neon_reduc_add_q")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_cmp" 7 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_compare,neon_compare_q,neon_compare_zero,\ ++ neon_tst,neon_tst_q")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_logic" 5 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_logic,neon_logic_q")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_polynomial" 5 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_mul_d_long")) ++ "thunderx2t99_f01") ++ ++;; ASIMD floating-point instructions. 
++ ++(define_insn_reservation "thunderx2t99_asimd_fp_simple" 5 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_d,\ ++ neon_fp_abs_s_q,neon_fp_abs_d_q,\ ++ neon_fp_compare_s,neon_fp_compare_d,\ ++ neon_fp_compare_s_q,neon_fp_compare_d_q,\ ++ neon_fp_minmax_s,neon_fp_minmax_d,\ ++ neon_fp_minmax_s_q,neon_fp_minmax_d_q,\ ++ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d,\ ++ neon_fp_reduc_minmax_s_q,neon_fp_reduc_minmax_d_q,\ ++ neon_fp_neg_s,neon_fp_neg_d,\ ++ neon_fp_neg_s_q,neon_fp_neg_d_q")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_fp_arith" 6 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\ ++ neon_fp_abd_s_q,neon_fp_abd_d_q,\ ++ neon_fp_addsub_s,neon_fp_addsub_d,\ ++ neon_fp_addsub_s_q,neon_fp_addsub_d_q,\ ++ neon_fp_reduc_add_s,neon_fp_reduc_add_d,\ ++ neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q,\ ++ neon_fp_mul_s,neon_fp_mul_d,\ ++ neon_fp_mul_s_q,neon_fp_mul_d_q,\ ++ neon_fp_mla_s,neon_fp_mla_d,\ ++ neon_fp_mla_s_q,neon_fp_mla_d_q")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_fp_conv" 7 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q,\ ++ neon_fp_to_int_s,neon_fp_to_int_d,\ ++ neon_fp_to_int_s_q,neon_fp_to_int_d_q,\ ++ neon_fp_round_s,neon_fp_round_d,\ ++ neon_fp_round_s_q,neon_fp_round_d_q")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_fp_div_s" 16 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_fp_div_d" 23 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_fp_div_d,neon_fp_div_d_q")) ++ "thunderx2t99_f01") ++ ++;; ASIMD miscellaneous instructions. 
++ ++(define_insn_reservation "thunderx2t99_asimd_misc" 5 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_rbit,\ ++ neon_bsl,neon_bsl_q,\ ++ neon_cls,neon_cls_q,\ ++ neon_cnt,neon_cnt_q,\ ++ neon_from_gp,neon_from_gp_q,\ ++ neon_dup,neon_dup_q,\ ++ neon_ext,neon_ext_q,\ ++ neon_ins,neon_ins_q,\ ++ neon_move,neon_move_q,\ ++ neon_fp_recpe_s,neon_fp_recpe_d,\ ++ neon_fp_recpe_s_q,neon_fp_recpe_d_q,\ ++ neon_fp_recpx_s,neon_fp_recpx_d,\ ++ neon_fp_recpx_s_q,neon_fp_recpx_d_q,\ ++ neon_rev,neon_rev_q,\ ++ neon_dup,neon_dup_q,\ ++ neon_permute,neon_permute_q")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_recip_step" 6 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_fp_recps_s,neon_fp_recps_s_q,\ ++ neon_fp_recps_d,neon_fp_recps_d_q,\ ++ neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\ ++ neon_fp_rsqrts_d, neon_fp_rsqrts_d_q")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_lut" 8 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_tbl1,neon_tbl1_q,neon_tbl2_q")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_elt_to_gr" 6 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_to_gp,neon_to_gp_q")) ++ "thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_ext" 7 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_shift_imm_narrow_q,neon_sat_shift_imm_narrow_q")) ++ "thunderx2t99_f01") ++ ++;; ASIMD load instructions. ++ ++; NOTE: These reservations attempt to model latency and throughput correctly, ++; but the cycle timing of unit allocation is not necessarily accurate (because ++; insns are split into uops, and those may be issued out-of-order). 
++ ++(define_insn_reservation "thunderx2t99_asimd_load1_1_mult" 4 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q")) ++ "thunderx2t99_ls01") ++ ++(define_insn_reservation "thunderx2t99_asimd_load1_2_mult" 4 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q")) ++ "thunderx2t99_ls_both") ++ ++(define_insn_reservation "thunderx2t99_asimd_load1_onelane" 5 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q")) ++ "thunderx2t99_l01delay,thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_load1_all" 5 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_load1_all_lanes,neon_load1_all_lanes_q")) ++ "thunderx2t99_l01delay,thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_load2" 5 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_load2_2reg,neon_load2_2reg_q,\ ++ neon_load2_one_lane,neon_load2_one_lane_q,\ ++ neon_load2_all_lanes,neon_load2_all_lanes_q")) ++ "(thunderx2t99_l0delay,thunderx2t99_f01)|(thunderx2t99_l1delay,\ ++ thunderx2t99_f01)") ++ ++;; ASIMD store instructions. ++ ++; Same note applies as for ASIMD load instructions. 
++ ++(define_insn_reservation "thunderx2t99_asimd_store1_1_mult" 1 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q")) ++ "thunderx2t99_ls01") ++ ++(define_insn_reservation "thunderx2t99_asimd_store1_2_mult" 1 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_store1_2reg,neon_store1_2reg_q")) ++ "thunderx2t99_ls_both") ++ ++(define_insn_reservation "thunderx2t99_asimd_store1_onelane" 1 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_store1_one_lane,neon_store1_one_lane_q")) ++ "thunderx2t99_ls01,thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_store2_mult" 1 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_store2_2reg,neon_store2_2reg_q")) ++ "thunderx2t99_ls_both,thunderx2t99_f01") ++ ++(define_insn_reservation "thunderx2t99_asimd_store2_onelane" 1 ++ (and (eq_attr "tune" "thunderx2t99") ++ (eq_attr "type" "neon_store2_one_lane,neon_store2_one_lane_q")) ++ "thunderx2t99_ls01,thunderx2t99_f01") only in patch2: unchanged: --- gcc-5-5.5.0.orig/debian/patches/thunderx2t99p1.diff +++ gcc-5-5.5.0/debian/patches/thunderx2t99p1.diff @@ -0,0 +1,48 @@ +Description: Change -mcpu=thunderx2t99's -mcpu=native support + Hi, + When I implemented the -mcpu=thunderx2t99 I did not have the Cavium + partno for ThunderX CN99xx, only the original part no. This patch + adds the new part no for the future versions of the chip. + . + OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. + . + Thanks, + Andrew + . + ChangeLog: + * config/aarch64/aarch64-cores.def (thunderx2t99): Move to under 'C' + cores and change the partno/implementer to be correct. + (thunderx2t99p1): New core which replaces thunderx2t99 and still has + the 'B' as the implementer. + . 
+ Backported to gcc-5 by dann frazier +Author: Andrew Pinski +Origin: https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=245461 +Bug-Ubuntu: https://bugs.launchpad.net/bugs/1727540 +Applied-Upstream: svn://gcc.gnu.org/svn/gcc/trunk@245461 + +--- a/src/gcc/config/aarch64/aarch64-cores.def 2017-10-20 18:10:47.523130202 -0400 ++++ b/src/gcc/config/aarch64/aarch64-cores.def 2017-10-20 18:19:03.407936680 -0400 +@@ -48,10 +48,11 @@ AARCH64_CORE("exynos-m1", exynosm1, c + AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x51", "0x800") + AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1") + AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000") ++AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, "0x43", "0x0af") + + /* V8.1 Architecture Processors. */ + +-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, "0x42", "0x516") ++AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, "0x42", "0x516") + AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, "0x42", "0x516") + + /* V8 big.LITTLE implementations. 
*/ +diff -urpN a/src/gcc/config/aarch64/aarch64-tune.md b/src/gcc/config/aarch64/aarch64-tune.md +--- a/src/gcc/config/aarch64/aarch64-tune.md 2017-10-20 17:23:44.212332273 -0400 ++++ b/src/gcc/config/aarch64/aarch64-tune.md 2017-10-20 18:19:42.484113735 -0400 +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa35,cortexa53,cortexa57,cortexa72,exynosm1,qdf24xx,thunderx,xgene1,thunderx2t99,vulcan,cortexa57cortexa53,cortexa72cortexa53" ++ "cortexa35,cortexa53,cortexa57,cortexa72,exynosm1,qdf24xx,thunderx,xgene1,thunderx2t99,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) only in patch2: unchanged: --- gcc-5-5.5.0.orig/debian/patches/vulcan-to-thunderx2t99.diff +++ gcc-5-5.5.0/debian/patches/vulcan-to-thunderx2t99.diff @@ -0,0 +1,129 @@ +Description: Add -mcpu=thunderx2t99 support + * config/aarch64/aarch64-cores.def: Add thunderx2t99. Change vulcan + to reference thunderx2t99 for the tuning structure. + * config/aarch64/aarch64-cost-tables.h (vulcan_extra_costs): Rename to ... + (thunderx2t99_extra_costs): This. + * config/aarch64/aarch64-tune.md: Regenerate. + * config/aarch64/aarch64.c (vulcan_addrcost_table): Rename to ... + (thunderx2t99_addrcost_table): This. + (vulcan_regmove_cost): Rename to ... + (thunderx2t99_regmove_cost): This. + (vulcan_vector_cost): Rename to ... + (thunderx2t99_vector_cost): This. + (vulcan_branch_cost): Rename to ... + (thunderx2t99_branch_cost): This. + (vulcan_tunings): Rename to ... + (thunderx2t99_tunings): This and s/vulcan/thunderx2t99/. + * doc/invoke.texi (AARCH64/mtune): Add thunderx2t99. + . 
+ Backported to gcc-5 by dann frazier +Author: Andrew Pinski +Origin: https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=244235 +Bug-Ubuntu: https://bugs.launchpad.net/bugs/1727540 +Applied-Upstream: svn://gcc.gnu.org/svn/gcc/trunk@244235 + +--- a/src/gcc/config/aarch64/aarch64.c 2017-10-20 17:04:37.205144993 -0400 ++++ b/src/gcc/config/aarch64/aarch64.c 2017-10-20 17:19:39.650846958 -0400 +@@ -267,7 +267,7 @@ static const struct cpu_addrcost_table x + 0, /* imm_offset */ + }; + +-static const struct cpu_addrcost_table vulcan_addrcost_table = ++static const struct cpu_addrcost_table thunderx2t99_addrcost_table = + { + { + 0, /* hi */ +@@ -331,7 +331,7 @@ static const struct cpu_regmove_cost xge + 2 /* FP2FP */ + }; + +-static const struct cpu_regmove_cost vulcan_regmove_cost = ++static const struct cpu_regmove_cost thunderx2t99_regmove_cost = + { + 1, /* GP2GP */ + /* Avoid the use of int<->fp moves for spilling. */ +@@ -392,7 +392,7 @@ static const struct cpu_vector_cost xgen + }; + + /* Costs for vector insn classes for Vulcan. */ +-static const struct cpu_vector_cost vulcan_vector_cost = ++static const struct cpu_vector_cost thunderx2t99_vector_cost = + { + 6, /* scalar_stmt_cost */ + 4, /* scalar_load_cost */ +@@ -423,7 +423,7 @@ static const struct cpu_branch_cost cort + }; + + /* Branch costs for Vulcan. */ +-static const struct cpu_branch_cost vulcan_branch_cost = ++static const struct cpu_branch_cost thunderx2t99_branch_cost = + { + 1, /* Predictable. */ + 3 /* Unpredictable. */ +@@ -601,13 +601,13 @@ static const struct tune_params xgene1_t + (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. 
*/ + }; + +-static const struct tune_params vulcan_tunings = ++static const struct tune_params thunderx2t99_tunings = + { +- &vulcan_extra_costs, +- &vulcan_addrcost_table, +- &vulcan_regmove_cost, +- &vulcan_vector_cost, +- &vulcan_branch_cost, ++ &thunderx2t99_extra_costs, ++ &thunderx2t99_addrcost_table, ++ &thunderx2t99_regmove_cost, ++ &thunderx2t99_vector_cost, ++ &thunderx2t99_branch_cost, + 4, /* memmov_cost. */ + 4, /* issue_rate. */ + AARCH64_FUSE_NOTHING, /* fuseable_ops. */ +diff -urpN a/src/gcc/config/aarch64/aarch64-cores.def b/src/gcc/config/aarch64/aarch64-cores.def +--- a/src/gcc/config/aarch64/aarch64-cores.def 2017-10-20 17:04:37.209145404 -0400 ++++ b/src/gcc/config/aarch64/aarch64-cores.def 2017-10-20 17:19:39.650846958 -0400 +@@ -51,7 +51,8 @@ AARCH64_CORE("xgene1", xgene1, x + + /* V8.1 Architecture Processors. */ + +-AARCH64_CORE("vulcan", vulcan, cortexa57, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, "0x42", "0x516") ++AARCH64_CORE("thunderx2t99", thunderx2t99, cortexa57, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, "0x42", "0x516") ++AARCH64_CORE("vulcan", vulcan, cortexa57, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, "0x42", "0x516") + + /* V8 big.LITTLE implementations. 
*/ + +diff -urpN a/src/gcc/config/aarch64/aarch64-cost-tables.h b/src/gcc/config/aarch64/aarch64-cost-tables.h +--- a/src/gcc/config/aarch64/aarch64-cost-tables.h 2017-10-20 17:04:37.209145404 -0400 ++++ b/src/gcc/config/aarch64/aarch64-cost-tables.h 2017-10-20 17:19:39.650846958 -0400 +@@ -127,7 +127,7 @@ const struct cpu_cost_table thunderx_ext + } + }; + +-const struct cpu_cost_table vulcan_extra_costs = ++const struct cpu_cost_table thunderx2t99_extra_costs = + { + /* ALU */ + { +diff -urpN a/src/gcc/config/aarch64/aarch64-tune.md b/src/gcc/config/aarch64/aarch64-tune.md +--- a/src/gcc/config/aarch64/aarch64-tune.md 2017-10-20 17:04:37.173141710 -0400 ++++ b/src/gcc/config/aarch64/aarch64-tune.md 2017-10-20 17:23:44.212332273 -0400 +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa35,cortexa53,cortexa57,cortexa72,exynosm1,qdf24xx,thunderx,xgene1,vulcan,cortexa57cortexa53,cortexa72cortexa53" ++ "cortexa35,cortexa53,cortexa57,cortexa72,exynosm1,qdf24xx,thunderx,xgene1,thunderx2t99,vulcan,cortexa57cortexa53,cortexa72cortexa53" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) +diff -urpN a/src/gcc/doc/invoke.texi b/src/gcc/doc/invoke.texi +--- a/src/gcc/doc/invoke.texi 2017-10-20 17:04:39.461376437 -0400 ++++ b/src/gcc/doc/invoke.texi 2017-10-20 17:19:39.658847810 -0400 +@@ -12412,7 +12412,7 @@ Specify the name of the target processor + performance of the code. Permissible values for this option are: + @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a57}, + @samp{cortex-a72}, @samp{exynos-m1}, @samp{qdf24xx}, @samp{thunderx}, +-@samp{vulcan}, @samp{xgene1}. ++@samp{thunderx2t99}, @samp{vulcan}, @samp{xgene1}. + + Additionally, this option can specify that GCC should tune the performance + of the code for a big.LITTLE system. Permissible values for this