ChangeLog: * config/arm/arm.c (arm_rtx_costs_1): Handle vec_extract patterns. * config/arm/neon.md ("vec_extract<mode>"): Support memory destination operands, implemented via vst1 instruction. ("neon_vst1_lane<mode>"): Use UNSPEC_VST1_LANE instead of vec_select. * config/arm/predicates.md ("neon_lane_number"): Remove. Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c (revision 183253) +++ gcc/config/arm/arm.c (working copy) @@ -7647,6 +7647,17 @@ return true; case SET: + /* The vec_extract patterns accept memory operands that require an + address reload. Account for the cost of that reload to give the + auto-inc-dec pass an incentive to try to replace them. */ + if (TARGET_NEON && MEM_P (SET_DEST (x)) + && GET_CODE (SET_SRC (x)) == VEC_SELECT) + { + *total = rtx_cost (SET_DEST (x), code, 0, speed); + if (!neon_vector_mem_operand (SET_DEST (x), 2)) + *total += COSTS_N_INSNS (1); + return true; + } return false; default: Index: gcc/config/arm/neon.md =================================================================== --- gcc/config/arm/neon.md (revision 183253) +++ gcc/config/arm/neon.md (working copy) @@ -498,10 +498,10 @@ }) (define_insn "vec_extract<mode>" - [(set (match_operand:<V_elem> 0 "s_register_operand" "=r") + [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r") (vec_select:<V_elem> - (match_operand:VD 1 "s_register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + (match_operand:VD 1 "s_register_operand" "w,w") + (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] "TARGET_NEON" { if (BYTES_BIG_ENDIAN) @@ -510,17 +510,21 @@ elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; operands[2] = GEN_INT (elt); } - return "vmov%?.<V_uf_sclr>\t%0, %P1[%c2]"; + + if (which_alternative == 0) + return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; + else + return "vmov%?.<V_uf_sclr>\t%0, %P1[%c2]"; } - [(set_attr "predicable" "yes") - (set_attr "neon_type" "neon_bp_simple")] + [(set_attr "predicable" "*,yes") + (set_attr "neon_type" 
"neon_vst1_vst2_lane,neon_bp_simple")] ) (define_insn "vec_extract<mode>" - [(set (match_operand:<V_elem> 0 "s_register_operand" "=r") + [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r") (vec_select:<V_elem> - (match_operand:VQ 1 "s_register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + (match_operand:VQ 1 "s_register_operand" "w,w") + (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] "TARGET_NEON" { int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2; @@ -534,27 +538,33 @@ operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi); operands[2] = GEN_INT (elt); - return "vmov%?.<V_uf_sclr>\t%0, %P1[%c2]"; + if (which_alternative == 0) + return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0"; + else + return "vmov%?.<V_uf_sclr>\t%0, %P1[%c2]"; } - [(set_attr "predicable" "yes") - (set_attr "neon_type" "neon_bp_simple")] + [(set_attr "predicable" "*,yes") + (set_attr "neon_type" "neon_vst1_vst2_lane,neon_bp_simple")] ) (define_insn "vec_extractv2di" - [(set (match_operand:DI 0 "s_register_operand" "=r") + [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r") (vec_select:DI - (match_operand:V2DI 1 "s_register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] + (match_operand:V2DI 1 "s_register_operand" "w,w") + (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] "TARGET_NEON" { int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]); operands[1] = gen_rtx_REG (DImode, regno); - return "vmov%?\t%Q0, %R0, %P1 @ v2di"; + if (which_alternative == 0) + return "vst1.\t{%P1}, %A0 @ v2di"; + else + return "vmov%?\t%Q0, %R0, %P1 @ v2di"; } - [(set_attr "predicable" "yes") - (set_attr "neon_type" "neon_int_1")] + [(set_attr "predicable" "*,yes") + (set_attr "neon_type" "neon_vst1_vst2_lane,neon_int_1")] ) (define_expand "vec_init<mode>" @@ -4354,9 +4364,10 @@ (define_insn "neon_vst1_lane<mode>" [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") - (vec_select:<V_elem> - (match_operand:VDX 1 "s_register_operand" "w") - (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))] + (unspec:<V_elem> + 
[(match_operand:VDX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VST1_LANE))] "TARGET_NEON" { HOST_WIDE_INT lane = INTVAL (operands[2]); @@ -4375,9 +4386,10 @@ (define_insn "neon_vst1_lane<mode>" [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um") - (vec_select:<V_elem> - (match_operand:VQX 1 "s_register_operand" "w") - (parallel [(match_operand:SI 2 "neon_lane_number" "i")])))] + (unspec:<V_elem> + [(match_operand:VQX 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_VST1_LANE))] "TARGET_NEON" { HOST_WIDE_INT lane = INTVAL (operands[2]); Index: gcc/config/arm/predicates.md =================================================================== --- gcc/config/arm/predicates.md (revision 183253) +++ gcc/config/arm/predicates.md (working copy) @@ -677,10 +677,6 @@ (ior (match_operand 0 "imm_for_neon_inv_logic_operand") (match_operand 0 "s_register_operand"))) -;; TODO: We could check lane numbers more precisely based on the mode. -(define_predicate "neon_lane_number" - (and (match_code "const_int") - (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 15"))) ;; Predicates for named expanders that overlap multiple ISAs. (define_predicate "cmpdi_operand"