On Tue, Sep 05 2023, Russell King (Oracle) wrote:
[...]
+/* dst = *(signed size*)(src + off) */ +static inline void emit_ldsx_r(const s8 dst[], const s8 src,
s16 off, struct jit_ctx *ctx, const u8 sz){
- const s8 *tmp = bpf2a32[TMP_REG_1];
- const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
- s8 rm = src;
- if (!is_ldst_imm8(off, sz)) {
emit_a32_mov_i(tmp[0], off, ctx);
emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
Hmm. This looks inefficient when "off" is able to fit in an immediate. Please try:
int add_off;
if (!is_ldst_imm8(off, sz)) { add_off = imm8m(off); if (add_off > 0) { emit(ARM_ADD_I(tmp[0], src, add_off), ctx); rm = tmp[0]; } else { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); rm = tmp[0]; } off = 0;
- } else if (rd[1] == rm) {
emit(ARM_MOV_R(tmp[0], rm), ctx);
rm = tmp[0];
Why do you need this? rd and rm can be the same for LDRS[BH].
I agree that this is not required, will remove in the next version. Will also use the suggested optimization for immediate.
- }
- switch (sz) {
- case BPF_B:
/* Load a Byte with sign extension*/
emit(ARM_LDRSB_I(rd[1], rm, off), ctx);
/* Carry the sign extension to upper 32 bits */
emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);
break;
- case BPF_H:
/* Load a HalfWord with sign extension*/
emit(ARM_LDRSH_I(rd[1], rm, off), ctx);
/* Carry the sign extension to upper 32 bits */
emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);
break;
- case BPF_W:
/* Load a Word*/
emit(ARM_LDR_I(rd[1], rm, off), ctx);
/* Carry the sign extension to upper 32 bits */
emit(ARM_ASR_I(rd[0], rd[1], 31), ctx);
The last instruction extending to the upper 32 bits is the same in each of these cases, so is there any reason not to do it outside the switch statement?
Will move it outside in the next version.
Thanks, Puranjay