gem5-dev@gem5.org

The gem5 Developer List

View all threads

[L] Change in gem5/gem5[develop]: arch-arm: Partial SVE2 Implementation

GT
Giacomo Travaglini (Gerrit)
Thu, May 4, 2023 9:03 AM

Giacomo Travaglini has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/70277?usp=email )

Change subject: arch-arm: Partial SVE2 Implementation
......................................................................

arch-arm: Partial SVE2 Implementation

Instructions added:

ADCLB/T, SBCLB/T, BGRP, RAX1, EOR3, BCAX,
XAR & TBX, PMUL, PMULLB/T, SMULLB/T and UMULLB/T

Change-Id: Ia135ba9300eae312b24342bcbda835fef6867113

M src/arch/arm/isa/formats/sve_2nd_level.isa
M src/arch/arm/isa/formats/sve_top_level.isa
M src/arch/arm/isa/insts/sve.isa
3 files changed, 482 insertions(+), 16 deletions(-)

diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa
b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 2ee3817..dae6fc6 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -483,6 +483,196 @@
}  // decodeSveIntArithUnpred

  // Decoder for the unpredicated integer multiply encodings.
  // Only PMUL (opc == 0x1 with size == 0x0) is implemented; MUL (vectors,
  // unpredicated), SMULH (unpredicated), UMULH (unpredicated) and every
  // remaining encoding decode to Unknown64.
  StaticInstPtr
  decodeSveIntMulUnpred(ExtMachInst machInst)
  {
      const uint8_t opc = bits(machInst, 11, 10);
      const uint8_t size = bits(machInst, 23, 22);

      if (opc == 0x1 && size == 0x0) {
          // PMUL: only the uint8_t instantiation is decoded.
          const RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
          const RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
          const RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
          return new SvePmul<uint8_t>(machInst, zd, zn, zm);
      }

      return new Unknown64(machInst);
  }  // decodeSveIntMulUnpred
  • StaticInstPtr
  • decodeSveIntTerUnpred(ExtMachInst machInst)
  • {
  •    RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    
  •    RegIndex zk = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    
  •    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    
  •    uint8_t opc = bits(machInst, 23, 22) << 1 | bits(machInst, 10);
    
  •    switch (opc) {
    
  •      case 0x0:
    
  •        return new SveEor3<uint64_t>(machInst, zdn, zm, zk);
    
  •      case 0x2:
    
  •        return new SveBcax<uint64_t>(machInst, zdn, zm, zk);
    
  •      case 0x1:
    
  •        // BSL
    
  •      case 0x3:
    
  •        // BSL1N
    
  •      case 0x5:
    
  •        // BSL2N
    
  •      case 0x7:
    
  •        // NBSL
    
  •      default:
    
  •        return new Unknown64(machInst);
    
  •    }
    
  • }  // decodeSveIntTerUnpred
  • StaticInstPtr
  • decodeSveIntMulLong(ExtMachInst machInst)
  • {
  •    RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    
  •    RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    
  •    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    
  •    uint8_t opc_u_t = bits(machInst, 12, 10);
    
  •    uint8_t size = bits(machInst, 23, 22);
    
  •    switch (opc_u_t) {
    
  •        case 0x2:
    
  •          return decodeSveBinUnpredS2<SvePmullb>(
    
  •                  size, machInst, zd, zn, zm);
    
  •        case 0x3:
    
  •          return decodeSveBinUnpredS2<SvePmullt>(
    
  •                  size, machInst, zd, zn, zm);
    
  •        case 0x4:
    
  •          return decodeSveBinUnpred2<SveSmullb>(
    
  •                  size, 0, machInst, zd, zn, zm);
    
  •        case 0x5:
    
  •          return decodeSveBinUnpred2<SveSmullt>(
    
  •                  size, 0, machInst, zd, zn, zm);
    
  •        case 0x6:
    
  •          return decodeSveBinUnpred2<SveUmullb>(
    
  •                  size, 1, machInst, zd, zn, zm);
    
  •        case 0x7:
    
  •          return decodeSveBinUnpred2<SveUmullt>(
    
  •                  size, 1, machInst, zd, zn, zm);
    
  •        case 0x0:
    
  •          // SQDMULLB
    
  •        case 0x1:
    
  •          // SQDMULLT
    
  •        default:
    
  •          return new Unknown64(machInst);
    
  •    }
    
  • }  // decodeSveIntMulLong
  • StaticInstPtr
  • decodeSveBitPerm(ExtMachInst machInst)
  • {
  •    RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    
  •    RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    
  •    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    
  •    uint8_t opc = bits(machInst, 11, 10);
    
  •    uint8_t size = bits(machInst, 23, 22);
    
  •    switch (opc) {
    
  •      case 0x2:
    
  •        return decodeSveBinUnpredU<SveBgrp>(
    
  •                size, machInst, zd, zn, zm);
    
  •      case 0x0:
    
  •        // BEXT
    
  •      case 0x1:
    
  •        // BDEP
    
  •      default:
    
  •        return new Unknown64(machInst);
    
  •    }
    
  • }  // decodeSveBitPerm
  • StaticInstPtr
  • decodeSveIntLongCarry(ExtMachInst machInst)
  • {
  •    RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    
  •    RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    
  •    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    
  •    uint8_t opc = (bits(machInst, 23) << 1) | bits(machInst, 10);
    
  •    uint8_t size = bits(machInst, 22);
    
  •    switch (opc) {
    
  •        case 0x0:
    
  •          return decodeSveTerUnpredU<SveAdclb>(
    
  •                    size, machInst, zda, zn, zm);
    
  •        case 0x1:
    
  •          return decodeSveTerUnpredU<SveAdclt>(
    
  •                    size, machInst, zda, zn, zm);
    
  •        case 0x2:
    
  •          return decodeSveTerUnpredU<SveSbclb>(
    
  •                    size, machInst, zda, zn, zm);
    
  •        case 0x3:
    
  •          return decodeSveTerUnpredU<SveSbclt>(
    
  •                    size, machInst, zda, zn, zm);
    
  •        default:
    
  •          return new Unknown64(machInst);
    
  •    }
    
  • }  // decodeSveIntLongCarry
  • StaticInstPtr
  • decodeSveIntRotImm(ExtMachInst machInst)
  • {
  •    RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    
  •    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    
  •    uint8_t imm3 = (RegIndex) (uint8_t) bits(machInst, 18, 16);
    
  •    uint8_t tsize = (bits(machInst, 23, 22) << 2) | bits(machInst, 20,  
    

19);

  •    uint8_t esize = 0;
    
  •    uint8_t size = 0;
    
  •    if (tsize == 0x0) {
    
  •        return new Unknown64(machInst);
    
  •    } else if (tsize == 0x1) {
    
  •        esize = 8;
    
  •    } else if ((tsize & 0x0E) == 0x2) {
    
  •        esize = 16;
    
  •        size = 1;
    
  •    } else if ((tsize & 0x0C) == 0x4) {
    
  •        esize = 32;
    
  •        size = 2;
    
  •    } else if ((tsize & 0x08) == 0x8) {
    
  •        esize = 64;
    
  •        size = 3;
    
  •    }
    
  •    unsigned rot_am = 2 * esize - ((tsize << 3) | imm3);
    
  •    return decodeSveBinImmDestrUnpredU<SveXar>(
    
  •            size, machInst, zdn, zm, rot_am);
    
  • }  // decodeSveIntRotImm
  • StaticInstPtr
  • decodeSveCryptBinConstr(ExtMachInst machInst)
  • {
  •    RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    
  •    RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    
  •    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    
  •    uint8_t size = bits(machInst, 23, 22);
    
  •    uint8_t opc = bits(machInst, 10);
    
  •    uint8_t size_opc = (size << 1) | opc;
    
  •    switch (size_opc) {
    
  •      case 0x1:
    
  •        return new SveRax1<uint64_t>(machInst, zd, zn, zm);
    
  •      case 0x0:
    
  •        // SM4EKEY
    
  •      default:
    
  •        return new Unknown64(machInst);
    
  •    }
    
  • }  // decodeSveCryptBinConstr
  • StaticInstPtr
    decodeSveIntLogUnpred(ExtMachInst machInst)
    {
    RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    @@ -1014,12 +1204,19 @@
    decodeSvePermUnpred(ExtMachInst machInst)
    {
    uint8_t b12_10 = bits(machInst, 12, 10);
  •    if (b12_10 == 0x4) {
    
  •    if ((b12_10 == 0x4) || (bits(machInst, 12, 11) == 0x1)) {
            unsigned size = (unsigned) bits(machInst, 23, 22);
            RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
            RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
            RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    
  •        return decodeSveBinUnpredU<SveTbl>(size, machInst, zd, zn, zm);
    
  •        if (b12_10 == 0x4) { // TBL, two sources
    
  •            return decodeSveBinUnpredU<SveTbl>(size, machInst, zd, zn,  
    

zm);

  •        } else if (bits(machInst, 10) == 0x1) { // TBX
    
  •            return decodeSveBinUnpredU<SveTbx>(size, machInst, zd, zn,  
    

zm);

  •        // } else { // TBL, three sources
    
  •            // TBL, three sources
    
  •        }
    
  •        return new Unknown64(machInst);
        } else if (bits(machInst, 20, 16) == 0x0 && b12_10 == 0x6) {
            uint8_t size = bits(machInst, 23, 22);
            RegIndex rn = makeSP(
    

@@ -1362,7 +1559,6 @@
RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
RegIndex pg = (RegIndex) (uint8_t) bits(machInst, 13, 10);
RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);

      uint8_t size = bits(machInst, 23, 22);

      return decodeSveBinConstrPredU<SveSel>(size,

diff --git a/src/arch/arm/isa/formats/sve_top_level.isa
b/src/arch/arm/isa/formats/sve_top_level.isa
index 155ec1c..04642b8 100644
--- a/src/arch/arm/isa/formats/sve_top_level.isa
+++ b/src/arch/arm/isa/formats/sve_top_level.isa
@@ -44,7 +44,9 @@
StaticInstPtr decodeSveShiftByImmPred(ExtMachInst machInst);
StaticInstPtr decodeSveIntArithUnaryPred(ExtMachInst machInst);
StaticInstPtr decodeSveIntMulAdd(ExtMachInst machInst);

  • StaticInstPtr decodeSveIntMulUnpred(ExtMachInst machInst);
    StaticInstPtr decodeSveIntArithUnpred(ExtMachInst machInst);

  • StaticInstPtr decodeSveIntTerUnpred(ExtMachInst machInst);
    StaticInstPtr decodeSveIntLogUnpred(ExtMachInst machInst);
    StaticInstPtr decodeSveIndexGen(ExtMachInst machInst);
    StaticInstPtr decodeSveStackAlloc(ExtMachInst machInst);
    @@ -69,6 +71,11 @@
    StaticInstPtr decodeSvePsel(ExtMachInst machInst);
    StaticInstPtr decodeSveIntWideImmUnpred(ExtMachInst machInst);
    StaticInstPtr decodeSveClamp(ExtMachInst machInst);

  • StaticInstPtr decodeSveIntRotImm(ExtMachInst machInst);

  • StaticInstPtr decodeSveCryptBinConstr(ExtMachInst machInst);

  • StaticInstPtr decodeSveBitPerm(ExtMachInst machInst);

  • StaticInstPtr decodeSveIntLongCarry(ExtMachInst machInst);

  • StaticInstPtr decodeSveIntMulLong(ExtMachInst machInst);

    StaticInstPtr decodeSveMultiplyAddUnpred(ExtMachInst machInst);
    StaticInstPtr decodeSveMultiplyIndexed(ExtMachInst machInst);
    @@ -141,7 +148,15 @@
    switch (b_15_14) {
    case 0x0:
    if (b_13) {

  •                    return decodeSveIntLogUnpred(machInst);
    
  •                    if (bits(machInst, 11)) {
    
  •                        return decodeSveIntTerUnpred(machInst);
    
  •                    } else {
    
  •                        if (bits(machInst, 10)) {
    
  •                            return decodeSveIntRotImm(machInst);
    
  •                        } else {
    
  •                            return decodeSveIntLogUnpred(machInst);
    
  •                        }
    
  •                    }
                    } else {
                        if (bits(machInst, 30)) {
                            return decodeSveMultiplyIndexed(machInst);
    

@@ -151,7 +166,7 @@
}
case 0x1:
if (b_13) {

  •                    return new Unknown64(machInst);
    
  •                    return decodeSveIntMulUnpred(machInst);
                    } else if (b_12) {
                        return decodeSveStackAlloc(machInst);
                    } else {
    

@@ -173,10 +188,23 @@
break;
}
case 0x2:

  •        if (bits(machInst, 20)) {
    
  •            return decodeSveIntWideImmPred(machInst);
    
  •        if (bits(machInst, 30)) {
    
  •            uint8_t b_15_14_13 = bits(machInst, 15, 13);
    
  •            switch (b_15_14_13) {
    
  •              case 0x3:
    
  •                return decodeSveIntMulLong(machInst);
    
  •              case 0x5:
    
  •                return decodeSveBitPerm(machInst);
    
  •              case 0x6:
    
  •                return decodeSveIntLongCarry(machInst);
    
  •            }
    
  •            break;
            } else {
    
  •            return decodeSveLogMaskImm(machInst);
    
  •            if (bits(machInst, 20)) {
    
  •                return decodeSveIntWideImmPred(machInst);
    
  •            } else {
    
  •                return decodeSveLogMaskImm(machInst);
    
  •            }
            }
          case 0x3:
            {
    

@@ -198,7 +226,11 @@
case 0x2:
return decodeSvePermPred(machInst);
case 0x3:

  •                return decodeSveSelVec(machInst);
    
  •                if (bits(machInst, 30)) {
    
  •                    return decodeSveCryptBinConstr(machInst);
    
  •                } else {
    
  •                    return decodeSveSelVec(machInst);
    
  •                }
                }
                break;
            }
    

diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 97d4ec7..91ecb47 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -325,6 +325,28 @@
}
}

  • // Decodes binary with immediate operand, destructive, unpredicated
  • // SVE instructions, handling unsigned variants only.
  • template <template <typename T> class Base>
  • StaticInstPtr
  • decodeSveBinImmDestrUnpredU(unsigned size, ExtMachInst machInst,
  •        RegIndex dest, RegIndex op1, unsigned immediate)
    
  • {
  •    switch (size) {
    
  •      case 0:
    
  •        return new Base<uint8_t>(machInst, dest, op1, immediate);
    
  •      case 1:
    
  •        return new Base<uint16_t>(machInst, dest, op1, immediate);
    
  •      case 2:
    
  •        return new Base<uint32_t>(machInst, dest, op1, immediate);
    
  •      case 3:
    
  •        return new Base<uint64_t>(machInst, dest, op1, immediate);
    
  •      default:
    
  •        return new Unknown64(machInst);
    
  •    }
    
  • }
  • // Decodes binary with immediate operand, destructive, predicated  
    

(merging)
// SVE instructions, handling unsigned variants only.
template <template <typename T> class Base>
@@ -612,6 +634,37 @@
}

  // Decodes binary, constructive, unpredicated SVE instructions.
  • // limited variants

  • template <template <typename T> class Base>

  • StaticInstPtr

  • decodeSveBinUnpred2(unsigned size, unsigned u, ExtMachInst machInst,

  •                   RegIndex dest, RegIndex op1, RegIndex op2)
    
  • {

  •    switch (size) {
    
  •      case 1:
    
  •        if (u) {
    
  •            return new Base<uint8_t>(machInst, dest, op1, op2);
    
  •        } else {
    
  •            return new Base<int8_t>(machInst, dest, op1, op2);
    
  •        }
    
  •      case 2:
    
  •        if (u) {
    
  •            return new Base<uint16_t>(machInst, dest, op1, op2);
    
  •        } else {
    
  •            return new Base<int16_t>(machInst, dest, op1, op2);
    
  •        }
    
  •      case 3:
    
  •        if (u) {
    
  •            return new Base<uint32_t>(machInst, dest, op1, op2);
    
  •        } else {
    
  •            return new Base<int32_t>(machInst, dest, op1, op2);
    
  •        }
    
  •      default:
    
  •        return new Unknown64(machInst);
    
  •    }
    
  • }

  • // Decodes binary, constructive, unpredicated SVE instructions.
    // Unsigned instructions only.
    template <template <typename T> class Base>
    StaticInstPtr
    @@ -653,6 +706,25 @@
    }
    }

  • // Decodes binary, constructive, unpredicated SVE instructions.

  • // unsigned instructions only, limited variants.

  • template <template <typename T> class Base>

  • StaticInstPtr

  • decodeSveBinUnpredS2(unsigned size, ExtMachInst machInst, RegIndex
    dest,

  •        RegIndex op1, RegIndex op2)
    
  • {

  •    switch (size) {
    
  •      case 0:
    
  •        return new Base<uint64_t>(machInst, dest, op1, op2);
    
  •      case 1:
    
  •        return new Base<uint8_t>(machInst, dest, op1, op2);
    
  •      case 3:
    
  •        return new Base<uint32_t>(machInst, dest, op1, op2);
    
  •      default:
    
  •        return new Unknown64(machInst);
    
  •    }
    
  • }

  • // Decodes binary, costructive, unpredicated SVE instructions, handling
    // floating-point variants only.
    template <template <typename T> class Base>
    

@@ -926,6 +998,24 @@
}
}

  • // Decodes ternary, destructive, unpredicated SVE instructions,
  • // handling unsigned words & double words only.
  • template <template <typename T> class Base>
  • StaticInstPtr
  • decodeSveTerUnpredU(unsigned size, ExtMachInst machInst,
  •                    RegIndex dest, RegIndex op1, RegIndex op2)
    
  • {
  •    switch (size) {
    
  •      case 0:
    
  •        return new Base<uint32_t>(machInst, dest, op1, op2);
    
  •      case 1:
    
  •        return new Base<uint64_t>(machInst, dest, op1, op2);
    
  •      default:
    
  •        return new Unknown64(machInst);
    
  •    }
    
  • }
  • // Decodes ternary with immediate operand, destructive, unpredicated  
    

SVE
// instructions handling floating-point variants only.
template <template <typename T> class Base>
@@ -1898,8 +1988,7 @@
def sveBinInst(name, Name, opClass, types, op, predType=PredType.NONE,
isDestructive=False, customIterCode=None,
decoder='Generic'):

  •    assert not (predType in (PredType.NONE, PredType.SELECT) and
    
  •                isDestructive)
    
  •    assert not ((predType == PredType.SELECT) and isDestructive)
        global header_output, exec_output, decoders
        code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
    

@@ -1914,7 +2003,12 @@
code += '''
const Element& srcElem1 = AA64FpOp1_x[i];'''
code += '''

  •            const Element& srcElem2 = AA64FpOp2_x[i];
    
  •            const Element& srcElem2 = AA64FpOp2_x[i];'''
    
  •        if (predType == PredType.NONE) and isDestructive:
    
  •            code += '''
    
  •            Element destElem = AA64FpDestMerge_x[i];'''
    
  •        else:
    
  •            code += '''
                Element destElem = 0;'''
            if predType != PredType.NONE:
                code += '''
    

@@ -2592,8 +2686,8 @@
'class_name' : 'Sve' + Name}
exec_output += SveOpExecDeclare.subst(substDict)

  • Generate definitions for SVE TBL instructions

  • def sveTblInst(name, Name, opClass, decoder = 'Generic'):
  • Generate definitions for SVE table lookup instructions with 2 sources

  • def sveTblInst(name, Name, opClass, decoder = 'Generic', merging =
    False):
    global header_output, exec_output, decoders
    code = sveEnabledCheckCode + '''
    unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
    @@ -2604,10 +2698,10 @@
    if (idx < eCount) {
    val = AA64FpOp1_x[idx];
    } else {
  •            val = 0;
    
  •            val = %(dest_elem)s;;
            }
            AA64FpDest_x[i] = val;
    
  •    }'''
    
  •    }''' % {'dest_elem': 'AA64FpDestMerge_x[i]' if merging else '0'}
        iop = ArmInstObjParams(name, 'Sve' + Name, 'SveTblOp',
                {'code': code, 'op_class': opClass}, [])
        header_output += SveBinUnpredOpDeclare.subst(iop)
    

@@ -2617,6 +2711,63 @@
'class_name' : 'Sve' + Name}
exec_output += SveOpExecDeclare.subst(substDict)

  # Generate definitions for integer add/subtract long with carry.
  # For every element pair the generated code adds the even destination
  # element, one element of the first source (the odd one when uptTop is
  # set, the even one otherwise; bitwise-inverted when subtract is set)
  # and bit 0 of the odd element of the second source. The sum is written
  # to the even destination element and the carry-out to the odd one.
  def sveLongCarryInst(name, Name, opClass, decoder = 'Generic',
          uptTop = False, subtract = False):
      global header_output, exec_output, decoders
      substitutions = {
          'offset': 1 if uptTop else 0,
          'op': '~' if subtract else '',
      }
      code = sveEnabledCheckCode + '''
      unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
              xc->tcBase());
      for (int i = 0; i < eCount/2; ++i) {
          const Element& srcElem1 = AA64FpOp1_x[2*i+%(offset)s];
          const Element& srcElem2 = AA64FpOp2_x[2*i+1];
          const Element& srcElem3 = AA64FpDestMerge_x[2*i];
          __uint128_t unsigned_sum = (__uint128_t)srcElem3 +
                                     (%(op)ssrcElem1) +
                                     (srcElem2 & 0x1);
          AA64FpDest_x[2*i] = (Element)unsigned_sum;
          AA64FpDest_x[2*i+1] = (Element)unsigned_sum !=
                                (__uint128_t)unsigned_sum;
      }
      ''' % substitutions
      iop = ArmInstObjParams(name, 'Sve' + Name, 'SveBinUnpredOp',
                             {'code': code, 'op_class': opClass}, [])
      header_output += SveBinUnpredOpDeclare.subst(iop)
      exec_output += SveOpExecute.subst(iop)
      # Only word and double-word element types are instantiated.
      for elemType in ('uint32_t', 'uint64_t'):
          exec_output += SveOpExecDeclare.subst(
              {'targs' : elemType, 'class_name' : 'Sve' + Name})

    
  # Generate definitions for long integer/poly multiplication instruction.
  # The snippet passed as `op` must declare `destElem` holding the
  # double-width product of srcElem1 and srcElem2; its low half is written
  # to the even destination element and its high half to the odd one.
  # uptTop selects the odd source elements instead of the even ones.
  def sveLongMulInst(name, Name, opClass, types, op, decoder = 'Generic',
          uptTop = False):
      global header_output, exec_output, decoders
      substitutions = {
          'offset': 1 if uptTop else 0,
          'op': op,
      }
      code = sveEnabledCheckCode + '''
      unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
              xc->tcBase());
      for (int i = 0; i < eCount/2; ++i) {
          const Element& srcElem1 = AA64FpOp1_x[2*i+%(offset)s];
          const Element& srcElem2 = AA64FpOp2_x[2*i+%(offset)s];
          %(op)s
          AA64FpDest_x[2*i] = (Element)destElem;
          AA64FpDest_x[2*i+1] = (Element)(destElem >>
                                 (sizeof(Element) << 3));
      }
      ''' % substitutions
      iop = ArmInstObjParams(name, 'Sve' + Name, 'SveBinUnpredOp',
                             {'code': code, 'op_class': opClass}, [])
      header_output += SveBinUnpredOpDeclare.subst(iop)
      exec_output += SveOpExecute.subst(iop)
      for elemType in types:
          exec_output += SveOpExecDeclare.subst(
              {'targs' : elemType, 'class_name' : 'Sve' + Name})

    
  • # Generate definitions for SVE Unpack instructions
    def sveUnpackInst(name, Name, opClass, sdtypes, unpackHalf,
                      regType, decoder = 'Generic'):
    

@@ -3167,6 +3318,10 @@
absCode = 'destElem = (Element) std::abs(srcElem1);'
sveUnaryInst('abs', 'Abs', 'SimdAluOp', signedTypes, absCode,
PredType.MERGE)

  # ADCLB
  sveLongCarryInst('adclb', 'Adclb', 'SimdAluOp',
                   uptTop = False, subtract = False)
  # ADCLT
  sveLongCarryInst('adclt', 'Adclt', 'SimdAluOp',
                   uptTop = True, subtract = False)

    ADD (immediate)

    sveWideImmInst('add', 'AddImm', 'SimdAddOp', unsignedTypes, addCode,
    False)

    ADD (vectors, predicated)

@@ -3272,6 +3427,29 @@
'''
sveBinInst('asrr', 'Asrr', 'SimdAluOp', unsignedTypes, asrrCode,
PredType.MERGE, True)

  # BCAX
  # Destructive: destElem starts as the destination value and is XORed
  # with (srcElem1 AND NOT srcElem2).
  bcaxCode = 'destElem ^= srcElem1 & (~srcElem2);'
  sveBinInst('bcax', 'Bcax', 'SimdAluOp', ('uint64_t',), bcaxCode,
             isDestructive=True)
  # BGRP
  # First loop: bits of srcElem1 whose mask bit in srcElem2 is set are
  # packed, in order, from bit 0 upwards. Second loop: the remaining bits
  # are packed, in order, from the top bit downwards.
  bgrpCode = '''
          int k = 0;
          int len = sizeof(Element) * 8;
          for(int j = 0; j < len; j++) {
              if(((srcElem2>>j) & (Element)0x1) == ((Element)0x1)){
                  destElem |= (((srcElem1>>j) & (Element)0x1) << k);
                  k++;
              }
          }
          k = len-1;
          for(int j = len-1; j >= 0; j--) {
              if(((srcElem2>>j) & ((Element)0x1)) == ((Element)0x0)){
                  destElem |= (((srcElem1>>j) & (Element)0x1) << k);
                  k--;
              }
          }
  '''
  sveBinInst('bgrp', 'Bgrp', 'SimdAluOp', unsignedTypes, bgrpCode)

    BIC (vectors, predicated)

    bicCode = 'destElem = srcElem1 & ~srcElem2;'
    sveBinInst('bic', 'BicPred', 'SimdAluOp', unsignedTypes, bicCode,
    @@ -3555,6 +3733,10 @@
    eorCode)
    svePredLogicalInst('eors', 'PredEors', 'SimdPredAluOp', ('uint8_t',),
    eorCode, isFlagSetting=True)
  # EOR3
  # Destructive three-way XOR: destElem starts as the destination value.
  # A dedicated variable is used so the `eorCode` snippet of the EOR
  # definitions is not overwritten, and the instruction is registered
  # under its own mnemonic ('eor3') rather than 'eor'.
  eor3Code = 'destElem ^= srcElem1 ^ srcElem2;'
  sveBinInst('eor3', 'Eor3', 'SimdAluOp', ('uint64_t',), eor3Code,
             isDestructive=True)
    # EORV
    eorvCode = 'destElem ^= srcElem1;'
    sveAssocReducInst('eorv', 'Eorv', 'SimdReduceAluOp', unsignedTypes,
    

@@ -4122,6 +4304,30 @@
pfalseCode)
# PFIRST
svePFirstInst('pfirst', 'Pfirst', 'SimdPredAluOp')

  # PMUL
  # poly_mul is a carry-less (XOR-accumulating) multiply of two 64-bit
  # values that returns the full 128-bit product.
  exec_output += '''
  __uint128_t poly_mul(uint64_t srcElem1, uint64_t srcElem2)
  {
      __uint128_t destElem = 0;
      __uint128_t extendedElem2 = srcElem2;
      int i;
      for (i=0; i < 64; i++) {
          if (((srcElem1 >> i) & 0x1) == 0x1) {
              destElem ^= (extendedElem2 << i);
          }
      }
      return destElem;
  }'''
  pmulCode = 'destElem = (uint8_t)poly_mul(srcElem1, srcElem2);'
  sveBinInst('pmul', 'Pmul', 'SimdAluOp', ('uint8_t',), pmulCode)
  # PMULLB
  pmullCode = '__uint128_t destElem = poly_mul(srcElem1, srcElem2);'
  sveLongMulInst('pmullb', 'Pmullb', 'SimdAluOp',
                 ('uint8_t', 'uint32_t', 'uint64_t'),
                 pmullCode, uptTop = False)
  # PMULLT
  sveLongMulInst('pmullt', 'Pmullt', 'SimdAluOp',
                 ('uint8_t', 'uint32_t', 'uint64_t'),
                 pmullCode, uptTop = True)
    # PNEXT
    svePNextInst('pnext', 'Pnext', 'SimdPredAluOp', unsignedTypes)
    # PSEL
    

@@ -4138,6 +4344,9 @@
# PUNPKLO
sveUnpackInst('punpklo', 'Punpklo', 'SimdPredAluOp',
unsignedWideSDTypes,
unpackHalf = Unpack.Low, regType = SrcRegType.Predicate)

  # RAX1
  # XOR of srcElem1 with srcElem2 rotated left by one bit. The rotate is
  # written for 64-bit elements, the only type instantiated here. The
  # instruction is registered under its own mnemonic ('rax1').
  rax1Code = 'destElem = srcElem1 ^ ((srcElem2 << 1) | (srcElem2 >> 63));'
  sveBinInst('rax1', 'Rax1', 'SimdAluOp', ('uint64_t',), rax1Code)

    RBIT

    rbitCode = '''
    destElem = reverseBits(srcElem1);'''
    @@ -4214,6 +4423,11 @@
    '''
    sveBinInst('sabd', 'Sabd', 'SimdAddOp', signedTypes, abdCode,
    PredType.MERGE, True)
  # SBCLB
  sveLongCarryInst('sbclb', 'Sbclb', 'SimdAluOp',
                   uptTop = False, subtract = True)
  # SBCLT
  sveLongCarryInst('sbclt', 'Sbclt', 'SimdAluOp',
                   uptTop = True, subtract = True)
    # SADDV
    addvCode = 'destElem += srcElem1;'
    sveWideningAssocReducInst('saddv', 'Saddv', 'SimdReduceAddOp',
    

@@ -4372,6 +4586,13 @@
destElem = do_mulh(srcElem1, srcElem2);'''
sveBinInst('smulh', 'Smulh', 'SimdMultOp', signedTypes, mulhCode,
PredType.MERGE, True)

  # SMULLB
  # Both operands are widened to int64_t before multiplying so the
  # product of the instantiated 8-, 16- and 32-bit elements is not
  # truncated.
  smullCode = 'int64_t destElem = (int64_t)srcElem1 * (int64_t)srcElem2;'
  sveLongMulInst('smullb', 'Smullb', 'SimdAluOp',
                 ('int8_t', 'int16_t', 'int32_t'),
                 smullCode, uptTop = False)
  # SMULLT
  sveLongMulInst('smullt', 'Smullt', 'SimdAluOp',
                 ('int8_t', 'int16_t', 'int32_t'),
                 smullCode, uptTop = True)
# SPLICE
sveSpliceInst('splice', 'Splice', 'SimdAluOp', unsignedTypes)
# SQADD (immediate)
@@ -4557,6 +4778,8 @@
sxtCode, PredType.MERGE)
# TBL
sveTblInst('tbl', 'Tbl', 'SimdAluOp')

  # TBX
  # Same generator as TBL, instantiated with merging enabled.
  sveTblInst('tbx', 'Tbx', 'SimdAluOp', merging = True)

    TRN1, TRN2 (predicates)

    trnPredIterCode = '''
    constexpr unsigned sz = sizeof(Element);
    @@ -4654,6 +4877,14 @@

    UMULH

    sveBinInst('umulh', 'Umulh', 'SimdMultOp', unsignedTypes, mulhCode,
    PredType.MERGE, True)
  # UMULLB
  # Both operands are widened to uint64_t before multiplying so the
  # product of the instantiated 8-, 16- and 32-bit elements is not
  # truncated.
  umullCode = 'uint64_t destElem = (uint64_t)srcElem1 * (uint64_t)srcElem2;'
  sveLongMulInst('umullb', 'Umullb', 'SimdAluOp',
                 ('uint8_t', 'uint16_t', 'uint32_t'),
                 umullCode, uptTop = False)
  # UMULLT
  sveLongMulInst('umullt', 'Umullt', 'SimdAluOp',
                 ('uint8_t', 'uint16_t', 'uint32_t'),
                 umullCode, uptTop = True)
    # UQADD (immediate)
    uqaddCode = '''
            destElem = srcElem1 + srcElem2;
    

@@ -4861,6 +5092,13 @@
Ffr_ub[i] = POp1_ub[i];
}'''
svePredWriteFfrInst('wrffr', 'Wrffr', 'SimdPredAluOp', wrffrCode,
False)

  # XAR
  # XOR the destination with the source, then rotate right by srcElem2
  # bits. The rotate amount is reduced modulo the element width and a
  # zero rotate is skipped: a rotate by the full width would otherwise
  # shift a 32- or 64-bit element by its own width, which is undefined
  # behaviour in C++.
  xarCode = '''
          const unsigned xarBits = sizeof(Element) * 8;
          const unsigned xarRot = srcElem2 % xarBits;
          destElem = AA64FpDestMerge_x[i] ^ srcElem1;
          if (xarRot != 0) {
              destElem = ((destElem >> xarRot) |
                      (destElem << (xarBits - xarRot)));
          }
  '''
  sveBinImmInst('xar', 'Xar', 'SimdAluOp', unsignedTypes, xarCode)

    ZIP1, ZIP2 (predicates)

    zipPredIterCode = '''
    constexpr unsigned sz = sizeof(Element);

--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/70277?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings

Gerrit-MessageType: newchange
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ia135ba9300eae312b24342bcbda835fef6867113
Gerrit-Change-Number: 70277
Gerrit-PatchSet: 1
Gerrit-Owner: Giacomo Travaglini <giacomo.travaglini@arm.com>

Giacomo Travaglini has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/70277?usp=email ) Change subject: arch-arm: Partial SVE2 Implementation ...................................................................... arch-arm: Partial SVE2 Implementation Instructions added: ADCLB/T, SBCLB/T, BGRP, RAX1, EOR3, BCAX, XAR & TBX, PMUL, PMULLB/T, SMULLB/T and UMULLB/T Change-Id: Ia135ba9300eae312b24342bcbda835fef6867113 --- M src/arch/arm/isa/formats/sve_2nd_level.isa M src/arch/arm/isa/formats/sve_top_level.isa M src/arch/arm/isa/insts/sve.isa 3 files changed, 482 insertions(+), 16 deletions(-) diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa index 2ee3817..dae6fc6 100644 --- a/src/arch/arm/isa/formats/sve_2nd_level.isa +++ b/src/arch/arm/isa/formats/sve_2nd_level.isa @@ -483,6 +483,196 @@ } // decodeSveIntArithUnpred StaticInstPtr + decodeSveIntMulUnpred(ExtMachInst machInst) + { + RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); + uint8_t opc = bits(machInst, 11, 10); + uint8_t size = bits(machInst, 23, 22); + + switch (opc) { + case 0x1: + if (size == 0x0) { + return new SvePmul<uint8_t>(machInst, zd, zn, zm); + } + [[fallthrough]]; + case 0x0: + // MUL (vectors, unpredicated) + case 0x2: + // SMULH (unpredicated) + case 0x3: + // UMULH (unpredicated) + default: + return new Unknown64(machInst); + } + + } // decodeSveIntMulUnpred + + StaticInstPtr + decodeSveIntTerUnpred(ExtMachInst machInst) + { + RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zk = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); + uint8_t opc = bits(machInst, 23, 22) << 1 | bits(machInst, 10); + + switch (opc) { + case 0x0: + return new SveEor3<uint64_t>(machInst, zdn, zm, zk); + case 0x2: + return new 
SveBcax<uint64_t>(machInst, zdn, zm, zk); + case 0x1: + // BSL + case 0x3: + // BSL1N + case 0x5: + // BSL2N + case 0x7: + // NBSL + default: + return new Unknown64(machInst); + } + } // decodeSveIntTerUnpred + + StaticInstPtr + decodeSveIntMulLong(ExtMachInst machInst) + { + RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); + uint8_t opc_u_t = bits(machInst, 12, 10); + uint8_t size = bits(machInst, 23, 22); + + switch (opc_u_t) { + case 0x2: + return decodeSveBinUnpredS2<SvePmullb>( + size, machInst, zd, zn, zm); + case 0x3: + return decodeSveBinUnpredS2<SvePmullt>( + size, machInst, zd, zn, zm); + case 0x4: + return decodeSveBinUnpred2<SveSmullb>( + size, 0, machInst, zd, zn, zm); + case 0x5: + return decodeSveBinUnpred2<SveSmullt>( + size, 0, machInst, zd, zn, zm); + case 0x6: + return decodeSveBinUnpred2<SveUmullb>( + size, 1, machInst, zd, zn, zm); + case 0x7: + return decodeSveBinUnpred2<SveUmullt>( + size, 1, machInst, zd, zn, zm); + case 0x0: + // SQDMULLB + case 0x1: + // SQDMULLT + default: + return new Unknown64(machInst); + } + } // decodeSveIntMulLong + + StaticInstPtr + decodeSveBitPerm(ExtMachInst machInst) + { + RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); + uint8_t opc = bits(machInst, 11, 10); + uint8_t size = bits(machInst, 23, 22); + + switch (opc) { + case 0x2: + return decodeSveBinUnpredU<SveBgrp>( + size, machInst, zd, zn, zm); + case 0x0: + // BEXT + case 0x1: + // BDEP + default: + return new Unknown64(machInst); + } + } // decodeSveBitPerm + + StaticInstPtr + decodeSveIntLongCarry(ExtMachInst machInst) + { + RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); + 
uint8_t opc = (bits(machInst, 23) << 1) | bits(machInst, 10); + uint8_t size = bits(machInst, 22); + + switch (opc) { + case 0x0: + return decodeSveTerUnpredU<SveAdclb>( + size, machInst, zda, zn, zm); + case 0x1: + return decodeSveTerUnpredU<SveAdclt>( + size, machInst, zda, zn, zm); + case 0x2: + return decodeSveTerUnpredU<SveSbclb>( + size, machInst, zda, zn, zm); + case 0x3: + return decodeSveTerUnpredU<SveSbclt>( + size, machInst, zda, zn, zm); + default: + return new Unknown64(machInst); + } + } // decodeSveIntLongCarry + + StaticInstPtr + decodeSveIntRotImm(ExtMachInst machInst) + { + RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 9, 5); + uint8_t imm3 = (RegIndex) (uint8_t) bits(machInst, 18, 16); + + uint8_t tsize = (bits(machInst, 23, 22) << 2) | bits(machInst, 20, 19); + uint8_t esize = 0; + uint8_t size = 0; + + if (tsize == 0x0) { + return new Unknown64(machInst); + } else if (tsize == 0x1) { + esize = 8; + } else if ((tsize & 0x0E) == 0x2) { + esize = 16; + size = 1; + } else if ((tsize & 0x0C) == 0x4) { + esize = 32; + size = 2; + } else if ((tsize & 0x08) == 0x8) { + esize = 64; + size = 3; + } + + unsigned rot_am = 2 * esize - ((tsize << 3) | imm3); + return decodeSveBinImmDestrUnpredU<SveXar>( + size, machInst, zdn, zm, rot_am); + } // decodeSveIntRotImm + + StaticInstPtr + decodeSveCryptBinConstr(ExtMachInst machInst) + { + RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); + uint8_t size = bits(machInst, 23, 22); + uint8_t opc = bits(machInst, 10); + uint8_t size_opc = (size << 1) | opc; + + switch (size_opc) { + case 0x1: + return new SveRax1<uint64_t>(machInst, zd, zn, zm); + case 0x0: + // SM4EKEY + default: + return new Unknown64(machInst); + } + } // decodeSveCryptBinConstr + + StaticInstPtr decodeSveIntLogUnpred(ExtMachInst machInst) { RegIndex zd = 
(RegIndex) (uint8_t) bits(machInst, 4, 0); @@ -1014,12 +1204,19 @@ decodeSvePermUnpred(ExtMachInst machInst) { uint8_t b12_10 = bits(machInst, 12, 10); - if (b12_10 == 0x4) { + if ((b12_10 == 0x4) || (bits(machInst, 12, 11) == 0x1)) { unsigned size = (unsigned) bits(machInst, 23, 22); RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0); RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); - return decodeSveBinUnpredU<SveTbl>(size, machInst, zd, zn, zm); + if (b12_10 == 0x4) { // TBL, two sources + return decodeSveBinUnpredU<SveTbl>(size, machInst, zd, zn, zm); + } else if (bits(machInst, 10) == 0x1) { // TBX + return decodeSveBinUnpredU<SveTbx>(size, machInst, zd, zn, zm); + // } else { // TBL, three sources + // TBL, three sources + } + return new Unknown64(machInst); } else if (bits(machInst, 20, 16) == 0x0 && b12_10 == 0x6) { uint8_t size = bits(machInst, 23, 22); RegIndex rn = makeSP( @@ -1362,7 +1559,6 @@ RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); RegIndex pg = (RegIndex) (uint8_t) bits(machInst, 13, 10); RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); - uint8_t size = bits(machInst, 23, 22); return decodeSveBinConstrPredU<SveSel>(size, diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa index 155ec1c..04642b8 100644 --- a/src/arch/arm/isa/formats/sve_top_level.isa +++ b/src/arch/arm/isa/formats/sve_top_level.isa @@ -44,7 +44,9 @@ StaticInstPtr decodeSveShiftByImmPred(ExtMachInst machInst); StaticInstPtr decodeSveIntArithUnaryPred(ExtMachInst machInst); StaticInstPtr decodeSveIntMulAdd(ExtMachInst machInst); + StaticInstPtr decodeSveIntMulUnpred(ExtMachInst machInst); StaticInstPtr decodeSveIntArithUnpred(ExtMachInst machInst); + StaticInstPtr decodeSveIntTerUnpred(ExtMachInst machInst); StaticInstPtr decodeSveIntLogUnpred(ExtMachInst machInst); StaticInstPtr decodeSveIndexGen(ExtMachInst machInst); StaticInstPtr 
decodeSveStackAlloc(ExtMachInst machInst); @@ -69,6 +71,11 @@ StaticInstPtr decodeSvePsel(ExtMachInst machInst); StaticInstPtr decodeSveIntWideImmUnpred(ExtMachInst machInst); StaticInstPtr decodeSveClamp(ExtMachInst machInst); + StaticInstPtr decodeSveIntRotImm(ExtMachInst machInst); + StaticInstPtr decodeSveCryptBinConstr(ExtMachInst machInst); + StaticInstPtr decodeSveBitPerm(ExtMachInst machInst); + StaticInstPtr decodeSveIntLongCarry(ExtMachInst machInst); + StaticInstPtr decodeSveIntMulLong(ExtMachInst machInst); StaticInstPtr decodeSveMultiplyAddUnpred(ExtMachInst machInst); StaticInstPtr decodeSveMultiplyIndexed(ExtMachInst machInst); @@ -141,7 +148,15 @@ switch (b_15_14) { case 0x0: if (b_13) { - return decodeSveIntLogUnpred(machInst); + if (bits(machInst, 11)) { + return decodeSveIntTerUnpred(machInst); + } else { + if (bits(machInst, 10)) { + return decodeSveIntRotImm(machInst); + } else { + return decodeSveIntLogUnpred(machInst); + } + } } else { if (bits(machInst, 30)) { return decodeSveMultiplyIndexed(machInst); @@ -151,7 +166,7 @@ } case 0x1: if (b_13) { - return new Unknown64(machInst); + return decodeSveIntMulUnpred(machInst); } else if (b_12) { return decodeSveStackAlloc(machInst); } else { @@ -173,10 +188,23 @@ break; } case 0x2: - if (bits(machInst, 20)) { - return decodeSveIntWideImmPred(machInst); + if (bits(machInst, 30)) { + uint8_t b_15_14_13 = bits(machInst, 15, 13); + switch (b_15_14_13) { + case 0x3: + return decodeSveIntMulLong(machInst); + case 0x5: + return decodeSveBitPerm(machInst); + case 0x6: + return decodeSveIntLongCarry(machInst); + } + break; } else { - return decodeSveLogMaskImm(machInst); + if (bits(machInst, 20)) { + return decodeSveIntWideImmPred(machInst); + } else { + return decodeSveLogMaskImm(machInst); + } } case 0x3: { @@ -198,7 +226,11 @@ case 0x2: return decodeSvePermPred(machInst); case 0x3: - return decodeSveSelVec(machInst); + if (bits(machInst, 30)) { + return decodeSveCryptBinConstr(machInst); + } else { + 
return decodeSveSelVec(machInst); + } } break; } diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa index 97d4ec7..91ecb47 100644 --- a/src/arch/arm/isa/insts/sve.isa +++ b/src/arch/arm/isa/insts/sve.isa @@ -325,6 +325,28 @@ } } + + // Decodes binary with immediate operand, destructive, unpredicated + // SVE instructions, handling unsigned variants only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveBinImmDestrUnpredU(unsigned size, ExtMachInst machInst, + RegIndex dest, RegIndex op1, unsigned immediate) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op1, immediate); + case 1: + return new Base<uint16_t>(machInst, dest, op1, immediate); + case 2: + return new Base<uint32_t>(machInst, dest, op1, immediate); + case 3: + return new Base<uint64_t>(machInst, dest, op1, immediate); + default: + return new Unknown64(machInst); + } + } + // Decodes binary with immediate operand, destructive, predicated (merging) // SVE instructions, handling unsigned variants only. template <template <typename T> class Base> @@ -612,6 +634,37 @@ } // Decodes binary, constructive, unpredicated SVE instructions. + // limited variants + template <template <typename T> class Base> + StaticInstPtr + decodeSveBinUnpred2(unsigned size, unsigned u, ExtMachInst machInst, + RegIndex dest, RegIndex op1, RegIndex op2) + { + switch (size) { + case 1: + if (u) { + return new Base<uint8_t>(machInst, dest, op1, op2); + } else { + return new Base<int8_t>(machInst, dest, op1, op2); + } + case 2: + if (u) { + return new Base<uint16_t>(machInst, dest, op1, op2); + } else { + return new Base<int16_t>(machInst, dest, op1, op2); + } + case 3: + if (u) { + return new Base<uint32_t>(machInst, dest, op1, op2); + } else { + return new Base<int32_t>(machInst, dest, op1, op2); + } + default: + return new Unknown64(machInst); + } + } + + // Decodes binary, constructive, unpredicated SVE instructions. // Unsigned instructions only. 
template <template <typename T> class Base> StaticInstPtr @@ -653,6 +706,25 @@ } } + // Decodes binary, constructive, unpredicated SVE instructions. + // unsigned instructions only, limited variants. + template <template <typename T> class Base> + StaticInstPtr + decodeSveBinUnpredS2(unsigned size, ExtMachInst machInst, RegIndex dest, + RegIndex op1, RegIndex op2) + { + switch (size) { + case 0: + return new Base<uint64_t>(machInst, dest, op1, op2); + case 1: + return new Base<uint8_t>(machInst, dest, op1, op2); + case 3: + return new Base<uint32_t>(machInst, dest, op1, op2); + default: + return new Unknown64(machInst); + } + } + // Decodes binary, costructive, unpredicated SVE instructions, handling // floating-point variants only. template <template <typename T> class Base> @@ -926,6 +998,24 @@ } } + // Decodes ternary, destructive, unpredicated SVE instructions, + // handling unsigned words & double words only. + template <template <typename T> class Base> + StaticInstPtr + decodeSveTerUnpredU(unsigned size, ExtMachInst machInst, + RegIndex dest, RegIndex op1, RegIndex op2) + { + switch (size) { + case 0: + return new Base<uint32_t>(machInst, dest, op1, op2); + case 1: + return new Base<uint64_t>(machInst, dest, op1, op2); + default: + return new Unknown64(machInst); + } + } + + // Decodes ternary with immediate operand, destructive, unpredicated SVE // instructions handling floating-point variants only. 
template <template <typename T> class Base> @@ -1898,8 +1988,7 @@ def sveBinInst(name, Name, opClass, types, op, predType=PredType.NONE, isDestructive=False, customIterCode=None, decoder='Generic'): - assert not (predType in (PredType.NONE, PredType.SELECT) and - isDestructive) + assert not ((predType == PredType.SELECT) and isDestructive) global header_output, exec_output, decoders code = sveEnabledCheckCode + ''' unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( @@ -1914,7 +2003,12 @@ code += ''' const Element& srcElem1 = AA64FpOp1_x[i];''' code += ''' - const Element& srcElem2 = AA64FpOp2_x[i]; + const Element& srcElem2 = AA64FpOp2_x[i];''' + if (predType == PredType.NONE) and isDestructive: + code += ''' + Element destElem = AA64FpDestMerge_x[i];''' + else: + code += ''' Element destElem = 0;''' if predType != PredType.NONE: code += ''' @@ -2592,8 +2686,8 @@ 'class_name' : 'Sve' + Name} exec_output += SveOpExecDeclare.subst(substDict) - # Generate definitions for SVE TBL instructions - def sveTblInst(name, Name, opClass, decoder = 'Generic'): + # Generate definitions for SVE table lookup instructions with 2 sources + def sveTblInst(name, Name, opClass, decoder = 'Generic', merging = False): global header_output, exec_output, decoders code = sveEnabledCheckCode + ''' unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( @@ -2604,10 +2698,10 @@ if (idx < eCount) { val = AA64FpOp1_x[idx]; } else { - val = 0; + val = %(dest_elem)s;; } AA64FpDest_x[i] = val; - }''' + }''' % {'dest_elem': 'AA64FpDestMerge_x[i]' if merging else '0'} iop = ArmInstObjParams(name, 'Sve' + Name, 'SveTblOp', {'code': code, 'op_class': opClass}, []) header_output += SveBinUnpredOpDeclare.subst(iop) @@ -2617,6 +2711,63 @@ 'class_name' : 'Sve' + Name} exec_output += SveOpExecDeclare.subst(substDict) + # Generate definitions for integer add/subtract long with carry + def sveLongCarryInst(name, Name, opClass, decoder = 'Generic', + uptTop = False, subtract = False): + global 
header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + for (int i = 0; i < eCount/2; ++i) { + const Element& srcElem1 = AA64FpOp1_x[2*i+%(offset)s]; + const Element& srcElem2 = AA64FpOp2_x[2*i+1]; + const Element& srcElem3 = AA64FpDestMerge_x[2*i]; + __uint128_t unsigned_sum = (__uint128_t)srcElem3 + + (%(op)ssrcElem1) + + (srcElem2 & 0x1); + AA64FpDest_x[2*i] = (Element)unsigned_sum; + AA64FpDest_x[2*i+1] = (Element)unsigned_sum != + (__uint128_t)unsigned_sum; + } + ''' % {'offset': 1 if uptTop else 0, + 'op': '~' if subtract else '', + } + iop = ArmInstObjParams(name, 'Sve' + Name, 'SveBinUnpredOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveBinUnpredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in ('uint32_t', 'uint64_t'): + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + + # Generate definitions for long integer/poly multiplication instruction + def sveLongMulInst(name, Name, opClass, types, op, decoder = 'Generic', + uptTop = False): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( + xc->tcBase()); + for (int i = 0; i < eCount/2; ++i) { + const Element& srcElem1 = AA64FpOp1_x[2*i+%(offset)s]; + const Element& srcElem2 = AA64FpOp2_x[2*i+%(offset)s]; + %(op)s + AA64FpDest_x[2*i] = (Element)destElem; + AA64FpDest_x[2*i+1] = (Element)(destElem >> + (sizeof(Element) << 3)); + } + ''' % {'offset': 1 if uptTop else 0, + 'op': op, + } + iop = ArmInstObjParams(name, 'Sve' + Name, 'SveBinUnpredOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveBinUnpredOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve' + Name} + exec_output += SveOpExecDeclare.subst(substDict) + # 
Generate definitions for SVE Unpack instructions def sveUnpackInst(name, Name, opClass, sdtypes, unpackHalf, regType, decoder = 'Generic'): @@ -3167,6 +3318,10 @@ absCode = 'destElem = (Element) std::abs(srcElem1);' sveUnaryInst('abs', 'Abs', 'SimdAluOp', signedTypes, absCode, PredType.MERGE) + # ADCLB + sveLongCarryInst('adclb', 'Adclb', 'SimdAluOp') + # ADCLT + sveLongCarryInst('adclt', 'Adclt', 'SimdAluOp', uptTop = True) # ADD (immediate) sveWideImmInst('add', 'AddImm', 'SimdAddOp', unsignedTypes, addCode, False) # ADD (vectors, predicated) @@ -3272,6 +3427,29 @@ ''' sveBinInst('asrr', 'Asrr', 'SimdAluOp', unsignedTypes, asrrCode, PredType.MERGE, True) + # BCAX + bcaxCode = 'destElem ^= srcElem1 & (~srcElem2);' + sveBinInst('bcax', 'Bcax', 'SimdAluOp', ('uint64_t',), bcaxCode, + isDestructive=True) + # BGRP + bgrpCode = ''' + int k = 0; + int len = sizeof(Element) * 8; + for(int j = 0; j < len; j++) { + if(((srcElem2>>j) & (Element)0x1) == ((Element)0x1)){ + destElem |= (((srcElem1>>j) & (Element)0x1) << k); + k++; + } + } + k = len-1; + for(int j = len-1; j >= 0; j--) { + if(((srcElem2>>j) & ((Element)0x1)) == ((Element)0x0)){ + destElem |= (((srcElem1>>j) & (Element)0x1) << k); + k--; + } + } + ''' + sveBinInst('bgrp', 'Bgrp', 'SimdAluOp', unsignedTypes, bgrpCode) # BIC (vectors, predicated) bicCode = 'destElem = srcElem1 & ~srcElem2;' sveBinInst('bic', 'BicPred', 'SimdAluOp', unsignedTypes, bicCode, @@ -3555,6 +3733,10 @@ eorCode) svePredLogicalInst('eors', 'PredEors', 'SimdPredAluOp', ('uint8_t',), eorCode, isFlagSetting=True) + # EOR3 + eorCode = 'destElem ^= srcElem1 ^ srcElem2;' + sveBinInst('eor', 'Eor3', 'SimdAluOp', ('uint64_t',), eorCode, + isDestructive=True) # EORV eorvCode = 'destElem ^= srcElem1;' sveAssocReducInst('eorv', 'Eorv', 'SimdReduceAluOp', unsignedTypes, @@ -4122,6 +4304,30 @@ pfalseCode) # PFIRST svePFirstInst('pfirst', 'Pfirst', 'SimdPredAluOp') + # PMUL + exec_output += ''' + __uint128_t poly_mul(uint64_t srcElem1, uint64_t srcElem2) 
+ { + __uint128_t destElem = 0; + __uint128_t extendedElem2 = srcElem2; + int i; + for (i=0; i < 64; i++) { + if (((srcElem1 >> i) & 0x1) == 0x1) { + destElem ^= (extendedElem2 << i); + } + } + return destElem; + }''' + pmulCode = 'destElem = (uint8_t)poly_mul(srcElem1, srcElem2);' + sveBinInst('pmul', 'Pmul', 'SimdAluOp', ('uint8_t',), pmulCode) + # PMULLB + pmullCode = '__uint128_t destElem = poly_mul(srcElem1, srcElem2);' + sveLongMulInst('pmullb', 'Pmullb', 'SimdAluOp', + ('uint8_t','uint32_t','uint64_t',), pmullCode) + # PMULLT + sveLongMulInst('pmullt', 'Pmullt', 'SimdAluOp', + ('uint8_t','uint32_t','uint64_t',), + pmullCode, uptTop = True) # PNEXT svePNextInst('pnext', 'Pnext', 'SimdPredAluOp', unsignedTypes) # PSEL @@ -4138,6 +4344,9 @@ # PUNPKLO sveUnpackInst('punpklo', 'Punpklo', 'SimdPredAluOp', unsignedWideSDTypes, unpackHalf = Unpack.Low, regType = SrcRegType.Predicate) + # RAX1 + rax1Code = 'destElem = srcElem1 ^ ((srcElem2 << 1) | (srcElem2 >> 63));' + sveBinInst('rax', 'Rax1', 'SimdAluOp', ('uint64_t',), rax1Code) # RBIT rbitCode = ''' destElem = reverseBits(srcElem1);''' @@ -4214,6 +4423,11 @@ ''' sveBinInst('sabd', 'Sabd', 'SimdAddOp', signedTypes, abdCode, PredType.MERGE, True) + # SBCLB + sveLongCarryInst('sbclb', 'Sbclb', 'SimdAluOp', subtract = True) + # SBCLT + sveLongCarryInst('sbclt', 'Sbclt', 'SimdAluOp', uptTop = True, + subtract = True) # SADDV addvCode = 'destElem += srcElem1;' sveWideningAssocReducInst('saddv', 'Saddv', 'SimdReduceAddOp', @@ -4372,6 +4586,13 @@ destElem = do_mulh(srcElem1, srcElem2);''' sveBinInst('smulh', 'Smulh', 'SimdMultOp', signedTypes, mulhCode, PredType.MERGE, True) + # SMULLB + smullCode = 'int64_t destElem = (int64_t)srcElem1 * (int64_t)srcElem2;' + sveLongMulInst('smullb', 'Smullb', 'SimdAluOp', + ('int8_t','int16_t','int32_t',), smullCode) + # SMULLT + sveLongMulInst('smullt', 'Smullt', 'SimdAluOp', + ('int8_t','int16_t','int32_t',), smullCode, uptTop = True) # SPLICE sveSpliceInst('splice', 'Splice', 
'SimdAluOp', unsignedTypes) # SQADD (immediate) @@ -4557,6 +4778,8 @@ sxtCode, PredType.MERGE) # TBL sveTblInst('tbl', 'Tbl', 'SimdAluOp') + # TBX + sveTblInst('tbx', 'Tbx', 'SimdAluOp', merging=True) # TRN1, TRN2 (predicates) trnPredIterCode = ''' constexpr unsigned sz = sizeof(Element); @@ -4654,6 +4877,14 @@ # UMULH sveBinInst('umulh', 'Umulh', 'SimdMultOp', unsignedTypes, mulhCode, PredType.MERGE, True) + # UMULLB + umullCode = 'uint64_t destElem = (uint64_t)srcElem1 * (uint64_t)srcElem2;' + sveLongMulInst('umullb', 'Umullb', 'SimdAluOp', + ('uint8_t','uint16_t','uint32_t',), umullCode) + # UMULLT + sveLongMulInst('umullt', 'Umullt', 'SimdAluOp', + ('uint8_t','uint16_t','uint32_t',), umullCode, + uptTop = True) # UQADD (immediate) uqaddCode = ''' destElem = srcElem1 + srcElem2; @@ -4861,6 +5092,13 @@ Ffr_ub[i] = POp1_ub[i]; }''' svePredWriteFfrInst('wrffr', 'Wrffr', 'SimdPredAluOp', wrffrCode, False) + # XAR + xarCode = ''' + destElem = AA64FpDestMerge_x[i] ^ srcElem1; + destElem = ((destElem >> srcElem2) | + (destElem << (sizeof(Element) * 8 - srcElem2))); + ''' + sveBinImmInst('xar', 'Xar', 'SimdAluOp', unsignedTypes, xarCode) # ZIP1, ZIP2 (predicates) zipPredIterCode = ''' constexpr unsigned sz = sizeof(Element); -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/70277?usp=email To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-MessageType: newchange Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Ia135ba9300eae312b24342bcbda835fef6867113 Gerrit-Change-Number: 70277 Gerrit-PatchSet: 1 Gerrit-Owner: Giacomo Travaglini <giacomo.travaglini@arm.com>