Empathy List Archives

gem5-dev@gem5.org

The gem5 Developer List

View all threads

[M] Change in gem5/gem5[develop]: arch-arm: Added Armv8.2-I8MM SVE mixed-sign dot product instrs.

Bobby Bruce (Gerrit)

Thu, May 25, 2023 9:36 PM

Bobby Bruce has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/70732?usp=email )

(

7 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the
submitted one.
)

Change subject: arch-arm: Added Armv8.2-I8MM SVE mixed-sign dot product
instrs.
......................................................................

arch-arm: Added Armv8.2-I8MM SVE mixed-sign dot product instrs.

Add support for the SVE mixed sign dot product instructions (USDOT,
SUDOT) required by the Armv8.2 SVE Int8 matrix multiplication
extension (ARMv8.2-I8MM).

For more information please refer to the "ARM Architecture Reference
Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A"
(https://developer.arm.com/architectures/cpu-architecture/a-profile/
docs/arm-architecture-reference-manual-supplement-armv8-a)

Change-Id: I83841654cee74b940f967b3a37b99d87c01bd92c
Reviewed-by: Richard Cooper richard.cooper@arm.com
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70732
Maintainer: Jason Lowe-Power power.jg@gmail.com
Reviewed-by: Jason Lowe-Power power.jg@gmail.com
Reviewed-by: Andreas Sandberg andreas.sandberg@arm.com
Tested-by: kokoro noreply+kokoro@google.com

M src/arch/arm/isa/formats/sve_2nd_level.isa
M src/arch/arm/isa/insts/sve.isa
M src/arch/arm/isa/templates/sve.isa
3 files changed, 98 insertions(+), 61 deletions(-)

Approvals:
Andreas Sandberg: Looks good to me, approved
Jason Lowe-Power: Looks good to me, but someone else must approve; Looks
good to me, approved
kokoro: Regressions pass

diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa
b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 0d12a22..86c174d 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -2256,19 +2256,19 @@
uint8_t usig = (uint8_t) bits(machInst, 10);
if (size & 0x1) {
if (usig) {

-            return new SveUdotv<uint16_t, uint64_t>(machInst,
-                                                    zda, zn, zm);
+            return new SveUdotv<uint16_t, uint16_t, uint64_t>
+                                    (machInst, zda, zn, zm);
        } else {
-            return new SveSdotv<int16_t, int64_t>(machInst,
-                                                    zda, zn, zm);
+            return new SveSdotv<int16_t, int16_t, int64_t>
+                                    (machInst, zda, zn, zm);
        }
    } else {
        if (usig) {
-            return new SveUdotv<uint8_t, uint32_t>(machInst,
-                                                    zda, zn, zm);
+            return new SveUdotv<uint8_t, uint8_t, uint32_t>
+                                    (machInst, zda, zn, zm);
        } else {
-            return new SveSdotv<int8_t, int32_t>(machInst,
-                                                    zda, zn, zm);
+            return new SveSdotv<int8_t, int8_t, int32_t>
+                                    (machInst, zda, zn, zm);
        }
    }

@@ -2292,21 +2292,21 @@
RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 19, 16);
uint8_t i1 = (uint8_t) bits(machInst, 20);
if (usig) {

-            return new SveUdoti<uint16_t, uint64_t>(machInst,
-                                                    zda, zn, zm, i1);
+            return new SveUdoti<uint16_t, uint16_t, uint64_t>
+                                   (machInst, zda, zn, zm, i1);
        } else {
-            return new SveSdoti<int16_t, int64_t>(machInst,
-                                                    zda, zn, zm, i1);
+            return new SveSdoti<int16_t, int16_t, int64_t>
+                                   (machInst, zda, zn, zm, i1);
        }
    } else {
        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 18, 16);
        uint8_t i2 = (uint8_t) bits(machInst, 20, 19);
        if (usig) {
-            return new SveUdoti<uint8_t, uint32_t>(machInst,
-                                                    zda, zn, zm, i2);
+            return new SveUdoti<uint8_t, uint8_t, uint32_t>
+                                    (machInst, zda, zn, zm, i2);
        } else {
-            return new SveSdoti<int8_t, int32_t>(machInst,
-                                                    zda, zn, zm, i2);
+            return new SveSdoti<int8_t, int8_t, int32_t>
+                                    (machInst, zda, zn, zm, i2);
        }
    }
    return new Unknown64(machInst);

@@ -2320,16 +2320,12 @@
return new Unknown64(machInst);
}

-    RegIndex zda M5_VAR_USED = (RegIndex)
-                                      (uint8_t) bits(machInst, 4, 0);
-    RegIndex zn M5_VAR_USED = (RegIndex)
-                                      (uint8_t) bits(machInst, 9, 5);
-    RegIndex zm M5_VAR_USED = (RegIndex)
-                                      (uint8_t) bits(machInst, 20, 16);
+    RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+    RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);

-    // Placeholder for SveUsdotv
-    //return SveUsdotv<int32_t, uint8_t, int8_t>(machInst, zda, zn, zm);
-    return new Unknown64(machInst);
+    return new SveUsdotv<uint8_t, int8_t, int32_t>
+                             (machInst, zda, zn, zm);
} // decodeSveMixedSignDotProduct

StaticInstPtr

@@ -2340,26 +2336,18 @@
return new Unknown64(machInst);
}

-    RegIndex zda M5_VAR_USED = (RegIndex)
-                                      (uint8_t) bits(machInst, 4, 0);
-    RegIndex zn M5_VAR_USED = (RegIndex)
-                                      (uint8_t) bits(machInst, 9, 5);
-    RegIndex zm M5_VAR_USED = (RegIndex)
-                                      (uint8_t) bits(machInst, 18, 16);
-    uint8_t i2 M5_VAR_USED = (uint8_t) bits(machInst, 20, 19);
-
+    RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+    RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 18, 16);
+    uint8_t i2 = (uint8_t) bits(machInst, 20, 19);
    uint8_t usig = (uint8_t) bits(machInst, 10);

    if (usig) {
-        // Placeholder for SveSudoti
-        //return SveSudoti<int32_t, int8_t, uint8_t>
-        //                         (machInst, zda, zn, zm, i2);
-        return new Unknown64(machInst);
+        return new SveSudoti<int8_t, uint8_t, int32_t>
+                                 (machInst, zda, zn, zm, i2);
    } else {
-        // Placeholder for SveUsdoti
-        //return SveUsdoti<int32_t, uint8_t, int8_t>
-        //                         (machInst, zda, zn, zm, i2);
-        return new Unknown64(machInst);
+        return new SveUsdoti<uint8_t, int8_t, int32_t>
+                                 (machInst, zda, zn, zm, i2);
    }

} // decodeSveMixedSignDotProductIndexed

diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 6e8313b..e7a773e 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -3098,6 +3098,15 @@
def sveDotInst(name, Name, opClass, types, isIndexed = True):
global header_output, exec_output, decoders
code = sveEnabledCheckCode + '''

+    // Types of the extended versions of the source elements.
+    // Required to make sure the intermediate calculations don't overflow.
+    using ExtendedElementA = typename vector_element_traits::
+                               extend_element<DElement,
+                                              SElementA>::type;
+    using ExtendedElementB = typename vector_element_traits::
+                               extend_element<DElement,
+                                              SElementB>::type;

    unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
            xc->tcBase());
    for (int i = 0; i < eCount; ++i) {'''

@@ -3107,17 +3116,21 @@
int s = segbase + imm;'''
code += '''
DElement res = AA64FpDest_xd[i];

-        DElement srcElem1, srcElem2;
+        ExtendedElementA srcElemA;
+        ExtendedElementB srcElemB;
        for (int j = 0; j <= 3; ++j) {
-            srcElem1 = static_cast<DElement>(AA64FpOp1_xs[4 * i + j]);'''
+            srcElemA = static_cast<ExtendedElementA>
+                                      (AA64FpOp1_srcA[4 * i + j]);'''
    if isIndexed:
        code += '''
-            srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * s + j]);'''
+            srcElemB = static_cast<ExtendedElementB>
+                                      (AA64FpOp2_srcB[4 * s + j]);'''
    else:
        code += '''
-            srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * i + j]);'''
+            srcElemB = static_cast<ExtendedElementB>
+                                      (AA64FpOp2_srcB[4 * i + j]);'''
    code += '''
-            res += srcElem1 * srcElem2;
+            res += srcElemA * srcElemB;
        }
        AA64FpDestMerge_xd[i] = res;
    }'''

@@ -3129,7 +3142,7 @@
header_output += SveWideningTerImmOpDeclare.subst(iop)
else:
header_output += SveWideningTerOpDeclare.subst(iop)

-    exec_output += SveWideningOpExecute.subst(iop)
+    exec_output += SveWideningTerOpExecute.subst(iop)
    for type in types:
        substDict = {'targs': type, 'class_name': 'Sve' + Name}
        exec_output += SveOpExecDeclare.subst(substDict)

@@ -4468,11 +4481,14 @@
sveBinInst('sdivr', 'Sdivr', 'SimdDivOp', signedTypes, sdivrCode,
PredType.MERGE, True)
# SDOT (indexed)

-sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int32_t',
-    'int16_t, int64_t'], isIndexed = True)
+sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int8_t, int32_t',
+    'int16_t, int16_t, int64_t'], isIndexed = True)
# SDOT (vectors)
-sveDotInst('sdot', 'Sdotv', 'SimdAluOp', ['int8_t, int32_t',
-    'int16_t, int64_t'], isIndexed = False)
+sveDotInst('sdot', 'Sdotv', 'SimdAluOp', ['int8_t, int8_t, int32_t',
+    'int16_t, int16_t, int64_t'], isIndexed = False)
+# SUDOT (indexed)
+sveDotInst('sudot', 'Sudoti', 'SimdAluOp', ['int8_t, uint8_t, int32_t'],
+           isIndexed = True)
# SEL (predicates)
selCode = 'destElem = srcElem1;'
svePredLogicalInst('sel', 'PredSel', 'SimdPredAluOp', ('uint8_t',),

@@ -4857,11 +4873,17 @@
sveBinInst('udivr', 'Udivr', 'SimdDivOp', unsignedTypes, udivrCode,
PredType.MERGE, True)
# UDOT (indexed)

-sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint32_t',
-    'uint16_t, uint64_t'], isIndexed = True)
+sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint8_t, uint32_t',
+    'uint16_t, uint16_t, uint64_t'], isIndexed = True)
# UDOT (vectors)
-sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint32_t',
-    'uint16_t, uint64_t'], isIndexed = False)
+sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint8_t, uint32_t',
+    'uint16_t, uint16_t, uint64_t'], isIndexed = False)
+# USDOT (indexed)
+sveDotInst('usdot', 'Usdoti', 'SimdAluOp', ['uint8_t, int8_t, int32_t'],
+           isIndexed = True)
+# USDOT (vectors)
+sveDotInst('usdot', 'Usdotv', 'SimdAluOp', ['uint8_t, int8_t, int32_t'],
+           isIndexed = False)
# UMAX (immediate)
sveWideImmInst('umax', 'UmaxImm', 'SimdCmpOp', unsignedTypes, maxCode)
# UMAX (vectors)

diff --git a/src/arch/arm/isa/templates/sve.isa
b/src/arch/arm/isa/templates/sve.isa
index 65abb1b..813bda0 100644
--- a/src/arch/arm/isa/templates/sve.isa
+++ b/src/arch/arm/isa/templates/sve.isa
@@ -1139,17 +1139,22 @@
}};

def template SveWideningTerImmOpDeclare {{
-template <class _SElement, class _DElement>
+template <class _SElementA, class _SElementB, class _DElement>
class %(class_name)s : public %(base_class)s
{

+static_assert(sizeof(_SElementA) == sizeof(_SElementB),
+            "Source elements must have the same size.");
+
private:
%(reg_idx_arr_decl)s;

protected:
typedef _DElement Element;
-typedef _SElement SElement;
+typedef _SElementA SElementA;
+typedef _SElementB SElementB;
typedef _DElement DElement;
-typedef _SElement TPSElem;
+typedef _SElementA TPSrcAElem;
+typedef _SElementB TPSrcBElem;
typedef _DElement TPDElem;

public:

@@ -1168,7 +1173,7 @@
}};

def template SveWideningTerOpDeclare {{
-template <class _SElement, class _DElement>
+template <class _SElementA, class _SElementB, class _DElement>
class %(class_name)s : public %(base_class)s
{
private:
@@ -1176,9 +1181,11 @@

protected:
  typedef _DElement Element;

-typedef _SElement SElement;
+typedef _SElementA SElementA;
+typedef _SElementB SElementB;
typedef _DElement DElement;
-typedef _SElement TPSElem;
+typedef _SElementA TPSrcAElem;
+typedef _SElementB TPSrcBElem;
typedef _DElement TPDElem;

public:

@@ -1295,6 +1302,26 @@
}
}};

+def template SveWideningTerOpExecute {{
+template <class SElementA, class SElementB, class DElement>
+Fault %(class_name)s<SElementA, SElementB, DElement>::execute
+       (ExecContext *xc,
+        trace::InstRecord *traceData) const
+{
+    Fault fault = NoFault;
+    %(op_decl)s;
+    %(op_rd)s;
+
+    %(code)s;
+    if (fault == NoFault)
+    {
+        %(op_wb)s;
+    }
+
+    return fault;
+}
+}};
+
def template SveNonTemplatedOpExecute {{
Fault
%(class_name)s::execute(ExecContext *xc,

--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/70732?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I83841654cee74b940f967b3a37b99d87c01bd92c
Gerrit-Change-Number: 70732
Gerrit-PatchSet: 9
Gerrit-Owner: Giacomo Travaglini giacomo.travaglini@arm.com
Gerrit-Reviewer: Andreas Sandberg andreas.sandberg@arm.com
Gerrit-Reviewer: Bobby Bruce bbruce@ucdavis.edu
Gerrit-Reviewer: Jason Lowe-Power power.jg@gmail.com
Gerrit-Reviewer: kokoro noreply+kokoro@google.com
Gerrit-CC: Richard Cooper richard.cooper@arm.com

Bobby Bruce has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/70732?usp=email ) ( 7 is the latest approved patch-set. No files were changed between the latest approved patch-set and the submitted one. )Change subject: arch-arm: Added Armv8.2-I8MM SVE mixed-sign dot product instrs. ...................................................................... arch-arm: Added Armv8.2-I8MM SVE mixed-sign dot product instrs. Add support for the SVE mixed sign dot product instructions (USDOT, SUDOT) required by the Armv8.2 SVE Int8 matrix multiplication extension (ARMv8.2-I8MM). For more information please refer to the "ARM Architecture Reference Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A" (https://developer.arm.com/architectures/cpu-architecture/a-profile/ docs/arm-architecture-reference-manual-supplement-armv8-a) Change-Id: I83841654cee74b940f967b3a37b99d87c01bd92c Reviewed-by: Richard Cooper <richard.cooper@arm.com> Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70732 Maintainer: Jason Lowe-Power <power.jg@gmail.com> Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com> Tested-by: kokoro <noreply+kokoro@google.com> --- M src/arch/arm/isa/formats/sve_2nd_level.isa M src/arch/arm/isa/insts/sve.isa M src/arch/arm/isa/templates/sve.isa 3 files changed, 98 insertions(+), 61 deletions(-) Approvals: Andreas Sandberg: Looks good to me, approved Jason Lowe-Power: Looks good to me, but someone else must approve; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa index 0d12a22..86c174d 100644 --- a/src/arch/arm/isa/formats/sve_2nd_level.isa +++ b/src/arch/arm/isa/formats/sve_2nd_level.isa @@ -2256,19 +2256,19 @@ uint8_t usig = (uint8_t) bits(machInst, 10); if (size & 0x1) { if (usig) { - return new SveUdotv<uint16_t, uint64_t>(machInst, - zda, zn, zm); + 
return new SveUdotv<uint16_t, uint16_t, uint64_t> + (machInst, zda, zn, zm); } else { - return new SveSdotv<int16_t, int64_t>(machInst, - zda, zn, zm); + return new SveSdotv<int16_t, int16_t, int64_t> + (machInst, zda, zn, zm); } } else { if (usig) { - return new SveUdotv<uint8_t, uint32_t>(machInst, - zda, zn, zm); + return new SveUdotv<uint8_t, uint8_t, uint32_t> + (machInst, zda, zn, zm); } else { - return new SveSdotv<int8_t, int32_t>(machInst, - zda, zn, zm); + return new SveSdotv<int8_t, int8_t, int32_t> + (machInst, zda, zn, zm); } } @@ -2292,21 +2292,21 @@ RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 19, 16); uint8_t i1 = (uint8_t) bits(machInst, 20); if (usig) { - return new SveUdoti<uint16_t, uint64_t>(machInst, - zda, zn, zm, i1); + return new SveUdoti<uint16_t, uint16_t, uint64_t> + (machInst, zda, zn, zm, i1); } else { - return new SveSdoti<int16_t, int64_t>(machInst, - zda, zn, zm, i1); + return new SveSdoti<int16_t, int16_t, int64_t> + (machInst, zda, zn, zm, i1); } } else { RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 18, 16); uint8_t i2 = (uint8_t) bits(machInst, 20, 19); if (usig) { - return new SveUdoti<uint8_t, uint32_t>(machInst, - zda, zn, zm, i2); + return new SveUdoti<uint8_t, uint8_t, uint32_t> + (machInst, zda, zn, zm, i2); } else { - return new SveSdoti<int8_t, int32_t>(machInst, - zda, zn, zm, i2); + return new SveSdoti<int8_t, int8_t, int32_t> + (machInst, zda, zn, zm, i2); } } return new Unknown64(machInst); @@ -2320,16 +2320,12 @@ return new Unknown64(machInst); } - RegIndex zda M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 4, 0); - RegIndex zn M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 9, 5); - RegIndex zm M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 20, 16); + RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); - // Placeholder for SveUsdotv - //return SveUsdotv<int32_t, 
uint8_t, int8_t>(machInst, zda, zn, zm); - return new Unknown64(machInst); + return new SveUsdotv<uint8_t, int8_t, int32_t> + (machInst, zda, zn, zm); } // decodeSveMixedSignDotProduct StaticInstPtr @@ -2340,26 +2336,18 @@ return new Unknown64(machInst); } - RegIndex zda M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 4, 0); - RegIndex zn M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 9, 5); - RegIndex zm M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 18, 16); - uint8_t i2 M5_VAR_USED = (uint8_t) bits(machInst, 20, 19); - + RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 18, 16); + uint8_t i2 = (uint8_t) bits(machInst, 20, 19); uint8_t usig = (uint8_t) bits(machInst, 10); if (usig) { - // Placeholder for SveSudoti - //return SveSudoti<int32_t, int8_t, uint8_t> - // (machInst, zda, zn, zm, i2); - return new Unknown64(machInst); + return new SveSudoti<int8_t, uint8_t, int32_t> + (machInst, zda, zn, zm, i2); } else { - // Placeholder for SveUsdoti - //return SveUsdoti<int32_t, uint8_t, int8_t> - // (machInst, zda, zn, zm, i2); - return new Unknown64(machInst); + return new SveUsdoti<uint8_t, int8_t, int32_t> + (machInst, zda, zn, zm, i2); } } // decodeSveMixedSignDotProductIndexed diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa index 6e8313b..e7a773e 100644 --- a/src/arch/arm/isa/insts/sve.isa +++ b/src/arch/arm/isa/insts/sve.isa @@ -3098,6 +3098,15 @@ def sveDotInst(name, Name, opClass, types, isIndexed = True): global header_output, exec_output, decoders code = sveEnabledCheckCode + ''' + // Types of the extended versions of the source elements. + // Required to make sure the intermediate calculations don't overflow. 
+ using ExtendedElementA = typename vector_element_traits:: + extend_element<DElement, + SElementA>::type; + using ExtendedElementB = typename vector_element_traits:: + extend_element<DElement, + SElementB>::type; + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( xc->tcBase()); for (int i = 0; i < eCount; ++i) {''' @@ -3107,17 +3116,21 @@ int s = segbase + imm;''' code += ''' DElement res = AA64FpDest_xd[i]; - DElement srcElem1, srcElem2; + ExtendedElementA srcElemA; + ExtendedElementB srcElemB; for (int j = 0; j <= 3; ++j) { - srcElem1 = static_cast<DElement>(AA64FpOp1_xs[4 * i + j]);''' + srcElemA = static_cast<ExtendedElementA> + (AA64FpOp1_srcA[4 * i + j]);''' if isIndexed: code += ''' - srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * s + j]);''' + srcElemB = static_cast<ExtendedElementB> + (AA64FpOp2_srcB[4 * s + j]);''' else: code += ''' - srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * i + j]);''' + srcElemB = static_cast<ExtendedElementB> + (AA64FpOp2_srcB[4 * i + j]);''' code += ''' - res += srcElem1 * srcElem2; + res += srcElemA * srcElemB; } AA64FpDestMerge_xd[i] = res; }''' @@ -3129,7 +3142,7 @@ header_output += SveWideningTerImmOpDeclare.subst(iop) else: header_output += SveWideningTerOpDeclare.subst(iop) - exec_output += SveWideningOpExecute.subst(iop) + exec_output += SveWideningTerOpExecute.subst(iop) for type in types: substDict = {'targs': type, 'class_name': 'Sve' + Name} exec_output += SveOpExecDeclare.subst(substDict) @@ -4468,11 +4481,14 @@ sveBinInst('sdivr', 'Sdivr', 'SimdDivOp', signedTypes, sdivrCode, PredType.MERGE, True) # SDOT (indexed) - sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int32_t', - 'int16_t, int64_t'], isIndexed = True) + sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int8_t, int32_t', + 'int16_t, int16_t, int64_t'], isIndexed = True) # SDOT (vectors) - sveDotInst('sdot', 'Sdotv', 'SimdAluOp', ['int8_t, int32_t', - 'int16_t, int64_t'], isIndexed = False) + sveDotInst('sdot', 'Sdotv', 'SimdAluOp', 
['int8_t, int8_t, int32_t', + 'int16_t, int16_t, int64_t'], isIndexed = False) + # SUDOT (indexed) + sveDotInst('sudot', 'Sudoti', 'SimdAluOp', ['int8_t, uint8_t, int32_t'], + isIndexed = True) # SEL (predicates) selCode = 'destElem = srcElem1;' svePredLogicalInst('sel', 'PredSel', 'SimdPredAluOp', ('uint8_t',), @@ -4857,11 +4873,17 @@ sveBinInst('udivr', 'Udivr', 'SimdDivOp', unsignedTypes, udivrCode, PredType.MERGE, True) # UDOT (indexed) - sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint32_t', - 'uint16_t, uint64_t'], isIndexed = True) + sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint8_t, uint32_t', + 'uint16_t, uint16_t, uint64_t'], isIndexed = True) # UDOT (vectors) - sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint32_t', - 'uint16_t, uint64_t'], isIndexed = False) + sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint8_t, uint32_t', + 'uint16_t, uint16_t, uint64_t'], isIndexed = False) + # USDOT (indexed) + sveDotInst('usdot', 'Usdoti', 'SimdAluOp', ['uint8_t, int8_t, int32_t'], + isIndexed = True) + # USDOT (vectors) + sveDotInst('usdot', 'Usdotv', 'SimdAluOp', ['uint8_t, int8_t, int32_t'], + isIndexed = False) # UMAX (immediate) sveWideImmInst('umax', 'UmaxImm', 'SimdCmpOp', unsignedTypes, maxCode) # UMAX (vectors) diff --git a/src/arch/arm/isa/templates/sve.isa b/src/arch/arm/isa/templates/sve.isa index 65abb1b..813bda0 100644 --- a/src/arch/arm/isa/templates/sve.isa +++ b/src/arch/arm/isa/templates/sve.isa @@ -1139,17 +1139,22 @@ }}; def template SveWideningTerImmOpDeclare {{ -template <class _SElement, class _DElement> +template <class _SElementA, class _SElementB, class _DElement> class %(class_name)s : public %(base_class)s { + static_assert(sizeof(_SElementA) == sizeof(_SElementB), + "Source elements must have the same size."); + private: %(reg_idx_arr_decl)s; protected: typedef _DElement Element; - typedef _SElement SElement; + typedef _SElementA SElementA; + typedef _SElementB SElementB; typedef _DElement DElement; - 
typedef _SElement TPSElem; + typedef _SElementA TPSrcAElem; + typedef _SElementB TPSrcBElem; typedef _DElement TPDElem; public: @@ -1168,7 +1173,7 @@ }}; def template SveWideningTerOpDeclare {{ -template <class _SElement, class _DElement> +template <class _SElementA, class _SElementB, class _DElement> class %(class_name)s : public %(base_class)s { private: @@ -1176,9 +1181,11 @@ protected: typedef _DElement Element; - typedef _SElement SElement; + typedef _SElementA SElementA; + typedef _SElementB SElementB; typedef _DElement DElement; - typedef _SElement TPSElem; + typedef _SElementA TPSrcAElem; + typedef _SElementB TPSrcBElem; typedef _DElement TPDElem; public: @@ -1295,6 +1302,26 @@ } }}; +def template SveWideningTerOpExecute {{ + template <class SElementA, class SElementB, class DElement> + Fault %(class_name)s<SElementA, SElementB, DElement>::execute + (ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + %(op_decl)s; + %(op_rd)s; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; + def template SveNonTemplatedOpExecute {{ Fault %(class_name)s::execute(ExecContext *xc, -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/70732?usp=email To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I83841654cee74b940f967b3a37b99d87c01bd92c Gerrit-Change-Number: 70732 Gerrit-PatchSet: 9 Gerrit-Owner: Giacomo Travaglini <giacomo.travaglini@arm.com> Gerrit-Reviewer: Andreas Sandberg <andreas.sandberg@arm.com> Gerrit-Reviewer: Bobby Bruce <bbruce@ucdavis.edu> Gerrit-Reviewer: Jason Lowe-Power <power.jg@gmail.com> Gerrit-Reviewer: kokoro <noreply+kokoro@google.com> Gerrit-CC: Richard Cooper <richard.cooper@arm.com>