gem5-dev@gem5.org

The gem5 Developer List

View all threads

[M] Change in gem5/gem5[develop]: arch-arm: Added Armv8.2-I8MM SVE mixed-sign dot product instrs.

BB
Bobby Bruce (Gerrit)
Thu, May 25, 2023 9:36 PM

Bobby Bruce has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/70732?usp=email )

(
7 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the
submitted one.
)

Change subject: arch-arm: Added Armv8.2-I8MM SVE mixed-sign dot product
instrs.
......................................................................

arch-arm: Added Armv8.2-I8MM SVE mixed-sign dot product instrs.

Add support for the SVE mixed sign dot product instructions (USDOT,
SUDOT) required by the Armv8.2 SVE Int8 matrix multiplication
extension (ARMv8.2-I8MM).

For more information please refer to the "ARM Architecture Reference
Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A"
(https://developer.arm.com/architectures/cpu-architecture/a-profile/
docs/arm-architecture-reference-manual-supplement-armv8-a)

Change-Id: I83841654cee74b940f967b3a37b99d87c01bd92c
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70732
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>

M src/arch/arm/isa/formats/sve_2nd_level.isa
M src/arch/arm/isa/insts/sve.isa
M src/arch/arm/isa/templates/sve.isa
3 files changed, 98 insertions(+), 61 deletions(-)

Approvals:
Andreas Sandberg: Looks good to me, approved
Jason Lowe-Power: Looks good to me, but someone else must approve; Looks
good to me, approved
kokoro: Regressions pass

diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa
b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 0d12a22..86c174d 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -2256,19 +2256,19 @@
         uint8_t usig = (uint8_t) bits(machInst, 10);
         if (size & 0x1) {
             if (usig) {
-                return new SveUdotv<uint16_t, uint64_t>(machInst,
-                                                        zda, zn, zm);
+                return new SveUdotv<uint16_t, uint16_t, uint64_t>
+                                        (machInst, zda, zn, zm);
             } else {
-                return new SveSdotv<int16_t, int64_t>(machInst,
-                                                        zda, zn, zm);
+                return new SveSdotv<int16_t, int16_t, int64_t>
+                                        (machInst, zda, zn, zm);
             }
         } else {
             if (usig) {
-                return new SveUdotv<uint8_t, uint32_t>(machInst,
-                                                        zda, zn, zm);
+                return new SveUdotv<uint8_t, uint8_t, uint32_t>
+                                        (machInst, zda, zn, zm);
             } else {
-                return new SveSdotv<int8_t, int32_t>(machInst,
-                                                        zda, zn, zm);
+                return new SveSdotv<int8_t, int8_t, int32_t>
+                                        (machInst, zda, zn, zm);
             }
         }
 

@@ -2292,21 +2292,21 @@
             RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 19, 16);
             uint8_t i1 = (uint8_t) bits(machInst, 20);
             if (usig) {
-                return new SveUdoti<uint16_t, uint64_t>(machInst,
-                                                        zda, zn, zm, i1);
+                return new SveUdoti<uint16_t, uint16_t, uint64_t>
+                                       (machInst, zda, zn, zm, i1);
             } else {
-                return new SveSdoti<int16_t, int64_t>(machInst,
-                                                        zda, zn, zm, i1);
+                return new SveSdoti<int16_t, int16_t, int64_t>
+                                       (machInst, zda, zn, zm, i1);
             }
         } else {
             RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 18, 16);
             uint8_t i2 = (uint8_t) bits(machInst, 20, 19);
             if (usig) {
-                return new SveUdoti<uint8_t, uint32_t>(machInst,
-                                                        zda, zn, zm, i2);
+                return new SveUdoti<uint8_t, uint8_t, uint32_t>
+                                        (machInst, zda, zn, zm, i2);
             } else {
-                return new SveSdoti<int8_t, int32_t>(machInst,
-                                                        zda, zn, zm, i2);
+                return new SveSdoti<int8_t, int8_t, int32_t>
+                                        (machInst, zda, zn, zm, i2);
             }
         }
         return new Unknown64(machInst);

@@ -2320,16 +2320,12 @@
             return new Unknown64(machInst);
         }
 
-        RegIndex zda M5_VAR_USED = (RegIndex)
-                                          (uint8_t) bits(machInst, 4, 0);
-        RegIndex zn M5_VAR_USED = (RegIndex)
-                                          (uint8_t) bits(machInst, 9, 5);
-        RegIndex zm M5_VAR_USED = (RegIndex)
-                                          (uint8_t) bits(machInst, 20, 16);
+        RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+        RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
 
-        // Placeholder for SveUsdotv
-        //return SveUsdotv<int32_t, uint8_t, int8_t>(machInst, zda, zn, zm);
-        return new Unknown64(machInst);
+        return new SveUsdotv<uint8_t, int8_t, int32_t>
+                                 (machInst, zda, zn, zm);
     } // decodeSveMixedSignDotProduct
 
     StaticInstPtr

@@ -2340,26 +2336,18 @@
             return new Unknown64(machInst);
         }
 
-        RegIndex zda M5_VAR_USED = (RegIndex)
-                                          (uint8_t) bits(machInst, 4, 0);
-        RegIndex zn M5_VAR_USED = (RegIndex)
-                                          (uint8_t) bits(machInst, 9, 5);
-        RegIndex zm M5_VAR_USED = (RegIndex)
-                                          (uint8_t) bits(machInst, 18, 16);
-        uint8_t i2 M5_VAR_USED = (uint8_t) bits(machInst, 20, 19);
-
+        RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+        RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 18, 16);
+        uint8_t i2 = (uint8_t) bits(machInst, 20, 19);
         uint8_t usig = (uint8_t) bits(machInst, 10);
 
         if (usig) {
-            // Placeholder for SveSudoti
-            //return SveSudoti<int32_t, int8_t, uint8_t>
-            //                         (machInst, zda, zn, zm, i2);
-            return new Unknown64(machInst);
+            return new SveSudoti<int8_t, uint8_t, int32_t>
+                                     (machInst, zda, zn, zm, i2);
         } else {
-            // Placeholder for SveUsdoti
-            //return SveUsdoti<int32_t, uint8_t, int8_t>
-            //                         (machInst, zda, zn, zm, i2);
-            return new Unknown64(machInst);
+            return new SveUsdoti<uint8_t, int8_t, int32_t>
+                                     (machInst, zda, zn, zm, i2);
         }
 
     } // decodeSveMixedSignDotProductIndexed

diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 6e8313b..e7a773e 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -3098,6 +3098,15 @@
     def sveDotInst(name, Name, opClass, types, isIndexed = True):
         global header_output, exec_output, decoders
         code = sveEnabledCheckCode + '''
+        // Types of the extended versions of the source elements.
+        // Required to make sure the intermediate calculations don't overflow.
+        using ExtendedElementA = typename vector_element_traits::
+                                   extend_element<DElement,
+                                                  SElementA>::type;
+        using ExtendedElementB = typename vector_element_traits::
+                                   extend_element<DElement,
+                                                  SElementB>::type;
+
         unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                 xc->tcBase());
         for (int i = 0; i < eCount; ++i) {'''

@@ -3107,17 +3116,21 @@
             int s = segbase + imm;'''
         code += '''
             DElement res = AA64FpDest_xd[i];
-            DElement srcElem1, srcElem2;
+            ExtendedElementA srcElemA;
+            ExtendedElementB srcElemB;
             for (int j = 0; j <= 3; ++j) {
-                srcElem1 = static_cast<DElement>(AA64FpOp1_xs[4 * i + j]);'''
+                srcElemA = static_cast<ExtendedElementA>
+                                          (AA64FpOp1_srcA[4 * i + j]);'''
         if isIndexed:
             code += '''
-                srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * s + j]);'''
+                srcElemB = static_cast<ExtendedElementB>
+                                          (AA64FpOp2_srcB[4 * s + j]);'''
         else:
             code += '''
-                srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * i + j]);'''
+                srcElemB = static_cast<ExtendedElementB>
+                                          (AA64FpOp2_srcB[4 * i + j]);'''
         code += '''
-                res += srcElem1 * srcElem2;
+                res += srcElemA * srcElemB;
             }
             AA64FpDestMerge_xd[i] = res;
         }'''

@@ -3129,7 +3142,7 @@
             header_output += SveWideningTerImmOpDeclare.subst(iop)
         else:
             header_output += SveWideningTerOpDeclare.subst(iop)
-        exec_output += SveWideningOpExecute.subst(iop)
+        exec_output += SveWideningTerOpExecute.subst(iop)
         for type in types:
             substDict = {'targs': type, 'class_name': 'Sve' + Name}
             exec_output += SveOpExecDeclare.subst(substDict)

@@ -4468,11 +4481,14 @@
     sveBinInst('sdivr', 'Sdivr', 'SimdDivOp', signedTypes, sdivrCode,
                PredType.MERGE, True)
     # SDOT (indexed)
-    sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int32_t',
-        'int16_t, int64_t'], isIndexed = True)
+    sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int8_t, int32_t',
+        'int16_t, int16_t, int64_t'], isIndexed = True)
     # SDOT (vectors)
-    sveDotInst('sdot', 'Sdotv', 'SimdAluOp', ['int8_t, int32_t',
-        'int16_t, int64_t'], isIndexed = False)
+    sveDotInst('sdot', 'Sdotv', 'SimdAluOp', ['int8_t, int8_t, int32_t',
+        'int16_t, int16_t, int64_t'], isIndexed = False)
+    # SUDOT (indexed)
+    sveDotInst('sudot', 'Sudoti', 'SimdAluOp', ['int8_t, uint8_t, int32_t'],
+               isIndexed = True)
     # SEL (predicates)
     selCode = 'destElem = srcElem1;'
     svePredLogicalInst('sel', 'PredSel', 'SimdPredAluOp', ('uint8_t',),
    

@@ -4857,11 +4873,17 @@
     sveBinInst('udivr', 'Udivr', 'SimdDivOp', unsignedTypes, udivrCode,
                PredType.MERGE, True)
     # UDOT (indexed)
-    sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint32_t',
-        'uint16_t, uint64_t'], isIndexed = True)
+    sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint8_t, uint32_t',
+        'uint16_t, uint16_t, uint64_t'], isIndexed = True)
     # UDOT (vectors)
-    sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint32_t',
-        'uint16_t, uint64_t'], isIndexed = False)
+    sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint8_t, uint32_t',
+        'uint16_t, uint16_t, uint64_t'], isIndexed = False)
+    # USDOT (indexed)
+    sveDotInst('usdot', 'Usdoti', 'SimdAluOp', ['uint8_t, int8_t, int32_t'],
+               isIndexed = True)
+    # USDOT (vectors)
+    sveDotInst('usdot', 'Usdotv', 'SimdAluOp', ['uint8_t, int8_t, int32_t'],
+               isIndexed = False)
     # UMAX (immediate)
     sveWideImmInst('umax', 'UmaxImm', 'SimdCmpOp', unsignedTypes, maxCode)
     # UMAX (vectors)
    

diff --git a/src/arch/arm/isa/templates/sve.isa
b/src/arch/arm/isa/templates/sve.isa
index 65abb1b..813bda0 100644
--- a/src/arch/arm/isa/templates/sve.isa
+++ b/src/arch/arm/isa/templates/sve.isa
@@ -1139,17 +1139,22 @@
 }};
 
 def template SveWideningTerImmOpDeclare {{
-template <class _SElement, class _DElement>
+template <class _SElementA, class _SElementB, class _DElement>
 class %(class_name)s : public %(base_class)s
 {
+    static_assert(sizeof(_SElementA) == sizeof(_SElementB),
+                "Source elements must have the same size.");
+
   private:
     %(reg_idx_arr_decl)s;
 
   protected:
     typedef _DElement Element;
-    typedef _SElement SElement;
+    typedef _SElementA SElementA;
+    typedef _SElementB SElementB;
     typedef _DElement DElement;
-    typedef _SElement TPSElem;
+    typedef _SElementA TPSrcAElem;
+    typedef _SElementB TPSrcBElem;
     typedef _DElement TPDElem;
 
   public:

@@ -1168,7 +1173,7 @@
 }};
 
 def template SveWideningTerOpDeclare {{
-template <class _SElement, class _DElement>
+template <class _SElementA, class _SElementB, class _DElement>
 class %(class_name)s : public %(base_class)s
 {
   private:
@@ -1176,9 +1181,11 @@
 
   protected:
     typedef _DElement Element;
-    typedef _SElement SElement;
+    typedef _SElementA SElementA;
+    typedef _SElementB SElementB;
     typedef _DElement DElement;
-    typedef _SElement TPSElem;
+    typedef _SElementA TPSrcAElem;
+    typedef _SElementB TPSrcBElem;
     typedef _DElement TPDElem;
 
   public:

@@ -1295,6 +1302,26 @@
     }
 }};
 
+def template SveWideningTerOpExecute {{
+    template <class SElementA, class SElementB, class DElement>
+    Fault %(class_name)s<SElementA, SElementB, DElement>::execute
+           (ExecContext *xc,
+            trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        %(op_decl)s;
+        %(op_rd)s;
+
+        %(code)s;
+        if (fault == NoFault)
+        {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
 def template SveNonTemplatedOpExecute {{
     Fault
     %(class_name)s::execute(ExecContext *xc,

--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/70732?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I83841654cee74b940f967b3a37b99d87c01bd92c
Gerrit-Change-Number: 70732
Gerrit-PatchSet: 9
Gerrit-Owner: Giacomo Travaglini <giacomo.travaglini@arm.com>
Gerrit-Reviewer: Andreas Sandberg <andreas.sandberg@arm.com>
Gerrit-Reviewer: Bobby Bruce <bbruce@ucdavis.edu>
Gerrit-Reviewer: Jason Lowe-Power <power.jg@gmail.com>
Gerrit-Reviewer: kokoro <noreply+kokoro@google.com>
Gerrit-CC: Richard Cooper <richard.cooper@arm.com>

Bobby Bruce has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/70732?usp=email ) ( 7 is the latest approved patch-set. No files were changed between the latest approved patch-set and the submitted one. )Change subject: arch-arm: Added Armv8.2-I8MM SVE mixed-sign dot product instrs. ...................................................................... arch-arm: Added Armv8.2-I8MM SVE mixed-sign dot product instrs. Add support for the SVE mixed sign dot product instructions (USDOT, SUDOT) required by the Armv8.2 SVE Int8 matrix multiplication extension (ARMv8.2-I8MM). For more information please refer to the "ARM Architecture Reference Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A" (https://developer.arm.com/architectures/cpu-architecture/a-profile/ docs/arm-architecture-reference-manual-supplement-armv8-a) Change-Id: I83841654cee74b940f967b3a37b99d87c01bd92c Reviewed-by: Richard Cooper <richard.cooper@arm.com> Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70732 Maintainer: Jason Lowe-Power <power.jg@gmail.com> Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com> Tested-by: kokoro <noreply+kokoro@google.com> --- M src/arch/arm/isa/formats/sve_2nd_level.isa M src/arch/arm/isa/insts/sve.isa M src/arch/arm/isa/templates/sve.isa 3 files changed, 98 insertions(+), 61 deletions(-) Approvals: Andreas Sandberg: Looks good to me, approved Jason Lowe-Power: Looks good to me, but someone else must approve; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa index 0d12a22..86c174d 100644 --- a/src/arch/arm/isa/formats/sve_2nd_level.isa +++ b/src/arch/arm/isa/formats/sve_2nd_level.isa @@ -2256,19 +2256,19 @@ uint8_t usig = (uint8_t) bits(machInst, 10); if (size & 0x1) { if (usig) { - return new SveUdotv<uint16_t, uint64_t>(machInst, - zda, zn, zm); + 
return new SveUdotv<uint16_t, uint16_t, uint64_t> + (machInst, zda, zn, zm); } else { - return new SveSdotv<int16_t, int64_t>(machInst, - zda, zn, zm); + return new SveSdotv<int16_t, int16_t, int64_t> + (machInst, zda, zn, zm); } } else { if (usig) { - return new SveUdotv<uint8_t, uint32_t>(machInst, - zda, zn, zm); + return new SveUdotv<uint8_t, uint8_t, uint32_t> + (machInst, zda, zn, zm); } else { - return new SveSdotv<int8_t, int32_t>(machInst, - zda, zn, zm); + return new SveSdotv<int8_t, int8_t, int32_t> + (machInst, zda, zn, zm); } } @@ -2292,21 +2292,21 @@ RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 19, 16); uint8_t i1 = (uint8_t) bits(machInst, 20); if (usig) { - return new SveUdoti<uint16_t, uint64_t>(machInst, - zda, zn, zm, i1); + return new SveUdoti<uint16_t, uint16_t, uint64_t> + (machInst, zda, zn, zm, i1); } else { - return new SveSdoti<int16_t, int64_t>(machInst, - zda, zn, zm, i1); + return new SveSdoti<int16_t, int16_t, int64_t> + (machInst, zda, zn, zm, i1); } } else { RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 18, 16); uint8_t i2 = (uint8_t) bits(machInst, 20, 19); if (usig) { - return new SveUdoti<uint8_t, uint32_t>(machInst, - zda, zn, zm, i2); + return new SveUdoti<uint8_t, uint8_t, uint32_t> + (machInst, zda, zn, zm, i2); } else { - return new SveSdoti<int8_t, int32_t>(machInst, - zda, zn, zm, i2); + return new SveSdoti<int8_t, int8_t, int32_t> + (machInst, zda, zn, zm, i2); } } return new Unknown64(machInst); @@ -2320,16 +2320,12 @@ return new Unknown64(machInst); } - RegIndex zda M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 4, 0); - RegIndex zn M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 9, 5); - RegIndex zm M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 20, 16); + RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); - // Placeholder for SveUsdotv - //return SveUsdotv<int32_t, 
uint8_t, int8_t>(machInst, zda, zn, zm); - return new Unknown64(machInst); + return new SveUsdotv<uint8_t, int8_t, int32_t> + (machInst, zda, zn, zm); } // decodeSveMixedSignDotProduct StaticInstPtr @@ -2340,26 +2336,18 @@ return new Unknown64(machInst); } - RegIndex zda M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 4, 0); - RegIndex zn M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 9, 5); - RegIndex zm M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 18, 16); - uint8_t i2 M5_VAR_USED = (uint8_t) bits(machInst, 20, 19); - + RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 18, 16); + uint8_t i2 = (uint8_t) bits(machInst, 20, 19); uint8_t usig = (uint8_t) bits(machInst, 10); if (usig) { - // Placeholder for SveSudoti - //return SveSudoti<int32_t, int8_t, uint8_t> - // (machInst, zda, zn, zm, i2); - return new Unknown64(machInst); + return new SveSudoti<int8_t, uint8_t, int32_t> + (machInst, zda, zn, zm, i2); } else { - // Placeholder for SveUsdoti - //return SveUsdoti<int32_t, uint8_t, int8_t> - // (machInst, zda, zn, zm, i2); - return new Unknown64(machInst); + return new SveUsdoti<uint8_t, int8_t, int32_t> + (machInst, zda, zn, zm, i2); } } // decodeSveMixedSignDotProductIndexed diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa index 6e8313b..e7a773e 100644 --- a/src/arch/arm/isa/insts/sve.isa +++ b/src/arch/arm/isa/insts/sve.isa @@ -3098,6 +3098,15 @@ def sveDotInst(name, Name, opClass, types, isIndexed = True): global header_output, exec_output, decoders code = sveEnabledCheckCode + ''' + // Types of the extended versions of the source elements. + // Required to make sure the intermediate calculations don't overflow. 
+ using ExtendedElementA = typename vector_element_traits:: + extend_element<DElement, + SElementA>::type; + using ExtendedElementB = typename vector_element_traits:: + extend_element<DElement, + SElementB>::type; + unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>( xc->tcBase()); for (int i = 0; i < eCount; ++i) {''' @@ -3107,17 +3116,21 @@ int s = segbase + imm;''' code += ''' DElement res = AA64FpDest_xd[i]; - DElement srcElem1, srcElem2; + ExtendedElementA srcElemA; + ExtendedElementB srcElemB; for (int j = 0; j <= 3; ++j) { - srcElem1 = static_cast<DElement>(AA64FpOp1_xs[4 * i + j]);''' + srcElemA = static_cast<ExtendedElementA> + (AA64FpOp1_srcA[4 * i + j]);''' if isIndexed: code += ''' - srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * s + j]);''' + srcElemB = static_cast<ExtendedElementB> + (AA64FpOp2_srcB[4 * s + j]);''' else: code += ''' - srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * i + j]);''' + srcElemB = static_cast<ExtendedElementB> + (AA64FpOp2_srcB[4 * i + j]);''' code += ''' - res += srcElem1 * srcElem2; + res += srcElemA * srcElemB; } AA64FpDestMerge_xd[i] = res; }''' @@ -3129,7 +3142,7 @@ header_output += SveWideningTerImmOpDeclare.subst(iop) else: header_output += SveWideningTerOpDeclare.subst(iop) - exec_output += SveWideningOpExecute.subst(iop) + exec_output += SveWideningTerOpExecute.subst(iop) for type in types: substDict = {'targs': type, 'class_name': 'Sve' + Name} exec_output += SveOpExecDeclare.subst(substDict) @@ -4468,11 +4481,14 @@ sveBinInst('sdivr', 'Sdivr', 'SimdDivOp', signedTypes, sdivrCode, PredType.MERGE, True) # SDOT (indexed) - sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int32_t', - 'int16_t, int64_t'], isIndexed = True) + sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int8_t, int32_t', + 'int16_t, int16_t, int64_t'], isIndexed = True) # SDOT (vectors) - sveDotInst('sdot', 'Sdotv', 'SimdAluOp', ['int8_t, int32_t', - 'int16_t, int64_t'], isIndexed = False) + sveDotInst('sdot', 'Sdotv', 'SimdAluOp', 
['int8_t, int8_t, int32_t', + 'int16_t, int16_t, int64_t'], isIndexed = False) + # SUDOT (indexed) + sveDotInst('sudot', 'Sudoti', 'SimdAluOp', ['int8_t, uint8_t, int32_t'], + isIndexed = True) # SEL (predicates) selCode = 'destElem = srcElem1;' svePredLogicalInst('sel', 'PredSel', 'SimdPredAluOp', ('uint8_t',), @@ -4857,11 +4873,17 @@ sveBinInst('udivr', 'Udivr', 'SimdDivOp', unsignedTypes, udivrCode, PredType.MERGE, True) # UDOT (indexed) - sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint32_t', - 'uint16_t, uint64_t'], isIndexed = True) + sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint8_t, uint32_t', + 'uint16_t, uint16_t, uint64_t'], isIndexed = True) # UDOT (vectors) - sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint32_t', - 'uint16_t, uint64_t'], isIndexed = False) + sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint8_t, uint32_t', + 'uint16_t, uint16_t, uint64_t'], isIndexed = False) + # USDOT (indexed) + sveDotInst('usdot', 'Usdoti', 'SimdAluOp', ['uint8_t, int8_t, int32_t'], + isIndexed = True) + # USDOT (vectors) + sveDotInst('usdot', 'Usdotv', 'SimdAluOp', ['uint8_t, int8_t, int32_t'], + isIndexed = False) # UMAX (immediate) sveWideImmInst('umax', 'UmaxImm', 'SimdCmpOp', unsignedTypes, maxCode) # UMAX (vectors) diff --git a/src/arch/arm/isa/templates/sve.isa b/src/arch/arm/isa/templates/sve.isa index 65abb1b..813bda0 100644 --- a/src/arch/arm/isa/templates/sve.isa +++ b/src/arch/arm/isa/templates/sve.isa @@ -1139,17 +1139,22 @@ }}; def template SveWideningTerImmOpDeclare {{ -template <class _SElement, class _DElement> +template <class _SElementA, class _SElementB, class _DElement> class %(class_name)s : public %(base_class)s { + static_assert(sizeof(_SElementA) == sizeof(_SElementB), + "Source elements must have the same size."); + private: %(reg_idx_arr_decl)s; protected: typedef _DElement Element; - typedef _SElement SElement; + typedef _SElementA SElementA; + typedef _SElementB SElementB; typedef _DElement DElement; - 
typedef _SElement TPSElem; + typedef _SElementA TPSrcAElem; + typedef _SElementB TPSrcBElem; typedef _DElement TPDElem; public: @@ -1168,7 +1173,7 @@ }}; def template SveWideningTerOpDeclare {{ -template <class _SElement, class _DElement> +template <class _SElementA, class _SElementB, class _DElement> class %(class_name)s : public %(base_class)s { private: @@ -1176,9 +1181,11 @@ protected: typedef _DElement Element; - typedef _SElement SElement; + typedef _SElementA SElementA; + typedef _SElementB SElementB; typedef _DElement DElement; - typedef _SElement TPSElem; + typedef _SElementA TPSrcAElem; + typedef _SElementB TPSrcBElem; typedef _DElement TPDElem; public: @@ -1295,6 +1302,26 @@ } }}; +def template SveWideningTerOpExecute {{ + template <class SElementA, class SElementB, class DElement> + Fault %(class_name)s<SElementA, SElementB, DElement>::execute + (ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + %(op_decl)s; + %(op_rd)s; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; + def template SveNonTemplatedOpExecute {{ Fault %(class_name)s::execute(ExecContext *xc, -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/70732?usp=email To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I83841654cee74b940f967b3a37b99d87c01bd92c Gerrit-Change-Number: 70732 Gerrit-PatchSet: 9 Gerrit-Owner: Giacomo Travaglini <giacomo.travaglini@arm.com> Gerrit-Reviewer: Andreas Sandberg <andreas.sandberg@arm.com> Gerrit-Reviewer: Bobby Bruce <bbruce@ucdavis.edu> Gerrit-Reviewer: Jason Lowe-Power <power.jg@gmail.com> Gerrit-Reviewer: kokoro <noreply+kokoro@google.com> Gerrit-CC: Richard Cooper <richard.cooper@arm.com>