gem5-dev@gem5.org

The gem5 Developer List

View all threads

[M] Change in gem5/gem5[develop]: arch-arm: Add support for Arm SVE Integer Matrix instructions.

BB
Bobby Bruce (Gerrit)
Thu, May 25, 2023 9:36 PM

Bobby Bruce has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/70730?usp=email )

(

6 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the
submitted one.
)

Change subject: arch-arm: Add support for Arm SVE Integer Matrix
instructions.
......................................................................

arch-arm: Add support for Arm SVE Integer Matrix instructions.

Add support for the Arm SVE Integer Matrix Multiply-Accumulate
(SMMLA, USMMLA, UMMLA) instructions. Because the associated SUDOT and
USDOT instructions have not yet been implemented, the SVE Feature ID
register 0 (ID_AA64ZFR0_EL1) has not yet been updated to indicate
support for SVE Int8 matrix multiplication instructions at this time.

For more information please refer to the "ARM Architecture Reference
Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A"
(https://developer.arm.com/architectures/cpu-architecture/a-profile/
docs/arm-architecture-reference-manual-supplement-armv8-a)

Additional Contributors: Giacomo Travaglini

Change-Id: Ia50e28fae03634cbe04b42a9900bab65a604817f
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70730
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
---
M src/arch/arm/isa/formats/sve_2nd_level.isa
M src/arch/arm/isa/formats/sve_top_level.isa
M src/arch/arm/isa/insts/sve.isa
3 files changed, 70 insertions(+), 0 deletions(-)

Approvals:
Andreas Sandberg: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass

diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa
b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 3d211bc..4a44bab 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -246,6 +246,33 @@
     }  // decodeSveIntMulAdd

     StaticInstPtr
+    decodeSveIntMatMulAdd(ExtMachInst machInst)
+    {
+        RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+        RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+
+        uint8_t uns = bits(machInst, 23, 22);
+
+        switch (uns) {
+          case 0x0:
+            return new SveSmmla<int32_t, int8_t, int8_t>(
+                         machInst, zda, zn, zm);
+          case 0x2:
+            return new SveUsmmla<int32_t, uint8_t, int8_t>(
+                         machInst, zda, zn, zm);
+          case 0x3:
+            return new SveUmmla<uint32_t, uint8_t, uint8_t>(
+                         machInst, zda, zn, zm);
+          case 0x1:
+          default:
+            return new Unknown64(machInst);
+        }
+
+        return new Unknown64(machInst);
+    }  // decodeSveIntMatMulAdd
+
+    StaticInstPtr
     decodeSveShiftByImmPred0(ExtMachInst machInst)
     {
         RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0);
@@ -3809,5 +3836,21 @@
         return new Unknown64(machInst);
     }  // decodeSveMemStore

+    StaticInstPtr
+    decodeSveMisc(ExtMachInst machInst) {
+        switch(bits(machInst, 13, 10)) {
+          case 0b0110: {
+              return decodeSveIntMatMulAdd(machInst);
+              break;
+          }
+          default: {
+              return new Unknown64(machInst);
+              break;
+          }
+        }
+        return new Unknown64(machInst);
+    }  // decodeSveMisc
+
+
 }  // namespace Aarch64
 }};
diff --git a/src/arch/arm/isa/formats/sve_top_level.isa
b/src/arch/arm/isa/formats/sve_top_level.isa
index 61f2f5c..20a15a2 100644
--- a/src/arch/arm/isa/formats/sve_top_level.isa
+++ b/src/arch/arm/isa/formats/sve_top_level.isa
@@ -44,6 +44,7 @@
     StaticInstPtr decodeSveShiftByImmPred(ExtMachInst machInst);
     StaticInstPtr decodeSveIntArithUnaryPred(ExtMachInst machInst);
     StaticInstPtr decodeSveIntMulAdd(ExtMachInst machInst);
+    StaticInstPtr decodeSveIntMatMulAdd(ExtMachInst machInst);
     StaticInstPtr decodeSveIntArithUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSveIntLogUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSveIndexGen(ExtMachInst machInst);
@@ -94,6 +95,8 @@
     StaticInstPtr decodeSveMemContigLoad(ExtMachInst machInst);
     StaticInstPtr decodeSveMemGather64(ExtMachInst machInst);
     StaticInstPtr decodeSveMemStore(ExtMachInst machInst);
+
+    StaticInstPtr decodeSveMisc(ExtMachInst machInst);
 }
 }};

@@ -104,6 +107,14 @@
     StaticInstPtr
     decodeSveInt(ExtMachInst machInst)
     {
+        if (bits(machInst, 31, 29) == 0b010) {
+            if (bits(machInst, 24) == 0b1 &&
+                bits(machInst, 21) == 0b0 &&
+                bits(machInst, 15, 14)==0b10) {
+                return decodeSveMisc(machInst);
+            }
+        }
+
         uint8_t b_29_24_21 = (bits(machInst, 29) << 2) |
                              (bits(machInst, 24) << 1) |
                              bits(machInst, 21);
    

diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index cbaa2b5..6e8313b 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -4250,6 +4250,22 @@
     sbclbCode = 'res = srcElem1 + ~(srcElem2) + carryIn;'
     sveTerInstUnpred('sbclb', 'Sbclb', 'VectorIntegerArithOp',
unsignedTypes,
             sbclbCode, isTop=False, isAdd=False)
+    mmlaCode = ('destElem += srcElemA * srcElemB')
+    # SMMLA (vectors)
+    sveMatMulInst('smmla', 'Smmla', 'SimdMultAccOp',
+                  (('int32_t', 'int8_t', 'int8_t'),),
+                  numDestRows=2, numDestCols=2, K=8,
+                  elt_mul_op=mmlaCode)
+    # USMMLA (vectors)
+    sveMatMulInst('usmmla', 'Usmmla', 'SimdMultAccOp',
+                  (('int32_t', 'uint8_t', 'int8_t'),),
+                  numDestRows=2, numDestCols=2, K=8,
+                  elt_mul_op=mmlaCode)
+    # UMMLA (vectors)
+    sveMatMulInst('ummla', 'Ummla', 'SimdMultAccOp',
+                  (('uint32_t', 'uint8_t', 'uint8_t'),),
+                  numDestRows=2, numDestCols=2, K=8,
+                  elt_mul_op=mmlaCode)
     # MOVPRFX (predicated)
     movCode = 'destElem = srcElem1;'
     sveUnaryInst('movprfx', 'MovprfxPredM', 'SimdMiscOp', unsignedTypes,
    

--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/70730?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ia50e28fae03634cbe04b42a9900bab65a604817f
Gerrit-Change-Number: 70730
Gerrit-PatchSet: 8
Gerrit-Owner: Giacomo Travaglini <giacomo.travaglini@arm.com>
Gerrit-Reviewer: Andreas Sandberg <andreas.sandberg@arm.com>
Gerrit-Reviewer: Bobby Bruce <bbruce@ucdavis.edu>
Gerrit-Reviewer: Jason Lowe-Power <power.jg@gmail.com>
Gerrit-Reviewer: Richard Cooper <richard.cooper@arm.com>
Gerrit-Reviewer: kokoro <noreply+kokoro@google.com>

Bobby Bruce has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/70730?usp=email ) ( 6 is the latest approved patch-set. No files were changed between the latest approved patch-set and the submitted one. )Change subject: arch-arm: Add support for Arm SVE Integer Matrix instructions. ...................................................................... arch-arm: Add support for Arm SVE Integer Matrix instructions. Add support for the Arm SVE Integer Matrix Multiply-Accumulate (SMMLA, USMMLA, UMMLA) instructions. Because the associated SUDOT and USDOT instructions have not yet been implemented, the SVE Feature ID register 0 (ID_AA64ZFR0_EL1) has not yet been updated to indicate support for SVE Int8 matrix multiplication instructions at this time. For more information please refer to the "ARM Architecture Reference Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A" (https://developer.arm.com/architectures/cpu-architecture/a-profile/ docs/arm-architecture-reference-manual-supplement-armv8-a) Additional Contributors: Giacomo Travaglini Change-Id: Ia50e28fae03634cbe04b42a9900bab65a604817f Reviewed-by: Richard Cooper <richard.cooper@arm.com> Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70730 Maintainer: Andreas Sandberg <andreas.sandberg@arm.com> Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com> --- M src/arch/arm/isa/formats/sve_2nd_level.isa M src/arch/arm/isa/formats/sve_top_level.isa M src/arch/arm/isa/insts/sve.isa 3 files changed, 70 insertions(+), 0 deletions(-) Approvals: Andreas Sandberg: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa index 3d211bc..4a44bab 100644 --- a/src/arch/arm/isa/formats/sve_2nd_level.isa +++ b/src/arch/arm/isa/formats/sve_2nd_level.isa @@ -246,6 +246,33 @@ } // decodeSveIntMulAdd 
StaticInstPtr + decodeSveIntMatMulAdd(ExtMachInst machInst) + { + RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); + + uint8_t uns = bits(machInst, 23, 22); + + switch (uns) { + case 0x0: + return new SveSmmla<int32_t, int8_t, int8_t>( + machInst, zda, zn, zm); + case 0x2: + return new SveUsmmla<int32_t, uint8_t, int8_t>( + machInst, zda, zn, zm); + case 0x3: + return new SveUmmla<uint32_t, uint8_t, uint8_t>( + machInst, zda, zn, zm); + case 0x1: + default: + return new Unknown64(machInst); + } + + return new Unknown64(machInst); + } // decodeSveIntMatMulAdd + + StaticInstPtr decodeSveShiftByImmPred0(ExtMachInst machInst) { RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0); @@ -3809,5 +3836,21 @@ return new Unknown64(machInst); } // decodeSveMemStore + StaticInstPtr + decodeSveMisc(ExtMachInst machInst) { + switch(bits(machInst, 13, 10)) { + case 0b0110: { + return decodeSveIntMatMulAdd(machInst); + break; + } + default: { + return new Unknown64(machInst); + break; + } + } + return new Unknown64(machInst); + } // decodeSveMisc + + } // namespace Aarch64 }}; diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa index 61f2f5c..20a15a2 100644 --- a/src/arch/arm/isa/formats/sve_top_level.isa +++ b/src/arch/arm/isa/formats/sve_top_level.isa @@ -44,6 +44,7 @@ StaticInstPtr decodeSveShiftByImmPred(ExtMachInst machInst); StaticInstPtr decodeSveIntArithUnaryPred(ExtMachInst machInst); StaticInstPtr decodeSveIntMulAdd(ExtMachInst machInst); + StaticInstPtr decodeSveIntMatMulAdd(ExtMachInst machInst); StaticInstPtr decodeSveIntArithUnpred(ExtMachInst machInst); StaticInstPtr decodeSveIntLogUnpred(ExtMachInst machInst); StaticInstPtr decodeSveIndexGen(ExtMachInst machInst); @@ -94,6 +95,8 @@ StaticInstPtr decodeSveMemContigLoad(ExtMachInst machInst); StaticInstPtr decodeSveMemGather64(ExtMachInst 
machInst); StaticInstPtr decodeSveMemStore(ExtMachInst machInst); + + StaticInstPtr decodeSveMisc(ExtMachInst machInst); } }}; @@ -104,6 +107,14 @@ StaticInstPtr decodeSveInt(ExtMachInst machInst) { + if (bits(machInst, 31, 29) == 0b010) { + if (bits(machInst, 24) == 0b1 && + bits(machInst, 21) == 0b0 && + bits(machInst, 15, 14)==0b10) { + return decodeSveMisc(machInst); + } + } + uint8_t b_29_24_21 = (bits(machInst, 29) << 2) | (bits(machInst, 24) << 1) | bits(machInst, 21); diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa index cbaa2b5..6e8313b 100644 --- a/src/arch/arm/isa/insts/sve.isa +++ b/src/arch/arm/isa/insts/sve.isa @@ -4250,6 +4250,22 @@ sbclbCode = 'res = srcElem1 + ~(srcElem2) + carryIn;' sveTerInstUnpred('sbclb', 'Sbclb', 'VectorIntegerArithOp', unsignedTypes, sbclbCode, isTop=False, isAdd=False) + mmlaCode = ('destElem += srcElemA * srcElemB') + # SMMLA (vectors) + sveMatMulInst('smmla', 'Smmla', 'SimdMultAccOp', + (('int32_t', 'int8_t', 'int8_t'),), + numDestRows=2, numDestCols=2, K=8, + elt_mul_op=mmlaCode) + # USMMLA (vectors) + sveMatMulInst('usmmla', 'Usmmla', 'SimdMultAccOp', + (('int32_t', 'uint8_t', 'int8_t'),), + numDestRows=2, numDestCols=2, K=8, + elt_mul_op=mmlaCode) + # UMMLA (vectors) + sveMatMulInst('ummla', 'Ummla', 'SimdMultAccOp', + (('uint32_t', 'uint8_t', 'uint8_t'),), + numDestRows=2, numDestCols=2, K=8, + elt_mul_op=mmlaCode) # MOVPRFX (predicated) movCode = 'destElem = srcElem1;' sveUnaryInst('movprfx', 'MovprfxPredM', 'SimdMiscOp', unsignedTypes, -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/70730?usp=email To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Ia50e28fae03634cbe04b42a9900bab65a604817f Gerrit-Change-Number: 70730 Gerrit-PatchSet: 8 Gerrit-Owner: Giacomo Travaglini <giacomo.travaglini@arm.com> 
Gerrit-Reviewer: Andreas Sandberg <andreas.sandberg@arm.com> Gerrit-Reviewer: Bobby Bruce <bbruce@ucdavis.edu> Gerrit-Reviewer: Jason Lowe-Power <power.jg@gmail.com> Gerrit-Reviewer: Richard Cooper <richard.cooper@arm.com> Gerrit-Reviewer: kokoro <noreply+kokoro@google.com>