gem5-dev@gem5.org

The gem5 Developer List

View all threads

[M] Change in gem5/gem5[develop]: arch-arm: Added 128-bit encodings of SVE TRN, UZP, and ZIP insts.

BB
Bobby Bruce (Gerrit)
Thu, May 25, 2023 9:36 PM

Bobby Bruce has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/70728?usp=email )

(

6 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the
submitted one.
)Change subject: arch-arm: Added 128-bit encodings of SVE TRN, UZP, and
ZIP insts.
......................................................................

arch-arm: Added 128-bit encodings of SVE TRN, UZP, and ZIP insts.

Add support for the 128-bit element encodings of the TRN1, TRN2, UZP1,
UZP2, ZIP1, and ZIP2 instructions, required by the Armv8.2 SVE
Double-precision floating-point Matrix Multiplication
instructions (ARMv8.2-F64MM).

For more information please refer to the "ARM Architecture Reference
Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A"
(https://developer.arm.com/architectures/cpu-architecture/a-profile/
docs/arm-architecture-reference-manual-supplement-armv8-a)

Change-Id: I496576340c48410fedb2cf6fc7d1a02e219b3bd4
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70728
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>

M src/arch/arm/isa/formats/sve_2nd_level.isa
M src/arch/arm/isa/formats/sve_top_level.isa
M src/arch/arm/isa/insts/sve.isa
3 files changed, 108 insertions(+), 35 deletions(-)

Approvals:
Andreas Sandberg: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass
Jason Lowe-Power: Looks good to me, but someone else must approve; Looks
good to me, approved
Giacomo Travaglini: Looks good to me, approved

diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa
b/src/arch/arm/isa/formats/sve_2nd_level.isa
index f74181a..3d211bc 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -1145,29 +1145,31 @@
     }  // decodeSvePermPredicates

     StaticInstPtr
-    decodeSvePermIntlv(ExtMachInst machInst)
+    decodeSvePermIntlv(ExtMachInst machInst, bool f64mm)
     {
         RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
         RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
         RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
-        uint8_t size = bits(machInst, 23, 22);
+        uint8_t size = f64mm ? 4 : (uint8_t)bits(machInst, 23, 22);

         uint8_t opc = bits(machInst, 12, 10);

         switch (opc) {
           case 0x0:
-            return decodeSveBinUnpredU<SveZip1>(size, machInst, zd, zn, zm);
+            return decodeSveBinUnpredUQ<SveZip1>(size, machInst, zd, zn, zm);
           case 0x1:
-            return decodeSveBinUnpredU<SveZip2>(size, machInst, zd, zn, zm);
+            return decodeSveBinUnpredUQ<SveZip2>(size, machInst, zd, zn, zm);
           case 0x2:
-            return decodeSveBinUnpredU<SveUzp1>(size, machInst, zd, zn, zm);
+            return decodeSveBinUnpredUQ<SveUzp1>(size, machInst, zd, zn, zm);
           case 0x3:
-            return decodeSveBinUnpredU<SveUzp2>(size, machInst, zd, zn, zm);
+            return decodeSveBinUnpredUQ<SveUzp2>(size, machInst, zd, zn, zm);
           case 0x4:
-            return decodeSveBinUnpredU<SveTrn1>(size, machInst, zd, zn, zm);
+          case 0x6:
+            return decodeSveBinUnpredUQ<SveTrn1>(size, machInst, zd, zn, zm);
           case 0x5:
-            return decodeSveBinUnpredU<SveTrn2>(size, machInst, zd, zn, zm);
+          case 0x7:
+            return decodeSveBinUnpredUQ<SveTrn2>(size, machInst, zd, zn, zm);
         }
         return new Unknown64(machInst);
     }  // decodeSvePermIntlv
diff --git a/src/arch/arm/isa/formats/sve_top_level.isa
b/src/arch/arm/isa/formats/sve_top_level.isa
index b0579fb..61f2f5c 100644
--- a/src/arch/arm/isa/formats/sve_top_level.isa
+++ b/src/arch/arm/isa/formats/sve_top_level.isa
@@ -57,7 +57,7 @@
     StaticInstPtr decodeSvePermExtract(ExtMachInst machInst);
     StaticInstPtr decodeSvePermUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSvePermPredicates(ExtMachInst machInst);
-    StaticInstPtr decodeSvePermIntlv(ExtMachInst machInst);
+    StaticInstPtr decodeSvePermIntlv(ExtMachInst machInst, bool f64mm);
     StaticInstPtr decodeSvePermPred(ExtMachInst machInst);
     StaticInstPtr decodeSveSelVec(ExtMachInst machInst);
     StaticInstPtr decodeSveIntCmpVec(ExtMachInst machInst);
@@ -202,11 +202,18 @@
                     if (b_13) {
                         return decodeSvePermUnpred(machInst);
                     } else {
-                        return decodeSvePermExtract(machInst);
+                        uint8_t b_23 = bits(machInst, 23);
+                        if (b_23) {
+                            // 128-bit element encodings for Armv8.6 F64MM
+                            return decodeSvePermIntlv(machInst, true);
+                        } else {
+                            return decodeSvePermExtract(machInst);
+                        }
                     }
                   case 0x1:
                     if (b_13) {
-                        return decodeSvePermIntlv(machInst);
+                        // 8,16,32,64-bit element encodings
+                        return decodeSvePermIntlv(machInst, false);
                     } else {
                         return decodeSvePermPredicates(machInst);
                     }

diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 74eacb8..cbaa2b5 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -633,6 +633,29 @@
     }

     // Decodes binary, constructive, unpredicated SVE instructions.
+    // Unsigned instructions only, including Quadword variants.
+    template <template <typename T> class Base>
+    StaticInstPtr
+    decodeSveBinUnpredUQ(unsigned size, ExtMachInst machInst, RegIndex dest,
+            RegIndex op1, RegIndex op2)
+    {
+        switch (size) {
+          case 0:
+            return new Base<uint8_t>(machInst, dest, op1, op2);
+          case 1:
+            return new Base<uint16_t>(machInst, dest, op1, op2);
+          case 2:
+            return new Base<uint32_t>(machInst, dest, op1, op2);
+          case 3:
+            return new Base<uint64_t>(machInst, dest, op1, op2);
+          case 4:
+            return new Base<__uint128_t>(machInst, dest, op1, op2);
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
+    // Decodes binary, constructive, unpredicated SVE instructions.
     // Signed instructions only.
     template <template <typename T> class Base>
     StaticInstPtr
@@ -3299,6 +3322,8 @@
     fpTypes = ('uint16_t', 'uint32_t', 'uint64_t')
     signedTypes = ('int8_t', 'int16_t', 'int32_t', 'int64_t')
     unsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t')
+    extendedUnsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t',
+                             '__uint128_t')

     smallSignedTypes = ('int8_t', 'int16_t', 'int32_t')
     bigSignedTypes = ('int16_t', 'int32_t', 'int64_t')
@@ -4754,23 +4779,36 @@
                trnPredIterCode % 1)
     # TRN1, TRN2 (vectors)
     trnIterCode = '''
+        // SVE F64MM support requires that there are at least two elements
+        // in the vector.
+        if (eCount < 2) {
+            return std::make_shared<UndefinedInstruction>(machInst, false,
+                                                          "%(mnemonic)s");
+        }
         int s;
-        int part = %d;
+        int part = %(part)d;
         ArmISA::VecRegContainer tmpVecC;
         auto auxDest = tmpVecC.as<Element>();
-        for (unsigned i = 0; i < eCount / 2; i++) {
+        const unsigned eltPairsCount = eCount / 2;
+        const unsigned eltsInPairsCount = eltPairsCount * 2;
+        for (unsigned i = 0; i < eltPairsCount; i++) {
            s = 2 * i + part;
            auxDest[2 * i] = AA64FpOp1_x[s];
            auxDest[2 * i + 1] = AA64FpOp2_x[s];
        }
-        for (unsigned i = 0; i < eCount; i++) {
+        // Fill output vector with pairs of elements
+        for (unsigned i = 0; i < eltsInPairsCount; i++) {
            AA64FpDest_x[i] = auxDest[i];
        }
+        // Fill any trailing non-full pairs with zeros
+        for (unsigned i = eltsInPairsCount; i < eCount; i++) {
+            AA64FpDest_x[i] = 0;
+        }
     '''
-    sveBinInst('trn1', 'Trn1', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=trnIterCode % 0)
-    sveBinInst('trn2', 'Trn2', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=trnIterCode % 1)
+    sveBinInst('trn1', 'Trn1', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=trnIterCode % dict(mnemonic='trn1', part=0))
+    sveBinInst('trn2', 'Trn2', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=trnIterCode % dict(mnemonic='trn2', part=1))
     # UABD
     sveBinInst('uabd', 'Uabd', 'SimdAddOp', unsignedTypes, abdCode,
                PredType.MERGE, True)
@@ -4976,26 +5014,39 @@
                uzpPredIterCode % 1)
     # UZP1, UZP2 (vectors)
     uzpIterCode = '''
+        // SVE F64MM support requires that there are at least two elements
+        // in the vector.
+        if (eCount < 2) {
+            return std::make_shared<UndefinedInstruction>(machInst, false,
+                                                          "%(mnemonic)s");
+        }
        int s;
-        int part = %d;
+        int part = %(part)d;
        ArmISA::VecRegContainer tmpVecC;
        auto auxDest = tmpVecC.as<Element>();
-        for (unsigned i = 0; i < eCount; i++) {
+        const unsigned eltPairsCount = eCount / 2;
+        const unsigned eltsInPairsCount = eltPairsCount * 2;
+        for (unsigned i = 0; i < eltsInPairsCount; i++) {
            s = 2 * i + part;
-            if (s < eCount) {
+            if (s < eltsInPairsCount) {
                auxDest[i] = AA64FpOp1_x[s];
            } else {
-                auxDest[i] = AA64FpOp2_x[s - eCount];
+                auxDest[i] = AA64FpOp2_x[s - eltsInPairsCount];
            }
        }
-        for (unsigned i = 0; i < eCount; i++) {
+        // Fill output vector with pairs of elements
+        for (unsigned i = 0; i < eltsInPairsCount; i++) {
            AA64FpDest_x[i] = auxDest[i];
        }
+        // Fill any trailing non-full pairs with zeros
+        for (unsigned i = eltsInPairsCount; i < eCount; i++) {
+            AA64FpDest_x[i] = 0;
+        }
     '''
-    sveBinInst('uzp1', 'Uzp1', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=uzpIterCode % 0)
-    sveBinInst('uzp2', 'Uzp2', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=uzpIterCode % 1)
+    sveBinInst('uzp1', 'Uzp1', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=uzpIterCode % dict(mnemonic='uzp1', part=0))
+    sveBinInst('uzp2', 'Uzp2', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=uzpIterCode % dict(mnemonic='uzp2', part=1))
     # WHILELE (32-bit)
     whileLECode = '''
            cond = srcElem1 <= srcElem2;
@@ -5058,22 +5109,35 @@
                zipPredIterCode % 1)
     # ZIP1, ZIP2 (vectors)
     zipIterCode = '''
+        // SVE F64MM support requires that there are at least two elements
+        // in the vector.
+        if (eCount < 2) {
+            return std::make_shared<UndefinedInstruction>(machInst, false,
+                                                          "%(mnemonic)s");
+        }
        int s;
-        int part = %d;
+        int part = %(part)d;
        ArmISA::VecRegContainer tmpVecC;
        auto auxDest = tmpVecC.as<Element>();
-        for (unsigned i = 0; i < eCount / 2; i++) {
-            s = i + (part * (eCount / 2));
+        const unsigned eltPairsCount = eCount / 2;
+        const unsigned eltsInPairsCount = eltPairsCount * 2;
+        for (unsigned i = 0; i < eltPairsCount; i++) {
+            s = i + (part * (eltsInPairsCount / 2));
            auxDest[2 * i] = AA64FpOp1_x[s];
            auxDest[2 * i + 1] = AA64FpOp2_x[s];
        }
-        for (unsigned i = 0; i < eCount; i++) {
+        // Fill output vector with pairs of elements
+        for (unsigned i = 0; i < eltsInPairsCount; i++) {
            AA64FpDest_x[i] = auxDest[i];
        }
+        // Fill any trailing non-full pairs with zeros
+        for (unsigned i = eltsInPairsCount; i < eCount; i++) {
+            AA64FpDest_x[i] = 0;
+        }
     '''
-    sveBinInst('zip1', 'Zip1', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=zipIterCode % 0)
-    sveBinInst('zip2', 'Zip2', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=zipIterCode % 1)
+    sveBinInst('zip1', 'Zip1', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=zipIterCode % dict(mnemonic='zip1', part=0))
+    sveBinInst('zip2', 'Zip2', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=zipIterCode % dict(mnemonic='zip2', part=1))

 }};

--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/70728?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I496576340c48410fedb2cf6fc7d1a02e219b3bd4
Gerrit-Change-Number: 70728
Gerrit-PatchSet: 8
Gerrit-Owner: Giacomo Travaglini <giacomo.travaglini@arm.com>
Gerrit-Reviewer: Andreas Sandberg <andreas.sandberg@arm.com>
Gerrit-Reviewer: Bobby Bruce <bbruce@ucdavis.edu>
Gerrit-Reviewer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Gerrit-Reviewer: Jason Lowe-Power <power.jg@gmail.com>
Gerrit-Reviewer: Richard Cooper <richard.cooper@arm.com>
Gerrit-Reviewer: kokoro <noreply+kokoro@google.com>

Bobby Bruce has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/70728?usp=email ) ( 6 is the latest approved patch-set. No files were changed between the latest approved patch-set and the submitted one. )Change subject: arch-arm: Added 128-bit encodings of SVE TRN, UZP, and ZIP insts. ...................................................................... arch-arm: Added 128-bit encodings of SVE TRN, UZP, and ZIP insts. Add support for the 128-bit element encodings of the TRN1, TRN2, UZP1, UZP2, ZIP1, and ZIP2 instructions, required by the Armv8.2 SVE Double-precision floating-point Matrix Multiplication instructions (ARMv8.2-F64MM). For more information please refer to the "ARM Architecture Reference Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A" (https://developer.arm.com/architectures/cpu-architecture/a-profile/ docs/arm-architecture-reference-manual-supplement-armv8-a) Change-Id: I496576340c48410fedb2cf6fc7d1a02e219b3bd4 Reviewed-by: Richard Cooper <richard.cooper@arm.com> Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70728 Maintainer: Jason Lowe-Power <power.jg@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com> Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com> Maintainer: Andreas Sandberg <andreas.sandberg@arm.com> --- M src/arch/arm/isa/formats/sve_2nd_level.isa M src/arch/arm/isa/formats/sve_top_level.isa M src/arch/arm/isa/insts/sve.isa 3 files changed, 108 insertions(+), 35 deletions(-) Approvals: Andreas Sandberg: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass Jason Lowe-Power: Looks good to me, but someone else must approve; Looks good to me, approved Giacomo Travaglini: Looks good to me, approved diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa index f74181a..3d211bc 100644 
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa +++ b/src/arch/arm/isa/formats/sve_2nd_level.isa @@ -1145,29 +1145,31 @@ } // decodeSvePermPredicates StaticInstPtr - decodeSvePermIntlv(ExtMachInst machInst) + decodeSvePermIntlv(ExtMachInst machInst, bool f64mm) { RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0); RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); - uint8_t size = bits(machInst, 23, 22); + uint8_t size = f64mm ? 4 : (uint8_t)bits(machInst, 23, 22); uint8_t opc = bits(machInst, 12, 10); switch (opc) { case 0x0: - return decodeSveBinUnpredU<SveZip1>(size, machInst, zd, zn, zm); + return decodeSveBinUnpredUQ<SveZip1>(size, machInst, zd, zn, zm); case 0x1: - return decodeSveBinUnpredU<SveZip2>(size, machInst, zd, zn, zm); + return decodeSveBinUnpredUQ<SveZip2>(size, machInst, zd, zn, zm); case 0x2: - return decodeSveBinUnpredU<SveUzp1>(size, machInst, zd, zn, zm); + return decodeSveBinUnpredUQ<SveUzp1>(size, machInst, zd, zn, zm); case 0x3: - return decodeSveBinUnpredU<SveUzp2>(size, machInst, zd, zn, zm); + return decodeSveBinUnpredUQ<SveUzp2>(size, machInst, zd, zn, zm); case 0x4: - return decodeSveBinUnpredU<SveTrn1>(size, machInst, zd, zn, zm); + case 0x6: + return decodeSveBinUnpredUQ<SveTrn1>(size, machInst, zd, zn, zm); case 0x5: - return decodeSveBinUnpredU<SveTrn2>(size, machInst, zd, zn, zm); + case 0x7: + return decodeSveBinUnpredUQ<SveTrn2>(size, machInst, zd, zn, zm); } return new Unknown64(machInst); } // decodeSvePermIntlv diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa index b0579fb..61f2f5c 100644 --- a/src/arch/arm/isa/formats/sve_top_level.isa +++ b/src/arch/arm/isa/formats/sve_top_level.isa @@ -57,7 +57,7 @@ StaticInstPtr decodeSvePermExtract(ExtMachInst machInst); StaticInstPtr decodeSvePermUnpred(ExtMachInst machInst); StaticInstPtr decodeSvePermPredicates(ExtMachInst machInst); - StaticInstPtr 
decodeSvePermIntlv(ExtMachInst machInst); + StaticInstPtr decodeSvePermIntlv(ExtMachInst machInst, bool f64mm); StaticInstPtr decodeSvePermPred(ExtMachInst machInst); StaticInstPtr decodeSveSelVec(ExtMachInst machInst); StaticInstPtr decodeSveIntCmpVec(ExtMachInst machInst); @@ -202,11 +202,18 @@ if (b_13) { return decodeSvePermUnpred(machInst); } else { - return decodeSvePermExtract(machInst); + uint8_t b_23 = bits(machInst, 23); + if (b_23) { + // 128-bit element encodings for Armv8.6 F64MM + return decodeSvePermIntlv(machInst, true); + } else { + return decodeSvePermExtract(machInst); + } } case 0x1: if (b_13) { - return decodeSvePermIntlv(machInst); + // 8,16,32,64-bit element encodings + return decodeSvePermIntlv(machInst, false); } else { return decodeSvePermPredicates(machInst); } diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa index 74eacb8..cbaa2b5 100644 --- a/src/arch/arm/isa/insts/sve.isa +++ b/src/arch/arm/isa/insts/sve.isa @@ -633,6 +633,29 @@ } // Decodes binary, constructive, unpredicated SVE instructions. + // Unsigned instructions only, including Quadword variants. + template <template <typename T> class Base> + StaticInstPtr + decodeSveBinUnpredUQ(unsigned size, ExtMachInst machInst, RegIndex dest, + RegIndex op1, RegIndex op2) + { + switch (size) { + case 0: + return new Base<uint8_t>(machInst, dest, op1, op2); + case 1: + return new Base<uint16_t>(machInst, dest, op1, op2); + case 2: + return new Base<uint32_t>(machInst, dest, op1, op2); + case 3: + return new Base<uint64_t>(machInst, dest, op1, op2); + case 4: + return new Base<__uint128_t>(machInst, dest, op1, op2); + default: + return new Unknown64(machInst); + } + } + + // Decodes binary, constructive, unpredicated SVE instructions. // Signed instructions only. 
template <template <typename T> class Base> StaticInstPtr @@ -3299,6 +3322,8 @@ fpTypes = ('uint16_t', 'uint32_t', 'uint64_t') signedTypes = ('int8_t', 'int16_t', 'int32_t', 'int64_t') unsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t') + extendedUnsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', + '__uint128_t') smallSignedTypes = ('int8_t', 'int16_t', 'int32_t') bigSignedTypes = ('int16_t', 'int32_t', 'int64_t') @@ -4754,23 +4779,36 @@ trnPredIterCode % 1) # TRN1, TRN2 (vectors) trnIterCode = ''' + // SVE F64MM support requires that there are at least two elements + // in the vector. + if (eCount < 2) { + return std::make_shared<UndefinedInstruction>(machInst, false, + "%(mnemonic)s"); + } int s; - int part = %d; + int part = %(part)d; ArmISA::VecRegContainer tmpVecC; auto auxDest = tmpVecC.as<Element>(); - for (unsigned i = 0; i < eCount / 2; i++) { + const unsigned eltPairsCount = eCount / 2; + const unsigned eltsInPairsCount = eltPairsCount * 2; + for (unsigned i = 0; i < eltPairsCount; i++) { s = 2 * i + part; auxDest[2 * i] = AA64FpOp1_x[s]; auxDest[2 * i + 1] = AA64FpOp2_x[s]; } - for (unsigned i = 0; i < eCount; i++) { + // Fill output vector with pairs of elements + for (unsigned i = 0; i < eltsInPairsCount; i++) { AA64FpDest_x[i] = auxDest[i]; } + // Fill any trailing non-full pairs with zeros + for (unsigned i = eltsInPairsCount; i < eCount; i++) { + AA64FpDest_x[i] = 0; + } ''' - sveBinInst('trn1', 'Trn1', 'SimdAluOp', unsignedTypes, '', - customIterCode=trnIterCode % 0) - sveBinInst('trn2', 'Trn2', 'SimdAluOp', unsignedTypes, '', - customIterCode=trnIterCode % 1) + sveBinInst('trn1', 'Trn1', 'SimdAluOp', extendedUnsignedTypes, '', + customIterCode=trnIterCode % dict(mnemonic='trn1', part=0)) + sveBinInst('trn2', 'Trn2', 'SimdAluOp', extendedUnsignedTypes, '', + customIterCode=trnIterCode % dict(mnemonic='trn2', part=1)) # UABD sveBinInst('uabd', 'Uabd', 'SimdAddOp', unsignedTypes, abdCode, PredType.MERGE, True) @@ -4976,26 
+5014,39 @@ uzpPredIterCode % 1) # UZP1, UZP2 (vectors) uzpIterCode = ''' + // SVE F64MM support requires that there are at least two elements + // in the vector. + if (eCount < 2) { + return std::make_shared<UndefinedInstruction>(machInst, false, + "%(mnemonic)s"); + } int s; - int part = %d; + int part = %(part)d; ArmISA::VecRegContainer tmpVecC; auto auxDest = tmpVecC.as<Element>(); - for (unsigned i = 0; i < eCount; i++) { + const unsigned eltPairsCount = eCount / 2; + const unsigned eltsInPairsCount = eltPairsCount * 2; + for (unsigned i = 0; i < eltsInPairsCount; i++) { s = 2 * i + part; - if (s < eCount) { + if (s < eltsInPairsCount) { auxDest[i] = AA64FpOp1_x[s]; } else { - auxDest[i] = AA64FpOp2_x[s - eCount]; + auxDest[i] = AA64FpOp2_x[s - eltsInPairsCount]; } } - for (unsigned i = 0; i < eCount; i++) { + // Fill output vector with pairs of elements + for (unsigned i = 0; i < eltsInPairsCount; i++) { AA64FpDest_x[i] = auxDest[i]; } + // Fill any trailing non-full pairs with zeros + for (unsigned i = eltsInPairsCount; i < eCount; i++) { + AA64FpDest_x[i] = 0; + } ''' - sveBinInst('uzp1', 'Uzp1', 'SimdAluOp', unsignedTypes, '', - customIterCode=uzpIterCode % 0) - sveBinInst('uzp2', 'Uzp2', 'SimdAluOp', unsignedTypes, '', - customIterCode=uzpIterCode % 1) + sveBinInst('uzp1', 'Uzp1', 'SimdAluOp', extendedUnsignedTypes, '', + customIterCode=uzpIterCode % dict(mnemonic='uzp1', part=0)) + sveBinInst('uzp2', 'Uzp2', 'SimdAluOp', extendedUnsignedTypes, '', + customIterCode=uzpIterCode % dict(mnemonic='uzp2', part=1)) # WHILELE (32-bit) whileLECode = ''' cond = srcElem1 <= srcElem2; @@ -5058,22 +5109,35 @@ zipPredIterCode % 1) # ZIP1, ZIP2 (vectors) zipIterCode = ''' + // SVE F64MM support requires that there are at least two elements + // in the vector. 
+ if (eCount < 2) { + return std::make_shared<UndefinedInstruction>(machInst, false, + "%(mnemonic)s"); + } int s; - int part = %d; + int part = %(part)d; ArmISA::VecRegContainer tmpVecC; auto auxDest = tmpVecC.as<Element>(); - for (unsigned i = 0; i < eCount / 2; i++) { - s = i + (part * (eCount / 2)); + const unsigned eltPairsCount = eCount / 2; + const unsigned eltsInPairsCount = eltPairsCount * 2; + for (unsigned i = 0; i < eltPairsCount; i++) { + s = i + (part * (eltsInPairsCount / 2)); auxDest[2 * i] = AA64FpOp1_x[s]; auxDest[2 * i + 1] = AA64FpOp2_x[s]; } - for (unsigned i = 0; i < eCount; i++) { + // Fill output vector with pairs of elements + for (unsigned i = 0; i < eltsInPairsCount; i++) { AA64FpDest_x[i] = auxDest[i]; } + // Fill any trailing non-full pairs with zeros + for (unsigned i = eltsInPairsCount; i < eCount; i++) { + AA64FpDest_x[i] = 0; + } ''' - sveBinInst('zip1', 'Zip1', 'SimdAluOp', unsignedTypes, '', - customIterCode=zipIterCode % 0) - sveBinInst('zip2', 'Zip2', 'SimdAluOp', unsignedTypes, '', - customIterCode=zipIterCode % 1) + sveBinInst('zip1', 'Zip1', 'SimdAluOp', extendedUnsignedTypes, '', + customIterCode=zipIterCode % dict(mnemonic='zip1', part=0)) + sveBinInst('zip2', 'Zip2', 'SimdAluOp', extendedUnsignedTypes, '', + customIterCode=zipIterCode % dict(mnemonic='zip2', part=1)) }}; -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/70728?usp=email To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I496576340c48410fedb2cf6fc7d1a02e219b3bd4 Gerrit-Change-Number: 70728 Gerrit-PatchSet: 8 Gerrit-Owner: Giacomo Travaglini <giacomo.travaglini@arm.com> Gerrit-Reviewer: Andreas Sandberg <andreas.sandberg@arm.com> Gerrit-Reviewer: Bobby Bruce <bbruce@ucdavis.edu> Gerrit-Reviewer: Giacomo Travaglini <giacomo.travaglini@arm.com> Gerrit-Reviewer: Jason Lowe-Power 
<power.jg@gmail.com> Gerrit-Reviewer: Richard Cooper <richard.cooper@arm.com> Gerrit-Reviewer: kokoro <noreply+kokoro@google.com>