Bobby Bruce has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/70732?usp=email )
(
7 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the
submitted one.
)
Change subject: arch-arm: Added Armv8.2-I8MM SVE mixed-sign dot product
instrs.
......................................................................
arch-arm: Added Armv8.2-I8MM SVE mixed-sign dot product instrs.
Add support for the SVE mixed-sign dot product instructions (USDOT,
SUDOT) required by the Armv8.2 SVE Int8 matrix multiplication
extension (ARMv8.2-I8MM).
For more information please refer to the "ARM Architecture Reference
Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A"
(https://developer.arm.com/architectures/cpu-architecture/a-profile/
docs/arm-architecture-reference-manual-supplement-armv8-a)
M src/arch/arm/isa/formats/sve_2nd_level.isa
M src/arch/arm/isa/insts/sve.isa
M src/arch/arm/isa/templates/sve.isa
3 files changed, 98 insertions(+), 61 deletions(-)
Approvals:
Andreas Sandberg: Looks good to me, approved
Jason Lowe-Power: Looks good to me, but someone else must approve; Looks
good to me, approved
kokoro: Regressions pass
diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa
b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 0d12a22..86c174d 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -2256,19 +2256,19 @@
uint8_t usig = (uint8_t) bits(machInst, 10);
if (size & 0x1) {
if (usig) {
return new SveUdotv<uint16_t, uint64_t>(machInst,
zda, zn, zm);
return new SveUdotv<uint16_t, uint16_t, uint64_t>
(machInst, zda, zn, zm);
} else {
return new SveSdotv<int16_t, int64_t>(machInst,
zda, zn, zm);
return new SveSdotv<int16_t, int16_t, int64_t>
(machInst, zda, zn, zm);
}
} else {
if (usig) {
return new SveUdotv<uint8_t, uint32_t>(machInst,
zda, zn, zm);
return new SveUdotv<uint8_t, uint8_t, uint32_t>
(machInst, zda, zn, zm);
} else {
return new SveSdotv<int8_t, int32_t>(machInst,
zda, zn, zm);
return new SveSdotv<int8_t, int8_t, int32_t>
(machInst, zda, zn, zm);
}
}
@@ -2292,21 +2292,21 @@
RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 19, 16);
uint8_t i1 = (uint8_t) bits(machInst, 20);
if (usig) {
return new SveUdoti<uint16_t, uint64_t>(machInst,
zda, zn, zm, i1);
return new SveUdoti<uint16_t, uint16_t, uint64_t>
(machInst, zda, zn, zm, i1);
} else {
return new SveSdoti<int16_t, int64_t>(machInst,
zda, zn, zm, i1);
return new SveSdoti<int16_t, int16_t, int64_t>
(machInst, zda, zn, zm, i1);
}
} else {
RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 18, 16);
uint8_t i2 = (uint8_t) bits(machInst, 20, 19);
if (usig) {
return new SveUdoti<uint8_t, uint32_t>(machInst,
zda, zn, zm, i2);
return new SveUdoti<uint8_t, uint8_t, uint32_t>
(machInst, zda, zn, zm, i2);
} else {
return new SveSdoti<int8_t, int32_t>(machInst,
zda, zn, zm, i2);
return new SveSdoti<int8_t, int8_t, int32_t>
(machInst, zda, zn, zm, i2);
}
}
return new Unknown64(machInst);
@@ -2320,16 +2320,12 @@
return new Unknown64(machInst);
}
RegIndex zda M5_VAR_USED = (RegIndex)
(uint8_t) bits(machInst, 4, 0);
RegIndex zn M5_VAR_USED = (RegIndex)
(uint8_t) bits(machInst, 9, 5);
RegIndex zm M5_VAR_USED = (RegIndex)
(uint8_t) bits(machInst, 20, 16);
RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
// Placeholder for SveUsdotv
//return SveUsdotv<int32_t, uint8_t, int8_t>(machInst, zda, zn,
zm);
return new Unknown64(machInst);
return new SveUsdotv<uint8_t, int8_t, int32_t>
(machInst, zda, zn, zm);
} // decodeSveMixedSignDotProduct
StaticInstPtr
@@ -2340,26 +2336,18 @@
return new Unknown64(machInst);
}
RegIndex zda M5_VAR_USED = (RegIndex)
(uint8_t) bits(machInst, 4, 0);
RegIndex zn M5_VAR_USED = (RegIndex)
(uint8_t) bits(machInst, 9, 5);
RegIndex zm M5_VAR_USED = (RegIndex)
(uint8_t) bits(machInst, 18, 16);
uint8_t i2 M5_VAR_USED = (uint8_t) bits(machInst, 20, 19);
RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 18, 16);
uint8_t i2 = (uint8_t) bits(machInst, 20, 19);
uint8_t usig = (uint8_t) bits(machInst, 10);
if (usig) {
// Placeholder for SveSudoti
//return SveSudoti<int32_t, int8_t, uint8_t>
// (machInst, zda, zn, zm, i2);
return new Unknown64(machInst);
return new SveSudoti<int8_t, uint8_t, int32_t>
(machInst, zda, zn, zm, i2);
} else {
// Placeholder for SveUsdoti
//return SveUsdoti<int32_t, uint8_t, int8_t>
// (machInst, zda, zn, zm, i2);
return new Unknown64(machInst);
return new SveUsdoti<uint8_t, int8_t, int32_t>
(machInst, zda, zn, zm, i2);
}
} // decodeSveMixedSignDotProductIndexed
diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 6e8313b..e7a773e 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -3098,6 +3098,15 @@
def sveDotInst(name, Name, opClass, types, isIndexed = True):
global header_output, exec_output, decoders
code = sveEnabledCheckCode + '''
// Types of the extended versions of the source elements.
// Required to make sure the intermediate calculations don't overflow.
using ExtendedElementA = typename vector_element_traits::
extend_element<DElement,
SElementA>::type;
using ExtendedElementB = typename vector_element_traits::
extend_element<DElement,
SElementB>::type;
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
xc->tcBase());
for (int i = 0; i < eCount; ++i) {'''
@@ -3107,17 +3116,21 @@
int s = segbase + imm;'''
code += '''
DElement res = AA64FpDest_xd[i];
DElement srcElem1, srcElem2;
ExtendedElementA srcElemA;
ExtendedElementB srcElemB;
for (int j = 0; j <= 3; ++j) {
srcElem1 = static_cast<DElement>(AA64FpOp1_xs[4 * i +
j]);'''
srcElemA = static_cast<ExtendedElementA>
(AA64FpOp1_srcA[4 * i + j]);'''
if isIndexed:
code += '''
srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * s +
j]);'''
srcElemB = static_cast<ExtendedElementB>
(AA64FpOp2_srcB[4 * s + j]);'''
else:
code += '''
srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * i +
j]);'''
srcElemB = static_cast<ExtendedElementB>
(AA64FpOp2_srcB[4 * i + j]);'''
code += '''
res += srcElem1 * srcElem2;
res += srcElemA * srcElemB;
}
AA64FpDestMerge_xd[i] = res;
}'''
@@ -3129,7 +3142,7 @@
header_output += SveWideningTerImmOpDeclare.subst(iop)
else:
header_output += SveWideningTerOpDeclare.subst(iop)
exec_output += SveWideningOpExecute.subst(iop)
exec_output += SveWideningTerOpExecute.subst(iop)
for type in types:
substDict = {'targs': type, 'class_name': 'Sve' + Name}
exec_output += SveOpExecDeclare.subst(substDict)
@@ -4468,11 +4481,14 @@
sveBinInst('sdivr', 'Sdivr', 'SimdDivOp', signedTypes, sdivrCode,
PredType.MERGE, True)
# SDOT (indexed)
'int16_t, int64_t'], isIndexed = True)
'int16_t, int16_t, int64_t'], isIndexed = True)
# SDOT (vectors)
'int16_t, int64_t'], isIndexed = False)
'int16_t, int16_t, int64_t'], isIndexed = False)
isIndexed = True)
# SEL (predicates)
selCode = 'destElem = srcElem1;'
svePredLogicalInst('sel', 'PredSel', 'SimdPredAluOp', ('uint8_t',),
@@ -4857,11 +4873,17 @@
sveBinInst('udivr', 'Udivr', 'SimdDivOp', unsignedTypes, udivrCode,
PredType.MERGE, True)
# UDOT (indexed)
'uint16_t, uint64_t'], isIndexed = True)
'uint16_t, uint16_t, uint64_t'], isIndexed = True)
# UDOT (vectors)
'uint16_t, uint64_t'], isIndexed = False)
'uint16_t, uint16_t, uint64_t'], isIndexed = False)
isIndexed = True)
isIndexed = False)
# UMAX (immediate)
sveWideImmInst('umax', 'UmaxImm', 'SimdCmpOp', unsignedTypes, maxCode)
# UMAX (vectors)
diff --git a/src/arch/arm/isa/templates/sve.isa
b/src/arch/arm/isa/templates/sve.isa
index 65abb1b..813bda0 100644
--- a/src/arch/arm/isa/templates/sve.isa
+++ b/src/arch/arm/isa/templates/sve.isa
@@ -1139,17 +1139,22 @@
}};
def template SveWideningTerImmOpDeclare {{
-template <class _SElement, class _DElement>
+template <class _SElementA, class _SElementB, class _DElement>
class %(class_name)s : public %(base_class)s
{
static_assert(sizeof(_SElementA) == sizeof(_SElementB),
"Source elements must have the same size.");
private:
%(reg_idx_arr_decl)s;
protected:
typedef _DElement Element;
public:
@@ -1168,7 +1173,7 @@
}};
def template SveWideningTerOpDeclare {{
-template <class _SElement, class _DElement>
+template <class _SElementA, class _SElementB, class _DElement>
class %(class_name)s : public %(base_class)s
{
private:
@@ -1176,9 +1181,11 @@
protected:
typedef _DElement Element;
public:
@@ -1295,6 +1302,26 @@
}
}};
+def template SveWideningTerOpExecute {{
(ExecContext *xc,
trace::InstRecord *traceData) const
Fault fault = NoFault;
%(op_decl)s;
%(op_rd)s;
%(code)s;
if (fault == NoFault)
{
%(op_wb)s;
}
return fault;
--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/70732?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings?usp=email
Gerrit-MessageType: merged
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I83841654cee74b940f967b3a37b99d87c01bd92c
Gerrit-Change-Number: 70732
Gerrit-PatchSet: 9
Gerrit-Owner: Giacomo Travaglini giacomo.travaglini@arm.com
Gerrit-Reviewer: Andreas Sandberg andreas.sandberg@arm.com
Gerrit-Reviewer: Bobby Bruce bbruce@ucdavis.edu
Gerrit-Reviewer: Jason Lowe-Power power.jg@gmail.com
Gerrit-Reviewer: kokoro noreply+kokoro@google.com
Gerrit-CC: Richard Cooper richard.cooper@arm.com