gem5-dev@gem5.org

The gem5 Developer List

View all threads

[L] Change in gem5/gem5[develop]: arch-riscv: Simplify and merge the RV32/RV64 RVM instructions

RC
Roger Chang (Gerrit)
Tue, May 23, 2023 2:34 AM

Roger Chang has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/70597?usp=email )

Change subject: arch-riscv: Simplify and merge the RV32/RV64 RVM
instructions
......................................................................

arch-riscv: Simplify and merge the RV32/RV64 RVM instructions

This change moves the implementation details to utility.hh and merges
the RV32 and RV64 versions of each instruction into one.

Change-Id: I438bfb0fc511f0f27e83f247d386c58493db65b4
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70597
Tested-by: kokoro noreply+kokoro@google.com
Reviewed-by: Yu-hsin Wang yuhsingw@google.com
Maintainer: Bobby Bruce bbruce@ucdavis.edu

M src/arch/riscv/isa/decoder.isa
M src/arch/riscv/utility.hh
2 files changed, 149 insertions(+), 183 deletions(-)

Approvals:
kokoro: Regressions pass
Yu-hsin Wang: Looks good to me, approved
Bobby Bruce: Looks good to me, approved

diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index 3acd80e..47519ee 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -1084,34 +1084,13 @@
0x0: sll({{
Rd = rvSext(Rs1 << rvSelect(Rs2<4:0>, Rs2<5:0>));
}});

  •                0x1: decode RVTYPE {
    
  •                    0x0: rv32_mulh({{
    
  •                        Rd_sw = ((int64_t)Rs1_sw * Rs2_sw) >> 32;
    
  •                    }}, IntMultOp);
    
  •                    0x1: mulh({{
    
  •                        bool negate = (Rs1_sd < 0) != (Rs2_sd < 0);
    
  •                        uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd);
    
  •                        uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32;

  •                        uint64_t Rs2_lo = (uint32_t)std::abs(Rs2_sd);
    
  •                        uint64_t Rs2_hi = (uint64_t)std::abs(Rs2_sd) >> 32;

  •                        uint64_t hi = Rs1_hi*Rs2_hi;
    
  •                        uint64_t mid1 = Rs1_hi*Rs2_lo;
    
  •                        uint64_t mid2 = Rs1_lo*Rs2_hi;
    
  •                        uint64_t lo = Rs2_lo*Rs1_lo;
    
  •                        uint64_t carry = ((uint64_t)(uint32_t)mid1
    
  •                                + (uint64_t)(uint32_t)mid2
    
  •                                + (lo >> 32)) >> 32;
    
  •                        uint64_t res = hi +
    
  •                                      (mid1 >> 32) +
    
  •                                      (mid2 >> 32) +
    
  •                                      carry;
    
  •                        Rd = negate ? ~res + (Rs1_sd*Rs2_sd == 0 ? 1 : 0)
    
  •                                    : res;
    
  •                    }}, IntMultOp);
    
  •                }
    
  •                0x1: mulh({{
    
  •                    if (machInst.rv_type == RV32) {
    
  •                        Rd_sd = mulh_32(Rs1_sd, Rs2_sd);
    
  •                    } else {
    
  •                        Rd_sd = mulh_64(Rs1_sd, Rs2_sd);
    
  •                    }
    
  •                }}, IntMultOp);
                    0x5: clmul({{
                        uint64_t result = 0;
                        for (int i = 0; i < rvSelect(32, 64); i++) {
    

@@ -1144,32 +1123,13 @@
0x0: slt({{
Rd = (rvSext(Rs1_sd) < rvSext(Rs2_sd)) ? 1 : 0;
}});

  •                0x1: decode RVTYPE {
    
  •                    0x0: rv32_mulhsu({{
    
  •                        Rd_sw = ((int64_t)Rs1_sw * Rs2_uw) >> 32;
    
  •                    }}, IntMultOp);
    
  •                    0x1: mulhsu({{
    
  •                        bool negate = Rs1_sd < 0;
    
  •                        uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd);
    
  •                        uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32;

  •                        uint64_t Rs2_lo = (uint32_t)Rs2;
    
  •                        uint64_t Rs2_hi = Rs2 >> 32;
    
  •                        uint64_t hi = Rs1_hi*Rs2_hi;
    
  •                        uint64_t mid1 = Rs1_hi*Rs2_lo;
    
  •                        uint64_t mid2 = Rs1_lo*Rs2_hi;
    
  •                        uint64_t lo = Rs1_lo*Rs2_lo;
    
  •                        uint64_t carry = ((uint64_t)(uint32_t)mid1
    
  •                                + (uint64_t)(uint32_t)mid2
    
  •                                + (lo >> 32)) >> 32;
    
  •                        uint64_t res = hi +
    
  •                                      (mid1 >> 32) +
    
  •                                      (mid2 >> 32) +
    
  •                                      carry;
    
  •                        Rd = negate ? ~res + (Rs1_sd*Rs2 == 0 ? 1 : 0) : res;
  •                    }}, IntMultOp);
    
  •                }
    
  •                0x1: mulhsu({{
    
  •                    if (machInst.rv_type == RV32) {
    
  •                        Rd_sd = mulhsu_32(Rs1_sd, Rs2);
    
  •                    } else {
    
  •                        Rd_sd = mulhsu_64(Rs1_sd, Rs2);
    
  •                    }
    
  •                }}, IntMultOp);
                    0x5: clmulr({{
                        uint64_t result = 0;
                        uint64_t xlen = rvSelect(32, 64);
    

@@ -1197,27 +1157,13 @@
0x0: sltu({{
Rd = (rvZext(Rs1) < rvZext(Rs2)) ? 1 : 0;
}});

  •                0x1: decode RVTYPE {
    
  •                    0x0: rv32_mulhu({{
    
  •                        Rd_sw = ((uint64_t)Rs1_uw * Rs2_uw) >> 32;
    
  •                    }}, IntMultOp);
    
  •                    0x1: mulhu({{
    
  •                        uint64_t Rs1_lo = (uint32_t)Rs1;
    
  •                        uint64_t Rs1_hi = Rs1 >> 32;
    
  •                        uint64_t Rs2_lo = (uint32_t)Rs2;
    
  •                        uint64_t Rs2_hi = Rs2 >> 32;
    
  •                        uint64_t hi = Rs1_hi*Rs2_hi;
    
  •                        uint64_t mid1 = Rs1_hi*Rs2_lo;
    
  •                        uint64_t mid2 = Rs1_lo*Rs2_hi;
    
  •                        uint64_t lo = Rs1_lo*Rs2_lo;
    
  •                        uint64_t carry = ((uint64_t)(uint32_t)mid1
    
  •                                + (uint64_t)(uint32_t)mid2
    
  •                                + (lo >> 32)) >> 32;
    
  •                        Rd = hi + (mid1 >> 32) + (mid2 >> 32) + carry;
    
  •                    }}, IntMultOp);
    
  •                }
    
  •                0x1: mulhu({{
    
  •                    if (machInst.rv_type == RV32) {
    
  •                        Rd = (int32_t)mulhu_32(Rs1, Rs2);
    
  •                    } else {
    
  •                        Rd = mulhu_64(Rs1, Rs2);
    
  •                    }
    
  •                }}, IntMultOp);
                    0x5: clmulh({{
                        uint64_t result = 0;
                        uint64_t xlen = rvSelect(32, 64);
    

@@ -1235,30 +1181,13 @@
0x0: xor({{
Rd = rvSext(Rs1 ^ Rs2);
}});

  •                0x1: decode RVTYPE {
    
  •                    0x0: rv32_div({{
    
  •                        constexpr int32_t kRsMin = \
    
  •                            std::numeric_limits<int32_t>::min();
    
  •                        if (Rs2_sw == 0) {
    
  •                            Rd_sw = -1;
    
  •                        } else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
    
  •                            Rd_sw = kRsMin;
    
  •                        } else {
    
  •                            Rd_sw = Rs1_sw/Rs2_sw;
    
  •                        }
    
  •                    }}, IntDivOp);
    
  •                    0x1: div({{
    
  •                        constexpr int64_t kRsMin = \
    
  •                            std::numeric_limits<int64_t>::min();
    
  •                        if (Rs2_sd == 0) {
    
  •                            Rd_sd = -1;
    
  •                        } else if (Rs1_sd == kRsMin && Rs2_sd == -1) {
    
  •                            Rd_sd = kRsMin;
    
  •                        } else {
    
  •                            Rd_sd = Rs1_sd/Rs2_sd;
    
  •                        }
    
  •                    }}, IntDivOp);
    
  •                }
    
  •                0x1: div({{
    
  •                    if (machInst.rv_type == RV32) {
    
  •                        Rd_sd = div<int32_t>(Rs1, Rs2);
    
  •                    } else {
    
  •                        Rd_sd = div<int64_t>(Rs1, Rs2);
    
  •                    }
    
  •                }}, IntDivOp);
                    0x4: pack({{
                        int xlen = rvSelect(32, 64);
                        Rd = rvSext(
    

@@ -1289,22 +1218,13 @@
Rd = rvSext(rvZext(Rs1) >>
rvSelect(Rs2<4:0>, Rs2<5:0>));
}});

  •                0x1: decode RVTYPE {
    
  •                    0x0: rv32_divu({{
    
  •                        if (Rs2_uw == 0) {
    
  •                            Rd_sw = std::numeric_limits<uint32_t>::max();

  •                        } else {
    
  •                            Rd_sw = Rs1_uw/Rs2_uw;
    
  •                        }
    
  •                    }}, IntDivOp);
    
  •                    0x1: divu({{
    
  •                        if (Rs2 == 0) {
    
  •                            Rd = std::numeric_limits<uint64_t>::max();
    
  •                        } else {
    
  •                            Rd = Rs1/Rs2;
    
  •                        }
    
  •                    }}, IntDivOp);
    
  •                }
    
  •                0x1: divu({{
    
  •                    if (machInst.rv_type == RV32) {
    
  •                        Rd = (int32_t)divu<uint32_t>(Rs1, Rs2);
    
  •                    } else {
    
  •                        Rd = divu<uint64_t>(Rs1, Rs2);
    
  •                    }
    
  •                }}, IntDivOp);
                    0x20: sra({{
                        Rd = rvSext(Rs1_sd) >> rvSelect(Rs2<4:0>, Rs2<5:0>);
}});
@@ -1327,30 +1247,13 @@
0x0: or({{
Rd = rvSext(Rs1 | Rs2);
}});

  •                0x1: decode RVTYPE {
    
  •                    0x0: rv32_rem({{
    
  •                        constexpr int32_t kRsMin = \
    
  •                            std::numeric_limits<int32_t>::min();
    
  •                        if (Rs2_sw == 0) {
    
  •                            Rd_sw = Rs1_sw;
    
  •                        } else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
    
  •                            Rd_sw = 0;
    
  •                        } else {
    
  •                            Rd_sw = Rs1_sw%Rs2_sw;
    
  •                        }
    
  •                    }}, IntDivOp);
    
  •                    0x1: rem({{
    
  •                        constexpr int64_t kRsMin = \
    
  •                            std::numeric_limits<int64_t>::min();
    
  •                        if (Rs2_sd == 0) {
    
  •                            Rd = Rs1_sd;
    
  •                        } else if (Rs1_sd == kRsMin && Rs2_sd == -1) {
    
  •                            Rd = 0;
    
  •                        } else {
    
  •                            Rd = Rs1_sd%Rs2_sd;
    
  •                        }
    
  •                    }}, IntDivOp);
    
  •                }
    
  •                0x1: rem({{
    
  •                    if (machInst.rv_type == RV32) {
    
  •                        Rd_sd = rem<int32_t>(Rs1, Rs2);
    
  •                    } else {
    
  •                        Rd_sd = rem<int64_t>(Rs1, Rs2);
    
  •                    }
    
  •                }}, IntDivOp);
                    0x5: max({{
                        Rd_sd = std::max(rvSext(Rs1_sd), rvSext(Rs2_sd));
                    }});
    

@@ -1365,22 +1268,13 @@
0x0: and({{
Rd = rvSext(Rs1 & Rs2);
}});

  •                0x1: decode RVTYPE {
    
  •                    0x0: rv32_remu({{
    
  •                        if (Rs2_uw == 0) {
    
  •                            Rd_sw = Rs1_uw;
    
  •                        } else {
    
  •                            Rd_sw = Rs1_uw%Rs2_uw;
    
  •                        }
    
  •                    }}, IntDivOp);
    
  •                    0x1: remu({{
    
  •                        if (Rs2 == 0) {
    
  •                            Rd = Rs1;
    
  •                        } else {
    
  •                            Rd = Rs1%Rs2;
    
  •                        }
    
  •                    }}, IntDivOp);
    
  •                }
    
  •                0x1: remu({{
    
  •                    if (machInst.rv_type == RV32) {
    
  •                        Rd = (int32_t)remu<uint32_t>(Rs1, Rs2);
    
  •                    } else {
    
  •                        Rd = remu<uint64_t>(Rs1, Rs2);
    
  •                    }
    
  •                }}, IntDivOp);
                    0x4: packh({{
                        // It doesn't need to sign ext as MSB is always 0
                        Rd = (Rs2_ub << 8) | Rs1_ub;
    

@@ -1432,15 +1326,7 @@
}
0x4: decode FUNCT7 {
0x1: divw({{

  •                        constexpr int32_t kRsMin = \
    
  •                            std::numeric_limits<int32_t>::min();
    
  •                        if (Rs2_sw == 0) {
    
  •                            Rd_sd = -1;
    
  •                        } else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
    
  •                            Rd_sd = kRsMin;
    
  •                        } else {
    
  •                            Rd_sd = Rs1_sw/Rs2_sw;
    
  •                        }
    
  •                        Rd_sd = div<int32_t>(Rs1, Rs2);
                        }}, IntDivOp);
                        0x4: packw({{
                            Rd_sd = sext<32>((Rs2_uh << 16) | Rs1_uh);
    

@@ -1454,11 +1340,7 @@
Rd_sd = (int32_t)(Rs1_uw >> Rs2<4:0>);
}});
0x1: divuw({{

  •                        if (Rs2_uw == 0) {
    
  •                            Rd_sd = std::numeric_limits<uint64_t>::max();

  •                        } else {
    
  •                            Rd_sd = (int32_t)(Rs1_uw/Rs2_uw);
    
  •                        }
    
  •                        Rd = sext<32>(divu<uint32_t>(Rs1, Rs2));
                        }}, IntDivOp);
                        0x20: sraw({{
                            Rd_sd = Rs1_sw >> Rs2<4:0>;
    

@@ -1470,26 +1352,14 @@
}
0x6:  decode FUNCT7 {
0x1: remw({{

  •                        constexpr int32_t kRsMin = \
    
  •                            std::numeric_limits<int32_t>::min();
    
  •                        if (Rs2_sw == 0) {
    
  •                            Rd_sd = Rs1_sw;
    
  •                        } else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
    
  •                            Rd_sd = 0;
    
  •                        } else {
    
  •                            Rd_sd = Rs1_sw%Rs2_sw;
    
  •                        }
    
  •                        Rd_sd = rem<int32_t>(Rs1, Rs2);
                        }}, IntDivOp);
                        0x10: sh3add_uw({{
                            Rd = (((uint64_t)Rs1_uw) << 3) + Rs2;
                        }});
                    }
                    0x7: remuw({{
    
  •                    if (Rs2_uw == 0) {
    
  •                        Rd_sd = (int32_t)Rs1_uw;
    
  •                    } else {
    
  •                        Rd_sd = (int32_t)(Rs1_uw%Rs2_uw);
    
  •                    }
    
  •                    Rd = sext<32>(remu<uint32_t>(Rs1, Rs2));
                    }}, IntDivOp);
                }
            }
    

diff --git a/src/arch/riscv/utility.hh b/src/arch/riscv/utility.hh
index 3bd34c4..5fccc84 100644
--- a/src/arch/riscv/utility.hh
+++ b/src/arch/riscv/utility.hh
@@ -55,6 +55,7 @@
#include "cpu/reg_class.hh"
#include "cpu/static_inst.hh"
#include "cpu/thread_context.hh"
+#include "enums/RiscvType.hh"
#include "rvk.hh"

namespace gem5
@@ -137,6 +138,101 @@
}
}

+inline uint32_t
+mulhu_32(uint32_t rs1, uint32_t rs2)
+{
+    return ((uint64_t)rs1 * rs2) >> 32;
+}

+inline uint64_t
+mulhu_64(uint64_t rs1, uint64_t rs2)
+{
+    uint64_t rs1_lo = (uint32_t)rs1;
+    uint64_t rs1_hi = rs1 >> 32;
+    uint64_t rs2_lo = (uint32_t)rs2;
+    uint64_t rs2_hi = rs2 >> 32;
+
+    uint64_t hi = rs1_hi * rs2_hi;
+    uint64_t mid1 = rs1_hi * rs2_lo;
+    uint64_t mid2 = rs1_lo * rs2_hi;
+    uint64_t lo = rs1_lo * rs2_lo;
+    uint64_t carry = ((uint64_t)(uint32_t)mid1
+            + (uint64_t)(uint32_t)mid2
+            + (lo >> 32)) >> 32;
+
+    return hi + (mid1 >> 32) + (mid2 >> 32) + carry;
+}

+inline int32_t
+mulh_32(int32_t rs1, int32_t rs2)
+{
+    return ((int64_t)rs1 * rs2) >> 32;
+}

+inline int64_t
+mulh_64(int64_t rs1, int64_t rs2)
+{
+    bool negate = (rs1 < 0) != (rs2 < 0);
+    uint64_t res = mulhu_64(std::abs(rs1), std::abs(rs2));
+    return negate ? ~res + (rs1 * rs2 == 0 ? 1 : 0) : res;
+}

+inline int32_t
+mulhsu_32(int32_t rs1, uint32_t rs2)
+{
+    return ((int64_t)rs1 * rs2) >> 32;
+}

+inline int64_t
+mulhsu_64(int64_t rs1, uint64_t rs2)
+{
+    bool negate = rs1 < 0;
+    uint64_t res = mulhu_64(std::abs(rs1), rs2);
+    return negate ? ~res + (rs1 * rs2 == 0 ? 1 : 0) : res;
+}

+template<typename T> inline T
+div(T rs1, T rs2)
+{
+    constexpr T kRsMin = std::numeric_limits<T>::min();
+    if (rs2 == 0) {
+        return -1;
+    } else if (rs1 == kRsMin && rs2 == -1) {
+        return kRsMin;
+    } else {
+        return rs1 / rs2;
+    }
+}

+template<typename T> inline T
+divu(T rs1, T rs2)
+{
+    if (rs2 == 0) {
+        return std::numeric_limits<T>::max();
+    } else {
+        return rs1 / rs2;
+    }
+}

+template<typename T> inline T
+rem(T rs1, T rs2)
+{
+    constexpr T kRsMin = std::numeric_limits<T>::min();
+    if (rs2 == 0) {
+        return rs1;
+    } else if (rs1 == kRsMin && rs2 == -1) {
+        return 0;
+    } else {
+        return rs1 % rs2;
+    }
+}

+template<typename T> inline T
+remu(T rs1, T rs2)
+{
+    return (rs2 == 0) ? rs1 : rs1 % rs2;
+}
+
} // namespace RiscvISA
} // namespace gem5

--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/70597?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I438bfb0fc511f0f27e83f247d386c58493db65b4
Gerrit-Change-Number: 70597
Gerrit-PatchSet: 3
Gerrit-Owner: Roger Chang rogerycchang@google.com
Gerrit-Reviewer: Ayaz Akram yazakram@ucdavis.edu
Gerrit-Reviewer: Bobby Bruce bbruce@ucdavis.edu
Gerrit-Reviewer: Hoa Nguyen hoanguyen@ucdavis.edu
Gerrit-Reviewer: Jason Lowe-Power jason@lowepower.com
Gerrit-Reviewer: Roger Chang rogerycchang@google.com
Gerrit-Reviewer: Yu-hsin Wang yuhsingw@google.com
Gerrit-Reviewer: kokoro noreply+kokoro@google.com

Roger Chang has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/70597?usp=email ) Change subject: arch-riscv: Simplify amd merge RV32/RV64 the RVM instructions ...................................................................... arch-riscv: Simplify amd merge RV32/RV64 the RVM instructions The change move the details implementation to utility.hh and merge the RV32 and RV64 versions into one. Change-Id: I438bfb0fc511f0f27e83f247d386c58493db65b4 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70597 Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Yu-hsin Wang <yuhsingw@google.com> Maintainer: Bobby Bruce <bbruce@ucdavis.edu> --- M src/arch/riscv/isa/decoder.isa M src/arch/riscv/utility.hh 2 files changed, 149 insertions(+), 183 deletions(-) Approvals: kokoro: Regressions pass Yu-hsin Wang: Looks good to me, approved Bobby Bruce: Looks good to me, approved diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 3acd80e..47519ee 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -1084,34 +1084,13 @@ 0x0: sll({{ Rd = rvSext(Rs1 << rvSelect(Rs2<4:0>, Rs2<5:0>)); }}); - 0x1: decode RVTYPE { - 0x0: rv32_mulh({{ - Rd_sw = ((int64_t)Rs1_sw * Rs2_sw) >> 32; - }}, IntMultOp); - 0x1: mulh({{ - bool negate = (Rs1_sd < 0) != (Rs2_sd < 0); - - uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd); - uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32; - uint64_t Rs2_lo = (uint32_t)std::abs(Rs2_sd); - uint64_t Rs2_hi = (uint64_t)std::abs(Rs2_sd) >> 32; - - uint64_t hi = Rs1_hi*Rs2_hi; - uint64_t mid1 = Rs1_hi*Rs2_lo; - uint64_t mid2 = Rs1_lo*Rs2_hi; - uint64_t lo = Rs2_lo*Rs1_lo; - uint64_t carry = ((uint64_t)(uint32_t)mid1 - + (uint64_t)(uint32_t)mid2 - + (lo >> 32)) >> 32; - - uint64_t res = hi + - (mid1 >> 32) + - (mid2 >> 32) + - carry; - Rd = negate ? ~res + (Rs1_sd*Rs2_sd == 0 ? 
1 : 0) - : res; - }}, IntMultOp); - } + 0x1: mulh({{ + if (machInst.rv_type == RV32) { + Rd_sd = mulh_32(Rs1_sd, Rs2_sd); + } else { + Rd_sd = mulh_64(Rs1_sd, Rs2_sd); + } + }}, IntMultOp); 0x5: clmul({{ uint64_t result = 0; for (int i = 0; i < rvSelect(32, 64); i++) { @@ -1144,32 +1123,13 @@ 0x0: slt({{ Rd = (rvSext(Rs1_sd) < rvSext(Rs2_sd)) ? 1 : 0; }}); - 0x1: decode RVTYPE { - 0x0: rv32_mulhsu({{ - Rd_sw = ((int64_t)Rs1_sw * Rs2_uw) >> 32; - }}, IntMultOp); - 0x1: mulhsu({{ - bool negate = Rs1_sd < 0; - uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd); - uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32; - uint64_t Rs2_lo = (uint32_t)Rs2; - uint64_t Rs2_hi = Rs2 >> 32; - - uint64_t hi = Rs1_hi*Rs2_hi; - uint64_t mid1 = Rs1_hi*Rs2_lo; - uint64_t mid2 = Rs1_lo*Rs2_hi; - uint64_t lo = Rs1_lo*Rs2_lo; - uint64_t carry = ((uint64_t)(uint32_t)mid1 - + (uint64_t)(uint32_t)mid2 - + (lo >> 32)) >> 32; - - uint64_t res = hi + - (mid1 >> 32) + - (mid2 >> 32) + - carry; - Rd = negate ? ~res + (Rs1_sd*Rs2 == 0 ? 1 : 0) : res; - }}, IntMultOp); - } + 0x1: mulhsu({{ + if (machInst.rv_type == RV32) { + Rd_sd = mulhsu_32(Rs1_sd, Rs2); + } else { + Rd_sd = mulhsu_64(Rs1_sd, Rs2); + } + }}, IntMultOp); 0x5: clmulr({{ uint64_t result = 0; uint64_t xlen = rvSelect(32, 64); @@ -1197,27 +1157,13 @@ 0x0: sltu({{ Rd = (rvZext(Rs1) < rvZext(Rs2)) ? 
1 : 0; }}); - 0x1: decode RVTYPE { - 0x0: rv32_mulhu({{ - Rd_sw = ((uint64_t)Rs1_uw * Rs2_uw) >> 32; - }}, IntMultOp); - 0x1: mulhu({{ - uint64_t Rs1_lo = (uint32_t)Rs1; - uint64_t Rs1_hi = Rs1 >> 32; - uint64_t Rs2_lo = (uint32_t)Rs2; - uint64_t Rs2_hi = Rs2 >> 32; - - uint64_t hi = Rs1_hi*Rs2_hi; - uint64_t mid1 = Rs1_hi*Rs2_lo; - uint64_t mid2 = Rs1_lo*Rs2_hi; - uint64_t lo = Rs1_lo*Rs2_lo; - uint64_t carry = ((uint64_t)(uint32_t)mid1 - + (uint64_t)(uint32_t)mid2 - + (lo >> 32)) >> 32; - - Rd = hi + (mid1 >> 32) + (mid2 >> 32) + carry; - }}, IntMultOp); - } + 0x1: mulhu({{ + if (machInst.rv_type == RV32) { + Rd = (int32_t)mulhu_32(Rs1, Rs2); + } else { + Rd = mulhu_64(Rs1, Rs2); + } + }}, IntMultOp); 0x5: clmulh({{ uint64_t result = 0; uint64_t xlen = rvSelect(32, 64); @@ -1235,30 +1181,13 @@ 0x0: xor({{ Rd = rvSext(Rs1 ^ Rs2); }}); - 0x1: decode RVTYPE { - 0x0: rv32_div({{ - constexpr int32_t kRsMin = \ - std::numeric_limits<int32_t>::min(); - if (Rs2_sw == 0) { - Rd_sw = -1; - } else if (Rs1_sw == kRsMin && Rs2_sw == -1) { - Rd_sw = kRsMin; - } else { - Rd_sw = Rs1_sw/Rs2_sw; - } - }}, IntDivOp); - 0x1: div({{ - constexpr int64_t kRsMin = \ - std::numeric_limits<int64_t>::min(); - if (Rs2_sd == 0) { - Rd_sd = -1; - } else if (Rs1_sd == kRsMin && Rs2_sd == -1) { - Rd_sd = kRsMin; - } else { - Rd_sd = Rs1_sd/Rs2_sd; - } - }}, IntDivOp); - } + 0x1: div({{ + if (machInst.rv_type == RV32) { + Rd_sd = div<int32_t>(Rs1, Rs2); + } else { + Rd_sd = div<int64_t>(Rs1, Rs2); + } + }}, IntDivOp); 0x4: pack({{ int xlen = rvSelect(32, 64); Rd = rvSext( @@ -1289,22 +1218,13 @@ Rd = rvSext(rvZext(Rs1) >> rvSelect(Rs2<4:0>, Rs2<5:0>)); }}); - 0x1: decode RVTYPE { - 0x0: rv32_divu({{ - if (Rs2_uw == 0) { - Rd_sw = std::numeric_limits<uint32_t>::max(); - } else { - Rd_sw = Rs1_uw/Rs2_uw; - } - }}, IntDivOp); - 0x1: divu({{ - if (Rs2 == 0) { - Rd = std::numeric_limits<uint64_t>::max(); - } else { - Rd = Rs1/Rs2; - } - }}, IntDivOp); - } + 0x1: divu({{ + if (machInst.rv_type == 
RV32) { + Rd = (int32_t)divu<uint32_t>(Rs1, Rs2); + } else { + Rd = divu<uint64_t>(Rs1, Rs2); + } + }}, IntDivOp); 0x20: sra({{ Rd = rvSext(Rs1_sd) >> rvSelect(Rs2<4:0>, Rs2<5:0>); }}); @@ -1327,30 +1247,13 @@ 0x0: or({{ Rd = rvSext(Rs1 | Rs2); }}); - 0x1: decode RVTYPE { - 0x0: rv32_rem({{ - constexpr int32_t kRsMin = \ - std::numeric_limits<int32_t>::min(); - if (Rs2_sw == 0) { - Rd_sw = Rs1_sw; - } else if (Rs1_sw == kRsMin && Rs2_sw == -1) { - Rd_sw = 0; - } else { - Rd_sw = Rs1_sw%Rs2_sw; - } - }}, IntDivOp); - 0x1: rem({{ - constexpr int64_t kRsMin = \ - std::numeric_limits<int64_t>::min(); - if (Rs2_sd == 0) { - Rd = Rs1_sd; - } else if (Rs1_sd == kRsMin && Rs2_sd == -1) { - Rd = 0; - } else { - Rd = Rs1_sd%Rs2_sd; - } - }}, IntDivOp); - } + 0x1: rem({{ + if (machInst.rv_type == RV32) { + Rd_sd = rem<int32_t>(Rs1, Rs2); + } else { + Rd_sd = rem<int64_t>(Rs1, Rs2); + } + }}, IntDivOp); 0x5: max({{ Rd_sd = std::max(rvSext(Rs1_sd), rvSext(Rs2_sd)); }}); @@ -1365,22 +1268,13 @@ 0x0: and({{ Rd = rvSext(Rs1 & Rs2); }}); - 0x1: decode RVTYPE { - 0x0: rv32_remu({{ - if (Rs2_uw == 0) { - Rd_sw = Rs1_uw; - } else { - Rd_sw = Rs1_uw%Rs2_uw; - } - }}, IntDivOp); - 0x1: remu({{ - if (Rs2 == 0) { - Rd = Rs1; - } else { - Rd = Rs1%Rs2; - } - }}, IntDivOp); - } + 0x1: remu({{ + if (machInst.rv_type == RV32) { + Rd = (int32_t)remu<uint32_t>(Rs1, Rs2); + } else { + Rd = remu<uint64_t>(Rs1, Rs2); + } + }}, IntDivOp); 0x4: packh({{ // It doesn't need to sign ext as MSB is always 0 Rd = (Rs2_ub << 8) | Rs1_ub; @@ -1432,15 +1326,7 @@ } 0x4: decode FUNCT7 { 0x1: divw({{ - constexpr int32_t kRsMin = \ - std::numeric_limits<int32_t>::min(); - if (Rs2_sw == 0) { - Rd_sd = -1; - } else if (Rs1_sw == kRsMin && Rs2_sw == -1) { - Rd_sd = kRsMin; - } else { - Rd_sd = Rs1_sw/Rs2_sw; - } + Rd_sd = div<int32_t>(Rs1, Rs2); }}, IntDivOp); 0x4: packw({{ Rd_sd = sext<32>((Rs2_uh << 16) | Rs1_uh); @@ -1454,11 +1340,7 @@ Rd_sd = (int32_t)(Rs1_uw >> Rs2<4:0>); }}); 0x1: divuw({{ - if (Rs2_uw == 0) 
{ - Rd_sd = std::numeric_limits<uint64_t>::max(); - } else { - Rd_sd = (int32_t)(Rs1_uw/Rs2_uw); - } + Rd = sext<32>(divu<uint32_t>(Rs1, Rs2)); }}, IntDivOp); 0x20: sraw({{ Rd_sd = Rs1_sw >> Rs2<4:0>; @@ -1470,26 +1352,14 @@ } 0x6: decode FUNCT7 { 0x1: remw({{ - constexpr int32_t kRsMin = \ - std::numeric_limits<int32_t>::min(); - if (Rs2_sw == 0) { - Rd_sd = Rs1_sw; - } else if (Rs1_sw == kRsMin && Rs2_sw == -1) { - Rd_sd = 0; - } else { - Rd_sd = Rs1_sw%Rs2_sw; - } + Rd_sd = rem<int32_t>(Rs1, Rs2); }}, IntDivOp); 0x10: sh3add_uw({{ Rd = (((uint64_t)Rs1_uw) << 3) + Rs2; }}); } 0x7: remuw({{ - if (Rs2_uw == 0) { - Rd_sd = (int32_t)Rs1_uw; - } else { - Rd_sd = (int32_t)(Rs1_uw%Rs2_uw); - } + Rd = sext<32>(remu<uint32_t>(Rs1, Rs2)); }}, IntDivOp); } } diff --git a/src/arch/riscv/utility.hh b/src/arch/riscv/utility.hh index 3bd34c4..5fccc84 100644 --- a/src/arch/riscv/utility.hh +++ b/src/arch/riscv/utility.hh @@ -55,6 +55,7 @@ #include "cpu/reg_class.hh" #include "cpu/static_inst.hh" #include "cpu/thread_context.hh" +#include "enums/RiscvType.hh" #include "rvk.hh" namespace gem5 @@ -137,6 +138,101 @@ } } +inline uint32_t +mulhu_32(uint32_t rs1, uint32_t rs2) +{ + return ((uint64_t)rs1 * rs2) >> 32; +} + +inline uint64_t +mulhu_64(uint64_t rs1, uint64_t rs2) +{ + uint64_t rs1_lo = (uint32_t)rs1; + uint64_t rs1_hi = rs1 >> 32; + uint64_t rs2_lo = (uint32_t)rs2; + uint64_t rs2_hi = rs2 >> 32; + + uint64_t hi = rs1_hi * rs2_hi; + uint64_t mid1 = rs1_hi * rs2_lo; + uint64_t mid2 = rs1_lo * rs2_hi; + uint64_t lo = rs1_lo * rs2_lo; + uint64_t carry = ((uint64_t)(uint32_t)mid1 + + (uint64_t)(uint32_t)mid2 + + (lo >> 32)) >> 32; + + return hi + (mid1 >> 32) + (mid2 >> 32) + carry; +} + +inline int32_t +mulh_32(int32_t rs1, int32_t rs2) +{ + return ((int64_t)rs1 * rs2) >> 32; +} + +inline int64_t +mulh_64(int64_t rs1, int64_t rs2) +{ + bool negate = (rs1 < 0) != (rs2 < 0); + uint64_t res = mulhu_64(std::abs(rs1), std::abs(rs2)); + return negate ? ~res + (rs1 * rs2 == 0 ? 
1 : 0) : res; +} + +inline int32_t +mulhsu_32(int32_t rs1, uint32_t rs2) +{ + return ((int64_t)rs1 * rs2) >> 32; +} + +inline int64_t +mulhsu_64(int64_t rs1, uint64_t rs2) +{ + bool negate = rs1 < 0; + uint64_t res = mulhu_64(std::abs(rs1), rs2); + return negate ? ~res + (rs1 * rs2 == 0 ? 1 : 0) : res; +} + +template<typename T> inline T +div(T rs1, T rs2) +{ + constexpr T kRsMin = std::numeric_limits<T>::min(); + if (rs2 == 0) { + return -1; + } else if (rs1 == kRsMin && rs2 == -1) { + return kRsMin; + } else { + return rs1 / rs2; + } +} + +template<typename T> inline T +divu(T rs1, T rs2) +{ + if (rs2 == 0) { + return std::numeric_limits<T>::max(); + } else { + return rs1 / rs2; + } +} + +template<typename T> inline T +rem(T rs1, T rs2) +{ + constexpr T kRsMin = std::numeric_limits<T>::min(); + if (rs2 == 0) { + return rs1; + } else if (rs1 == kRsMin && rs2 == -1) { + return 0; + } else { + return rs1 % rs2; + } +} + +template<typename T> inline T +remu(T rs1, T rs2) +{ + return (rs2 == 0) ? rs1 : rs1 % rs2; +} + } // namespace RiscvISA } // namespace gem5 -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/70597?usp=email To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I438bfb0fc511f0f27e83f247d386c58493db65b4 Gerrit-Change-Number: 70597 Gerrit-PatchSet: 3 Gerrit-Owner: Roger Chang <rogerycchang@google.com> Gerrit-Reviewer: Ayaz Akram <yazakram@ucdavis.edu> Gerrit-Reviewer: Bobby Bruce <bbruce@ucdavis.edu> Gerrit-Reviewer: Hoa Nguyen <hoanguyen@ucdavis.edu> Gerrit-Reviewer: Jason Lowe-Power <jason@lowepower.com> Gerrit-Reviewer: Roger Chang <rogerycchang@google.com> Gerrit-Reviewer: Yu-hsin Wang <yuhsingw@google.com> Gerrit-Reviewer: kokoro <noreply+kokoro@google.com>