gem5-dev@gem5.org

The gem5 Developer List

View all threads

[XL] Change in gem5/gem5[develop]: cpu: Revert CPU stats changes

BB
Bobby Bruce (Gerrit)
Tue, Mar 7, 2023 9:42 PM

Bobby Bruce has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/68717?usp=email )

Change subject: cpu: Revert CPU stats changes
......................................................................

cpu: Revert CPU stats changes

This reverts the following relation chain:
https://gem5-review.googlesource.com/c/public/gem5/+/67396/6

This was prematurely submitted before all testing and checking was
done. To be safe, this has been reverted. When all testing and checks are
completed, this revert will be undone.

Change-Id: I2a88cadfee03c1fc81932e6548938db108786dd2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68717
Reviewed-by: Jason Lowe-Power power.jg@gmail.com
Maintainer: Bobby Bruce bbruce@ucdavis.edu
Reviewed-by: Bobby Bruce bbruce@ucdavis.edu
Tested-by: kokoro noreply+kokoro@google.com

M src/cpu/base.cc
M src/cpu/base.hh
M src/cpu/kvm/base.cc
M src/cpu/kvm/base.hh
M src/cpu/minor/execute.cc
M src/cpu/minor/stats.cc
M src/cpu/minor/stats.hh
M src/cpu/o3/commit.cc
M src/cpu/o3/commit.hh
M src/cpu/o3/cpu.cc
M src/cpu/o3/cpu.hh
M src/cpu/o3/dyn_inst.hh
M src/cpu/o3/fetch.cc
M src/cpu/o3/fetch.hh
M src/cpu/o3/iew.cc
M src/cpu/o3/iew.hh
M src/cpu/simple/base.cc
M src/cpu/simple/base.hh
M src/cpu/simple/exec_context.hh
19 files changed, 628 insertions(+), 542 deletions(-)

Approvals:
Bobby Bruce: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass
Jason Lowe-Power: Looks good to me, approved

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index cee7647..d2c0a78 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -191,30 +191,6 @@
modelResetPort.onChange([this](const bool &new_val) {
setReset(new_val);
});

  • // create a stat group object for each thread on this core
  • fetchStats.reserve(numThreads);
  • executeStats.reserve(numThreads);
  • commitStats.reserve(numThreads);
  • for (int i = 0; i < numThreads; i++) {
  •    // create fetchStat object for thread i and set rate formulas
    
  •    FetchCPUStats* fetchStatptr = new FetchCPUStats(this, i);
    
  •    fetchStatptr->fetchRate = fetchStatptr->numInsts /  
    

baseStats.numCycles;

  •    fetchStatptr->branchRate = fetchStatptr->numBranches /
    
  •        baseStats.numCycles;
    
  •    fetchStats.emplace_back(fetchStatptr);
    
  •    // create executeStat object for thread i and set rate formulas
    
  •    ExecuteCPUStats* executeStatptr = new ExecuteCPUStats(this, i);
    
  •    executeStatptr->instRate = executeStatptr->numInsts /
    
  •        baseStats.numCycles;
    
  •    executeStats.emplace_back(executeStatptr);
    
  •    // create commitStat object for thread i and set ipc, cpi formulas
    
  •    CommitCPUStats* commitStatptr = new CommitCPUStats(this, i);
    
  •    commitStatptr->ipc = commitStatptr->numInsts / baseStats.numCycles;
    
  •    commitStatptr->cpi = baseStats.numCycles / commitStatptr->numInsts;
    
  •    commitStats.emplace_back(commitStatptr);
    
  • }
    }

    void
    @@ -407,28 +383,13 @@
    BaseCPU::
    BaseCPUStats::BaseCPUStats(statistics::Group *parent)
    : statistics::Group(parent),

  •  ADD_STAT(numInsts, statistics::units::Count::get(),
    
  •           "Number of instructions committed (core level)"),
    
  •  ADD_STAT(numOps, statistics::units::Count::get(),
    
  •           "Number of ops (including micro ops) committed (core  
    

level)"),
ADD_STAT(numCycles, statistics::units::Cycle::get(),
"Number of cpu cycles simulated"),

  •  ADD_STAT(cpi, statistics::units::Rate<
    
  •            statistics::units::Cycle, statistics::units::Count>::get(),
    
  •           "CPI: cycles per instruction (core level)"),
    
  •  ADD_STAT(ipc, statistics::units::Rate<
    
  •            statistics::units::Count, statistics::units::Cycle>::get(),
    
  •           "IPC: instructions per cycle (core level)"),
      ADD_STAT(numWorkItemsStarted, statistics::units::Count::get(),
               "Number of work items this cpu started"),
      ADD_STAT(numWorkItemsCompleted, statistics::units::Count::get(),
               "Number of work items this cpu completed")
    
    {
  • cpi.precision(6);
  • cpi = numCycles / numInsts;
  • ipc.precision(6);
  • ipc = numInsts / numCycles;
    }

void
@@ -866,215 +827,4 @@
hostOpRate = simOps / hostSeconds;
}

-BaseCPU::
-FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id)

  • : statistics::Group(parent, csprintf("fetchStats%i",
    thread_id).c_str()),
  • ADD_STAT(numInsts, statistics::units::Count::get(),
  •         "Number of instructions fetched (thread level)"),
    
  • ADD_STAT(numOps, statistics::units::Count::get(),
  •         "Number of ops (including micro ops) fetched (thread level)"),
    
  • ADD_STAT(fetchRate, statistics::units::Rate<
  •         statistics::units::Count, statistics::units::Cycle>::get(),
    
  •         "Number of inst fetches per cycle"),
    
  • ADD_STAT(numBranches, statistics::units::Count::get(),
  •         "Number of branches fetched"),
    
  • ADD_STAT(branchRate, statistics::units::Ratio::get(),
  •         "Number of branch fetches per cycle"),
    
  • ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(),
  •         "ICache total stall cycles"),
    
  • ADD_STAT(numFetchSuspends, statistics::units::Count::get(),
  •         "Number of times Execute suspended instruction fetching")
    

-{

  • fetchRate
  •    .flags(statistics::total);
    
  • numBranches
  •    .prereq(numBranches);
    
  • branchRate
  •    .flags(statistics::total);
    
  • icacheStallCycles
  •    .prereq(icacheStallCycles);
    

-}

-// means it is incremented in a vector indexing and not directly
-BaseCPU::
-ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id)

  • : statistics::Group(parent, csprintf("executeStats%i",
    thread_id).c_str()),
  • ADD_STAT(numInsts, statistics::units::Count::get(),
  •         "Number of executed instructions"),
    
  • ADD_STAT(numNop, statistics::units::Count::get(),
  •         "Number of nop insts executed"),
    
  • ADD_STAT(numBranches, statistics::units::Count::get(),
  •         "Number of branches executed"),
    
  • ADD_STAT(numLoadInsts, statistics::units::Count::get(),
  •         "Number of load instructions executed"),
    
  • ADD_STAT(numStoreInsts, statistics::units::Count::get(),
  •         "Number of stores executed"),
    
  • ADD_STAT(instRate, statistics::units::Rate<
  •            statistics::units::Count, statistics::units::Cycle>::get(),
    
  •         "Inst execution rate"),
    
  • ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(),
  •         "DCache total stall cycles"),
    
  • ADD_STAT(numCCRegReads, statistics::units::Count::get(),
  •         "Number of times the CC registers were read"),
    
  • ADD_STAT(numCCRegWrites, statistics::units::Count::get(),
  •         "Number of times the CC registers were written"),
    
  • ADD_STAT(numFpAluAccesses, statistics::units::Count::get(),
  •         "Number of float alu accesses"),
    
  • ADD_STAT(numFpRegReads, statistics::units::Count::get(),
  •         "Number of times the floating registers were read"),
    
  • ADD_STAT(numFpRegWrites, statistics::units::Count::get(),
  •         "Number of times the floating registers were written"),
    
  • ADD_STAT(numIntAluAccesses, statistics::units::Count::get(),
  •         "Number of integer alu accesses"),
    
  • ADD_STAT(numIntRegReads, statistics::units::Count::get(),
  •         "Number of times the integer registers were read"),
    
  • ADD_STAT(numIntRegWrites, statistics::units::Count::get(),
  •         "Number of times the integer registers were written"),
    
  • ADD_STAT(numMemRefs, statistics::units::Count::get(),
  •         "Number of memory refs"),
    
  • ADD_STAT(numMiscRegReads, statistics::units::Count::get(),
  •         "Number of times the Misc registers were read"),
    
  • ADD_STAT(numMiscRegWrites, statistics::units::Count::get(),
  •         "Number of times the Misc registers were written"),
    
  • ADD_STAT(numVecAluAccesses, statistics::units::Count::get(),
  •         "Number of vector alu accesses"),
    
  • ADD_STAT(numVecPredRegReads, statistics::units::Count::get(),
  •         "Number of times the predicate registers were read"),
    
  • ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(),
  •         "Number of times the predicate registers were written"),
    
  • ADD_STAT(numVecRegReads, statistics::units::Count::get(),
  •         "Number of times the vector registers were read"),
    
  • ADD_STAT(numVecRegWrites, statistics::units::Count::get(),
  •         "Number of times the vector registers were written"),
    
  • ADD_STAT(numDiscardedOps, statistics::units::Count::get(),
  •         "Number of ops (including micro ops) which were discarded  
    

before "

  •         "commit")
    

-{

  • numStoreInsts = numMemRefs - numLoadInsts;
  • dcacheStallCycles
  •    .prereq(dcacheStallCycles);
    
  • numCCRegReads
  •    .prereq(numCCRegReads)
    
  •    .flags(statistics::nozero);
    
  • numCCRegWrites
  •    .prereq(numCCRegWrites)
    
  •    .flags(statistics::nozero);
    
  • numFpAluAccesses
  •    .prereq(numFpAluAccesses);
    
  • numFpRegReads
  •    .prereq(numFpRegReads);
    
  • numIntAluAccesses
  •    .prereq(numIntAluAccesses);
    
  • numIntRegReads
  •    .prereq(numIntRegReads);
    
  • numIntRegWrites
  •    .prereq(numIntRegWrites);
    
  • numMiscRegReads
  •    .prereq(numMiscRegReads);
    
  • numMiscRegWrites
  •    .prereq(numMiscRegWrites);
    
  • numVecPredRegReads
  •    .prereq(numVecPredRegReads);
    
  • numVecPredRegWrites
  •    .prereq(numVecPredRegWrites);
    
  • numVecRegReads
  •    .prereq(numVecRegReads);
    
  • numVecRegWrites
  •    .prereq(numVecRegWrites);
    

-}

-BaseCPU::
-CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id)

  • : statistics::Group(parent, csprintf("commitStats%i",
    thread_id).c_str()),
  • ADD_STAT(numInsts, statistics::units::Count::get(),
  •         "Number of instructions committed (thread level)"),
    
  • ADD_STAT(numOps, statistics::units::Count::get(),
  •         "Number of ops (including micro ops) committed (thread  
    

level)"),

  • ADD_STAT(numInstsNotNOP, statistics::units::Count::get(),
  •         "Number of instructions committed excluding NOPs or  
    

prefetches"),

  • ADD_STAT(numOpsNotNOP, statistics::units::Count::get(),
  •         "Number of Ops (including micro ops) Simulated"),
    
  • ADD_STAT(cpi, statistics::units::Rate<
  •            statistics::units::Cycle, statistics::units::Count>::get(),
    
  •         "CPI: cycles per instruction (thread level)"),
    
  • ADD_STAT(ipc, statistics::units::Rate<
  •            statistics::units::Count, statistics::units::Cycle>::get(),
    
  •         "IPC: instructions per cycle (thread level)"),
    
  • ADD_STAT(numMemRefs, statistics::units::Count::get(),
  •        "Number of memory references committed"),
    
  • ADD_STAT(numFpInsts, statistics::units::Count::get(),
  •        "Number of float instructions"),
    
  • ADD_STAT(numIntInsts, statistics::units::Count::get(),
  •        "Number of integer instructions"),
    
  • ADD_STAT(numLoadInsts, statistics::units::Count::get(),
  •        "Number of load instructions"),
    
  • ADD_STAT(numStoreInsts, statistics::units::Count::get(),
  •        "Number of store instructions"),
    
  • ADD_STAT(numVecInsts, statistics::units::Count::get(),
  •        "Number of vector instructions"),
    
  • ADD_STAT(committedInstType, statistics::units::Count::get(),
  •        "Class of committed instruction."),
    
  • ADD_STAT(committedControl, statistics::units::Count::get(),
  •         "Class of control type instructions committed")
    

-{

  • numInsts
  •    .prereq(numInsts);
    
  • cpi.precision(6);
  • ipc.precision(6);
  • committedInstType
  •    .init(enums::Num_OpClass)
    
  •    .flags(statistics::total | statistics::pdf | statistics::dist);
    
  • for (unsigned i = 0; i < Num_OpClasses; ++i) {
  •    committedInstType.subname(i, enums::OpClassStrings[i]);
    
  • }
  • committedControl
  •    .init(StaticInstFlags::Flags::Num_Flags)
    
  •    .flags(statistics::nozero);
    
  • for (unsigned i = 0; i < StaticInstFlags::Flags::Num_Flags; i++) {
  •    committedControl.subname(i, StaticInstFlags::FlagsStrings[i]);
    
  • }
    -}

-void
-BaseCPU::
-CommitCPUStats::updateComCtrlStats(const StaticInstPtr staticInst)
-{

  • /* Add a count for every control instruction type */
  • if (staticInst->isControl()) {
  •    if (staticInst->isReturn()) {
    
  •        committedControl[gem5::StaticInstFlags::Flags::IsReturn]++;
    
  •    }
    
  •    if (staticInst->isCall()) {
    
  •        committedControl[gem5::StaticInstFlags::Flags::IsCall]++;
    
  •    }
    
  •    if (staticInst->isDirectCtrl()) {
    

committedControl[gem5::StaticInstFlags::Flags::IsDirectControl]++;

  •    }
    
  •    if (staticInst->isIndirectCtrl()) {
    
  •        committedControl
    
  •            [gem5::StaticInstFlags::Flags::IsIndirectControl]++;
    
  •    }
    
  •    if (staticInst->isCondCtrl()) {
    

committedControl[gem5::StaticInstFlags::Flags::IsCondControl]++;

  •    }
    
  •    if (staticInst->isUncondCtrl()) {
    

committedControl[gem5::StaticInstFlags::Flags::IsUncondControl]++;

  •    }
    
  •    committedControl[gem5::StaticInstFlags::Flags::IsControl]++;
    
  • }

-}

} // namespace gem5
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index fc22abc..084d9b9 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -43,7 +43,6 @@
#define CPU_BASE_HH

#include <vector>
-#include <memory>

#include "arch/generic/interrupts.hh"
#include "base/statistics.hh"
@@ -633,14 +632,8 @@
struct BaseCPUStats : public statistics::Group
{
BaseCPUStats(statistics::Group *parent);

  •    // Number of CPU insts and ops committed at CPU core level
    
  •    statistics::Scalar numInsts;
    
  •    statistics::Scalar numOps;
        // Number of CPU cycles simulated
        statistics::Scalar numCycles;
    
  •    /* CPI/IPC for total cycle counts and macro insts */
    
  •    statistics::Formula cpi;
    
  •    statistics::Formula ipc;
        statistics::Scalar numWorkItemsStarted;
        statistics::Scalar numWorkItemsCompleted;
    } baseStats;
    

@@ -683,141 +676,6 @@
const Cycles pwrGatingLatency;
const bool powerGatingOnIdle;
EventFunctionWrapper enterPwrGatingEvent;

  • public:
  • struct FetchCPUStats : public statistics::Group
  • {
  •    FetchCPUStats(statistics::Group *parent, int thread_id);
    
  •    /* Total number of instructions fetched */
    
  •    statistics::Scalar numInsts;
    
  •    /* Total number of operations fetched */
    
  •    statistics::Scalar numOps;
    
  •    /* Number of instruction fetched per cycle. */
    
  •    statistics::Formula fetchRate;
    
  •    /* Total number of branches fetched */
    
  •    statistics::Scalar numBranches;
    
  •    /* Number of branch fetches per cycle. */
    
  •    statistics::Formula branchRate;
    
  •    /* Number of cycles stalled due to an icache miss */
    
  •    statistics::Scalar icacheStallCycles;
    
  •    /* Number of times fetch was asked to suspend by Execute */
    
  •    statistics::Scalar numFetchSuspends;
    
  • };
  • struct ExecuteCPUStats: public statistics::Group
  • {
  •    ExecuteCPUStats(statistics::Group *parent, int thread_id);
    
  •    /* Stat for total number of executed instructions */
    
  •    statistics::Scalar numInsts;
    
  •    /* Number of executed nops */
    
  •    statistics::Scalar numNop;
    
  •    /* Number of executed branches */
    
  •    statistics::Scalar numBranches;
    
  •    /* Stat for total number of executed load instructions */
    
  •    statistics::Scalar numLoadInsts;
    
  •    /* Number of executed store instructions */
    
  •    statistics::Formula numStoreInsts;
    
  •    /* Number of instructions executed per cycle */
    
  •    statistics::Formula instRate;
    
  •    /* Number of cycles stalled for D-cache responses */
    
  •    statistics::Scalar dcacheStallCycles;
    
  •    /* Number of condition code register file accesses */
    
  •    statistics::Scalar numCCRegReads;
    
  •    statistics::Scalar numCCRegWrites;
    
  •    /* number of float alu accesses */
    
  •    statistics::Scalar numFpAluAccesses;
    
  •    /* Number of float register file accesses */
    
  •    statistics::Scalar numFpRegReads;
    
  •    statistics::Scalar numFpRegWrites;
    
  •    /* Number of integer alu accesses */
    
  •    statistics::Scalar numIntAluAccesses;
    
  •    /* Number of integer register file accesses */
    
  •    statistics::Scalar numIntRegReads;
    
  •    statistics::Scalar numIntRegWrites;
    
  •    /* number of simulated memory references */
    
  •    statistics::Scalar numMemRefs;
    
  •    /* Number of misc register file accesses */
    
  •    statistics::Scalar numMiscRegReads;
    
  •    statistics::Scalar numMiscRegWrites;
    
  •    /* Number of vector alu accesses */
    
  •    statistics::Scalar numVecAluAccesses;
    
  •    /* Number of predicate register file accesses */
    
  •    mutable statistics::Scalar numVecPredRegReads;
    
  •    statistics::Scalar numVecPredRegWrites;
    
  •    /* Number of vector register file accesses */
    
  •    mutable statistics::Scalar numVecRegReads;
    
  •    statistics::Scalar numVecRegWrites;
    
  •    /* Number of ops discarded before committing */
    
  •    statistics::Scalar numDiscardedOps;
    
  • };
  • struct CommitCPUStats: public statistics::Group
  • {
  •    CommitCPUStats(statistics::Group *parent, int thread_id);
    
  •    /* Number of simulated instructions committed */
    
  •    statistics::Scalar numInsts;
    
  •    statistics::Scalar numOps;
    
  •    /* Number of instructions committed that are not NOP or prefetches  
    

*/

  •    statistics::Scalar numInstsNotNOP;
    
  •    statistics::Scalar numOpsNotNOP;
    
  •    /* CPI/IPC for total cycle counts and macro insts */
    
  •    statistics::Formula cpi;
    
  •    statistics::Formula ipc;
    
  •    /* Number of committed memory references. */
    
  •    statistics::Scalar numMemRefs;
    
  •    /* Number of float instructions */
    
  •    statistics::Scalar numFpInsts;
    
  •    /* Number of int instructions */
    
  •    statistics::Scalar numIntInsts;
    
  •    /* number of load instructions */
    
  •    statistics::Scalar numLoadInsts;
    
  •    /* Number of store instructions */
    
  •    statistics::Scalar numStoreInsts;
    
  •    /* Number of vector instructions */
    
  •    statistics::Scalar numVecInsts;
    
  •    /* Number of instructions committed by type (OpClass) */
    
  •    statistics::Vector committedInstType;
    
  •    /* number of control instructions committed by control inst type */
    
  •    statistics::Vector committedControl;
    
  •    void updateComCtrlStats(const StaticInstPtr staticInst);
    
  • };
  • std::vector<std::unique_ptr<FetchCPUStats>> fetchStats;
  • std::vector<std::unique_ptr<ExecuteCPUStats>> executeStats;
  • std::vector<std::unique_ptr<CommitCPUStats>> commitStats;
    };

} // namespace gem5
diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc
index e22e162..b76bddc 100644
--- a/src/cpu/kvm/base.cc
+++ b/src/cpu/kvm/base.cc
@@ -261,6 +261,8 @@

BaseKvmCPU::StatGroup::StatGroup(statistics::Group *parent)
: statistics::Group(parent),

  • ADD_STAT(committedInsts, statistics::units::Count::get(),
  •         "Number of instructions committed"),
    ADD_STAT(numVMExits, statistics::units::Count::get(),
             "total number of KVM exits"),
    ADD_STAT(numVMHalfEntries, statistics::units::Count::get(),
    

@@ -776,8 +778,7 @@

      /* Update statistics */
      baseStats.numCycles += simCyclesExecuted;;
  •    commitStats[thread->threadId()]->numInsts += instsExecuted;
    
  •    baseStats.numInsts += instsExecuted;
    
  •    stats.committedInsts += instsExecuted;
        ctrInsts += instsExecuted;
    
        DPRINTF(KvmRun,
    

diff --git a/src/cpu/kvm/base.hh b/src/cpu/kvm/base.hh
index 7bbf393..2d81c7c 100644
--- a/src/cpu/kvm/base.hh
+++ b/src/cpu/kvm/base.hh
@@ -804,6 +804,7 @@
struct StatGroup : public statistics::Group
{
StatGroup(statistics::Group *parent);

  •    statistics::Scalar committedInsts;
        statistics::Scalar numVMExits;
        statistics::Scalar numVMHalfEntries;
        statistics::Scalar numExitSignal;
    

diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 2908c22..5eaaf58 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -871,18 +871,49 @@
{
thread->numInst++;
thread->threadStats.numInsts++;

  •    cpu.commitStats[inst->id.threadId]->numInsts++;
    
  •    cpu.baseStats.numInsts++;
    
  •    cpu.stats.numInsts++;
    
        /* Act on events related to instruction counts */
        thread->comInstEventQueue.serviceEvents(thread->numInst);
    }
    thread->numOp++;
    thread->threadStats.numOps++;
    
  • cpu.commitStats[inst->id.threadId]->numOps++;
  • cpu.baseStats.numOps++;
  • cpu.commitStats[inst->id.threadId]
  •    ->committedInstType[inst->staticInst->opClass()]++;
    
  • cpu.stats.numOps++;
  • cpu.stats.committedInstType[inst->id.threadId]
  •                           [inst->staticInst->opClass()]++;
    
  • /** Add a count for every control instruction */
  • if (inst->staticInst->isControl()) {
  •    if (inst->staticInst->isReturn()) {
    
  •        cpu.stats.committedControl[inst->id.threadId]
    
  •                    [gem5::StaticInstFlags::Flags::IsReturn]++;
    
  •    }
    
  •    if (inst->staticInst->isCall()) {
    
  •        cpu.stats.committedControl[inst->id.threadId]
    
  •                    [gem5::StaticInstFlags::Flags::IsCall]++;
    
  •    }
    
  •    if (inst->staticInst->isDirectCtrl()) {
    
  •        cpu.stats.committedControl[inst->id.threadId]
    
  •                    [gem5::StaticInstFlags::Flags::IsDirectControl]++;
    
  •    }
    
  •    if (inst->staticInst->isIndirectCtrl()) {
    
  •        cpu.stats.committedControl[inst->id.threadId]
    

[gem5::StaticInstFlags::Flags::IsIndirectControl]++;

  •    }
    
  •    if (inst->staticInst->isCondCtrl()) {
    
  •        cpu.stats.committedControl[inst->id.threadId]
    
  •                    [gem5::StaticInstFlags::Flags::IsCondControl]++;
    
  •    }
    
  •    if (inst->staticInst->isUncondCtrl()) {
    
  •        cpu.stats.committedControl[inst->id.threadId]
    
  •                    [gem5::StaticInstFlags::Flags::IsUncondControl]++;
    
  •    }
    
  •    cpu.stats.committedControl[inst->id.threadId]
    
  •                    [gem5::StaticInstFlags::Flags::IsControl]++;
    
  • }
  /* Set the CP SeqNum to the numOps commit number */
  if (inst->traceData)

@@ -1023,7 +1054,7 @@
DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute"
" inst: %s\n", thread_id, *inst);

  •        cpu.fetchStats[thread_id]->numFetchSuspends++;
    
  •        cpu.stats.numFetchSuspends++;
    
            updateBranchData(thread_id, BranchData::SuspendThread, inst,
                resume_pc, branch);
    

@@ -1337,7 +1368,7 @@
*inst, ex_info.streamSeqNum);

          if (fault == NoFault)
  •            cpu.executeStats[thread_id]->numDiscardedOps++;
    
  •            cpu.stats.numDiscardedOps++;
        }
    
        /* Mark the mem inst as being in the LSQ */
    

diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc
index e31cbe9..64d4c47 100644
--- a/src/cpu/minor/stats.cc
+++ b/src/cpu/minor/stats.cc
@@ -45,13 +45,47 @@

MinorStats::MinorStats(BaseCPU *base_cpu)
: statistics::Group(base_cpu),

  • ADD_STAT(numInsts, statistics::units::Count::get(),
  •         "Number of instructions committed"),
    
  • ADD_STAT(numOps, statistics::units::Count::get(),
  •         "Number of ops (including micro ops) committed"),
    
  • ADD_STAT(numDiscardedOps, statistics::units::Count::get(),
  •         "Number of ops (including micro ops) which were discarded  
    

before "

  •         "commit"),
    
  • ADD_STAT(numFetchSuspends, statistics::units::Count::get(),
  •         "Number of times Execute suspended instruction fetching"),
    ADD_STAT(quiesceCycles, statistics::units::Cycle::get(),
             "Total number of cycles that CPU has spent quiesced or  
    

waiting "

  •         "for an interrupt")
    
  •         "for an interrupt"),
    
  • ADD_STAT(cpi, statistics::units::Rate<

  •            statistics::units::Cycle, statistics::units::Count>::get(),
    
  •         "CPI: cycles per instruction"),
    
  • ADD_STAT(ipc, statistics::units::Rate<

  •            statistics::units::Count, statistics::units::Cycle>::get(),
    
  •         "IPC: instructions per cycle"),
    
  • ADD_STAT(committedInstType, statistics::units::Count::get(),

  •         "Class of committed instruction"),
    
  • ADD_STAT(committedControl, statistics::units::Count::get(),

  •         "Class of control type instructions committed")
    

    {
    quiesceCycles.prereq(quiesceCycles);

  • cpi.precision(6);

  • cpi = base_cpu->baseStats.numCycles / numInsts;

  • ipc.precision(6);

  • ipc = numInsts / base_cpu->baseStats.numCycles;

  • committedInstType

  •    .init(base_cpu->numThreads, enums::Num_OpClass)
    
  •    .flags(statistics::total | statistics::pdf | statistics::dist);
    
  • committedInstType.ysubnames(enums::OpClassStrings);

  • committedControl

  •    .init(base_cpu->numThreads, StaticInstFlags::Flags::Num_Flags)
    
  •    .flags(statistics::nozero);
    
  • committedControl.ysubnames(StaticInstFlags::FlagsStrings);
    }

} // namespace minor
diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh
index 98ac80f..1ab81f4 100644
--- a/src/cpu/minor/stats.hh
+++ b/src/cpu/minor/stats.hh
@@ -59,9 +59,31 @@
{
MinorStats(BaseCPU *parent);

  • /** Number of simulated instructions */

  • statistics::Scalar numInsts;

  • /** Number of simulated insts and microops */

  • statistics::Scalar numOps;

  • /** Number of ops discarded before committing */

  • statistics::Scalar numDiscardedOps;

  • /** Number of times fetch was asked to suspend by Execute */

  • statistics::Scalar numFetchSuspends;

  • /** Number of cycles in quiescent state */
    statistics::Scalar quiesceCycles;
    
  • /** CPI/IPC for total cycle counts and macro insts */

  • statistics::Formula cpi;

  • statistics::Formula ipc;

  • /** Number of instructions by type (OpClass) */

  • statistics::Vector2d committedInstType;

  • /** Number of branches commited */

  • statistics::Vector2d committedControl;

  • };

    } // namespace minor
    diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
    index e1f0168..38dce83 100644
    --- a/src/cpu/o3/commit.cc
    +++ b/src/cpu/o3/commit.cc
    @@ -156,10 +156,25 @@
    "The number of times a branch was mispredicted"),
    ADD_STAT(numCommittedDist, statistics::units::Count::get(),
    "Number of insts commited each cycle"),

  •  ADD_STAT(instsCommitted, statistics::units::Count::get(),
    
  •           "Number of instructions committed"),
    
  •  ADD_STAT(opsCommitted, statistics::units::Count::get(),
    
  •           "Number of ops (including micro ops) committed"),
    
  •  ADD_STAT(memRefs, statistics::units::Count::get(),
    
  •           "Number of memory references committed"),
    
  •  ADD_STAT(loads, statistics::units::Count::get(), "Number of loads  
    

committed"),
ADD_STAT(amos, statistics::units::Count::get(),
"Number of atomic instructions committed"),
ADD_STAT(membars, statistics::units::Count::get(),
"Number of memory barriers committed"),

  •  ADD_STAT(branches, statistics::units::Count::get(),
    
  •           "Number of branches committed"),
    
  •  ADD_STAT(vectorInstructions, statistics::units::Count::get(),
    
  •           "Number of committed Vector instructions."),
    
  •  ADD_STAT(floating, statistics::units::Count::get(),
    
  •           "Number of committed floating point instructions."),
    
  •  ADD_STAT(integer, statistics::units::Count::get(),
    
  •           "Number of committed integer instructions."),
      ADD_STAT(functionCalls, statistics::units::Count::get(),
               "Number of function calls committed."),
      ADD_STAT(committedInstType, statistics::units::Count::get(),
    

@@ -177,6 +192,22 @@
.init(0,commit->commitWidth,1)
.flags(statistics::pdf);

  • instsCommitted
  •    .init(cpu->numThreads)
    
  •    .flags(total);
    
  • opsCommitted
  •    .init(cpu->numThreads)
    
  •    .flags(total);
    
  • memRefs
  •    .init(cpu->numThreads)
    
  •    .flags(total);
    
  • loads
  •    .init(cpu->numThreads)
    
  •    .flags(total);
    
  • amos
        .init(cpu->numThreads)
        .flags(total);
    

@@ -185,6 +216,22 @@
.init(cpu->numThreads)
.flags(total);

  • branches
  •    .init(cpu->numThreads)
    
  •    .flags(total);
    
  • vectorInstructions
  •    .init(cpu->numThreads)
    
  •    .flags(total);
    
  • floating
  •    .init(cpu->numThreads)
    
  •    .flags(total);
    
  • integer
  •    .init(cpu->numThreads)
    
  •    .flags(total);
    
  • functionCalls
        .init(commit->numThreads)
        .flags(total);
    

@@ -1336,12 +1383,9 @@
{
ThreadID tid = inst->threadNumber;

  • if (!inst->isMicroop() || inst->isLastMicroop()) {
  •    cpu->commitStats[tid]->numInsts++;
    
  •    cpu->baseStats.numInsts++;
    
  • }
  • cpu->commitStats[tid]->numOps++;
  • cpu->baseStats.numOps++;
  • if (!inst->isMicroop() || inst->isLastMicroop())

  •    stats.instsCommitted[tid]++;
    
  • stats.opsCommitted[tid]++;

    // To match the old model, don't count nops and instruction
    // prefetches towards the total commit count.
    @@ -1352,20 +1396,21 @@
    //
    //  Control Instructions
    //

  • cpu->commitStats[tid]->updateComCtrlStats(inst->staticInst);
  • if (inst->isControl())
  •    stats.branches[tid]++;
    
    //
    //  Memory references
    //
    if (inst->isMemRef()) {
    
  •    cpu->commitStats[tid]->numMemRefs++;
    
  •    stats.memRefs[tid]++;
    
        if (inst->isLoad()) {
    
  •        cpu->commitStats[tid]->numLoadInsts++;
    
  •        stats.loads[tid]++;
        }
    
  •    if (inst->isStore()) {
    
  •        cpu->commitStats[tid]->numStoreInsts++;
    
  •    if (inst->isAtomic()) {
    
  •        stats.amos[tid]++;
        }
    }
    

@@ -1375,14 +1420,14 @@

  // Integer Instruction
  if (inst->isInteger())
  •    cpu->commitStats[tid]->numIntInsts++;
    
  •    stats.integer[tid]++;
    
    // Floating Point Instruction
    if (inst->isFloating())
    
  •    cpu->commitStats[tid]->numFpInsts++;
    
  •    stats.floating[tid]++;
    // Vector Instruction
    if (inst->isVector())
    
  •    cpu->commitStats[tid]->numVecInsts++;
    
  •    stats.vectorInstructions[tid]++;
    
    // Function Calls
    if (inst->isCall())
    

diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index eccd023..cf4eaf5 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -479,10 +479,26 @@
/** Distribution of the number of committed instructions each cycle. */
statistics::Distribution numCommittedDist;

  •    /** Total number of instructions committed. */
    
  •    statistics::Vector instsCommitted;
    
  •    /** Total number of ops (including micro ops) committed. */
    
  •    statistics::Vector opsCommitted;
    
  •    /** Stat for the total number of committed memory references. */
    
  •    statistics::Vector memRefs;
    
  •    /** Stat for the total number of committed loads. */
    
  •    statistics::Vector loads;
        /** Stat for the total number of committed atomics. */
        statistics::Vector amos;
        /** Total number of committed memory barriers. */
        statistics::Vector membars;
    
  •    /** Total number of committed branches. */
    
  •    statistics::Vector branches;
    
  •    /** Total number of vector instructions */
    
  •    statistics::Vector vectorInstructions;
    
  •    /** Total number of floating point instructions */
    
  •    statistics::Vector floating;
    
  •    /** Total number of integer instructions */
    
  •    statistics::Vector integer;
        /** Total number of function calls */
        statistics::Vector functionCalls;
        /** Committed instructions by instruction type (OpClass) */
    

diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 93c58fe..d2bacaa 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -328,7 +328,47 @@
"to idling"),
ADD_STAT(quiesceCycles, statistics::units::Cycle::get(),
"Total number of cycles that CPU has spent quiesced or waiting "

  •           "for an interrupt")
    
  •           "for an interrupt"),
    
  •  ADD_STAT(committedInsts, statistics::units::Count::get(),
    
  •           "Number of Instructions Simulated"),
    
  •  ADD_STAT(committedOps, statistics::units::Count::get(),
    
  •           "Number of Ops (including micro ops) Simulated"),
    
  •  ADD_STAT(cpi, statistics::units::Rate<
    
  •                statistics::units::Cycle, statistics::units::Count>::get(),

  •           "CPI: Cycles Per Instruction"),
    
  •  ADD_STAT(totalCpi, statistics::units::Rate<
    
  •                statistics::units::Cycle, statistics::units::Count>::get(),

  •           "CPI: Total CPI of All Threads"),
    
  •  ADD_STAT(ipc, statistics::units::Rate<
    
  •                statistics::units::Count, statistics::units::Cycle>::get(),

  •           "IPC: Instructions Per Cycle"),
    
  •  ADD_STAT(totalIpc, statistics::units::Rate<
    
  •                statistics::units::Count, statistics::units::Cycle>::get(),

  •           "IPC: Total IPC of All Threads"),
    
  •  ADD_STAT(intRegfileReads, statistics::units::Count::get(),
    
  •           "Number of integer regfile reads"),
    
  •  ADD_STAT(intRegfileWrites, statistics::units::Count::get(),
    
  •           "Number of integer regfile writes"),
    
  •  ADD_STAT(fpRegfileReads, statistics::units::Count::get(),
    
  •           "Number of floating regfile reads"),
    
  •  ADD_STAT(fpRegfileWrites, statistics::units::Count::get(),
    
  •           "Number of floating regfile writes"),
    
  •  ADD_STAT(vecRegfileReads, statistics::units::Count::get(),
    
  •           "number of vector regfile reads"),
    
  •  ADD_STAT(vecRegfileWrites, statistics::units::Count::get(),
    
  •           "number of vector regfile writes"),
    
  •  ADD_STAT(vecPredRegfileReads, statistics::units::Count::get(),
    
  •           "number of predicate regfile reads"),
    
  •  ADD_STAT(vecPredRegfileWrites, statistics::units::Count::get(),
    
  •           "number of predicate regfile writes"),
    
  •  ADD_STAT(ccRegfileReads, statistics::units::Count::get(),
    
  •           "number of cc regfile reads"),
    
  •  ADD_STAT(ccRegfileWrites, statistics::units::Count::get(),
    
  •           "number of cc regfile writes"),
    
  •  ADD_STAT(miscRegfileReads, statistics::units::Count::get(),
    
  •           "number of misc regfile reads"),
    
  •  ADD_STAT(miscRegfileWrites, statistics::units::Count::get(),
    
  •           "number of misc regfile writes")
    

    {
    // Register any of the O3CPU's stats here.
    timesIdled
    @@ -340,6 +380,69 @@
    quiesceCycles
    .prereq(quiesceCycles);

  • // Number of Instructions simulated

  • // --------------------------------

  • // Should probably be in Base CPU but need templated

  • // MaxThreads so put in here instead

  • committedInsts

  •    .init(cpu->numThreads)
    
  •    .flags(statistics::total);
    
  • committedOps

  •    .init(cpu->numThreads)
    
  •    .flags(statistics::total);
    
  • cpi

  •    .precision(6);
    
  • cpi = cpu->baseStats.numCycles / committedInsts;

  • totalCpi

  •    .precision(6);
    
  • totalCpi = cpu->baseStats.numCycles / sum(committedInsts);

  • ipc

  •    .precision(6);
    
  • ipc = committedInsts / cpu->baseStats.numCycles;

  • totalIpc

  •    .precision(6);
    
  • totalIpc = sum(committedInsts) / cpu->baseStats.numCycles;

  • intRegfileReads

  •    .prereq(intRegfileReads);
    
  • intRegfileWrites

  •    .prereq(intRegfileWrites);
    
  • fpRegfileReads

  •    .prereq(fpRegfileReads);
    
  • fpRegfileWrites

  •    .prereq(fpRegfileWrites);
    
  • vecRegfileReads

  •    .prereq(vecRegfileReads);
    
  • vecRegfileWrites

  •    .prereq(vecRegfileWrites);
    
  • vecPredRegfileReads

  •    .prereq(vecPredRegfileReads);
    
  • vecPredRegfileWrites

  •    .prereq(vecPredRegfileWrites);
    
  • ccRegfileReads

  •    .prereq(ccRegfileReads);
    
  • ccRegfileWrites

  •    .prereq(ccRegfileWrites);
    
  • miscRegfileReads

  •    .prereq(miscRegfileReads);
    
  • miscRegfileWrites

  •    .prereq(miscRegfileWrites);
    

    }

    void
    @@ -916,7 +1019,7 @@
    RegVal
    CPU::readMiscReg(int misc_reg, ThreadID tid)
    {

  • executeStats[tid]->numMiscRegReads++;
  • cpuStats.miscRegfileReads++;
    return isa[tid]->readMiscReg(misc_reg);
    }

@@ -929,29 +1032,29 @@
void
CPU::setMiscReg(int misc_reg, RegVal val, ThreadID tid)
{

  • executeStats[tid]->numMiscRegWrites++;
  • cpuStats.miscRegfileWrites++;
    isa[tid]->setMiscReg(misc_reg, val);
    }

RegVal
-CPU::getReg(PhysRegIdPtr phys_reg, ThreadID tid)
+CPU::getReg(PhysRegIdPtr phys_reg)
{
switch (phys_reg->classValue()) {
case IntRegClass:

  •    executeStats[tid]->numIntRegReads++;
    
  •    cpuStats.intRegfileReads++;
        break;
      case FloatRegClass:
    
  •    executeStats[tid]->numFpRegReads++;
    
  •    cpuStats.fpRegfileReads++;
        break;
      case CCRegClass:
    
  •    executeStats[tid]->numCCRegReads++;
    
  •    cpuStats.ccRegfileReads++;
        break;
      case VecRegClass:
      case VecElemClass:
    
  •    executeStats[tid]->numVecRegReads++;
    
  •    cpuStats.vecRegfileReads++;
        break;
      case VecPredRegClass:
    
  •    executeStats[tid]->numVecPredRegReads++;
    
  •    cpuStats.vecPredRegfileReads++;
        break;
      default:
        break;
    

@@ -960,24 +1063,24 @@
}

void
-CPU::getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid)
+CPU::getReg(PhysRegIdPtr phys_reg, void *val)
{
switch (phys_reg->classValue()) {
case IntRegClass:

  •    executeStats[tid]->numIntRegReads++;
    
  •    cpuStats.intRegfileReads++;
        break;
      case FloatRegClass:
    
  •    executeStats[tid]->numFpRegReads++;
    
  •    cpuStats.fpRegfileReads++;
        break;
      case CCRegClass:
    
  •    executeStats[tid]->numCCRegReads++;
    
  •    cpuStats.ccRegfileReads++;
        break;
      case VecRegClass:
      case VecElemClass:
    
  •    executeStats[tid]->numVecRegReads++;
    
  •    cpuStats.vecRegfileReads++;
        break;
      case VecPredRegClass:
    
  •    executeStats[tid]->numVecPredRegReads++;
    
  •    cpuStats.vecPredRegfileReads++;
        break;
      default:
        break;
    

@@ -986,14 +1089,14 @@
}

void *
-CPU::getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid)
+CPU::getWritableReg(PhysRegIdPtr phys_reg)
{
switch (phys_reg->classValue()) {
case VecRegClass:

  •    executeStats[tid]->numVecRegReads++;
    
  •    cpuStats.vecRegfileReads++;
        break;
      case VecPredRegClass:
    
  •    executeStats[tid]->numVecPredRegReads++;
    
  •    cpuStats.vecPredRegfileReads++;
        break;
      default:
        break;
    

@@ -1002,24 +1105,24 @@
}

void
-CPU::setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid)
+CPU::setReg(PhysRegIdPtr phys_reg, RegVal val)
{
switch (phys_reg->classValue()) {
case IntRegClass:

  •    executeStats[tid]->numIntRegWrites++;
    
  •    cpuStats.intRegfileWrites++;
        break;
      case FloatRegClass:
    
  •    executeStats[tid]->numFpRegWrites++;
    
  •    cpuStats.fpRegfileWrites++;
        break;
      case CCRegClass:
    
  •    executeStats[tid]->numCCRegWrites++;
    
  •    cpuStats.ccRegfileWrites++;
        break;
      case VecRegClass:
      case VecElemClass:
    
  •    executeStats[tid]->numVecRegWrites++;
    
  •    cpuStats.vecRegfileWrites++;
        break;
      case VecPredRegClass:
    
  •    executeStats[tid]->numVecPredRegWrites++;
    
  •    cpuStats.vecPredRegfileWrites++;
        break;
      default:
        break;
    

@@ -1028,24 +1131,24 @@
}

void
-CPU::setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid)
+CPU::setReg(PhysRegIdPtr phys_reg, const void *val)
{
switch (phys_reg->classValue()) {
case IntRegClass:

  •    executeStats[tid]->numIntRegWrites++;
    
  •    cpuStats.intRegfileWrites++;
        break;
      case FloatRegClass:
    
  •    executeStats[tid]->numFpRegWrites++;
    
  •    cpuStats.fpRegfileWrites++;
        break;
      case CCRegClass:
    
  •    executeStats[tid]->numCCRegWrites++;
    
  •    cpuStats.ccRegfileWrites++;
        break;
      case VecRegClass:
      case VecElemClass:
    
  •    executeStats[tid]->numVecRegWrites++;
    
  •    cpuStats.vecRegfileWrites++;
        break;
      case VecPredRegClass:
    
  •    executeStats[tid]->numVecPredRegWrites++;
    
  •    cpuStats.vecPredRegfileWrites++;
        break;
      default:
        break;
    

@@ -1127,14 +1230,14 @@
if (!inst->isMicroop() || inst->isLastMicroop()) {
thread[tid]->numInst++;
thread[tid]->threadStats.numInsts++;

  •    commitStats[tid]->numInstsNotNOP++;
    
  •    cpuStats.committedInsts[tid]++;
    
        // Check for instruction-count-based events.
        thread[tid]->comInstEventQueue.serviceEvents(thread[tid]->numInst);
    }
    thread[tid]->numOp++;
    thread[tid]->threadStats.numOps++;
    
  • commitStats[tid]->numOpsNotNOP++;
  • cpuStats.committedOps[tid]++;

    probeInstCommit(inst->staticInst, inst->pcState().instAddr());
    }
    diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
    index 7dc3784..08a1312 100644
    --- a/src/cpu/o3/cpu.hh
    +++ b/src/cpu/o3/cpu.hh
    @@ -310,12 +310,12 @@
    */
    void setMiscReg(int misc_reg, RegVal val, ThreadID tid);

  • RegVal getReg(PhysRegIdPtr phys_reg, ThreadID tid);
  • void getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid);
  • void *getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid);
  • RegVal getReg(PhysRegIdPtr phys_reg);
  • void getReg(PhysRegIdPtr phys_reg, void *val);
  • void *getWritableReg(PhysRegIdPtr phys_reg);
  • void setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid);
  • void setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid);
  • void setReg(PhysRegIdPtr phys_reg, RegVal val);

  • void setReg(PhysRegIdPtr phys_reg, const void *val);

    /** Architectural register accessors.  Looks up in the commit
     * rename table to obtain the true physical index of the
      @@ -581,7 +581,38 @@
      /** Stat for total number of cycles the CPU spends descheduled due to a
       * quiesce operation or waiting for an interrupt. */
        statistics::Scalar quiesceCycles;
  •    /** Stat for the number of committed instructions per thread. */
    
  •    statistics::Vector committedInsts;
    
  •    /** Stat for the number of committed ops (including micro ops) per
    
  •     *  thread. */
    
  •    statistics::Vector committedOps;
    
  •    /** Stat for the CPI per thread. */
    
  •    statistics::Formula cpi;
    
  •    /** Stat for the total CPI. */
    
  •    statistics::Formula totalCpi;
    
  •    /** Stat for the IPC per thread. */
    
  •    statistics::Formula ipc;
    
  •    /** Stat for the total IPC. */
    
  •    statistics::Formula totalIpc;
    
  •    //number of integer register file accesses
    
  •    statistics::Scalar intRegfileReads;
    
  •    statistics::Scalar intRegfileWrites;
    
  •    //number of float register file accesses
    
  •    statistics::Scalar fpRegfileReads;
    
  •    statistics::Scalar fpRegfileWrites;
    
  •    //number of vector register file accesses
    
  •    mutable statistics::Scalar vecRegfileReads;
    
  •    statistics::Scalar vecRegfileWrites;
    
  •    //number of predicate register file accesses
    
  •    mutable statistics::Scalar vecPredRegfileReads;
    
  •    statistics::Scalar vecPredRegfileWrites;
    
  •    //number of CC register file accesses
    
  •    statistics::Scalar ccRegfileReads;
    
  •    statistics::Scalar ccRegfileWrites;
    
  •    //number of misc
    
  •    statistics::Scalar miscRegfileReads;
    
  •    statistics::Scalar miscRegfileWrites;
    } cpuStats;
    

    public:
    diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
    index c759c5e..54c0385 100644
    --- a/src/cpu/o3/dyn_inst.hh
    +++ b/src/cpu/o3/dyn_inst.hh
    @@ -1086,10 +1086,10 @@

            if (bytes == sizeof(RegVal)) {
                setRegOperand(staticInst.get(), idx,
    
  •                    cpu->getReg(prev_phys_reg, threadNumber));
    
  •                    cpu->getReg(prev_phys_reg));
            } else {
                uint8_t val[original_dest_reg.regClass().regBytes()];
    
  •            cpu->getReg(prev_phys_reg, val, threadNumber);
    
  •            cpu->getReg(prev_phys_reg, val);
                setRegOperand(staticInst.get(), idx, val);
            }
        }
    

@@ -1116,7 +1116,7 @@
const PhysRegIdPtr reg = renamedSrcIdx(idx);
if (reg->is(InvalidRegClass))
return 0;

  •    return cpu->getReg(reg, threadNumber);
    
  •    return cpu->getReg(reg);
    }
    
    void
    

@@ -1125,13 +1125,13 @@
const PhysRegIdPtr reg = renamedSrcIdx(idx);
if (reg->is(InvalidRegClass))
return;

  •    cpu->getReg(reg, val, threadNumber);
    
  •    cpu->getReg(reg, val);
    }
    
    void *
    getWritableRegOperand(const StaticInst *si, int idx) override
    {
    
  •    return cpu->getWritableReg(renamedDestIdx(idx), threadNumber);
    
  •    return cpu->getWritableReg(renamedDestIdx(idx));
    }
    
    /** @todo: Make results into arrays so they can handle multiple dest
    

@@ -1143,7 +1143,7 @@
const PhysRegIdPtr reg = renamedDestIdx(idx);
if (reg->is(InvalidRegClass))
return;

  •    cpu->setReg(reg, val, threadNumber);
    
  •    cpu->setReg(reg, val);
        setResult(reg->regClass(), val);
    }
    

@@ -1153,7 +1153,7 @@
const PhysRegIdPtr reg = renamedDestIdx(idx);
if (reg->is(InvalidRegClass))
return;

  •    cpu->setReg(reg, val, threadNumber);
    
  •    cpu->setReg(reg, val);
        setResult(reg->regClass(), val);
    }
    

    };
    diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc
    index f5fc6c6..d3cdd2c 100644
    --- a/src/cpu/o3/fetch.cc
    +++ b/src/cpu/o3/fetch.cc
    @@ -158,6 +158,12 @@

    Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
    : statistics::Group(cpu, "fetch"),

  • ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(),

  •         "Number of cycles fetch is stalled on an Icache miss"),
    
  • ADD_STAT(insts, statistics::units::Count::get(),

  •         "Number of instructions fetch has processed"),
    
  • ADD_STAT(branches, statistics::units::Count::get(),

  •         "Number of branches that fetch encountered"),
    ADD_STAT(predictedBranches, statistics::units::Count::get(),
             "Number of branches that fetch has predicted taken"),
    ADD_STAT(cycles, statistics::units::Cycle::get(),
    

@@ -194,8 +200,21 @@
"Number of instructions fetched each cycle (Total)"),
ADD_STAT(idleRate, statistics::units::Ratio::get(),
"Ratio of cycles fetch was idle",

  •         idleCycles / cpu->baseStats.numCycles)
    
  •         idleCycles / cpu->baseStats.numCycles),
    
  • ADD_STAT(branchRate, statistics::units::Ratio::get(),
  •         "Number of branch fetches per cycle",
    
  •         branches / cpu->baseStats.numCycles),
    
  • ADD_STAT(rate, statistics::units::Rate<
  •                statistics::units::Count, statistics::units::Cycle>::get(),

  •         "Number of inst fetches per cycle",
    
  •         insts / cpu->baseStats.numCycles)
    
    {
  •    icacheStallCycles
    
  •        .prereq(icacheStallCycles);
    
  •    insts
    
  •        .prereq(insts);
    
  •    branches
    
  •        .prereq(branches);
        predictedBranches
            .prereq(predictedBranches);
        cycles
    

@@ -233,6 +252,10 @@
.flags(statistics::pdf);
idleRate
.prereq(idleRate);

  •    branchRate
    
  •        .flags(statistics::total);
    
  •    rate
    
  •        .flags(statistics::total);
    
    }
    void
    Fetch::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
    @@ -517,7 +540,7 @@
    inst->setPredTarg(next_pc);
    inst->setPredTaken(predict_taken);
  • cpu->fetchStats[tid]->numBranches++;
  • ++fetchStats.branches;

    if (predict_taken) {
    ++fetchStats.predictedBranches;
    @@ -1123,7 +1146,7 @@
    fetchCacheLine(fetchAddr, tid, this_pc.instAddr());

         if (fetchStatus[tid] == IcacheWaitResponse)
    
  •            cpu->fetchStats[tid]->icacheStallCycles++;
    
  •            ++fetchStats.icacheStallCycles;
            else if (fetchStatus[tid] == ItlbWait)
                ++fetchStats.tlbCycles;
            else
    

@@ -1219,7 +1242,7 @@
staticInst = dec_ptr->decode(this_pc);

                  // Increment stat of fetched instructions.
  •                cpu->fetchStats[tid]->numInsts++;
    
  •                ++fetchStats.insts;
    
                    if (staticInst->isMacroop()) {
                        curMacroop = staticInst;
    

@@ -1549,7 +1572,7 @@
++fetchStats.squashCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid);
} else if (fetchStatus[tid] == IcacheWaitResponse) {

  •    cpu->fetchStats[tid]->icacheStallCycles++;
    
  •    ++fetchStats.icacheStallCycles;
        DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n",
                tid);
    } else if (fetchStatus[tid] == ItlbWait) {
    

diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 6add314..cd31191 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -533,6 +533,12 @@
FetchStatGroup(CPU *cpu, Fetch *fetch);
// @todo: Consider making these
// vectors and tracking on a per thread basis.

  •    /** Stat for total number of cycles stalled due to an icache miss. */

  •    statistics::Scalar icacheStallCycles;
    
  •    /** Stat for total number of fetched instructions. */
    
  •    statistics::Scalar insts;
    
  •    /** Total number of fetched branches. */
    
  •    statistics::Scalar branches;
        /** Stat for total number of predicted branches. */
        statistics::Scalar predictedBranches;
        /** Stat for total number of cycles spent fetching. */
    

@@ -575,6 +581,10 @@
statistics::Distribution nisnDist;
/** Rate of how often fetch was idle. */
statistics::Formula idleRate;

  •    /** Number of branch fetches per cycle. */
    
  •    statistics::Formula branchRate;
    
  •    /** Number of instruction fetched per cycle. */
    
  •    statistics::Formula rate;
    } fetchStats;
    
    };

diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc
index 92d281c..7cf6c54 100644
--- a/src/cpu/o3/iew.cc
+++ b/src/cpu/o3/iew.cc
@@ -217,14 +217,52 @@

IEW::IEWStats::ExecutedInstStats::ExecutedInstStats(CPU *cpu)
: statistics::Group(cpu),

  • ADD_STAT(numInsts, statistics::units::Count::get(),
  •         "Number of executed instructions"),
    
  • ADD_STAT(numLoadInsts, statistics::units::Count::get(),
  •         "Number of load instructions executed"),
    ADD_STAT(numSquashedInsts, statistics::units::Count::get(),
             "Number of squashed instructions skipped in execute"),
    ADD_STAT(numSwp, statistics::units::Count::get(),
    
  •         "Number of swp insts executed")
    
  •         "Number of swp insts executed"),
    
  • ADD_STAT(numNop, statistics::units::Count::get(),

  •         "Number of nop insts executed"),
    
  • ADD_STAT(numRefs, statistics::units::Count::get(),

  •         "Number of memory reference insts executed"),
    
  • ADD_STAT(numBranches, statistics::units::Count::get(),

  •         "Number of branches executed"),
    
  • ADD_STAT(numStoreInsts, statistics::units::Count::get(),

  •         "Number of stores executed"),
    
  • ADD_STAT(numRate, statistics::units::Rate<

  •            statistics::units::Count, statistics::units::Cycle>::get(),
    
  •         "Inst execution rate", numInsts / cpu->baseStats.numCycles)
    

    {

  • numLoadInsts

  •    .init(cpu->numThreads)
    
  •    .flags(statistics::total);
    
  • numSwp
        .init(cpu->numThreads)
        .flags(statistics::total);
    
  • numNop

  •    .init(cpu->numThreads)
    
  •    .flags(statistics::total);
    
  • numRefs

  •    .init(cpu->numThreads)
    
  •    .flags(statistics::total);
    
  • numBranches

  •    .init(cpu->numThreads)
    
  •    .flags(statistics::total);
    
  • numStoreInsts

  •    .flags(statistics::total);
    
  • numStoreInsts = numRefs - numLoadInsts;

  • numRate

  •    .flags(statistics::total);
    

    }

    void
    @@ -1015,7 +1053,7 @@

            instQueue.recordProducer(inst);
    
  •        cpu->executeStats[tid]->numNop++;
    
  •        iewStats.executedInstStats.numNop[tid]++;
    
            add_to_iq = false;
        } else {
    

@@ -1523,7 +1561,7 @@
{
ThreadID tid = inst->threadNumber;

  • cpu->executeStats[tid]->numInsts++;
  • iewStats.executedInstStats.numInsts++;

#if TRACING_ON
if (debug::O3PipeView) {
@@ -1535,16 +1573,16 @@
//  Control operations
//
if (inst->isControl())

  •    cpu->executeStats[tid]->numBranches++;
    
  •    iewStats.executedInstStats.numBranches[tid]++;
    
    //
    //  Memory operations
    //
    if (inst->isMemRef()) {
    
  •    cpu->executeStats[tid]->numMemRefs++;
    
  •    iewStats.executedInstStats.numRefs[tid]++;
    
        if (inst->isLoad()) {
    
  •        cpu->executeStats[tid]->numLoadInsts++;
    
  •        iewStats.executedInstStats.numLoadInsts[tid]++;
        }
    }
    

    }
    diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
    index 4fe8227..80fed29 100644
    --- a/src/cpu/o3/iew.hh
    +++ b/src/cpu/o3/iew.hh
    @@ -455,11 +455,25 @@
    {
    ExecutedInstStats(CPU *cpu);

  •        /** Stat for total number of executed instructions. */
    
  •        statistics::Scalar numInsts;
    
  •        /** Stat for total number of executed load instructions. */
    
  •        statistics::Vector numLoadInsts;
            /** Stat for total number of squashed instructions skipped at
             *  execute. */
            statistics::Scalar numSquashedInsts;
            /** Number of executed software prefetches. */
            statistics::Vector numSwp;
    
  •        /** Number of executed nops. */
    
  •        statistics::Vector numNop;
    
  •        /** Number of executed meomory references. */
    
  •        statistics::Vector numRefs;
    
  •        /** Number of executed branches. */
    
  •        statistics::Vector numBranches;
    
  •        /** Number of executed store instructions. */
    
  •        statistics::Formula numStoreInsts;
    
  •        /** Number of instructions executed per cycle. */
    
  •        statistics::Formula numRate;
        } executedInstStats;
    
        /** Number of instructions sent to commit. */
    

diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 35d1490..768f63e 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -154,36 +154,10 @@

  if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
      t_info.numInst++;
  •    t_info.execContextStats.numInsts++;
    }
    t_info.numOp++;
    

-}

-void
-BaseSimpleCPU::countFetchInst()
-{

  • SimpleExecContext& t_info = *threadInfo[curThread];
  • if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
  •    // increment thread level numInsts fetched count
    
  •    fetchStats[t_info.thread->threadId()]->numInsts++;
    
  • }
  • // increment thread level numOps fetched count
  • fetchStats[t_info.thread->threadId()]->numOps++;
    -}

-void
-BaseSimpleCPU::countCommitInst()
-{

  • SimpleExecContext& t_info = *threadInfo[curThread];
  • if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
  •    // increment thread level and core level numInsts count
    
  •    commitStats[t_info.thread->threadId()]->numInsts++;
    
  •    baseStats.numInsts++;
    
  • }
  • // increment thread level and core level numOps count
  • commitStats[t_info.thread->threadId()]->numOps++;
  • baseStats.numOps++;
  • t_info.execContextStats.numOps++;
    }

Counter
@@ -402,11 +376,6 @@
if (predict_taken)
++t_info.execContextStats.numPredictedBranches;
}

  • // increment the fetch instruction stat counters

  • if (curStaticInst) {

  •    countFetchInst();
    
  • }
    }

    void
    @@ -419,7 +388,7 @@
    Addr instAddr = threadContexts[curThread]->pcState().instAddr();

    if (curStaticInst->isMemRef()) {
    
  •    executeStats[t_info.thread->threadId()]->numMemRefs++;
    
  •    t_info.execContextStats.numMemRefs++;
    }
    
    if (curStaticInst->isLoad()) {
    

@@ -427,26 +396,26 @@
}

  if (curStaticInst->isControl()) {
  •    ++fetchStats[t_info.thread->threadId()]->numBranches;
    
  •    ++t_info.execContextStats.numBranches;
    }
    
    /* Power model statistics */
    //integer alu accesses
    if (curStaticInst->isInteger()){
    
  •    executeStats[t_info.thread->threadId()]->numIntAluAccesses++;
    
  •    commitStats[t_info.thread->threadId()]->numIntInsts++;
    
  •    t_info.execContextStats.numIntAluAccesses++;
    
  •    t_info.execContextStats.numIntInsts++;
    }
    
    //float alu accesses
    if (curStaticInst->isFloating()){
    
  •    executeStats[t_info.thread->threadId()]->numFpAluAccesses++;
    
  •    commitStats[t_info.thread->threadId()]->numFpInsts++;
    
  •    t_info.execContextStats.numFpAluAccesses++;
    
  •    t_info.execContextStats.numFpInsts++;
    }
    
    //vector alu accesses
    if (curStaticInst->isVector()){
    
  •    executeStats[t_info.thread->threadId()]->numVecAluAccesses++;
    
  •    commitStats[t_info.thread->threadId()]->numVecInsts++;
    
  •    t_info.execContextStats.numVecAluAccesses++;
    
  •    t_info.execContextStats.numVecInsts++;
    }
    
    //Matrix alu accesses
    

@@ -460,22 +429,22 @@
t_info.execContextStats.numCallsReturns++;
}

  • //the number of branch predictions that will be made
  • if (curStaticInst->isCondCtrl()){
  •    t_info.execContextStats.numCondCtrlInsts++;
    
  • }
  • //result bus acceses
    if (curStaticInst->isLoad()){
    
  •    commitStats[t_info.thread->threadId()]->numLoadInsts++;
    
  •    t_info.execContextStats.numLoadInsts++;
    }
    
    if (curStaticInst->isStore() || curStaticInst->isAtomic()){
    
  •    commitStats[t_info.thread->threadId()]->numStoreInsts++;
    
  •    t_info.execContextStats.numStoreInsts++;
    }
    /* End power model statistics */
    
  • commitStats[t_info.thread->threadId()]
  •    ->committedInstType[curStaticInst->opClass()]++;
    

  • commitStats[t_info.thread->threadId()]->updateComCtrlStats(curStaticInst);

  • /* increment the committed numInsts and numOps stats */
  • countCommitInst();

  • t_info.execContextStats.statExecutedInstType[curStaticInst->opClass()]++;

  if (FullSystem)
      traceFunctions(instAddr);

diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index 46a25a0..df5290c 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -182,8 +182,6 @@
}

  void countInst();
  • void countFetchInst();
  • void countCommitInst();
    Counter totalInsts() const override;
    Counter totalOps() const override;

diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index c0927fc..0f20763 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -86,12 +86,60 @@
: statistics::Group(cpu,
csprintf("exec_context.thread_%i",
thread->threadId()).c_str()),

  •          ADD_STAT(numInsts, statistics::units::Count::get(),
    
  •                   "Number of instructions committed"),
    
  •          ADD_STAT(numOps, statistics::units::Count::get(),
    
  •                   "Number of ops (including micro ops) committed"),
    
  •          ADD_STAT(numIntAluAccesses, statistics::units::Count::get(),
    
  •                   "Number of integer alu accesses"),
    
  •          ADD_STAT(numFpAluAccesses, statistics::units::Count::get(),
    
  •                   "Number of float alu accesses"),
    
  •          ADD_STAT(numVecAluAccesses, statistics::units::Count::get(),
    
  •                   "Number of vector alu accesses"),
              ADD_STAT(numMatAluAccesses, statistics::units::Count::get(),
                       "Number of matrix alu accesses"),
              ADD_STAT(numCallsReturns, statistics::units::Count::get(),
                       "Number of times a function call or return occured"),

  •          ADD_STAT(numCondCtrlInsts, statistics::units::Count::get(),
    
  •                   "Number of instructions that are conditional controls"),

  •          ADD_STAT(numIntInsts, statistics::units::Count::get(),
    
  •                   "Number of integer instructions"),
    
  •          ADD_STAT(numFpInsts, statistics::units::Count::get(),
    
  •                   "Number of float instructions"),
    
  •          ADD_STAT(numVecInsts, statistics::units::Count::get(),
    
  •                   "Number of vector instructions"),
              ADD_STAT(numMatInsts, statistics::units::Count::get(),
                       "Number of matrix instructions"),
    
  •          ADD_STAT(numIntRegReads, statistics::units::Count::get(),
    
  •                   "Number of times the integer registers were read"),
    
  •          ADD_STAT(numIntRegWrites, statistics::units::Count::get(),
    
  •                   "Number of times the integer registers were written"),

  •          ADD_STAT(numFpRegReads, statistics::units::Count::get(),
    
  •                   "Number of times the floating registers were read"),
    
  •          ADD_STAT(numFpRegWrites, statistics::units::Count::get(),
    
  •                   "Number of times the floating registers were written"),

  •          ADD_STAT(numVecRegReads, statistics::units::Count::get(),
    
  •                   "Number of times the vector registers were read"),
    
  •          ADD_STAT(numVecRegWrites, statistics::units::Count::get(),
    
  •                   "Number of times the vector registers were written"),

  •          ADD_STAT(numVecPredRegReads, statistics::units::Count::get(),
    
  •                   "Number of times the predicate registers were read"),

  •          ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(),
    
  •                   "Number of times the predicate registers were written"),

  •          ADD_STAT(numCCRegReads, statistics::units::Count::get(),
    
  •                   "Number of times the CC registers were read"),
    
  •          ADD_STAT(numCCRegWrites, statistics::units::Count::get(),
    
  •                   "Number of times the CC registers were written"),
    
  •          ADD_STAT(numMiscRegReads, statistics::units::Count::get(),
    
  •                   "Number of times the Misc registers were read"),
    
  •          ADD_STAT(numMiscRegWrites, statistics::units::Count::get(),
    
  •                   "Number of times the Misc registers were written"),
    
  •          ADD_STAT(numMemRefs, statistics::units::Count::get(),
    
  •                   "Number of memory refs"),
    
  •          ADD_STAT(numLoadInsts, statistics::units::Count::get(),
    
  •                   "Number of load instructions"),
    
  •          ADD_STAT(numStoreInsts, statistics::units::Count::get(),
    
  •                   "Number of store instructions"),
              ADD_STAT(numIdleCycles, statistics::units::Cycle::get(),
                       "Number of idle cycles"),
              ADD_STAT(numBusyCycles, statistics::units::Cycle::get(),
    

@@ -100,35 +148,64 @@
"Percentage of non-idle cycles"),
ADD_STAT(idleFraction, statistics::units::Ratio::get(),
"Percentage of idle cycles"),

  •          ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(),
    
  •                   "ICache total stall cycles"),
    
  •          ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(),
    
  •                   "DCache total stall cycles"),
    
  •          ADD_STAT(numBranches, statistics::units::Count::get(),
    
  •                   "Number of branches fetched"),
              ADD_STAT(numPredictedBranches,  
    

statistics::units::Count::get(),
"Number of branches predicted as taken"),
ADD_STAT(numBranchMispred, statistics::units::Count::get(),
"Number of branch mispredictions"),

  •          ADD_STAT(statExecutedInstType,  
    

statistics::units::Count::get(),

  •                   "Class of executed instruction."),
              numRegReads{
    
  •              &(cpu->executeStats[thread->threadId()]->numIntRegReads),
    
  •              &(cpu->executeStats[thread->threadId()]->numFpRegReads),
    
  •              &(cpu->executeStats[thread->threadId()]->numVecRegReads),
    
  •              &(cpu->executeStats[thread->threadId()]->numVecRegReads),
    

&(cpu->executeStats[thread->threadId()]->numVecPredRegReads),

  •              &(cpu->executeStats[thread->threadId()]->numCCRegReads),
    
  •              &numMatRegReads
    
  •              &numIntRegReads,
    
  •              &numFpRegReads,
    
  •              &numVecRegReads,
    
  •              &numVecRegReads,
    
  •              &numVecPredRegReads,
    
  •              &numMatRegReads,
    
  •              &numCCRegReads
              },
              numRegWrites{
    

&(cpu->executeStats[thread->threadId()]->numIntRegWrites),

  •              &(cpu->executeStats[thread->threadId()]->numFpRegWrites),
    

&(cpu->executeStats[thread->threadId()]->numVecRegWrites),

&(cpu->executeStats[thread->threadId()]->numVecRegWrites),

  •              &(cpu->executeStats[thread->threadId()]
    
  •                    ->numVecPredRegWrites),
    
  •              &(cpu->executeStats[thread->threadId()]->numCCRegWrites),
    
  •              &numMatRegWrites
    
  •              &numIntRegWrites,
    
  •              &numFpRegWrites,
    
  •              &numVecRegWrites,
    
  •              &numVecRegWrites,
    
  •              &numVecPredRegWrites,
    
  •              &numMatRegWrites,
    
  •              &numCCRegWrites
              }
        {
    
  •        numCCRegReads
    
  •            .flags(statistics::nozero);
    
  •        numCCRegWrites
    
  •            .flags(statistics::nozero);
    
  •        icacheStallCycles
    
  •            .prereq(icacheStallCycles);
    
  •        dcacheStallCycles
    
  •            .prereq(dcacheStallCycles);
    
  •        statExecutedInstType
    
  •            .init(enums::Num_OpClass)
    
  •            .flags(statistics::total | statistics::pdf |  
    

statistics::dist);
+

  •        for (unsigned i = 0; i < Num_OpClasses; ++i) {
    
  •            statExecutedInstType.subname(i, enums::OpClassStrings[i]);
    
  •        }
    
            idleFraction = statistics::constant(1.0) - notIdleFraction;
            numIdleCycles = idleFraction * cpu->baseStats.numCycles;
            numBusyCycles = notIdleFraction * cpu->baseStats.numCycles;
    
  •        numBranches
    
  •            .prereq(numBranches);
    
  •         numPredictedBranches
                .prereq(numPredictedBranches);
    

@@ -136,19 +213,73 @@
.prereq(numBranchMispred);
}

  •    // Number of simulated instructions
    
  •    statistics::Scalar numInsts;
    
  •    statistics::Scalar numOps;
    
  •    // Number of integer alu accesses
    
  •    statistics::Scalar numIntAluAccesses;
    
  •    // Number of float alu accesses
    
  •    statistics::Scalar numFpAluAccesses;
    
  •    // Number of vector alu accesses
    
  •    statistics::Scalar numVecAluAccesses;
    
  •     // Number of matrix alu accesses
        statistics::Scalar numMatAluAccesses;
    
        // Number of function calls/returns
        statistics::Scalar numCallsReturns;
    
  •    // Conditional control instructions;
    
  •    statistics::Scalar numCondCtrlInsts;
    
  •    // Number of int instructions
    
  •    statistics::Scalar numIntInsts;
    
  •    // Number of float instructions
    
  •    statistics::Scalar numFpInsts;
    
  •    // Number of vector instructions
    
  •    statistics::Scalar numVecInsts;
    
  •     // Number of matrix instructions
        statistics::Scalar numMatInsts;
    
  •    // Number of integer register file accesses
    
  •    statistics::Scalar numIntRegReads;
    
  •    statistics::Scalar numIntRegWrites;
    
  •    // Number of float register file accesses
    
  •    statistics::Scalar numFpRegReads;
    
  •    statistics::Scalar numFpRegWrites;
    
  •    // Number of vector register file accesses
    
  •    mutable statistics::Scalar numVecRegReads;
    
  •    statistics::Scalar numVecRegWrites;
    
  •    // Number of predicate register file accesses
    
  •    mutable statistics::Scalar numVecPredRegReads;
    
  •    statistics::Scalar numVecPredRegWrites;
    
  •     // Number of matrix register file accesses
        mutable statistics::Scalar numMatRegReads;
        statistics::Scalar numMatRegWrites;
    
  •    // Number of condition code register file accesses
    
  •    statistics::Scalar numCCRegReads;
    
  •    statistics::Scalar numCCRegWrites;
    
  •    // Number of misc register file accesses
    
  •    statistics::Scalar numMiscRegReads;
    
  •    statistics::Scalar numMiscRegWrites;
    
  •    // Number of simulated memory references
    
  •    statistics::Scalar numMemRefs;
    
  •    statistics::Scalar numLoadInsts;
    
  •    statistics::Scalar numStoreInsts;
    
  •     // Number of idle cycles
        statistics::Formula numIdleCycles;
    

@@ -159,13 +290,24 @@
statistics::Average notIdleFraction;
statistics::Formula idleFraction;

  •    // Number of cycles stalled for I-cache responses
    
  •    statistics::Scalar icacheStallCycles;
    
  •    // Number of cycles stalled for D-cache responses
    
  •    statistics::Scalar dcacheStallCycles;
    
  •     /// @{
    
  •    /// Total number of branches fetched
    
  •    statistics::Scalar numBranches;
        /// Number of branches predicted as taken
        statistics::Scalar numPredictedBranches;
        /// Number of mispredicted branches
        statistics::Scalar numBranchMispred;
        /// @}
    
  •    // Instruction mix histogram by OpClass
    
  •    statistics::Vector statExecutedInstType;
    
  •     std::array<statistics::Scalar *, CCRegClass + 1> numRegReads;
        std::array<statistics::Scalar *, CCRegClass + 1> numRegWrites;
    

@@ -226,7 +368,7 @@
RegVal
readMiscRegOperand(const StaticInst *si, int idx) override
{

  •    cpu->executeStats[thread->threadId()]->numMiscRegReads++;
    
  •    execContextStats.numMiscRegReads++;
        const RegId& reg = si->srcRegIdx(idx);
        assert(reg.is(MiscRegClass));
        return thread->readMiscReg(reg.index());
    

@@ -235,7 +377,7 @@
void
setMiscRegOperand(const StaticInst *si, int idx, RegVal val) override
{

  •    cpu->executeStats[thread->threadId()]->numMiscRegWrites++;
    
  •    execContextStats.numMiscRegWrites++;
        const RegId& reg = si->destRegIdx(idx);
        assert(reg.is(MiscRegClass));
        thread->setMiscReg(reg.index(), val);
    

@@ -248,7 +390,7 @@
RegVal
readMiscReg(int misc_reg) override
{

  •    cpu->executeStats[thread->threadId()]->numMiscRegReads++;
    
  •    execContextStats.numMiscRegReads++;
        return thread->readMiscReg(misc_reg);
    }
    

@@ -259,7 +401,7 @@
void
setMiscReg(int misc_reg, RegVal val) override
{

  •    cpu->executeStats[thread->threadId()]->numMiscRegWrites++;
    
  •    execContextStats.numMiscRegWrites++;
        thread->setMiscReg(misc_reg, val);
    }
    

--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/68717?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I2a88cadfee03c1fc81932e6548938db108786dd2
Gerrit-Change-Number: 68717
Gerrit-PatchSet: 2
Gerrit-Owner: Bobby Bruce bbruce@ucdavis.edu
Gerrit-Reviewer: Bobby Bruce bbruce@ucdavis.edu
Gerrit-Reviewer: Gabe Black gabe.black@gmail.com
Gerrit-Reviewer: Jason Lowe-Power jason@lowepower.com
Gerrit-Reviewer: Jason Lowe-Power power.jg@gmail.com
Gerrit-Reviewer: kokoro noreply+kokoro@google.com
Gerrit-MessageType: merged

Bobby Bruce has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/68717?usp=email ) Change subject: cpu: Revert CPU stats changes ...................................................................... cpu: Revert CPU stats changes This reverts this relationchain: https://gem5-review.googlesource.com/c/public/gem5/+/67396/6 This was pre-maturely submitted before all testing and checking was done. To be safe this has been reverted. When all testing and checks are completed, this revert will be undone. Change-Id: I2a88cadfee03c1fc81932e6548938db108786dd2 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68717 Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Maintainer: Bobby Bruce <bbruce@ucdavis.edu> Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu> Tested-by: kokoro <noreply+kokoro@google.com> --- M src/cpu/base.cc M src/cpu/base.hh M src/cpu/kvm/base.cc M src/cpu/kvm/base.hh M src/cpu/minor/execute.cc M src/cpu/minor/stats.cc M src/cpu/minor/stats.hh M src/cpu/o3/commit.cc M src/cpu/o3/commit.hh M src/cpu/o3/cpu.cc M src/cpu/o3/cpu.hh M src/cpu/o3/dyn_inst.hh M src/cpu/o3/fetch.cc M src/cpu/o3/fetch.hh M src/cpu/o3/iew.cc M src/cpu/o3/iew.hh M src/cpu/simple/base.cc M src/cpu/simple/base.hh M src/cpu/simple/exec_context.hh 19 files changed, 628 insertions(+), 542 deletions(-) Approvals: Bobby Bruce: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass Jason Lowe-Power: Looks good to me, approved diff --git a/src/cpu/base.cc b/src/cpu/base.cc index cee7647..d2c0a78 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -191,30 +191,6 @@ modelResetPort.onChange([this](const bool &new_val) { setReset(new_val); }); - // create a stat group object for each thread on this core - fetchStats.reserve(numThreads); - executeStats.reserve(numThreads); - commitStats.reserve(numThreads); - for (int i = 0; i < numThreads; i++) { - // create fetchStat object for thread i and set rate formulas - FetchCPUStats* 
fetchStatptr = new FetchCPUStats(this, i); - fetchStatptr->fetchRate = fetchStatptr->numInsts / baseStats.numCycles; - fetchStatptr->branchRate = fetchStatptr->numBranches / - baseStats.numCycles; - fetchStats.emplace_back(fetchStatptr); - - // create executeStat object for thread i and set rate formulas - ExecuteCPUStats* executeStatptr = new ExecuteCPUStats(this, i); - executeStatptr->instRate = executeStatptr->numInsts / - baseStats.numCycles; - executeStats.emplace_back(executeStatptr); - - // create commitStat object for thread i and set ipc, cpi formulas - CommitCPUStats* commitStatptr = new CommitCPUStats(this, i); - commitStatptr->ipc = commitStatptr->numInsts / baseStats.numCycles; - commitStatptr->cpi = baseStats.numCycles / commitStatptr->numInsts; - commitStats.emplace_back(commitStatptr); - } } void @@ -407,28 +383,13 @@ BaseCPU:: BaseCPUStats::BaseCPUStats(statistics::Group *parent) : statistics::Group(parent), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of instructions committed (core level)"), - ADD_STAT(numOps, statistics::units::Count::get(), - "Number of ops (including micro ops) committed (core level)"), ADD_STAT(numCycles, statistics::units::Cycle::get(), "Number of cpu cycles simulated"), - ADD_STAT(cpi, statistics::units::Rate< - statistics::units::Cycle, statistics::units::Count>::get(), - "CPI: cycles per instruction (core level)"), - ADD_STAT(ipc, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "IPC: instructions per cycle (core level)"), ADD_STAT(numWorkItemsStarted, statistics::units::Count::get(), "Number of work items this cpu started"), ADD_STAT(numWorkItemsCompleted, statistics::units::Count::get(), "Number of work items this cpu completed") { - cpi.precision(6); - cpi = numCycles / numInsts; - - ipc.precision(6); - ipc = numInsts / numCycles; } void @@ -866,215 +827,4 @@ hostOpRate = simOps / hostSeconds; } -BaseCPU:: -FetchCPUStats::FetchCPUStats(statistics::Group 
*parent, int thread_id) - : statistics::Group(parent, csprintf("fetchStats%i", thread_id).c_str()), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of instructions fetched (thread level)"), - ADD_STAT(numOps, statistics::units::Count::get(), - "Number of ops (including micro ops) fetched (thread level)"), - ADD_STAT(fetchRate, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "Number of inst fetches per cycle"), - ADD_STAT(numBranches, statistics::units::Count::get(), - "Number of branches fetched"), - ADD_STAT(branchRate, statistics::units::Ratio::get(), - "Number of branch fetches per cycle"), - ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(), - "ICache total stall cycles"), - ADD_STAT(numFetchSuspends, statistics::units::Count::get(), - "Number of times Execute suspended instruction fetching") - -{ - fetchRate - .flags(statistics::total); - - numBranches - .prereq(numBranches); - - branchRate - .flags(statistics::total); - - icacheStallCycles - .prereq(icacheStallCycles); - -} - -// means it is incremented in a vector indexing and not directly -BaseCPU:: -ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id) - : statistics::Group(parent, csprintf("executeStats%i", thread_id).c_str()), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of executed instructions"), - ADD_STAT(numNop, statistics::units::Count::get(), - "Number of nop insts executed"), - ADD_STAT(numBranches, statistics::units::Count::get(), - "Number of branches executed"), - ADD_STAT(numLoadInsts, statistics::units::Count::get(), - "Number of load instructions executed"), - ADD_STAT(numStoreInsts, statistics::units::Count::get(), - "Number of stores executed"), - ADD_STAT(instRate, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "Inst execution rate"), - ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(), - "DCache total stall cycles"), - 
ADD_STAT(numCCRegReads, statistics::units::Count::get(), - "Number of times the CC registers were read"), - ADD_STAT(numCCRegWrites, statistics::units::Count::get(), - "Number of times the CC registers were written"), - ADD_STAT(numFpAluAccesses, statistics::units::Count::get(), - "Number of float alu accesses"), - ADD_STAT(numFpRegReads, statistics::units::Count::get(), - "Number of times the floating registers were read"), - ADD_STAT(numFpRegWrites, statistics::units::Count::get(), - "Number of times the floating registers were written"), - ADD_STAT(numIntAluAccesses, statistics::units::Count::get(), - "Number of integer alu accesses"), - ADD_STAT(numIntRegReads, statistics::units::Count::get(), - "Number of times the integer registers were read"), - ADD_STAT(numIntRegWrites, statistics::units::Count::get(), - "Number of times the integer registers were written"), - ADD_STAT(numMemRefs, statistics::units::Count::get(), - "Number of memory refs"), - ADD_STAT(numMiscRegReads, statistics::units::Count::get(), - "Number of times the Misc registers were read"), - ADD_STAT(numMiscRegWrites, statistics::units::Count::get(), - "Number of times the Misc registers were written"), - ADD_STAT(numVecAluAccesses, statistics::units::Count::get(), - "Number of vector alu accesses"), - ADD_STAT(numVecPredRegReads, statistics::units::Count::get(), - "Number of times the predicate registers were read"), - ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(), - "Number of times the predicate registers were written"), - ADD_STAT(numVecRegReads, statistics::units::Count::get(), - "Number of times the vector registers were read"), - ADD_STAT(numVecRegWrites, statistics::units::Count::get(), - "Number of times the vector registers were written"), - ADD_STAT(numDiscardedOps, statistics::units::Count::get(), - "Number of ops (including micro ops) which were discarded before " - "commit") -{ - numStoreInsts = numMemRefs - numLoadInsts; - - dcacheStallCycles - 
.prereq(dcacheStallCycles); - numCCRegReads - .prereq(numCCRegReads) - .flags(statistics::nozero); - numCCRegWrites - .prereq(numCCRegWrites) - .flags(statistics::nozero); - numFpAluAccesses - .prereq(numFpAluAccesses); - numFpRegReads - .prereq(numFpRegReads); - numIntAluAccesses - .prereq(numIntAluAccesses); - numIntRegReads - .prereq(numIntRegReads); - numIntRegWrites - .prereq(numIntRegWrites); - numMiscRegReads - .prereq(numMiscRegReads); - numMiscRegWrites - .prereq(numMiscRegWrites); - numVecPredRegReads - .prereq(numVecPredRegReads); - numVecPredRegWrites - .prereq(numVecPredRegWrites); - numVecRegReads - .prereq(numVecRegReads); - numVecRegWrites - .prereq(numVecRegWrites); -} - -BaseCPU:: -CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id) - : statistics::Group(parent, csprintf("commitStats%i", thread_id).c_str()), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of instructions committed (thread level)"), - ADD_STAT(numOps, statistics::units::Count::get(), - "Number of ops (including micro ops) committed (thread level)"), - ADD_STAT(numInstsNotNOP, statistics::units::Count::get(), - "Number of instructions committed excluding NOPs or prefetches"), - ADD_STAT(numOpsNotNOP, statistics::units::Count::get(), - "Number of Ops (including micro ops) Simulated"), - ADD_STAT(cpi, statistics::units::Rate< - statistics::units::Cycle, statistics::units::Count>::get(), - "CPI: cycles per instruction (thread level)"), - ADD_STAT(ipc, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "IPC: instructions per cycle (thread level)"), - ADD_STAT(numMemRefs, statistics::units::Count::get(), - "Number of memory references committed"), - ADD_STAT(numFpInsts, statistics::units::Count::get(), - "Number of float instructions"), - ADD_STAT(numIntInsts, statistics::units::Count::get(), - "Number of integer instructions"), - ADD_STAT(numLoadInsts, statistics::units::Count::get(), - "Number of load 
instructions"), - ADD_STAT(numStoreInsts, statistics::units::Count::get(), - "Number of store instructions"), - ADD_STAT(numVecInsts, statistics::units::Count::get(), - "Number of vector instructions"), - ADD_STAT(committedInstType, statistics::units::Count::get(), - "Class of committed instruction."), - ADD_STAT(committedControl, statistics::units::Count::get(), - "Class of control type instructions committed") -{ - numInsts - .prereq(numInsts); - - cpi.precision(6); - ipc.precision(6); - - committedInstType - .init(enums::Num_OpClass) - .flags(statistics::total | statistics::pdf | statistics::dist); - - for (unsigned i = 0; i < Num_OpClasses; ++i) { - committedInstType.subname(i, enums::OpClassStrings[i]); - } - - committedControl - .init(StaticInstFlags::Flags::Num_Flags) - .flags(statistics::nozero); - - for (unsigned i = 0; i < StaticInstFlags::Flags::Num_Flags; i++) { - committedControl.subname(i, StaticInstFlags::FlagsStrings[i]); - } -} - - -void -BaseCPU:: -CommitCPUStats::updateComCtrlStats(const StaticInstPtr staticInst) -{ - /* Add a count for every control instruction type */ - if (staticInst->isControl()) { - if (staticInst->isReturn()) { - committedControl[gem5::StaticInstFlags::Flags::IsReturn]++; - } - if (staticInst->isCall()) { - committedControl[gem5::StaticInstFlags::Flags::IsCall]++; - } - if (staticInst->isDirectCtrl()) { - committedControl[gem5::StaticInstFlags::Flags::IsDirectControl]++; - } - if (staticInst->isIndirectCtrl()) { - committedControl - [gem5::StaticInstFlags::Flags::IsIndirectControl]++; - } - if (staticInst->isCondCtrl()) { - committedControl[gem5::StaticInstFlags::Flags::IsCondControl]++; - } - if (staticInst->isUncondCtrl()) { - committedControl[gem5::StaticInstFlags::Flags::IsUncondControl]++; - } - committedControl[gem5::StaticInstFlags::Flags::IsControl]++; - } - -} - } // namespace gem5 diff --git a/src/cpu/base.hh b/src/cpu/base.hh index fc22abc..084d9b9 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -43,7 +43,6 
@@ #define __CPU_BASE_HH__ #include <vector> -#include <memory> #include "arch/generic/interrupts.hh" #include "base/statistics.hh" @@ -633,14 +632,8 @@ struct BaseCPUStats : public statistics::Group { BaseCPUStats(statistics::Group *parent); - // Number of CPU insts and ops committed at CPU core level - statistics::Scalar numInsts; - statistics::Scalar numOps; // Number of CPU cycles simulated statistics::Scalar numCycles; - /* CPI/IPC for total cycle counts and macro insts */ - statistics::Formula cpi; - statistics::Formula ipc; statistics::Scalar numWorkItemsStarted; statistics::Scalar numWorkItemsCompleted; } baseStats; @@ -683,141 +676,6 @@ const Cycles pwrGatingLatency; const bool powerGatingOnIdle; EventFunctionWrapper enterPwrGatingEvent; - - public: - struct FetchCPUStats : public statistics::Group - { - FetchCPUStats(statistics::Group *parent, int thread_id); - - /* Total number of instructions fetched */ - statistics::Scalar numInsts; - - /* Total number of operations fetched */ - statistics::Scalar numOps; - - /* Number of instruction fetched per cycle. */ - statistics::Formula fetchRate; - - /* Total number of branches fetched */ - statistics::Scalar numBranches; - - /* Number of branch fetches per cycle. 
*/ - statistics::Formula branchRate; - - /* Number of cycles stalled due to an icache miss */ - statistics::Scalar icacheStallCycles; - - /* Number of times fetch was asked to suspend by Execute */ - statistics::Scalar numFetchSuspends; - - }; - - struct ExecuteCPUStats: public statistics::Group - { - ExecuteCPUStats(statistics::Group *parent, int thread_id); - - /* Stat for total number of executed instructions */ - statistics::Scalar numInsts; - /* Number of executed nops */ - statistics::Scalar numNop; - /* Number of executed branches */ - statistics::Scalar numBranches; - /* Stat for total number of executed load instructions */ - statistics::Scalar numLoadInsts; - /* Number of executed store instructions */ - statistics::Formula numStoreInsts; - /* Number of instructions executed per cycle */ - statistics::Formula instRate; - - /* Number of cycles stalled for D-cache responses */ - statistics::Scalar dcacheStallCycles; - - /* Number of condition code register file accesses */ - statistics::Scalar numCCRegReads; - statistics::Scalar numCCRegWrites; - - /* number of float alu accesses */ - statistics::Scalar numFpAluAccesses; - - /* Number of float register file accesses */ - statistics::Scalar numFpRegReads; - statistics::Scalar numFpRegWrites; - - /* Number of integer alu accesses */ - statistics::Scalar numIntAluAccesses; - - /* Number of integer register file accesses */ - statistics::Scalar numIntRegReads; - statistics::Scalar numIntRegWrites; - - /* number of simulated memory references */ - statistics::Scalar numMemRefs; - - /* Number of misc register file accesses */ - statistics::Scalar numMiscRegReads; - statistics::Scalar numMiscRegWrites; - - /* Number of vector alu accesses */ - statistics::Scalar numVecAluAccesses; - - /* Number of predicate register file accesses */ - mutable statistics::Scalar numVecPredRegReads; - statistics::Scalar numVecPredRegWrites; - - /* Number of vector register file accesses */ - mutable statistics::Scalar 
numVecRegReads; - statistics::Scalar numVecRegWrites; - - /* Number of ops discarded before committing */ - statistics::Scalar numDiscardedOps; - }; - - struct CommitCPUStats: public statistics::Group - { - CommitCPUStats(statistics::Group *parent, int thread_id); - - /* Number of simulated instructions committed */ - statistics::Scalar numInsts; - statistics::Scalar numOps; - - /* Number of instructions committed that are not NOP or prefetches */ - statistics::Scalar numInstsNotNOP; - statistics::Scalar numOpsNotNOP; - - /* CPI/IPC for total cycle counts and macro insts */ - statistics::Formula cpi; - statistics::Formula ipc; - - /* Number of committed memory references. */ - statistics::Scalar numMemRefs; - - /* Number of float instructions */ - statistics::Scalar numFpInsts; - - /* Number of int instructions */ - statistics::Scalar numIntInsts; - - /* number of load instructions */ - statistics::Scalar numLoadInsts; - - /* Number of store instructions */ - statistics::Scalar numStoreInsts; - - /* Number of vector instructions */ - statistics::Scalar numVecInsts; - - /* Number of instructions committed by type (OpClass) */ - statistics::Vector committedInstType; - - /* number of control instructions committed by control inst type */ - statistics::Vector committedControl; - void updateComCtrlStats(const StaticInstPtr staticInst); - - }; - - std::vector<std::unique_ptr<FetchCPUStats>> fetchStats; - std::vector<std::unique_ptr<ExecuteCPUStats>> executeStats; - std::vector<std::unique_ptr<CommitCPUStats>> commitStats; }; } // namespace gem5 diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc index e22e162..b76bddc 100644 --- a/src/cpu/kvm/base.cc +++ b/src/cpu/kvm/base.cc @@ -261,6 +261,8 @@ BaseKvmCPU::StatGroup::StatGroup(statistics::Group *parent) : statistics::Group(parent), + ADD_STAT(committedInsts, statistics::units::Count::get(), + "Number of instructions committed"), ADD_STAT(numVMExits, statistics::units::Count::get(), "total number of KVM exits"), 
ADD_STAT(numVMHalfEntries, statistics::units::Count::get(), @@ -776,8 +778,7 @@ /* Update statistics */ baseStats.numCycles += simCyclesExecuted;; - commitStats[thread->threadId()]->numInsts += instsExecuted; - baseStats.numInsts += instsExecuted; + stats.committedInsts += instsExecuted; ctrInsts += instsExecuted; DPRINTF(KvmRun, diff --git a/src/cpu/kvm/base.hh b/src/cpu/kvm/base.hh index 7bbf393..2d81c7c 100644 --- a/src/cpu/kvm/base.hh +++ b/src/cpu/kvm/base.hh @@ -804,6 +804,7 @@ struct StatGroup : public statistics::Group { StatGroup(statistics::Group *parent); + statistics::Scalar committedInsts; statistics::Scalar numVMExits; statistics::Scalar numVMHalfEntries; statistics::Scalar numExitSignal; diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index 2908c22..5eaaf58 100644 --- a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -871,18 +871,49 @@ { thread->numInst++; thread->threadStats.numInsts++; - cpu.commitStats[inst->id.threadId]->numInsts++; - cpu.baseStats.numInsts++; + cpu.stats.numInsts++; /* Act on events related to instruction counts */ thread->comInstEventQueue.serviceEvents(thread->numInst); } thread->numOp++; thread->threadStats.numOps++; - cpu.commitStats[inst->id.threadId]->numOps++; - cpu.baseStats.numOps++; - cpu.commitStats[inst->id.threadId] - ->committedInstType[inst->staticInst->opClass()]++; + cpu.stats.numOps++; + cpu.stats.committedInstType[inst->id.threadId] + [inst->staticInst->opClass()]++; + + /** Add a count for every control instruction */ + if (inst->staticInst->isControl()) { + if (inst->staticInst->isReturn()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsReturn]++; + } + if (inst->staticInst->isCall()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsCall]++; + } + if (inst->staticInst->isDirectCtrl()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsDirectControl]++; + } + if 
(inst->staticInst->isIndirectCtrl()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsIndirectControl]++; + } + if (inst->staticInst->isCondCtrl()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsCondControl]++; + } + if (inst->staticInst->isUncondCtrl()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsUncondControl]++; + + } + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsControl]++; + } + + /* Set the CP SeqNum to the numOps commit number */ if (inst->traceData) @@ -1023,7 +1054,7 @@ DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute" " inst: %s\n", thread_id, *inst); - cpu.fetchStats[thread_id]->numFetchSuspends++; + cpu.stats.numFetchSuspends++; updateBranchData(thread_id, BranchData::SuspendThread, inst, resume_pc, branch); @@ -1337,7 +1368,7 @@ *inst, ex_info.streamSeqNum); if (fault == NoFault) - cpu.executeStats[thread_id]->numDiscardedOps++; + cpu.stats.numDiscardedOps++; } /* Mark the mem inst as being in the LSQ */ diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc index e31cbe9..64d4c47 100644 --- a/src/cpu/minor/stats.cc +++ b/src/cpu/minor/stats.cc @@ -45,13 +45,47 @@ MinorStats::MinorStats(BaseCPU *base_cpu) : statistics::Group(base_cpu), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of instructions committed"), + ADD_STAT(numOps, statistics::units::Count::get(), + "Number of ops (including micro ops) committed"), + ADD_STAT(numDiscardedOps, statistics::units::Count::get(), + "Number of ops (including micro ops) which were discarded before " + "commit"), + ADD_STAT(numFetchSuspends, statistics::units::Count::get(), + "Number of times Execute suspended instruction fetching"), ADD_STAT(quiesceCycles, statistics::units::Cycle::get(), "Total number of cycles that CPU has spent quiesced or waiting " - "for an interrupt") + "for an interrupt"), + ADD_STAT(cpi, 
statistics::units::Rate< + statistics::units::Cycle, statistics::units::Count>::get(), + "CPI: cycles per instruction"), + ADD_STAT(ipc, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "IPC: instructions per cycle"), + ADD_STAT(committedInstType, statistics::units::Count::get(), + "Class of committed instruction"), + ADD_STAT(committedControl, statistics::units::Count::get(), + "Class of control type instructions committed") { quiesceCycles.prereq(quiesceCycles); + cpi.precision(6); + cpi = base_cpu->baseStats.numCycles / numInsts; + + ipc.precision(6); + ipc = numInsts / base_cpu->baseStats.numCycles; + + committedInstType + .init(base_cpu->numThreads, enums::Num_OpClass) + .flags(statistics::total | statistics::pdf | statistics::dist); + committedInstType.ysubnames(enums::OpClassStrings); + + committedControl + .init(base_cpu->numThreads, StaticInstFlags::Flags::Num_Flags) + .flags(statistics::nozero); + committedControl.ysubnames(StaticInstFlags::FlagsStrings); } } // namespace minor diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh index 98ac80f..1ab81f4 100644 --- a/src/cpu/minor/stats.hh +++ b/src/cpu/minor/stats.hh @@ -59,9 +59,31 @@ { MinorStats(BaseCPU *parent); + /** Number of simulated instructions */ + statistics::Scalar numInsts; + + /** Number of simulated insts and microops */ + statistics::Scalar numOps; + + /** Number of ops discarded before committing */ + statistics::Scalar numDiscardedOps; + + /** Number of times fetch was asked to suspend by Execute */ + statistics::Scalar numFetchSuspends; + /** Number of cycles in quiescent state */ statistics::Scalar quiesceCycles; + /** CPI/IPC for total cycle counts and macro insts */ + statistics::Formula cpi; + statistics::Formula ipc; + + /** Number of instructions by type (OpClass) */ + statistics::Vector2d committedInstType; + + /** Number of branches commited */ + statistics::Vector2d committedControl; + }; } // namespace minor diff --git 
a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc index e1f0168..38dce83 100644 --- a/src/cpu/o3/commit.cc +++ b/src/cpu/o3/commit.cc @@ -156,10 +156,25 @@ "The number of times a branch was mispredicted"), ADD_STAT(numCommittedDist, statistics::units::Count::get(), "Number of insts commited each cycle"), + ADD_STAT(instsCommitted, statistics::units::Count::get(), + "Number of instructions committed"), + ADD_STAT(opsCommitted, statistics::units::Count::get(), + "Number of ops (including micro ops) committed"), + ADD_STAT(memRefs, statistics::units::Count::get(), + "Number of memory references committed"), + ADD_STAT(loads, statistics::units::Count::get(), "Number of loads committed"), ADD_STAT(amos, statistics::units::Count::get(), "Number of atomic instructions committed"), ADD_STAT(membars, statistics::units::Count::get(), "Number of memory barriers committed"), + ADD_STAT(branches, statistics::units::Count::get(), + "Number of branches committed"), + ADD_STAT(vectorInstructions, statistics::units::Count::get(), + "Number of committed Vector instructions."), + ADD_STAT(floating, statistics::units::Count::get(), + "Number of committed floating point instructions."), + ADD_STAT(integer, statistics::units::Count::get(), + "Number of committed integer instructions."), ADD_STAT(functionCalls, statistics::units::Count::get(), "Number of function calls committed."), ADD_STAT(committedInstType, statistics::units::Count::get(), @@ -177,6 +192,22 @@ .init(0,commit->commitWidth,1) .flags(statistics::pdf); + instsCommitted + .init(cpu->numThreads) + .flags(total); + + opsCommitted + .init(cpu->numThreads) + .flags(total); + + memRefs + .init(cpu->numThreads) + .flags(total); + + loads + .init(cpu->numThreads) + .flags(total); + amos .init(cpu->numThreads) .flags(total); @@ -185,6 +216,22 @@ .init(cpu->numThreads) .flags(total); + branches + .init(cpu->numThreads) + .flags(total); + + vectorInstructions + .init(cpu->numThreads) + .flags(total); + + floating + 
.init(cpu->numThreads) + .flags(total); + + integer + .init(cpu->numThreads) + .flags(total); + functionCalls .init(commit->numThreads) .flags(total); @@ -1336,12 +1383,9 @@ { ThreadID tid = inst->threadNumber; - if (!inst->isMicroop() || inst->isLastMicroop()) { - cpu->commitStats[tid]->numInsts++; - cpu->baseStats.numInsts++; - } - cpu->commitStats[tid]->numOps++; - cpu->baseStats.numOps++; + if (!inst->isMicroop() || inst->isLastMicroop()) + stats.instsCommitted[tid]++; + stats.opsCommitted[tid]++; // To match the old model, don't count nops and instruction // prefetches towards the total commit count. @@ -1352,20 +1396,21 @@ // // Control Instructions // - cpu->commitStats[tid]->updateComCtrlStats(inst->staticInst); + if (inst->isControl()) + stats.branches[tid]++; // // Memory references // if (inst->isMemRef()) { - cpu->commitStats[tid]->numMemRefs++; + stats.memRefs[tid]++; if (inst->isLoad()) { - cpu->commitStats[tid]->numLoadInsts++; + stats.loads[tid]++; } - if (inst->isStore()) { - cpu->commitStats[tid]->numStoreInsts++; + if (inst->isAtomic()) { + stats.amos[tid]++; } } @@ -1375,14 +1420,14 @@ // Integer Instruction if (inst->isInteger()) - cpu->commitStats[tid]->numIntInsts++; + stats.integer[tid]++; // Floating Point Instruction if (inst->isFloating()) - cpu->commitStats[tid]->numFpInsts++; + stats.floating[tid]++; // Vector Instruction if (inst->isVector()) - cpu->commitStats[tid]->numVecInsts++; + stats.vectorInstructions[tid]++; // Function Calls if (inst->isCall()) diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index eccd023..cf4eaf5 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -479,10 +479,26 @@ /** Distribution of the number of committed instructions each cycle. */ statistics::Distribution numCommittedDist; + /** Total number of instructions committed. */ + statistics::Vector instsCommitted; + /** Total number of ops (including micro ops) committed. 
*/ + statistics::Vector opsCommitted; + /** Stat for the total number of committed memory references. */ + statistics::Vector memRefs; + /** Stat for the total number of committed loads. */ + statistics::Vector loads; /** Stat for the total number of committed atomics. */ statistics::Vector amos; /** Total number of committed memory barriers. */ statistics::Vector membars; + /** Total number of committed branches. */ + statistics::Vector branches; + /** Total number of vector instructions */ + statistics::Vector vectorInstructions; + /** Total number of floating point instructions */ + statistics::Vector floating; + /** Total number of integer instructions */ + statistics::Vector integer; /** Total number of function calls */ statistics::Vector functionCalls; /** Committed instructions by instruction type (OpClass) */ diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 93c58fe..d2bacaa 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -328,7 +328,47 @@ "to idling"), ADD_STAT(quiesceCycles, statistics::units::Cycle::get(), "Total number of cycles that CPU has spent quiesced or waiting " - "for an interrupt") + "for an interrupt"), + ADD_STAT(committedInsts, statistics::units::Count::get(), + "Number of Instructions Simulated"), + ADD_STAT(committedOps, statistics::units::Count::get(), + "Number of Ops (including micro ops) Simulated"), + ADD_STAT(cpi, statistics::units::Rate< + statistics::units::Cycle, statistics::units::Count>::get(), + "CPI: Cycles Per Instruction"), + ADD_STAT(totalCpi, statistics::units::Rate< + statistics::units::Cycle, statistics::units::Count>::get(), + "CPI: Total CPI of All Threads"), + ADD_STAT(ipc, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "IPC: Instructions Per Cycle"), + ADD_STAT(totalIpc, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "IPC: Total IPC of All Threads"), + ADD_STAT(intRegfileReads, statistics::units::Count::get(), + 
"Number of integer regfile reads"), + ADD_STAT(intRegfileWrites, statistics::units::Count::get(), + "Number of integer regfile writes"), + ADD_STAT(fpRegfileReads, statistics::units::Count::get(), + "Number of floating regfile reads"), + ADD_STAT(fpRegfileWrites, statistics::units::Count::get(), + "Number of floating regfile writes"), + ADD_STAT(vecRegfileReads, statistics::units::Count::get(), + "number of vector regfile reads"), + ADD_STAT(vecRegfileWrites, statistics::units::Count::get(), + "number of vector regfile writes"), + ADD_STAT(vecPredRegfileReads, statistics::units::Count::get(), + "number of predicate regfile reads"), + ADD_STAT(vecPredRegfileWrites, statistics::units::Count::get(), + "number of predicate regfile writes"), + ADD_STAT(ccRegfileReads, statistics::units::Count::get(), + "number of cc regfile reads"), + ADD_STAT(ccRegfileWrites, statistics::units::Count::get(), + "number of cc regfile writes"), + ADD_STAT(miscRegfileReads, statistics::units::Count::get(), + "number of misc regfile reads"), + ADD_STAT(miscRegfileWrites, statistics::units::Count::get(), + "number of misc regfile writes") { // Register any of the O3CPU's stats here. 
timesIdled @@ -340,6 +380,69 @@ quiesceCycles .prereq(quiesceCycles); + // Number of Instructions simulated + // -------------------------------- + // Should probably be in Base CPU but need templated + // MaxThreads so put in here instead + committedInsts + .init(cpu->numThreads) + .flags(statistics::total); + + committedOps + .init(cpu->numThreads) + .flags(statistics::total); + + cpi + .precision(6); + cpi = cpu->baseStats.numCycles / committedInsts; + + totalCpi + .precision(6); + totalCpi = cpu->baseStats.numCycles / sum(committedInsts); + + ipc + .precision(6); + ipc = committedInsts / cpu->baseStats.numCycles; + + totalIpc + .precision(6); + totalIpc = sum(committedInsts) / cpu->baseStats.numCycles; + + intRegfileReads + .prereq(intRegfileReads); + + intRegfileWrites + .prereq(intRegfileWrites); + + fpRegfileReads + .prereq(fpRegfileReads); + + fpRegfileWrites + .prereq(fpRegfileWrites); + + vecRegfileReads + .prereq(vecRegfileReads); + + vecRegfileWrites + .prereq(vecRegfileWrites); + + vecPredRegfileReads + .prereq(vecPredRegfileReads); + + vecPredRegfileWrites + .prereq(vecPredRegfileWrites); + + ccRegfileReads + .prereq(ccRegfileReads); + + ccRegfileWrites + .prereq(ccRegfileWrites); + + miscRegfileReads + .prereq(miscRegfileReads); + + miscRegfileWrites + .prereq(miscRegfileWrites); } void @@ -916,7 +1019,7 @@ RegVal CPU::readMiscReg(int misc_reg, ThreadID tid) { - executeStats[tid]->numMiscRegReads++; + cpuStats.miscRegfileReads++; return isa[tid]->readMiscReg(misc_reg); } @@ -929,29 +1032,29 @@ void CPU::setMiscReg(int misc_reg, RegVal val, ThreadID tid) { - executeStats[tid]->numMiscRegWrites++; + cpuStats.miscRegfileWrites++; isa[tid]->setMiscReg(misc_reg, val); } RegVal -CPU::getReg(PhysRegIdPtr phys_reg, ThreadID tid) +CPU::getReg(PhysRegIdPtr phys_reg) { switch (phys_reg->classValue()) { case IntRegClass: - executeStats[tid]->numIntRegReads++; + cpuStats.intRegfileReads++; break; case FloatRegClass: - executeStats[tid]->numFpRegReads++; + 
cpuStats.fpRegfileReads++; break; case CCRegClass: - executeStats[tid]->numCCRegReads++; + cpuStats.ccRegfileReads++; break; case VecRegClass: case VecElemClass: - executeStats[tid]->numVecRegReads++; + cpuStats.vecRegfileReads++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegReads++; + cpuStats.vecPredRegfileReads++; break; default: break; @@ -960,24 +1063,24 @@ } void -CPU::getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid) +CPU::getReg(PhysRegIdPtr phys_reg, void *val) { switch (phys_reg->classValue()) { case IntRegClass: - executeStats[tid]->numIntRegReads++; + cpuStats.intRegfileReads++; break; case FloatRegClass: - executeStats[tid]->numFpRegReads++; + cpuStats.fpRegfileReads++; break; case CCRegClass: - executeStats[tid]->numCCRegReads++; + cpuStats.ccRegfileReads++; break; case VecRegClass: case VecElemClass: - executeStats[tid]->numVecRegReads++; + cpuStats.vecRegfileReads++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegReads++; + cpuStats.vecPredRegfileReads++; break; default: break; @@ -986,14 +1089,14 @@ } void * -CPU::getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid) +CPU::getWritableReg(PhysRegIdPtr phys_reg) { switch (phys_reg->classValue()) { case VecRegClass: - executeStats[tid]->numVecRegReads++; + cpuStats.vecRegfileReads++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegReads++; + cpuStats.vecPredRegfileReads++; break; default: break; @@ -1002,24 +1105,24 @@ } void -CPU::setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid) +CPU::setReg(PhysRegIdPtr phys_reg, RegVal val) { switch (phys_reg->classValue()) { case IntRegClass: - executeStats[tid]->numIntRegWrites++; + cpuStats.intRegfileWrites++; break; case FloatRegClass: - executeStats[tid]->numFpRegWrites++; + cpuStats.fpRegfileWrites++; break; case CCRegClass: - executeStats[tid]->numCCRegWrites++; + cpuStats.ccRegfileWrites++; break; case VecRegClass: case VecElemClass: - executeStats[tid]->numVecRegWrites++; + 
cpuStats.vecRegfileWrites++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegWrites++; + cpuStats.vecPredRegfileWrites++; break; default: break; @@ -1028,24 +1131,24 @@ } void -CPU::setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid) +CPU::setReg(PhysRegIdPtr phys_reg, const void *val) { switch (phys_reg->classValue()) { case IntRegClass: - executeStats[tid]->numIntRegWrites++; + cpuStats.intRegfileWrites++; break; case FloatRegClass: - executeStats[tid]->numFpRegWrites++; + cpuStats.fpRegfileWrites++; break; case CCRegClass: - executeStats[tid]->numCCRegWrites++; + cpuStats.ccRegfileWrites++; break; case VecRegClass: case VecElemClass: - executeStats[tid]->numVecRegWrites++; + cpuStats.vecRegfileWrites++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegWrites++; + cpuStats.vecPredRegfileWrites++; break; default: break; @@ -1127,14 +1230,14 @@ if (!inst->isMicroop() || inst->isLastMicroop()) { thread[tid]->numInst++; thread[tid]->threadStats.numInsts++; - commitStats[tid]->numInstsNotNOP++; + cpuStats.committedInsts[tid]++; // Check for instruction-count-based events. 
thread[tid]->comInstEventQueue.serviceEvents(thread[tid]->numInst); } thread[tid]->numOp++; thread[tid]->threadStats.numOps++; - commitStats[tid]->numOpsNotNOP++; + cpuStats.committedOps[tid]++; probeInstCommit(inst->staticInst, inst->pcState().instAddr()); } diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 7dc3784..08a1312 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -310,12 +310,12 @@ */ void setMiscReg(int misc_reg, RegVal val, ThreadID tid); - RegVal getReg(PhysRegIdPtr phys_reg, ThreadID tid); - void getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid); - void *getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid); + RegVal getReg(PhysRegIdPtr phys_reg); + void getReg(PhysRegIdPtr phys_reg, void *val); + void *getWritableReg(PhysRegIdPtr phys_reg); - void setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid); - void setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid); + void setReg(PhysRegIdPtr phys_reg, RegVal val); + void setReg(PhysRegIdPtr phys_reg, const void *val); /** Architectural register accessors. Looks up in the commit * rename table to obtain the true physical index of the @@ -581,7 +581,38 @@ /** Stat for total number of cycles the CPU spends descheduled due to a * quiesce operation or waiting for an interrupt. */ statistics::Scalar quiesceCycles; + /** Stat for the number of committed instructions per thread. */ + statistics::Vector committedInsts; + /** Stat for the number of committed ops (including micro ops) per + * thread. */ + statistics::Vector committedOps; + /** Stat for the CPI per thread. */ + statistics::Formula cpi; + /** Stat for the total CPI. */ + statistics::Formula totalCpi; + /** Stat for the IPC per thread. */ + statistics::Formula ipc; + /** Stat for the total IPC. 
*/ + statistics::Formula totalIpc; + //number of integer register file accesses + statistics::Scalar intRegfileReads; + statistics::Scalar intRegfileWrites; + //number of float register file accesses + statistics::Scalar fpRegfileReads; + statistics::Scalar fpRegfileWrites; + //number of vector register file accesses + mutable statistics::Scalar vecRegfileReads; + statistics::Scalar vecRegfileWrites; + //number of predicate register file accesses + mutable statistics::Scalar vecPredRegfileReads; + statistics::Scalar vecPredRegfileWrites; + //number of CC register file accesses + statistics::Scalar ccRegfileReads; + statistics::Scalar ccRegfileWrites; + //number of misc + statistics::Scalar miscRegfileReads; + statistics::Scalar miscRegfileWrites; } cpuStats; public: diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index c759c5e..54c0385 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -1086,10 +1086,10 @@ if (bytes == sizeof(RegVal)) { setRegOperand(staticInst.get(), idx, - cpu->getReg(prev_phys_reg, threadNumber)); + cpu->getReg(prev_phys_reg)); } else { uint8_t val[original_dest_reg.regClass().regBytes()]; - cpu->getReg(prev_phys_reg, val, threadNumber); + cpu->getReg(prev_phys_reg, val); setRegOperand(staticInst.get(), idx, val); } } @@ -1116,7 +1116,7 @@ const PhysRegIdPtr reg = renamedSrcIdx(idx); if (reg->is(InvalidRegClass)) return 0; - return cpu->getReg(reg, threadNumber); + return cpu->getReg(reg); } void @@ -1125,13 +1125,13 @@ const PhysRegIdPtr reg = renamedSrcIdx(idx); if (reg->is(InvalidRegClass)) return; - cpu->getReg(reg, val, threadNumber); + cpu->getReg(reg, val); } void * getWritableRegOperand(const StaticInst *si, int idx) override { - return cpu->getWritableReg(renamedDestIdx(idx), threadNumber); + return cpu->getWritableReg(renamedDestIdx(idx)); } /** @todo: Make results into arrays so they can handle multiple dest @@ -1143,7 +1143,7 @@ const PhysRegIdPtr reg = renamedDestIdx(idx); if (reg->is(InvalidRegClass)) 
return; - cpu->setReg(reg, val, threadNumber); + cpu->setReg(reg, val); setResult(reg->regClass(), val); } @@ -1153,7 +1153,7 @@ const PhysRegIdPtr reg = renamedDestIdx(idx); if (reg->is(InvalidRegClass)) return; - cpu->setReg(reg, val, threadNumber); + cpu->setReg(reg, val); setResult(reg->regClass(), val); } }; diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc index f5fc6c6..d3cdd2c 100644 --- a/src/cpu/o3/fetch.cc +++ b/src/cpu/o3/fetch.cc @@ -158,6 +158,12 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch) : statistics::Group(cpu, "fetch"), + ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(), + "Number of cycles fetch is stalled on an Icache miss"), + ADD_STAT(insts, statistics::units::Count::get(), + "Number of instructions fetch has processed"), + ADD_STAT(branches, statistics::units::Count::get(), + "Number of branches that fetch encountered"), ADD_STAT(predictedBranches, statistics::units::Count::get(), "Number of branches that fetch has predicted taken"), ADD_STAT(cycles, statistics::units::Cycle::get(), @@ -194,8 +200,21 @@ "Number of instructions fetched each cycle (Total)"), ADD_STAT(idleRate, statistics::units::Ratio::get(), "Ratio of cycles fetch was idle", - idleCycles / cpu->baseStats.numCycles) + idleCycles / cpu->baseStats.numCycles), + ADD_STAT(branchRate, statistics::units::Ratio::get(), + "Number of branch fetches per cycle", + branches / cpu->baseStats.numCycles), + ADD_STAT(rate, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "Number of inst fetches per cycle", + insts / cpu->baseStats.numCycles) { + icacheStallCycles + .prereq(icacheStallCycles); + insts + .prereq(insts); + branches + .prereq(branches); predictedBranches .prereq(predictedBranches); cycles @@ -233,6 +252,10 @@ .flags(statistics::pdf); idleRate .prereq(idleRate); + branchRate + .flags(statistics::total); + rate + .flags(statistics::total); } void Fetch::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer) @@ 
-517,7 +540,7 @@ inst->setPredTarg(next_pc); inst->setPredTaken(predict_taken); - cpu->fetchStats[tid]->numBranches++; + ++fetchStats.branches; if (predict_taken) { ++fetchStats.predictedBranches; @@ -1123,7 +1146,7 @@ fetchCacheLine(fetchAddr, tid, this_pc.instAddr()); if (fetchStatus[tid] == IcacheWaitResponse) - cpu->fetchStats[tid]->icacheStallCycles++; + ++fetchStats.icacheStallCycles; else if (fetchStatus[tid] == ItlbWait) ++fetchStats.tlbCycles; else @@ -1219,7 +1242,7 @@ staticInst = dec_ptr->decode(this_pc); // Increment stat of fetched instructions. - cpu->fetchStats[tid]->numInsts++; + ++fetchStats.insts; if (staticInst->isMacroop()) { curMacroop = staticInst; @@ -1549,7 +1572,7 @@ ++fetchStats.squashCycles; DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid); } else if (fetchStatus[tid] == IcacheWaitResponse) { - cpu->fetchStats[tid]->icacheStallCycles++; + ++fetchStats.icacheStallCycles; DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n", tid); } else if (fetchStatus[tid] == ItlbWait) { diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 6add314..cd31191 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -533,6 +533,12 @@ FetchStatGroup(CPU *cpu, Fetch *fetch); // @todo: Consider making these // vectors and tracking on a per thread basis. + /** Stat for total number of cycles stalled due to an icache miss. */ + statistics::Scalar icacheStallCycles; + /** Stat for total number of fetched instructions. */ + statistics::Scalar insts; + /** Total number of fetched branches. */ + statistics::Scalar branches; /** Stat for total number of predicted branches. */ statistics::Scalar predictedBranches; /** Stat for total number of cycles spent fetching. */ @@ -575,6 +581,10 @@ statistics::Distribution nisnDist; /** Rate of how often fetch was idle. */ statistics::Formula idleRate; + /** Number of branch fetches per cycle. */ + statistics::Formula branchRate; + /** Number of instruction fetched per cycle. 
*/ + statistics::Formula rate; } fetchStats; }; diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc index 92d281c..7cf6c54 100644 --- a/src/cpu/o3/iew.cc +++ b/src/cpu/o3/iew.cc @@ -217,14 +217,52 @@ IEW::IEWStats::ExecutedInstStats::ExecutedInstStats(CPU *cpu) : statistics::Group(cpu), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of executed instructions"), + ADD_STAT(numLoadInsts, statistics::units::Count::get(), + "Number of load instructions executed"), ADD_STAT(numSquashedInsts, statistics::units::Count::get(), "Number of squashed instructions skipped in execute"), ADD_STAT(numSwp, statistics::units::Count::get(), - "Number of swp insts executed") + "Number of swp insts executed"), + ADD_STAT(numNop, statistics::units::Count::get(), + "Number of nop insts executed"), + ADD_STAT(numRefs, statistics::units::Count::get(), + "Number of memory reference insts executed"), + ADD_STAT(numBranches, statistics::units::Count::get(), + "Number of branches executed"), + ADD_STAT(numStoreInsts, statistics::units::Count::get(), + "Number of stores executed"), + ADD_STAT(numRate, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "Inst execution rate", numInsts / cpu->baseStats.numCycles) { + numLoadInsts + .init(cpu->numThreads) + .flags(statistics::total); + numSwp .init(cpu->numThreads) .flags(statistics::total); + + numNop + .init(cpu->numThreads) + .flags(statistics::total); + + numRefs + .init(cpu->numThreads) + .flags(statistics::total); + + numBranches + .init(cpu->numThreads) + .flags(statistics::total); + + numStoreInsts + .flags(statistics::total); + numStoreInsts = numRefs - numLoadInsts; + + numRate + .flags(statistics::total); } void @@ -1015,7 +1053,7 @@ instQueue.recordProducer(inst); - cpu->executeStats[tid]->numNop++; + iewStats.executedInstStats.numNop[tid]++; add_to_iq = false; } else { @@ -1523,7 +1561,7 @@ { ThreadID tid = inst->threadNumber; - cpu->executeStats[tid]->numInsts++; + 
iewStats.executedInstStats.numInsts++; #if TRACING_ON if (debug::O3PipeView) { @@ -1535,16 +1573,16 @@ // Control operations // if (inst->isControl()) - cpu->executeStats[tid]->numBranches++; + iewStats.executedInstStats.numBranches[tid]++; // // Memory operations // if (inst->isMemRef()) { - cpu->executeStats[tid]->numMemRefs++; + iewStats.executedInstStats.numRefs[tid]++; if (inst->isLoad()) { - cpu->executeStats[tid]->numLoadInsts++; + iewStats.executedInstStats.numLoadInsts[tid]++; } } } diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 4fe8227..80fed29 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -455,11 +455,25 @@ { ExecutedInstStats(CPU *cpu); + /** Stat for total number of executed instructions. */ + statistics::Scalar numInsts; + /** Stat for total number of executed load instructions. */ + statistics::Vector numLoadInsts; /** Stat for total number of squashed instructions skipped at * execute. */ statistics::Scalar numSquashedInsts; /** Number of executed software prefetches. */ statistics::Vector numSwp; + /** Number of executed nops. */ + statistics::Vector numNop; + /** Number of executed meomory references. */ + statistics::Vector numRefs; + /** Number of executed branches. */ + statistics::Vector numBranches; + /** Number of executed store instructions. */ + statistics::Formula numStoreInsts; + /** Number of instructions executed per cycle. */ + statistics::Formula numRate; } executedInstStats; /** Number of instructions sent to commit. 
*/ diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 35d1490..768f63e 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -154,36 +154,10 @@ if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { t_info.numInst++; + t_info.execContextStats.numInsts++; } t_info.numOp++; -} - -void -BaseSimpleCPU::countFetchInst() -{ - SimpleExecContext& t_info = *threadInfo[curThread]; - - if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { - // increment thread level numInsts fetched count - fetchStats[t_info.thread->threadId()]->numInsts++; - } - // increment thread level numOps fetched count - fetchStats[t_info.thread->threadId()]->numOps++; -} - -void -BaseSimpleCPU::countCommitInst() -{ - SimpleExecContext& t_info = *threadInfo[curThread]; - - if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { - // increment thread level and core level numInsts count - commitStats[t_info.thread->threadId()]->numInsts++; - baseStats.numInsts++; - } - // increment thread level and core level numOps count - commitStats[t_info.thread->threadId()]->numOps++; - baseStats.numOps++; + t_info.execContextStats.numOps++; } Counter @@ -402,11 +376,6 @@ if (predict_taken) ++t_info.execContextStats.numPredictedBranches; } - - // increment the fetch instruction stat counters - if (curStaticInst) { - countFetchInst(); - } } void @@ -419,7 +388,7 @@ Addr instAddr = threadContexts[curThread]->pcState().instAddr(); if (curStaticInst->isMemRef()) { - executeStats[t_info.thread->threadId()]->numMemRefs++; + t_info.execContextStats.numMemRefs++; } if (curStaticInst->isLoad()) { @@ -427,26 +396,26 @@ } if (curStaticInst->isControl()) { - ++fetchStats[t_info.thread->threadId()]->numBranches; + ++t_info.execContextStats.numBranches; } /* Power model statistics */ //integer alu accesses if (curStaticInst->isInteger()){ - executeStats[t_info.thread->threadId()]->numIntAluAccesses++; - commitStats[t_info.thread->threadId()]->numIntInsts++; + 
t_info.execContextStats.numIntAluAccesses++; + t_info.execContextStats.numIntInsts++; } //float alu accesses if (curStaticInst->isFloating()){ - executeStats[t_info.thread->threadId()]->numFpAluAccesses++; - commitStats[t_info.thread->threadId()]->numFpInsts++; + t_info.execContextStats.numFpAluAccesses++; + t_info.execContextStats.numFpInsts++; } //vector alu accesses if (curStaticInst->isVector()){ - executeStats[t_info.thread->threadId()]->numVecAluAccesses++; - commitStats[t_info.thread->threadId()]->numVecInsts++; + t_info.execContextStats.numVecAluAccesses++; + t_info.execContextStats.numVecInsts++; } //Matrix alu accesses @@ -460,22 +429,22 @@ t_info.execContextStats.numCallsReturns++; } + //the number of branch predictions that will be made + if (curStaticInst->isCondCtrl()){ + t_info.execContextStats.numCondCtrlInsts++; + } + //result bus acceses if (curStaticInst->isLoad()){ - commitStats[t_info.thread->threadId()]->numLoadInsts++; + t_info.execContextStats.numLoadInsts++; } if (curStaticInst->isStore() || curStaticInst->isAtomic()){ - commitStats[t_info.thread->threadId()]->numStoreInsts++; + t_info.execContextStats.numStoreInsts++; } /* End power model statistics */ - commitStats[t_info.thread->threadId()] - ->committedInstType[curStaticInst->opClass()]++; - commitStats[t_info.thread->threadId()]->updateComCtrlStats(curStaticInst); - - /* increment the committed numInsts and numOps stats */ - countCommitInst(); + t_info.execContextStats.statExecutedInstType[curStaticInst->opClass()]++; if (FullSystem) traceFunctions(instAddr); diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 46a25a0..df5290c 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -182,8 +182,6 @@ } void countInst(); - void countFetchInst(); - void countCommitInst(); Counter totalInsts() const override; Counter totalOps() const override; diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index c0927fc..0f20763 100644 --- 
a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -86,12 +86,60 @@ : statistics::Group(cpu, csprintf("exec_context.thread_%i", thread->threadId()).c_str()), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of instructions committed"), + ADD_STAT(numOps, statistics::units::Count::get(), + "Number of ops (including micro ops) committed"), + ADD_STAT(numIntAluAccesses, statistics::units::Count::get(), + "Number of integer alu accesses"), + ADD_STAT(numFpAluAccesses, statistics::units::Count::get(), + "Number of float alu accesses"), + ADD_STAT(numVecAluAccesses, statistics::units::Count::get(), + "Number of vector alu accesses"), ADD_STAT(numMatAluAccesses, statistics::units::Count::get(), "Number of matrix alu accesses"), ADD_STAT(numCallsReturns, statistics::units::Count::get(), "Number of times a function call or return occured"), + ADD_STAT(numCondCtrlInsts, statistics::units::Count::get(), + "Number of instructions that are conditional controls"), + ADD_STAT(numIntInsts, statistics::units::Count::get(), + "Number of integer instructions"), + ADD_STAT(numFpInsts, statistics::units::Count::get(), + "Number of float instructions"), + ADD_STAT(numVecInsts, statistics::units::Count::get(), + "Number of vector instructions"), ADD_STAT(numMatInsts, statistics::units::Count::get(), "Number of matrix instructions"), + ADD_STAT(numIntRegReads, statistics::units::Count::get(), + "Number of times the integer registers were read"), + ADD_STAT(numIntRegWrites, statistics::units::Count::get(), + "Number of times the integer registers were written"), + ADD_STAT(numFpRegReads, statistics::units::Count::get(), + "Number of times the floating registers were read"), + ADD_STAT(numFpRegWrites, statistics::units::Count::get(), + "Number of times the floating registers were written"), + ADD_STAT(numVecRegReads, statistics::units::Count::get(), + "Number of times the vector registers were read"), + ADD_STAT(numVecRegWrites, 
statistics::units::Count::get(), + "Number of times the vector registers were written"), + ADD_STAT(numVecPredRegReads, statistics::units::Count::get(), + "Number of times the predicate registers were read"), + ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(), + "Number of times the predicate registers were written"), + ADD_STAT(numCCRegReads, statistics::units::Count::get(), + "Number of times the CC registers were read"), + ADD_STAT(numCCRegWrites, statistics::units::Count::get(), + "Number of times the CC registers were written"), + ADD_STAT(numMiscRegReads, statistics::units::Count::get(), + "Number of times the Misc registers were read"), + ADD_STAT(numMiscRegWrites, statistics::units::Count::get(), + "Number of times the Misc registers were written"), + ADD_STAT(numMemRefs, statistics::units::Count::get(), + "Number of memory refs"), + ADD_STAT(numLoadInsts, statistics::units::Count::get(), + "Number of load instructions"), + ADD_STAT(numStoreInsts, statistics::units::Count::get(), + "Number of store instructions"), ADD_STAT(numIdleCycles, statistics::units::Cycle::get(), "Number of idle cycles"), ADD_STAT(numBusyCycles, statistics::units::Cycle::get(), @@ -100,35 +148,64 @@ "Percentage of non-idle cycles"), ADD_STAT(idleFraction, statistics::units::Ratio::get(), "Percentage of idle cycles"), + ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(), + "ICache total stall cycles"), + ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(), + "DCache total stall cycles"), + ADD_STAT(numBranches, statistics::units::Count::get(), + "Number of branches fetched"), ADD_STAT(numPredictedBranches, statistics::units::Count::get(), "Number of branches predicted as taken"), ADD_STAT(numBranchMispred, statistics::units::Count::get(), "Number of branch mispredictions"), + ADD_STAT(statExecutedInstType, statistics::units::Count::get(), + "Class of executed instruction."), numRegReads{ - &(cpu->executeStats[thread->threadId()]->numIntRegReads), - 
&(cpu->executeStats[thread->threadId()]->numFpRegReads), - &(cpu->executeStats[thread->threadId()]->numVecRegReads), - &(cpu->executeStats[thread->threadId()]->numVecRegReads), - &(cpu->executeStats[thread->threadId()]->numVecPredRegReads), - &(cpu->executeStats[thread->threadId()]->numCCRegReads), - &numMatRegReads + &numIntRegReads, + &numFpRegReads, + &numVecRegReads, + &numVecRegReads, + &numVecPredRegReads, + &numMatRegReads, + &numCCRegReads }, numRegWrites{ - &(cpu->executeStats[thread->threadId()]->numIntRegWrites), - &(cpu->executeStats[thread->threadId()]->numFpRegWrites), - &(cpu->executeStats[thread->threadId()]->numVecRegWrites), - &(cpu->executeStats[thread->threadId()]->numVecRegWrites), - &(cpu->executeStats[thread->threadId()] - ->numVecPredRegWrites), - &(cpu->executeStats[thread->threadId()]->numCCRegWrites), - &numMatRegWrites + &numIntRegWrites, + &numFpRegWrites, + &numVecRegWrites, + &numVecRegWrites, + &numVecPredRegWrites, + &numMatRegWrites, + &numCCRegWrites } { + numCCRegReads + .flags(statistics::nozero); + + numCCRegWrites + .flags(statistics::nozero); + + icacheStallCycles + .prereq(icacheStallCycles); + + dcacheStallCycles + .prereq(dcacheStallCycles); + + statExecutedInstType + .init(enums::Num_OpClass) + .flags(statistics::total | statistics::pdf | statistics::dist); + + for (unsigned i = 0; i < Num_OpClasses; ++i) { + statExecutedInstType.subname(i, enums::OpClassStrings[i]); + } idleFraction = statistics::constant(1.0) - notIdleFraction; numIdleCycles = idleFraction * cpu->baseStats.numCycles; numBusyCycles = notIdleFraction * cpu->baseStats.numCycles; + numBranches + .prereq(numBranches); + numPredictedBranches .prereq(numPredictedBranches); @@ -136,19 +213,73 @@ .prereq(numBranchMispred); } + // Number of simulated instructions + statistics::Scalar numInsts; + statistics::Scalar numOps; + + // Number of integer alu accesses + statistics::Scalar numIntAluAccesses; + + // Number of float alu accesses + statistics::Scalar 
numFpAluAccesses; + + // Number of vector alu accesses + statistics::Scalar numVecAluAccesses; + // Number of matrix alu accesses statistics::Scalar numMatAluAccesses; // Number of function calls/returns statistics::Scalar numCallsReturns; + // Conditional control instructions; + statistics::Scalar numCondCtrlInsts; + + // Number of int instructions + statistics::Scalar numIntInsts; + + // Number of float instructions + statistics::Scalar numFpInsts; + + // Number of vector instructions + statistics::Scalar numVecInsts; + // Number of matrix instructions statistics::Scalar numMatInsts; + // Number of integer register file accesses + statistics::Scalar numIntRegReads; + statistics::Scalar numIntRegWrites; + + // Number of float register file accesses + statistics::Scalar numFpRegReads; + statistics::Scalar numFpRegWrites; + + // Number of vector register file accesses + mutable statistics::Scalar numVecRegReads; + statistics::Scalar numVecRegWrites; + + // Number of predicate register file accesses + mutable statistics::Scalar numVecPredRegReads; + statistics::Scalar numVecPredRegWrites; + // Number of matrix register file accesses mutable statistics::Scalar numMatRegReads; statistics::Scalar numMatRegWrites; + // Number of condition code register file accesses + statistics::Scalar numCCRegReads; + statistics::Scalar numCCRegWrites; + + // Number of misc register file accesses + statistics::Scalar numMiscRegReads; + statistics::Scalar numMiscRegWrites; + + // Number of simulated memory references + statistics::Scalar numMemRefs; + statistics::Scalar numLoadInsts; + statistics::Scalar numStoreInsts; + // Number of idle cycles statistics::Formula numIdleCycles; @@ -159,13 +290,24 @@ statistics::Average notIdleFraction; statistics::Formula idleFraction; + // Number of cycles stalled for I-cache responses + statistics::Scalar icacheStallCycles; + + // Number of cycles stalled for D-cache responses + statistics::Scalar dcacheStallCycles; + /// @{ + /// Total number of 
branches fetched + statistics::Scalar numBranches; /// Number of branches predicted as taken statistics::Scalar numPredictedBranches; /// Number of misprediced branches statistics::Scalar numBranchMispred; /// @} + // Instruction mix histogram by OpClass + statistics::Vector statExecutedInstType; + std::array<statistics::Scalar *, CCRegClass + 1> numRegReads; std::array<statistics::Scalar *, CCRegClass + 1> numRegWrites; @@ -226,7 +368,7 @@ RegVal readMiscRegOperand(const StaticInst *si, int idx) override { - cpu->executeStats[thread->threadId()]->numMiscRegReads++; + execContextStats.numMiscRegReads++; const RegId& reg = si->srcRegIdx(idx); assert(reg.is(MiscRegClass)); return thread->readMiscReg(reg.index()); @@ -235,7 +377,7 @@ void setMiscRegOperand(const StaticInst *si, int idx, RegVal val) override { - cpu->executeStats[thread->threadId()]->numMiscRegWrites++; + execContextStats.numMiscRegWrites++; const RegId& reg = si->destRegIdx(idx); assert(reg.is(MiscRegClass)); thread->setMiscReg(reg.index(), val); @@ -248,7 +390,7 @@ RegVal readMiscReg(int misc_reg) override { - cpu->executeStats[thread->threadId()]->numMiscRegReads++; + execContextStats.numMiscRegReads++; return thread->readMiscReg(misc_reg); } @@ -259,7 +401,7 @@ void setMiscReg(int misc_reg, RegVal val) override { - cpu->executeStats[thread->threadId()]->numMiscRegWrites++; + execContextStats.numMiscRegWrites++; thread->setMiscReg(misc_reg, val); } -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/68717?usp=email To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: I2a88cadfee03c1fc81932e6548938db108786dd2 Gerrit-Change-Number: 68717 Gerrit-PatchSet: 2 Gerrit-Owner: Bobby Bruce <bbruce@ucdavis.edu> Gerrit-Reviewer: Bobby Bruce <bbruce@ucdavis.edu> Gerrit-Reviewer: Gabe Black <gabe.black@gmail.com> Gerrit-Reviewer: Jason Lowe-Power <jason@lowepower.com> 
Gerrit-Reviewer: Jason Lowe-Power <power.jg@gmail.com> Gerrit-Reviewer: kokoro <noreply+kokoro@google.com> Gerrit-MessageType: merged