Ayaz Akram has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/65491?usp=email )
Change subject: mem: HBMCtrl changes to allow PC data buses to be in
different states
......................................................................
mem: HBMCtrl changes to allow PC data buses to be in different states
This change updates the HBMCtrl such that both pseudo channels
can be in separate states (read or write) at the same time. In
addition, the controller queues are now always split into two
halves, one for each pseudo channel.
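At the heart of the patch, bus state moves from the controller into each
memory interface, and MemCtrl::inReadBusState()/inWriteBusState() now take
the interface being queried. A minimal standalone C++ sketch of that idea
(illustration only, not gem5 code; the scaffolding types and main() below
are invented):

    // Standalone sketch, not gem5 code: per-interface bus state lets one
    // pseudo channel service reads while the other services writes. The
    // names busState, busStateNext and inReadBusState mirror the patch;
    // the surrounding scaffolding is invented for illustration.
    #include <cstdio>

    enum class BusState { READ, WRITE };

    struct MemInterface {
        BusState busState = BusState::READ;      // now per interface
        BusState busStateNext = BusState::READ;  // state for next burst
    };

    struct MemCtrl {
        // mirrors the new MemCtrl::inReadBusState(bool, const MemInterface*)
        bool inReadBusState(bool next_state, const MemInterface* mem_intr) const
        {
            return (next_state ? mem_intr->busStateNext
                               : mem_intr->busState) == BusState::READ;
        }
    };

    int main()
    {
        MemInterface pc0, pc1;
        MemCtrl ctrl;

        // PC1 turns its data bus around to writes while PC0 keeps reading
        pc1.busStateNext = BusState::WRITE;
        pc1.busState = pc1.busStateNext;

        std::printf("PC0 in read state: %d, PC1 in read state: %d\n",
                    ctrl.inReadBusState(false, &pc0),
                    ctrl.inReadBusState(false, &pc1));
        return 0;
    }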
M src/mem/HBMCtrl.py
M src/mem/dram_interface.cc
M src/mem/hbm_ctrl.cc
M src/mem/hbm_ctrl.hh
M src/mem/mem_ctrl.cc
M src/mem/mem_ctrl.hh
M src/mem/mem_interface.hh
M src/mem/nvm_interface.cc
M src/mem/qos/mem_ctrl.cc
M src/mem/qos/mem_ctrl.hh
M src/mem/qos/mem_sink.cc
M src/python/gem5/components/memory/hbm.py
12 files changed, 122 insertions(+), 116 deletions(-)
Approvals:
Jason Lowe-Power: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass
diff --git a/src/mem/HBMCtrl.py b/src/mem/HBMCtrl.py
-    partitioned_q = Param.Bool(False, "split queues for pseudo channels")
diff --git a/src/mem/dram_interface.cc b/src/mem/dram_interface.cc
index d745fe5..65e06db 100644
--- a/src/mem/dram_interface.cc
+++ b/src/mem/dram_interface.cc
@@ -1068,13 +1068,14 @@
                // latest Tick for which ACT can occur without
                // incurring additional delay on the data bus
-                const Tick tRCD = ctrl->inReadBusState(false) ?
-                                        tRCD_RD : tRCD_WR;
+                const Tick tRCD = ctrl->inReadBusState(false, this) ?
+                                        tRCD_RD : tRCD_WR;
const Tick hidden_act_max =
std::max(min_col_at - tRCD, curTick());
// When is the earliest the R/W burst can issue?
-                const Tick col_allowed_at = ctrl->inReadBusState(false) ?
+                const Tick col_allowed_at = ctrl->inReadBusState(false,
+                                this) ?
ranks[i]->banks[j].rdAllowedAt :
ranks[i]->banks[j].wrAllowedAt;
Tick col_at = std::max(col_allowed_at, act_at + tRCD);
@@ -1180,10 +1181,10 @@
DRAMInterface::Rank::isQueueEmpty() const
{
    // check commands in Q based on current bus direction
-    bool no_queued_cmds = (dram.ctrl->inReadBusState(true) &&
-                          (readEntries == 0))
-                          || (dram.ctrl->inWriteBusState(true) &&
-                          (writeEntries == 0));
+    bool no_queued_cmds = (dram.ctrl->inReadBusState(true, &(this->dram))
+                          && (readEntries == 0)) ||
+                          (dram.ctrl->inWriteBusState(true, &(this->dram))
+                          && (writeEntries == 0));
return no_queued_cmds;
}

@@ -1669,7 +1670,7 @@
// completed refresh event, ensure next request is scheduled
if (!(dram.ctrl->requestEventScheduled(dram.pseudoChannel))) {
        DPRINTF(DRAM, "Scheduling next request after refreshing"
-                " rank %d\n", rank);
+                " rank %d, PC %d \n", rank, dram.pseudoChannel);
dram.ctrl->restartScheduler(curTick(), dram.pseudoChannel);
}
}
@@ -1831,7 +1832,8 @@
bool
DRAMInterface::Rank::forceSelfRefreshExit() const {
    return (readEntries != 0) ||
-           (dram.ctrl->inWriteBusState(true) && (writeEntries != 0));
+           (dram.ctrl->inWriteBusState(true, &(this->dram))
+           && (writeEntries != 0));
}
void
diff --git a/src/mem/hbm_ctrl.cc b/src/mem/hbm_ctrl.cc
index 747e714..f87fa2d 100644
--- a/src/mem/hbm_ctrl.cc
+++ b/src/mem/hbm_ctrl.cc
@@ -51,8 +51,7 @@
name()),
respondEventPC1([this] {processRespondEvent(pc1Int, respQueuePC1,
respondEventPC1, retryRdReqPC1); }, name()),
@@ -69,17 +68,8 @@
pc0Int->setCtrl(this, commandWindow, 0);
pc1Int->setCtrl(this, commandWindow, 1);
-    writeHighThreshold = (writeBufferSize * (p.write_high_thresh_perc/2)
-                         / 100.0);
-    writeLowThreshold = (writeBufferSize * (p.write_low_thresh_perc/2)
-                         / 100.0);
+    writeHighThreshold = (writeBufferSize * p.write_high_thresh_perc
+                         / 100.0);
+    writeLowThreshold = (writeBufferSize * p.write_low_thresh_perc
+                         / 100.0);
void
@@ -109,9 +99,9 @@
Tick latency = 0;
if (pc0Int->getAddrRange().contains(pkt->getAddr())) {
-        latency = MemCtrl::recvAtomicLogic(pkt, pc0Int);
+        latency = recvAtomicLogic(pkt, pc0Int);
    } else if (pc1Int->getAddrRange().contains(pkt->getAddr())) {
-        latency = MemCtrl::recvAtomicLogic(pkt, pc1Int);
+        latency = recvAtomicLogic(pkt, pc1Int);
} else {
panic("Can't handle address range for packet %s\n", pkt->print());
}
@@ -122,10 +112,10 @@
void
HBMCtrl::recvFunctional(PacketPtr pkt)
{
-    bool found = MemCtrl::recvFunctionalLogic(pkt, pc0Int);
+    bool found = recvFunctionalLogic(pkt, pc0Int);
    if (!found) {
-        found = MemCtrl::recvFunctionalLogic(pkt, pc1Int);
+        found = recvFunctionalLogic(pkt, pc1Int);
}
if (!found) {
@@ -170,9 +160,9 @@
{
DPRINTF(MemCtrl,
"Write queue limit %d, PC0 size %d, entries needed %d\n",
-            writeBufferSize, writeQueueSizePC0, neededEntries);
+            writeBufferSize/2, pc0Int->writeQueueSize, neededEntries);
@@ -181,9 +171,9 @@
{
DPRINTF(MemCtrl,
"Write queue limit %d, PC1 size %d, entries needed %d\n",
-            writeBufferSize, writeQueueSizePC1, neededEntries);
+            writeBufferSize/2, pc1Int->writeQueueSize, neededEntries);
@@ -192,10 +182,10 @@
{
DPRINTF(MemCtrl,
"Read queue limit %d, PC0 size %d, entries needed %d\n",
-            readBufferSize, readQueueSizePC0 + respQueue.size(),
+            readBufferSize/2, pc0Int->readQueueSize + respQueue.size(),
neededEntries);
-            readBufferSize, readQueueSizePC1 + respQueuePC1.size(),
+            readBufferSize/2, pc1Int->readQueueSize + respQueuePC1.size(),
neededEntries);
bool
-HBMCtrl::readQueueFull(unsigned int neededEntries) const
-{
-    DPRINTF(MemCtrl,
-            "HBMCtrl: Read queue limit %d, entries needed %d\n",
-            readBufferSize, neededEntries);
-
-    auto rdsize_new = totalReadQueueSize + respQueue.size() +
-                      respQueuePC1.size() + neededEntries;
-    return rdsize_new > readBufferSize;
-}
-
-bool
HBMCtrl::recvTimingReq(PacketPtr pkt)
{
// This is where we enter from the outside world
@@ -269,23 +247,23 @@
// check local buffers and do not accept if full
if (pkt->isWrite()) {
if (is_pc0) {
-            if (partitionedQ ? writeQueueFullPC0(pkt_count) :
-                writeQueueFull(pkt_count))
-            {
+            if (writeQueueFullPC0(pkt_count)) {
DPRINTF(MemCtrl, "Write queue full, not accepting\n");
// remember that we have to retry this port
-                MemCtrl::retryWrReq = true;
+                retryWrReq = true;
stats.numWrRetry++;
return false;
} else {
addToWriteQueue(pkt, pkt_count, pc0Int);
+                if (!nextReqEvent.scheduled()) {
+                    DPRINTF(MemCtrl, "Request scheduled immediately\n");
+                    schedule(nextReqEvent, curTick());
+                }
stats.writeReqs++;
stats.bytesWrittenSys += size;
}
} else {
-            if (partitionedQ ? writeQueueFullPC1(pkt_count) :
-                writeQueueFull(pkt_count))
-            {
+            if (writeQueueFullPC1(pkt_count)) {
DPRINTF(MemCtrl, "Write queue full, not accepting\n");
// remember that we have to retry this port
retryWrReqPC1 = true;
@@ -293,6 +271,10 @@
return false;
} else {
addToWriteQueue(pkt, pkt_count, pc1Int);
+                if (!nextReqEventPC1.scheduled()) {
+                    DPRINTF(MemCtrl, "Request scheduled immediately\n");
+                    schedule(nextReqEventPC1, curTick());
+                }
stats.writeReqs++;
stats.bytesWrittenSys += size;
}
@@ -303,11 +285,10 @@
assert(size != 0);
if (is_pc0) {
-            if (partitionedQ ? readQueueFullPC0(pkt_count) :
-                HBMCtrl::readQueueFull(pkt_count))
-            {
+            if (readQueueFullPC0(pkt_count)) {
DPRINTF(MemCtrl, "Read queue full, not accepting\n");
// remember that we have to retry this port
-                retryRdReqPC1 = true;
+                retryRdReq = true;
stats.numRdRetry++;
return false;
} else {
@@ -322,8 +303,7 @@
stats.bytesReadSys += size;
}
} else {
-            if (partitionedQ ? readQueueFullPC1(pkt_count) :
-                HBMCtrl::readQueueFull(pkt_count))
-            {
+            if (readQueueFullPC1(pkt_count)) {
DPRINTF(MemCtrl, "Read queue full, not accepting\n");
// remember that we have to retry this port
retryRdReqPC1 = true;
@@ -351,7 +331,7 @@
auto it = rowBurstTicks.begin();
while (it != rowBurstTicks.end()) {
auto current_it = it++;
-        if (MemCtrl::getBurstWindow(curTick()) > *current_it) {
+        if (getBurstWindow(curTick()) > *current_it) {
DPRINTF(MemCtrl, "Removing burstTick for %d\n", *current_it);
rowBurstTicks.erase(current_it);
}
@@ -364,7 +344,7 @@
auto it = colBurstTicks.begin();
while (it != colBurstTicks.end()) {
auto current_it = it++;
-        if (MemCtrl::getBurstWindow(curTick()) > *current_it) {
+        if (getBurstWindow(curTick()) > *current_it) {
DPRINTF(MemCtrl, "Removing burstTick for %d\n", *current_it);
colBurstTicks.erase(current_it);
}
@@ -385,7 +365,7 @@
Tick cmd_at = cmd_tick;
// get tick aligned to burst window
-    Tick burst_tick = MemCtrl::getBurstWindow(cmd_tick);
+    Tick burst_tick = getBurstWindow(cmd_tick);
// verify that we have command bandwidth to issue the command
// if not, iterate over next window(s) until slot found
@@ -424,7 +404,7 @@
Tick cmd_at = cmd_tick;
// get tick aligned to burst window
-    Tick burst_tick = MemCtrl::getBurstWindow(cmd_tick);
+    Tick burst_tick = getBurstWindow(cmd_tick);
// Command timing requirements are from 2nd command
// Start with assumption that 2nd command will issue at cmd_at and
diff --git a/src/mem/hbm_ctrl.hh b/src/mem/hbm_ctrl.hh
index a6ecf6c..b17caa6 100644
--- a/src/mem/hbm_ctrl.hh
+++ b/src/mem/hbm_ctrl.hh
@@ -144,7 +144,6 @@
*/
bool readQueueFullPC0(unsigned int pkt_count) const;
bool readQueueFullPC1(unsigned int pkt_count) const;
-    bool readQueueFull(unsigned int pkt_count) const;
/**
diff --git a/src/mem/mem_ctrl.cc b/src/mem/mem_ctrl.cc
--- a/src/mem/mem_ctrl.cc
+++ b/src/mem/mem_ctrl.cc
-    writesThisTime(0), readsThisTime(0),
memSchedPolicy(p.mem_sched_policy),
frontendLatency(p.static_frontend_latency),
backendLatency(p.static_backend_latency),
@@ -277,6 +276,8 @@
logRequest(MemCtrl::READ, pkt->requestorId(),
pkt->qosValue(), mem_pkt->addr, 1);
+        mem_intr->readQueueSize++;
// Update stats
stats.avgRdQLen = totalReadQueueSize + respQueue.size();
}
@@ -349,6 +350,8 @@
logRequest(MemCtrl::WRITE, pkt->requestorId(),
pkt->qosValue(), mem_pkt->addr, 1);
+        mem_intr->writeQueueSize++;
assert(totalWriteQueueSize == isInWriteQueue.size());
// Update stats
@@ -575,6 +578,9 @@
// check if there is a packet going to a free rank
for (auto i = queue.begin(); i != queue.end(); ++i) {
MemPacket* mem_pkt = *i;
+        if (mem_pkt->pseudoChannel != mem_intr->pseudoChannel) {
+            continue;
+        }
if (packetReady(mem_pkt, mem_intr)) {
ret = i;
break;
@@ -761,28 +767,28 @@
}
bool
-MemCtrl::inReadBusState(bool next_state) const
+MemCtrl::inReadBusState(bool next_state, const MemInterface* mem_intr) const
{
// check the bus state
if (next_state) {
// use busStateNext to get the state that will be used
// for the next burst
-        return (busStateNext == MemCtrl::READ);
+        return (mem_intr->busStateNext == MemCtrl::READ);
} else {
-        return (busState == MemCtrl::READ);
+        return (mem_intr->busState == MemCtrl::READ);
}
}
bool
-MemCtrl::inWriteBusState(bool next_state) const
+MemCtrl::inWriteBusState(bool next_state, const MemInterface* mem_intr) const
{
// check the bus state
if (next_state) {
// use busStateNext to get the state that will be used
// for the next burst
-        return (busStateNext == MemCtrl::WRITE);
+        return (mem_intr->busStateNext == MemCtrl::WRITE);
} else {
-        return (busState == MemCtrl::WRITE);
+        return (mem_intr->busState == MemCtrl::WRITE);
}
}

@@ -813,13 +819,13 @@
// Update the common bus stats
if (mem_pkt->isRead()) {
-        ++readsThisTime;
+        ++(mem_intr->readsThisTime);
// Update latency stats
stats.requestorReadTotalLat[mem_pkt->requestorId()] +=
mem_pkt->readyTime - mem_pkt->entryTime;
stats.requestorReadBytes[mem_pkt->requestorId()] += mem_pkt->size;
} else {
-        ++writesThisTime;
+        ++(mem_intr->writesThisTime);
stats.requestorWriteBytes[mem_pkt->requestorId()] += mem_pkt->size;
stats.requestorWriteTotalLat[mem_pkt->requestorId()] +=
mem_pkt->readyTime - mem_pkt->entryTime;
@@ -836,8 +842,8 @@
// Default to busy status and update based on interface specifics
// Default state of unused interface is 'true'
bool mem_busy = true;
-    bool all_writes_nvm = mem_intr->numWritesQueued == totalWriteQueueSize;
-    bool read_queue_empty = totalReadQueueSize == 0;
+    bool all_writes_nvm = mem_intr->numWritesQueued ==
+                          mem_intr->writeQueueSize;
+    bool read_queue_empty = mem_intr->readQueueSize == 0;
mem_busy = mem_intr->isBusy(read_queue_empty, all_writes_nvm);
if (mem_busy) {
// if all ranks are refreshing wait for them to finish
@@ -884,27 +890,27 @@
}
// detect bus state change
-    recordTurnaroundStats();
+    recordTurnaroundStats(mem_intr->busState, mem_intr->busStateNext);
DPRINTF(MemCtrl, "QoS Turnarounds selected state %s %s\n",
-            (busState==MemCtrl::READ)?"READ":"WRITE",
+            (mem_intr->busState==MemCtrl::READ)?"READ":"WRITE",
switched_cmd_type?"[turnaround triggered]":"");
if (switched_cmd_type) {
-        if (busState == MemCtrl::READ) {
+        if (mem_intr->busState == MemCtrl::READ) {
DPRINTF(MemCtrl,
"Switching to writes after %d reads with %d reads "
"waiting\n", readsThisTime, totalReadQueueSize);
stats.rdPerTurnAround.sample(readsThisTime);
readsThisTime = 0;
"Switching to writes after %d reads with %d reads "
"waiting\n", mem_intr->readsThisTime, mem_intr->readQueueSize);
stats.rdPerTurnAround.sample(mem_intr->readsThisTime);
mem_intr->readsThisTime = 0;
} else {
DPRINTF(MemCtrl,
"Switching to reads after %d writes with %d writes "
"waiting\n", writesThisTime, totalWriteQueueSize);
stats.wrPerTurnAround.sample(writesThisTime);
writesThisTime = 0;
"Switching to reads after %d writes with %d writes "
"waiting\n", mem_intr->writesThisTime,
mem_intr->writeQueueSize);
stats.wrPerTurnAround.sample(mem_intr->writesThisTime);
mem_intr->writesThisTime = 0;
}
}
@@ -916,7 +922,7 @@
}
// updates current state
-    busState = busStateNext;
+    mem_intr->busState = mem_intr->busStateNext;
nonDetermReads(mem_intr);
@@ -925,18 +931,18 @@
}
// when we get here it is either a read or a write
-    if (busState == READ) {
+    if (mem_intr->busState == READ) {
// track if we should switch or not
bool switch_to_writes = false;
-        if (totalReadQueueSize == 0) {
+        if (mem_intr->readQueueSize == 0) {
// In the case there is no read request to go next,
// trigger writes if we have passed the low threshold (or
// if we are draining)
-            if (!(totalWriteQueueSize == 0) &&
+            if (!(mem_intr->writeQueueSize == 0) &&
(drainState() == DrainState::Draining ||
-                totalWriteQueueSize > writeLowThreshold)) {
+                mem_intr->writeQueueSize > writeLowThreshold)) {
DPRINTF(MemCtrl,
"Switching to writes due to read queue empty\n");
@@ -1011,6 +1017,7 @@
mem_pkt->qosValue(), mem_pkt->getAddr(), 1,
mem_pkt->readyTime - mem_pkt->entryTime);
+        mem_intr->readQueueSize--;
// Insert into response queue. It will be sent back to the
// requestor at its readyTime
@@ -1029,8 +1036,9 @@
// there are no other writes that can issue
// Also ensure that we've issued a minimum defined number
// of reads before switching, or have emptied the readQ
-        if ((totalWriteQueueSize > writeHighThreshold) &&
-            (readsThisTime >= minReadsPerSwitch || totalReadQueueSize == 0)
+        if ((mem_intr->writeQueueSize > writeHighThreshold) &&
+            (mem_intr->readsThisTime >= minReadsPerSwitch ||
+            mem_intr->readQueueSize == 0)
&& !(nvmWriteBlock(mem_intr))) {
switch_to_writes = true;
}
@@ -1045,7 +1053,7 @@
        // draining), or because the writes hit the high threshold
if (switch_to_writes) {
// transition to writing
-            busStateNext = WRITE;
+            mem_intr->busStateNext = WRITE;
}
} else {
@@ -1099,6 +1107,7 @@
mem_pkt->qosValue(), mem_pkt->getAddr(), 1,
mem_pkt->readyTime - mem_pkt->entryTime);
+        mem_intr->writeQueueSize--;
        // remove the request from the queue - the iterator is no longer valid
writeQueue[mem_pkt->qosValue()].erase(to_write);
@@ -1112,15 +1121,15 @@
// If we are interfacing to NVM and have filled the writeRespQueue,
// with only NVM writes in Q, then switch to reads
bool below_threshold =
-            totalWriteQueueSize + minWritesPerSwitch < writeLowThreshold;
+            mem_intr->writeQueueSize + minWritesPerSwitch <
+            writeLowThreshold;
-        if (totalWriteQueueSize == 0 ||
+        if (mem_intr->writeQueueSize == 0 ||
(below_threshold && drainState() != DrainState::Draining) ||
-            (totalReadQueueSize && writesThisTime >= minWritesPerSwitch) ||
-            (totalReadQueueSize && (nvmWriteBlock(mem_intr)))) {
+            (mem_intr->readQueueSize && mem_intr->writesThisTime >=
+            minWritesPerSwitch) ||
+            (mem_intr->readQueueSize && (nvmWriteBlock(mem_intr)))) {
// turn the bus back around for reads again
-            busStateNext = MemCtrl::READ;
+            mem_intr->busStateNext = MemCtrl::READ;
// note that the we switch back to reads also in the idle
// case, which eventually will check for any draining and
@@ -1133,7 +1142,7 @@
if (!next_req_event.scheduled())
schedule(next_req_event, std::max(mem_intr->nextReqTime,
curTick()));
        DPRINTF(Drain, "Memory controller not drained, write: %d, read: %d,"
                " resp: %d\n", totalWriteQueueSize, totalReadQueueSize,
                respQueue.size());
diff --git a/src/mem/mem_ctrl.hh b/src/mem/mem_ctrl.hh
index 2819fb4..917798f 100644
--- a/src/mem/mem_ctrl.hh
+++ b/src/mem/mem_ctrl.hh
@@ -517,8 +517,6 @@
uint32_t writeLowThreshold;
const uint32_t minWritesPerSwitch;
const uint32_t minReadsPerSwitch;
-    uint32_t writesThisTime;
-    uint32_t readsThisTime;
/**
-    bool inReadBusState(bool next_state) const;
+    bool inReadBusState(bool next_state, const MemInterface* mem_intr) const;
/**
-    bool inWriteBusState(bool next_state) const;
+    bool inWriteBusState(bool next_state, const MemInterface* mem_intr) const;
Port &getPort(const std::string &if_name,
PortID idx=InvalidPortID) override;
diff --git a/src/mem/mem_interface.hh b/src/mem/mem_interface.hh
index 8d6f4fe..b0f762f 100644
--- a/src/mem/mem_interface.hh
+++ b/src/mem/mem_interface.hh
@@ -190,6 +190,28 @@
Tick nextReqTime = 0;
+    /**
+     * Reads/writes performed by the controller for this interface before
+     * bus direction is switched
+     */
+    uint32_t readsThisTime = 0;
+    uint32_t writesThisTime = 0;
+
+    /**
+     * Read/write packets in the read/write queue for this interface
+     * qos/mem_ctrl.hh has similar counters, but they track all packets
+     * in the controller for all memory interfaces connected to the
+     * controller.
+     */
+    uint32_t readQueueSize = 0;
+    uint32_t writeQueueSize = 0;
+
+    MemCtrl::BusState busState = MemCtrl::READ;
+
+    /** bus state for next request event triggered */
+    MemCtrl::BusState busStateNext = MemCtrl::READ;
/**
diff --git a/src/mem/nvm_interface.cc b/src/mem/nvm_interface.cc
--- a/src/mem/nvm_interface.cc
+++ b/src/mem/nvm_interface.cc
bool
NVMInterface::burstReady(MemPacket* pkt) const {
    return (pkt->readyTime <= curTick()) && (numReadDataReady > 0);
}
-    return (ctrl->inReadBusState(true) ?
+    return (ctrl->inReadBusState(true, this) ?
(numReadDataReady == 0) && !read_queue_empty :
writeRespQueueFull() && read_queue_empty &&
all_writes_nvm);
diff --git a/src/mem/qos/mem_ctrl.cc b/src/mem/qos/mem_ctrl.cc
index 9bf1328..b102ccf 100644
--- a/src/mem/qos/mem_ctrl.cc
+++ b/src/mem/qos/mem_ctrl.cc
@@ -355,7 +355,7 @@
}
void
-MemCtrl::recordTurnaroundStats()
+MemCtrl::recordTurnaroundStats(BusState busState, BusState busStateNext)
{
if (busStateNext != busState) {
if (busState == READ) {
diff --git a/src/mem/qos/mem_ctrl.hh b/src/mem/qos/mem_ctrl.hh
index 359e285..2e295d0 100644
--- a/src/mem/qos/mem_ctrl.hh
+++ b/src/mem/qos/mem_ctrl.hh
@@ -242,7 +242,7 @@
* Record statistics on turnarounds based on
* busStateNext and busState values
*/
-    void recordTurnaroundStats();
+    void recordTurnaroundStats(BusState busState, BusState busStateNext);
/**
diff --git a/src/mem/qos/mem_sink.cc b/src/mem/qos/mem_sink.cc
--- a/src/mem/qos/mem_sink.cc
+++ b/src/mem/qos/mem_sink.cc
    // Record turnaround stats and update current state direction
-    recordTurnaroundStats();
+    recordTurnaroundStats(busState, busStateNext);
// Set current bus state
setCurrentBusState();
diff --git a/src/python/gem5/components/memory/hbm.py
b/src/python/gem5/components/memory/hbm.py
index 35497c2..75db1f9 100644
--- a/src/python/gem5/components/memory/hbm.py
+++ b/src/python/gem5/components/memory/hbm.py
@@ -122,7 +122,6 @@
# for interleaving across pseudo channels (at 64B currently)
mask_list.insert(0, 1 << 6)
for i, ctrl in enumerate(self.mem_ctrl):
-            ctrl.partitioned_q = False
ctrl.dram.range = AddrRange(
start=self._mem_range.start,
size=self._mem_range.size(),
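
A note on the always-split queues: the DPRINTF changes above report
writeBufferSize/2 and readBufferSize/2 as the per-pseudo-channel limits,
with occupancy tracked in the new per-interface readQueueSize and
writeQueueSize counters. A standalone sketch of that accounting, assuming
this reading of the patch (not gem5 code; names below are invented):

    // Standalone sketch, not gem5 code: with the queues always split, each
    // pseudo channel may occupy at most half of the controller's shared
    // buffer, matching the readBufferSize/2 and writeBufferSize/2 limits
    // in the DPRINTFs above.
    #include <cstdint>
    #include <cstdio>

    struct PseudoChannel {
        uint32_t readQueueSize = 0;  // per-interface counter, as in the patch
    };

    // analogous to HBMCtrl::readQueueFullPC0/PC1 after this change
    bool readQueueFullPC(const PseudoChannel& pc, uint32_t resp_queue_size,
                         uint32_t needed_entries, uint32_t read_buffer_size)
    {
        // responses in flight count against the same half-sized limit
        return pc.readQueueSize + resp_queue_size + needed_entries
                   > read_buffer_size / 2;
    }

    int main()
    {
        PseudoChannel pc0{30};
        // 30 queued + 2 responses + 1 needed = 33 > 64/2, so the PC is full
        std::printf("PC0 read queue full: %d\n",
                    readQueueFullPC(pc0, 2, 1, 64));
        return 0;
    }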
--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/65491?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings?usp=email
Gerrit-MessageType: merged
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ifb599e611ad99f6c511baaf245bad2b5c9210a86
Gerrit-Change-Number: 65491
Gerrit-PatchSet: 7
Gerrit-Owner: Ayaz Akram <yazakram@ucdavis.edu>
Gerrit-Reviewer: Ayaz Akram <yazakram@ucdavis.edu>
Gerrit-Reviewer: Jason Lowe-Power <jason@lowepower.com>
Gerrit-Reviewer: Jason Lowe-Power <power.jg@gmail.com>
Gerrit-Reviewer: Nikos Nikoleris <nikos.nikoleris@arm.com>
Gerrit-Reviewer: kokoro <noreply+kokoro@google.com>