Ayaz Akram has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/65491?usp=email )
Change subject: mem: HBMCtrl changes to allow PC data buses to be in
different states
......................................................................
mem: HBMCtrl changes to allow PC data buses to be in different states
This change updates the HBMCtrl such that both pseudo channels
can be in separate states (read or write) at the same time. In
addition, the controller queues are now always split into two
halves, one for each pseudo channel.
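At the heart of the patch, bus state moves from the controller into each
memory interface, and MemCtrl::inReadBusState()/inWriteBusState() now take
the interface being queried. A minimal standalone C++ sketch of that idea
(illustration only, not gem5 code; the scaffolding types and main() below
are invented):

    // Standalone sketch, not gem5 code: per-interface bus state lets one
    // pseudo channel service reads while the other services writes. The
    // names busState, busStateNext and inReadBusState mirror the patch;
    // the surrounding scaffolding is invented for illustration.
    #include <cstdio>

    enum class BusState { READ, WRITE };

    struct MemInterface {
        BusState busState = BusState::READ;      // now per interface
        BusState busStateNext = BusState::READ;  // state for next burst
    };

    struct MemCtrl {
        // mirrors the new MemCtrl::inReadBusState(bool, const MemInterface*)
        bool inReadBusState(bool next_state, const MemInterface* mem_intr) const
        {
            return (next_state ? mem_intr->busStateNext
                               : mem_intr->busState) == BusState::READ;
        }
    };

    int main()
    {
        MemInterface pc0, pc1;
        MemCtrl ctrl;

        // PC1 turns its data bus around to writes while PC0 keeps reading
        pc1.busStateNext = BusState::WRITE;
        pc1.busState = pc1.busStateNext;

        std::printf("PC0 in read state: %d, PC1 in read state: %d\n",
                    ctrl.inReadBusState(false, &pc0),
                    ctrl.inReadBusState(false, &pc1));
        return 0;
    }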
M src/mem/HBMCtrl.py
M src/mem/dram_interface.cc
M src/mem/hbm_ctrl.cc
M src/mem/hbm_ctrl.hh
M src/mem/mem_ctrl.cc
M src/mem/mem_ctrl.hh
M src/mem/mem_interface.hh
M src/mem/nvm_interface.cc
M src/mem/qos/mem_ctrl.cc
M src/mem/qos/mem_ctrl.hh
M src/mem/qos/mem_sink.cc
M src/python/gem5/components/memory/hbm.py
12 files changed, 122 insertions(+), 116 deletions(-)
Approvals:
Jason Lowe-Power: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass
diff --git a/src/mem/HBMCtrl.py b/src/mem/HBMCtrl.py
-    partitioned_q = Param.Bool(False, "split queues for pseudo channels")
diff --git a/src/mem/dram_interface.cc b/src/mem/dram_interface.cc
index d745fe5..65e06db 100644
--- a/src/mem/dram_interface.cc
+++ b/src/mem/dram_interface.cc
@@ -1068,13 +1068,14 @@
                // latest Tick for which ACT can occur without
                // incurring additional delay on the data bus
-                const Tick tRCD = ctrl->inReadBusState(false) ?
-                                        tRCD_RD : tRCD_WR;
+                const Tick tRCD = ctrl->inReadBusState(false, this) ?
+                                        tRCD_RD : tRCD_WR;
const Tick hidden_act_max =
std::max(min_col_at - tRCD, curTick());
// When is the earliest the R/W burst can issue?
-                const Tick col_allowed_at = ctrl->inReadBusState(false) ?
+                const Tick col_allowed_at = ctrl->inReadBusState(false,
+                                this) ?
ranks[i]->banks[j].rdAllowedAt :
ranks[i]->banks[j].wrAllowedAt;
Tick col_at = std::max(col_allowed_at, act_at + tRCD);
@@ -1180,10 +1181,10 @@
DRAMInterface::Rank::isQueueEmpty() const
{
    // check commands in Q based on current bus direction
-    bool no_queued_cmds = (dram.ctrl->inReadBusState(true) &&
-                          (readEntries == 0))
-                          || (dram.ctrl->inWriteBusState(true) &&
-                          (writeEntries == 0));
+    bool no_queued_cmds = (dram.ctrl->inReadBusState(true, &(this->dram))
+                          && (readEntries == 0)) ||
+                          (dram.ctrl->inWriteBusState(true, &(this->dram))
+                          && (writeEntries == 0));
return no_queued_cmds;
}

@@ -1669,7 +1670,7 @@
// completed refresh event, ensure next request is scheduled
if (!(dram.ctrl->requestEventScheduled(dram.pseudoChannel))) {
        DPRINTF(DRAM, "Scheduling next request after refreshing"
-                " rank %d\n", rank);
+                " rank %d, PC %d \n", rank, dram.pseudoChannel);
dram.ctrl->restartScheduler(curTick(), dram.pseudoChannel);
}
}
@@ -1831,7 +1832,8 @@
bool
DRAMInterface::Rank::forceSelfRefreshExit() const {
    return (readEntries != 0) ||
-           (dram.ctrl->inWriteBusState(true) && (writeEntries != 0));
+           (dram.ctrl->inWriteBusState(true, &(this->dram))
+           && (writeEntries != 0));
}
void
diff --git a/src/mem/hbm_ctrl.cc b/src/mem/hbm_ctrl.cc
index 747e714..f87fa2d 100644
--- a/src/mem/hbm_ctrl.cc
+++ b/src/mem/hbm_ctrl.cc
@@ -51,8 +51,7 @@
name()),
respondEventPC1([this] {processRespondEvent(pc1Int, respQueuePC1,
respondEventPC1, retryRdReqPC1); }, name()),
@@ -69,17 +68,8 @@
pc0Int->setCtrl(this, commandWindow, 0);
pc1Int->setCtrl(this, commandWindow, 1);
-    writeHighThreshold = (writeBufferSize * (p.write_high_thresh_perc/2)
-                         / 100.0);
-    writeLowThreshold = (writeBufferSize * (p.write_low_thresh_perc/2)
-                         / 100.0);
+    writeHighThreshold = (writeBufferSize * p.write_high_thresh_perc
+                         / 100.0);
+    writeLowThreshold = (writeBufferSize * p.write_low_thresh_perc
+                         / 100.0);
void
@@ -109,9 +99,9 @@
Tick latency = 0;
if (pc0Int->getAddrRange().contains(pkt->getAddr())) {
-        latency = MemCtrl::recvAtomicLogic(pkt, pc0Int);
+        latency = recvAtomicLogic(pkt, pc0Int);
    } else if (pc1Int->getAddrRange().contains(pkt->getAddr())) {
-        latency = MemCtrl::recvAtomicLogic(pkt, pc1Int);
+        latency = recvAtomicLogic(pkt, pc1Int);
} else {
panic("Can't handle address range for packet %s\n", pkt->print());
}
@@ -122,10 +112,10 @@
void
HBMCtrl::recvFunctional(PacketPtr pkt)
{
-    bool found = MemCtrl::recvFunctionalLogic(pkt, pc0Int);
+    bool found = recvFunctionalLogic(pkt, pc0Int);
    if (!found) {
-        found = MemCtrl::recvFunctionalLogic(pkt, pc1Int);
+        found = recvFunctionalLogic(pkt, pc1Int);
}
if (!found) {
@@ -170,9 +160,9 @@
{
DPRINTF(MemCtrl,
"Write queue limit %d, PC0 size %d, entries needed %d\n",
-            writeBufferSize, writeQueueSizePC0, neededEntries);
+            writeBufferSize/2, pc0Int->writeQueueSize, neededEntries);
@@ -181,9 +171,9 @@
{
DPRINTF(MemCtrl,
"Write queue limit %d, PC1 size %d, entries needed %d\n",
-            writeBufferSize, writeQueueSizePC1, neededEntries);
+            writeBufferSize/2, pc1Int->writeQueueSize, neededEntries);
@@ -192,10 +182,10 @@
{
DPRINTF(MemCtrl,
"Read queue limit %d, PC0 size %d, entries needed %d\n",
-            readBufferSize, readQueueSizePC0 + respQueue.size(),
+            readBufferSize/2, pc0Int->readQueueSize + respQueue.size(),
neededEntries);
-            readBufferSize, readQueueSizePC1 + respQueuePC1.size(),
+            readBufferSize/2, pc1Int->readQueueSize + respQueuePC1.size(),
neededEntries);
bool
-HBMCtrl::readQueueFull(unsigned int neededEntries) const
-{
-    DPRINTF(MemCtrl,
-            "HBMCtrl: Read queue limit %d, entries needed %d\n",
-            readBufferSize, neededEntries);
-
-    auto rdsize_new = totalReadQueueSize + respQueue.size() +
-                      respQueuePC1.size() + neededEntries;
-    return rdsize_new > readBufferSize;
-}
-
-bool
HBMCtrl::recvTimingReq(PacketPtr pkt)
{
// This is where we enter from the outside world
@@ -269,23 +247,23 @@
// check local buffers and do not accept if full
if (pkt->isWrite()) {
if (is_pc0) {
-            if (partitionedQ ? writeQueueFullPC0(pkt_count) :
-                writeQueueFull(pkt_count))
-            {
+            if (writeQueueFullPC0(pkt_count)) {
DPRINTF(MemCtrl, "Write queue full, not accepting\n");
// remember that we have to retry this port
-                MemCtrl::retryWrReq = true;
+                retryWrReq = true;
stats.numWrRetry++;
return false;
} else {
addToWriteQueue(pkt, pkt_count, pc0Int);
+                if (!nextReqEvent.scheduled()) {
+                    DPRINTF(MemCtrl, "Request scheduled immediately\n");
+                    schedule(nextReqEvent, curTick());
+                }
stats.writeReqs++;
stats.bytesWrittenSys += size;
}
} else {
-            if (partitionedQ ? writeQueueFullPC1(pkt_count) :
-                writeQueueFull(pkt_count))
-            {
+            if (writeQueueFullPC1(pkt_count)) {
DPRINTF(MemCtrl, "Write queue full, not accepting\n");
// remember that we have to retry this port
retryWrReqPC1 = true;
@@ -293,6 +271,10 @@
return false;
} else {
addToWriteQueue(pkt, pkt_count, pc1Int);
+                if (!nextReqEventPC1.scheduled()) {
+                    DPRINTF(MemCtrl, "Request scheduled immediately\n");
+                    schedule(nextReqEventPC1, curTick());
+                }
stats.writeReqs++;
stats.bytesWrittenSys += size;
}
@@ -303,11 +285,10 @@
assert(size != 0);
if (is_pc0) {
-            if (partitionedQ ? readQueueFullPC0(pkt_count) :
-                HBMCtrl::readQueueFull(pkt_count))
-            {
+            if (readQueueFullPC0(pkt_count)) {
DPRINTF(MemCtrl, "Read queue full, not accepting\n");
// remember that we have to retry this port
-                retryRdReqPC1 = true;
+                retryRdReq = true;
stats.numRdRetry++;
return false;
} else {
@@ -322,8 +303,7 @@
stats.bytesReadSys += size;
}
} else {
-            if (partitionedQ ? readQueueFullPC1(pkt_count) :
-                HBMCtrl::readQueueFull(pkt_count))
-            {
+            if (readQueueFullPC1(pkt_count)) {
DPRINTF(MemCtrl, "Read queue full, not accepting\n");
// remember that we have to retry this port
retryRdReqPC1 = true;
@@ -351,7 +331,7 @@
auto it = rowBurstTicks.begin();
while (it != rowBurstTicks.end()) {
auto current_it = it++;
-        if (MemCtrl::getBurstWindow(curTick()) > *current_it) {
+        if (getBurstWindow(curTick()) > *current_it) {
DPRINTF(MemCtrl, "Removing burstTick for %d\n", *current_it);
rowBurstTicks.erase(current_it);
}
@@ -364,7 +344,7 @@
auto it = colBurstTicks.begin();
while (it != colBurstTicks.end()) {
auto current_it = it++;
-        if (MemCtrl::getBurstWindow(curTick()) > *current_it) {
+        if (getBurstWindow(curTick()) > *current_it) {
DPRINTF(MemCtrl, "Removing burstTick for %d\n", *current_it);
colBurstTicks.erase(current_it);
}
@@ -385,7 +365,7 @@
Tick cmd_at = cmd_tick;
// get tick aligned to burst window
-    Tick burst_tick = MemCtrl::getBurstWindow(cmd_tick);
+    Tick burst_tick = getBurstWindow(cmd_tick);
// verify that we have command bandwidth to issue the command
// if not, iterate over next window(s) until slot found
@@ -424,7 +404,7 @@
Tick cmd_at = cmd_tick;
// get tick aligned to burst window
-    Tick burst_tick = MemCtrl::getBurstWindow(cmd_tick);
+    Tick burst_tick = getBurstWindow(cmd_tick);
// Command timing requirements are from 2nd command
// Start with assumption that 2nd command will issue at cmd_at and
diff --git a/src/mem/hbm_ctrl.hh b/src/mem/hbm_ctrl.hh
index a6ecf6c..b17caa6 100644
--- a/src/mem/hbm_ctrl.hh
+++ b/src/mem/hbm_ctrl.hh
@@ -144,7 +144,6 @@
*/
bool readQueueFullPC0(unsigned int pkt_count) const;
bool readQueueFullPC1(unsigned int pkt_count) const;
-    bool readQueueFull(unsigned int pkt_count) const;
/**
diff --git a/src/mem/mem_ctrl.cc b/src/mem/mem_ctrl.cc
--- a/src/mem/mem_ctrl.cc
+++ b/src/mem/mem_ctrl.cc
-    writesThisTime(0), readsThisTime(0),
memSchedPolicy(p.mem_sched_policy),
frontendLatency(p.static_frontend_latency),
backendLatency(p.static_backend_latency),
@@ -277,6 +276,8 @@
logRequest(MemCtrl::READ, pkt->requestorId(),
pkt->qosValue(), mem_pkt->addr, 1);
+        mem_intr->readQueueSize++;
// Update stats
stats.avgRdQLen = totalReadQueueSize + respQueue.size();
}
@@ -349,6 +350,8 @@
logRequest(MemCtrl::WRITE, pkt->requestorId(),
pkt->qosValue(), mem_pkt->addr, 1);
+        mem_intr->writeQueueSize++;
assert(totalWriteQueueSize == isInWriteQueue.size());
// Update stats
@@ -575,6 +578,9 @@
// check if there is a packet going to a free rank
for (auto i = queue.begin(); i != queue.end(); ++i) {
MemPacket* mem_pkt = *i;
+        if (mem_pkt->pseudoChannel != mem_intr->pseudoChannel) {
+            continue;
+        }
if (packetReady(mem_pkt, mem_intr)) {
ret = i;
break;
@@ -761,28 +767,28 @@
}
bool
-MemCtrl::inReadBusState(bool next_state) const
+MemCtrl::inReadBusState(bool next_state, const MemInterface* mem_intr) const
{
// check the bus state
if (next_state) {
// use busStateNext to get the state that will be used
// for the next burst
-        return (busStateNext == MemCtrl::READ);
+        return (mem_intr->busStateNext == MemCtrl::READ);
} else {
-        return (busState == MemCtrl::READ);
+        return (mem_intr->busState == MemCtrl::READ);
}
}
bool
-MemCtrl::inWriteBusState(bool next_state) const
+MemCtrl::inWriteBusState(bool next_state, const MemInterface* mem_intr) const
{
// check the bus state
if (next_state) {
// use busStateNext to get the state that will be used
// for the next burst
-        return (busStateNext == MemCtrl::WRITE);
+        return (mem_intr->busStateNext == MemCtrl::WRITE);
} else {
-        return (busState == MemCtrl::WRITE);
+        return (mem_intr->busState == MemCtrl::WRITE);
}
}

@@ -813,13 +819,13 @@
// Update the common bus stats
if (mem_pkt->isRead()) {
-        ++readsThisTime;
+        ++(mem_intr->readsThisTime);
// Update latency stats
stats.requestorReadTotalLat[mem_pkt->requestorId()] +=
mem_pkt->readyTime - mem_pkt->entryTime;
stats.requestorReadBytes[mem_pkt->requestorId()] += mem_pkt->size;
} else {
-        ++writesThisTime;
+        ++(mem_intr->writesThisTime);
stats.requestorWriteBytes[mem_pkt->requestorId()] += mem_pkt->size;
stats.requestorWriteTotalLat[mem_pkt->requestorId()] +=
mem_pkt->readyTime - mem_pkt->entryTime;
@@ -836,8 +842,8 @@
// Default to busy status and update based on interface specifics
// Default state of unused interface is 'true'
bool mem_busy = true;
-    bool all_writes_nvm = mem_intr->numWritesQueued == totalWriteQueueSize;
-    bool read_queue_empty = totalReadQueueSize == 0;
+    bool all_writes_nvm = mem_intr->numWritesQueued ==
+                          mem_intr->writeQueueSize;
+    bool read_queue_empty = mem_intr->readQueueSize == 0;
mem_busy = mem_intr->isBusy(read_queue_empty, all_writes_nvm);
if (mem_busy) {
// if all ranks are refreshing wait for them to finish
@@ -884,27 +890,27 @@
}
// detect bus state change
-    recordTurnaroundStats();
+    recordTurnaroundStats(mem_intr->busState, mem_intr->busStateNext);
DPRINTF(MemCtrl, "QoS Turnarounds selected state %s %s\n",
-            (busState==MemCtrl::READ)?"READ":"WRITE",
+            (mem_intr->busState==MemCtrl::READ)?"READ":"WRITE",
switched_cmd_type?"[turnaround triggered]":"");
if (switched_cmd_type) {
-        if (busState == MemCtrl::READ) {
+        if (mem_intr->busState == MemCtrl::READ) {
DPRINTF(MemCtrl,
"Switching to writes after %d reads with %d reads "
"waiting\n", readsThisTime, totalReadQueueSize);
stats.rdPerTurnAround.sample(readsThisTime);
readsThisTime = 0;
"Switching to writes after %d reads with %d reads "
"waiting\n", mem_intr->readsThisTime, mem_intr->readQueueSize);
stats.rdPerTurnAround.sample(mem_intr->readsThisTime);
mem_intr->readsThisTime = 0;
} else {
DPRINTF(MemCtrl,
"Switching to reads after %d writes with %d writes "
"waiting\n", writesThisTime, totalWriteQueueSize);
stats.wrPerTurnAround.sample(writesThisTime);
writesThisTime = 0;
"Switching to reads after %d writes with %d writes "
"waiting\n", mem_intr->writesThisTime,
mem_intr->writeQueueSize);
stats.wrPerTurnAround.sample(mem_intr->writesThisTime);
mem_intr->writesThisTime = 0;
}
}
@@ -916,7 +922,7 @@
}
// updates current state
-    busState = busStateNext;
+    mem_intr->busState = mem_intr->busStateNext;
nonDetermReads(mem_intr);
@@ -925,18 +931,18 @@
}
// when we get here it is either a read or a write
-    if (busState == READ) {
+    if (mem_intr->busState == READ) {
// track if we should switch or not
bool switch_to_writes = false;
-        if (totalReadQueueSize == 0) {
+        if (mem_intr->readQueueSize == 0) {
// In the case there is no read request to go next,
// trigger writes if we have passed the low threshold (or
// if we are draining)
-            if (!(totalWriteQueueSize == 0) &&
+            if (!(mem_intr->writeQueueSize == 0) &&
(drainState() == DrainState::Draining ||
-                totalWriteQueueSize > writeLowThreshold)) {
+                mem_intr->writeQueueSize > writeLowThreshold)) {
DPRINTF(MemCtrl,
"Switching to writes due to read queue empty\n");
@@ -1011,6 +1017,7 @@
mem_pkt->qosValue(), mem_pkt->getAddr(), 1,
mem_pkt->readyTime - mem_pkt->entryTime);
+        mem_intr->readQueueSize--;
// Insert into response queue. It will be sent back to the
// requestor at its readyTime
@@ -1029,8 +1036,9 @@
// there are no other writes that can issue
// Also ensure that we've issued a minimum defined number
// of reads before switching, or have emptied the readQ
-        if ((totalWriteQueueSize > writeHighThreshold) &&
-            (readsThisTime >= minReadsPerSwitch || totalReadQueueSize == 0)
+        if ((mem_intr->writeQueueSize > writeHighThreshold) &&
+            (mem_intr->readsThisTime >= minReadsPerSwitch ||
+            mem_intr->readQueueSize == 0)
&& !(nvmWriteBlock(mem_intr))) {
switch_to_writes = true;
}
@@ -1045,7 +1053,7 @@
        // draining), or because the writes hit the high threshold
if (switch_to_writes) {
// transition to writing
-            busStateNext = WRITE;
+            mem_intr->busStateNext = WRITE;
}
} else {
@@ -1099,6 +1107,7 @@
mem_pkt->qosValue(), mem_pkt->getAddr(), 1,
mem_pkt->readyTime - mem_pkt->entryTime);
+        mem_intr->writeQueueSize--;
        // remove the request from the queue - the iterator is no longer valid
writeQueue[mem_pkt->qosValue()].erase(to_write);
@@ -1112,15 +1121,15 @@
// If we are interfacing to NVM and have filled the writeRespQueue,
// with only NVM writes in Q, then switch to reads
bool below_threshold =
-            totalWriteQueueSize + minWritesPerSwitch < writeLowThreshold;
+            mem_intr->writeQueueSize + minWritesPerSwitch <
+            writeLowThreshold;
-        if (totalWriteQueueSize == 0 ||
+        if (mem_intr->writeQueueSize == 0 ||
(below_threshold && drainState() != DrainState::Draining) ||
-            (totalReadQueueSize && writesThisTime >= minWritesPerSwitch) ||
-            (totalReadQueueSize && (nvmWriteBlock(mem_intr)))) {
+            (mem_intr->readQueueSize && mem_intr->writesThisTime >=
+            minWritesPerSwitch) ||
+            (mem_intr->readQueueSize && (nvmWriteBlock(mem_intr)))) {
// turn the bus back around for reads again
-            busStateNext = MemCtrl::READ;
+            mem_intr->busStateNext = MemCtrl::READ;
// note that the we switch back to reads also in the idle
// case, which eventually will check for any draining and
@@ -1133,7 +1142,7 @@
if (!next_req_event.scheduled())
schedule(next_req_event, std::max(mem_intr->nextReqTime,
curTick()));
        DPRINTF(Drain, "Memory controller not drained, write: %d, read: %d,"
                " resp: %d\n", totalWriteQueueSize, totalReadQueueSize,
                respQueue.size());
diff --git a/src/mem/mem_ctrl.hh b/src/mem/mem_ctrl.hh
index 2819fb4..917798f 100644
--- a/src/mem/mem_ctrl.hh
+++ b/src/mem/mem_ctrl.hh
@@ -517,8 +517,6 @@
uint32_t writeLowThreshold;
const uint32_t minWritesPerSwitch;
const uint32_t minReadsPerSwitch;
-    uint32_t writesThisTime;
-    uint32_t readsThisTime;
/**
-    bool inReadBusState(bool next_state) const;
+    bool inReadBusState(bool next_state, const MemInterface* mem_intr) const;
/**
-    bool inWriteBusState(bool next_state) const;
+    bool inWriteBusState(bool next_state, const MemInterface* mem_intr) const;
Port &getPort(const std::string &if_name,
PortID idx=InvalidPortID) override;
diff --git a/src/mem/mem_interface.hh b/src/mem/mem_interface.hh
index 8d6f4fe..b0f762f 100644
--- a/src/mem/mem_interface.hh
+++ b/src/mem/mem_interface.hh
@@ -190,6 +190,28 @@
Tick nextReqTime = 0;
+    /**
+     * Reads/writes performed by the controller for this interface before
+     * bus direction is switched
+     */
+    uint32_t readsThisTime = 0;
+    uint32_t writesThisTime = 0;
+
+    /**
+     * Read/write packets in the read/write queue for this interface
+     * qos/mem_ctrl.hh has similar counters, but they track all packets
+     * in the controller for all memory interfaces connected to the
+     * controller.
+     */
+    uint32_t readQueueSize = 0;
+    uint32_t writeQueueSize = 0;
+
+    MemCtrl::BusState busState = MemCtrl::READ;
+
+    /** bus state for next request event triggered */
+    MemCtrl::BusState busStateNext = MemCtrl::READ;
/**
diff --git a/src/mem/nvm_interface.cc b/src/mem/nvm_interface.cc
--- a/src/mem/nvm_interface.cc
+++ b/src/mem/nvm_interface.cc
bool
NVMInterface::burstReady(MemPacket* pkt) const {
    return (pkt->readyTime <= curTick()) && (numReadDataReady > 0);
}
-    return (ctrl->inReadBusState(true) ?
+    return (ctrl->inReadBusState(true, this) ?
(numReadDataReady == 0) && !read_queue_empty :
writeRespQueueFull() && read_queue_empty &&
all_writes_nvm);
diff --git a/src/mem/qos/mem_ctrl.cc b/src/mem/qos/mem_ctrl.cc
index 9bf1328..b102ccf 100644
--- a/src/mem/qos/mem_ctrl.cc
+++ b/src/mem/qos/mem_ctrl.cc
@@ -355,7 +355,7 @@
}
void
-MemCtrl::recordTurnaroundStats()
+MemCtrl::recordTurnaroundStats(BusState busState, BusState busStateNext)
{
if (busStateNext != busState) {
if (busState == READ) {
diff --git a/src/mem/qos/mem_ctrl.hh b/src/mem/qos/mem_ctrl.hh
index 359e285..2e295d0 100644
--- a/src/mem/qos/mem_ctrl.hh
+++ b/src/mem/qos/mem_ctrl.hh
@@ -242,7 +242,7 @@
* Record statistics on turnarounds based on
* busStateNext and busState values
*/
-    void recordTurnaroundStats();
+    void recordTurnaroundStats(BusState busState, BusState busStateNext);
/**
diff --git a/src/mem/qos/mem_sink.cc b/src/mem/qos/mem_sink.cc
--- a/src/mem/qos/mem_sink.cc
+++ b/src/mem/qos/mem_sink.cc
    // Record turnaround stats and update current state direction
-    recordTurnaroundStats();
+    recordTurnaroundStats(busState, busStateNext);
// Set current bus state
setCurrentBusState();
diff --git a/src/python/gem5/components/memory/hbm.py
b/src/python/gem5/components/memory/hbm.py
index 35497c2..75db1f9 100644
--- a/src/python/gem5/components/memory/hbm.py
+++ b/src/python/gem5/components/memory/hbm.py
@@ -122,7 +122,6 @@
# for interleaving across pseudo channels (at 64B currently)
mask_list.insert(0, 1 << 6)
for i, ctrl in enumerate(self.mem_ctrl):
-            ctrl.partitioned_q = False
ctrl.dram.range = AddrRange(
start=self._mem_range.start,
size=self._mem_range.size(),
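
A note on the always-split queues: the DPRINTF changes above report
writeBufferSize/2 and readBufferSize/2 as the per-pseudo-channel limits,
with occupancy tracked in the new per-interface readQueueSize and
writeQueueSize counters. A standalone sketch of that accounting, assuming
this reading of the patch (not gem5 code; names below are invented):

    // Standalone sketch, not gem5 code: with the queues always split, each
    // pseudo channel may occupy at most half of the controller's shared
    // buffer, matching the readBufferSize/2 and writeBufferSize/2 limits
    // in the DPRINTFs above.
    #include <cstdint>
    #include <cstdio>

    struct PseudoChannel {
        uint32_t readQueueSize = 0;  // per-interface counter, as in the patch
    };

    // analogous to HBMCtrl::readQueueFullPC0/PC1 after this change
    bool readQueueFullPC(const PseudoChannel& pc, uint32_t resp_queue_size,
                         uint32_t needed_entries, uint32_t read_buffer_size)
    {
        // responses in flight count against the same half-sized limit
        return pc.readQueueSize + resp_queue_size + needed_entries
                   > read_buffer_size / 2;
    }

    int main()
    {
        PseudoChannel pc0{30};
        // 30 queued + 2 responses + 1 needed = 33 > 64/2, so the PC is full
        std::printf("PC0 read queue full: %d\n",
                    readQueueFullPC(pc0, 2, 1, 64));
        return 0;
    }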
--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/65491?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings?usp=email
Gerrit-MessageType: merged
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ifb599e611ad99f6c511baaf245bad2b5c9210a86
Gerrit-Change-Number: 65491
Gerrit-PatchSet: 7
Gerrit-Owner: Ayaz Akram <yazakram@ucdavis.edu>
Gerrit-Reviewer: Ayaz Akram <yazakram@ucdavis.edu>
Gerrit-Reviewer: Jason Lowe-Power <jason@lowepower.com>
Gerrit-Reviewer: Jason Lowe-Power <power.jg@gmail.com>
Gerrit-Reviewer: Nikos Nikoleris <nikos.nikoleris@arm.com>
Gerrit-Reviewer: kokoro <noreply+kokoro@google.com>