WE
Wendy Elsasser (Gerrit)
Tue, May 12, 2020 6:30 PM
Wendy Elsasser has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/28968 )
Change subject: mem: Make DRAMCtrl a ClockedObject
......................................................................
mem: Make DRAMCtrl a ClockedObject
Made DRAMCtrl a ClockedObject, with DRAMInterface
defined as an AbstractMemory. The address
ranges are now defined per interface. Currently
the model only includes a DRAMInterface but this
can be expanded for other media types.
The controller object includes a parameter to the
interface, which is setup when gem5 is configured.
Change-Id: I6a368b845d574a713c7196c5671188ca8c1dc5e8
M configs/common/MemConfig.py
M configs/dram/low_power_sweep.py
M configs/dram/sweep.py
M configs/learning_gem5/part1/simple.py
M configs/learning_gem5/part1/two_level.py
M configs/learning_gem5/part2/simple_cache.py
M configs/learning_gem5/part2/simple_memobj.py
M configs/learning_gem5/part3/simple_ruby.py
M src/mem/DRAMCtrl.py
A src/mem/DRAMInterface.py
M src/mem/SConscript
M src/mem/dram_ctrl.cc
M src/mem/dram_ctrl.hh
M src/mem/drampower.cc
M src/mem/drampower.hh
M src/mem/qos/QoSMemCtrl.py
M src/mem/qos/QoSMemSinkCtrl.py
A src/mem/qos/QoSMemSinkInterface.py
M src/mem/qos/SConscript
M src/mem/qos/mem_ctrl.cc
M src/mem/qos/mem_ctrl.hh
M src/mem/qos/mem_sink.cc
M src/mem/qos/mem_sink.hh
M tests/configs/base_config.py
24 files changed, 1,934 insertions(+), 1,760 deletions(-)
diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py
index 9443520..ab6b933 100644
--- a/configs/common/MemConfig.py
+++ b/configs/common/MemConfig.py
@@ -40,7 +40,7 @@
from common import ObjectList
from common import HMC
-def create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits, intlv_size):
+def create_mem_intf(intf, r, i, nbr_mem_ctrls, intlv_bits, intlv_size):
"""
Helper function for creating a single memory controller from the given
options. This function is invoked multiple times in config_mem
function
@@ -59,33 +59,33 @@
# Create an instance so we can figure out the address
# mapping and row-buffer size
-
interface = intf()
Only do this for DRAMs
- if issubclass(cls, m5.objects.DRAMCtrl):
- if issubclass(intf, m5.objects.DRAMInterface):
# If the channel bits are appearing after the column
# bits, we need to add the appropriate number of bits
# for the row buffer size
-
rowbuffer_size = interface.device_rowbuffer_size.value * \
-
interface.devices_per_rank.value
intlv_low_bit = int(math.log(rowbuffer_size, 2))
# We got all we need to configure the appropriate address
# range
- ctrl.range = m5.objects.AddrRange(r.start, size = r.size(),
- interface.range = m5.objects.AddrRange(r.start, size = r.size(),
intlvHighBit =
intlv_low_bit + intlv_bits - 1,
xorHighBit =
xor_low_bit + intlv_bits - 1,
intlvBits = intlv_bits,
intlvMatch = i)
def config_mem(options, system):
"""
@@ -144,10 +144,10 @@
if 2 ** intlv_bits != nbr_mem_ctrls:
fatal("Number of memory channels must be a power of 2")
- cls = ObjectList.mem_list.get(opt_mem_type)
- intf = ObjectList.mem_list.get(opt_mem_type)
mem_ctrls = []
- if opt_elastic_trace_en and not issubclass(cls,
m5.objects.SimpleMemory):
- if opt_elastic_trace_en and not issubclass(intf,
m5.objects.SimpleMemory):
fatal("When elastic trace is enabled, configure mem-type as "
"simple-mem.")
@@ -158,36 +158,56 @@
intlv_size = max(opt_mem_channels_intlv, system.cache_line_size.value)
# For every range (most systems will only have one), create an
-
array of controllers and set their parameters to match their
-
address mapping in the case of a DRAM
-
array of memory interfaces and set their parameters to match
-
their address mapping in the case of a DRAM
for r in system.mem_ranges:
for i in range(nbr_mem_ctrls):
-
mem_ctrl = create_mem_ctrl(cls, r, i, nbr_mem_ctrls,
intlv_bits,
-
# Create the DRAM interface
-
dram_intf = create_mem_intf(intf, r, i, nbr_mem_ctrls,
intlv_bits,
intlv_size)
+
# Set the number of ranks based on the command-line
# options if it was explicitly set
-
if issubclass(cls, m5.objects.DRAMCtrl) and opt_mem_ranks:
-
mem_ctrl.ranks_per_channel = opt_mem_ranks
opt_mem_ranks:
-
if issubclass(intf, m5.objects.DRAMInterface):
-
dram_intf.enable_dram_powerdown = opt_dram_powerdown
if opt_elastic_trace_en:
-
dram_intf.latency = '1ns'
print("For elastic trace, over-riding Simple Memory "
"latency to 1ns.")
-
# Create the controller that will drive the interface
-
if opt_mem_type == "HMC_2500_1x32":
-
# The static latency of the vault controllers is estimated
-
# to be smaller than a full DRAM channel controller
-
mem_ctrl = m5.objects.DRAMCtrl(min_writes_per_switch = 8,
-
static_backend_latency
= '4ns',
= '4ns')
-
else:
-
mem_ctrl = m5.objects.DRAMCtrl()
-
# Override buffer sizes with interface specific values
-
mem_ctrl.write_buffer_size = dram_intf.write_buffer_size
-
mem_ctrl.read_buffer_size = dram_intf.read_buffer_size
-
# Hookup the controller to the interface and add to the list
-
mem_ctrl.dram = dram_intf
mem_ctrls.append(mem_ctrl)
- subsystem.mem_ctrls = mem_ctrls
-
Connect the controllers to the membus
- for i in range(len(subsystem.mem_ctrls)):
-
Create a controller and connect the interfaces to a controller
- for i in range(len(mem_ctrls)):
if opt_mem_type == "HMC_2500_1x32":
for
# each vault. All vaults are same size.
All rights reserved.
The license below extends only to copyright in the software and shall
@@ -37,6 +37,7 @@
from __future__ import absolute_import
import argparse
+import math
import m5
from m5.objects import *
@@ -57,6 +58,10 @@
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+dram_generators = {
-
"DRAM" : lambda x: x.createDram,
+}
-
Use a single-channel DDR4-2400 in 16x4 configuration by default
parser.add_argument("--mem-type", default="DDR4_2400_16x4",
choices=ObjectList.mem_list.get_names(),
@@ -77,7 +82,7 @@
help = "Percentage of read commands")
parser.add_argument("--addr-map",
-
choices=ObjectList.dram_addr_map_list.get_names(),
default="RoRaBaCoCh", help = "DRAM address map policy")
parser.add_argument("--idle-end", type=int, default=50000000,
@@ -111,14 +116,19 @@
Sanity check for memory controller class.
if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl):
- fatal("This script assumes the memory is a DRAMCtrl subclass")
- fatal("This script assumes the controller is a DRAMCtrl subclass")
+if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface):
- fatal("This script assumes the memory is a DRAMInterface subclass")
There is no point slowing things down by saving any data.
-system.mem_ctrls[0].null = True
+system.mem_ctrls[0].dram.null = True
+
+# enable DRAM low power states
+system.mem_ctrls[0].dram.enable_dram_powerdown = True
Set the address mapping based on input argument
-system.mem_ctrls[0].addr_mapping = args.addr_map
-system.mem_ctrls[0].page_policy = args.page_policy
+system.mem_ctrls[0].dram.addr_mapping = args.addr_map
+system.mem_ctrls[0].dram.page_policy = args.page_policy
We create a traffic generator state for each param combination we want to
test. Each traffic generator state is specified in the config file and
the
@@ -126,28 +136,23 @@
Stats are dumped and reset at the state transition.
period = 250000000
-# We specify the states in a config file input to the traffic generator.
-cfg_file_name = "lowp_sweep.cfg"
-cfg_file_path = os.path.dirname(__file__) + "/" + cfg_file_name
-cfg_file = open(cfg_file_path, 'w')
Get the number of banks
-nbr_banks = int(system.mem_ctrls[0].banks_per_rank.value)
+nbr_banks = int(system.mem_ctrls[0].dram.banks_per_rank.value)
determine the burst size in bytes
-burst_size = int((system.mem_ctrls[0].devices_per_rank.value *
+burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
-page_size = system.mem_ctrls[0].devices_per_rank.value * \
- system.mem_ctrls[0].device_rowbuffer_size.value
+page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
- system.mem_ctrls[0].dram.device_rowbuffer_size.value
Inter-request delay should be such that we can hit as many transitions
to/from low power states as possible to. We provide a min and max itt to
the
traffic generator and it randomises in the range. The parameter is in
seconds and we need it in ticks (ps).
-itt_min = system.mem_ctrls[0].tBURST.value * 1000000000000
+itt_min = system.mem_ctrls[0].dram.tBURST.value * 1000000000000
#The itt value when set to (tRAS + tRP + tCK) covers the case where
a read command is delayed beyond the delay from ACT to PRE_PDN entry of
the
@@ -155,9 +160,9 @@
between a write and power down entry will be tRCD + tCL + tWR + tRP +
tCK.
As we use this delay as a unit and create multiples of it as bigger
delays
for the sweep, this parameter works for reads, writes and mix of them.
-pd_entry_time = (system.mem_ctrls[0].tRAS.value +
+pd_entry_time = (system.mem_ctrls[0].dram.tRAS.value +
-
system.mem_ctrls[0].dram.tRP.value +
-
system.mem_ctrls[0].dram.tCK.value) * 1000000000000
We sweep itt max using the multipliers specified by the user.
itt_max_str = args.itt_list.strip().split()
@@ -180,42 +185,11 @@
banks
bank_util_values = [1, int(nbr_banks/2), nbr_banks]
-# Next we create the config file, but first a comment
-cfg_file.write("""# STATE state# period mode=DRAM
-# read_percent start_addr end_addr req_size min_itt max_itt data_limit
-# stride_size page_size #banks #banks_util addr_map #ranks\n""")
-addr_map = m5.objects.AddrMap.map[args.addr_map]
-nxt_state = 0
-for itt_max in itt_max_values:
- for bank in bank_util_values:
-
for stride_size in stride_values:
-
cfg_file.write("STATE %d %d %s %d 0 %d %d "
-
"%d %d %d %d %d %d %d %d %d\n" %
-
(nxt_state, period, "DRAM", args.rd_perc,
max_addr,
-
burst_size, itt_min, itt_max, 0, stride_size,
-
page_size, nbr_banks, bank, addr_map,
-
args.mem_ranks))
-
nxt_state = nxt_state + 1
-
State for idle period
idle_period = args.idle_end
-cfg_file.write("STATE %d %d IDLE\n" % (nxt_state, idle_period))
-# Init state is state 0
-cfg_file.write("INIT 0\n")
-# Go through the states one by one
-for state in range(1, nxt_state + 1):
- cfg_file.write("TRANSITION %d %d 1\n" % (state - 1, state))
-# Transition from last state to itself to not break the probability math
-cfg_file.write("TRANSITION %d %d 1\n" % (nxt_state, nxt_state))
-cfg_file.close()
create a traffic generator, and point it to the file we just created
-system.tgen = TrafficGen(config_file = cfg_file_path)
+system.tgen = PyTrafficGen()
add a communication monitor
system.monitor = CommMonitor()
@@ -230,14 +204,34 @@
every period, dump and reset all stats
periodicStatDump(period)
+# run Forrest, run!
root = Root(full_system = False, system = system)
root.system.mem_mode = 'timing'
m5.instantiate()
+def trace():
- addr_map = ObjectList.dram_addr_map_list.get(args.addr_map)
- generator = dram_generators["DRAM"](system.tgen)
- for itt_max in itt_max_values:
-
for bank in bank_util_values:
-
for stride_size in stride_values:
-
num_seq_pkts = int(math.ceil(float(stride_size) /
burst_size))
-
yield generator(period,
-
0, max_addr, burst_size, int(itt_min),
-
int(itt_max), args.rd_perc, 0,
-
num_seq_pkts, page_size, nbr_banks, bank,
-
addr_map, args.mem_ranks)
- yield system.tgen.createIdle(idle_period)
- yield system.tgen.createExit(0)
+system.tgen.start(trace())
+
Simulate for exactly as long as it takes to go through all the states
This is why sim exists.
-m5.simulate(nxt_state * period + idle_period)
+m5.simulate()
+
print("--- Done DRAM low power sweep ---")
print("Fixed params - ")
print("\tburst: %d, banks: %d, max stride: %d, itt min: %s ns" %
@@ -247,4 +241,3 @@
print("\titt max values", itt_max_values)
print("\tbank utilization values", bank_util_values)
print("\tstride values:", stride_values)
-print("Traffic gen config file:", cfg_file_name)
diff --git a/configs/dram/sweep.py b/configs/dram/sweep.py
index d3c86c3..6a49f44 100644
--- a/configs/dram/sweep.py
+++ b/configs/dram/sweep.py
@@ -116,13 +116,15 @@
the following assumes that we are using the native DRAM
controller, check to be sure
if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl):
- fatal("This script assumes the memory is a DRAMCtrl subclass")
- fatal("This script assumes the controller is a DRAMCtrl subclass")
+if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface):
- fatal("This script assumes the memory is a DRAMInterface subclass")
there is no point slowing things down by saving any data
-system.mem_ctrls[0].null = True
+system.mem_ctrls[0].dram.null = True
Set the address mapping based on input argument
-system.mem_ctrls[0].addr_mapping = options.addr_map
+system.mem_ctrls[0].dram.addr_mapping = options.addr_map
stay in each state for 0.25 ms, long enough to warm things up, and
short enough to avoid hitting a refresh
@@ -133,21 +135,21 @@
the DRAM maximum bandwidth to ensure that it is saturated
get the number of banks
-nbr_banks = system.mem_ctrls[0].banks_per_rank.value
+nbr_banks = system.mem_ctrls[0].dram.banks_per_rank.value
determine the burst length in bytes
-burst_size = int((system.mem_ctrls[0].devices_per_rank.value *
+burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
-page_size = system.mem_ctrls[0].devices_per_rank.value * \
- system.mem_ctrls[0].device_rowbuffer_size.value
+page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
- system.mem_ctrls[0].dram.device_rowbuffer_size.value
match the maximum bandwidth of the memory, the parameter is in seconds
and we need it in ticks (ps)
-itt = getattr(system.mem_ctrls[0].tBURST_MIN, 'value',
+itt = getattr(system.mem_ctrls[0].dram.tBURST_MIN, 'value',
-
system.mem_ctrls[0].dram.tBURST.value) * 1000000000000
assume we start at 0
max_addr = mem_range.end
diff --git a/configs/learning_gem5/part1/simple.py
b/configs/learning_gem5/part1/simple.py
index ef73a06..cfd15be 100644
--- a/configs/learning_gem5/part1/simple.py
+++ b/configs/learning_gem5/part1/simple.py
@@ -77,8 +77,9 @@
system.cpu.interrupts[0].int_slave = system.membus.master
Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
Connect the system up to the membus
diff --git a/configs/learning_gem5/part1/two_level.py
b/configs/learning_gem5/part1/two_level.py
index 564c785..0dbcfc7 100644
--- a/configs/learning_gem5/part1/two_level.py
+++ b/configs/learning_gem5/part1/two_level.py
@@ -132,8 +132,9 @@
system.system_port = system.membus.slave
Create a DDR3 memory controller
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
Create a process for a simple "Hello World" application
diff --git a/configs/learning_gem5/part2/simple_cache.py
b/configs/learning_gem5/part2/simple_cache.py
index 8d98d92..fbea73d 100644
--- a/configs/learning_gem5/part2/simple_cache.py
+++ b/configs/learning_gem5/part2/simple_cache.py
@@ -76,8 +76,9 @@
system.cpu.interrupts[0].int_slave = system.membus.master
Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
Connect the system up to the membus
diff --git a/configs/learning_gem5/part2/simple_memobj.py
b/configs/learning_gem5/part2/simple_memobj.py
index d30977c..e792eb9 100644
--- a/configs/learning_gem5/part2/simple_memobj.py
+++ b/configs/learning_gem5/part2/simple_memobj.py
@@ -74,8 +74,9 @@
system.cpu.interrupts[0].int_slave = system.membus.master
Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
Connect the system up to the membus
diff --git a/configs/learning_gem5/part3/simple_ruby.py
b/configs/learning_gem5/part3/simple_ruby.py
index c47ee7e..7f70a8c 100644
--- a/configs/learning_gem5/part3/simple_ruby.py
+++ b/configs/learning_gem5/part3/simple_ruby.py
@@ -68,8 +68,9 @@
system.cpu = [TimingSimpleCPU() for i in range(2)]
Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
create the interrupt controller for the CPU and connect to the membus
for cpu in system.cpu:
diff --git a/src/mem/DRAMCtrl.py b/src/mem/DRAMCtrl.py
index 0f70dff..dff5000 100644
--- a/src/mem/DRAMCtrl.py
+++ b/src/mem/DRAMCtrl.py
@@ -40,26 +40,12 @@
from m5.params import *
from m5.proxy import *
-from m5.objects.AbstractMemory import *
from m5.objects.QoSMemCtrl import *
Enum for memory scheduling algorithms, currently First-Come
First-Served and a First-Row Hit then First-Come First-Served
class MemSched(Enum): vals = ['fcfs', 'frfcfs']
-# Enum for the address mapping. With Ch, Ra, Ba, Ro and Co denoting
-# channel, rank, bank, row and column, respectively, and going from
-# MSB to LSB. Available are RoRaBaChCo and RoRaBaCoCh, that are
-# suitable for an open-page policy, optimising for sequential accesses
-# hitting in the open row. For a closed-page policy, RoCoRaBaCh
-# maximises parallelism.
-class AddrMap(Enum): vals = ['RoRaBaChCo', 'RoRaBaCoCh', 'RoCoRaBaCh']
-# Enum for the page policy, either open, open_adaptive, close, or
-# close_adaptive.
-class PageManage(Enum): vals = ['open', 'open_adaptive', 'close',
-
'close_adaptive']
-
DRAMCtrl is a single-channel single-ported DRAM controller model
that aims to model the most important system-level performance
effects of a DRAM without getting into too much detail of the DRAM
@@ -72,8 +58,11 @@
# bus in front of the controller for multiple ports
port = SlavePort("Slave port")
-
the basic configuration of the controller architecture, note
-
that each entry corresponds to a burst for the specific DRAM
-
Interface to volatile, DRAM media
-
dram = Param.DRAMInterface(Parent.any, "DRAM interface")
-
Set default buffer sizes
-
each entry corresponds to a burst for the specific DRAM
configuration (e.g. x32 with burst length 8 is 32 bytes) and not
the cacheline size or request/packet size
write_buffer_size = Param.Unsigned(64, "Number of write queue entries")
@@ -93,15 +82,6 @@
scheduler, address map and page policy
mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy")
-
addr_mapping = Param.AddrMap('RoRaBaCoCh', "Address mapping policy")
-
page_policy = Param.PageManage('open_adaptive', "Page management
policy")
-
enforce a limit on the number of accesses per row
-
max_accesses_per_row = Param.Unsigned(16, "Max accesses per row
before "
-
"closing");
-
size of DRAM Chip in Bytes
-
device_size = Param.MemorySize("Size of DRAM chip")
pipeline latency of the controller and PHY, split into a
frontend part and a backend part, with reads and writes serviced
@@ -109,1404 +89,3 @@
# serviced by the memory seeing the sum of the two
static_frontend_latency = Param.Latency("10ns", "Static frontend
latency")
static_backend_latency = Param.Latency("10ns", "Static backend
latency")
-
the physical organisation of the DRAM
- device_bus_width = Param.Unsigned("data bus width in bits for each
DRAM "\
-
"device/chip")
- burst_length = Param.Unsigned("Burst length (BL) in beats")
- device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\
-
"device/chip")
- devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
- ranks_per_channel = Param.Unsigned("Number of ranks per channel")
-
default to 0 bank groups per rank, indicating bank group architecture
-
is not used
-
update per memory class when bank group architecture is supported
- bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per
rank")
- banks_per_rank = Param.Unsigned("Number of banks per rank")
-
Enable DRAM powerdown states if True. This is False by default due to
-
performance being lower when enabled
- enable_dram_powerdown = Param.Bool(False, "Enable powerdown states")
-
For power modelling we need to know if the DRAM has a DLL or not
- dll = Param.Bool(True, "DRAM has DLL or not")
-
DRAMPower provides in addition to the core power, the possibility to
-
include RD/WR termination and IO power. This calculation assumes some
-
default values. The integration of DRAMPower with gem5 does not
include
-
IO and RD/WR termination power by default. This might be added as an
-
additional feature in the future.
-
timing behaviour and constraints - all in nanoseconds
-
the base clock period of the DRAM
- tCK = Param.Latency("Clock period")
-
the amount of time in nanoseconds from issuing an activate command
-
to the data being available in the row buffer for a read/write
- tRCD = Param.Latency("RAS to CAS delay")
-
the time from issuing a read/write command to seeing the actual data
- tCL = Param.Latency("CAS latency")
-
minimum time between a precharge and subsequent activate
- tRP = Param.Latency("Row precharge time")
-
minimum time between an activate and a precharge to the same row
- tRAS = Param.Latency("ACT to PRE delay")
-
minimum time between a write data transfer and a precharge
- tWR = Param.Latency("Write recovery time")
-
minimum time between a read and precharge command
- tRTP = Param.Latency("Read to precharge")
-
time to complete a burst transfer, typically the burst length
-
divided by two due to the DDR bus, but by making it a parameter
-
it is easier to also evaluate SDR memories like WideIO.
-
This parameter has to account for burst length.
-
Read/Write requests with data size larger than one full burst are
broken
access,
-
which could be greater than tBURST when the memory access time is
greater
-
than tBURST
- tBURST_MAX = Param.Latency(Self.tBURST, "Column access delay")
-
tBURST_MIN is the minimum delay between bursts, which could be less
than
-
tBURST when interleaving is supported
- tBURST_MIN = Param.Latency(Self.tBURST, "Minimum delay between bursts")
-
CAS-to-CAS delay for bursts to the same bank group
-
only utilized with bank group architectures; set to 0 for default
case
-
tBURST is equivalent to tCCD_S; no explicit parameter required
-
for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")
-
Write-to-Write delay for bursts to the same bank group
-
only utilized with bank group architectures; set to 0 for default
case
-
This will be used to enable different same bank group delays
-
for writes versus reads
- tCCD_L_WR = Param.Latency(Self.tCCD_L,
-
"Same bank group Write to Write delay")
-
time taken to complete one refresh cycle (N rows in all banks)
- tRFC = Param.Latency("Refresh cycle time")
-
refresh command interval, how often a "ref" command needs
-
to be sent. It is 7.8 us for a 64ms refresh requirement
- tREFI = Param.Latency("Refresh command interval")
-
write-to-read, same rank turnaround penalty
- tWTR = Param.Latency("Write to read, same rank switching time")
-
write-to-read, same rank turnaround penalty for same bank group
- tWTR_L = Param.Latency(Self.tWTR, "Write to read, same rank switching "
-
"time, same bank group")
-
read-to-write, same rank turnaround penalty
- tRTW = Param.Latency("Read to write, same rank switching time")
-
rank-to-rank bus delay penalty
-
this does not correlate to a memory timing parameter and encompasses:
-
1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD
-
different rank bus delay
- tCS = Param.Latency("Rank to rank switching time")
-
minimum precharge to precharge delay time
- tPPD = Param.Latency("0ns", "PRE to PRE delay")
-
maximum delay between two-cycle ACT command phases
- tAAD = Param.Latency(Self.tCK,
-
"Maximum delay between two-cycle ACT commands")
- two_cycle_activate = Param.Bool(False,
-
"Two cycles required to send activate")
-
minimum row activate to row activate delay time
- tRRD = Param.Latency("ACT to ACT delay")
-
only utilized with bank group architectures; set to 0 for default
case
- tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")
-
time window in which a maximum number of activates are allowed
-
to take place, set to 0 to disable
- tXAW = Param.Latency("X activation window")
- activation_limit = Param.Unsigned("Max number of activates in window")
-
time to exit power-down mode
-
Exit power-down to next valid command delay
- tXP = Param.Latency("0ns", "Power-up Delay")
-
Exit Powerdown to commands requiring a locked DLL
- tXPDLL = Param.Latency("0ns", "Power-up Delay with locked DLL")
-
time to exit self-refresh mode
- tXS = Param.Latency("0ns", "Self-refresh exit latency")
-
time to exit self-refresh mode with locked DLL
- tXSDLL = Param.Latency("0ns", "Self-refresh exit latency DLL")
-
number of data beats per clock. with DDR, default is 2, one per edge
- beats_per_clock = Param.Unsigned(2, "Data beats per clock")
- data_clock_sync = Param.Bool(False, "Synchronization commands
required")
-
Currently rolled into other params
- ######################################################################
-
tRC - assumed to be tRAS + tRP
-
Power Behaviour and Constraints
-
DRAMs like LPDDR and WideIO have 2 external voltage domains. These
are
-
defined as VDD and VDD2. Each current is defined for each voltage
domain
-
separately. For example, current IDD0 is active-precharge current for
-
voltage domain VDD and current IDD02 is active-precharge current for
-
voltage domain VDD2.
-
By default all currents are set to 0mA. Users who are only
interested in
-
the performance of DRAMs can leave them at 0.
-
Operating 1 Bank Active-Precharge current
- IDD0 = Param.Current("0mA", "Active precharge current")
-
Operating 1 Bank Active-Precharge current multiple voltage Range
- IDD02 = Param.Current("0mA", "Active precharge current VDD2")
-
Precharge Power-down Current: Slow exit
- IDD2P0 = Param.Current("0mA", "Precharge Powerdown slow")
-
Precharge Power-down Current: Slow exit multiple voltage Range
- IDD2P02 = Param.Current("0mA", "Precharge Powerdown slow VDD2")
-
Precharge Power-down Current: Fast exit
- IDD2P1 = Param.Current("0mA", "Precharge Powerdown fast")
-
Precharge Power-down Current: Fast exit multiple voltage Range
- IDD2P12 = Param.Current("0mA", "Precharge Powerdown fast VDD2")
-
Precharge Standby current
- IDD2N = Param.Current("0mA", "Precharge Standby current")
-
Precharge Standby current multiple voltage range
- IDD2N2 = Param.Current("0mA", "Precharge Standby current VDD2")
-
Active Power-down current: slow exit
- IDD3P0 = Param.Current("0mA", "Active Powerdown slow")
-
Active Power-down current: slow exit multiple voltage range
- IDD3P02 = Param.Current("0mA", "Active Powerdown slow VDD2")
-
Active Power-down current : fast exit
- IDD3P1 = Param.Current("0mA", "Active Powerdown fast")
-
Active Power-down current : fast exit multiple voltage range
- IDD3P12 = Param.Current("0mA", "Active Powerdown fast VDD2")
-
Active Standby current
- IDD3N = Param.Current("0mA", "Active Standby current")
-
Active Standby current multiple voltage range
- IDD3N2 = Param.Current("0mA", "Active Standby current VDD2")
-
Burst Read Operating Current
- IDD4R = Param.Current("0mA", "READ current")
-
Burst Read Operating Current multiple voltage range
- IDD4R2 = Param.Current("0mA", "READ current VDD2")
-
Burst Write Operating Current
- IDD4W = Param.Current("0mA", "WRITE current")
-
Burst Write Operating Current multiple voltage range
- IDD4W2 = Param.Current("0mA", "WRITE current VDD2")
-
Refresh Current
- IDD5 = Param.Current("0mA", "Refresh current")
-
Refresh Current multiple voltage range
- IDD52 = Param.Current("0mA", "Refresh current VDD2")
-
Self-Refresh Current
- IDD6 = Param.Current("0mA", "Self-refresh Current")
-
Self-Refresh Current multiple voltage range
- IDD62 = Param.Current("0mA", "Self-refresh Current VDD2")
-
Main voltage range of the DRAM
- VDD = Param.Voltage("0V", "Main Voltage Range")
-
Second voltage range defined by some DRAMs
- VDD2 = Param.Voltage("0V", "2nd Voltage Range")
-# A single DDR3-1600 x64 channel (one command and address bus), with
-# timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in
-# an 8x8 configuration.
-class DDR3_1600_8x8(DRAMCtrl):
-
size of device in bytes
- device_size = '512MB'
-
8x8 configuration, 8 devices each with an 8-bit interface
- device_bus_width = 8
-
DDR3 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
- device_rowbuffer_size = '1kB'
-
8x8 configuration, so 8 devices
- devices_per_rank = 8
-
Use two ranks
- ranks_per_channel = 2
-
DDR3 has 8 banks in all configurations
- banks_per_rank = 8
-
800 MHz
- tCK = '1.25ns'
-
8 beats across an x64 interface translates to 4 clocks @ 800 MHz
- tBURST = '5ns'
-
DDR3-1600 11-11-11
- tRCD = '13.75ns'
- tCL = '13.75ns'
- tRP = '13.75ns'
- tRAS = '35ns'
- tRRD = '6ns'
- tXAW = '30ns'
- activation_limit = 4
- tRFC = '260ns'
- tWR = '15ns'
-
Greater of 4 CK or 7.5 ns
- tWTR = '7.5ns'
-
Greater of 4 CK or 7.5 ns
- tRTP = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
- tRTW = '2.5ns'
-
Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
<=85C, half for >85C
- tREFI = '7.8us'
-
active powerdown and precharge powerdown exit time
- tXP = '6ns'
-
self refresh exit time
- tXS = '270ns'
-
Current values from datasheet Die Rev E,J
- IDD0 = '55mA'
- IDD2N = '32mA'
- IDD3N = '38mA'
- IDD4W = '125mA'
- IDD4R = '157mA'
- IDD5 = '235mA'
- IDD3P1 = '38mA'
- IDD2P1 = '32mA'
- IDD6 = '20mA'
- VDD = '1.5V'
-# A single HMC-2500 x32 model based on:
-# [1] DRAMSpec: a high-level DRAM bank modelling tool
-# developed at the University of Kaiserslautern. This high level tool
-# uses RC (resistance-capacitance) and CV (capacitance-voltage) models to
-# estimate the DRAM bank latency and power numbers.
-# [2] High performance AXI-4.0 based interconnect for extensible smart
memory
-# cubes (E. Azarkhish et. al)
-# Assumed for the HMC model is a 30 nm technology node.
-# The modelled HMC consists of 4 Gbit layers which sum up to 2GB of memory
(4
-# layers).
-# Each layer has 16 vaults and each vault consists of 2 banks per layer.
-# In order to be able to use the same controller used for 2D DRAM
generations
-# for HMC, the following analogy is done:
-# Channel (DDR) => Vault (HMC)
-# device_size (DDR) => size of a single layer in a vault
-# ranks per channel (DDR) => number of layers
-# banks per rank (DDR) => banks per layer
-# devices per rank (DDR) => devices per layer ( 1 for HMC).
-# The parameters for which no input is available are inherited from the
DDR3
-# configuration.
-# This configuration includes the latencies from the DRAM to the logic
layer
-# of the HMC
-class HMC_2500_1x32(DDR3_1600_8x8):
-
size of device
-
two banks per device with each bank 4MB [2]
- device_size = '8MB'
-
1x32 configuration, 1 device with 32 TSVs [2]
- device_bus_width = 32
-
HMC is a BL8 device [2]
- burst_length = 8
-
Each device has a page (row buffer) size of 256 bytes [2]
- device_rowbuffer_size = '256B'
-
1x32 configuration, so 1 device [2]
- devices_per_rank = 1
-
4 layers so 4 ranks [2]
- ranks_per_channel = 4
-
HMC has 2 banks per layer [2]
-
Each layer represents a rank. With 4 layers and 8 banks in total,
each
-
layer has 2 banks; thus 2 banks per rank.
- banks_per_rank = 2
-
1250 MHz [2]
- tCK = '0.8ns'
-
8 beats across an x32 interface translates to 4 clocks @ 1250 MHz
- tBURST = '3.2ns'
-
Values using DRAMSpec HMC model [1]
- tRCD = '10.2ns'
- tCL = '9.9ns'
- tRP = '7.7ns'
- tRAS = '21.6ns'
-
tRRD depends on the power supply network for each vendor.
-
We assume a tRRD of a double bank approach to be equal to 4 clock
-
cycles (Assumption)
- tRRD = '3.2ns'
-
activation limit is set to 0 since there are only 2 banks per vault
-
layer.
- activation_limit = 0
-
Values using DRAMSpec HMC model [1]
- tRFC = '59ns'
- tWR = '8ns'
- tRTP = '4.9ns'
-
Default different rank bus delay assumed to 1 CK for TSVs, @1250 MHz
=
-
0.8 ns (Assumption)
- tCS = '0.8ns'
-
Value using DRAMSpec HMC model [1]
- tREFI = '3.9us'
-
The default page policy in the vault controllers is simple closed
page
-
[2] nevertheless 'close' policy opens and closes the row multiple
times
-
for bursts larger than 32 Bytes. For this reason we
use 'close_adaptive'
- page_policy = 'close_adaptive'
-
RoCoRaBaCh resembles the default address mapping in HMC
- addr_mapping = 'RoCoRaBaCh'
- min_writes_per_switch = 8
-
These parameters do not directly correlate with buffer_size in real
-
hardware. Nevertheless, their value has been tuned to achieve a
-
bandwidth similar to the cycle-accurate model in [2]
- write_buffer_size = 32
- read_buffer_size = 32
-
The static latency of the vault controllers is estimated to be
smaller
-
than a full DRAM channel controller
- static_backend_latency='4ns'
- static_frontend_latency='4ns'
-# A single DDR3-2133 x64 channel refining a selected subset of the
-# options for the DDR-1600 configuration, based on the same DDR3-1600
-# 4 Gbit datasheet (Micron MT41J512M8). Most parameters are kept
-# consistent across the two configurations.
-class DDR3_2133_8x8(DDR3_1600_8x8):
-
1066 MHz
- tCK = '0.938ns'
-
8 beats across an x64 interface translates to 4 clocks @ 1066 MHz
- tBURST = '3.752ns'
-
DDR3-2133 14-14-14
- tRCD = '13.09ns'
- tCL = '13.09ns'
- tRP = '13.09ns'
- tRAS = '33ns'
- tRRD = '5ns'
- tXAW = '25ns'
-
Current values from datasheet
- IDD0 = '70mA'
- IDD2N = '37mA'
- IDD3N = '44mA'
- IDD4W = '157mA'
- IDD4R = '191mA'
- IDD5 = '250mA'
- IDD3P1 = '44mA'
- IDD2P1 = '43mA'
- IDD6 ='20mA'
- VDD = '1.5V'
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A2G4)
-# in an 16x4 configuration.
-# Total channel capacity is 32GB
-# 16 devices/rank * 2 ranks/channel * 1GB/device = 32GB/channel
-class DDR4_2400_16x4(DRAMCtrl):
-
size of device
- device_size = '1GB'
-
16x4 configuration, 16 devices each with a 4-bit interface
- device_bus_width = 4
-
DDR4 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 512 byte (1K columns x4)
- device_rowbuffer_size = '512B'
-
16x4 configuration, so 16 devices
- devices_per_rank = 16
-
Match our DDR3 configurations which is dual rank
- ranks_per_channel = 2
-
DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
-
Set to 4 for x4 case
- bank_groups_per_rank = 4
-
DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
-
configurations). Currently we do not capture the additional
-
constraints incurred by the bank groups
- banks_per_rank = 16
-
override the default buffer sizes and go for something larger to
-
accommodate the larger bank count
- write_buffer_size = 128
- read_buffer_size = 64
-
1200 MHz
- tCK = '0.833ns'
-
8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
-
tBURST is equivalent to the CAS-to-CAS delay (tCCD)
-
With bank group architectures, tBURST represents the CAS-to-CAS
-
delay for bursts to different bank groups (tCCD_S)
- tBURST = '3.332ns'
-
@2400 data rate, tCCD_L is 6 CK
-
CAS-to-CAS delay for bursts to the same bank group
-
tBURST is equivalent to tCCD_S; no explicit parameter required
-
for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = '5ns';
-
DDR4-2400 17-17-17
- tRCD = '14.16ns'
- tCL = '14.16ns'
- tRP = '14.16ns'
- tRAS = '32ns'
-
RRD_S (different bank group) for 512B page is MAX(4 CK, 3.3ns)
- tRRD = '3.332ns'
-
RRD_L (same bank group) for 512B page is MAX(4 CK, 4.9ns)
- tRRD_L = '4.9ns';
-
tFAW for 512B page is MAX(16 CK, 13ns)
- tXAW = '13.328ns'
- activation_limit = 4
-
tRFC is 350ns
- tRFC = '350ns'
- tWR = '15ns'
-
Here using the average of WTR_S and WTR_L
- tWTR = '5ns'
-
Greater of 4 CK or 7.5 ns
- tRTP = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666
ns
- tRTW = '1.666ns'
-
Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns
- tCS = '1.666ns'
-
<=85C, half for >85C
- tREFI = '7.8us'
-
active powerdown and precharge powerdown exit time
- tXP = '6ns'
-
self refresh exit time
-
exit delay to ACT, PRE, PREALL, REF, SREF Enter, and PD Enter is:
-
tRFC + 10ns = 340ns
- tXS = '340ns'
-
Current values from datasheet
- IDD0 = '43mA'
- IDD02 = '3mA'
- IDD2N = '34mA'
- IDD3N = '38mA'
- IDD3N2 = '3mA'
- IDD4W = '103mA'
- IDD4R = '110mA'
- IDD5 = '250mA'
- IDD3P1 = '32mA'
- IDD2P1 = '25mA'
- IDD6 = '30mA'
- VDD = '1.2V'
- VDD2 = '2.5V'
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A1G8)
-# in an 8x8 configuration.
-# Total channel capacity is 16GB
-# 8 devices/rank * 2 ranks/channel * 1GB/device = 16GB/channel
-class DDR4_2400_8x8(DDR4_2400_16x4):
-
8x8 configuration, 8 devices each with an 8-bit interface
- device_bus_width = 8
-
Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
- device_rowbuffer_size = '1kB'
-
8x8 configuration, so 8 devices
- devices_per_rank = 8
-
RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
- tRRD_L = '4.9ns';
- tXAW = '21ns'
-
Current values from datasheet
- IDD0 = '48mA'
- IDD3N = '43mA'
- IDD4W = '123mA'
- IDD4R = '135mA'
- IDD3P1 = '37mA'
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A512M16)
-# in an 4x16 configuration.
-# Total channel capacity is 4GB
-# 4 devices/rank * 1 ranks/channel * 1GB/device = 4GB/channel
-class DDR4_2400_4x16(DDR4_2400_16x4):
-
4x16 configuration, 4 devices each with an 16-bit interface
- device_bus_width = 16
-
Each device has a page (row buffer) size of 2 Kbyte (1K columns x16)
- device_rowbuffer_size = '2kB'
-
4x16 configuration, so 4 devices
- devices_per_rank = 4
-
Single rank for x16
- ranks_per_channel = 1
-
DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
-
Set to 2 for x16 case
- bank_groups_per_rank = 2
-
DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
-
configurations). Currently we do not capture the additional
-
constraints incurred by the bank groups
- banks_per_rank = 8
-
RRD_S (different bank group) for 2K page is MAX(4 CK, 5.3ns)
- tRRD = '5.3ns'
-
RRD_L (same bank group) for 2K page is MAX(4 CK, 6.4ns)
- tRRD_L = '6.4ns';
- tXAW = '30ns'
-
Current values from datasheet
- IDD0 = '80mA'
- IDD02 = '4mA'
- IDD2N = '34mA'
- IDD3N = '47mA'
- IDD4W = '228mA'
- IDD4R = '243mA'
- IDD5 = '280mA'
- IDD3P1 = '41mA'
-# A single LPDDR2-S4 x32 interface (one command/address bus), with
-# default timings based on a LPDDR2-1066 4 Gbit part (Micron MT42L128M32D1)
-# in a 1x32 configuration.
-class LPDDR2_S4_1066_1x32(DRAMCtrl):
-
No DLL in LPDDR2
- dll = False
-
size of device
- device_size = '512MB'
-
1x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
LPDDR2_S4 is a BL4 and BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 1KB
-
(this depends on the memory density)
- device_rowbuffer_size = '1kB'
-
1x32 configuration, so 1 device
- devices_per_rank = 1
-
Use a single rank
- ranks_per_channel = 1
-
LPDDR2-S4 has 8 banks in all configurations
- banks_per_rank = 8
-
533 MHz
- tCK = '1.876ns'
-
Fixed at 15 ns
- tRCD = '15ns'
-
8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle time
- tCL = '15ns'
-
Pre-charge one bank 15 ns (all banks 18 ns)
- tRP = '15ns'
- tRAS = '42ns'
- tWR = '15ns'
- tRTP = '7.5ns'
-
8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz.
-
Note this is a BL8 DDR device.
-
Requests larger than 32 bytes are broken down into multiple requests
-
in the controller
- tBURST = '7.5ns'
-
LPDDR2-S4, 4 Gbit
- tRFC = '130ns'
- tREFI = '3.9us'
-
active powerdown and precharge powerdown exit time
- tXP = '7.5ns'
-
self refresh exit time
- tXS = '140ns'
-
Irrespective of speed grade, tWTR is 7.5 ns
- tWTR = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @533 MHz = 3.75 ns
- tRTW = '3.75ns'
-
Default different rank bus delay to 2 CK, @533 MHz = 3.75 ns
- tCS = '3.75ns'
-
Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
Irrespective of density, tFAW is 50 ns
- tXAW = '50ns'
- activation_limit = 4
-
Current values from datasheet
- IDD0 = '15mA'
- IDD02 = '70mA'
- IDD2N = '2mA'
- IDD2N2 = '30mA'
- IDD3N = '2.5mA'
- IDD3N2 = '30mA'
- IDD4W = '10mA'
- IDD4W2 = '190mA'
- IDD4R = '3mA'
- IDD4R2 = '220mA'
- IDD5 = '40mA'
- IDD52 = '150mA'
- IDD3P1 = '1.2mA'
- IDD3P12 = '8mA'
- IDD2P1 = '0.6mA'
- IDD2P12 = '0.8mA'
- IDD6 = '1mA'
- IDD62 = '3.2mA'
- VDD = '1.8V'
- VDD2 = '1.2V'
-# A single WideIO x128 interface (one command and address bus), with
-# default timings based on an estimated WIO-200 8 Gbit part.
-class WideIO_200_1x128(DRAMCtrl):
-
No DLL for WideIO
- dll = False
-
size of device
- device_size = '1024MB'
-
1x128 configuration, 1 device with a 128-bit interface
- device_bus_width = 128
-
This is a BL4 device
- burst_length = 4
-
Each device has a page (row buffer) size of 4KB
-
(this depends on the memory density)
- device_rowbuffer_size = '4kB'
-
1x128 configuration, so 1 device
- devices_per_rank = 1
-
Use one rank for a one-high die stack
- ranks_per_channel = 1
-
WideIO has 4 banks in all configurations
- banks_per_rank = 4
-
200 MHz
- tCK = '5ns'
-
WIO-200
- tRCD = '18ns'
- tCL = '18ns'
- tRP = '18ns'
- tRAS = '42ns'
- tWR = '15ns'
-
Read to precharge is same as the burst
- tRTP = '20ns'
-
4 beats across an x128 SDR interface translates to 4 clocks @ 200
MHz.
-
Note this is a BL4 SDR device.
- tBURST = '20ns'
-
WIO 8 Gb
- tRFC = '210ns'
-
WIO 8 Gb, <=85C, half for >85C
- tREFI = '3.9us'
-
Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns
- tWTR = '15ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @200 MHz = 10 ns
- tRTW = '10ns'
-
Default different rank bus delay to 2 CK, @200 MHz = 10 ns
- tCS = '10ns'
-
Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
Two instead of four activation window
- tXAW = '50ns'
- activation_limit = 2
-
The WideIO specification does not provide current information
-# A single LPDDR3 x32 interface (one command/address bus), with
-# default timings based on a LPDDR3-1600 4 Gbit part (Micron
-# EDF8132A1MC) in a 1x32 configuration.
-class LPDDR3_1600_1x32(DRAMCtrl):
-
No DLL for LPDDR3
- dll = False
-
size of device
- device_size = '512MB'
-
1x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
LPDDR3 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 4KB
- device_rowbuffer_size = '4kB'
-
1x32 configuration, so 1 device
- devices_per_rank = 1
-
Technically the datasheet is a dual-rank package, but for
-
comparison with the LPDDR2 config we stick to a single rank
- ranks_per_channel = 1
-
LPDDR3 has 8 banks in all configurations
- banks_per_rank = 8
-
800 MHz
- tCK = '1.25ns'
- tRCD = '18ns'
-
12 CK read latency, 6 CK write latency @ 800 MHz, 1.25 ns cycle time
- tCL = '15ns'
- tRAS = '42ns'
- tWR = '15ns'
-
Greater of 4 CK or 7.5 ns, 4 CK @ 800 MHz = 5 ns
- tRTP = '7.5ns'
-
Pre-charge one bank 18 ns (all banks 21 ns)
- tRP = '18ns'
-
8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz.
-
Note this is a BL8 DDR device.
-
Requests larger than 32 bytes are broken down into multiple requests
-
in the controller
- tBURST = '5ns'
-
LPDDR3, 4 Gb
- tRFC = '130ns'
- tREFI = '3.9us'
-
active powerdown and precharge powerdown exit time
- tXP = '7.5ns'
-
self refresh exit time
- tXS = '140ns'
-
Irrespective of speed grade, tWTR is 7.5 ns
- tWTR = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
- tRTW = '2.5ns'
-
Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
Irrespective of size, tFAW is 50 ns
- tXAW = '50ns'
- activation_limit = 4
-
Current values from datasheet
- IDD0 = '8mA'
- IDD02 = '60mA'
- IDD2N = '0.8mA'
- IDD2N2 = '26mA'
- IDD3N = '2mA'
- IDD3N2 = '34mA'
- IDD4W = '2mA'
- IDD4W2 = '190mA'
- IDD4R = '2mA'
- IDD4R2 = '230mA'
- IDD5 = '28mA'
- IDD52 = '150mA'
- IDD3P1 = '1.4mA'
- IDD3P12 = '11mA'
- IDD2P1 = '0.8mA'
- IDD2P12 = '1.8mA'
- IDD6 = '0.5mA'
- IDD62 = '1.8mA'
- VDD = '1.8V'
- VDD2 = '1.2V'
-# A single GDDR5 x64 interface, with
-# default timings based on a GDDR5-4000 1 Gbit part (SK Hynix
-# H5GQ1H24AFR) in a 2x32 configuration.
-class GDDR5_4000_2x32(DRAMCtrl):
-
size of device
- device_size = '128MB'
-
2x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
GDDR5 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 2Kbits (256Bytes)
- device_rowbuffer_size = '256B'
-
2x32 configuration, so 2 devices
- devices_per_rank = 2
-
assume single rank
- ranks_per_channel = 1
-
GDDR5 has 4 bank groups
- bank_groups_per_rank = 4
-
GDDR5 has 16 banks with 4 bank groups
- banks_per_rank = 16
-
1000 MHz
- tCK = '1ns'
-
8 beats across an x64 interface translates to 2 clocks @ 1000 MHz
-
Data bus runs @2000 Mhz => DDR ( data runs at 4000 MHz )
-
8 beats at 4000 MHz = 2 beats at 1000 MHz
-
tBURST is equivalent to the CAS-to-CAS delay (tCCD)
-
With bank group architectures, tBURST represents the CAS-to-CAS
-
delay for bursts to different bank groups (tCCD_S)
- tBURST = '2ns'
-
@1000MHz data rate, tCCD_L is 3 CK
-
CAS-to-CAS delay for bursts to the same bank group
-
tBURST is equivalent to tCCD_S; no explicit parameter required
-
for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = '3ns';
- tRCD = '12ns'
-
tCL is not directly found in datasheet and assumed equal tRCD
- tCL = '12ns'
- tRP = '12ns'
- tRAS = '28ns'
-
RRD_S (different bank group)
-
RRD_S is 5.5 ns in datasheet.
-
rounded to the next multiple of tCK
- tRRD = '6ns'
-
RRD_L (same bank group)
-
RRD_L is 5.5 ns in datasheet.
-
rounded to the next multiple of tCK
- tRRD_L = '6ns'
- tXAW = '23ns'
-
tXAW < 4 x tRRD.
-
Therefore, activation limit is set to 0
- activation_limit = 0
- tRFC = '65ns'
- tWR = '12ns'
-
Here using the average of WTR_S and WTR_L
- tWTR = '5ns'
-
Read-to-Precharge 2 CK
- tRTP = '2ns'
-
Assume 2 cycles
- tRTW = '2ns'
-# A single HBM x128 interface (one command and address bus), with
-# default timings based on data publically released
-# ("HBM: Memory Solution for High Performance Processors", MemCon, 2014),
-# IDD measurement values, and by extrapolating data from other classes.
-# Architecture values based on published HBM spec
-# A 4H stack is defined, 2Gb per die for a total of 1GB of memory.
-class HBM_1000_4H_1x128(DRAMCtrl):
-
HBM gen1 supports up to 8 128-bit physical channels
-
Configuration defines a single channel, with the capacity
-
set to (full_ stack_capacity / 8) based on 2Gb dies
-
To use all 8 channels, set 'channels' parameter to 8 in
-
system configuration
-
128-bit interface legacy mode
- device_bus_width = 128
-
HBM supports BL4 and BL2 (legacy mode only)
- burst_length = 4
-
size of channel in bytes, 4H stack of 2Gb dies is 1GB per stack;
-
with 8 channels, 128MB per channel
- device_size = '128MB'
- device_rowbuffer_size = '2kB'
-
1x128 configuration
- devices_per_rank = 1
-
HBM does not have a CS pin; set rank to 1
- ranks_per_channel = 1
-
HBM has 8 or 16 banks depending on capacity
-
2Gb dies have 8 banks
- banks_per_rank = 8
-
depending on frequency, bank groups may be required
-
will always have 4 bank groups when enabled
-
current specifications do not define the minimum frequency for
-
bank group architecture
-
setting bank_groups_per_rank to 0 to disable until range is defined
- bank_groups_per_rank = 0
-
500 MHz for 1Gbps DDR data rate
- tCK = '2ns'
-
use values from IDD measurement in JEDEC spec
-
use tRP value for tRCD and tCL similar to other classes
- tRP = '15ns'
- tRCD = '15ns'
- tCL = '15ns'
- tRAS = '33ns'
-
BL2 and BL4 supported, default to BL4
-
DDR @ 500 MHz means 4 * 2ns / 2 = 4ns
- tBURST = '4ns'
-
value for 2Gb device from JEDEC spec
- tRFC = '160ns'
-
value for 2Gb device from JEDEC spec
- tREFI = '3.9us'
-
extrapolate the following from LPDDR configs, using ns values
-
to minimize burst length, prefetch differences
- tWR = '18ns'
- tRTP = '7.5ns'
- tWTR = '10ns'
-
start with 2 cycles turnaround, similar to other memory classes
-
could be more with variations across the stack
- tRTW = '4ns'
-
single rank device, set to 0
- tCS = '0ns'
-
from MemCon example, tRRD is 4ns with 2ns tCK
- tRRD = '4ns'
-
from MemCon example, tFAW is 30ns with 2ns tCK
- tXAW = '30ns'
- activation_limit = 4
-
4tCK
- tXP = '8ns'
-
start with tRFC + tXP -> 160ns + 8ns = 168ns
- tXS = '168ns'
-# A single HBM x64 interface (one command and address bus), with
-# default timings based on HBM gen1 and data publically released
-# A 4H stack is defined, 8Gb per die for a total of 4GB of memory.
-# Note: This defines a pseudo-channel with a unique controller
-# instantiated per pseudo-channel
-# Stay at same IO rate (1Gbps) to maintain timing relationship with
-# HBM gen1 class (HBM_1000_4H_x128) where possible
-class HBM_1000_4H_1x64(HBM_1000_4H_1x128):
-
For HBM gen2 with pseudo-channel mode, configure 2X channels.
-
Configuration defines a single pseudo channel, with the capacity
-
set to (full_ stack_capacity / 16) based on 8Gb dies
-
To use all 16 pseudo channels, set 'channels' parameter to 16 in
-
system configuration
-
64-bit pseudo-channle interface
- device_bus_width = 64
-
HBM pseudo-channel only supports BL4
- burst_length = 4
-
size of channel in bytes, 4H stack of 8Gb dies is 4GB per stack;
-
with 16 channels, 256MB per channel
- device_size = '256MB'
-
page size is halved with pseudo-channel; maintaining the same same
number
-
of rows per pseudo-channel with 2X banks across 2 channels
- device_rowbuffer_size = '1kB'
-
HBM has 8 or 16 banks depending on capacity
-
Starting with 4Gb dies, 16 banks are defined
- banks_per_rank = 16
-
reset tRFC for larger, 8Gb device
-
use HBM1 4Gb value as a starting point
- tRFC = '260ns'
-
start with tRFC + tXP -> 160ns + 8ns = 168ns
- tXS = '268ns'
-
Default different rank bus delay to 2 CK, @1000 MHz = 2 ns
- tCS = '2ns'
- tREFI = '3.9us'
-
active powerdown and precharge powerdown exit time
- tXP = '10ns'
-
self refresh exit time
- tXS = '65ns'
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture
-# burst of 32, which means bursts can be interleaved
-class LPDDR5_5500_1x16_BG_BL32(DRAMCtrl):
-
Increase buffer size to account for more bank resources
- read_buffer_size = 64
-
Set page policy to better suit DMC Huxley
- page_policy = 'close_adaptive'
-
16-bit channel interface
- device_bus_width = 16
-
LPDDR5 is a BL16 or BL32 device
-
With BG mode, BL16 and BL32 are supported
-
Use BL32 for higher command bandwidth
- burst_length = 32
-
size of device in bytes
- device_size = '1GB'
-
2kB page with BG mode
- device_rowbuffer_size = '2kB'
-
Use a 1x16 configuration
- devices_per_rank = 1
-
Use a single rank
- ranks_per_channel = 1
-
LPDDR5 supports configurable bank options
-
8B : BL32, all frequencies
-
16B : BL32 or BL16, <=3.2Gbps
-
16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
-
Initial configuration will have 16 banks with Bank Group Arch
-
to maximim resources and enable higher data rates
- banks_per_rank = 16
- bank_groups_per_rank = 4
-
5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
- tCK = '1.455ns'
-
Greater of 2 CK or 18ns
- tRCD = '18ns'
-
Base RL is 16 CK @ 687.5 MHz = 23.28ns
- tCL = '23.280ns'
-
Greater of 2 CK or 18ns
- tRP = '18ns'
-
Greater of 3 CK or 42ns
- tRAS = '42ns'
-
Greater of 3 CK or 34ns
- tWR = '34ns'
-
active powerdown and precharge powerdown exit time
-
Greater of 3 CK or 7ns
- tXP = '7ns'
-
self refresh exit time (tRFCab + 7.5ns)
- tXS = '217.5ns'
-
Greater of 2 CK or 7.5 ns minus 2 CK
- tRTP = '4.59ns'
-
With BG architecture, burst of 32 transferred in two 16-beat
-
sub-bursts, with a 16-beat gap in between.
-
Each 16-beat sub-burst is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
-
tBURST is the delay to transfer the Bstof32 = 6 CK @ 687.5 MHz
- tBURST = '8.73ns'
-
can interleave a Bstof32 from another bank group at tBURST_MIN
-
16-beats is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
- tBURST_MIN = '2.91ns'
-
tBURST_MAX is the maximum burst delay for same bank group timing
-
this is 8 CK @ 687.5 MHz
- tBURST_MAX = '11.64ns'
-
8 CK @ 687.5 MHz
- tCCD_L = "11.64ns"
-
LPDDR5, 8 Gbit/channel for 280ns tRFCab
- tRFC = '210ns'
- tREFI = '3.9us'
-
Greater of 4 CK or 6.25 ns
- tWTR = '6.25ns'
-
Greater of 4 CK or 12 ns
- tWTR_L = '12ns'
-
Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL
-
tWCKDQ0/tCK will be 1 CK for most cases
-
For gem5 RL = WL and BL/n is already accounted for with tBURST
-
Result is and additional 1 CK is required
- tRTW = '1.455ns'
-
Default different rank bus delay to 2 CK, @687.5 MHz = 2.91 ns
- tCS = '2.91ns'
-
2 CK
- tPPD = '2.91ns'
-
Greater of 2 CK or 5 ns
- tRRD = '5ns'
- tRRD_L = '5ns'
-
With Bank Group Arch mode tFAW is 20 ns
- tXAW = '20ns'
- activation_limit = 4
-
at 5Gbps, 4:1 WCK to CK ratio required
-
2 data beats per WCK (DDR) -> 8 per CK
- beats_per_clock = 8
-
2 cycles required to send activate command
-
2 command phases can be sent back-to-back or
-
with a gap up to tAAD = 8 CK
- two_cycle_activate = True
- tAAD = '11.640ns'
- data_clock_sync = True
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture, burst of 16
-class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32):
-
LPDDR5 is a BL16 or BL32 device
-
With BG mode, BL16 and BL32 are supported
-
Use BL16 for smaller access granularity
- burst_length = 16
-
For Bstof16 with BG arch, 2 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST = '2.91ns'
- tBURST_MIN = '2.91ns'
-
For Bstof16 with BG arch, 4 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST_MAX = '5.82ns'
-
4 CK @ 687.5 MHz
- tCCD_L = "5.82ns"
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 8-bank mode, burst of 32
-class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32):
-
4kB page with 8B mode
- device_rowbuffer_size = '4kB'
-
LPDDR5 supports configurable bank options
-
8B : BL32, all frequencies
-
16B : BL32 or BL16, <=3.2Gbps
-
16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
-
Select 8B
- banks_per_rank = 8
- bank_groups_per_rank = 0
-
For Bstof32 with 8B mode, 4 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST = '5.82ns'
- tBURST_MIN = '5.82ns'
- tBURST_MAX = '5.82ns'
-
Greater of 4 CK or 12 ns
- tWTR = '12ns'
-
Greater of 2 CK or 10 ns
- tRRD = '10ns'
-
With 8B mode tFAW is 40 ns
- tXAW = '40ns'
- activation_limit = 4
-
Reset BG arch timing for 8B mode
- tCCD_L = "0ns"
- tRRD_L = "0ns"
- tWTR_L = "0ns"
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture
-# burst of 32, which means bursts can be interleaved
-class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32):
-
5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
- tCK = '1.25ns'
-
Base RL is 17 CK @ 800 MHz = 21.25ns
- tCL = '21.25ns'
-
With BG architecture, burst of 32 transferred in two 16-beat
-
sub-bursts, with a 16-beat gap in between.
-
Each 16-beat sub-burst is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
-
tBURST is the delay to transfer the Bstof32 = 6 CK @ 800 MHz
- tBURST = '7.5ns'
-
can interleave a Bstof32 from another bank group at tBURST_MIN
-
16-beats is 8 WCK @2.3 GHz or 2 CK @ 800 MHz
- tBURST_MIN = '2.5ns'
-
tBURST_MAX is the maximum burst delay for same bank group timing
-
this is 8 CK @ 800 MHz
- tBURST_MAX = '10ns'
-
8 CK @ 800 MHz
- tCCD_L = "10ns"
-
Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL
-
tWCKDQ0/tCK will be 1 CK for most cases
-
For gem5 RL = WL and BL/n is already accounted for with tBURST
-
Result is and additional 1 CK is required
- tRTW = '1.25ns'
-
Default different rank bus delay to 2 CK, @687.5 MHz = 2.5 ns
- tCS = '2.5ns'
-
2 CK
- tPPD = '2.5ns'
-
2 command phases can be sent back-to-back or
-
with a gap up to tAAD = 8 CK
- tAAD = '10ns'
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on initial
-# JEDEC specifcation
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture, burst of 16
-class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32):
-
LPDDR5 is a BL16 or BL32 device
-
With BG mode, BL16 and BL32 are supported
-
Use BL16 for smaller access granularity
- burst_length = 16
-
For Bstof16 with BG arch, 2 CK @ 800 MHz with 4:1 clock ratio
- tBURST = '2.5ns'
- tBURST_MIN = '2.5ns'
-
For Bstof16 with BG arch, 4 CK @ 800 MHz with 4:1 clock ratio
- tBURST_MAX = '5ns'
-
4 CK @ 800 MHz
- tCCD_L = "5ns"
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 8-bank mode, burst of 32
-class LPDDR5_6400_1x16_8B_BL32(LPDDR5_6400_1x16_BG_BL32):
-
4kB page with 8B mode
- device_rowbuffer_size = '4kB'
-
LPDDR5 supports configurable bank options
-
8B : BL32, all frequencies
-
16B : BL32 or BL16, <=3.2Gbps
-
16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
-
Select 8B
- banks_per_rank = 8
- bank_groups_per_rank = 0
-
For Bstof32 with 8B mode, 4 CK @ 800 MHz with 4:1 clock ratio
- tBURST = '5ns'
- tBURST_MIN = '5ns'
- tBURST_MAX = '5ns'
-
Greater of 4 CK or 12 ns
- tWTR = '12ns'
-
Greater of 2 CK or 10 ns
- tRRD = '10ns'
-
With 8B mode tFAW is 40 ns
- tXAW = '40ns'
- activation_limit = 4
-
Reset BG arch timing for 8B mode
- tCCD_L = "0ns"
- tRRD_L = "0ns"
- tWTR_L = "0ns"
diff --git a/src/mem/DRAMInterface.py b/src/mem/DRAMInterface.py
new file mode 100644
index 0000000..35bf8a3
--- /dev/null
+++ b/src/mem/DRAMInterface.py
@@ -0,0 +1,1483 @@
+# Copyright (c) 2012-2020 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Copyright (c) 2013 Amin Farmahini-Farahani
+# Copyright (c) 2015 University of Kaiserslautern
+# Copyright (c) 2015 The University of Bologna
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from AbstractMemory import AbstractMemory
+from DRAMCtrl import *
+
+# Enum for the address mapping. With Ch, Ra, Ba, Ro and Co denoting
+# channel, rank, bank, row and column, respectively, and going from
+# MSB to LSB. Available are RoRaBaChCo and RoRaBaCoCh, that are
+# suitable for an open-page policy, optimising for sequential accesses
+# hitting in the open row. For a closed-page policy, RoCoRaBaCh
+# maximises parallelism.
+class AddrMap(Enum): vals = ['RoRaBaChCo', 'RoRaBaCoCh', 'RoCoRaBaCh']
+
+# Enum for the page policy, either open, open_adaptive, close, or
+# close_adaptive.
+class PageManage(Enum): vals = ['open', 'open_adaptive', 'close',
+class DRAMInterface(AbstractMemory):
- type = 'DRAMInterface'
- cxx_header = "mem/dram_ctrl.hh"
-
scheduler, address map and page policy
- addr_mapping = Param.AddrMap('RoRaBaCoCh', "Address mapping policy")
- page_policy = Param.PageManage('open_adaptive', "Page management
policy")
-
Allow the interface to set required controller buffer sizes
-
each entry corresponds to a burst for the specific DRAM
-
configuration (e.g. x32 with burst length 8 is 32 bytes) and not
-
the cacheline size or request/packet size
- write_buffer_size = Param.Unsigned(64, "Number of write queue entries")
- read_buffer_size = Param.Unsigned(32, "Number of read queue entries")
-
enforce a limit on the number of accesses per row
- max_accesses_per_row = Param.Unsigned(16, "Max accesses per row
before "
-
"closing");
-
size of DRAM Chip in Bytes
- device_size = Param.MemorySize("Size of DRAM chip")
-
the physical organisation of the DRAM
- device_bus_width = Param.Unsigned("data bus width in bits for each
DRAM "\
-
"device/chip")
- burst_length = Param.Unsigned("Burst lenght (BL) in beats")
- device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\
-
"device/chip")
- devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
- ranks_per_channel = Param.Unsigned("Number of ranks per channel")
-
default to 0 bank groups per rank, indicating bank group architecture
-
is not used
-
update per memory class when bank group architecture is supported
- bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per
rank")
- banks_per_rank = Param.Unsigned("Number of banks per rank")
-
Enable DRAM powerdown states if True. This is False by default due to
-
performance being lower when enabled
- enable_dram_powerdown = Param.Bool(False, "Enable powerdown states")
-
For power modelling we need to know if the DRAM has a DLL or not
- dll = Param.Bool(True, "DRAM has DLL or not")
-
DRAMPower provides in addition to the core power, the possibility to
-
include RD/WR termination and IO power. This calculation assumes some
-
default values. The integration of DRAMPower with gem5 does not
include
-
IO and RD/WR termination power by default. This might be added as an
-
additional feature in the future.
-
timing behaviour and constraints - all in nanoseconds
-
the base clock period of the DRAM
- tCK = Param.Latency("Clock period")
-
rank-to-rank bus delay penalty
-
this does not correlate to a memory timing parameter and encompasses:
-
1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD
-
different rank bus delay
- tCS = Param.Latency("Rank to rank switching time")
-
the amount of time in nanoseconds from issuing an activate command
-
to the data being available in the row buffer for a read/write
- tRCD = Param.Latency("RAS to CAS delay")
-
the time from issuing a read/write command to seeing the actual data
- tCL = Param.Latency("CAS latency")
-
minimum time between a precharge and subsequent activate
- tRP = Param.Latency("Row precharge time")
-
minimum time between an activate and a precharge to the same row
- tRAS = Param.Latency("ACT to PRE delay")
-
minimum time between a write data transfer and a precharge
- tWR = Param.Latency("Write recovery time")
-
minimum time between a read and precharge command
- tRTP = Param.Latency("Read to precharge")
-
time to complete a burst transfer, typically the burst length
-
divided by two due to the DDR bus, but by making it a parameter
-
it is easier to also evaluate SDR memories like WideIO.
-
This parameter has to account for burst length.
-
Read/Write requests with data size larger than one full burst are
broken
access,
-
which could be greater than tBURST when the memory access time is
greater
-
than tBURST
- tBURST_MAX = Param.Latency(Self.tBURST, "Column access delay")
-
tBURST_MIN is the minimum delay between bursts, which could be less
than
-
tBURST when interleaving is supported
- tBURST_MIN = Param.Latency(Self.tBURST, "Minimim delay between bursts")
-
CAS-to-CAS delay for bursts to the same bank group
-
only utilized with bank group architectures; set to 0 for default
case
-
tBURST is equivalent to tCCD_S; no explicit parameter required
-
for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")
-
Write-to-Write delay for bursts to the same bank group
-
only utilized with bank group architectures; set to 0 for default
case
case
- tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")
-
time window in which a maximum number of activates are allowed
-
to take place, set to 0 to disable
- tXAW = Param.Latency("X activation window")
- activation_limit = Param.Unsigned("Max number of activates in window")
-
time to exit power-down mode
-
Exit power-down to next valid command delay
- tXP = Param.Latency("0ns", "Power-up Delay")
-
Exit Powerdown to commands requiring a locked DLL
- tXPDLL = Param.Latency("0ns", "Power-up Delay with locked DLL")
-
time to exit self-refresh mode
- tXS = Param.Latency("0ns", "Self-refresh exit latency")
-
time to exit self-refresh mode with locked DLL
- tXSDLL = Param.Latency("0ns", "Self-refresh exit latency DLL")
-
number of data beats per clock. with DDR, default is 2, one per edge
- beats_per_clock = Param.Unsigned(2, "Data beats per clock")
- data_clock_sync = Param.Bool(False, "Synchronization commands
required")
-
Currently rolled into other params
- ######################################################################
-
tRC - assumed to be tRAS + tRP
-
Power Behaviour and Constraints
-
DRAMs like LPDDR and WideIO have 2 external voltage domains. These
are
-
defined as VDD and VDD2. Each current is defined for each voltage
domain
-
separately. For example, current IDD0 is active-precharge current for
-
voltage domain VDD and current IDD02 is active-precharge current for
-
voltage domain VDD2.
-
By default all currents are set to 0mA. Users who are only
interested in
-
the performance of DRAMs can leave them at 0.
-
Power Behaviour and Constraints
-
DRAMs like LPDDR and WideIO have 2 external voltage domains. These
are
-
defined as VDD and VDD2. Each current is defined for each voltage
domain
-
separately. For example, current IDD0 is active-precharge current for
-
voltage domain VDD and current IDD02 is active-precharge current for
-
voltage domain VDD2.
-
By default all currents are set to 0mA. Users who are only
interested in
-
the performance of DRAMs can leave them at 0.
-
Operating 1 Bank Active-Precharge current
- IDD0 = Param.Current("0mA", "Active precharge current")
-
Operating 1 Bank Active-Precharge current multiple voltage Range
- IDD02 = Param.Current("0mA", "Active precharge current VDD2")
-
Precharge Power-down Current: Slow exit
- IDD2P0 = Param.Current("0mA", "Precharge Powerdown slow")
-
Precharge Power-down Current: Slow exit multiple voltage Range
- IDD2P02 = Param.Current("0mA", "Precharge Powerdown slow VDD2")
-
Precharge Power-down Current: Fast exit
- IDD2P1 = Param.Current("0mA", "Precharge Powerdown fast")
-
Precharge Power-down Current: Fast exit multiple voltage Range
- IDD2P12 = Param.Current("0mA", "Precharge Powerdown fast VDD2")
-
Precharge Standby current
- IDD2N = Param.Current("0mA", "Precharge Standby current")
-
Precharge Standby current multiple voltage range
- IDD2N2 = Param.Current("0mA", "Precharge Standby current VDD2")
-
Active Power-down current: slow exit
- IDD3P0 = Param.Current("0mA", "Active Powerdown slow")
-
Active Power-down current: slow exit multiple voltage range
- IDD3P02 = Param.Current("0mA", "Active Powerdown slow VDD2")
-
Active Power-down current : fast exit
- IDD3P1 = Param.Current("0mA", "Active Powerdown fast")
-
Active Power-down current : fast exit multiple voltage range
- IDD3P12 = Param.Current("0mA", "Active Powerdown fast VDD2")
-
Active Standby current
- IDD3N = Param.Current("0mA", "Active Standby current")
-
Active Standby current multiple voltage range
- IDD3N2 = Param.Current("0mA", "Active Standby current VDD2")
-
Burst Read Operating Current
- IDD4R = Param.Current("0mA", "READ current")
-
Burst Read Operating Current multiple voltage range
- IDD4R2 = Param.Current("0mA", "READ current VDD2")
-
Burst Write Operating Current
- IDD4W = Param.Current("0mA", "WRITE current")
-
Burst Write Operating Current multiple voltage range
- IDD4W2 = Param.Current("0mA", "WRITE current VDD2")
-
Refresh Current
- IDD5 = Param.Current("0mA", "Refresh current")
-
Refresh Current multiple voltage range
- IDD52 = Param.Current("0mA", "Refresh current VDD2")
-
Self-Refresh Current
- IDD6 = Param.Current("0mA", "Self-refresh Current")
-
Self-Refresh Current multiple voltage range
- IDD62 = Param.Current("0mA", "Self-refresh Current VDD2")
-
Main voltage range of the DRAM
- VDD = Param.Voltage("0V", "Main Voltage Range")
-
Second voltage range defined by some DRAMs
- VDD2 = Param.Voltage("0V", "2nd Voltage Range")
+# A single DDR3-1600 x64 channel (one command and address bus), with
+# timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in
+# an 8x8 configuration.
+class DDR3_1600_8x8(DRAMInterface):
-
size of device in bytes
- device_size = '512MB'
-
8x8 configuration, 8 devices each with an 8-bit interface
- device_bus_width = 8
-
DDR3 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
- device_rowbuffer_size = '1kB'
-
8x8 configuration, so 8 devices
- devices_per_rank = 8
-
Use two ranks
- ranks_per_channel = 2
-
DDR3 has 8 banks in all configurations
- banks_per_rank = 8
-
800 MHz
- tCK = '1.25ns'
-
Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
8 beats across an x64 interface translates to 4 clocks @ 800 MHz
- tBURST = '5ns'
-
Greater of 4 CK or 7.5 ns
- tWTR = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
- tRTW = '2.5ns'
-
DDR3-1600 11-11-11
- tRCD = '13.75ns'
- tCL = '13.75ns'
- tRP = '13.75ns'
- tRAS = '35ns'
- tRRD = '6ns'
- tXAW = '30ns'
- activation_limit = 4
- tRFC = '260ns'
- tWR = '15ns'
-
Greater of 4 CK or 7.5 ns
- tRTP = '7.5ns'
-
<=85C, half for >85C
- tREFI = '7.8us'
-
active powerdown and precharge powerdown exit time
- tXP = '6ns'
-
self refresh exit time
- tXS = '270ns'
-
Current values from datasheet Die Rev E,J
- IDD0 = '55mA'
- IDD2N = '32mA'
- IDD3N = '38mA'
- IDD4W = '125mA'
- IDD4R = '157mA'
- IDD5 = '235mA'
- IDD3P1 = '38mA'
- IDD2P1 = '32mA'
- IDD6 = '20mA'
- VDD = '1.5V'
+# A single HMC-2500 x32 model based on:
+# [1] DRAMSpec: a high-level DRAM bank modelling tool
+# developed at the University of Kaiserslautern. This high level tool
+# uses RC (resistance-capacitance) and CV (capacitance-voltage) models to
+# estimate the DRAM bank latency and power numbers.
+# [2] High performance AXI-4.0 based interconnect for extensible smart
memory
+# cubes (E. Azarkhish et. al)
+# Assumed for the HMC model is a 30 nm technology node.
+# The modelled HMC consists of 4 Gbit layers which sum up to 2GB of memory
(4
+# layers).
+# Each layer has 16 vaults and each vault consists of 2 banks per layer.
+# In order to be able to use the same controller used for 2D DRAM
generations
+# for HMC, the following analogy is done:
+# Channel (DDR) => Vault (HMC)
+# device_size (DDR) => size of a single layer in a vault
+# ranks per channel (DDR) => number of layers
+# banks per rank (DDR) => banks per layer
+# devices per rank (DDR) => devices per layer ( 1 for HMC).
+# The parameters for which no input is available are inherited from the
DDR3
+# configuration.
+# This configuration includes the latencies from the DRAM to the logic
layer
+# of the HMC
+class HMC_2500_1x32_Interface(DDR3_1600_8x8):
-
A single HMC-2500 x32 controller
-
The buffer parameters do not directly correlate with buffer_size in
-
real hardware. Nevertheless, their value has been tuned to achieve a
-
bandwidth similar to the cycle-accurate model in [2]
- write_buffer_size = 32
- read_buffer_size = 32
-
size of device
-
two banks per device with each bank 4MB [2]
- device_size = '8MB'
-
1x32 configuration, 1 device with 32 TSVs [2]
- device_bus_width = 32
-
HMC is a BL8 device [2]
- burst_length = 8
-
Each device has a page (row buffer) size of 256 bytes [2]
- device_rowbuffer_size = '256B'
-
1x32 configuration, so 1 device [2]
- devices_per_rank = 1
-
4 layers so 4 ranks [2]
- ranks_per_channel = 4
-
HMC has 2 banks per layer [2]
-
Each layer represents a rank. With 4 layers and 8 banks in total,
each
-
layer has 2 banks; thus 2 banks per rank.
- banks_per_rank = 2
-
1250 MHz [2]
- tCK = '0.8ns'
-
Default different rank bus delay assumed to 1 CK for TSVs, @1250 MHz
=
-
0.8 ns (Assumption)
- tCS = '0.8ns'
-
8 beats across an x32 interface translates to 4 clocks @ 1250 MHz
- tBURST = '3.2ns'
-
Values using DRAMSpec HMC model [1]
- tRCD = '10.2ns'
- tCL = '9.9ns'
- tRP = '7.7ns'
- tRAS = '21.6ns'
-
tRRD depends on the power supply network for each vendor.
-
We assume a tRRD of a double bank approach to be equal to 4 clock
-
cycles (Assumption)
- tRRD = '3.2ns'
-
activation limit is set to 0 since there are only 2 banks per vault
-
layer.
- activation_limit = 0
-
Values using DRAMSpec HMC model [1]
- tRFC = '59ns'
- tWR = '8ns'
- tRTP = '4.9ns'
-
Value using DRAMSpec HMC model [1]
- tREFI = '3.9us'
-
The default page policy in the vault controllers is simple closed
page
-
[2] nevertheless 'close' policy opens and closes the row multiple
times
-
for bursts larger than 32Bytes. For this reason we
use 'close_adaptive'
- page_policy = 'close_adaptive'
-
RoCoRaBaCh resembles the default address mapping in HMC
- addr_mapping = 'RoCoRaBaCh'
+# A single DDR3-2133 x64 channel refining a selected subset of the
+# options for the DDR-1600 configuration, based on the same DDR3-1600
+# 4 Gbit datasheet (Micron MT41J512M8). Most parameters are kept
+# consistent across the two configurations.
+class DDR3_2133_8x8(DDR3_1600_8x8):
-
1066 MHz
- tCK = '0.938ns'
-
8 beats across an x64 interface translates to 4 clocks @ 1066 MHz
- tBURST = '3.752ns'
-
DDR3-2133 14-14-14
- tRCD = '13.09ns'
- tCL = '13.09ns'
- tRP = '13.09ns'
- tRAS = '33ns'
- tRRD = '5ns'
- tXAW = '25ns'
-
Current values from datasheet
- IDD0 = '70mA'
- IDD2N = '37mA'
- IDD3N = '44mA'
- IDD4W = '157mA'
- IDD4R = '191mA'
- IDD5 = '250mA'
- IDD3P1 = '44mA'
- IDD2P1 = '43mA'
- IDD6 ='20mA'
- VDD = '1.5V'
+# A single DDR4-2400 x64 channel (one command and address bus), with
+# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A2G4)
+# in a 16x4 configuration.
+# Total channel capacity is 32GB
+# 16 devices/rank * 2 ranks/channel * 1GB/device = 32GB/channel
+class DDR4_2400_16x4(DRAMInterface):
-
override the default buffer sizes and go for something larger to
-
accommodate the larger bank count
- write_buffer_size = 128
- read_buffer_size = 64
-
size of device
- device_size = '1GB'
-
16x4 configuration, 16 devices each with a 4-bit interface
- device_bus_width = 4
-
DDR4 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 512 byte (1K columns x4)
- device_rowbuffer_size = '512B'
-
16x4 configuration, so 16 devices
- devices_per_rank = 16
-
Match our DDR3 configurations which is dual rank
- ranks_per_channel = 2
-
DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
-
Set to 4 for x4 case
- bank_groups_per_rank = 4
-
DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
-
configurations). Currently we do not capture the additional
-
constraints incurred by the bank groups
- banks_per_rank = 16
-
1200 MHz
- tCK = '0.833ns'
-
Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns
- tCS = '1.666ns'
-
8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
-
tBURST is equivalent to the CAS-to-CAS delay (tCCD)
-
With bank group architectures, tBURST represents the CAS-to-CAS
-
delay for bursts to different bank groups (tCCD_S)
- tBURST = '3.332ns'
-
Here using the average of WTR_S and WTR_L
- tWTR = '5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666
ns
- tRTW = '1.666ns'
-
@2400 data rate, tCCD_L is 6 CK
-
CAS-to-CAS delay for bursts to the same bank group
-
tBURST is equivalent to tCCD_S; no explicit parameter required
-
for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = '5ns';
-
DDR4-2400 17-17-17
- tRCD = '14.16ns'
- tCL = '14.16ns'
- tRP = '14.16ns'
- tRAS = '32ns'
-
RRD_S (different bank group) for 512B page is MAX(4 CK, 3.3ns)
- tRRD = '3.332ns'
-
RRD_L (same bank group) for 512B page is MAX(4 CK, 4.9ns)
- tRRD_L = '4.9ns';
-
tFAW for 512B page is MAX(16 CK, 13ns)
- tXAW = '13.328ns'
- activation_limit = 4
-
tRFC is 350ns
- tRFC = '350ns'
- tWR = '15ns'
-
Greater of 4 CK or 7.5 ns
- tRTP = '7.5ns'
-
<=85C, half for >85C
- tREFI = '7.8us'
-
active powerdown and precharge powerdown exit time
- tXP = '6ns'
-
self refresh exit time
-
exit delay to ACT, PRE, PREALL, REF, SREF Enter, and PD Enter is:
-
tRFC + 10ns = 340ns
- tXS = '340ns'
-
Current values from datasheet
- IDD0 = '43mA'
- IDD02 = '3mA'
- IDD2N = '34mA'
- IDD3N = '38mA'
- IDD3N2 = '3mA'
- IDD4W = '103mA'
- IDD4R = '110mA'
- IDD5 = '250mA'
- IDD3P1 = '32mA'
- IDD2P1 = '25mA'
- IDD6 = '30mA'
- VDD = '1.2V'
- VDD2 = '2.5V'
+# A single DDR4-2400 x64 channel (one command and address bus), with
+# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A1G8)
+# in an 8x8 configuration.
+# Total channel capacity is 16GB
+# 8 devices/rank * 2 ranks/channel * 1GB/device = 16GB/channel
+class DDR4_2400_8x8(DDR4_2400_16x4):
-
8x8 configuration, 8 devices each with an 8-bit interface
- device_bus_width = 8
-
Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
- device_rowbuffer_size = '1kB'
-
8x8 configuration, so 8 devices
- devices_per_rank = 8
-
RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
- tRRD_L = '4.9ns';
- tXAW = '21ns'
-
Current values from datasheet
- IDD0 = '48mA'
- IDD3N = '43mA'
- IDD4W = '123mA'
- IDD4R = '135mA'
- IDD3P1 = '37mA'
+# A single DDR4-2400 x64 channel (one command and address bus), with
+# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A512M16)
+# in a 4x16 configuration.
+# Total channel capacity is 4GB
+# 4 devices/rank * 1 ranks/channel * 1GB/device = 4GB/channel
+class DDR4_2400_4x16(DDR4_2400_16x4):
-
4x16 configuration, 4 devices each with a 16-bit interface
- device_bus_width = 16
-
Each device has a page (row buffer) size of 2 Kbyte (1K columns x16)
- device_rowbuffer_size = '2kB'
-
4x16 configuration, so 4 devices
- devices_per_rank = 4
-
Single rank for x16
- ranks_per_channel = 1
-
DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
-
Set to 2 for x16 case
- bank_groups_per_rank = 2
-
DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
-
configurations). Currently we do not capture the additional
-
constraints incurred by the bank groups
- banks_per_rank = 8
-
RRD_S (different bank group) for 2K page is MAX(4 CK, 5.3ns)
- tRRD = '5.3ns'
-
RRD_L (same bank group) for 2K page is MAX(4 CK, 6.4ns)
- tRRD_L = '6.4ns';
- tXAW = '30ns'
-
Current values from datasheet
- IDD0 = '80mA'
- IDD02 = '4mA'
- IDD2N = '34mA'
- IDD3N = '47mA'
- IDD4W = '228mA'
- IDD4R = '243mA'
- IDD5 = '280mA'
- IDD3P1 = '41mA'
+# A single LPDDR2-S4 x32 interface (one command/address bus), with
+# default timings based on a LPDDR2-1066 4 Gbit part (Micron MT42L128M32D1)
+# in a 1x32 configuration.
+class LPDDR2_S4_1066_1x32(DRAMInterface):
-
No DLL in LPDDR2
- dll = False
-
size of device
- device_size = '512MB'
-
1x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
LPDDR2_S4 is a BL4 and BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 1KB
-
(this depends on the memory density)
- device_rowbuffer_size = '1kB'
-
1x32 configuration, so 1 device
- devices_per_rank = 1
-
Use a single rank
- ranks_per_channel = 1
-
LPDDR2-S4 has 8 banks in all configurations
- banks_per_rank = 8
-
533 MHz
- tCK = '1.876ns'
-
Default different rank bus delay to 2 CK, @533 MHz = 3.75 ns
- tCS = '3.75ns'
-
8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz.
-
Note this is a BL8 DDR device.
-
Requests larger than 32 bytes are broken down into multiple requests
-
in the controller
- tBURST = '7.5ns'
-
Irrespective of speed grade, tWTR is 7.5 ns
- tWTR = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @533 MHz = 3.75 ns
- tRTW = '3.75ns'
-
Fixed at 15 ns
- tRCD = '15ns'
-
8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle time
- tCL = '15ns'
-
Pre-charge one bank 15 ns (all banks 18 ns)
- tRP = '15ns'
- tRAS = '42ns'
- tWR = '15ns'
- tRTP = '7.5ns'
-
LPDDR2-S4, 4 Gbit
- tRFC = '130ns'
- tREFI = '3.9us'
-
active powerdown and precharge powerdown exit time
- tXP = '7.5ns'
-
self refresh exit time
- tXS = '140ns'
-
Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
Irrespective of density, tFAW is 50 ns
- tXAW = '50ns'
- activation_limit = 4
-
Current values from datasheet
- IDD0 = '15mA'
- IDD02 = '70mA'
- IDD2N = '2mA'
- IDD2N2 = '30mA'
- IDD3N = '2.5mA'
- IDD3N2 = '30mA'
- IDD4W = '10mA'
- IDD4W2 = '190mA'
- IDD4R = '3mA'
- IDD4R2 = '220mA'
- IDD5 = '40mA'
- IDD52 = '150mA'
- IDD3P1 = '1.2mA'
- IDD3P12 = '8mA'
- IDD2P1 = '0.6mA'
- IDD2P12 = '0.8mA'
- IDD6 = '1mA'
- IDD62 = '3.2mA'
- VDD = '1.8V'
- VDD2 = '1.2V'
+# A single WideIO x128 interface (one command and address bus), with
+# default timings based on an estimated WIO-200 8 Gbit part.
+class WideIO_200_1x128(DRAMInterface):
-
No DLL for WideIO
- dll = False
-
size of device
- device_size = '1024MB'
-
1x128 configuration, 1 device with a 128-bit interface
- device_bus_width = 128
-
This is a BL4 device
- burst_length = 4
-
Each device has a page (row buffer) size of 4KB
-
(this depends on the memory density)
- device_rowbuffer_size = '4kB'
-
1x128 configuration, so 1 device
- devices_per_rank = 1
-
Use one rank for a one-high die stack
- ranks_per_channel = 1
-
WideIO has 4 banks in all configurations
- banks_per_rank = 4
-
200 MHz
- tCK = '5ns'
-
Default different rank bus delay to 2 CK, @200 MHz = 10 ns
- tCS = '10ns'
-
4 beats across an x128 SDR interface translates to 4 clocks @ 200
MHz.
-
Note this is a BL4 SDR device.
- tBURST = '20ns'
-
Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns
- tWTR = '15ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @200 MHz = 10 ns
- tRTW = '10ns'
-
WIO-200
- tRCD = '18ns'
- tCL = '18ns'
- tRP = '18ns'
- tRAS = '42ns'
- tWR = '15ns'
-
Read to precharge is same as the burst
- tRTP = '20ns'
-
WIO 8 Gb
- tRFC = '210ns'
-
WIO 8 Gb, <=85C, half for >85C
- tREFI = '3.9us'
-
Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
Two instead of four activation window
- tXAW = '50ns'
- activation_limit = 2
-
The WideIO specification does not provide current information
+# A single LPDDR3 x32 interface (one command/address bus), with
+# default timings based on a LPDDR3-1600 4 Gbit part (Micron
+# EDF8132A1MC) in a 1x32 configuration.
+class LPDDR3_1600_1x32(DRAMInterface):
-
No DLL for LPDDR3
- dll = False
-
size of device
- device_size = '512MB'
-
1x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
LPDDR3 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 4KB
- device_rowbuffer_size = '4kB'
-
1x32 configuration, so 1 device
- devices_per_rank = 1
-
Technically the datasheet is a dual-rank package, but for
-
comparison with the LPDDR2 config we stick to a single rank
- ranks_per_channel = 1
-
LPDDR3 has 8 banks in all configurations
- banks_per_rank = 8
-
800 MHz
- tCK = '1.25ns'
-
Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz.
-
Note this is a BL8 DDR device.
-
Requests larger than 32 bytes are broken down into multiple requests
-
in the controller
- tBURST = '5ns'
-
Irrespective of speed grade, tWTR is 7.5 ns
- tWTR = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
- tRTW = '2.5ns'
- tRCD = '18ns'
-
12 CK read latency, 6 CK write latency @ 800 MHz, 1.25 ns cycle time
- tCL = '15ns'
- tRAS = '42ns'
- tWR = '15ns'
-
Greater of 4 CK or 7.5 ns, 4 CK @ 800 MHz = 5 ns
- tRTP = '7.5ns'
-
Pre-charge one bank 18 ns (all banks 21 ns)
- tRP = '18ns'
-
LPDDR3, 4 Gb
- tRFC = '130ns'
- tREFI = '3.9us'
-
active powerdown and precharge powerdown exit time
- tXP = '7.5ns'
-
self refresh exit time
- tXS = '140ns'
-
Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
Irrespective of size, tFAW is 50 ns
- tXAW = '50ns'
- activation_limit = 4
-
Current values from datasheet
- IDD0 = '8mA'
- IDD02 = '60mA'
- IDD2N = '0.8mA'
- IDD2N2 = '26mA'
- IDD3N = '2mA'
- IDD3N2 = '34mA'
- IDD4W = '2mA'
- IDD4W2 = '190mA'
- IDD4R = '2mA'
- IDD4R2 = '230mA'
- IDD5 = '28mA'
- IDD52 = '150mA'
- IDD3P1 = '1.4mA'
- IDD3P12 = '11mA'
- IDD2P1 = '0.8mA'
- IDD2P12 = '1.8mA'
- IDD6 = '0.5mA'
- IDD62 = '1.8mA'
- VDD = '1.8V'
- VDD2 = '1.2V'
+# A single GDDR5 x64 interface, with
+# default timings based on a GDDR5-4000 1 Gbit part (SK Hynix
+# H5GQ1H24AFR) in a 2x32 configuration.
+class GDDR5_4000_2x32(DRAMInterface):
-
size of device
- device_size = '128MB'
-
2x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
GDDR5 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 2Kbits (256Bytes)
- device_rowbuffer_size = '256B'
-
2x32 configuration, so 2 devices
- devices_per_rank = 2
-
assume single rank
- ranks_per_channel = 1
-
GDDR5 has 4 bank groups
- bank_groups_per_rank = 4
-
GDDR5 has 16 banks with 4 bank groups
- banks_per_rank = 16
-
1000 MHz
- tCK = '1ns'
-
8 beats across an x64 interface translates to 2 clocks @ 1000 MHz
-
Data bus runs @2000 Mhz => DDR ( data runs at 4000 MHz )
-
8 beats at 4000 MHz = 2 beats at 1000 MHz
-
tBURST is equivalent to the CAS-to-CAS delay (tCCD)
-
With bank group architectures, tBURST represents the CAS-to-CAS
-
delay for bursts to different bank groups (tCCD_S)
- tBURST = '2ns'
-
Here using the average of WTR_S and WTR_L
- tWTR = '5ns'
-
Assume 2 cycles
- tRTW = '2ns'
-
@1000MHz data rate, tCCD_L is 3 CK
-
CAS-to-CAS delay for bursts to the same bank group
-
tBURST is equivalent to tCCD_S; no explicit parameter required
-
for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = '3ns';
- tRCD = '12ns'
-
tCL is not directly found in datasheet and assumed equal tRCD
- tCL = '12ns'
- tRP = '12ns'
- tRAS = '28ns'
-
RRD_S (different bank group)
-
RRD_S is 5.5 ns in datasheet.
-
rounded to the next multiple of tCK
- tRRD = '6ns'
-
RRD_L (same bank group)
-
RRD_L is 5.5 ns in datasheet.
-
rounded to the next multiple of tCK
- tRRD_L = '6ns'
- tXAW = '23ns'
-
tXAW < 4 x tRRD.
-
Therefore, activation limit is set to 0
- activation_limit = 0
- tRFC = '65ns'
- tWR = '12ns'
-
Read-to-Precharge 2 CK
- tRTP = '2ns'
+# A single HBM x128 interface (one command and address bus), with
+# default timings based on data publically released
+# ("HBM: Memory Solution for High Performance Processors", MemCon, 2014),
+# IDD measurement values, and by extrapolating data from other classes.
+# Architecture values based on published HBM spec
+# A 4H stack is defined, 2Gb per die for a total of 1GB of memory.
+class HBM_1000_4H_1x128(DRAMInterface):
-
HBM gen1 supports up to 8 128-bit physical channels
-
Configuration defines a single channel, with the capacity
-
set to (full_stack_capacity / 8) based on 2Gb dies
-
To use all 8 channels, set 'channels' parameter to 8 in
-
system configuration
-
128-bit interface legacy mode
- device_bus_width = 128
-
HBM supports BL4 and BL2 (legacy mode only)
- burst_length = 4
-
size of channel in bytes, 4H stack of 2Gb dies is 1GB per stack;
-
with 8 channels, 128MB per channel
- device_size = '128MB'
- device_rowbuffer_size = '2kB'
-
1x128 configuration
- devices_per_rank = 1
-
HBM does not have a CS pin; set rank to 1
- ranks_per_channel = 1
-
HBM has 8 or 16 banks depending on capacity
-
2Gb dies have 8 banks
- banks_per_rank = 8
-
depending on frequency, bank groups may be required
-
will always have 4 bank groups when enabled
-
current specifications do not define the minimum frequency for
-
bank group architecture
-
setting bank_groups_per_rank to 0 to disable until range is defined
- bank_groups_per_rank = 0
-
500 MHz for 1Gbps DDR data rate
- tCK = '2ns'
-
single rank device, set to 0
- tCS = '0ns'
-
BL2 and BL4 supported, default to BL4
-
DDR @ 500 MHz means 4 * 2ns / 2 = 4ns
- tBURST = '4ns'
- tWTR = '10ns'
-
start with 2 cycles turnaround, similar to other memory classes
-
could be more with variations across the stack
- tRTW = '4ns'
-
use values from IDD measurement in JEDEC spec
-
use tRP value for tRCD and tCL similar to other classes
- tRP = '15ns'
- tRCD = '15ns'
- tCL = '15ns'
- tRAS = '33ns'
-
value for 2Gb device from JEDEC spec
- tRFC = '160ns'
-
value for 2Gb device from JEDEC spec
- tREFI = '3.9us'
-
extrapolate the following from LPDDR configs, using ns values
-
to minimize burst length, prefetch differences
- tWR = '18ns'
- tRTP = '7.5ns'
-
from MemCon example, tRRD is 4ns with 2ns tCK
- tRRD = '4ns'
-
from MemCon example, tFAW is 30ns with 2ns tCK
- tXAW = '30ns'
- activation_limit = 4
-
4tCK
- tXP = '8ns'
-
start with tRFC + tXP -> 160ns + 8ns = 168ns
- tXS = '168ns'
+# A single HBM x64 interface (one command and address bus), with
+# default timings based on HBM gen1 and data publically released
+# A 4H stack is defined, 8Gb per die for a total of 4GB of memory.
+# Note: This defines a pseudo-channel with a unique controller
+# instantiated per pseudo-channel
+# Stay at same IO rate (1Gbps) to maintain timing relationship with
+# HBM gen1 class (HBM_1000_4H_x128) where possible
class HBM_1000_4H_1x64(HBM_1000_4H_1x128):
    # For HBM gen2 with pseudo-channel mode, configure 2X channels.
    # Configuration defines a single pseudo channel, with the capacity
    # set to (full_stack_capacity / 16) based on 8Gb dies
    # To use all 16 pseudo channels, set 'channels' parameter to 16 in
    # system configuration

    # 64-bit pseudo-channel interface
    device_bus_width = 64

    # HBM pseudo-channel only supports BL4
    burst_length = 4

    # size of channel in bytes, 4H stack of 8Gb dies is 4GB per stack;
    # with 16 channels, 256MB per channel
    device_size = '256MB'

    # page size is halved with pseudo-channel; maintaining the same number
    # of rows per pseudo-channel with 2X banks across 2 channels
    device_rowbuffer_size = '1kB'

    # HBM has 8 or 16 banks depending on capacity
    # Starting with 4Gb dies, 16 banks are defined
    banks_per_rank = 16

    # Default different rank bus delay to 2 CK, @1000 MHz = 2 ns
    tCS = '2ns'

    # reset tRFC for larger, 8Gb device
    # use HBM1 4Gb value as a starting point
    tRFC = '260ns'

    # start with tRFC + tXP -> 260ns + 8ns = 268ns
    tXS = '268ns'
    # NOTE(review): the original patch contained a second, overriding
    # assignment tXS = '65ns' further down; 65ns is shorter than tRFC
    # (260ns) and appears copied from an LPDDR config, so it has been
    # removed -- confirm the intended self-refresh exit time.

    tREFI = '3.9us'

    # active powerdown and precharge powerdown exit time
    tXP = '10ns'
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture
+# burst of 32, which means bursts can be interleaved
class LPDDR5_5500_1x16_BG_BL32(DRAMInterface):

    # Increase buffer size to account for more bank resources
    read_buffer_size = 64

    # Set page policy to better suit DMC Huxley
    page_policy = 'close_adaptive'

    # 16-bit channel interface
    device_bus_width = 16

    # LPDDR5 is a BL16 or BL32 device
    # With BG mode, BL16 and BL32 are supported
    # Use BL32 for higher command bandwidth
    burst_length = 32

    # size of device in bytes
    device_size = '1GB'

    # 2kB page with BG mode
    device_rowbuffer_size = '2kB'

    # Use a 1x16 configuration
    devices_per_rank = 1

    # Use a single rank
    ranks_per_channel = 1

    # LPDDR5 supports configurable bank options
    # 8B  : BL32, all frequencies
    # 16B : BL32 or BL16, <=3.2Gbps
    # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
    # Initial configuration will have 16 banks with Bank Group Arch
    # to maximize resources and enable higher data rates
    banks_per_rank = 16
    bank_groups_per_rank = 4

    # 5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
    tCK = '1.455ns'

    # Greater of 2 CK or 18ns
    tRCD = '18ns'

    # Base RL is 16 CK @ 687.5 MHz = 23.28ns
    tCL = '23.280ns'

    # Greater of 2 CK or 18ns
    tRP = '18ns'

    # Greater of 3 CK or 42ns
    tRAS = '42ns'

    # Greater of 3 CK or 34ns
    tWR = '34ns'

    # active powerdown and precharge powerdown exit time
    # Greater of 3 CK or 7ns
    tXP = '7ns'

    # self refresh exit time (tRFCab + 7.5ns)
    tXS = '217.5ns'

    # Greater of 2 CK or 7.5 ns minus 2 CK
    tRTP = '4.59ns'

    # With BG architecture, burst of 32 transferred in two 16-beat
    # sub-bursts, with a 16-beat gap in between.
    # Each 16-beat sub-burst is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
    # tBURST is the delay to transfer the Bstof32 = 6 CK @ 687.5 MHz
    tBURST = '8.73ns'
    # can interleave a Bstof32 from another bank group at tBURST_MIN
    # 16-beats is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
    tBURST_MIN = '2.91ns'
    # tBURST_MAX is the maximum burst delay for same bank group timing
    # this is 8 CK @ 687.5 MHz
    tBURST_MAX = '11.64ns'

    # 8 CK @ 687.5 MHz
    tCCD_L = "11.64ns"

    # LPDDR5, 8 Gbit/channel for 210ns tRFCab
    # (tXS above is tRFCab + 7.5ns = 217.5ns, consistent with 210ns)
    tRFC = '210ns'
    tREFI = '3.9us'

    # Greater of 4 CK or 6.25 ns
    tWTR = '6.25ns'
    # Greater of 4 CK or 12 ns
    tWTR_L = '12ns'

    # Required RD-to-WR timing is RL + BL/n + tWCKDQ0/tCK - WL
    # tWCKDQ0/tCK will be 1 CK for most cases
    # For gem5 RL = WL and BL/n is already accounted for with tBURST
    # Result is an additional 1 CK is required
    tRTW = '1.455ns'

    # Default different rank bus delay to 2 CK, @687.5 MHz = 2.91 ns
    tCS = '2.91ns'

    # 2 CK
    tPPD = '2.91ns'

    # Greater of 2 CK or 5 ns
    tRRD = '5ns'
    tRRD_L = '5ns'

    # With Bank Group Arch mode tFAW is 20 ns
    tXAW = '20ns'
    activation_limit = 4

    # at 5Gbps, 4:1 WCK to CK ratio required
    # 2 data beats per WCK (DDR) -> 8 per CK
    beats_per_clock = 8

    # 2 cycles required to send activate command
    # 2 command phases can be sent back-to-back or
    # with a gap up to tAAD = 8 CK
    two_cycle_activate = True
    tAAD = '11.640ns'

    data_clock_sync = True
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture, burst of 16
class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32):

    # LPDDR5 is a BL16 or BL32 device
    # With BG mode, BL16 and BL32 are supported
    # Use BL16 for smaller access granularity
    burst_length = 16

    # For Bstof16 with BG arch, 2 CK @ 687.5 MHz with 4:1 clock ratio
    tBURST = '2.91ns'
    tBURST_MIN = '2.91ns'
    # For Bstof16 with BG arch, 4 CK @ 687.5 MHz with 4:1 clock ratio
    tBURST_MAX = '5.82ns'

    # 4 CK @ 687.5 MHz
    tCCD_L = "5.82ns"
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 8-bank mode, burst of 32
class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32):

    # 4kB page with 8B mode
    device_rowbuffer_size = '4kB'

    # LPDDR5 supports configurable bank options
    # 8B  : BL32, all frequencies
    # 16B : BL32 or BL16, <=3.2Gbps
    # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
    # Select 8B
    banks_per_rank = 8
    bank_groups_per_rank = 0

    # For Bstof32 with 8B mode, 4 CK @ 687.5 MHz with 4:1 clock ratio
    tBURST = '5.82ns'
    tBURST_MIN = '5.82ns'
    tBURST_MAX = '5.82ns'

    # Greater of 4 CK or 12 ns
    tWTR = '12ns'

    # Greater of 2 CK or 10 ns
    tRRD = '10ns'

    # With 8B mode tFAW is 40 ns
    tXAW = '40ns'
    activation_limit = 4

    # Reset BG arch timing for 8B mode
    tCCD_L = "0ns"
    tRRD_L = "0ns"
    tWTR_L = "0ns"
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture
+# burst of 32, which means bursts can be interleaved
class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32):

    # 6.4Gb/s DDR with 4:1 WCK:CK ratio for 800 MHz CK
    # (original comment said 5.5Gb/s / 687.5 MHz, stale copy from parent)
    tCK = '1.25ns'

    # Base RL is 17 CK @ 800 MHz = 21.25ns
    tCL = '21.25ns'

    # With BG architecture, burst of 32 transferred in two 16-beat
    # sub-bursts, with a 16-beat gap in between.
    # Each 16-beat sub-burst is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
    # tBURST is the delay to transfer the Bstof32 = 6 CK @ 800 MHz
    tBURST = '7.5ns'
    # can interleave a Bstof32 from another bank group at tBURST_MIN
    # 16-beats is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
    tBURST_MIN = '2.5ns'
    # tBURST_MAX is the maximum burst delay for same bank group timing
    # this is 8 CK @ 800 MHz
    tBURST_MAX = '10ns'

    # 8 CK @ 800 MHz
    tCCD_L = "10ns"

    # Required RD-to-WR timing is RL + BL/n + tWCKDQ0/tCK - WL
    # tWCKDQ0/tCK will be 1 CK for most cases
    # For gem5 RL = WL and BL/n is already accounted for with tBURST
    # Result is an additional 1 CK is required
    tRTW = '1.25ns'

    # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
    tCS = '2.5ns'

    # 2 CK
    tPPD = '2.5ns'

    # 2 command phases can be sent back-to-back or
    # with a gap up to tAAD = 8 CK
    tAAD = '10ns'
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on initial
+# JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture, burst of 16
class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32):

    # LPDDR5 is a BL16 or BL32 device
    # With BG mode, BL16 and BL32 are supported
    # Use BL16 for smaller access granularity
    burst_length = 16

    # For Bstof16 with BG arch, 2 CK @ 800 MHz with 4:1 clock ratio
    tBURST = '2.5ns'
    tBURST_MIN = '2.5ns'
    # For Bstof16 with BG arch, 4 CK @ 800 MHz with 4:1 clock ratio
    tBURST_MAX = '5ns'

    # 4 CK @ 800 MHz
    tCCD_L = "5ns"
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 8-bank mode, burst of 32
class LPDDR5_6400_1x16_8B_BL32(LPDDR5_6400_1x16_BG_BL32):

    # 4kB page with 8B mode
    device_rowbuffer_size = '4kB'

    # LPDDR5 supports configurable bank options
    # 8B  : BL32, all frequencies
    # 16B : BL32 or BL16, <=3.2Gbps
    # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
    # Select 8B
    banks_per_rank = 8
    bank_groups_per_rank = 0

    # For Bstof32 with 8B mode, 4 CK @ 800 MHz with 4:1 clock ratio
    tBURST = '5ns'
    tBURST_MIN = '5ns'
    tBURST_MAX = '5ns'

    # Greater of 4 CK or 12 ns
    tWTR = '12ns'

    # Greater of 2 CK or 10 ns
    tRRD = '10ns'

    # With 8B mode tFAW is 40 ns
    tXAW = '40ns'
    activation_limit = 4

    # Reset BG arch timing for 8B mode
    tCCD_L = "0ns"
    tRRD_L = "0ns"
    tWTR_L = "0ns"
diff --git a/src/mem/SConscript b/src/mem/SConscript
index b77dbb1..76ffdbd 100644
--- a/src/mem/SConscript
+++ b/src/mem/SConscript
@@ -1,6 +1,6 @@
-- mode:python --
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018, 2020 ARM Limited
All rights reserved
The license below extends only to copyright in the software and shall
@@ -47,6 +47,7 @@
SimObject('AddrMapper.py')
SimObject('Bridge.py')
SimObject('DRAMCtrl.py')
+SimObject('DRAMInterface.py')
SimObject('ExternalMaster.py')
SimObject('ExternalSlave.py')
SimObject('MemObject.py')
diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc
index dc244fe..533aa01 100644
--- a/src/mem/dram_ctrl.cc
+++ b/src/mem/dram_ctrl.cc
@@ -47,6 +47,7 @@
#include "debug/DRAMState.hh"
#include "debug/Drain.hh"
#include "debug/QOS.hh"
+#include "params/DRAMInterface.hh"
#include "sim/system.hh"
using namespace std;
@@ -58,12 +59,13 @@
retryRdReq(false), retryWrReq(false),
nextReqEvent([this]{ processNextReqEvent(); }, name()),
respondEvent([this]{ processRespondEvent(); }, name()),
- dram(p->dram),
readBufferSize(p->read_buffer_size),
writeBufferSize(p->write_buffer_size),
writeHighThreshold(writeBufferSize * p->write_high_thresh_perc /
100.0),
writeLowThreshold(writeBufferSize * p->write_low_thresh_perc / 100.0),
minWritesPerSwitch(p->min_writes_per_switch),
- writesThisTime(0), readsThisTime(0), tCS(p->tCS),
-
writesThisTime(0), readsThisTime(0),
memSchedPolicy(p->mem_sched_policy),
frontendLatency(p->static_frontend_latency),
backendLatency(p->static_backend_latency),
@@ -75,37 +77,23 @@
readQueue.resize(p->qos_priorities);
writeQueue.resize(p->qos_priorities);
-
dram->setCtrl(this);
-
// perform a basic check of the write thresholds
if (p->write_low_thresh_perc >= p->write_high_thresh_perc)
fatal("Write buffer low threshold %d must be smaller than the "
"high threshold %d\n", p->write_low_thresh_perc,
p->write_high_thresh_perc);
- // determine the rows per bank by looking at the total capacity
- uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
- DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
-
AbstractMemory::size());
- // create a DRAM interface
- // will only populate the ranks if DRAM is configured
- dram = new DRAMInterface(*this, p, capacity, range);
- DPRINTF(DRAM, "Created DRAM interface \n");
}
void
DRAMCtrl::init()
{
-
MemCtrl::init();
-
if (!port.isConnected()) {
fatal("DRAMCtrl %s is unconnected!\n", name());
} else {
port.sendRangeChange();
}
-
dram->init(range);
-
}
void
@@ -115,8 +103,6 @@
isTimingMode = system()->isTimingMode();
if (isTimingMode) {
-
dram->startup();
-
// shift the bus busy time sufficiently far ahead that we never
// have to worry about negative values when computing the time for
// the next request, this will add an insignificant bubble at the
@@ -134,7 +120,7 @@
"is responding");
// do the actual memory access and turn the packet into a response
-
dram->access(pkt);
Tick latency = 0;
if (pkt->hasData()) {
@@ -264,7 +250,7 @@
// address of first DRAM packet is kept unaligned. Subsequent DRAM
packets
// are aligned to burst size boundaries. This is to ensure we
accurately
// check read packets against packets in write queue.
- const Addr base_addr = getCtrlAddr(pkt->getAddr());
-
const Addr base_addr = dram->getCtrlAddr(pkt->getAddr());
Addr addr = base_addr;
unsigned pktsServicedByWrQ = 0;
BurstHelper* burst_helper = NULL;
@@ -364,7 +350,7 @@
// if the request size is larger than burst size, the pkt is split into
// multiple DRAM packets
- const Addr base_addr = getCtrlAddr(pkt->getAddr());
-
const Addr base_addr = dram->getCtrlAddr(pkt->getAddr());
Addr addr = base_addr;
uint32_t burstSize = dram->bytesPerBurst();
for (int cnt = 0; cnt < pktCount; ++cnt) {
@@ -527,7 +513,7 @@
DRAMPacket* dram_pkt = respQueue.front();
// media specific checks and functions when read response is complete
- dram->respondEventDRAM(dram_pkt->rank);
-
dram->respondEvent(dram_pkt->rank);
if (dram_pkt->burstHelper) {
// it is a split packet
@@ -726,12 +712,12 @@
void
DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency)
{
- DPRINTF(DRAM, "Responding to Address %lld.. ",pkt->getAddr());
-
DPRINTF(DRAM, "Responding to Address %lld.. \n",pkt->getAddr());
bool needsResponse = pkt->needsResponse();
// do the actual memory access which also turns the packet into a
// response
-
dram->access(pkt);
// turn packet around to go back to requester if response expected
if (needsResponse) {
@@ -876,9 +862,9 @@
// if not, shift to next burst window
Tick act_at;
if (twoCycleActivate)
@@ -996,7 +982,7 @@
// Issuing an explicit PRE command
// Verify that we have command bandwidth to issue the precharge
// if not, shift to next burst window
@@ -1046,7 +1032,7 @@
// first clean up the burstTick set, removing old entries
// before adding new entries for next burst
-
ctrl->pruneBurstTick();
// get the rank
Rank& rank_ref = *ranks[dram_pkt->rank];
@@ -1098,9 +1084,9 @@
// verify that we have command bandwidth to issue the burst
// if not, shift to next burst window
if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) >
clkResyncDelay))
@@ -1200,9 +1186,9 @@
// either look at the read queue or write queue
const std::vector<DRAMPacketQueue>& queue =
@@ -1273,6 +1259,7 @@
// Update latency stats
stats.totMemAccLat += dram_pkt->readyTime - dram_pkt->entryTime;
stats.totQLat += cmd_at - dram_pkt->entryTime;
@@ -1338,13 +1325,9 @@
// Update latency stats
stats.masterReadTotalLat[dram_pkt->masterId()] +=
dram_pkt->readyTime - dram_pkt->entryTime;
-
stats.bytesRead += dram->bytesPerBurst();
-
stats.totBusLat += dram->burstDly();
stats.masterReadBytes[dram_pkt->masterId()] += dram_pkt->size;
} else {
++writesThisTime;
-
stats.bytesWritten += dram->bytesPerBurst();
stats.masterWriteBytes[dram_pkt->masterId()] += dram_pkt->size;
stats.masterWriteTotalLat[dram_pkt->masterId()] +=
dram_pkt->readyTime - dram_pkt->entryTime;
@@ -1446,8 +1429,9 @@
// Figure out which read request goes next
// If we are changing command type, incorporate the minimum
case
0);
-
// bus turnaround delay which will be rank to rank delay
-
to_read = chooseNext((*queue), switched_cmd_type ?
-
dram->rankDelay() : 0);
if (to_read != queue->end()) {
// candidate read found
@@ -1526,7 +1510,8 @@
// If we are changing command type, incorporate the minimum
// bus turnaround delay
to_write = chooseNext((*queue),
tCS) : 0);
-
switched_cmd_type ? std::min(dram->minRdToWr(),
-
dram->rankDelay()) : 0);
if (to_write != queue->end()) {
write_found = true;
@@ -1599,11 +1584,8 @@
}
}
-DRAMInterface::DRAMInterface(DRAMCtrl& _ctrl,
- : AbstractMemory(_p),
addrMapping(_p->addr_mapping),
burstSize((_p->devices_per_rank * _p->burst_length *
_p->device_bus_width) / 8),
@@ -1618,7 +1600,7 @@
bankGroupsPerRank(_p->bank_groups_per_rank),
bankGroupArch(_p->bank_groups_per_rank > 0),
banksPerRank(_p->banks_per_rank), rowsPerBank(0),
-
tCK(_p->tCK), tCS(_p->tCS), tCL(_p->tCL), tBURST(_p->tBURST),
tBURST_MIN(_p->tBURST_MIN), tBURST_MAX(_p->tBURST_MAX),
tRTW(_p->tRTW),
tCCD_L_WR(_p->tCCD_L_WR), tCCD_L(_p->tCCD_L), tRCD(_p->tRCD),
tRP(_p->tRP), tRAS(_p->tRAS), tWR(_p->tWR), tRTP(_p->tRTP),
@@ -1634,12 +1616,12 @@
wrToRdDly(tCL + tBURST + _p->tWTR), rdToWrDly(tBURST + tRTW),
wrToRdDlySameBG(tCL + _p->tBURST_MAX + _p->tWTR_L),
rdToWrDlySameBG(tRTW + _p->tBURST_MAX),
-
rankToRankDly(tCS + tBURST),
pageMgmt(_p->page_policy),
maxAccessesPerRow(_p->max_accesses_per_row),
timeStampOffset(0), activeRank(0),
enableDRAMPowerdown(_p->enable_dram_powerdown),
-
stats(*this)
{
fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, "
"must be a power of two\n", burstSize);
@@ -1651,7 +1633,7 @@
for (int i = 0; i < ranksPerChannel; i++) {
DPRINTF(DRAM, "Creating DRAM rank %d \n", i);
@@ -1659,6 +1641,11 @@
uint64_t deviceCapacity = deviceSize / (1024 * 1024) * devicesPerRank *
ranksPerChannel;
@@ -1713,8 +1700,10 @@
}
void
-DRAMInterface::init(AddrRange range)
+DRAMInterface::init()
{
@@ -1736,7 +1725,7 @@
// channel striping has to be done at a granularity that
// is equal or larger to a cache line
large "
"as the cache line size\n", name());
}
@@ -1755,8 +1744,10 @@
void
DRAMInterface::startup()
{
- // timestamp offset should be in clock cycles for DRAMPower
- timeStampOffset = divCeil(curTick(), tCK);
-
if (system()->isTimingMode()) {
-
// timestamp offset should be in clock cycles for DRAMPower
-
timeStampOffset = divCeil(curTick(), tCK);
-
}
for (auto r : ranks) {
r->startup(curTick() + tREFI - tRP);
@@ -1802,7 +1793,7 @@
}
void
-DRAMInterface::respondEventDRAM(uint8_t rank)
+DRAMInterface::respondEvent(uint8_t rank)
{
Rank& rank_ref = *ranks[rank];
@@ -1943,7 +1934,7 @@
std::max(ranks[i]->banks[j].preAllowedAt, curTick()) +
tRP;
// When is the earliest the R/W burst can issue?
ranks[i]->banks[j].rdAllowedAt :
ranks[i]->banks[j].wrAllowedAt;
Tick col_at = std::max(col_allowed_at, act_at + tRCD);
@@ -1983,9 +1974,15 @@
return make_pair(bank_mask, hidden_bank_prep);
}
-DRAMInterface::Rank::Rank(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p, int
_rank,
-
DRAMInterface& _dram)
- : EventManager(&_ctrl), ctrl(_ctrl), dram(_dram),
+DRAMInterface*
+DRAMInterfaceParams::create()
+{
- return new DRAMInterface(this);
+}
+DRAMInterface::Rank::Rank(const DRAMInterfaceParams* _p,
-
stats(_dram, *this)
{
for (int b = 0; b < _p->banks_per_rank; b++) {
banks[b].bank = b;
@@ -2049,8 +2046,10 @@
DRAMInterface::Rank::isQueueEmpty() const
{
// check commands in Q based on current bus direction
0));
@@ -2174,7 +2173,7 @@
// if a request is at the moment being handled and this request is
// accessing the current rank then wait for it to finish
if ((rank == dram.activeRank)
@@ -2249,7 +2248,7 @@
// or have outstanding ACT,RD/WR,Auto-PRE sequence scheduled
// should have outstanding precharge or read response event
assert(prechargeEvent.scheduled() ||
@@ -2309,8 +2308,8 @@
assert(!powerEvent.scheduled());
-
if ((dram.ctrl->drainState() == DrainState::Draining) ||
-
(dram.ctrl->drainState() == DrainState::Drained)) {
// if draining, do not re-enter low-power mode.
// simply go to IDLE and wait
schedulePowerEvent(PWR_IDLE, curTick());
@@ -2535,10 +2534,10 @@
}
// completed refresh event, ensure next request is scheduled
@@ -2597,8 +2596,8 @@
// bypass auto-refresh and go straight to SREF, where memory
// will issue refresh immediately upon entry
if (pwrStatePostRefresh == PWR_PRE_PDN && isQueueEmpty() &&
-
(dram.ctrl->drainState() != DrainState::Draining) &&
-
(dram.ctrl->drainState() != DrainState::Drained) &&
dram.enableDRAMPowerdown) {
DPRINTF(DRAMState, "Rank %d bypassing refresh and
transitioning "
"to self refresh at %11u tick\n", rank, curTick());
@@ -2669,7 +2668,7 @@
// power (mW) = ----------- * ----------
// time (tick) tick_frequency
stats.averagePower = (stats.totalEnergy.value() /
@@ -2699,7 +2698,7 @@
bool
DRAMInterface::Rank::forceSelfRefreshExit() const {
return (readEntries != 0) ||
-
(!dram.ctrl->inReadBusState(true) && (writeEntries != 0));
}
DRAMCtrl::CtrlStats::CtrlStats(DRAMCtrl &_ctrl)
@@ -2710,15 +2709,15 @@
ADD_STAT(writeReqs, "Number of write requests accepted"),
ADD_STAT(readBursts,
-
"Number of controller read bursts, "
"including those serviced by the write queue"),
ADD_STAT(writeBursts,
-
"Number of controller write bursts, "
"including those merged in the write queue"),
ADD_STAT(servicedByWrQ,
queue"),
ADD_STAT(mergedWrBursts,
one"),
ADD_STAT(neitherReadNorWriteReqs,
"Number of requests that are neither read nor write"),
@@ -2726,9 +2725,6 @@
ADD_STAT(avgRdQLen, "Average read queue length when enqueuing"),
ADD_STAT(avgWrQLen, "Average write queue length when enqueuing"),
retry"),
ADD_STAT(numWrRetry, "Number of times write queue was full causing
retry"),
@@ -2743,22 +2739,13 @@
ADD_STAT(wrPerTurnAround,
"Writes before turning the bus around for reads"),
-
ADD_STAT(bytesRead, "Total number of bytes read from memory"),
ADD_STAT(bytesReadWrQ, "Total number of bytes read from write queue"),
-
ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"),
ADD_STAT(bytesReadSys, "Total read bytes from the system interface
side"),
ADD_STAT(bytesWrittenSys,
"Total written bytes from the system interface side"),
-
ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiByte/s"),
-
ADD_STAT(avgWrBW, "Average achieved write bandwidth in MiByte/s"),
ADD_STAT(avgRdBWSys, "Average system read bandwidth in MiByte/s"),
ADD_STAT(avgWrBWSys, "Average system write bandwidth in MiByte/s"),
-
ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
-
ADD_STAT(busUtil, "Data bus utilization in percentage"),
-
ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"),
-
ADD_STAT(busUtilWrite, "Data bus utilization in percentage for
writes"),
ADD_STAT(totGap, "Total gap between requests"),
ADD_STAT(avgGap, "Average gap between requests"),
@@ -2790,12 +2777,11 @@
{
using namespace Stats;
-
assert(ctrl._system);
-
const auto max_masters = ctrl._system->maxMasters();
-
avgBusLat.precision(2);
readPktSize.init(ceilLog2(ctrl.dram->bytesPerBurst()) + 1);
writePktSize.init(ceilLog2(ctrl.dram->bytesPerBurst()) + 1);
@@ -2810,14 +2796,9 @@
.init(ctrl.writeBufferSize)
.flags(nozero);
-
avgRdBW.precision(2);
-
avgWrBW.precision(2);
avgRdBWSys.precision(2);
avgWrBWSys.precision(2);
-
peakBW.precision(2);
-
busUtil.precision(2);
avgGap.precision(2);
-
busUtilWrite.precision(2);
// per-master bytes read and written to memory
masterReadBytes
@@ -2849,9 +2830,6 @@
.flags(nonan)
.precision(2);
-
busUtilRead
-
.precision(2);
-
masterWriteRate
.flags(nozero | nonan)
.precision(12);
@@ -2865,7 +2843,7 @@
.precision(2);
for (int i = 0; i < max_masters; i++) {
-
const std::string master = ctrl.system()->getMasterName(i);
masterReadBytes.subname(i, master);
masterReadRate.subname(i, master);
masterWriteBytes.subname(i, master);
@@ -2879,22 +2857,11 @@
}
// Formula stats
-
avgBusLat = totBusLat / (readBursts - servicedByWrQ);
-
avgRdBW = (bytesRead / 1000000) / simSeconds;
-
avgWrBW = (bytesWritten / 1000000) / simSeconds;
avgRdBWSys = (bytesReadSys / 1000000) / simSeconds;
avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds;
-
peakBW = (SimClock::Frequency / ctrl.dram->burstDataDly()) *
-
ctrl.dram->bytesPerBurst() / 1000000;
-
busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
avgGap = totGap / (readReqs + writeReqs);
-
busUtilRead = avgRdBW / peakBW * 100;
-
busUtilWrite = avgWrBW / peakBW * 100;
-
masterReadRate = masterReadBytes / simSeconds;
masterWriteRate = masterWriteBytes / simSeconds;
masterReadAvgLat = masterReadTotalLat / masterReadAccesses;
@@ -2907,8 +2874,8 @@
ctrl.lastStatsResetTick = curTick();
}
-DRAMInterface::DRAMStats::DRAMStats(DRAMCtrl &_ctrl, DRAMInterface &_dram)
- : Stats::Group(&_ctrl, csprintf("dram").c_str()),
+DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram)
-
: Stats::Group(&_dram),
dram(_dram),
ADD_STAT(readBursts, "Number of DRAM read bursts"),
@@ -2918,10 +2885,13 @@
ADD_STAT(perBankWrBursts, "Per bank write bursts"),
ADD_STAT(totQLat, "Total ticks spent queuing"),
-
ADD_STAT(totBusLat, "Total ticks spent in databus transfers"),
ADD_STAT(totMemAccLat,
"Total ticks spent from burst creation until serviced "
"by the DRAM"),
-
ADD_STAT(avgQLat, "Average queueing delay per DRAM burst"),
-
ADD_STAT(avgBusLat, "Average bus latency per DRAM burst"),
ADD_STAT(avgMemAccLat, "Average memory access latency per DRAM burst"),
ADD_STAT(readRowHits, "Number of row buffer hits during reads"),
@@ -2934,6 +2904,12 @@
ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"),
ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiBytes/s"),
ADD_STAT(avgWrBW, "Average DRAM write bandwidth in MiBytes/s"),
-
ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
-
ADD_STAT(busUtil, "Data bus utilization in percentage"),
-
ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"),
-
ADD_STAT(busUtilWrite, "Data bus utilization in percentage for
writes"),
-
ADD_STAT(pageHitRate, "Row buffer hit rate, read and write combined")
{
@@ -2945,6 +2921,7 @@
using namespace Stats;
avgQLat.precision(2);
-
avgBusLat.precision(2);
avgMemAccLat.precision(2);
readRowHitRate.precision(2);
@@ -2958,10 +2935,16 @@
dram.maxAccessesPerRow : dram.rowBufferSize)
.flags(nozero);
-
peakBW.precision(2);
-
busUtil.precision(2);
-
busUtilWrite.precision(2);
-
busUtilRead.precision(2);
-
pageHitRate.precision(2);
// Formula stats
avgQLat = totQLat / readBursts;
-
avgBusLat = totBusLat / readBursts;
avgMemAccLat = totMemAccLat / readBursts;
readRowHitRate = (readRowHits / readBursts) * 100;
@@ -2969,13 +2952,19 @@
avgRdBW = (bytesRead / 1000000) / simSeconds;
avgWrBW = (bytesWritten / 1000000) / simSeconds;
-
peakBW = (SimClock::Frequency / dram.burstDataDly()) *
-
dram.bytesPerBurst() / 1000000;
-
busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
-
busUtilRead = avgRdBW / peakBW * 100;
-
busUtilWrite = avgWrBW / peakBW * 100;
pageHitRate = (writeRowHits + readRowHits) /
(writeBursts + readBursts) * 100;
}
-DRAMInterface::RankStats::RankStats(DRAMCtrl &_ctrl, Rank &_rank)
- : Stats::Group(&_ctrl, csprintf("dram_rank%d", _rank.rank).c_str()),
+DRAMInterface::RankStats::RankStats(DRAMInterface &_dram, Rank &_rank)
-
: Stats::Group(&_dram, csprintf("rank%d", _rank.rank).c_str()),
rank(_rank),
ADD_STAT(actEnergy, "Energy for activate commands per rank (pJ)"),
@@ -3034,7 +3023,7 @@
DRAMCtrl::recvFunctional(PacketPtr pkt)
{
// rely on the abstract memory
- dram->functionalAccess(pkt);
}
Port &
@@ -3099,7 +3088,7 @@
DRAMCtrl::MemoryPort::getAddrRanges() const
{
AddrRangeList ranges;
- ranges.push_back(ctrl.getAddrRange());
- ranges.push_back(ctrl.dram->getAddrRange());
return ranges;
}
diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh
index 4464f7a..1b6d8b5 100644
--- a/src/mem/dram_ctrl.hh
+++ b/src/mem/dram_ctrl.hh
@@ -56,12 +56,15 @@
#include "enums/AddrMap.hh"
#include "enums/MemSched.hh"
#include "enums/PageManage.hh"
+#include "mem/abstract_mem.hh"
#include "mem/drampower.hh"
#include "mem/qos/mem_ctrl.hh"
#include "mem/qport.hh"
#include "params/DRAMCtrl.hh"
#include "sim/eventq.hh"
+class DRAMInterfaceParams;
+
/**
- A basic class to track the bank state, i.e. what row is
- currently open (if any), when is the bank free to accept a new
@@ -243,7 +246,7 @@
- The DRAMInterface includes a class for individual ranks
- and per rank functions.
/
-class DRAMInterface : public SimObject
+class DRAMInterface : public AbstractMemory
{
private:
/*
@@ -340,7 +343,7 @@
class Rank;
struct RankStats : public Stats::Group
{
@@ -406,13 +409,6 @@
*/
class Rank : public EventManager
{
@@ -532,10 +528,10 @@
*/
Tick lastBurstTick;
}
@@ -662,9 +658,9 @@
};
/**
-
DRAMCtrl* ctrl;
/**
- Memory controller configuration initialized based on parameter
@@ -695,6 +691,7 @@
- DRAM timing requirements
*/
const Tick M5_CLASS_VAR_USED tCK;
-
const Tick tCS;
const Tick tCL;
const Tick tBURST;
const Tick tBURST_MIN;
@@ -774,7 +771,7 @@
bool trace = true);
struct DRAMStats : public Stats::Group {
@@ -790,10 +787,12 @@
// Latencies summed over all requests
Stats::Scalar totQLat;
@@ -809,6 +808,11 @@
// Average bandwidth
Stats::Formula avgRdBW;
Stats::Formula avgWrBW;
@@ -820,11 +824,16 @@
std::vector<Rank*> ranks;
public:
-
void init();
/**
- Iterate through dram ranks and instantiate per rank startup routine
@@ -853,6 +862,20 @@
void suspend();
/**
-
* Get an address in a dense range which starts from 0. The input
-
* address is the physical address of the request in an address
-
* space that contains other SimObjects apart from this
-
* controller.
-
*
-
* @param addr The input address which should be in the addrRange
-
* @return An address in the contiguous range [0, max)
-
*/
-
Addr getCtrlAddr(Addr addr)
-
{
-
return range.getOffset(addr);
-
}
-
/**
- @return number of bytes in a burst for this interface
*/
uint32_t bytesPerBurst () { return burstSize; };
@@ -887,6 +910,13 @@
*/
Tick minRdToWr () { return tRTW; };
-
/**
-
* Determine the required delay for an access to a different rank
-
*
-
* @return required rank to rank delay
-
*/
-
Tick rankDelay() { return tCS; };
-
/*
* Function to calculate RAS cycle time for use within and
* outside of this class
@@ -968,7 +998,7 @@
*
* @param rank Specifies rank associated with read burst
*/
- void respondEventDRAM(uint8_t rank);
- DRAMInterface(const DRAMInterfaceParams* _p);
};
/**
@@ -1170,20 +1199,6 @@
void accessAndRespond(PacketPtr pkt, Tick static_latency);
/**
-
* Get an address in a dense range which starts from 0. The input
-
* address is the physical address of the request in an address
-
* space that contains other SimObjects apart from this
-
* controller.
-
*
-
* @param addr The input address which should be in the addrRange
-
* @return An address in the contiguous range [0, max)
-
*/
-
Addr getCtrlAddr(Addr addr)
-
{
-
return range.getOffset(addr);
-
}
-
/**
- The memory scheduler/arbiter - picks which request needs to
- go next, based on the specified policy such as FCFS or FR-FCFS
- and moves it to the head of the queue.
@@ -1265,6 +1280,11 @@
std::unordered_multiset<Tick> burstTicks;
/**
-
* Basic memory timing parameters initialized based on parameter
-
* values. These will be used across memory interfaces.
-
*/
- const Tick tCS;
- /**
- Memory controller configuration initialized based on parameter
- values.
*/
@@ -1338,10 +1352,6 @@
// Average queue lengths
Stats::Average avgRdQLen;
Stats::Average avgWrQLen;
-
// Latencies summed over all requests
-
Stats::Scalar totBusLat;
-
// Average latencies per request
-
Stats::Formula avgBusLat;
Stats::Scalar numRdRetry;
Stats::Scalar numWrRetry;
@@ -1352,21 +1362,12 @@
Stats::Histogram rdPerTurnAround;
Stats::Histogram wrPerTurnAround;
-
Stats::Scalar bytesRead;
Stats::Scalar bytesReadWrQ;
-
Stats::Scalar bytesWritten;
Stats::Scalar bytesReadSys;
Stats::Scalar bytesWrittenSys;
// Average bandwidth
-
Stats::Formula avgRdBW;
-
Stats::Formula avgWrBW;
Stats::Formula avgRdBWSys;
Stats::Formula avgWrBWSys;
-
Stats::Formula peakBW;
-
// bus utilization
-
Stats::Formula busUtil;
-
Stats::Formula busUtilRead;
-
Stats::Formula busUtilWrite;
Stats::Scalar totGap;
Stats::Formula avgGap;
@@ -1405,11 +1406,6 @@
/** The time when stats were last reset used to calculate average
power */
Tick lastStatsResetTick;
@@ -1458,13 +1454,6 @@
};
/**
-DRAMPower::DRAMPower(const DRAMCtrlParams* p, bool include_io) :
+DRAMPower::DRAMPower(const DRAMInterfaceParams* p, bool include_io) :
powerlib(libDRAMPower(getMemSpec(p), include_io))
{
}
Data::MemArchitectureSpec
-DRAMPower::getArchParams(const DRAMCtrlParams* p)
+DRAMPower::getArchParams(const DRAMInterfaceParams* p)
{
Data::MemArchitectureSpec archSpec;
archSpec.burstLength = p->burst_length;
@@ -68,7 +68,7 @@
}
Data::MemTimingSpec
-DRAMPower::getTimingParams(const DRAMCtrlParams* p)
+DRAMPower::getTimingParams(const DRAMInterfaceParams* p)
{
// Set the values that are used for power calculations and ignore
// the ones only used by the controller functionality in DRAMPower
@@ -100,7 +100,7 @@
}
Data::MemPowerSpec
-DRAMPower::getPowerParams(const DRAMCtrlParams* p)
+DRAMPower::getPowerParams(const DRAMInterfaceParams* p)
{
// All DRAMPower currents are in mA
Data::MemPowerSpec powerSpec;
@@ -132,7 +132,7 @@
}
Data::MemorySpecification
-DRAMPower::getMemSpec(const DRAMCtrlParams* p)
+DRAMPower::getMemSpec(const DRAMInterfaceParams* p)
{
Data::MemorySpecification memSpec;
memSpec.memArchSpec = getArchParams(p);
@@ -142,13 +142,13 @@
}
bool
-DRAMPower::hasTwoVDD(const DRAMCtrlParams* p)
+DRAMPower::hasTwoVDD(const DRAMInterfaceParams* p)
{
return p->VDD2 == 0 ? false : true;
}
uint8_t
-DRAMPower::getDataRate(const DRAMCtrlParams* p)
+DRAMPower::getDataRate(const DRAMInterfaceParams* p)
{
uint32_t burst_cycles = divCeil(p->tBURST_MAX, p->tCK);
uint8_t data_rate = p->burst_length / burst_cycles;
diff --git a/src/mem/drampower.hh b/src/mem/drampower.hh
index ed47476..da68a78 100644
--- a/src/mem/drampower.hh
+++ b/src/mem/drampower.hh
@@ -44,7 +44,7 @@
#define MEM_DRAM_POWER_HH
#include "libdrampower/LibDRAMPower.h"
-#include "params/DRAMCtrl.hh"
+#include "params/DRAMInterface.hh"
/**
- static Data::MemArchitectureSpec getArchParams(const DRAMCtrlParams*
p);
- static Data::MemTimingSpec getTimingParams(const DRAMCtrlParams* p);
- static Data::MemPowerSpec getPowerParams(const DRAMCtrlParams* p);
- static uint8_t getDataRate(const DRAMCtrlParams* p);
- static bool hasTwoVDD(const DRAMCtrlParams* p);
- static Data::MemorySpecification getMemSpec(const DRAMCtrlParams* p);
- static Data::MemorySpecification getMemSpec(const DRAMInterfaceParams*
p);
public:
// Instance of DRAMPower Library
libDRAMPower powerlib;
- DRAMPower(const DRAMCtrlParams* p, bool include_io);
- DRAMPower(const DRAMInterfaceParams* p, bool include_io);
};
diff --git a/src/mem/qos/QoSMemCtrl.py b/src/mem/qos/QoSMemCtrl.py
index 1cd3f0b..f55105b 100644
--- a/src/mem/qos/QoSMemCtrl.py
+++ b/src/mem/qos/QoSMemCtrl.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
All rights reserved.
The license below extends only to copyright in the software and shall
@@ -34,18 +34,21 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from m5.params import *
-from m5.objects.AbstractMemory import AbstractMemory
+from m5.proxy import *
+from m5.objects.ClockedObject import ClockedObject
from m5.objects.QoSTurnaround import *
QoS Queue Selection policy used to select packets among same-QoS queues
class QoSQPolicy(Enum): vals = ["fifo", "lifo", "lrg"]
-class QoSMemCtrl(AbstractMemory):
+class QoSMemCtrl(ClockedObject):
type = 'QoSMemCtrl'
cxx_header = "mem/qos/mem_ctrl.hh"
cxx_class = 'QoS::MemCtrl'
abstract = True
diff --git a/src/mem/qos/QoSMemSinkCtrl.py b/src/mem/qos/QoSMemSinkCtrl.py
index 572cad5..03a988a 100644
--- a/src/mem/qos/QoSMemSinkCtrl.py
+++ b/src/mem/qos/QoSMemSinkCtrl.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
All rights reserved.
The license below extends only to copyright in the software and shall
@@ -37,6 +37,7 @@
from m5.params import *
from m5.objects.QoSMemCtrl import *
+from QoSMemSinkInterface import *
class QoSMemSinkCtrl(QoSMemCtrl):
type = 'QoSMemSinkCtrl'
@@ -44,6 +45,10 @@
cxx_class = "QoS::MemSinkCtrl"
port = SlavePort("Slave ports")
@@ -59,5 +64,3 @@
# response latency - time to issue a response once a request is
serviced
response_latency = Param.Latency("20ns", "Memory response latency")
diff --git a/src/mem/qos/QoSMemSinkInterface.py
b/src/mem/qos/QoSMemSinkInterface.py
new file mode 100644
index 0000000..fd8254f
--- /dev/null
+++ b/src/mem/qos/QoSMemSinkInterface.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2020 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Matteo Andreozzi
+# Wendy Elsasser
+
+from AbstractMemory import AbstractMemory
+
+class QoSMemSinkInterface(AbstractMemory):
- type = 'QoSMemSinkInterface'
- cxx_header = "mem/qos/mem_sink.hh"
diff --git a/src/mem/qos/SConscript b/src/mem/qos/SConscript
index f8601b6..1d90f9c 100644
--- a/src/mem/qos/SConscript
+++ b/src/mem/qos/SConscript
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
All rights reserved
The license below extends only to copyright in the software and shall
@@ -37,6 +37,7 @@
SimObject('QoSMemCtrl.py')
SimObject('QoSMemSinkCtrl.py')
+SimObject('QoSMemSinkInterface.py')
SimObject('QoSPolicy.py')
SimObject('QoSTurnaround.py')
diff --git a/src/mem/qos/mem_ctrl.cc b/src/mem/qos/mem_ctrl.cc
index 50e6035..190960b 100644
--- a/src/mem/qos/mem_ctrl.cc
+++ b/src/mem/qos/mem_ctrl.cc
@@ -1,5 +1,5 @@
/*
-
- Copyright (c) 2017-2019 ARM Limited
- : ClockedObject(p),
policy(p->qos_policy),
turnPolicy(p->qos_turnaround_policy),
queuePolicy(QueuePolicy::create(p)),
@@ -51,7 +51,8 @@
qosSyncroScheduler(p->qos_syncro_scheduler),
totalReadQueueSize(0), totalWriteQueueSize(0),
busState(READ), busStateNext(READ),
- stats(*this),
- _system(p->system)
{
// Set the priority policy
if (policy) {
@@ -77,12 +78,6 @@
{}
void
-MemCtrl::init()
-{
- AbstractMemory::init();
-}
-void
MemCtrl::logRequest(BusState dir, MasterID m_id, uint8_t qos,
Addr addr, uint64_t entries)
{
diff --git a/src/mem/qos/mem_ctrl.hh b/src/mem/qos/mem_ctrl.hh
index 0e29fcc..50ddc94 100644
--- a/src/mem/qos/mem_ctrl.hh
+++ b/src/mem/qos/mem_ctrl.hh
@@ -1,5 +1,5 @@
/*
-
- Copyright (c) 2019 ARM Limited
@@ -49,6 +49,8 @@
#ifndef MEM_QOS_MEM_CTRL_HH
#define MEM_QOS_MEM_CTRL_HH
+class System;
+
namespace QoS {
/**
@@ -56,7 +58,7 @@
- which support QoS - it provides access to a set of QoS
- scheduling policies
/
-class MemCtrl: public AbstractMemory
+class MemCtrl: public ClockedObject
{
public:
/* Bus Direction */
@@ -151,6 +153,9 @@
Stats::Scalar numStayWriteState;
} stats;
@@ -266,11 +271,6 @@
virtual ~MemCtrl();
/**
template<typename Queues>
diff --git a/src/mem/qos/mem_sink.cc b/src/mem/qos/mem_sink.cc
index 1f104e4..fb06b9d 100644
--- a/src/mem/qos/mem_sink.cc
+++ b/src/mem/qos/mem_sink.cc
@@ -1,5 +1,5 @@
/*
-
- Copyright (c) 2018 ARM Limited
-
- Copyright (c) 2018-2020 ARM Limited
- All rights reserved
- The license below extends only to copyright in the software and shall
@@ -40,6 +40,7 @@
#include "debug/Drain.hh"
#include "debug/QOS.hh"
#include "mem_sink.hh"
+#include "params/QoSMemSinkInterface.hh"
#include "sim/system.hh"
namespace QoS {
@@ -50,12 +51,15 @@
memoryPacketSize(p->memory_packet_size),
readBufferSize(p->read_buffer_size),
writeBufferSize(p->write_buffer_size), port(name() + ".port", *this),
-
intf(p->intf),
retryRdReq(false), retryWrReq(false), nextRequest(0),
nextReqEvent(this)
{
// Resize read and write queue to allocate space
// for configured QoS priorities
readQueue.resize(numPriorities());
writeQueue.resize(numPriorities());
-
intf->setMemCtrl(this);
}
MemSinkCtrl::~MemSinkCtrl()
@@ -92,7 +96,7 @@
"%s Should not see packets where cache is responding\n",
func);
- intf->access(pkt);
return responseLatency;
}
@@ -101,7 +105,7 @@
{
pkt->pushLabel(name());
-
intf->functionalAccess(pkt);
pkt->popLabel();
}
@@ -279,7 +283,7 @@
// Do the actual memory access which also turns the packet
// into a response
-
intf->access(pkt);
// Log the response
logResponse(pkt->isRead()? READ : WRITE,
@@ -351,7 +355,7 @@
MemSinkCtrl::MemoryPort::getAddrRanges() const
{
AddrRangeList ranges;
- ranges.push_back(memory.getAddrRange());
- ranges.push_back(memory.intf->getAddrRange());
return ranges;
}
@@ -390,3 +394,19 @@
return new QoS::MemSinkCtrl(this);
}
+QoSMemSinkInterface::QoSMemSinkInterface(const QoSMemSinkInterfaceParams*
_p)
- : AbstractMemory(_p)
+{
+}
+void
+QoSMemSinkInterface::init()
+{
- AbstractMemory::init();
+}
+QoSMemSinkInterface*
+QoSMemSinkInterfaceParams::create()
+{
- return new QoSMemSinkInterface(this);
+}
diff --git a/src/mem/qos/mem_sink.hh b/src/mem/qos/mem_sink.hh
index 9a51269..3b10abd 100644
--- a/src/mem/qos/mem_sink.hh
+++ b/src/mem/qos/mem_sink.hh
@@ -1,5 +1,5 @@
/*
-
- Copyright (c) 2018 ARM Limited
-
- Copyright (c) 2018-2020 ARM Limited
- All rights reserved
- The license below extends only to copyright in the software and shall
@@ -41,10 +41,14 @@
#ifndef MEM_QOS_MEM_SINK_HH
#define MEM_QOS_MEM_SINK_HH
+#include "mem/abstract_mem.hh"
#include "mem/qos/mem_ctrl.hh"
#include "mem/qport.hh"
#include "params/QoSMemSinkCtrl.hh"
+class QoSMemSinkInterfaceParams;
+class QoSMemSinkInterface;
+
namespace QoS {
/**
@@ -163,6 +167,11 @@
/** Memory slave port */
MemoryPort port;
@@ -244,4 +253,23 @@
} // namespace QoS
+class QoSMemSinkInterface : public AbstractMemory
+{
-
public:
-
/** Initialize the memory interface */
-
void init();
-
/** Setting a pointer to the interface */
-
void setMemCtrl(QoS::MemSinkCtrl* _ctrl)
-
{
-
ctrl = _ctrl;
-
};
-
/** Pointer to the controller */
-
QoS::MemSinkCtrl* ctrl;
-
QoSMemSinkInterface(const QoSMemSinkInterfaceParams* _p);
+};
-
#endif /* MEM_QOS_MEM_SINK_HH */
diff --git a/tests/configs/base_config.py b/tests/configs/base_config.py
index 0f79938..e2d3851 100644
--- a/tests/configs/base_config.py
+++ b/tests/configs/base_config.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012-2013, 2017-2018 ARM Limited
+# Copyright (c) 2012-2013, 2017-2018, 2020 ARM Limited
All rights reserved.
The license below extends only to copyright in the software and shall
@@ -221,7 +221,12 @@
super(BaseSESystem, self).init_system(system)
def create_system(self):
-
if issubclass(self.mem_class, m5.objects.DRAMInterface):
-
mem_ctrl = DRAMCtrl()
-
mem_ctrl.dram = self.mem_class()
-
else:
-
mem_ctrl = self.mem_class()
-
system = System(physmem = mem_ctrl,
membus = SystemXBar(),
mem_mode = self.mem_mode,
multi_thread = (self.num_threads > 1))
@@ -275,6 +280,16 @@
# the physmem name to avoid bumping all the reference stats
system.physmem = [self.mem_class(range = r)
for r in system.mem_ranges]
-
if issubclass(self.mem_class, m5.objects.DRAMInterface):
-
mem_ctrls = []
-
for r in system.mem_ranges:
-
mem_ctrl = DRAMCtrl()
-
mem_ctrl.dram = self.mem_class(range = r)
-
mem_ctrls.append(mem_ctrl)
-
system.physmem = mem_ctrls
-
else:
-
system.physmem = [self.mem_class(range = r)
-
for r in system.mem_ranges]
for i in range(len(system.physmem)):
system.physmem[i].port = system.membus.master
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/28968
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I6a368b845d574a713c7196c5671188ca8c1dc5e8
Gerrit-Change-Number: 28968
Gerrit-PatchSet: 1
Gerrit-Owner: Wendy Elsasser <wendy.elsasser(a)arm.com>
Gerrit-MessageType: newchange
Wendy Elsasser has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/28968 )
Change subject: mem: Make DRAMCtrl a ClockedObject
......................................................................
mem: Make DRAMCtrl a ClockedObject
Made DRAMCtrl a ClockedObject, with DRAMInterface
defined as an AbstractMemory. The address
ranges are now defined per interface. Currently
the model only includes a DRAMInterface but this
can be expanded for other media types.
The controller object includes a parameter to the
interface, which is setup when gem5 is configured.
Change-Id: I6a368b845d574a713c7196c5671188ca8c1dc5e8
---
M configs/common/MemConfig.py
M configs/dram/low_power_sweep.py
M configs/dram/sweep.py
M configs/learning_gem5/part1/simple.py
M configs/learning_gem5/part1/two_level.py
M configs/learning_gem5/part2/simple_cache.py
M configs/learning_gem5/part2/simple_memobj.py
M configs/learning_gem5/part3/simple_ruby.py
M src/mem/DRAMCtrl.py
A src/mem/DRAMInterface.py
M src/mem/SConscript
M src/mem/dram_ctrl.cc
M src/mem/dram_ctrl.hh
M src/mem/drampower.cc
M src/mem/drampower.hh
M src/mem/qos/QoSMemCtrl.py
M src/mem/qos/QoSMemSinkCtrl.py
A src/mem/qos/QoSMemSinkInterface.py
M src/mem/qos/SConscript
M src/mem/qos/mem_ctrl.cc
M src/mem/qos/mem_ctrl.hh
M src/mem/qos/mem_sink.cc
M src/mem/qos/mem_sink.hh
M tests/configs/base_config.py
24 files changed, 1,934 insertions(+), 1,760 deletions(-)
diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py
index 9443520..ab6b933 100644
--- a/configs/common/MemConfig.py
+++ b/configs/common/MemConfig.py
@@ -40,7 +40,7 @@
from common import ObjectList
from common import HMC
-def create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits, intlv_size):
+def create_mem_intf(intf, r, i, nbr_mem_ctrls, intlv_bits, intlv_size):
"""
Helper function for creating a single memory interface from the given
options. This function is invoked multiple times in config_mem
function
@@ -59,33 +59,33 @@
# Create an instance so we can figure out the address
# mapping and row-buffer size
- ctrl = cls()
+ interface = intf()
# Only do this for DRAMs
- if issubclass(cls, m5.objects.DRAMCtrl):
+ if issubclass(intf, m5.objects.DRAMInterface):
# If the channel bits are appearing after the column
# bits, we need to add the appropriate number of bits
# for the row buffer size
- if ctrl.addr_mapping.value == 'RoRaBaChCo':
+ if interface.addr_mapping.value == 'RoRaBaChCo':
# This computation only really needs to happen
# once, but as we rely on having an instance we
# end up having to repeat it for each and every
# one
- rowbuffer_size = ctrl.device_rowbuffer_size.value * \
- ctrl.devices_per_rank.value
+ rowbuffer_size = interface.device_rowbuffer_size.value * \
+ interface.devices_per_rank.value
intlv_low_bit = int(math.log(rowbuffer_size, 2))
# We got all we need to configure the appropriate address
# range
- ctrl.range = m5.objects.AddrRange(r.start, size = r.size(),
+ interface.range = m5.objects.AddrRange(r.start, size = r.size(),
intlvHighBit = \
intlv_low_bit + intlv_bits - 1,
xorHighBit = \
xor_low_bit + intlv_bits - 1,
intlvBits = intlv_bits,
intlvMatch = i)
- return ctrl
+ return interface
def config_mem(options, system):
"""
@@ -144,10 +144,10 @@
if 2 ** intlv_bits != nbr_mem_ctrls:
fatal("Number of memory channels must be a power of 2")
- cls = ObjectList.mem_list.get(opt_mem_type)
+ intf = ObjectList.mem_list.get(opt_mem_type)
mem_ctrls = []
- if opt_elastic_trace_en and not issubclass(cls,
m5.objects.SimpleMemory):
+ if opt_elastic_trace_en and not issubclass(intf,
m5.objects.SimpleMemory):
fatal("When elastic trace is enabled, configure mem-type as "
"simple-mem.")
@@ -158,36 +158,56 @@
intlv_size = max(opt_mem_channels_intlv, system.cache_line_size.value)
# For every range (most systems will only have one), create an
- # array of controllers and set their parameters to match their
- # address mapping in the case of a DRAM
+ # array of memory interfaces and set their parameters to match
+ # their address mapping in the case of a DRAM
for r in system.mem_ranges:
for i in range(nbr_mem_ctrls):
- mem_ctrl = create_mem_ctrl(cls, r, i, nbr_mem_ctrls,
intlv_bits,
+ # Create the DRAM interface
+ dram_intf = create_mem_intf(intf, r, i, nbr_mem_ctrls,
intlv_bits,
intlv_size)
+
# Set the number of ranks based on the command-line
# options if it was explicitly set
- if issubclass(cls, m5.objects.DRAMCtrl) and opt_mem_ranks:
- mem_ctrl.ranks_per_channel = opt_mem_ranks
+ if issubclass(intf, m5.objects.DRAMInterface) and
opt_mem_ranks:
+ dram_intf.ranks_per_channel = opt_mem_ranks
# Enable low-power DRAM states if option is set
- if issubclass(cls, m5.objects.DRAMCtrl):
- mem_ctrl.enable_dram_powerdown = opt_dram_powerdown
+ if issubclass(intf, m5.objects.DRAMInterface):
+ dram_intf.enable_dram_powerdown = opt_dram_powerdown
if opt_elastic_trace_en:
- mem_ctrl.latency = '1ns'
+ dram_intf.latency = '1ns'
print("For elastic trace, over-riding Simple Memory "
"latency to 1ns.")
+ # Create the controller that will drive the interface
+ if opt_mem_type == "HMC_2500_1x32":
+ # The static latency of the vault controllers is estimated
+ # to be smaller than a full DRAM channel controller
+ mem_ctrl = m5.objects.DRAMCtrl(min_writes_per_switch = 8,
+ static_backend_latency
= '4ns',
+ static_frontend_latency
= '4ns')
+ else:
+ mem_ctrl = m5.objects.DRAMCtrl()
+
+ # Override buffer sizes with interface specific values
+ mem_ctrl.write_buffer_size = dram_intf.write_buffer_size
+ mem_ctrl.read_buffer_size = dram_intf.read_buffer_size
+
+ # Hookup the controller to the interface and add to the list
+ mem_ctrl.dram = dram_intf
mem_ctrls.append(mem_ctrl)
- subsystem.mem_ctrls = mem_ctrls
-
- # Connect the controllers to the membus
- for i in range(len(subsystem.mem_ctrls)):
+ # Create a controller and connect the interfaces to a controller
+ for i in range(len(mem_ctrls)):
if opt_mem_type == "HMC_2500_1x32":
- subsystem.mem_ctrls[i].port = xbar[i/4].master
+ # Connect the controllers to the membus
+ mem_ctrls[i].port = xbar[i/4].master
# Set memory device size. There is an independent controller
for
# each vault. All vaults are same size.
- subsystem.mem_ctrls[i].device_size = options.hmc_dev_vault_size
+ mem_ctrls[i].dram.device_size = options.hmc_dev_vault_size
else:
- subsystem.mem_ctrls[i].port = xbar.master
+ # Connect the controllers to the membus
+ mem_ctrls[i].port = xbar.master
+
+ subsystem.mem_ctrls = mem_ctrls
diff --git a/configs/dram/low_power_sweep.py
b/configs/dram/low_power_sweep.py
index 9a62393..4a97fcb 100644
--- a/configs/dram/low_power_sweep.py
+++ b/configs/dram/low_power_sweep.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2014-2015, 2017, 2019 ARM Limited
+# Copyright (c) 2014-2015, 2017, 2019-2020 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -37,6 +37,7 @@
from __future__ import absolute_import
import argparse
+import math
import m5
from m5.objects import *
@@ -57,6 +58,10 @@
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+dram_generators = {
+ "DRAM" : lambda x: x.createDram,
+}
+
# Use a single-channel DDR4-2400 in 16x4 configuration by default
parser.add_argument("--mem-type", default="DDR4_2400_16x4",
choices=ObjectList.mem_list.get_names(),
@@ -77,7 +82,7 @@
help = "Percentage of read commands")
parser.add_argument("--addr-map",
- choices=m5.objects.AddrMap.vals,
+ choices=ObjectList.dram_addr_map_list.get_names(),
default="RoRaBaCoCh", help = "DRAM address map policy")
parser.add_argument("--idle-end", type=int, default=50000000,
@@ -111,14 +116,19 @@
# Sanity check for memory controller class.
if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl):
- fatal("This script assumes the memory is a DRAMCtrl subclass")
+ fatal("This script assumes the controller is a DRAMCtrl subclass")
+if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface):
+ fatal("This script assumes the memory is a DRAMInterface subclass")
# There is no point slowing things down by saving any data.
-system.mem_ctrls[0].null = True
+system.mem_ctrls[0].dram.null = True
+
+# enable DRAM low power states
+system.mem_ctrls[0].dram.enable_dram_powerdown = True
# Set the address mapping based on input argument
-system.mem_ctrls[0].addr_mapping = args.addr_map
-system.mem_ctrls[0].page_policy = args.page_policy
+system.mem_ctrls[0].dram.addr_mapping = args.addr_map
+system.mem_ctrls[0].dram.page_policy = args.page_policy
# We create a traffic generator state for each param combination we want to
# test. Each traffic generator state is specified in the config file and
the
@@ -126,28 +136,23 @@
# Stats are dumped and reset at the state transition.
period = 250000000
-# We specify the states in a config file input to the traffic generator.
-cfg_file_name = "lowp_sweep.cfg"
-cfg_file_path = os.path.dirname(__file__) + "/" +cfg_file_name
-cfg_file = open(cfg_file_path, 'w')
-
# Get the number of banks
-nbr_banks = int(system.mem_ctrls[0].banks_per_rank.value)
+nbr_banks = int(system.mem_ctrls[0].dram.banks_per_rank.value)
# determine the burst size in bytes
-burst_size = int((system.mem_ctrls[0].devices_per_rank.value *
- system.mem_ctrls[0].device_bus_width.value *
- system.mem_ctrls[0].burst_length.value) / 8)
+burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
+ system.mem_ctrls[0].dram.device_bus_width.value *
+ system.mem_ctrls[0].dram.burst_length.value) / 8)
# next, get the page size in bytes (the rowbuffer size is already in bytes)
-page_size = system.mem_ctrls[0].devices_per_rank.value * \
- system.mem_ctrls[0].device_rowbuffer_size.value
+page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
+ system.mem_ctrls[0].dram.device_rowbuffer_size.value
# Inter-request delay should be such that we can hit as many transitions
# to/from low power states as possible to. We provide a min and max itt to
the
# traffic generator and it randomises in the range. The parameter is in
# seconds and we need it in ticks (ps).
-itt_min = system.mem_ctrls[0].tBURST.value * 1000000000000
+itt_min = system.mem_ctrls[0].dram.tBURST.value * 1000000000000
#The itt value when set to (tRAS + tRP + tCK) covers the case where
# a read command is delayed beyond the delay from ACT to PRE_PDN entry of
the
@@ -155,9 +160,9 @@
# between a write and power down entry will be tRCD + tCL + tWR + tRP +
tCK.
# As we use this delay as a unit and create multiples of it as bigger
delays
# for the sweep, this parameter works for reads, writes and mix of them.
-pd_entry_time = (system.mem_ctrls[0].tRAS.value +
- system.mem_ctrls[0].tRP.value +
- system.mem_ctrls[0].tCK.value) * 1000000000000
+pd_entry_time = (system.mem_ctrls[0].dram.tRAS.value +
+ system.mem_ctrls[0].dram.tRP.value +
+ system.mem_ctrls[0].dram.tCK.value) * 1000000000000
# We sweep itt max using the multipliers specified by the user.
itt_max_str = args.itt_list.strip().split()
@@ -180,42 +185,11 @@
# banks
bank_util_values = [1, int(nbr_banks/2), nbr_banks]
-# Next we create the config file, but first a comment
-cfg_file.write("""# STATE state# period mode=DRAM
-# read_percent start_addr end_addr req_size min_itt max_itt data_limit
-# stride_size page_size #banks #banks_util addr_map #ranks\n""")
-
-addr_map = m5.objects.AddrMap.map[args.addr_map]
-
-nxt_state = 0
-for itt_max in itt_max_values:
- for bank in bank_util_values:
- for stride_size in stride_values:
- cfg_file.write("STATE %d %d %s %d 0 %d %d "
- "%d %d %d %d %d %d %d %d %d\n" %
- (nxt_state, period, "DRAM", args.rd_perc,
max_addr,
- burst_size, itt_min, itt_max, 0, stride_size,
- page_size, nbr_banks, bank, addr_map,
- args.mem_ranks))
- nxt_state = nxt_state + 1
-
# State for idle period
idle_period = args.idle_end
-cfg_file.write("STATE %d %d IDLE\n" % (nxt_state, idle_period))
-
-# Init state is state 0
-cfg_file.write("INIT 0\n")
-
-# Go through the states one by one
-for state in range(1, nxt_state + 1):
- cfg_file.write("TRANSITION %d %d 1\n" % (state - 1, state))
-
-# Transition from last state to itself to not break the probability math
-cfg_file.write("TRANSITION %d %d 1\n" % (nxt_state, nxt_state))
-cfg_file.close()
# create a traffic generator, and point it to the file we just created
-system.tgen = TrafficGen(config_file = cfg_file_path)
+system.tgen = PyTrafficGen()
# add a communication monitor
system.monitor = CommMonitor()
@@ -230,14 +204,34 @@
# every period, dump and reset all stats
periodicStatDump(period)
+# run Forrest, run!
root = Root(full_system = False, system = system)
root.system.mem_mode = 'timing'
m5.instantiate()
+def trace():
+ addr_map = ObjectList.dram_addr_map_list.get(args.addr_map)
+ generator = dram_generators["DRAM"](system.tgen)
+ for itt_max in itt_max_values:
+ for bank in bank_util_values:
+ for stride_size in stride_values:
+ num_seq_pkts = int(math.ceil(float(stride_size) /
burst_size))
+ yield generator(period,
+ 0, max_addr, burst_size, int(itt_min),
+ int(itt_max), args.rd_perc, 0,
+ num_seq_pkts, page_size, nbr_banks, bank,
+ addr_map, args.mem_ranks)
+
+ yield system.tgen.createIdle(idle_period)
+ yield system.tgen.createExit(0)
+
+system.tgen.start(trace())
+
# Simulate for exactly as long as it takes to go through all the states
# This is why sim exists.
-m5.simulate(nxt_state * period + idle_period)
+m5.simulate()
+
print("--- Done DRAM low power sweep ---")
print("Fixed params - ")
print("\tburst: %d, banks: %d, max stride: %d, itt min: %s ns" % \
@@ -247,4 +241,3 @@
print("\titt max values", itt_max_values)
print("\tbank utilization values", bank_util_values)
print("\tstride values:", stride_values)
-print("Traffic gen config file:", cfg_file_name)
diff --git a/configs/dram/sweep.py b/configs/dram/sweep.py
index d3c86c3..6a49f44 100644
--- a/configs/dram/sweep.py
+++ b/configs/dram/sweep.py
@@ -116,13 +116,15 @@
# the following assumes that we are using the native DRAM
# controller, check to be sure
if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl):
- fatal("This script assumes the memory is a DRAMCtrl subclass")
+ fatal("This script assumes the controller is a DRAMCtrl subclass")
+if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface):
+ fatal("This script assumes the memory is a DRAMInterface subclass")
# there is no point slowing things down by saving any data
-system.mem_ctrls[0].null = True
+system.mem_ctrls[0].dram.null = True
# Set the address mapping based on input argument
-system.mem_ctrls[0].addr_mapping = options.addr_map
+system.mem_ctrls[0].dram.addr_mapping = options.addr_map
# stay in each state for 0.25 ms, long enough to warm things up, and
# short enough to avoid hitting a refresh
@@ -133,21 +135,21 @@
# the DRAM maximum bandwidth to ensure that it is saturated
# get the number of banks
-nbr_banks = system.mem_ctrls[0].banks_per_rank.value
+nbr_banks = system.mem_ctrls[0].dram.banks_per_rank.value
# determine the burst length in bytes
-burst_size = int((system.mem_ctrls[0].devices_per_rank.value *
- system.mem_ctrls[0].device_bus_width.value *
- system.mem_ctrls[0].burst_length.value) / 8)
+burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
+ system.mem_ctrls[0].dram.device_bus_width.value *
+ system.mem_ctrls[0].dram.burst_length.value) / 8)
# next, get the page size in bytes
-page_size = system.mem_ctrls[0].devices_per_rank.value * \
- system.mem_ctrls[0].device_rowbuffer_size.value
+page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
+ system.mem_ctrls[0].dram.device_rowbuffer_size.value
# match the maximum bandwidth of the memory, the parameter is in seconds
# and we need it in ticks (ps)
-itt = getattr(system.mem_ctrls[0].tBURST_MIN, 'value',
- system.mem_ctrls[0].tBURST.value) * 1000000000000
+itt = getattr(system.mem_ctrls[0].dram.tBURST_MIN, 'value',
+ system.mem_ctrls[0].dram.tBURST.value) * 1000000000000
# assume we start at 0
max_addr = mem_range.end
diff --git a/configs/learning_gem5/part1/simple.py
b/configs/learning_gem5/part1/simple.py
index ef73a06..cfd15be 100644
--- a/configs/learning_gem5/part1/simple.py
+++ b/configs/learning_gem5/part1/simple.py
@@ -77,8 +77,9 @@
system.cpu.interrupts[0].int_slave = system.membus.master
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
# Connect the system up to the membus
diff --git a/configs/learning_gem5/part1/two_level.py
b/configs/learning_gem5/part1/two_level.py
index 564c785..0dbcfc7 100644
--- a/configs/learning_gem5/part1/two_level.py
+++ b/configs/learning_gem5/part1/two_level.py
@@ -132,8 +132,9 @@
system.system_port = system.membus.slave
# Create a DDR3 memory controller
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
# Create a process for a simple "Hello World" application
diff --git a/configs/learning_gem5/part2/simple_cache.py
b/configs/learning_gem5/part2/simple_cache.py
index 8d98d92..fbea73d 100644
--- a/configs/learning_gem5/part2/simple_cache.py
+++ b/configs/learning_gem5/part2/simple_cache.py
@@ -76,8 +76,9 @@
system.cpu.interrupts[0].int_slave = system.membus.master
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
# Connect the system up to the membus
diff --git a/configs/learning_gem5/part2/simple_memobj.py
b/configs/learning_gem5/part2/simple_memobj.py
index d30977c..e792eb9 100644
--- a/configs/learning_gem5/part2/simple_memobj.py
+++ b/configs/learning_gem5/part2/simple_memobj.py
@@ -74,8 +74,9 @@
system.cpu.interrupts[0].int_slave = system.membus.master
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
# Connect the system up to the membus
diff --git a/configs/learning_gem5/part3/simple_ruby.py
b/configs/learning_gem5/part3/simple_ruby.py
index c47ee7e..7f70a8c 100644
--- a/configs/learning_gem5/part3/simple_ruby.py
+++ b/configs/learning_gem5/part3/simple_ruby.py
@@ -68,8 +68,9 @@
system.cpu = [TimingSimpleCPU() for i in range(2)]
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
# create the interrupt controller for the CPU and connect to the membus
for cpu in system.cpu:
diff --git a/src/mem/DRAMCtrl.py b/src/mem/DRAMCtrl.py
index 0f70dff..dff5000 100644
--- a/src/mem/DRAMCtrl.py
+++ b/src/mem/DRAMCtrl.py
@@ -40,26 +40,12 @@
from m5.params import *
from m5.proxy import *
-from m5.objects.AbstractMemory import *
from m5.objects.QoSMemCtrl import *
# Enum for memory scheduling algorithms, currently First-Come
# First-Served and a First-Row Hit then First-Come First-Served
class MemSched(Enum): vals = ['fcfs', 'frfcfs']
-# Enum for the address mapping. With Ch, Ra, Ba, Ro and Co denoting
-# channel, rank, bank, row and column, respectively, and going from
-# MSB to LSB. Available are RoRaBaChCo and RoRaBaCoCh, that are
-# suitable for an open-page policy, optimising for sequential accesses
-# hitting in the open row. For a closed-page policy, RoCoRaBaCh
-# maximises parallelism.
-class AddrMap(Enum): vals = ['RoRaBaChCo', 'RoRaBaCoCh', 'RoCoRaBaCh']
-
-# Enum for the page policy, either open, open_adaptive, close, or
-# close_adaptive.
-class PageManage(Enum): vals = ['open', 'open_adaptive', 'close',
- 'close_adaptive']
-
# DRAMCtrl is a single-channel single-ported DRAM controller model
# that aims to model the most important system-level performance
# effects of a DRAM without getting into too much detail of the DRAM
@@ -72,8 +58,11 @@
# bus in front of the controller for multiple ports
port = SlavePort("Slave port")
- # the basic configuration of the controller architecture, note
- # that each entry corresponds to a burst for the specific DRAM
+ # Interface to volatile, DRAM media
+ dram = Param.DRAMInterface(Parent.any, "DRAM interface")
+
+ # Set default buffer sizes
+ # each entry corresponds to a burst for the specific DRAM
# configuration (e.g. x32 with burst length 8 is 32 bytes) and not
# the cacheline size or request/packet size
write_buffer_size = Param.Unsigned(64, "Number of write queue entries")
@@ -93,15 +82,6 @@
# scheduler, address map and page policy
mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy")
- addr_mapping = Param.AddrMap('RoRaBaCoCh', "Address mapping policy")
- page_policy = Param.PageManage('open_adaptive', "Page management
policy")
-
- # enforce a limit on the number of accesses per row
- max_accesses_per_row = Param.Unsigned(16, "Max accesses per row
before "
- "closing");
-
- # size of DRAM Chip in Bytes
- device_size = Param.MemorySize("Size of DRAM chip")
# pipeline latency of the controller and PHY, split into a
# frontend part and a backend part, with reads and writes serviced
@@ -109,1404 +89,3 @@
# serviced by the memory seeing the sum of the two
static_frontend_latency = Param.Latency("10ns", "Static frontend
latency")
static_backend_latency = Param.Latency("10ns", "Static backend
latency")
-
- # the physical organisation of the DRAM
- device_bus_width = Param.Unsigned("data bus width in bits for each
DRAM "\
- "device/chip")
- burst_length = Param.Unsigned("Burst lenght (BL) in beats")
- device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\
- "device/chip")
- devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
- ranks_per_channel = Param.Unsigned("Number of ranks per channel")
-
- # default to 0 bank groups per rank, indicating bank group architecture
- # is not used
- # update per memory class when bank group architecture is supported
- bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per
rank")
- banks_per_rank = Param.Unsigned("Number of banks per rank")
-
- # Enable DRAM powerdown states if True. This is False by default due to
- # performance being lower when enabled
- enable_dram_powerdown = Param.Bool(False, "Enable powerdown states")
-
- # For power modelling we need to know if the DRAM has a DLL or not
- dll = Param.Bool(True, "DRAM has DLL or not")
-
- # DRAMPower provides in addition to the core power, the possibility to
- # include RD/WR termination and IO power. This calculation assumes some
- # default values. The integration of DRAMPower with gem5 does not
include
- # IO and RD/WR termination power by default. This might be added as an
- # additional feature in the future.
-
- # timing behaviour and constraints - all in nanoseconds
-
- # the base clock period of the DRAM
- tCK = Param.Latency("Clock period")
-
- # the amount of time in nanoseconds from issuing an activate command
- # to the data being available in the row buffer for a read/write
- tRCD = Param.Latency("RAS to CAS delay")
-
- # the time from issuing a read/write command to seeing the actual data
- tCL = Param.Latency("CAS latency")
-
- # minimum time between a precharge and subsequent activate
- tRP = Param.Latency("Row precharge time")
-
- # minimum time between an activate and a precharge to the same row
- tRAS = Param.Latency("ACT to PRE delay")
-
- # minimum time between a write data transfer and a precharge
- tWR = Param.Latency("Write recovery time")
-
- # minimum time between a read and precharge command
- tRTP = Param.Latency("Read to precharge")
-
- # time to complete a burst transfer, typically the burst length
- # divided by two due to the DDR bus, but by making it a parameter
- # it is easier to also evaluate SDR memories like WideIO.
- # This parameter has to account for burst length.
- # Read/Write requests with data size larger than one full burst are
broken
- # down into multiple requests in the controller
- # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
- # With bank group architectures, tBURST represents the CAS-to-CAS
- # delay for bursts to different bank groups (tCCD_S)
- tBURST = Param.Latency("Burst duration "
- "(typically burst length / 2 cycles)")
-
- # tBURST_MAX is the column array cycle delay required before next
access,
- # which could be greater than tBURST when the memory access time is
greater
- # than tBURST
- tBURST_MAX = Param.Latency(Self.tBURST, "Column access delay")
-
- # tBURST_MIN is the minimum delay between bursts, which could be less
than
- # tBURST when interleaving is supported
- tBURST_MIN = Param.Latency(Self.tBURST, "Minimim delay between bursts")
-
- # CAS-to-CAS delay for bursts to the same bank group
- # only utilized with bank group architectures; set to 0 for default
case
- # tBURST is equivalent to tCCD_S; no explicit parameter required
- # for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")
-
- # Write-to-Write delay for bursts to the same bank group
- # only utilized with bank group architectures; set to 0 for default
case
- # This will be used to enable different same bank group delays
- # for writes versus reads
- tCCD_L_WR = Param.Latency(Self.tCCD_L,
- "Same bank group Write to Write delay")
-
- # time taken to complete one refresh cycle (N rows in all banks)
- tRFC = Param.Latency("Refresh cycle time")
-
- # refresh command interval, how often a "ref" command needs
- # to be sent. It is 7.8 us for a 64ms refresh requirement
- tREFI = Param.Latency("Refresh command interval")
-
- # write-to-read, same rank turnaround penalty
- tWTR = Param.Latency("Write to read, same rank switching time")
-
- # write-to-read, same rank turnaround penalty for same bank group
- tWTR_L = Param.Latency(Self.tWTR, "Write to read, same rank switching "
- "time, same bank group")
-
- # read-to-write, same rank turnaround penalty
- tRTW = Param.Latency("Read to write, same rank switching time")
-
- # rank-to-rank bus delay penalty
- # this does not correlate to a memory timing parameter and encompasses:
- # 1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD
- # different rank bus delay
- tCS = Param.Latency("Rank to rank switching time")
-
- # minimum precharge to precharge delay time
- tPPD = Param.Latency("0ns", "PRE to PRE delay")
-
- # maximum delay between two-cycle ACT command phases
- tAAD = Param.Latency(Self.tCK,
- "Maximum delay between two-cycle ACT commands")
-
- two_cycle_activate = Param.Bool(False,
- "Two cycles required to send activate")
-
- # minimum row activate to row activate delay time
- tRRD = Param.Latency("ACT to ACT delay")
-
- # only utilized with bank group architectures; set to 0 for default
case
- tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")
-
- # time window in which a maximum number of activates are allowed
- # to take place, set to 0 to disable
- tXAW = Param.Latency("X activation window")
- activation_limit = Param.Unsigned("Max number of activates in window")
-
- # time to exit power-down mode
- # Exit power-down to next valid command delay
- tXP = Param.Latency("0ns", "Power-up Delay")
-
- # Exit Powerdown to commands requiring a locked DLL
- tXPDLL = Param.Latency("0ns", "Power-up Delay with locked DLL")
-
- # time to exit self-refresh mode
- tXS = Param.Latency("0ns", "Self-refresh exit latency")
-
- # time to exit self-refresh mode with locked DLL
- tXSDLL = Param.Latency("0ns", "Self-refresh exit latency DLL")
-
- # number of data beats per clock. with DDR, default is 2, one per edge
- beats_per_clock = Param.Unsigned(2, "Data beats per clock")
-
- data_clock_sync = Param.Bool(False, "Synchronization commands
required")
-
- # Currently rolled into other params
- ######################################################################
-
- # tRC - assumed to be tRAS + tRP
-
- # Power Behaviour and Constraints
- # DRAMs like LPDDR and WideIO have 2 external voltage domains. These
are
- # defined as VDD and VDD2. Each current is defined for each voltage
domain
- # separately. For example, current IDD0 is active-precharge current for
- # voltage domain VDD and current IDD02 is active-precharge current for
- # voltage domain VDD2.
- # By default all currents are set to 0mA. Users who are only
interested in
- # the performance of DRAMs can leave them at 0.
-
- # Operating 1 Bank Active-Precharge current
- IDD0 = Param.Current("0mA", "Active precharge current")
-
- # Operating 1 Bank Active-Precharge current multiple voltage Range
- IDD02 = Param.Current("0mA", "Active precharge current VDD2")
-
- # Precharge Power-down Current: Slow exit
- IDD2P0 = Param.Current("0mA", "Precharge Powerdown slow")
-
- # Precharge Power-down Current: Slow exit multiple voltage Range
- IDD2P02 = Param.Current("0mA", "Precharge Powerdown slow VDD2")
-
- # Precharge Power-down Current: Fast exit
- IDD2P1 = Param.Current("0mA", "Precharge Powerdown fast")
-
- # Precharge Power-down Current: Fast exit multiple voltage Range
- IDD2P12 = Param.Current("0mA", "Precharge Powerdown fast VDD2")
-
- # Precharge Standby current
- IDD2N = Param.Current("0mA", "Precharge Standby current")
-
- # Precharge Standby current multiple voltage range
- IDD2N2 = Param.Current("0mA", "Precharge Standby current VDD2")
-
- # Active Power-down current: slow exit
- IDD3P0 = Param.Current("0mA", "Active Powerdown slow")
-
- # Active Power-down current: slow exit multiple voltage range
- IDD3P02 = Param.Current("0mA", "Active Powerdown slow VDD2")
-
- # Active Power-down current : fast exit
- IDD3P1 = Param.Current("0mA", "Active Powerdown fast")
-
- # Active Power-down current : fast exit multiple voltage range
- IDD3P12 = Param.Current("0mA", "Active Powerdown fast VDD2")
-
- # Active Standby current
- IDD3N = Param.Current("0mA", "Active Standby current")
-
- # Active Standby current multiple voltage range
- IDD3N2 = Param.Current("0mA", "Active Standby current VDD2")
-
- # Burst Read Operating Current
- IDD4R = Param.Current("0mA", "READ current")
-
- # Burst Read Operating Current multiple voltage range
- IDD4R2 = Param.Current("0mA", "READ current VDD2")
-
- # Burst Write Operating Current
- IDD4W = Param.Current("0mA", "WRITE current")
-
- # Burst Write Operating Current multiple voltage range
- IDD4W2 = Param.Current("0mA", "WRITE current VDD2")
-
- # Refresh Current
- IDD5 = Param.Current("0mA", "Refresh current")
-
- # Refresh Current multiple voltage range
- IDD52 = Param.Current("0mA", "Refresh current VDD2")
-
- # Self-Refresh Current
- IDD6 = Param.Current("0mA", "Self-refresh Current")
-
- # Self-Refresh Current multiple voltage range
- IDD62 = Param.Current("0mA", "Self-refresh Current VDD2")
-
- # Main voltage range of the DRAM
- VDD = Param.Voltage("0V", "Main Voltage Range")
-
- # Second voltage range defined by some DRAMs
- VDD2 = Param.Voltage("0V", "2nd Voltage Range")
-
-# A single DDR3-1600 x64 channel (one command and address bus), with
-# timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in
-# an 8x8 configuration.
-class DDR3_1600_8x8(DRAMCtrl):
- # size of device in bytes
- device_size = '512MB'
-
- # 8x8 configuration, 8 devices each with an 8-bit interface
- device_bus_width = 8
-
- # DDR3 is a BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
- device_rowbuffer_size = '1kB'
-
- # 8x8 configuration, so 8 devices
- devices_per_rank = 8
-
- # Use two ranks
- ranks_per_channel = 2
-
- # DDR3 has 8 banks in all configurations
- banks_per_rank = 8
-
- # 800 MHz
- tCK = '1.25ns'
-
- # 8 beats across an x64 interface translates to 4 clocks @ 800 MHz
- tBURST = '5ns'
-
- # DDR3-1600 11-11-11
- tRCD = '13.75ns'
- tCL = '13.75ns'
- tRP = '13.75ns'
- tRAS = '35ns'
- tRRD = '6ns'
- tXAW = '30ns'
- activation_limit = 4
- tRFC = '260ns'
-
- tWR = '15ns'
-
- # Greater of 4 CK or 7.5 ns
- tWTR = '7.5ns'
-
- # Greater of 4 CK or 7.5 ns
- tRTP = '7.5ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
- tRTW = '2.5ns'
-
- # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
- # <=85C, half for >85C
- tREFI = '7.8us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '6ns'
-
- # self refresh exit time
- tXS = '270ns'
-
- # Current values from datasheet Die Rev E,J
- IDD0 = '55mA'
- IDD2N = '32mA'
- IDD3N = '38mA'
- IDD4W = '125mA'
- IDD4R = '157mA'
- IDD5 = '235mA'
- IDD3P1 = '38mA'
- IDD2P1 = '32mA'
- IDD6 = '20mA'
- VDD = '1.5V'
-
-# A single HMC-2500 x32 model based on:
-# [1] DRAMSpec: a high-level DRAM bank modelling tool
-# developed at the University of Kaiserslautern. This high level tool
-# uses RC (resistance-capacitance) and CV (capacitance-voltage) models to
-# estimate the DRAM bank latency and power numbers.
-# [2] High performance AXI-4.0 based interconnect for extensible smart
memory
-# cubes (E. Azarkhish et. al)
-# Assumed for the HMC model is a 30 nm technology node.
-# The modelled HMC consists of 4 Gbit layers which sum up to 2GB of memory
(4
-# layers).
-# Each layer has 16 vaults and each vault consists of 2 banks per layer.
-# In order to be able to use the same controller used for 2D DRAM
generations
-# for HMC, the following analogy is done:
-# Channel (DDR) => Vault (HMC)
-# device_size (DDR) => size of a single layer in a vault
-# ranks per channel (DDR) => number of layers
-# banks per rank (DDR) => banks per layer
-# devices per rank (DDR) => devices per layer ( 1 for HMC).
-# The parameters for which no input is available are inherited from the
DDR3
-# configuration.
-# This configuration includes the latencies from the DRAM to the logic
layer
-# of the HMC
-class HMC_2500_1x32(DDR3_1600_8x8):
- # size of device
- # two banks per device with each bank 4MB [2]
- device_size = '8MB'
-
- # 1x32 configuration, 1 device with 32 TSVs [2]
- device_bus_width = 32
-
- # HMC is a BL8 device [2]
- burst_length = 8
-
- # Each device has a page (row buffer) size of 256 bytes [2]
- device_rowbuffer_size = '256B'
-
- # 1x32 configuration, so 1 device [2]
- devices_per_rank = 1
-
- # 4 layers so 4 ranks [2]
- ranks_per_channel = 4
-
- # HMC has 2 banks per layer [2]
- # Each layer represents a rank. With 4 layers and 8 banks in total,
each
- # layer has 2 banks; thus 2 banks per rank.
- banks_per_rank = 2
-
- # 1250 MHz [2]
- tCK = '0.8ns'
-
- # 8 beats across an x32 interface translates to 4 clocks @ 1250 MHz
- tBURST = '3.2ns'
-
- # Values using DRAMSpec HMC model [1]
- tRCD = '10.2ns'
- tCL = '9.9ns'
- tRP = '7.7ns'
- tRAS = '21.6ns'
-
- # tRRD depends on the power supply network for each vendor.
- # We assume a tRRD of a double bank approach to be equal to 4 clock
- # cycles (Assumption)
- tRRD = '3.2ns'
-
- # activation limit is set to 0 since there are only 2 banks per vault
- # layer.
- activation_limit = 0
-
- # Values using DRAMSpec HMC model [1]
- tRFC = '59ns'
- tWR = '8ns'
- tRTP = '4.9ns'
-
- # Default different rank bus delay assumed to 1 CK for TSVs, @1250 MHz
=
- # 0.8 ns (Assumption)
- tCS = '0.8ns'
-
- # Value using DRAMSpec HMC model [1]
- tREFI = '3.9us'
-
- # The default page policy in the vault controllers is simple closed
page
- # [2] nevertheless 'close' policy opens and closes the row multiple
times
- # for bursts largers than 32Bytes. For this reason we
use 'close_adaptive'
- page_policy = 'close_adaptive'
-
- # RoCoRaBaCh resembles the default address mapping in HMC
- addr_mapping = 'RoCoRaBaCh'
- min_writes_per_switch = 8
-
- # These parameters do not directly correlate with buffer_size in real
- # hardware. Nevertheless, their value has been tuned to achieve a
- # bandwidth similar to the cycle-accurate model in [2]
- write_buffer_size = 32
- read_buffer_size = 32
-
- # The static latency of the vault controllers is estimated to be
smaller
- # than a full DRAM channel controller
- static_backend_latency='4ns'
- static_frontend_latency='4ns'
-
-# A single DDR3-2133 x64 channel refining a selected subset of the
-# options for the DDR-1600 configuration, based on the same DDR3-1600
-# 4 Gbit datasheet (Micron MT41J512M8). Most parameters are kept
-# consistent across the two configurations.
-class DDR3_2133_8x8(DDR3_1600_8x8):
- # 1066 MHz
- tCK = '0.938ns'
-
- # 8 beats across an x64 interface translates to 4 clocks @ 1066 MHz
- tBURST = '3.752ns'
-
- # DDR3-2133 14-14-14
- tRCD = '13.09ns'
- tCL = '13.09ns'
- tRP = '13.09ns'
- tRAS = '33ns'
- tRRD = '5ns'
- tXAW = '25ns'
-
- # Current values from datasheet
- IDD0 = '70mA'
- IDD2N = '37mA'
- IDD3N = '44mA'
- IDD4W = '157mA'
- IDD4R = '191mA'
- IDD5 = '250mA'
- IDD3P1 = '44mA'
- IDD2P1 = '43mA'
- IDD6 ='20mA'
- VDD = '1.5V'
-
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A2G4)
-# in an 16x4 configuration.
-# Total channel capacity is 32GB
-# 16 devices/rank * 2 ranks/channel * 1GB/device = 32GB/channel
-class DDR4_2400_16x4(DRAMCtrl):
- # size of device
- device_size = '1GB'
-
- # 16x4 configuration, 16 devices each with a 4-bit interface
- device_bus_width = 4
-
- # DDR4 is a BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 512 byte (1K columns x4)
- device_rowbuffer_size = '512B'
-
- # 16x4 configuration, so 16 devices
- devices_per_rank = 16
-
- # Match our DDR3 configurations which is dual rank
- ranks_per_channel = 2
-
- # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
- # Set to 4 for x4 case
- bank_groups_per_rank = 4
-
- # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
- # configurations). Currently we do not capture the additional
- # constraints incurred by the bank groups
- banks_per_rank = 16
-
- # override the default buffer sizes and go for something larger to
- # accommodate the larger bank count
- write_buffer_size = 128
- read_buffer_size = 64
-
- # 1200 MHz
- tCK = '0.833ns'
-
- # 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
- # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
- # With bank group architectures, tBURST represents the CAS-to-CAS
- # delay for bursts to different bank groups (tCCD_S)
- tBURST = '3.332ns'
-
- # @2400 data rate, tCCD_L is 6 CK
- # CAS-to-CAS delay for bursts to the same bank group
- # tBURST is equivalent to tCCD_S; no explicit parameter required
- # for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = '5ns';
-
- # DDR4-2400 17-17-17
- tRCD = '14.16ns'
- tCL = '14.16ns'
- tRP = '14.16ns'
- tRAS = '32ns'
-
- # RRD_S (different bank group) for 512B page is MAX(4 CK, 3.3ns)
- tRRD = '3.332ns'
-
- # RRD_L (same bank group) for 512B page is MAX(4 CK, 4.9ns)
- tRRD_L = '4.9ns';
-
- # tFAW for 512B page is MAX(16 CK, 13ns)
- tXAW = '13.328ns'
- activation_limit = 4
- # tRFC is 350ns
- tRFC = '350ns'
-
- tWR = '15ns'
-
- # Here using the average of WTR_S and WTR_L
- tWTR = '5ns'
-
- # Greater of 4 CK or 7.5 ns
- tRTP = '7.5ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666
ns
- tRTW = '1.666ns'
-
- # Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns
- tCS = '1.666ns'
-
- # <=85C, half for >85C
- tREFI = '7.8us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '6ns'
-
- # self refresh exit time
- # exit delay to ACT, PRE, PREALL, REF, SREF Enter, and PD Enter is:
- # tRFC + 10ns = 340ns
- tXS = '340ns'
-
- # Current values from datasheet
- IDD0 = '43mA'
- IDD02 = '3mA'
- IDD2N = '34mA'
- IDD3N = '38mA'
- IDD3N2 = '3mA'
- IDD4W = '103mA'
- IDD4R = '110mA'
- IDD5 = '250mA'
- IDD3P1 = '32mA'
- IDD2P1 = '25mA'
- IDD6 = '30mA'
- VDD = '1.2V'
- VDD2 = '2.5V'
-
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A1G8)
-# in an 8x8 configuration.
-# Total channel capacity is 16GB
-# 8 devices/rank * 2 ranks/channel * 1GB/device = 16GB/channel
-class DDR4_2400_8x8(DDR4_2400_16x4):
- # 8x8 configuration, 8 devices each with an 8-bit interface
- device_bus_width = 8
-
- # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
- device_rowbuffer_size = '1kB'
-
- # 8x8 configuration, so 8 devices
- devices_per_rank = 8
-
- # RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
- tRRD_L = '4.9ns';
-
- tXAW = '21ns'
-
- # Current values from datasheet
- IDD0 = '48mA'
- IDD3N = '43mA'
- IDD4W = '123mA'
- IDD4R = '135mA'
- IDD3P1 = '37mA'
-
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A512M16)
-# in an 4x16 configuration.
-# Total channel capacity is 4GB
-# 4 devices/rank * 1 ranks/channel * 1GB/device = 4GB/channel
-class DDR4_2400_4x16(DDR4_2400_16x4):
- # 4x16 configuration, 4 devices each with an 16-bit interface
- device_bus_width = 16
-
- # Each device has a page (row buffer) size of 2 Kbyte (1K columns x16)
- device_rowbuffer_size = '2kB'
-
- # 4x16 configuration, so 4 devices
- devices_per_rank = 4
-
- # Single rank for x16
- ranks_per_channel = 1
-
- # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
- # Set to 2 for x16 case
- bank_groups_per_rank = 2
-
- # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
- # configurations). Currently we do not capture the additional
- # constraints incurred by the bank groups
- banks_per_rank = 8
-
- # RRD_S (different bank group) for 2K page is MAX(4 CK, 5.3ns)
- tRRD = '5.3ns'
-
- # RRD_L (same bank group) for 2K page is MAX(4 CK, 6.4ns)
- tRRD_L = '6.4ns';
-
- tXAW = '30ns'
-
- # Current values from datasheet
- IDD0 = '80mA'
- IDD02 = '4mA'
- IDD2N = '34mA'
- IDD3N = '47mA'
- IDD4W = '228mA'
- IDD4R = '243mA'
- IDD5 = '280mA'
- IDD3P1 = '41mA'
-
-# A single LPDDR2-S4 x32 interface (one command/address bus), with
-# default timings based on a LPDDR2-1066 4 Gbit part (Micron MT42L128M32D1)
-# in a 1x32 configuration.
-class LPDDR2_S4_1066_1x32(DRAMCtrl):
- # No DLL in LPDDR2
- dll = False
-
- # size of device
- device_size = '512MB'
-
- # 1x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
- # LPDDR2_S4 is a BL4 and BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 1KB
- # (this depends on the memory density)
- device_rowbuffer_size = '1kB'
-
- # 1x32 configuration, so 1 device
- devices_per_rank = 1
-
- # Use a single rank
- ranks_per_channel = 1
-
- # LPDDR2-S4 has 8 banks in all configurations
- banks_per_rank = 8
-
- # 533 MHz
- tCK = '1.876ns'
-
- # Fixed at 15 ns
- tRCD = '15ns'
-
- # 8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle time
- tCL = '15ns'
-
- # Pre-charge one bank 15 ns (all banks 18 ns)
- tRP = '15ns'
-
- tRAS = '42ns'
- tWR = '15ns'
-
- tRTP = '7.5ns'
-
- # 8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz.
- # Note this is a BL8 DDR device.
- # Requests larger than 32 bytes are broken down into multiple requests
- # in the controller
- tBURST = '7.5ns'
-
- # LPDDR2-S4, 4 Gbit
- tRFC = '130ns'
- tREFI = '3.9us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '7.5ns'
-
- # self refresh exit time
- tXS = '140ns'
-
- # Irrespective of speed grade, tWTR is 7.5 ns
- tWTR = '7.5ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @533 MHz = 3.75 ns
- tRTW = '3.75ns'
-
- # Default different rank bus delay to 2 CK, @533 MHz = 3.75 ns
- tCS = '3.75ns'
-
- # Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
- # Irrespective of density, tFAW is 50 ns
- tXAW = '50ns'
- activation_limit = 4
-
- # Current values from datasheet
- IDD0 = '15mA'
- IDD02 = '70mA'
- IDD2N = '2mA'
- IDD2N2 = '30mA'
- IDD3N = '2.5mA'
- IDD3N2 = '30mA'
- IDD4W = '10mA'
- IDD4W2 = '190mA'
- IDD4R = '3mA'
- IDD4R2 = '220mA'
- IDD5 = '40mA'
- IDD52 = '150mA'
- IDD3P1 = '1.2mA'
- IDD3P12 = '8mA'
- IDD2P1 = '0.6mA'
- IDD2P12 = '0.8mA'
- IDD6 = '1mA'
- IDD62 = '3.2mA'
- VDD = '1.8V'
- VDD2 = '1.2V'
-
-# A single WideIO x128 interface (one command and address bus), with
-# default timings based on an estimated WIO-200 8 Gbit part.
-class WideIO_200_1x128(DRAMCtrl):
- # No DLL for WideIO
- dll = False
-
- # size of device
- device_size = '1024MB'
-
- # 1x128 configuration, 1 device with a 128-bit interface
- device_bus_width = 128
-
- # This is a BL4 device
- burst_length = 4
-
- # Each device has a page (row buffer) size of 4KB
- # (this depends on the memory density)
- device_rowbuffer_size = '4kB'
-
- # 1x128 configuration, so 1 device
- devices_per_rank = 1
-
- # Use one rank for a one-high die stack
- ranks_per_channel = 1
-
- # WideIO has 4 banks in all configurations
- banks_per_rank = 4
-
- # 200 MHz
- tCK = '5ns'
-
- # WIO-200
- tRCD = '18ns'
- tCL = '18ns'
- tRP = '18ns'
- tRAS = '42ns'
- tWR = '15ns'
- # Read to precharge is same as the burst
- tRTP = '20ns'
-
- # 4 beats across an x128 SDR interface translates to 4 clocks @ 200
MHz.
- # Note this is a BL4 SDR device.
- tBURST = '20ns'
-
- # WIO 8 Gb
- tRFC = '210ns'
-
- # WIO 8 Gb, <=85C, half for >85C
- tREFI = '3.9us'
-
- # Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns
- tWTR = '15ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @200 MHz = 10 ns
- tRTW = '10ns'
-
- # Default different rank bus delay to 2 CK, @200 MHz = 10 ns
- tCS = '10ns'
-
- # Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
- # Two instead of four activation window
- tXAW = '50ns'
- activation_limit = 2
-
- # The WideIO specification does not provide current information
-
-# A single LPDDR3 x32 interface (one command/address bus), with
-# default timings based on a LPDDR3-1600 4 Gbit part (Micron
-# EDF8132A1MC) in a 1x32 configuration.
-class LPDDR3_1600_1x32(DRAMCtrl):
- # No DLL for LPDDR3
- dll = False
-
- # size of device
- device_size = '512MB'
-
- # 1x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
- # LPDDR3 is a BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 4KB
- device_rowbuffer_size = '4kB'
-
- # 1x32 configuration, so 1 device
- devices_per_rank = 1
-
- # Technically the datasheet is a dual-rank package, but for
- # comparison with the LPDDR2 config we stick to a single rank
- ranks_per_channel = 1
-
- # LPDDR3 has 8 banks in all configurations
- banks_per_rank = 8
-
- # 800 MHz
- tCK = '1.25ns'
-
- tRCD = '18ns'
-
- # 12 CK read latency, 6 CK write latency @ 800 MHz, 1.25 ns cycle time
- tCL = '15ns'
-
- tRAS = '42ns'
- tWR = '15ns'
-
- # Greater of 4 CK or 7.5 ns, 4 CK @ 800 MHz = 5 ns
- tRTP = '7.5ns'
-
- # Pre-charge one bank 18 ns (all banks 21 ns)
- tRP = '18ns'
-
- # 8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz.
- # Note this is a BL8 DDR device.
- # Requests larger than 32 bytes are broken down into multiple requests
- # in the controller
- tBURST = '5ns'
-
- # LPDDR3, 4 Gb
- tRFC = '130ns'
- tREFI = '3.9us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '7.5ns'
-
- # self refresh exit time
- tXS = '140ns'
-
- # Irrespective of speed grade, tWTR is 7.5 ns
- tWTR = '7.5ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
- tRTW = '2.5ns'
-
- # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
- # Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
- # Irrespective of size, tFAW is 50 ns
- tXAW = '50ns'
- activation_limit = 4
-
- # Current values from datasheet
- IDD0 = '8mA'
- IDD02 = '60mA'
- IDD2N = '0.8mA'
- IDD2N2 = '26mA'
- IDD3N = '2mA'
- IDD3N2 = '34mA'
- IDD4W = '2mA'
- IDD4W2 = '190mA'
- IDD4R = '2mA'
- IDD4R2 = '230mA'
- IDD5 = '28mA'
- IDD52 = '150mA'
- IDD3P1 = '1.4mA'
- IDD3P12 = '11mA'
- IDD2P1 = '0.8mA'
- IDD2P12 = '1.8mA'
- IDD6 = '0.5mA'
- IDD62 = '1.8mA'
- VDD = '1.8V'
- VDD2 = '1.2V'
-
-# A single GDDR5 x64 interface, with
-# default timings based on a GDDR5-4000 1 Gbit part (SK Hynix
-# H5GQ1H24AFR) in a 2x32 configuration.
-class GDDR5_4000_2x32(DRAMCtrl):
- # size of device
- device_size = '128MB'
-
- # 2x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
- # GDDR5 is a BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 2Kbits (256Bytes)
- device_rowbuffer_size = '256B'
-
- # 2x32 configuration, so 2 devices
- devices_per_rank = 2
-
- # assume single rank
- ranks_per_channel = 1
-
- # GDDR5 has 4 bank groups
- bank_groups_per_rank = 4
-
- # GDDR5 has 16 banks with 4 bank groups
- banks_per_rank = 16
-
- # 1000 MHz
- tCK = '1ns'
-
- # 8 beats across an x64 interface translates to 2 clocks @ 1000 MHz
- # Data bus runs @2000 Mhz => DDR ( data runs at 4000 MHz )
- # 8 beats at 4000 MHz = 2 beats at 1000 MHz
- # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
- # With bank group architectures, tBURST represents the CAS-to-CAS
- # delay for bursts to different bank groups (tCCD_S)
- tBURST = '2ns'
-
- # @1000MHz data rate, tCCD_L is 3 CK
- # CAS-to-CAS delay for bursts to the same bank group
- # tBURST is equivalent to tCCD_S; no explicit parameter required
- # for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = '3ns';
-
- tRCD = '12ns'
-
- # tCL is not directly found in datasheet and assumed equal tRCD
- tCL = '12ns'
-
- tRP = '12ns'
- tRAS = '28ns'
-
- # RRD_S (different bank group)
- # RRD_S is 5.5 ns in datasheet.
- # rounded to the next multiple of tCK
- tRRD = '6ns'
-
- # RRD_L (same bank group)
- # RRD_L is 5.5 ns in datasheet.
- # rounded to the next multiple of tCK
- tRRD_L = '6ns'
-
- tXAW = '23ns'
-
- # tXAW < 4 x tRRD.
- # Therefore, activation limit is set to 0
- activation_limit = 0
-
- tRFC = '65ns'
- tWR = '12ns'
-
- # Here using the average of WTR_S and WTR_L
- tWTR = '5ns'
-
- # Read-to-Precharge 2 CK
- tRTP = '2ns'
-
- # Assume 2 cycles
- tRTW = '2ns'
-
-# A single HBM x128 interface (one command and address bus), with
-# default timings based on data publically released
-# ("HBM: Memory Solution for High Performance Processors", MemCon, 2014),
-# IDD measurement values, and by extrapolating data from other classes.
-# Architecture values based on published HBM spec
-# A 4H stack is defined, 2Gb per die for a total of 1GB of memory.
-class HBM_1000_4H_1x128(DRAMCtrl):
- # HBM gen1 supports up to 8 128-bit physical channels
- # Configuration defines a single channel, with the capacity
- # set to (full_ stack_capacity / 8) based on 2Gb dies
- # To use all 8 channels, set 'channels' parameter to 8 in
- # system configuration
-
- # 128-bit interface legacy mode
- device_bus_width = 128
-
- # HBM supports BL4 and BL2 (legacy mode only)
- burst_length = 4
-
- # size of channel in bytes, 4H stack of 2Gb dies is 1GB per stack;
- # with 8 channels, 128MB per channel
- device_size = '128MB'
-
- device_rowbuffer_size = '2kB'
-
- # 1x128 configuration
- devices_per_rank = 1
-
- # HBM does not have a CS pin; set rank to 1
- ranks_per_channel = 1
-
- # HBM has 8 or 16 banks depending on capacity
- # 2Gb dies have 8 banks
- banks_per_rank = 8
-
- # depending on frequency, bank groups may be required
- # will always have 4 bank groups when enabled
- # current specifications do not define the minimum frequency for
- # bank group architecture
- # setting bank_groups_per_rank to 0 to disable until range is defined
- bank_groups_per_rank = 0
-
- # 500 MHz for 1Gbps DDR data rate
- tCK = '2ns'
-
- # use values from IDD measurement in JEDEC spec
- # use tRP value for tRCD and tCL similar to other classes
- tRP = '15ns'
- tRCD = '15ns'
- tCL = '15ns'
- tRAS = '33ns'
-
- # BL2 and BL4 supported, default to BL4
- # DDR @ 500 MHz means 4 * 2ns / 2 = 4ns
- tBURST = '4ns'
-
- # value for 2Gb device from JEDEC spec
- tRFC = '160ns'
-
- # value for 2Gb device from JEDEC spec
- tREFI = '3.9us'
-
- # extrapolate the following from LPDDR configs, using ns values
- # to minimize burst length, prefetch differences
- tWR = '18ns'
- tRTP = '7.5ns'
- tWTR = '10ns'
-
- # start with 2 cycles turnaround, similar to other memory classes
- # could be more with variations across the stack
- tRTW = '4ns'
-
- # single rank device, set to 0
- tCS = '0ns'
-
- # from MemCon example, tRRD is 4ns with 2ns tCK
- tRRD = '4ns'
-
- # from MemCon example, tFAW is 30ns with 2ns tCK
- tXAW = '30ns'
- activation_limit = 4
-
- # 4tCK
- tXP = '8ns'
-
- # start with tRFC + tXP -> 160ns + 8ns = 168ns
- tXS = '168ns'
-
-# A single HBM x64 interface (one command and address bus), with
-# default timings based on HBM gen1 and data publically released
-# A 4H stack is defined, 8Gb per die for a total of 4GB of memory.
-# Note: This defines a pseudo-channel with a unique controller
-# instantiated per pseudo-channel
-# Stay at same IO rate (1Gbps) to maintain timing relationship with
-# HBM gen1 class (HBM_1000_4H_x128) where possible
-class HBM_1000_4H_1x64(HBM_1000_4H_1x128):
- # For HBM gen2 with pseudo-channel mode, configure 2X channels.
- # Configuration defines a single pseudo channel, with the capacity
- # set to (full_ stack_capacity / 16) based on 8Gb dies
- # To use all 16 pseudo channels, set 'channels' parameter to 16 in
- # system configuration
-
- # 64-bit pseudo-channle interface
- device_bus_width = 64
-
- # HBM pseudo-channel only supports BL4
- burst_length = 4
-
- # size of channel in bytes, 4H stack of 8Gb dies is 4GB per stack;
- # with 16 channels, 256MB per channel
- device_size = '256MB'
-
- # page size is halved with pseudo-channel; maintaining the same same
number
- # of rows per pseudo-channel with 2X banks across 2 channels
- device_rowbuffer_size = '1kB'
-
- # HBM has 8 or 16 banks depending on capacity
- # Starting with 4Gb dies, 16 banks are defined
- banks_per_rank = 16
-
- # reset tRFC for larger, 8Gb device
- # use HBM1 4Gb value as a starting point
- tRFC = '260ns'
-
- # start with tRFC + tXP -> 160ns + 8ns = 168ns
- tXS = '268ns'
- # Default different rank bus delay to 2 CK, @1000 MHz = 2 ns
- tCS = '2ns'
- tREFI = '3.9us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '10ns'
-
- # self refresh exit time
- tXS = '65ns'
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture
-# burst of 32, which means bursts can be interleaved
-class LPDDR5_5500_1x16_BG_BL32(DRAMCtrl):
-
- # Increase buffer size to account for more bank resources
- read_buffer_size = 64
-
- # Set page policy to better suit DMC Huxley
- page_policy = 'close_adaptive'
-
- # 16-bit channel interface
- device_bus_width = 16
-
- # LPDDR5 is a BL16 or BL32 device
- # With BG mode, BL16 and BL32 are supported
- # Use BL32 for higher command bandwidth
- burst_length = 32
-
- # size of device in bytes
- device_size = '1GB'
-
- # 2kB page with BG mode
- device_rowbuffer_size = '2kB'
-
- # Use a 1x16 configuration
- devices_per_rank = 1
-
- # Use a single rank
- ranks_per_channel = 1
-
- # LPDDR5 supports configurable bank options
- # 8B : BL32, all frequencies
- # 16B : BL32 or BL16, <=3.2Gbps
- # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
- # Initial configuration will have 16 banks with Bank Group Arch
- # to maximim resources and enable higher data rates
- banks_per_rank = 16
- bank_groups_per_rank = 4
-
- # 5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
- tCK = '1.455ns'
-
- # Greater of 2 CK or 18ns
- tRCD = '18ns'
-
- # Base RL is 16 CK @ 687.5 MHz = 23.28ns
- tCL = '23.280ns'
-
- # Greater of 2 CK or 18ns
- tRP = '18ns'
-
- # Greater of 3 CK or 42ns
- tRAS = '42ns'
-
- # Greater of 3 CK or 34ns
- tWR = '34ns'
-
- # active powerdown and precharge powerdown exit time
- # Greater of 3 CK or 7ns
- tXP = '7ns'
-
- # self refresh exit time (tRFCab + 7.5ns)
- tXS = '217.5ns'
-
- # Greater of 2 CK or 7.5 ns minus 2 CK
- tRTP = '4.59ns'
-
- # With BG architecture, burst of 32 transferred in two 16-beat
- # sub-bursts, with a 16-beat gap in between.
- # Each 16-beat sub-burst is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
- # tBURST is the delay to transfer the Bstof32 = 6 CK @ 687.5 MHz
- tBURST = '8.73ns'
- # can interleave a Bstof32 from another bank group at tBURST_MIN
- # 16-beats is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
- tBURST_MIN = '2.91ns'
- # tBURST_MAX is the maximum burst delay for same bank group timing
- # this is 8 CK @ 687.5 MHz
- tBURST_MAX = '11.64ns'
-
- # 8 CK @ 687.5 MHz
- tCCD_L = "11.64ns"
-
- # LPDDR5, 8 Gbit/channel for 280ns tRFCab
- tRFC = '210ns'
- tREFI = '3.9us'
-
- # Greater of 4 CK or 6.25 ns
- tWTR = '6.25ns'
- # Greater of 4 CK or 12 ns
- tWTR_L = '12ns'
-
- # Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL
- # tWCKDQ0/tCK will be 1 CK for most cases
- # For gem5 RL = WL and BL/n is already accounted for with tBURST
- # Result is and additional 1 CK is required
- tRTW = '1.455ns'
-
- # Default different rank bus delay to 2 CK, @687.5 MHz = 2.91 ns
- tCS = '2.91ns'
-
- # 2 CK
- tPPD = '2.91ns'
-
- # Greater of 2 CK or 5 ns
- tRRD = '5ns'
- tRRD_L = '5ns'
-
- # With Bank Group Arch mode tFAW is 20 ns
- tXAW = '20ns'
- activation_limit = 4
-
- # at 5Gbps, 4:1 WCK to CK ratio required
- # 2 data beats per WCK (DDR) -> 8 per CK
- beats_per_clock = 8
-
- # 2 cycles required to send activate command
- # 2 command phases can be sent back-to-back or
- # with a gap up to tAAD = 8 CK
- two_cycle_activate = True
- tAAD = '11.640ns'
-
- data_clock_sync = True
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture, burst of 16
-class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32):
-
- # LPDDR5 is a BL16 or BL32 device
- # With BG mode, BL16 and BL32 are supported
- # Use BL16 for smaller access granularity
- burst_length = 16
-
- # For Bstof16 with BG arch, 2 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST = '2.91ns'
- tBURST_MIN = '2.91ns'
- # For Bstof16 with BG arch, 4 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST_MAX = '5.82ns'
-
- # 4 CK @ 687.5 MHz
- tCCD_L = "5.82ns"
-
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 8-bank mode, burst of 32
-class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32):
-
- # 4kB page with 8B mode
- device_rowbuffer_size = '4kB'
-
- # LPDDR5 supports configurable bank options
- # 8B : BL32, all frequencies
- # 16B : BL32 or BL16, <=3.2Gbps
- # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
- # Select 8B
- banks_per_rank = 8
- bank_groups_per_rank = 0
-
- # For Bstof32 with 8B mode, 4 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST = '5.82ns'
- tBURST_MIN = '5.82ns'
- tBURST_MAX = '5.82ns'
-
- # Greater of 4 CK or 12 ns
- tWTR = '12ns'
-
- # Greater of 2 CK or 10 ns
- tRRD = '10ns'
-
- # With 8B mode tFAW is 40 ns
- tXAW = '40ns'
- activation_limit = 4
-
- # Reset BG arch timing for 8B mode
- tCCD_L = "0ns"
- tRRD_L = "0ns"
- tWTR_L = "0ns"
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture
-# burst of 32, which means bursts can be interleaved
-class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32):
-
- # 5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
- tCK = '1.25ns'
-
- # Base RL is 17 CK @ 800 MHz = 21.25ns
- tCL = '21.25ns'
-
- # With BG architecture, burst of 32 transferred in two 16-beat
- # sub-bursts, with a 16-beat gap in between.
- # Each 16-beat sub-burst is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
- # tBURST is the delay to transfer the Bstof32 = 6 CK @ 800 MHz
- tBURST = '7.5ns'
- # can interleave a Bstof32 from another bank group at tBURST_MIN
- # 16-beats is 8 WCK @2.3 GHz or 2 CK @ 800 MHz
- tBURST_MIN = '2.5ns'
- # tBURST_MAX is the maximum burst delay for same bank group timing
- # this is 8 CK @ 800 MHz
- tBURST_MAX = '10ns'
-
- # 8 CK @ 800 MHz
- tCCD_L = "10ns"
-
- # Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL
- # tWCKDQ0/tCK will be 1 CK for most cases
- # For gem5 RL = WL and BL/n is already accounted for with tBURST
- # Result is and additional 1 CK is required
- tRTW = '1.25ns'
-
- # Default different rank bus delay to 2 CK, @687.5 MHz = 2.5 ns
- tCS = '2.5ns'
-
- # 2 CK
- tPPD = '2.5ns'
-
- # 2 command phases can be sent back-to-back or
- # with a gap up to tAAD = 8 CK
- tAAD = '10ns'
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on initial
-# JEDEC specifcation
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture, burst of 16
-class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32):
-
- # LPDDR5 is a BL16 or BL32 device
- # With BG mode, BL16 and BL32 are supported
- # Use BL16 for smaller access granularity
- burst_length = 16
-
- # For Bstof16 with BG arch, 2 CK @ 800 MHz with 4:1 clock ratio
- tBURST = '2.5ns'
- tBURST_MIN = '2.5ns'
- # For Bstof16 with BG arch, 4 CK @ 800 MHz with 4:1 clock ratio
- tBURST_MAX = '5ns'
-
- # 4 CK @ 800 MHz
- tCCD_L = "5ns"
-
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 8-bank mode, burst of 32
-class LPDDR5_6400_1x16_8B_BL32(LPDDR5_6400_1x16_BG_BL32):
-
- # 4kB page with 8B mode
- device_rowbuffer_size = '4kB'
-
- # LPDDR5 supports configurable bank options
- # 8B : BL32, all frequencies
- # 16B : BL32 or BL16, <=3.2Gbps
- # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
- # Select 8B
- banks_per_rank = 8
- bank_groups_per_rank = 0
-
- # For Bstof32 with 8B mode, 4 CK @ 800 MHz with 4:1 clock ratio
- tBURST = '5ns'
- tBURST_MIN = '5ns'
- tBURST_MAX = '5ns'
-
- # Greater of 4 CK or 12 ns
- tWTR = '12ns'
-
- # Greater of 2 CK or 10 ns
- tRRD = '10ns'
-
- # With 8B mode tFAW is 40 ns
- tXAW = '40ns'
- activation_limit = 4
-
- # Reset BG arch timing for 8B mode
- tCCD_L = "0ns"
- tRRD_L = "0ns"
- tWTR_L = "0ns"
diff --git a/src/mem/DRAMInterface.py b/src/mem/DRAMInterface.py
new file mode 100644
index 0000000..35bf8a3
--- /dev/null
+++ b/src/mem/DRAMInterface.py
@@ -0,0 +1,1483 @@
+# Copyright (c) 2012-2020 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Copyright (c) 2013 Amin Farmahini-Farahani
+# Copyright (c) 2015 University of Kaiserslautern
+# Copyright (c) 2015 The University of Bologna
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from AbstractMemory import AbstractMemory
+from DRAMCtrl import *
+
+# Enum for the address mapping. With Ch, Ra, Ba, Ro and Co denoting
+# channel, rank, bank, row and column, respectively, and going from
+# MSB to LSB. Available are RoRaBaChCo and RoRaBaCoCh, that are
+# suitable for an open-page policy, optimising for sequential accesses
+# hitting in the open row. For a closed-page policy, RoCoRaBaCh
+# maximises parallelism.
+class AddrMap(Enum): vals = ['RoRaBaChCo', 'RoRaBaCoCh', 'RoCoRaBaCh']
+
+# Enum for the page policy, either open, open_adaptive, close, or
+# close_adaptive.
+class PageManage(Enum): vals = ['open', 'open_adaptive', 'close',
+ 'close_adaptive']
+
+class DRAMInterface(AbstractMemory):
+ type = 'DRAMInterface'
+ cxx_header = "mem/dram_ctrl.hh"
+
+ # scheduler, address map and page policy
+ addr_mapping = Param.AddrMap('RoRaBaCoCh', "Address mapping policy")
+ page_policy = Param.PageManage('open_adaptive', "Page management
policy")
+
+ # Allow the interface to set required controller buffer sizes
+ # each entry corresponds to a burst for the specific DRAM
+ # configuration (e.g. x32 with burst length 8 is 32 bytes) and not
+ # the cacheline size or request/packet size
+ write_buffer_size = Param.Unsigned(64, "Number of write queue entries")
+ read_buffer_size = Param.Unsigned(32, "Number of read queue entries")
+
+ # enforce a limit on the number of accesses per row
+ max_accesses_per_row = Param.Unsigned(16, "Max accesses per row before "
+ "closing")
+
+ # size of DRAM Chip in Bytes
+ device_size = Param.MemorySize("Size of DRAM chip")
+ # the physical organisation of the DRAM
+ device_bus_width = Param.Unsigned("data bus width in bits for each
DRAM "\
+ "device/chip")
+ burst_length = Param.Unsigned("Burst length (BL) in beats")
+ device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\
+ "device/chip")
+ devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
+
+ ranks_per_channel = Param.Unsigned("Number of ranks per channel")
+ # default to 0 bank groups per rank, indicating bank group architecture
+ # is not used
+ # update per memory class when bank group architecture is supported
+ bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per
rank")
+ banks_per_rank = Param.Unsigned("Number of banks per rank")
+
+ # Enable DRAM powerdown states if True. This is False by default due to
+ # performance being lower when enabled
+ enable_dram_powerdown = Param.Bool(False, "Enable powerdown states")
+
+ # For power modelling we need to know if the DRAM has a DLL or not
+ dll = Param.Bool(True, "DRAM has DLL or not")
+
+ # DRAMPower provides in addition to the core power, the possibility to
+ # include RD/WR termination and IO power. This calculation assumes some
+ # default values. The integration of DRAMPower with gem5 does not
include
+ # IO and RD/WR termination power by default. This might be added as an
+ # additional feature in the future.
+
+ # timing behaviour and constraints - all in nanoseconds
+
+ # the base clock period of the DRAM
+
+ tCK = Param.Latency("Clock period")
+
+ # rank-to-rank bus delay penalty
+ # this does not correlate to a memory timing parameter and encompasses:
+ # 1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD
+ # different rank bus delay
+ tCS = Param.Latency("Rank to rank switching time")
+
+ # the amount of time in nanoseconds from issuing an activate command
+ # to the data being available in the row buffer for a read/write
+ tRCD = Param.Latency("RAS to CAS delay")
+
+ # the time from issuing a read/write command to seeing the actual data
+ tCL = Param.Latency("CAS latency")
+
+ # minimum time between a precharge and subsequent activate
+ tRP = Param.Latency("Row precharge time")
+
+ # minimum time between an activate and a precharge to the same row
+ tRAS = Param.Latency("ACT to PRE delay")
+
+ # minimum time between a write data transfer and a precharge
+ tWR = Param.Latency("Write recovery time")
+
+ # minimum time between a read and precharge command
+ tRTP = Param.Latency("Read to precharge")
+
+ # time to complete a burst transfer, typically the burst length
+ # divided by two due to the DDR bus, but by making it a parameter
+ # it is easier to also evaluate SDR memories like WideIO.
+ # This parameter has to account for burst length.
+ # Read/Write requests with data size larger than one full burst are
broken
+ # down into multiple requests in the controller
+ # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
+ # With bank group architectures, tBURST represents the CAS-to-CAS
+ # delay for bursts to different bank groups (tCCD_S)
+ tBURST = Param.Latency("Burst duration "
+ "(typically burst length / 2 cycles)")
+
+ # tBURST_MAX is the column array cycle delay required before next
access,
+ # which could be greater than tBURST when the memory access time is
greater
+ # than tBURST
+ tBURST_MAX = Param.Latency(Self.tBURST, "Column access delay")
+
+ # tBURST_MIN is the minimum delay between bursts, which could be less
than
+ # tBURST when interleaving is supported
+ tBURST_MIN = Param.Latency(Self.tBURST, "Minimum delay between bursts")
+
+ # CAS-to-CAS delay for bursts to the same bank group
+ # only utilized with bank group architectures; set to 0 for default
case
+ # tBURST is equivalent to tCCD_S; no explicit parameter required
+ # for CAS-to-CAS delay for bursts to different bank groups
+ tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")
+
+ # Write-to-Write delay for bursts to the same bank group
+ # only utilized with bank group architectures; set to 0 for default
case
+ # This will be used to enable different same bank group delays
+ # for writes versus reads
+ tCCD_L_WR = Param.Latency(Self.tCCD_L, "Same bank group Write to
Write " \
+ "delay")
+
+ # time taken to complete one refresh cycle (N rows in all banks)
+ tRFC = Param.Latency("Refresh cycle time")
+
+ # refresh command interval, how often a "ref" command needs
+ # to be sent. It is 7.8 us for a 64ms refresh requirement
+ tREFI = Param.Latency("Refresh command interval")
+
+ # write-to-read, same rank turnaround penalty
+ tWTR = Param.Latency("Write to read, same rank switching time")
+
+ # write-to-read, same rank turnaround penalty for same bank group
+ tWTR_L = Param.Latency(Self.tWTR, "Write to read, same rank switching "
+ "time, same bank group")
+
+ # read-to-write, same rank turnaround penalty
+ tRTW = Param.Latency("Read to write, same rank switching time")
+
+ # minimum precharge to precharge delay time
+ tPPD = Param.Latency("0ns", "PRE to PRE delay")
+
+ # maximum delay between two-cycle ACT command phases
+ tAAD = Param.Latency(Self.tCK,
+ "Maximum delay between two-cycle ACT commands")
+
+ two_cycle_activate = Param.Bool(False,
+ "Two cycles required to send activate")
+
+ # minimum row activate to row activate delay time
+ tRRD = Param.Latency("ACT to ACT delay")
+
+ # only utilized with bank group architectures; set to 0 for default
case
+ tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")
+
+ # time window in which a maximum number of activates are allowed
+ # to take place, set to 0 to disable
+ tXAW = Param.Latency("X activation window")
+ activation_limit = Param.Unsigned("Max number of activates in window")
+
+ # time to exit power-down mode
+ # Exit power-down to next valid command delay
+ tXP = Param.Latency("0ns", "Power-up Delay")
+
+ # Exit Powerdown to commands requiring a locked DLL
+ tXPDLL = Param.Latency("0ns", "Power-up Delay with locked DLL")
+
+ # time to exit self-refresh mode
+ tXS = Param.Latency("0ns", "Self-refresh exit latency")
+
+ # time to exit self-refresh mode with locked DLL
+ tXSDLL = Param.Latency("0ns", "Self-refresh exit latency DLL")
+
+ # number of data beats per clock. with DDR, default is 2, one per edge
+ beats_per_clock = Param.Unsigned(2, "Data beats per clock")
+
+ data_clock_sync = Param.Bool(False, "Synchronization commands
required")
+
+ # Currently rolled into other params
+ ######################################################################
+
+ # tRC - assumed to be tRAS + tRP
+
+ # Power Behaviour and Constraints
+ # DRAMs like LPDDR and WideIO have 2 external voltage domains. These
are
+ # defined as VDD and VDD2. Each current is defined for each voltage
domain
+ # separately. For example, current IDD0 is active-precharge current for
+ # voltage domain VDD and current IDD02 is active-precharge current for
+ # voltage domain VDD2.
+ # By default all currents are set to 0mA. Users who are only
interested in
+ # the performance of DRAMs can leave them at 0.
+
+
+ # Operating 1 Bank Active-Precharge current
+ IDD0 = Param.Current("0mA", "Active precharge current")
+
+ # Operating 1 Bank Active-Precharge current multiple voltage Range
+ IDD02 = Param.Current("0mA", "Active precharge current VDD2")
+
+ # Precharge Power-down Current: Slow exit
+ IDD2P0 = Param.Current("0mA", "Precharge Powerdown slow")
+
+ # Precharge Power-down Current: Slow exit multiple voltage Range
+ IDD2P02 = Param.Current("0mA", "Precharge Powerdown slow VDD2")
+
+ # Precharge Power-down Current: Fast exit
+ IDD2P1 = Param.Current("0mA", "Precharge Powerdown fast")
+
+ # Precharge Power-down Current: Fast exit multiple voltage Range
+ IDD2P12 = Param.Current("0mA", "Precharge Powerdown fast VDD2")
+
+ # Precharge Standby current
+ IDD2N = Param.Current("0mA", "Precharge Standby current")
+
+ # Precharge Standby current multiple voltage range
+ IDD2N2 = Param.Current("0mA", "Precharge Standby current VDD2")
+
+ # Active Power-down current: slow exit
+ IDD3P0 = Param.Current("0mA", "Active Powerdown slow")
+
+ # Active Power-down current: slow exit multiple voltage range
+ IDD3P02 = Param.Current("0mA", "Active Powerdown slow VDD2")
+
+ # Active Power-down current : fast exit
+ IDD3P1 = Param.Current("0mA", "Active Powerdown fast")
+
+ # Active Power-down current : fast exit multiple voltage range
+ IDD3P12 = Param.Current("0mA", "Active Powerdown fast VDD2")
+
+ # Active Standby current
+ IDD3N = Param.Current("0mA", "Active Standby current")
+
+ # Active Standby current multiple voltage range
+ IDD3N2 = Param.Current("0mA", "Active Standby current VDD2")
+
+ # Burst Read Operating Current
+ IDD4R = Param.Current("0mA", "READ current")
+
+ # Burst Read Operating Current multiple voltage range
+ IDD4R2 = Param.Current("0mA", "READ current VDD2")
+
+ # Burst Write Operating Current
+ IDD4W = Param.Current("0mA", "WRITE current")
+
+ # Burst Write Operating Current multiple voltage range
+ IDD4W2 = Param.Current("0mA", "WRITE current VDD2")
+
+ # Refresh Current
+ IDD5 = Param.Current("0mA", "Refresh current")
+
+ # Refresh Current multiple voltage range
+ IDD52 = Param.Current("0mA", "Refresh current VDD2")
+
+ # Self-Refresh Current
+ IDD6 = Param.Current("0mA", "Self-refresh Current")
+
+ # Self-Refresh Current multiple voltage range
+ IDD62 = Param.Current("0mA", "Self-refresh Current VDD2")
+
+ # Main voltage range of the DRAM
+ VDD = Param.Voltage("0V", "Main Voltage Range")
+
+ # Second voltage range defined by some DRAMs
+ VDD2 = Param.Voltage("0V", "2nd Voltage Range")
+
+# A single DDR3-1600 x64 channel (one command and address bus), with
+# timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in
+# an 8x8 configuration.
+class DDR3_1600_8x8(DRAMInterface):
+ # size of device in bytes
+ device_size = '512MB'
+
+ # 8x8 configuration, 8 devices each with an 8-bit interface
+ device_bus_width = 8
+
+ # DDR3 is a BL8 device
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
+ device_rowbuffer_size = '1kB'
+
+ # 8x8 configuration, so 8 devices
+ devices_per_rank = 8
+
+ # Use two ranks
+ ranks_per_channel = 2
+
+ # DDR3 has 8 banks in all configurations
+ banks_per_rank = 8
+
+ # 800 MHz
+ tCK = '1.25ns'
+
+ # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
+ tCS = '2.5ns'
+
+ # 8 beats across an x64 interface translates to 4 clocks @ 800 MHz
+ tBURST = '5ns'
+
+ # Greater of 4 CK or 7.5 ns
+ tWTR = '7.5ns'
+
+ # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
+ tRTW = '2.5ns'
+
+ # DDR3-1600 11-11-11
+ tRCD = '13.75ns'
+ tCL = '13.75ns'
+ tRP = '13.75ns'
+ tRAS = '35ns'
+ tRRD = '6ns'
+ tXAW = '30ns'
+ activation_limit = 4
+ tRFC = '260ns'
+
+ tWR = '15ns'
+
+ # Greater of 4 CK or 7.5 ns
+ tRTP = '7.5ns'
+ # <=85C, half for >85C
+ tREFI = '7.8us'
+
+ # active powerdown and precharge powerdown exit time
+ tXP = '6ns'
+
+ # self refresh exit time
+ tXS = '270ns'
+
+ # Current values from datasheet Die Rev E,J
+ IDD0 = '55mA'
+ IDD2N = '32mA'
+ IDD3N = '38mA'
+ IDD4W = '125mA'
+ IDD4R = '157mA'
+ IDD5 = '235mA'
+ IDD3P1 = '38mA'
+ IDD2P1 = '32mA'
+ IDD6 = '20mA'
+ VDD = '1.5V'
+
+# A single HMC-2500 x32 model based on:
+# [1] DRAMSpec: a high-level DRAM bank modelling tool
+# developed at the University of Kaiserslautern. This high level tool
+# uses RC (resistance-capacitance) and CV (capacitance-voltage) models to
+# estimate the DRAM bank latency and power numbers.
+# [2] High performance AXI-4.0 based interconnect for extensible smart
memory
+# cubes (E. Azarkhish et. al)
+# Assumed for the HMC model is a 30 nm technology node.
+# The modelled HMC consists of 4 Gbit layers which sum up to 2GB of memory
(4
+# layers).
+# Each layer has 16 vaults and each vault consists of 2 banks per layer.
+# In order to be able to use the same controller used for 2D DRAM
generations
+# for HMC, the following analogy is done:
+# Channel (DDR) => Vault (HMC)
+# device_size (DDR) => size of a single layer in a vault
+# ranks per channel (DDR) => number of layers
+# banks per rank (DDR) => banks per layer
+# devices per rank (DDR) => devices per layer ( 1 for HMC).
+# The parameters for which no input is available are inherited from the
DDR3
+# configuration.
+# This configuration includes the latencies from the DRAM to the logic
layer
+# of the HMC
+class HMC_2500_1x32_Interface(DDR3_1600_8x8):
+ # A single HMC-2500 x32 controller
+ # The buffer parameters do not directly correlate with buffer_size in
+ # real hardware. Nevertheless, their value has been tuned to achieve a
+ # bandwidth similar to the cycle-accurate model in [2]
+ write_buffer_size = 32
+ read_buffer_size = 32
+
+ # size of device
+ # two banks per device with each bank 4MB [2]
+ device_size = '8MB'
+
+ # 1x32 configuration, 1 device with 32 TSVs [2]
+ device_bus_width = 32
+
+ # HMC is a BL8 device [2]
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 256 bytes [2]
+ device_rowbuffer_size = '256B'
+
+ # 1x32 configuration, so 1 device [2]
+ devices_per_rank = 1
+
+ # 4 layers so 4 ranks [2]
+ ranks_per_channel = 4
+
+ # HMC has 2 banks per layer [2]
+ # Each layer represents a rank. With 4 layers and 8 banks in total,
each
+ # layer has 2 banks; thus 2 banks per rank.
+ banks_per_rank = 2
+
+ # 1250 MHz [2]
+ tCK = '0.8ns'
+
+ # Default different rank bus delay assumed to 1 CK for TSVs, @1250 MHz
=
+ # 0.8 ns (Assumption)
+ tCS = '0.8ns'
+
+ # 8 beats across an x32 interface translates to 4 clocks @ 1250 MHz
+ tBURST = '3.2ns'
+
+ # Values using DRAMSpec HMC model [1]
+ tRCD = '10.2ns'
+ tCL = '9.9ns'
+ tRP = '7.7ns'
+ tRAS = '21.6ns'
+
+ # tRRD depends on the power supply network for each vendor.
+ # We assume a tRRD of a double bank approach to be equal to 4 clock
+ # cycles (Assumption)
+ tRRD = '3.2ns'
+
+ # activation limit is set to 0 since there are only 2 banks per vault
+ # layer.
+ activation_limit = 0
+
+ # Values using DRAMSpec HMC model [1]
+ tRFC = '59ns'
+ tWR = '8ns'
+ tRTP = '4.9ns'
+
+ # Value using DRAMSpec HMC model [1]
+ tREFI = '3.9us'
+
+ # The default page policy in the vault controllers is simple closed page
+ # [2] nevertheless 'close' policy opens and closes the row multiple times
+ # for bursts larger than 32Bytes. For this reason we use 'close_adaptive'
+ page_policy = 'close_adaptive'
+
+ # RoCoRaBaCh resembles the default address mapping in HMC
+ addr_mapping = 'RoCoRaBaCh'
+
# A single DDR3-2133 x64 channel refining a selected subset of the
# options for the DDR-1600 configuration, based on the same DDR3-1600
# 4 Gbit datasheet (Micron MT41J512M8). Most parameters are kept
# consistent across the two configurations.
class DDR3_2133_8x8(DDR3_1600_8x8):
    """DDR3-2133 x64 channel, 8x8 devices (Micron MT41J512M8 datasheet)."""
    # 1066 MHz
    tCK = '0.938ns'

    # 8 beats across an x64 interface translates to 4 clocks @ 1066 MHz
    tBURST = '3.752ns'

    # DDR3-2133 14-14-14
    tRCD = '13.09ns'
    tCL = '13.09ns'
    tRP = '13.09ns'
    tRAS = '33ns'
    tRRD = '5ns'
    tXAW = '25ns'

    # Current values from datasheet
    IDD0 = '70mA'
    IDD2N = '37mA'
    IDD3N = '44mA'
    IDD4W = '157mA'
    IDD4R = '191mA'
    IDD5 = '250mA'
    IDD3P1 = '44mA'
    IDD2P1 = '43mA'
    IDD6 = '20mA'
    VDD = '1.5V'
+
# A single DDR4-2400 x64 channel (one command and address bus), with
# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A2G4)
# in a 16x4 configuration.
# Total channel capacity is 32GB
# 16 devices/rank * 2 ranks/channel * 1GB/device = 32GB/channel
class DDR4_2400_16x4(DRAMInterface):
    """DDR4-2400 x64 channel, 16x4 devices (Micron MT40A2G4), 32GB/channel."""
    # override the default buffer sizes and go for something larger to
    # accommodate the larger bank count
    write_buffer_size = 128
    read_buffer_size = 64

    # size of device
    device_size = '1GB'

    # 16x4 configuration, 16 devices each with a 4-bit interface
    device_bus_width = 4

    # DDR4 is a BL8 device
    burst_length = 8

    # Each device has a page (row buffer) size of 512 byte (1K columns x4)
    device_rowbuffer_size = '512B'

    # 16x4 configuration, so 16 devices
    devices_per_rank = 16

    # Match our DDR3 configurations which is dual rank
    ranks_per_channel = 2

    # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
    # Set to 4 for x4 case
    bank_groups_per_rank = 4

    # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
    # configurations). Currently we do not capture the additional
    # constraints incurred by the bank groups
    banks_per_rank = 16

    # 1200 MHz
    tCK = '0.833ns'

    # Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns
    tCS = '1.666ns'

    # 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
    # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
    # With bank group architectures, tBURST represents the CAS-to-CAS
    # delay for bursts to different bank groups (tCCD_S)
    tBURST = '3.332ns'

    # Here using the average of WTR_S and WTR_L
    tWTR = '5ns'

    # Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666 ns
    tRTW = '1.666ns'

    # @2400 data rate, tCCD_L is 6 CK
    # CAS-to-CAS delay for bursts to the same bank group
    # tBURST is equivalent to tCCD_S; no explicit parameter required
    # for CAS-to-CAS delay for bursts to different bank groups
    tCCD_L = '5ns'

    # DDR4-2400 17-17-17
    tRCD = '14.16ns'
    tCL = '14.16ns'
    tRP = '14.16ns'
    tRAS = '32ns'

    # RRD_S (different bank group) for 512B page is MAX(4 CK, 3.3ns)
    tRRD = '3.332ns'

    # RRD_L (same bank group) for 512B page is MAX(4 CK, 4.9ns)
    tRRD_L = '4.9ns'

    # tFAW for 512B page is MAX(16 CK, 13ns)
    tXAW = '13.328ns'
    activation_limit = 4

    # tRFC is 350ns
    tRFC = '350ns'

    tWR = '15ns'

    # Greater of 4 CK or 7.5 ns
    tRTP = '7.5ns'

    # <=85C, half for >85C
    tREFI = '7.8us'

    # active powerdown and precharge powerdown exit time
    tXP = '6ns'

    # self refresh exit time
    # exit delay to ACT, PRE, PREALL, REF, SREF Enter, and PD Enter is:
    # tRFC + 10ns = 340ns
    # NOTE(review): with tRFC = 350ns the sum would be 360ns; the 340ns
    # value appears to assume a smaller-die tRFC -- confirm against spec
    tXS = '340ns'

    # Current values from datasheet
    IDD0 = '43mA'
    IDD02 = '3mA'
    IDD2N = '34mA'
    IDD3N = '38mA'
    IDD3N2 = '3mA'
    IDD4W = '103mA'
    IDD4R = '110mA'
    IDD5 = '250mA'
    IDD3P1 = '32mA'
    IDD2P1 = '25mA'
    IDD6 = '30mA'
    VDD = '1.2V'
    VDD2 = '2.5V'
+
# A single DDR4-2400 x64 channel (one command and address bus), with
# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A1G8)
# in an 8x8 configuration.
# Total channel capacity is 16GB
# 8 devices/rank * 2 ranks/channel * 1GB/device = 16GB/channel
class DDR4_2400_8x8(DDR4_2400_16x4):
    """DDR4-2400 x64 channel, 8x8 devices (Micron MT40A1G8), 16GB/channel."""
    # 8x8 configuration, 8 devices each with an 8-bit interface
    device_bus_width = 8

    # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
    device_rowbuffer_size = '1kB'

    # 8x8 configuration, so 8 devices
    devices_per_rank = 8

    # RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
    tRRD_L = '4.9ns'

    tXAW = '21ns'

    # Current values from datasheet
    IDD0 = '48mA'
    IDD3N = '43mA'
    IDD4W = '123mA'
    IDD4R = '135mA'
    IDD3P1 = '37mA'
+
# A single DDR4-2400 x64 channel (one command and address bus), with
# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A512M16)
# in a 4x16 configuration.
# Total channel capacity is 4GB
# 4 devices/rank * 1 ranks/channel * 1GB/device = 4GB/channel
class DDR4_2400_4x16(DDR4_2400_16x4):
    """DDR4-2400 x64 channel, 4x16 devices (Micron MT40A512M16), 4GB/channel."""
    # 4x16 configuration, 4 devices each with a 16-bit interface
    device_bus_width = 16

    # Each device has a page (row buffer) size of 2 Kbyte (1K columns x16)
    device_rowbuffer_size = '2kB'

    # 4x16 configuration, so 4 devices
    devices_per_rank = 4

    # Single rank for x16
    ranks_per_channel = 1

    # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
    # Set to 2 for x16 case
    bank_groups_per_rank = 2

    # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
    # configurations). Currently we do not capture the additional
    # constraints incurred by the bank groups
    banks_per_rank = 8

    # RRD_S (different bank group) for 2K page is MAX(4 CK, 5.3ns)
    tRRD = '5.3ns'

    # RRD_L (same bank group) for 2K page is MAX(4 CK, 6.4ns)
    tRRD_L = '6.4ns'

    tXAW = '30ns'

    # Current values from datasheet
    IDD0 = '80mA'
    IDD02 = '4mA'
    IDD2N = '34mA'
    IDD3N = '47mA'
    IDD4W = '228mA'
    IDD4R = '243mA'
    IDD5 = '280mA'
    IDD3P1 = '41mA'
+
# A single LPDDR2-S4 x32 interface (one command/address bus), with
# default timings based on a LPDDR2-1066 4 Gbit part (Micron MT42L128M32D1)
# in a 1x32 configuration.
class LPDDR2_S4_1066_1x32(DRAMInterface):
    """LPDDR2-S4 x32 interface, LPDDR2-1066 4 Gbit (Micron MT42L128M32D1)."""
    # No DLL in LPDDR2
    dll = False

    # size of device
    device_size = '512MB'

    # 1x32 configuration, 1 device with a 32-bit interface
    device_bus_width = 32

    # LPDDR2_S4 is a BL4 and BL8 device
    burst_length = 8

    # Each device has a page (row buffer) size of 1KB
    # (this depends on the memory density)
    device_rowbuffer_size = '1kB'

    # 1x32 configuration, so 1 device
    devices_per_rank = 1

    # Use a single rank
    ranks_per_channel = 1

    # LPDDR2-S4 has 8 banks in all configurations
    banks_per_rank = 8

    # 533 MHz
    tCK = '1.876ns'

    # Default different rank bus delay to 2 CK, @533 MHz = 3.75 ns
    tCS = '3.75ns'

    # 8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz.
    # Note this is a BL8 DDR device.
    # Requests larger than 32 bytes are broken down into multiple requests
    # in the controller
    tBURST = '7.5ns'

    # Irrespective of speed grade, tWTR is 7.5 ns
    tWTR = '7.5ns'

    # Default same rank rd-to-wr bus turnaround to 2 CK, @533 MHz = 3.75 ns
    tRTW = '3.75ns'

    # Fixed at 15 ns
    tRCD = '15ns'

    # 8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle time
    tCL = '15ns'

    # Pre-charge one bank 15 ns (all banks 18 ns)
    tRP = '15ns'

    tRAS = '42ns'
    tWR = '15ns'

    tRTP = '7.5ns'

    # LPDDR2-S4, 4 Gbit
    tRFC = '130ns'
    tREFI = '3.9us'

    # active powerdown and precharge powerdown exit time
    tXP = '7.5ns'

    # self refresh exit time
    tXS = '140ns'

    # Activate to activate irrespective of density and speed grade
    tRRD = '10.0ns'

    # Irrespective of density, tFAW is 50 ns
    tXAW = '50ns'
    activation_limit = 4

    # Current values from datasheet
    # (the *2-suffixed entries are presumably the currents drawn from the
    # second supply rail, VDD2 -- confirm against the DRAMPower model)
    IDD0 = '15mA'
    IDD02 = '70mA'
    IDD2N = '2mA'
    IDD2N2 = '30mA'
    IDD3N = '2.5mA'
    IDD3N2 = '30mA'
    IDD4W = '10mA'
    IDD4W2 = '190mA'
    IDD4R = '3mA'
    IDD4R2 = '220mA'
    IDD5 = '40mA'
    IDD52 = '150mA'
    IDD3P1 = '1.2mA'
    IDD3P12 = '8mA'
    IDD2P1 = '0.6mA'
    IDD2P12 = '0.8mA'
    IDD6 = '1mA'
    IDD62 = '3.2mA'
    VDD = '1.8V'
    VDD2 = '1.2V'
+
# A single WideIO x128 interface (one command and address bus), with
# default timings based on an estimated WIO-200 8 Gbit part.
class WideIO_200_1x128(DRAMInterface):
    """WideIO x128 interface, estimated WIO-200 8 Gbit part."""
    # No DLL for WideIO
    dll = False

    # size of device
    device_size = '1024MB'

    # 1x128 configuration, 1 device with a 128-bit interface
    device_bus_width = 128

    # This is a BL4 device
    burst_length = 4

    # Each device has a page (row buffer) size of 4KB
    # (this depends on the memory density)
    device_rowbuffer_size = '4kB'

    # 1x128 configuration, so 1 device
    devices_per_rank = 1

    # Use one rank for a one-high die stack
    ranks_per_channel = 1

    # WideIO has 4 banks in all configurations
    banks_per_rank = 4

    # 200 MHz
    tCK = '5ns'

    # Default different rank bus delay to 2 CK, @200 MHz = 10 ns
    tCS = '10ns'

    # 4 beats across an x128 SDR interface translates to 4 clocks @ 200 MHz.
    # Note this is a BL4 SDR device.
    tBURST = '20ns'

    # Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns
    tWTR = '15ns'

    # Default same rank rd-to-wr bus turnaround to 2 CK, @200 MHz = 10 ns
    tRTW = '10ns'

    # WIO-200
    tRCD = '18ns'
    tCL = '18ns'
    tRP = '18ns'
    tRAS = '42ns'
    tWR = '15ns'
    # Read to precharge is same as the burst
    tRTP = '20ns'

    # WIO 8 Gb
    tRFC = '210ns'

    # WIO 8 Gb, <=85C, half for >85C
    tREFI = '3.9us'

    # Activate to activate irrespective of density and speed grade
    tRRD = '10.0ns'

    # Two instead of four activation window
    tXAW = '50ns'
    activation_limit = 2

    # The WideIO specification does not provide current information
+
# A single LPDDR3 x32 interface (one command/address bus), with
# default timings based on a LPDDR3-1600 4 Gbit part (Micron
# EDF8132A1MC) in a 1x32 configuration.
class LPDDR3_1600_1x32(DRAMInterface):
    """LPDDR3-1600 x32 interface, 4 Gbit part (Micron EDF8132A1MC)."""
    # No DLL for LPDDR3
    dll = False

    # size of device
    device_size = '512MB'

    # 1x32 configuration, 1 device with a 32-bit interface
    device_bus_width = 32

    # LPDDR3 is a BL8 device
    burst_length = 8

    # Each device has a page (row buffer) size of 4KB
    device_rowbuffer_size = '4kB'

    # 1x32 configuration, so 1 device
    devices_per_rank = 1

    # Technically the datasheet is a dual-rank package, but for
    # comparison with the LPDDR2 config we stick to a single rank
    ranks_per_channel = 1

    # LPDDR3 has 8 banks in all configurations
    banks_per_rank = 8

    # 800 MHz
    tCK = '1.25ns'

    # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
    tCS = '2.5ns'

    # 8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz.
    # Note this is a BL8 DDR device.
    # Requests larger than 32 bytes are broken down into multiple requests
    # in the controller
    tBURST = '5ns'

    # Irrespective of speed grade, tWTR is 7.5 ns
    tWTR = '7.5ns'

    # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
    tRTW = '2.5ns'

    tRCD = '18ns'

    # 12 CK read latency, 6 CK write latency @ 800 MHz, 1.25 ns cycle time
    tCL = '15ns'

    tRAS = '42ns'
    tWR = '15ns'

    # Greater of 4 CK or 7.5 ns, 4 CK @ 800 MHz = 5 ns
    tRTP = '7.5ns'

    # Pre-charge one bank 18 ns (all banks 21 ns)
    tRP = '18ns'

    # LPDDR3, 4 Gb
    tRFC = '130ns'
    tREFI = '3.9us'

    # active powerdown and precharge powerdown exit time
    tXP = '7.5ns'

    # self refresh exit time
    tXS = '140ns'

    # Activate to activate irrespective of density and speed grade
    tRRD = '10.0ns'

    # Irrespective of size, tFAW is 50 ns
    tXAW = '50ns'
    activation_limit = 4

    # Current values from datasheet
    # (the *2-suffixed entries are presumably the currents drawn from the
    # second supply rail, VDD2 -- confirm against the DRAMPower model)
    IDD0 = '8mA'
    IDD02 = '60mA'
    IDD2N = '0.8mA'
    IDD2N2 = '26mA'
    IDD3N = '2mA'
    IDD3N2 = '34mA'
    IDD4W = '2mA'
    IDD4W2 = '190mA'
    IDD4R = '2mA'
    IDD4R2 = '230mA'
    IDD5 = '28mA'
    IDD52 = '150mA'
    IDD3P1 = '1.4mA'
    IDD3P12 = '11mA'
    IDD2P1 = '0.8mA'
    IDD2P12 = '1.8mA'
    IDD6 = '0.5mA'
    IDD62 = '1.8mA'
    VDD = '1.8V'
    VDD2 = '1.2V'
+
# A single GDDR5 x64 interface, with
# default timings based on a GDDR5-4000 1 Gbit part (SK Hynix
# H5GQ1H24AFR) in a 2x32 configuration.
class GDDR5_4000_2x32(DRAMInterface):
    """GDDR5-4000 x64 interface, 2x32 devices (SK Hynix H5GQ1H24AFR)."""
    # size of device
    device_size = '128MB'

    # 2x32 configuration, 2 devices each with a 32-bit interface
    device_bus_width = 32

    # GDDR5 is a BL8 device
    burst_length = 8

    # Each device has a page (row buffer) size of 2Kbits (256Bytes)
    device_rowbuffer_size = '256B'

    # 2x32 configuration, so 2 devices
    devices_per_rank = 2

    # assume single rank
    ranks_per_channel = 1

    # GDDR5 has 4 bank groups
    bank_groups_per_rank = 4

    # GDDR5 has 16 banks with 4 bank groups
    banks_per_rank = 16

    # 1000 MHz
    tCK = '1ns'

    # 8 beats across an x64 interface translates to 2 clocks @ 1000 MHz
    # Data bus runs @2000 MHz => DDR ( data runs at 4000 MHz )
    # 8 beats at 4000 MHz = 2 beats at 1000 MHz
    # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
    # With bank group architectures, tBURST represents the CAS-to-CAS
    # delay for bursts to different bank groups (tCCD_S)
    tBURST = '2ns'

    # Here using the average of WTR_S and WTR_L
    tWTR = '5ns'

    # Assume 2 cycles
    tRTW = '2ns'

    # @1000MHz data rate, tCCD_L is 3 CK
    # CAS-to-CAS delay for bursts to the same bank group
    # tBURST is equivalent to tCCD_S; no explicit parameter required
    # for CAS-to-CAS delay for bursts to different bank groups
    tCCD_L = '3ns'

    tRCD = '12ns'

    # tCL is not directly found in datasheet and assumed equal tRCD
    tCL = '12ns'

    tRP = '12ns'
    tRAS = '28ns'

    # RRD_S (different bank group)
    # RRD_S is 5.5 ns in datasheet.
    # rounded to the next multiple of tCK
    tRRD = '6ns'

    # RRD_L (same bank group)
    # RRD_L is 5.5 ns in datasheet.
    # rounded to the next multiple of tCK
    tRRD_L = '6ns'

    tXAW = '23ns'

    # tXAW < 4 x tRRD.
    # Therefore, activation limit is set to 0
    activation_limit = 0

    tRFC = '65ns'
    tWR = '12ns'

    # Read-to-Precharge 2 CK
    tRTP = '2ns'
+
# A single HBM x128 interface (one command and address bus), with
# default timings based on data publicly released
# ("HBM: Memory Solution for High Performance Processors", MemCon, 2014),
# IDD measurement values, and by extrapolating data from other classes.
# Architecture values based on published HBM spec
# A 4H stack is defined, 2Gb per die for a total of 1GB of memory.
class HBM_1000_4H_1x128(DRAMInterface):
    """HBM gen1 legacy-mode channel: x128, 4H stack of 2Gb dies."""
    # HBM gen1 supports up to 8 128-bit physical channels
    # Configuration defines a single channel, with the capacity
    # set to (full stack capacity / 8) based on 2Gb dies
    # To use all 8 channels, set 'channels' parameter to 8 in
    # system configuration

    # 128-bit interface legacy mode
    device_bus_width = 128

    # HBM supports BL4 and BL2 (legacy mode only)
    burst_length = 4

    # size of channel in bytes, 4H stack of 2Gb dies is 1GB per stack;
    # with 8 channels, 128MB per channel
    device_size = '128MB'

    device_rowbuffer_size = '2kB'

    # 1x128 configuration
    devices_per_rank = 1

    # HBM does not have a CS pin; set rank to 1
    ranks_per_channel = 1

    # HBM has 8 or 16 banks depending on capacity
    # 2Gb dies have 8 banks
    banks_per_rank = 8

    # depending on frequency, bank groups may be required
    # will always have 4 bank groups when enabled
    # current specifications do not define the minimum frequency for
    # bank group architecture
    # setting bank_groups_per_rank to 0 to disable until range is defined
    bank_groups_per_rank = 0

    # 500 MHz for 1Gbps DDR data rate
    tCK = '2ns'

    # single rank device, set to 0
    tCS = '0ns'

    # BL2 and BL4 supported, default to BL4
    # DDR @ 500 MHz means 4 * 2ns / 2 = 4ns
    tBURST = '4ns'

    tWTR = '10ns'

    # start with 2 cycles turnaround, similar to other memory classes
    # could be more with variations across the stack
    tRTW = '4ns'

    # use values from IDD measurement in JEDEC spec
    # use tRP value for tRCD and tCL similar to other classes
    tRP = '15ns'
    tRCD = '15ns'
    tCL = '15ns'
    tRAS = '33ns'

    # value for 2Gb device from JEDEC spec
    tRFC = '160ns'

    # value for 2Gb device from JEDEC spec
    tREFI = '3.9us'

    # extrapolate the following from LPDDR configs, using ns values
    # to minimize burst length, prefetch differences
    tWR = '18ns'
    tRTP = '7.5ns'
    # from MemCon example, tRRD is 4ns with 2ns tCK
    tRRD = '4ns'

    # from MemCon example, tFAW is 30ns with 2ns tCK
    tXAW = '30ns'
    activation_limit = 4

    # 4tCK
    tXP = '8ns'

    # start with tRFC + tXP -> 160ns + 8ns = 168ns
    tXS = '168ns'
+
# A single HBM x64 interface (one command and address bus), with
# default timings based on HBM gen1 and data publicly released
# A 4H stack is defined, 8Gb per die for a total of 4GB of memory.
# Note: This defines a pseudo-channel with a unique controller
# instantiated per pseudo-channel
# Stay at same IO rate (1Gbps) to maintain timing relationship with
# HBM gen1 class (HBM_1000_4H_x128) where possible
class HBM_1000_4H_1x64(HBM_1000_4H_1x128):
    """HBM pseudo-channel interface: x64, 4H stack of 8Gb dies."""
    # For HBM gen2 with pseudo-channel mode, configure 2X channels.
    # Configuration defines a single pseudo channel, with the capacity
    # set to (full stack capacity / 16) based on 8Gb dies
    # To use all 16 pseudo channels, set 'channels' parameter to 16 in
    # system configuration

    # 64-bit pseudo-channel interface
    device_bus_width = 64

    # HBM pseudo-channel only supports BL4
    burst_length = 4

    # size of channel in bytes, 4H stack of 8Gb dies is 4GB per stack;
    # with 16 channels, 256MB per channel
    device_size = '256MB'

    # page size is halved with pseudo-channel; maintaining the same number
    # of rows per pseudo-channel with 2X banks across 2 channels
    device_rowbuffer_size = '1kB'

    # HBM has 8 or 16 banks depending on capacity
    # Starting with 4Gb dies, 16 banks are defined
    banks_per_rank = 16

    # Default different rank bus delay to 2 CK, @1000 MHz = 2 ns
    tCS = '2ns'

    # reset tRFC for larger, 8Gb device
    # use HBM1 4Gb value as a starting point
    tRFC = '260ns'

    tREFI = '3.9us'

    # active powerdown and precharge powerdown exit time
    tXP = '10ns'

    # self refresh exit time: tRFC + 8ns -> 260ns + 8ns = 268ns
    # (the original class assigned tXS twice, '268ns' and then '65ns';
    # the later '65ns' silently won, which is implausible as it is far
    # below tRFC, so the computed 268ns value is kept)
    tXS = '268ns'
+
# A single LPDDR5 x16 interface (one command/address bus)
# for a single x16 channel with default timings based on
# initial JEDEC specification
# Starting with 5.5Gbps data rates and 8Gbit die
# Configuring for 16-bank mode with bank-group architecture
# burst of 32, which means bursts can be interleaved
class LPDDR5_5500_1x16_BG_BL32(DRAMInterface):
    """LPDDR5-5500 x16 channel, 16-bank mode with bank groups, BL32."""

    # Increase buffer size to account for more bank resources
    read_buffer_size = 64

    # Set page policy to better suit DMC Huxley
    page_policy = 'close_adaptive'

    # 16-bit channel interface
    device_bus_width = 16

    # LPDDR5 is a BL16 or BL32 device
    # With BG mode, BL16 and BL32 are supported
    # Use BL32 for higher command bandwidth
    burst_length = 32

    # size of device in bytes
    device_size = '1GB'

    # 2kB page with BG mode
    device_rowbuffer_size = '2kB'

    # Use a 1x16 configuration
    devices_per_rank = 1

    # Use a single rank
    ranks_per_channel = 1

    # LPDDR5 supports configurable bank options
    # 8B  : BL32, all frequencies
    # 16B : BL32 or BL16, <=3.2Gbps
    # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
    # Initial configuration will have 16 banks with Bank Group Arch
    # to maximize resources and enable higher data rates
    banks_per_rank = 16
    bank_groups_per_rank = 4

    # 5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
    tCK = '1.455ns'

    # Greater of 2 CK or 18ns
    tRCD = '18ns'

    # Base RL is 16 CK @ 687.5 MHz = 23.28ns
    tCL = '23.280ns'

    # Greater of 2 CK or 18ns
    tRP = '18ns'

    # Greater of 3 CK or 42ns
    tRAS = '42ns'

    # Greater of 3 CK or 34ns
    tWR = '34ns'

    # active powerdown and precharge powerdown exit time
    # Greater of 3 CK or 7ns
    tXP = '7ns'

    # self refresh exit time (tRFCab + 7.5ns)
    tXS = '217.5ns'

    # Greater of 2 CK or 7.5 ns minus 2 CK
    tRTP = '4.59ns'

    # With BG architecture, burst of 32 transferred in two 16-beat
    # sub-bursts, with a 16-beat gap in between.
    # Each 16-beat sub-burst is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
    # tBURST is the delay to transfer the Bstof32 = 6 CK @ 687.5 MHz
    tBURST = '8.73ns'
    # can interleave a Bstof32 from another bank group at tBURST_MIN
    # 16-beats is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
    tBURST_MIN = '2.91ns'
    # tBURST_MAX is the maximum burst delay for same bank group timing
    # this is 8 CK @ 687.5 MHz
    tBURST_MAX = '11.64ns'

    # 8 CK @ 687.5 MHz
    tCCD_L = "11.64ns"

    # LPDDR5, 8 Gbit/channel -> tRFCab of 210ns
    # (consistent with tXS = tRFCab + 7.5ns = 217.5ns above)
    tRFC = '210ns'
    tREFI = '3.9us'

    # Greater of 4 CK or 6.25 ns
    tWTR = '6.25ns'
    # Greater of 4 CK or 12 ns
    tWTR_L = '12ns'

    # Required RD-to-WR timing is RL + BL/n + tWCKDQ0/tCK - WL
    # tWCKDQ0/tCK will be 1 CK for most cases
    # For gem5 RL = WL and BL/n is already accounted for with tBURST
    # Result is an additional 1 CK is required
    tRTW = '1.455ns'

    # Default different rank bus delay to 2 CK, @687.5 MHz = 2.91 ns
    tCS = '2.91ns'

    # 2 CK
    tPPD = '2.91ns'

    # Greater of 2 CK or 5 ns
    tRRD = '5ns'
    tRRD_L = '5ns'

    # With Bank Group Arch mode tFAW is 20 ns
    tXAW = '20ns'
    activation_limit = 4

    # at 5Gbps, 4:1 WCK to CK ratio required
    # 2 data beats per WCK (DDR) -> 8 per CK
    beats_per_clock = 8

    # 2 cycles required to send activate command
    # 2 command phases can be sent back-to-back or
    # with a gap up to tAAD = 8 CK
    two_cycle_activate = True
    tAAD = '11.640ns'

    data_clock_sync = True
+
# A single LPDDR5 x16 interface (one command/address bus)
# for a single x16 channel with default timings based on
# initial JEDEC specification
# Starting with 5.5Gbps data rates and 8Gbit die
# Configuring for 16-bank mode with bank-group architecture, burst of 16
class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32):
    """LPDDR5-5500 x16 channel, 16-bank mode with bank groups, BL16."""

    # LPDDR5 is a BL16 or BL32 device
    # With BG mode, BL16 and BL32 are supported
    # Use BL16 for smaller access granularity
    burst_length = 16

    # For Bstof16 with BG arch, 2 CK @ 687.5 MHz with 4:1 clock ratio
    tBURST = '2.91ns'
    tBURST_MIN = '2.91ns'
    # For Bstof16 with BG arch, 4 CK @ 687.5 MHz with 4:1 clock ratio
    tBURST_MAX = '5.82ns'

    # 4 CK @ 687.5 MHz
    tCCD_L = "5.82ns"
+
+
# A single LPDDR5 x16 interface (one command/address bus)
# for a single x16 channel with default timings based on
# initial JEDEC specification
# Starting with 5.5Gbps data rates and 8Gbit die
# Configuring for 8-bank mode, burst of 32
class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32):
    """LPDDR5-5500 x16 channel, 8-bank mode (no bank groups), BL32."""

    # 4kB page with 8B mode
    device_rowbuffer_size = '4kB'

    # LPDDR5 supports configurable bank options
    # 8B  : BL32, all frequencies
    # 16B : BL32 or BL16, <=3.2Gbps
    # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
    # Select 8B
    banks_per_rank = 8
    bank_groups_per_rank = 0

    # For Bstof32 with 8B mode, 4 CK @ 687.5 MHz with 4:1 clock ratio
    tBURST = '5.82ns'
    tBURST_MIN = '5.82ns'
    tBURST_MAX = '5.82ns'

    # Greater of 4 CK or 12 ns
    tWTR = '12ns'

    # Greater of 2 CK or 10 ns
    tRRD = '10ns'

    # With 8B mode tFAW is 40 ns
    tXAW = '40ns'
    activation_limit = 4

    # Reset BG arch timing for 8B mode
    tCCD_L = "0ns"
    tRRD_L = "0ns"
    tWTR_L = "0ns"
+
# A single LPDDR5 x16 interface (one command/address bus)
# for a single x16 channel with default timings based on
# initial JEDEC specification
# 6.4Gbps data rates and 8Gbit die
# Configuring for 16-bank mode with bank-group architecture
# burst of 32, which means bursts can be interleaved
class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32):
    """LPDDR5-6400 x16 channel, 16-bank mode with bank groups, BL32."""

    # 6.4Gb/s DDR with 4:1 WCK:CK ratio for 800 MHz CK
    tCK = '1.25ns'

    # Base RL is 17 CK @ 800 MHz = 21.25ns
    tCL = '21.25ns'

    # With BG architecture, burst of 32 transferred in two 16-beat
    # sub-bursts, with a 16-beat gap in between.
    # Each 16-beat sub-burst is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
    # tBURST is the delay to transfer the Bstof32 = 6 CK @ 800 MHz
    tBURST = '7.5ns'
    # can interleave a Bstof32 from another bank group at tBURST_MIN
    # 16-beats is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
    tBURST_MIN = '2.5ns'
    # tBURST_MAX is the maximum burst delay for same bank group timing
    # this is 8 CK @ 800 MHz
    tBURST_MAX = '10ns'

    # 8 CK @ 800 MHz
    tCCD_L = "10ns"

    # Required RD-to-WR timing is RL + BL/n + tWCKDQ0/tCK - WL
    # tWCKDQ0/tCK will be 1 CK for most cases
    # For gem5 RL = WL and BL/n is already accounted for with tBURST
    # Result is an additional 1 CK is required
    tRTW = '1.25ns'

    # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
    tCS = '2.5ns'

    # 2 CK
    tPPD = '2.5ns'

    # 2 command phases can be sent back-to-back or
    # with a gap up to tAAD = 8 CK
    tAAD = '10ns'
+
# A single LPDDR5 x16 interface (one command/address bus)
# for a single x16 channel with default timings based on initial
# JEDEC specification
# 6.4Gbps data rates and 8Gbit die
# Configuring for 16-bank mode with bank-group architecture, burst of 16
class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32):
    """LPDDR5-6400 x16 channel, 16-bank mode with bank groups, BL16."""

    # LPDDR5 is a BL16 or BL32 device
    # With BG mode, BL16 and BL32 are supported
    # Use BL16 for smaller access granularity
    burst_length = 16

    # For Bstof16 with BG arch, 2 CK @ 800 MHz with 4:1 clock ratio
    tBURST = '2.5ns'
    tBURST_MIN = '2.5ns'
    # For Bstof16 with BG arch, 4 CK @ 800 MHz with 4:1 clock ratio
    tBURST_MAX = '5ns'

    # 4 CK @ 800 MHz
    tCCD_L = "5ns"
+
+
# A single LPDDR5 x16 interface (one command/address bus)
# for a single x16 channel with default timings based on
# initial JEDEC specification
# 6.4Gbps data rates and 8Gbit die
# Configuring for 8-bank mode, burst of 32
class LPDDR5_6400_1x16_8B_BL32(LPDDR5_6400_1x16_BG_BL32):
    """LPDDR5-6400 x16 channel, 8-bank mode (no bank groups), BL32."""

    # 4kB page with 8B mode
    device_rowbuffer_size = '4kB'

    # LPDDR5 supports configurable bank options
    # 8B  : BL32, all frequencies
    # 16B : BL32 or BL16, <=3.2Gbps
    # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
    # Select 8B
    banks_per_rank = 8
    bank_groups_per_rank = 0

    # For Bstof32 with 8B mode, 4 CK @ 800 MHz with 4:1 clock ratio
    tBURST = '5ns'
    tBURST_MIN = '5ns'
    tBURST_MAX = '5ns'

    # Greater of 4 CK or 12 ns
    tWTR = '12ns'

    # Greater of 2 CK or 10 ns
    tRRD = '10ns'

    # With 8B mode tFAW is 40 ns
    tXAW = '40ns'
    activation_limit = 4

    # Reset BG arch timing for 8B mode
    tCCD_L = "0ns"
    tRRD_L = "0ns"
    tWTR_L = "0ns"
diff --git a/src/mem/SConscript b/src/mem/SConscript
index b77dbb1..76ffdbd 100644
--- a/src/mem/SConscript
+++ b/src/mem/SConscript
@@ -1,6 +1,6 @@
# -*- mode:python -*-
#
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018, 2020 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
@@ -47,6 +47,7 @@
SimObject('AddrMapper.py')
SimObject('Bridge.py')
SimObject('DRAMCtrl.py')
+SimObject('DRAMInterface.py')
SimObject('ExternalMaster.py')
SimObject('ExternalSlave.py')
SimObject('MemObject.py')
diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc
index dc244fe..533aa01 100644
--- a/src/mem/dram_ctrl.cc
+++ b/src/mem/dram_ctrl.cc
@@ -47,6 +47,7 @@
#include "debug/DRAMState.hh"
#include "debug/Drain.hh"
#include "debug/QOS.hh"
+#include "params/DRAMInterface.hh"
#include "sim/system.hh"
using namespace std;
@@ -58,12 +59,13 @@
retryRdReq(false), retryWrReq(false),
nextReqEvent([this]{ processNextReqEvent(); }, name()),
respondEvent([this]{ processRespondEvent(); }, name()),
+ dram(p->dram),
readBufferSize(p->read_buffer_size),
writeBufferSize(p->write_buffer_size),
writeHighThreshold(writeBufferSize * p->write_high_thresh_perc /
100.0),
writeLowThreshold(writeBufferSize * p->write_low_thresh_perc / 100.0),
minWritesPerSwitch(p->min_writes_per_switch),
- writesThisTime(0), readsThisTime(0), tCS(p->tCS),
+ writesThisTime(0), readsThisTime(0),
memSchedPolicy(p->mem_sched_policy),
frontendLatency(p->static_frontend_latency),
backendLatency(p->static_backend_latency),
@@ -75,37 +77,23 @@
readQueue.resize(p->qos_priorities);
writeQueue.resize(p->qos_priorities);
+ dram->setCtrl(this);
+
// perform a basic check of the write thresholds
if (p->write_low_thresh_perc >= p->write_high_thresh_perc)
fatal("Write buffer low threshold %d must be smaller than the "
"high threshold %d\n", p->write_low_thresh_perc,
p->write_high_thresh_perc);
-
- // determine the rows per bank by looking at the total capacity
- uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
-
- DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
- AbstractMemory::size());
-
- // create a DRAM interface
- // will only populate the ranks if DRAM is configured
- dram = new DRAMInterface(*this, p, capacity, range);
- DPRINTF(DRAM, "Created DRAM interface \n");
}
void
DRAMCtrl::init()
{
- MemCtrl::init();
-
if (!port.isConnected()) {
fatal("DRAMCtrl %s is unconnected!\n", name());
} else {
port.sendRangeChange();
}
-
- dram->init(range);
-
}
void
@@ -115,8 +103,6 @@
isTimingMode = system()->isTimingMode();
if (isTimingMode) {
- dram->startup();
-
// shift the bus busy time sufficiently far ahead that we never
// have to worry about negative values when computing the time for
// the next request, this will add an insignificant bubble at the
@@ -134,7 +120,7 @@
"is responding");
// do the actual memory access and turn the packet into a response
- access(pkt);
+ dram->access(pkt);
Tick latency = 0;
if (pkt->hasData()) {
@@ -264,7 +250,7 @@
// address of first DRAM packet is kept unaliged. Subsequent DRAM
packets
// are aligned to burst size boundaries. This is to ensure we
accurately
// check read packets against packets in write queue.
- const Addr base_addr = getCtrlAddr(pkt->getAddr());
+ const Addr base_addr = dram->getCtrlAddr(pkt->getAddr());
Addr addr = base_addr;
unsigned pktsServicedByWrQ = 0;
BurstHelper* burst_helper = NULL;
@@ -364,7 +350,7 @@
// if the request size is larger than burst size, the pkt is split into
// multiple DRAM packets
- const Addr base_addr = getCtrlAddr(pkt->getAddr());
+ const Addr base_addr = dram->getCtrlAddr(pkt->getAddr());
Addr addr = base_addr;
uint32_t burstSize = dram->bytesPerBurst();
for (int cnt = 0; cnt < pktCount; ++cnt) {
@@ -527,7 +513,7 @@
DRAMPacket* dram_pkt = respQueue.front();
// media specific checks and functions when read response is complete
- dram->respondEventDRAM(dram_pkt->rank);
+ dram->respondEvent(dram_pkt->rank);
if (dram_pkt->burstHelper) {
// it is a split packet
@@ -726,12 +712,12 @@
void
DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency)
{
- DPRINTF(DRAM, "Responding to Address %lld.. ",pkt->getAddr());
+ DPRINTF(DRAM, "Responding to Address %lld.. \n",pkt->getAddr());
bool needsResponse = pkt->needsResponse();
// do the actual memory access which also turns the packet into a
// response
- access(pkt);
+ dram->access(pkt);
// turn packet around to go back to requester if response expected
if (needsResponse) {
@@ -876,9 +862,9 @@
// if not, shift to next burst window
Tick act_at;
if (twoCycleActivate)
- act_at = ctrl.verifyMultiCmd(act_tick, tAAD);
+ act_at = ctrl->verifyMultiCmd(act_tick, tAAD);
else
- act_at = ctrl.verifySingleCmd(act_tick);
+ act_at = ctrl->verifySingleCmd(act_tick);
DPRINTF(DRAM, "Activate at tick %d\n", act_at);
@@ -996,7 +982,7 @@
// Issuing an explicit PRE command
// Verify that we have command bandwidth to issue the precharge
// if not, shift to next burst window
- pre_at = ctrl.verifySingleCmd(pre_tick);
+ pre_at = ctrl->verifySingleCmd(pre_tick);
// enforce tPPD
for (int i = 0; i < banksPerRank; i++) {
rank_ref.banks[i].preAllowedAt = std::max(pre_at + tPPD,
@@ -1046,7 +1032,7 @@
// first clean up the burstTick set, removing old entries
// before adding new entries for next burst
- ctrl.pruneBurstTick();
+ ctrl->pruneBurstTick();
// get the rank
Rank& rank_ref = *ranks[dram_pkt->rank];
@@ -1098,9 +1084,9 @@
// verify that we have command bandwidth to issue the burst
// if not, shift to next burst window
if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) >
clkResyncDelay))
- cmd_at = ctrl.verifyMultiCmd(cmd_at, tCK);
+ cmd_at = ctrl->verifyMultiCmd(cmd_at, tCK);
else
- cmd_at = ctrl.verifySingleCmd(cmd_at);
+ cmd_at = ctrl->verifySingleCmd(cmd_at);
// if we are interleaving bursts, ensure that
// 1) we don't double interleave on next burst issue
@@ -1200,9 +1186,9 @@
// either look at the read queue or write queue
const std::vector<DRAMPacketQueue>& queue =
- ctrl.selQueue(dram_pkt->isRead());
+ ctrl->selQueue(dram_pkt->isRead());
- for (uint8_t i = 0; i < ctrl.numPriorities(); ++i) {
+ for (uint8_t i = 0; i < ctrl->numPriorities(); ++i) {
auto p = queue[i].begin();
// keep on looking until we find a hit or reach the end of the
// queue
@@ -1273,6 +1259,7 @@
// Update latency stats
stats.totMemAccLat += dram_pkt->readyTime - dram_pkt->entryTime;
stats.totQLat += cmd_at - dram_pkt->entryTime;
+ stats.totBusLat += tBURST;
} else {
// Schedule write done event to decrement event count
// after the readyTime has been reached
@@ -1338,13 +1325,9 @@
// Update latency stats
stats.masterReadTotalLat[dram_pkt->masterId()] +=
dram_pkt->readyTime - dram_pkt->entryTime;
-
- stats.bytesRead += dram->bytesPerBurst();
- stats.totBusLat += dram->burstDly();
stats.masterReadBytes[dram_pkt->masterId()] += dram_pkt->size;
} else {
++writesThisTime;
- stats.bytesWritten += dram->bytesPerBurst();
stats.masterWriteBytes[dram_pkt->masterId()] += dram_pkt->size;
stats.masterWriteTotalLat[dram_pkt->masterId()] +=
dram_pkt->readyTime - dram_pkt->entryTime;
@@ -1446,8 +1429,9 @@
// Figure out which read request goes next
// If we are changing command type, incorporate the minimum
- // bus turnaround delay which will be tCS (different rank)
case
- to_read = chooseNext((*queue), switched_cmd_type ? tCS :
0);
+ // bus turnaround delay which will be rank to rank delay
+ to_read = chooseNext((*queue), switched_cmd_type ?
+ dram->rankDelay() : 0);
if (to_read != queue->end()) {
// candidate read found
@@ -1526,7 +1510,8 @@
// If we are changing command type, incorporate the minimum
// bus turnaround delay
to_write = chooseNext((*queue),
- switched_cmd_type ? std::min(dram->minRdToWr(),
tCS) : 0);
+ switched_cmd_type ? std::min(dram->minRdToWr(),
+ dram->rankDelay()) : 0);
if (to_write != queue->end()) {
write_found = true;
@@ -1599,11 +1584,8 @@
}
}
-DRAMInterface::DRAMInterface(DRAMCtrl& _ctrl,
- const DRAMCtrlParams* _p,
- const uint64_t capacity,
- const AddrRange range)
- : SimObject(_p), ctrl(_ctrl),
+DRAMInterface::DRAMInterface(const DRAMInterfaceParams* _p)
+ : AbstractMemory(_p),
addrMapping(_p->addr_mapping),
burstSize((_p->devices_per_rank * _p->burst_length *
_p->device_bus_width) / 8),
@@ -1618,7 +1600,7 @@
bankGroupsPerRank(_p->bank_groups_per_rank),
bankGroupArch(_p->bank_groups_per_rank > 0),
banksPerRank(_p->banks_per_rank), rowsPerBank(0),
- tCK(_p->tCK), tCL(_p->tCL), tBURST(_p->tBURST),
+ tCK(_p->tCK), tCS(_p->tCS), tCL(_p->tCL), tBURST(_p->tBURST),
tBURST_MIN(_p->tBURST_MIN), tBURST_MAX(_p->tBURST_MAX),
tRTW(_p->tRTW),
tCCD_L_WR(_p->tCCD_L_WR), tCCD_L(_p->tCCD_L), tRCD(_p->tRCD),
tRP(_p->tRP), tRAS(_p->tRAS), tWR(_p->tWR), tRTP(_p->tRTP),
@@ -1634,12 +1616,12 @@
wrToRdDly(tCL + tBURST + _p->tWTR), rdToWrDly(tBURST + tRTW),
wrToRdDlySameBG(tCL + _p->tBURST_MAX + _p->tWTR_L),
rdToWrDlySameBG(tRTW + _p->tBURST_MAX),
- rankToRankDly(ctrl.rankDelay() + tBURST),
+ rankToRankDly(tCS + tBURST),
pageMgmt(_p->page_policy),
maxAccessesPerRow(_p->max_accesses_per_row),
timeStampOffset(0), activeRank(0),
enableDRAMPowerdown(_p->enable_dram_powerdown),
- stats(_ctrl, *this)
+ stats(*this)
{
fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, "
"must be a power of two\n", burstSize);
@@ -1651,7 +1633,7 @@
for (int i = 0; i < ranksPerChannel; i++) {
DPRINTF(DRAM, "Creating DRAM rank %d \n", i);
- Rank* rank = new Rank(ctrl, _p, i, *this);
+ Rank* rank = new Rank(_p, i, *this);
ranks.push_back(rank);
}
@@ -1659,6 +1641,11 @@
uint64_t deviceCapacity = deviceSize / (1024 * 1024) * devicesPerRank *
ranksPerChannel;
+ uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
+
+ DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
+ AbstractMemory::size());
+
// if actual DRAM size does not match memory capacity in system warn!
if (deviceCapacity != capacity / (1024 * 1024))
warn("DRAM device capacity (%d Mbytes) does not match the "
@@ -1713,8 +1700,10 @@
}
void
-DRAMInterface::init(AddrRange range)
+DRAMInterface::init()
{
+ AbstractMemory::init();
+
// a bit of sanity checks on the interleaving, save it for here to
// ensure that the system pointer is initialised
if (range.interleaved()) {
@@ -1736,7 +1725,7 @@
// channel striping has to be done at a granularity that
// is equal or larger to a cache line
- if (ctrl.system()->cacheLineSize() > range.granularity()) {
+ if (system()->cacheLineSize() > range.granularity()) {
fatal("Channel interleaving of %s must be at least as
large "
"as the cache line size\n", name());
}
@@ -1755,8 +1744,10 @@
void
DRAMInterface::startup()
{
- // timestamp offset should be in clock cycles for DRAMPower
- timeStampOffset = divCeil(curTick(), tCK);
+ if (system()->isTimingMode()) {
+ // timestamp offset should be in clock cycles for DRAMPower
+ timeStampOffset = divCeil(curTick(), tCK);
+ }
for (auto r : ranks) {
r->startup(curTick() + tREFI - tRP);
@@ -1802,7 +1793,7 @@
}
void
-DRAMInterface::respondEventDRAM(uint8_t rank)
+DRAMInterface::respondEvent(uint8_t rank)
{
Rank& rank_ref = *ranks[rank];
@@ -1943,7 +1934,7 @@
std::max(ranks[i]->banks[j].preAllowedAt, curTick()) +
tRP;
// When is the earliest the R/W burst can issue?
- const Tick col_allowed_at = ctrl.inReadBusState(false) ?
+ const Tick col_allowed_at = ctrl->inReadBusState(false) ?
ranks[i]->banks[j].rdAllowedAt :
ranks[i]->banks[j].wrAllowedAt;
Tick col_at = std::max(col_allowed_at, act_at + tRCD);
@@ -1983,9 +1974,15 @@
return make_pair(bank_mask, hidden_bank_prep);
}
-DRAMInterface::Rank::Rank(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p, int
_rank,
- DRAMInterface& _dram)
- : EventManager(&_ctrl), ctrl(_ctrl), dram(_dram),
+DRAMInterface*
+DRAMInterfaceParams::create()
+{
+ return new DRAMInterface(this);
+}
+
+DRAMInterface::Rank::Rank(const DRAMInterfaceParams* _p,
+ int _rank, DRAMInterface& _dram)
+ : EventManager(&_dram), dram(_dram),
pwrStateTrans(PWR_IDLE), pwrStatePostRefresh(PWR_IDLE),
pwrStateTick(0), refreshDueAt(0), pwrState(PWR_IDLE),
refreshState(REF_IDLE), inLowPowerState(false), rank(_rank),
@@ -1998,7 +1995,7 @@
refreshEvent([this]{ processRefreshEvent(); }, name()),
powerEvent([this]{ processPowerEvent(); }, name()),
wakeUpEvent([this]{ processWakeUpEvent(); }, name()),
- stats(_ctrl, *this)
+ stats(_dram, *this)
{
for (int b = 0; b < _p->banks_per_rank; b++) {
banks[b].bank = b;
@@ -2049,8 +2046,10 @@
DRAMInterface::Rank::isQueueEmpty() const
{
// check commmands in Q based on current bus direction
- bool no_queued_cmds = (ctrl.inReadBusState(true) && (readEntries == 0))
- || (!ctrl.inReadBusState(true) && (writeEntries ==
0));
+ bool no_queued_cmds = (dram.ctrl->inReadBusState(true) &&
+ (readEntries == 0))
+ || (!dram.ctrl->inReadBusState(true) &&
+ (writeEntries == 0));
return no_queued_cmds;
}
@@ -2174,7 +2173,7 @@
// if a request is at the moment being handled and this request is
// accessing the current rank then wait for it to finish
if ((rank == dram.activeRank)
- && (ctrl.nextReqEvent.scheduled())) {
+ && (dram.ctrl->nextReqEvent.scheduled())) {
// hand control over to the request loop until it is
// evaluated next
DPRINTF(DRAM, "Refresh awaiting draining\n");
@@ -2249,7 +2248,7 @@
// or have outstanding ACT,RD/WR,Auto-PRE sequence scheduled
// should have outstanding precharge or read response event
assert(prechargeEvent.scheduled() ||
- ctrl.respondEvent.scheduled());
+ dram.ctrl->respondEvent.scheduled());
// will start refresh when pwrState transitions to IDLE
}
@@ -2309,8 +2308,8 @@
assert(!powerEvent.scheduled());
- if ((ctrl.drainState() == DrainState::Draining) ||
- (ctrl.drainState() == DrainState::Drained)) {
+ if ((dram.ctrl->drainState() == DrainState::Draining) ||
+ (dram.ctrl->drainState() == DrainState::Drained)) {
// if draining, do not re-enter low-power mode.
// simply go to IDLE and wait
schedulePowerEvent(PWR_IDLE, curTick());
@@ -2535,10 +2534,10 @@
}
// completed refresh event, ensure next request is scheduled
- if (!ctrl.nextReqEvent.scheduled()) {
+ if (!dram.ctrl->nextReqEvent.scheduled()) {
DPRINTF(DRAM, "Scheduling next request after refreshing"
" rank %d\n", rank);
- schedule(ctrl.nextReqEvent, curTick());
+ schedule(dram.ctrl->nextReqEvent, curTick());
}
}
@@ -2597,8 +2596,8 @@
// bypass auto-refresh and go straight to SREF, where memory
// will issue refresh immediately upon entry
if (pwrStatePostRefresh == PWR_PRE_PDN && isQueueEmpty() &&
- (ctrl.drainState() != DrainState::Draining) &&
- (ctrl.drainState() != DrainState::Drained) &&
+ (dram.ctrl->drainState() != DrainState::Draining) &&
+ (dram.ctrl->drainState() != DrainState::Drained) &&
dram.enableDRAMPowerdown) {
DPRINTF(DRAMState, "Rank %d bypassing refresh and
transitioning "
"to self refresh at %11u tick\n", rank, curTick());
@@ -2669,7 +2668,7 @@
// power (mW) = ----------- * ----------
// time (tick) tick_frequency
stats.averagePower = (stats.totalEnergy.value() /
- (curTick() - ctrl.lastStatsResetTick)) *
+ (curTick() - dram.ctrl->lastStatsResetTick)) *
(SimClock::Frequency / 1000000000.0);
}
@@ -2699,7 +2698,7 @@
bool
DRAMInterface::Rank::forceSelfRefreshExit() const {
return (readEntries != 0) ||
- (!ctrl.inReadBusState(true) && (writeEntries != 0));
+ (!dram.ctrl->inReadBusState(true) && (writeEntries != 0));
}
DRAMCtrl::CtrlStats::CtrlStats(DRAMCtrl &_ctrl)
@@ -2710,15 +2709,15 @@
ADD_STAT(writeReqs, "Number of write requests accepted"),
ADD_STAT(readBursts,
- "Number of DRAM read bursts, "
+ "Number of controller read bursts, "
"including those serviced by the write queue"),
ADD_STAT(writeBursts,
- "Number of DRAM write bursts, "
+ "Number of controller write bursts, "
"including those merged in the write queue"),
ADD_STAT(servicedByWrQ,
- "Number of DRAM read bursts serviced by the write queue"),
+ "Number of controller read bursts serviced by the write
queue"),
ADD_STAT(mergedWrBursts,
- "Number of DRAM write bursts merged with an existing one"),
+ "Number of controller write bursts merged with an existing
one"),
ADD_STAT(neitherReadNorWriteReqs,
"Number of requests that are neither read nor write"),
@@ -2726,9 +2725,6 @@
ADD_STAT(avgRdQLen, "Average read queue length when enqueuing"),
ADD_STAT(avgWrQLen, "Average write queue length when enqueuing"),
- ADD_STAT(totBusLat, "Total ticks spent in databus transfers"),
- ADD_STAT(avgBusLat, "Average bus latency per DRAM burst"),
-
ADD_STAT(numRdRetry, "Number of times read queue was full causing
retry"),
ADD_STAT(numWrRetry, "Number of times write queue was full causing
retry"),
@@ -2743,22 +2739,13 @@
ADD_STAT(wrPerTurnAround,
"Writes before turning the bus around for reads"),
- ADD_STAT(bytesRead, "Total number of bytes read from memory"),
ADD_STAT(bytesReadWrQ, "Total number of bytes read from write queue"),
- ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"),
ADD_STAT(bytesReadSys, "Total read bytes from the system interface
side"),
ADD_STAT(bytesWrittenSys,
"Total written bytes from the system interface side"),
- ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiByte/s"),
- ADD_STAT(avgWrBW, "Average achieved write bandwidth in MiByte/s"),
ADD_STAT(avgRdBWSys, "Average system read bandwidth in MiByte/s"),
ADD_STAT(avgWrBWSys, "Average system write bandwidth in MiByte/s"),
- ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
-
- ADD_STAT(busUtil, "Data bus utilization in percentage"),
- ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"),
- ADD_STAT(busUtilWrite, "Data bus utilization in percentage for
writes"),
ADD_STAT(totGap, "Total gap between requests"),
ADD_STAT(avgGap, "Average gap between requests"),
@@ -2790,12 +2777,11 @@
{
using namespace Stats;
- assert(ctrl._system);
- const auto max_masters = ctrl._system->maxMasters();
+ assert(ctrl.system());
+ const auto max_masters = ctrl.system()->maxMasters();
avgRdQLen.precision(2);
avgWrQLen.precision(2);
- avgBusLat.precision(2);
readPktSize.init(ceilLog2(ctrl.dram->bytesPerBurst()) + 1);
writePktSize.init(ceilLog2(ctrl.dram->bytesPerBurst()) + 1);
@@ -2810,14 +2796,9 @@
.init(ctrl.writeBufferSize)
.flags(nozero);
- avgRdBW.precision(2);
- avgWrBW.precision(2);
avgRdBWSys.precision(2);
avgWrBWSys.precision(2);
- peakBW.precision(2);
- busUtil.precision(2);
avgGap.precision(2);
- busUtilWrite.precision(2);
// per-master bytes read and written to memory
masterReadBytes
@@ -2849,9 +2830,6 @@
.flags(nonan)
.precision(2);
- busUtilRead
- .precision(2);
-
masterWriteRate
.flags(nozero | nonan)
.precision(12);
@@ -2865,7 +2843,7 @@
.precision(2);
for (int i = 0; i < max_masters; i++) {
- const std::string master = ctrl._system->getMasterName(i);
+ const std::string master = ctrl.system()->getMasterName(i);
masterReadBytes.subname(i, master);
masterReadRate.subname(i, master);
masterWriteBytes.subname(i, master);
@@ -2879,22 +2857,11 @@
}
// Formula stats
- avgBusLat = totBusLat / (readBursts - servicedByWrQ);
-
- avgRdBW = (bytesRead / 1000000) / simSeconds;
- avgWrBW = (bytesWritten / 1000000) / simSeconds;
avgRdBWSys = (bytesReadSys / 1000000) / simSeconds;
avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds;
- peakBW = (SimClock::Frequency / ctrl.dram->burstDataDly()) *
- ctrl.dram->bytesPerBurst() / 1000000;
-
- busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
avgGap = totGap / (readReqs + writeReqs);
- busUtilRead = avgRdBW / peakBW * 100;
- busUtilWrite = avgWrBW / peakBW * 100;
-
masterReadRate = masterReadBytes / simSeconds;
masterWriteRate = masterWriteBytes / simSeconds;
masterReadAvgLat = masterReadTotalLat / masterReadAccesses;
@@ -2907,8 +2874,8 @@
ctrl.lastStatsResetTick = curTick();
}
-DRAMInterface::DRAMStats::DRAMStats(DRAMCtrl &_ctrl, DRAMInterface &_dram)
- : Stats::Group(&_ctrl, csprintf("dram").c_str()),
+DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram)
+ : Stats::Group(&_dram),
dram(_dram),
ADD_STAT(readBursts, "Number of DRAM read bursts"),
@@ -2918,10 +2885,13 @@
ADD_STAT(perBankWrBursts, "Per bank write bursts"),
ADD_STAT(totQLat, "Total ticks spent queuing"),
+ ADD_STAT(totBusLat, "Total ticks spent in databus transfers"),
ADD_STAT(totMemAccLat,
"Total ticks spent from burst creation until serviced "
"by the DRAM"),
+
ADD_STAT(avgQLat, "Average queueing delay per DRAM burst"),
+ ADD_STAT(avgBusLat, "Average bus latency per DRAM burst"),
ADD_STAT(avgMemAccLat, "Average memory access latency per DRAM burst"),
ADD_STAT(readRowHits, "Number of row buffer hits during reads"),
@@ -2934,6 +2904,12 @@
ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"),
ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiBytes/s"),
ADD_STAT(avgWrBW, "Average DRAM write bandwidth in MiBytes/s"),
+ ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
+
+ ADD_STAT(busUtil, "Data bus utilization in percentage"),
+ ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"),
+ ADD_STAT(busUtilWrite, "Data bus utilization in percentage for
writes"),
+
ADD_STAT(pageHitRate, "Row buffer hit rate, read and write combined")
{
@@ -2945,6 +2921,7 @@
using namespace Stats;
avgQLat.precision(2);
+ avgBusLat.precision(2);
avgMemAccLat.precision(2);
readRowHitRate.precision(2);
@@ -2958,10 +2935,16 @@
dram.maxAccessesPerRow : dram.rowBufferSize)
.flags(nozero);
+ peakBW.precision(2);
+ busUtil.precision(2);
+ busUtilWrite.precision(2);
+ busUtilRead.precision(2);
+
pageHitRate.precision(2);
// Formula stats
avgQLat = totQLat / readBursts;
+ avgBusLat = totBusLat / readBursts;
avgMemAccLat = totMemAccLat / readBursts;
readRowHitRate = (readRowHits / readBursts) * 100;
@@ -2969,13 +2952,19 @@
avgRdBW = (bytesRead / 1000000) / simSeconds;
avgWrBW = (bytesWritten / 1000000) / simSeconds;
+ peakBW = (SimClock::Frequency / dram.burstDataDly()) *
+ dram.bytesPerBurst() / 1000000;
+
+ busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
+ busUtilRead = avgRdBW / peakBW * 100;
+ busUtilWrite = avgWrBW / peakBW * 100;
pageHitRate = (writeRowHits + readRowHits) /
(writeBursts + readBursts) * 100;
}
-DRAMInterface::RankStats::RankStats(DRAMCtrl &_ctrl, Rank &_rank)
- : Stats::Group(&_ctrl, csprintf("dram_rank%d", _rank.rank).c_str()),
+DRAMInterface::RankStats::RankStats(DRAMInterface &_dram, Rank &_rank)
+ : Stats::Group(&_dram, csprintf("rank%d", _rank.rank).c_str()),
rank(_rank),
ADD_STAT(actEnergy, "Energy for activate commands per rank (pJ)"),
@@ -3034,7 +3023,7 @@
DRAMCtrl::recvFunctional(PacketPtr pkt)
{
// rely on the abstract memory
- functionalAccess(pkt);
+ dram->functionalAccess(pkt);
}
Port &
@@ -3099,7 +3088,7 @@
DRAMCtrl::MemoryPort::getAddrRanges() const
{
AddrRangeList ranges;
- ranges.push_back(ctrl.getAddrRange());
+ ranges.push_back(ctrl.dram->getAddrRange());
return ranges;
}
diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh
index 4464f7a..1b6d8b5 100644
--- a/src/mem/dram_ctrl.hh
+++ b/src/mem/dram_ctrl.hh
@@ -56,12 +56,15 @@
#include "enums/AddrMap.hh"
#include "enums/MemSched.hh"
#include "enums/PageManage.hh"
+#include "mem/abstract_mem.hh"
#include "mem/drampower.hh"
#include "mem/qos/mem_ctrl.hh"
#include "mem/qport.hh"
#include "params/DRAMCtrl.hh"
#include "sim/eventq.hh"
+class DRAMInterfaceParams;
+
/**
* A basic class to track the bank state, i.e. what row is
* currently open (if any), when is the bank free to accept a new
@@ -243,7 +246,7 @@
* The DRAMInterface includes a class for individual ranks
* and per rank functions.
*/
-class DRAMInterface : public SimObject
+class DRAMInterface : public AbstractMemory
{
private:
/**
@@ -340,7 +343,7 @@
class Rank;
struct RankStats : public Stats::Group
{
- RankStats(DRAMCtrl &ctrl, Rank &rank);
+ RankStats(DRAMInterface &dram, Rank &rank);
void regStats() override;
void resetStats() override;
@@ -406,13 +409,6 @@
*/
class Rank : public EventManager
{
- protected:
-
- /**
- * A reference to the parent DRAMCtrl instance
- */
- DRAMCtrl& ctrl;
-
private:
/**
@@ -532,10 +528,10 @@
*/
Tick lastBurstTick;
- Rank(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p, int _rank,
+ Rank(const DRAMInterfaceParams* _p, int _rank,
DRAMInterface& _dram);
- const std::string name() const { return csprintf("dram_%d", rank);
}
+ const std::string name() const { return csprintf("%d", rank); }
/**
* Kick off accounting for power and refresh states and
@@ -662,9 +658,9 @@
};
/**
- * A reference to the parent DRAMCtrl instance
+ * A pointer to the parent DRAMCtrl instance
*/
- DRAMCtrl& ctrl;
+ DRAMCtrl* ctrl;
/**
* Memory controller configuration initialized based on parameter
@@ -695,6 +691,7 @@
* DRAM timing requirements
*/
const Tick M5_CLASS_VAR_USED tCK;
+ const Tick tCS;
const Tick tCL;
const Tick tBURST;
const Tick tBURST_MIN;
@@ -774,7 +771,7 @@
bool trace = true);
struct DRAMStats : public Stats::Group {
- DRAMStats(DRAMCtrl &ctrl, DRAMInterface &dram);
+ DRAMStats(DRAMInterface &dram);
void regStats() override;
@@ -790,10 +787,12 @@
// Latencies summed over all requests
Stats::Scalar totQLat;
+ Stats::Scalar totBusLat;
Stats::Scalar totMemAccLat;
// Average latencies per request
Stats::Formula avgQLat;
+ Stats::Formula avgBusLat;
Stats::Formula avgMemAccLat;
// Row hit count and rate
@@ -809,6 +808,11 @@
// Average bandwidth
Stats::Formula avgRdBW;
Stats::Formula avgWrBW;
+ Stats::Formula peakBW;
+ // bus utilization
+ Stats::Formula busUtil;
+ Stats::Formula busUtilRead;
+ Stats::Formula busUtilWrite;
Stats::Formula pageHitRate;
};
@@ -820,11 +824,16 @@
std::vector<Rank*> ranks;
public:
+ /** Setting a pointer to the controller */
+ void setCtrl(DRAMCtrl* _ctrl)
+ {
+ ctrl = _ctrl;
+ };
+
/**
* Initialize the DRAM interface and verify parameters
- * @param range is the address range for this interface
*/
- void init(AddrRange range);
+ void init();
/**
* Iterate through dram ranks and instantiate per rank startup routine
@@ -853,6 +862,20 @@
void suspend();
/**
+ * Get an address in a dense range which starts from 0. The input
+ * address is the physical address of the request in an address
+ * space that contains other SimObjects apart from this
+ * controller.
+ *
+ * @param addr The input address which should be in the addrRange
+ * @return An address in the continues range [0, max)
+ */
+ Addr getCtrlAddr(Addr addr)
+ {
+ return range.getOffset(addr);
+ }
+
+ /**
* @return number of bytes in a burst for this interface
*/
uint32_t bytesPerBurst () { return burstSize; };
@@ -887,6 +910,13 @@
*/
Tick minRdToWr () { return tRTW; };
+ /**
+ * Determine the required delay for an access to a different rank
+ *
+ * @return required rank to rank delay
+ */
+ Tick rankDelay() { return tCS; };
+
/*
* Function to calulate RAS cycle time for use within and
* outside of this class
@@ -968,7 +998,7 @@
*
* @param rank Specifies rank associated with read burst
*/
- void respondEventDRAM(uint8_t rank);
+ void respondEvent(uint8_t rank);
/**
* Check the refresh state to determine if refresh needs
@@ -1004,8 +1034,7 @@
virtual void process() { rank->resetStats(); };
};
- DRAMInterface(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p,
- uint64_t capacity, AddrRange range);
+ DRAMInterface(const DRAMInterfaceParams* _p);
};
/**
@@ -1170,20 +1199,6 @@
void accessAndRespond(PacketPtr pkt, Tick static_latency);
/**
- * Get an address in a dense range which starts from 0. The input
- * address is the physical address of the request in an address
- * space that contains other SimObjects apart from this
- * controller.
- *
- * @param addr The intput address which should be in the addrRange
- * @return An address in the continues range [0, max)
- */
- Addr getCtrlAddr(Addr addr)
- {
- return range.getOffset(addr);
- }
-
- /**
* The memory schduler/arbiter - picks which request needs to
* go next, based on the specified policy such as FCFS or FR-FCFS
* and moves it to the head of the queue.
@@ -1265,6 +1280,11 @@
std::unordered_multiset<Tick> burstTicks;
/**
+ * Create pointer to interface of the actual dram media
+ */
+ DRAMInterface* const dram;
+
+ /**
* The following are basic design parameters of the memory
* controller, and are initialized based on parameter values.
* The rowsPerBank is determined based on the capacity, number of
@@ -1279,12 +1299,6 @@
uint32_t readsThisTime;
/**
- * Basic memory timing parameters initialized based on parameter
- * values. These will be used across memory interfaces.
- */
- const Tick tCS;
-
- /**
* Memory controller configuration initialized based on parameter
* values.
*/
@@ -1338,10 +1352,6 @@
// Average queue lengths
Stats::Average avgRdQLen;
Stats::Average avgWrQLen;
- // Latencies summed over all requests
- Stats::Scalar totBusLat;
- // Average latencies per request
- Stats::Formula avgBusLat;
Stats::Scalar numRdRetry;
Stats::Scalar numWrRetry;
@@ -1352,21 +1362,12 @@
Stats::Histogram rdPerTurnAround;
Stats::Histogram wrPerTurnAround;
- Stats::Scalar bytesRead;
Stats::Scalar bytesReadWrQ;
- Stats::Scalar bytesWritten;
Stats::Scalar bytesReadSys;
Stats::Scalar bytesWrittenSys;
// Average bandwidth
- Stats::Formula avgRdBW;
- Stats::Formula avgWrBW;
Stats::Formula avgRdBWSys;
Stats::Formula avgWrBWSys;
- Stats::Formula peakBW;
- // bus utilization
- Stats::Formula busUtil;
- Stats::Formula busUtilRead;
- Stats::Formula busUtilWrite;
Stats::Scalar totGap;
Stats::Formula avgGap;
@@ -1405,11 +1406,6 @@
/** The time when stats were last reset used to calculate average
power */
Tick lastStatsResetTick;
- /**
- * Create pointer to interfasce to the actual media
- */
- DRAMInterface* dram;
-
DRAMCtrl(const DRAMCtrlParams* p);
DrainState drain() override;
@@ -1458,13 +1454,6 @@
};
/**
- * Determine the required delay for an access to a different rank
- *
- * @return required rank to rank delay
- */
- Tick rankDelay() { return tCS; };
-
- /**
* Check the current direction of the memory channel
*
* @param next_state Check either the current or next bus state
diff --git a/src/mem/drampower.cc b/src/mem/drampower.cc
index f506928..7a44aa1 100644
--- a/src/mem/drampower.cc
+++ b/src/mem/drampower.cc
@@ -40,13 +40,13 @@
#include "base/intmath.hh"
#include "sim/core.hh"
-DRAMPower::DRAMPower(const DRAMCtrlParams* p, bool include_io) :
+DRAMPower::DRAMPower(const DRAMInterfaceParams* p, bool include_io) :
powerlib(libDRAMPower(getMemSpec(p), include_io))
{
}
Data::MemArchitectureSpec
-DRAMPower::getArchParams(const DRAMCtrlParams* p)
+DRAMPower::getArchParams(const DRAMInterfaceParams* p)
{
Data::MemArchitectureSpec archSpec;
archSpec.burstLength = p->burst_length;
@@ -68,7 +68,7 @@
}
Data::MemTimingSpec
-DRAMPower::getTimingParams(const DRAMCtrlParams* p)
+DRAMPower::getTimingParams(const DRAMInterfaceParams* p)
{
// Set the values that are used for power calculations and ignore
// the ones only used by the controller functionality in DRAMPower
@@ -100,7 +100,7 @@
}
Data::MemPowerSpec
-DRAMPower::getPowerParams(const DRAMCtrlParams* p)
+DRAMPower::getPowerParams(const DRAMInterfaceParams* p)
{
// All DRAMPower currents are in mA
Data::MemPowerSpec powerSpec;
@@ -132,7 +132,7 @@
}
Data::MemorySpecification
-DRAMPower::getMemSpec(const DRAMCtrlParams* p)
+DRAMPower::getMemSpec(const DRAMInterfaceParams* p)
{
Data::MemorySpecification memSpec;
memSpec.memArchSpec = getArchParams(p);
@@ -142,13 +142,13 @@
}
bool
-DRAMPower::hasTwoVDD(const DRAMCtrlParams* p)
+DRAMPower::hasTwoVDD(const DRAMInterfaceParams* p)
{
return p->VDD2 == 0 ? false : true;
}
uint8_t
-DRAMPower::getDataRate(const DRAMCtrlParams* p)
+DRAMPower::getDataRate(const DRAMInterfaceParams* p)
{
uint32_t burst_cycles = divCeil(p->tBURST_MAX, p->tCK);
uint8_t data_rate = p->burst_length / burst_cycles;
diff --git a/src/mem/drampower.hh b/src/mem/drampower.hh
index ed47476..da68a78 100644
--- a/src/mem/drampower.hh
+++ b/src/mem/drampower.hh
@@ -44,7 +44,7 @@
#define __MEM_DRAM_POWER_HH__
#include "libdrampower/LibDRAMPower.h"
-#include "params/DRAMCtrl.hh"
+#include "params/DRAMInterface.hh"
/**
* DRAMPower is a standalone tool which calculates the power consumed by a
@@ -57,43 +57,44 @@
/**
* Transform the architechture parameters defined in
- * DRAMCtrlParams to the memSpec of DRAMPower
+ * DRAMInterfaceParams to the memSpec of DRAMPower
*/
- static Data::MemArchitectureSpec getArchParams(const DRAMCtrlParams*
p);
+ static Data::MemArchitectureSpec getArchParams(
+ const DRAMInterfaceParams* p);
/**
- * Transforms the timing parameters defined in DRAMCtrlParams to
+ * Transforms the timing parameters defined in DRAMInterfaceParams to
* the memSpec of DRAMPower
*/
- static Data::MemTimingSpec getTimingParams(const DRAMCtrlParams* p);
+ static Data::MemTimingSpec getTimingParams(const DRAMInterfaceParams*
p);
/**
* Transforms the power and current parameters defined in
- * DRAMCtrlParam to the memSpec of DRAMPower
+ * DRAMInterfaceParams to the memSpec of DRAMPower
*/
- static Data::MemPowerSpec getPowerParams(const DRAMCtrlParams* p);
+ static Data::MemPowerSpec getPowerParams(const DRAMInterfaceParams* p);
/**
* Determine data rate, either one or two.
*/
- static uint8_t getDataRate(const DRAMCtrlParams* p);
+ static uint8_t getDataRate(const DRAMInterfaceParams* p);
/**
* Determine if DRAM has two voltage domains (or one)
*/
- static bool hasTwoVDD(const DRAMCtrlParams* p);
+ static bool hasTwoVDD(const DRAMInterfaceParams* p);
/**
- * Return an instance of MemSpec based on the DRAMCtrlParams
+ * Return an instance of MemSpec based on the DRAMInterfaceParams
*/
- static Data::MemorySpecification getMemSpec(const DRAMCtrlParams* p);
+ static Data::MemorySpecification getMemSpec(const DRAMInterfaceParams*
p);
public:
// Instance of DRAMPower Library
libDRAMPower powerlib;
- DRAMPower(const DRAMCtrlParams* p, bool include_io);
+ DRAMPower(const DRAMInterfaceParams* p, bool include_io);
};
diff --git a/src/mem/qos/QoSMemCtrl.py b/src/mem/qos/QoSMemCtrl.py
index 1cd3f0b..f55105b 100644
--- a/src/mem/qos/QoSMemCtrl.py
+++ b/src/mem/qos/QoSMemCtrl.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -34,18 +34,21 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from m5.params import *
-from m5.objects.AbstractMemory import AbstractMemory
+from m5.proxy import *
+from m5.objects.ClockedObject import ClockedObject
from m5.objects.QoSTurnaround import *
# QoS Queue Selection policy used to select packets among same-QoS queues
class QoSQPolicy(Enum): vals = ["fifo", "lifo", "lrg"]
-class QoSMemCtrl(AbstractMemory):
+class QoSMemCtrl(ClockedObject):
type = 'QoSMemCtrl'
cxx_header = "mem/qos/mem_ctrl.hh"
cxx_class = 'QoS::MemCtrl'
abstract = True
+ system = Param.System(Parent.any, "System that the controller belongs
to.")
+
##### QoS support parameters ####
# Number of priorities in the system
diff --git a/src/mem/qos/QoSMemSinkCtrl.py b/src/mem/qos/QoSMemSinkCtrl.py
index 572cad5..03a988a 100644
--- a/src/mem/qos/QoSMemSinkCtrl.py
+++ b/src/mem/qos/QoSMemSinkCtrl.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -37,6 +37,7 @@
from m5.params import *
from m5.objects.QoSMemCtrl import *
+from m5.objects.QoSMemSinkInterface import *
class QoSMemSinkCtrl(QoSMemCtrl):
type = 'QoSMemSinkCtrl'
@@ -44,6 +45,10 @@
cxx_class = "QoS::MemSinkCtrl"
port = SlavePort("Slave ports")
+
+ intf = Param.QoSMemSinkInterface(QoSMemSinkInterface(), "Interface
to "\
+ "memory")
+
# the basic configuration of the controller architecture, note
# that each entry corresponds to a burst for the specific DRAM
# configuration (e.g. x32 with burst length 8 is 32 bytes) and not
@@ -59,5 +64,3 @@
# response latency - time to issue a response once a request is
serviced
response_latency = Param.Latency("20ns", "Memory response latency")
-
-
diff --git a/src/mem/qos/QoSMemSinkInterface.py
b/src/mem/qos/QoSMemSinkInterface.py
new file mode 100644
index 0000000..fd8254f
--- /dev/null
+++ b/src/mem/qos/QoSMemSinkInterface.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2020 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Matteo Andreozzi
+# Wendy Elsasser
+
+from m5.objects.AbstractMemory import AbstractMemory
+
+class QoSMemSinkInterface(AbstractMemory):
+ type = 'QoSMemSinkInterface'
+ cxx_header = "mem/qos/mem_sink.hh"
diff --git a/src/mem/qos/SConscript b/src/mem/qos/SConscript
index f8601b6..1d90f9c 100644
--- a/src/mem/qos/SConscript
+++ b/src/mem/qos/SConscript
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
@@ -37,6 +37,7 @@
SimObject('QoSMemCtrl.py')
SimObject('QoSMemSinkCtrl.py')
+SimObject('QoSMemSinkInterface.py')
SimObject('QoSPolicy.py')
SimObject('QoSTurnaround.py')
diff --git a/src/mem/qos/mem_ctrl.cc b/src/mem/qos/mem_ctrl.cc
index 50e6035..190960b 100644
--- a/src/mem/qos/mem_ctrl.cc
+++ b/src/mem/qos/mem_ctrl.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited
+ * Copyright (c) 2017-2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -42,7 +42,7 @@
namespace QoS {
MemCtrl::MemCtrl(const QoSMemCtrlParams * p)
- : AbstractMemory(p),
+ : ClockedObject(p),
policy(p->qos_policy),
turnPolicy(p->qos_turnaround_policy),
queuePolicy(QueuePolicy::create(p)),
@@ -51,7 +51,8 @@
qosSyncroScheduler(p->qos_syncro_scheduler),
totalReadQueueSize(0), totalWriteQueueSize(0),
busState(READ), busStateNext(READ),
- stats(*this)
+ stats(*this),
+ _system(p->system)
{
// Set the priority policy
if (policy) {
@@ -77,12 +78,6 @@
{}
void
-MemCtrl::init()
-{
- AbstractMemory::init();
-}
-
-void
MemCtrl::logRequest(BusState dir, MasterID m_id, uint8_t qos,
Addr addr, uint64_t entries)
{
diff --git a/src/mem/qos/mem_ctrl.hh b/src/mem/qos/mem_ctrl.hh
index 0e29fcc..50ddc94 100644
--- a/src/mem/qos/mem_ctrl.hh
+++ b/src/mem/qos/mem_ctrl.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited
+ * Copyright (c) 2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -36,9 +36,9 @@
*/
#include "debug/QOS.hh"
-#include "mem/abstract_mem.hh"
-#include "mem/qos/q_policy.hh"
+#include "mem/mem_object.hh"
#include "mem/qos/policy.hh"
+#include "mem/qos/q_policy.hh"
#include "params/QoSMemCtrl.hh"
#include "sim/system.hh"
@@ -49,6 +49,8 @@
#ifndef __MEM_QOS_MEM_CTRL_HH__
#define __MEM_QOS_MEM_CTRL_HH__
+class System;
+
namespace QoS {
/**
@@ -56,7 +58,7 @@
* which support QoS - it provides access to a set of QoS
* scheduling policies
*/
-class MemCtrl: public AbstractMemory
+class MemCtrl: public ClockedObject
{
public:
/** Bus Direction */
@@ -151,6 +153,9 @@
Stats::Scalar numStayWriteState;
} stats;
+ /** Pointer to the System object */
+ System* _system;
+
/**
* Initializes dynamically counters and
* statistics for a given Master
@@ -266,11 +271,6 @@
virtual ~MemCtrl();
/**
- * Initializes this object
- */
- void init() override;
-
- /**
* Gets the current bus state
*
* @return current bus state
@@ -346,6 +346,10 @@
* @return total number of priority levels
*/
uint8_t numPriorities() const { return _numPriorities; }
+
+ /** read the system pointer
+ * @return pointer to the system object */
+ System* system() const { return _system; }
};
template<typename Queues>
diff --git a/src/mem/qos/mem_sink.cc b/src/mem/qos/mem_sink.cc
index 1f104e4..fb06b9d 100644
--- a/src/mem/qos/mem_sink.cc
+++ b/src/mem/qos/mem_sink.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited
+ * Copyright (c) 2018-2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -40,6 +40,7 @@
#include "debug/Drain.hh"
#include "debug/QOS.hh"
#include "mem_sink.hh"
+#include "params/QoSMemSinkInterface.hh"
#include "sim/system.hh"
namespace QoS {
@@ -50,12 +51,15 @@
memoryPacketSize(p->memory_packet_size),
readBufferSize(p->read_buffer_size),
writeBufferSize(p->write_buffer_size), port(name() + ".port", *this),
+ intf(p->intf),
retryRdReq(false), retryWrReq(false), nextRequest(0),
nextReqEvent(this)
{
// Resize read and write queue to allocate space
// for configured QoS priorities
readQueue.resize(numPriorities());
writeQueue.resize(numPriorities());
+
+ intf->setMemCtrl(this);
}
MemSinkCtrl::~MemSinkCtrl()
@@ -92,7 +96,7 @@
"%s Should not see packets where cache is responding\n",
__func__);
- access(pkt);
+ intf->access(pkt);
return responseLatency;
}
@@ -101,7 +105,7 @@
{
pkt->pushLabel(name());
- functionalAccess(pkt);
+ intf->functionalAccess(pkt);
pkt->popLabel();
}
@@ -279,7 +283,7 @@
// Do the actual memory access which also turns the packet
// into a response
- access(pkt);
+ intf->access(pkt);
// Log the response
logResponse(pkt->isRead()? READ : WRITE,
@@ -351,7 +355,7 @@
MemSinkCtrl::MemoryPort::getAddrRanges() const
{
AddrRangeList ranges;
- ranges.push_back(memory.getAddrRange());
+ ranges.push_back(memory.intf->getAddrRange());
return ranges;
}
@@ -390,3 +394,19 @@
return new QoS::MemSinkCtrl(this);
}
+QoSMemSinkInterface::QoSMemSinkInterface(const QoSMemSinkInterfaceParams*
_p)
+ : AbstractMemory(_p)
+{
+}
+
+void
+QoSMemSinkInterface::init()
+{
+ AbstractMemory::init();
+}
+
+QoSMemSinkInterface*
+QoSMemSinkInterfaceParams::create()
+{
+ return new QoSMemSinkInterface(this);
+}
diff --git a/src/mem/qos/mem_sink.hh b/src/mem/qos/mem_sink.hh
index 9a51269..3b10abd 100644
--- a/src/mem/qos/mem_sink.hh
+++ b/src/mem/qos/mem_sink.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited
+ * Copyright (c) 2018-2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -41,10 +41,14 @@
#ifndef __MEM_QOS_MEM_SINK_HH__
#define __MEM_QOS_MEM_SINK_HH__
+#include "mem/abstract_mem.hh"
#include "mem/qos/mem_ctrl.hh"
#include "mem/qport.hh"
#include "params/QoSMemSinkCtrl.hh"
+class QoSMemSinkInterfaceParams;
+class QoSMemSinkInterface;
+
namespace QoS {
/**
@@ -163,6 +167,11 @@
/** Memory slave port */
MemoryPort port;
+ /**
+ * Create pointer to interface of actual media
+ */
+ QoSMemSinkInterface* const intf;
+
/** Read request pending */
bool retryRdReq;
@@ -244,4 +253,23 @@
} // namespace QoS
+class QoSMemSinkInterface : public AbstractMemory
+{
+ public:
+ /** Initialize the memory interface */
+ void init();
+
+ /** Setting a pointer to the interface */
+ void setMemCtrl(QoS::MemSinkCtrl* _ctrl)
+ {
+ ctrl = _ctrl;
+ };
+
+ /** Pointer to the controller */
+ QoS::MemSinkCtrl* ctrl;
+
+ QoSMemSinkInterface(const QoSMemSinkInterfaceParams* _p);
+};
+
+
#endif /* __MEM_QOS_MEM_SINK_HH__ */
diff --git a/tests/configs/base_config.py b/tests/configs/base_config.py
index 0f79938..e2d3851 100644
--- a/tests/configs/base_config.py
+++ b/tests/configs/base_config.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012-2013, 2017-2018 ARM Limited
+# Copyright (c) 2012-2013, 2017-2018, 2020 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -221,7 +221,12 @@
super(BaseSESystem, self).init_system(system)
def create_system(self):
- system = System(physmem = self.mem_class(),
+ if issubclass(self.mem_class, m5.objects.DRAMInterface):
+ mem_ctrl = DRAMCtrl()
+ mem_ctrl.dram = self.mem_class()
+ else:
+ mem_ctrl = self.mem_class()
+ system = System(physmem = mem_ctrl,
membus = SystemXBar(),
mem_mode = self.mem_mode,
multi_thread = (self.num_threads > 1))
@@ -275,6 +280,16 @@
# the physmem name to avoid bumping all the reference stats
system.physmem = [self.mem_class(range = r)
for r in system.mem_ranges]
+ if issubclass(self.mem_class, m5.objects.DRAMInterface):
+ mem_ctrls = []
+ for r in system.mem_ranges:
+ mem_ctrl = DRAMCtrl()
+ mem_ctrl.dram = self.mem_class(range = r)
+ mem_ctrls.append(mem_ctrl)
+ system.physmem = mem_ctrls
+ else:
+ system.physmem = [self.mem_class(range = r)
+ for r in system.mem_ranges]
for i in range(len(system.physmem)):
system.physmem[i].port = system.membus.master
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/28968
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I6a368b845d574a713c7196c5671188ca8c1dc5e8
Gerrit-Change-Number: 28968
Gerrit-PatchSet: 1
Gerrit-Owner: Wendy Elsasser <wendy.elsasser(a)arm.com>
Gerrit-MessageType: newchange
JL
Jason Lowe-Power (Gerrit)
Tue, Sep 8, 2020 4:38 PM
Jason Lowe-Power has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/28968 )
Change subject: mem: Make MemCtrl a ClockedObject
......................................................................
mem: Make MemCtrl a ClockedObject
Made DRAMCtrl a ClockedObject, with DRAMInterface
defined as an AbstractMemory. The address
ranges are now defined per interface. Currently
the model only includes a DRAMInterface but this
can be expanded for other media types.
The controller object includes a parameter to the
interface, which is setup when gem5 is configured.
Change-Id: I6a368b845d574a713c7196c5671188ca8c1dc5e8
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28968
Reviewed-by: Jason Lowe-Power <power.jg(a)gmail.com>
Maintainer: Jason Lowe-Power <power.jg(a)gmail.com>
Tested-by: kokoro <noreply+kokoro(a)google.com>
M configs/common/MemConfig.py
M configs/dram/low_power_sweep.py
M configs/dram/sweep.py
M configs/example/memcheck.py
M configs/learning_gem5/part1/simple.py
M configs/learning_gem5/part1/two_level.py
M configs/learning_gem5/part2/simple_cache.py
M configs/learning_gem5/part2/simple_memobj.py
M configs/learning_gem5/part3/simple_ruby.py
M configs/ruby/Ruby.py
M src/mem/DRAMCtrl.py
A src/mem/DRAMInterface.py
M src/mem/SConscript
M src/mem/dram_ctrl.cc
M src/mem/dram_ctrl.hh
M src/mem/drampower.cc
M src/mem/drampower.hh
M src/mem/qos/QoSMemCtrl.py
M src/mem/qos/QoSMemSinkCtrl.py
A src/mem/qos/QoSMemSinkInterface.py
M src/mem/qos/SConscript
M src/mem/qos/mem_ctrl.cc
M src/mem/qos/mem_ctrl.hh
M src/mem/qos/mem_sink.cc
M src/mem/qos/mem_sink.hh
M tests/gem5/configs/base_config.py
26 files changed, 1,913 insertions(+), 1,736 deletions(-)
Approvals:
Jason Lowe-Power: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass
diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py
index b530145..1ace875 100644
--- a/configs/common/MemConfig.py
+++ b/configs/common/MemConfig.py
@@ -40,7 +40,7 @@
from common import ObjectList
from common import HMC
-def create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits, intlv_size,
+def create_mem_intf(intf, r, i, nbr_mem_ctrls, intlv_bits, intlv_size,
xor_low_bit):
"""
Helper function for creating a single memory controller from the given
@@ -63,32 +63,32 @@
# Create an instance so we can figure out the address
# mapping and row-buffer size
-
interface = intf()
Only do this for DRAMs
- if issubclass(cls, m5.objects.DRAMCtrl):
- if issubclass(intf, m5.objects.DRAMInterface):
# If the channel bits are appearing after the column
# bits, we need to add the appropriate number of bits
# for the row buffer size
-
rowbuffer_size = interface.device_rowbuffer_size.value * \
-
interface.devices_per_rank.value
intlv_low_bit = int(math.log(rowbuffer_size, 2))
# We got all we need to configure the appropriate address
# range
- ctrl.range = m5.objects.AddrRange(r.start, size = r.size(),
- interface.range = m5.objects.AddrRange(r.start, size = r.size(),
intlvHighBit =
intlv_low_bit + intlv_bits - 1,
xorHighBit = xor_high_bit,
intlvBits = intlv_bits,
intlvMatch = i)
def config_mem(options, system):
"""
@@ -148,10 +148,10 @@
if 2 ** intlv_bits != nbr_mem_ctrls:
fatal("Number of memory channels must be a power of 2")
- cls = ObjectList.mem_list.get(opt_mem_type)
- intf = ObjectList.mem_list.get(opt_mem_type)
mem_ctrls = []
- if opt_elastic_trace_en and not issubclass(cls,
m5.objects.SimpleMemory):
- if opt_elastic_trace_en and not issubclass(intf,
m5.objects.SimpleMemory):
fatal("When elastic trace is enabled, configure mem-type as "
"simple-mem.")
@@ -162,36 +162,53 @@
intlv_size = max(opt_mem_channels_intlv, system.cache_line_size.value)
# For every range (most systems will only have one), create an
-
array of controllers and set their parameters to match their
-
address mapping in the case of a DRAM
-
array of memory interfaces and set their parameters to match
-
their address mapping in the case of a DRAM
for r in system.mem_ranges:
for i in range(nbr_mem_ctrls):
-
mem_ctrl = create_mem_ctrl(cls, r, i, nbr_mem_ctrls,
intlv_bits,
-
# Create the DRAM interface
-
dram_intf = create_mem_intf(intf, r, i, nbr_mem_ctrls,
intlv_bits,
intlv_size, opt_xor_low_bit)
+
# Set the number of ranks based on the command-line
# options if it was explicitly set
-
if issubclass(cls, m5.objects.DRAMCtrl) and opt_mem_ranks:
-
mem_ctrl.ranks_per_channel = opt_mem_ranks
opt_mem_ranks:
-
if issubclass(intf, m5.objects.DRAMInterface):
-
dram_intf.enable_dram_powerdown = opt_dram_powerdown
if opt_elastic_trace_en:
-
dram_intf.latency = '1ns'
print("For elastic trace, over-riding Simple Memory "
"latency to 1ns.")
-
# Create the controller that will drive the interface
-
if opt_mem_type == "HMC_2500_1x32":
-
# The static latency of the vault controllers is estimated
-
# to be smaller than a full DRAM channel controller
-
mem_ctrl = m5.objects.DRAMCtrl(min_writes_per_switch = 8,
-
static_backend_latency
= '4ns',
= '4ns')
-
else:
-
mem_ctrl = m5.objects.DRAMCtrl()
-
# Hookup the controller to the interface and add to the list
-
mem_ctrl.dram = dram_intf
mem_ctrls.append(mem_ctrl)
-
Create a controller and connect the interfaces to a controller
-
for i in range(len(mem_ctrls)):
-
if opt_mem_type == "HMC_2500_1x32":
-
# Connect the controllers to the membus
-
mem_ctrls[i].port = xbar[i/4].master
-
# Set memory device size. There is an independent controller
for
-
# each vault. All vaults are same size.
-
mem_ctrls[i].dram.device_size = options.hmc_dev_vault_size
-
else:
-
# Connect the controllers to the membus
-
mem_ctrls[i].port = xbar.master
-
subsystem.mem_ctrls = mem_ctrls
-
Connect the controllers to the membus
- for i in range(len(subsystem.mem_ctrls)):
-
if opt_mem_type == "HMC_2500_1x32":
-
subsystem.mem_ctrls[i].port = xbar[i/4].master
-
# Set memory device size. There is an independent controller
for
-
# each vault. All vaults are same size.
-
subsystem.mem_ctrls[i].device_size = options.hmc_dev_vault_size
-
else:
-
subsystem.mem_ctrls[i].port = xbar.master
diff --git a/configs/dram/low_power_sweep.py
b/configs/dram/low_power_sweep.py
index 9a62393..0da2b93 100644
--- a/configs/dram/low_power_sweep.py
+++ b/configs/dram/low_power_sweep.py
@@ -111,14 +111,19 @@
# Sanity check for memory controller class.
if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl):
- fatal("This script assumes the memory is a DRAMCtrl subclass")
+ fatal("This script assumes the controller is a DRAMCtrl subclass")
+if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface):
+ fatal("This script assumes the memory is a DRAMInterface subclass")
# There is no point slowing things down by saving any data.
-system.mem_ctrls[0].null = True
+system.mem_ctrls[0].dram.null = True
+
+# enable DRAM low power states
+system.mem_ctrls[0].dram.enable_dram_powerdown = True
# Set the address mapping based on input argument
-system.mem_ctrls[0].addr_mapping = args.addr_map
-system.mem_ctrls[0].page_policy = args.page_policy
+system.mem_ctrls[0].dram.addr_mapping = args.addr_map
+system.mem_ctrls[0].dram.page_policy = args.page_policy
We create a traffic generator state for each param combination we want to
test. Each traffic generator state is specified in the config file and
the
@@ -132,22 +137,22 @@
cfg_file = open(cfg_file_path, 'w')
# Get the number of banks
-nbr_banks = int(system.mem_ctrls[0].banks_per_rank.value)
+nbr_banks = int(system.mem_ctrls[0].dram.banks_per_rank.value)
determine the burst size in bytes
-burst_size = int((system.mem_ctrls[0].devices_per_rank.value *
+burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
-page_size = system.mem_ctrls[0].devices_per_rank.value * \
- system.mem_ctrls[0].device_rowbuffer_size.value
+page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
+ system.mem_ctrls[0].dram.device_rowbuffer_size.value
Inter-request delay should be such that we can hit as many transitions
to/from low power states as possible to. We provide a min and max itt to
the
traffic generator and it randomises in the range. The parameter is in
seconds and we need it in ticks (ps).
-itt_min = system.mem_ctrls[0].tBURST.value * 1000000000000
+itt_min = system.mem_ctrls[0].dram.tBURST.value * 1000000000000
#The itt value when set to (tRAS + tRP + tCK) covers the case where
a read command is delayed beyond the delay from ACT to PRE_PDN entry of
the
@@ -155,9 +160,9 @@
between a write and power down entry will be tRCD + tCL + tWR + tRP +
tCK.
As we use this delay as a unit and create multiples of it as bigger
delays
for the sweep, this parameter works for reads, writes and mix of them.
-pd_entry_time = (system.mem_ctrls[0].tRAS.value +
+pd_entry_time = (system.mem_ctrls[0].dram.tRAS.value +
-
system.mem_ctrls[0].dram.tRP.value +
-
system.mem_ctrls[0].dram.tCK.value) * 1000000000000
We sweep itt max using the multipliers specified by the user.
itt_max_str = args.itt_list.strip().split()
diff --git a/configs/dram/sweep.py b/configs/dram/sweep.py
index a340b46..a771c5c 100644
--- a/configs/dram/sweep.py
+++ b/configs/dram/sweep.py
@@ -116,13 +116,15 @@
# the following assumes that we are using the native DRAM
# controller, check to be sure
if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl):
- fatal("This script assumes the memory is a DRAMCtrl subclass")
+ fatal("This script assumes the controller is a DRAMCtrl subclass")
+if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface):
+ fatal("This script assumes the memory is a DRAMInterface subclass")
# there is no point slowing things down by saving any data
-system.mem_ctrls[0].null = True
+system.mem_ctrls[0].dram.null = True
# Set the address mapping based on input argument
-system.mem_ctrls[0].addr_mapping = options.addr_map
+system.mem_ctrls[0].dram.addr_mapping = options.addr_map
stay in each state for 0.25 ms, long enough to warm things up, and
short enough to avoid hitting a refresh
@@ -133,21 +135,21 @@
the DRAM maximum bandwidth to ensure that it is saturated
get the number of banks
-nbr_banks = system.mem_ctrls[0].banks_per_rank.value
+nbr_banks = system.mem_ctrls[0].dram.banks_per_rank.value
determine the burst length in bytes
-burst_size = int((system.mem_ctrls[0].devices_per_rank.value *
+burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
-page_size = system.mem_ctrls[0].devices_per_rank.value * \
- system.mem_ctrls[0].device_rowbuffer_size.value
+page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
+ system.mem_ctrls[0].dram.device_rowbuffer_size.value
match the maximum bandwidth of the memory, the parameter is in seconds
and we need it in ticks (ps)
-itt = getattr(system.mem_ctrls[0].tBURST_MIN, 'value',
+itt = getattr(system.mem_ctrls[0].dram.tBURST_MIN, 'value',
-
system.mem_ctrls[0].dram.tBURST.value) * 1000000000000
assume we start at 0
max_addr = mem_range.end
diff --git a/configs/example/memcheck.py b/configs/example/memcheck.py
index 6d80d60..6bccd54 100644
--- a/configs/example/memcheck.py
+++ b/configs/example/memcheck.py
@@ -217,7 +217,7 @@
proto_tester = TrafficGen(config_file = cfg_file_path)
Set up the system along with a DRAM controller
-system = System(physmem = DDR3_1600_8x8())
+system = System(physmem = DRAMCtrl(dram = DDR3_1600_8x8()))
system.voltage_domain = VoltageDomain(voltage = '1V')
diff --git a/configs/learning_gem5/part1/simple.py
b/configs/learning_gem5/part1/simple.py
index ef73a06..cfd15be 100644
--- a/configs/learning_gem5/part1/simple.py
+++ b/configs/learning_gem5/part1/simple.py
@@ -77,8 +77,9 @@
system.cpu.interrupts[0].int_slave = system.membus.master
Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
Connect the system up to the membus
diff --git a/configs/learning_gem5/part1/two_level.py
b/configs/learning_gem5/part1/two_level.py
index 564c785..0dbcfc7 100644
--- a/configs/learning_gem5/part1/two_level.py
+++ b/configs/learning_gem5/part1/two_level.py
@@ -132,8 +132,9 @@
system.system_port = system.membus.slave
Create a DDR3 memory controller
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
Create a process for a simple "Hello World" application
diff --git a/configs/learning_gem5/part2/simple_cache.py
b/configs/learning_gem5/part2/simple_cache.py
index 8d98d92..fbea73d 100644
--- a/configs/learning_gem5/part2/simple_cache.py
+++ b/configs/learning_gem5/part2/simple_cache.py
@@ -76,8 +76,9 @@
system.cpu.interrupts[0].int_slave = system.membus.master
Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
Connect the system up to the membus
diff --git a/configs/learning_gem5/part2/simple_memobj.py
b/configs/learning_gem5/part2/simple_memobj.py
index d30977c..e792eb9 100644
--- a/configs/learning_gem5/part2/simple_memobj.py
+++ b/configs/learning_gem5/part2/simple_memobj.py
@@ -74,8 +74,9 @@
system.cpu.interrupts[0].int_slave = system.membus.master
Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
Connect the system up to the membus
diff --git a/configs/learning_gem5/part3/simple_ruby.py
b/configs/learning_gem5/part3/simple_ruby.py
index c47ee7e..7f70a8c 100644
--- a/configs/learning_gem5/part3/simple_ruby.py
+++ b/configs/learning_gem5/part3/simple_ruby.py
@@ -68,8 +68,9 @@
system.cpu = [TimingSimpleCPU() for i in range(2)]
Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
create the interrupt controller for the CPU and connect to the membus
for cpu in system.cpu:
diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py
index 9bceaa3..9f400a8 100644
--- a/configs/ruby/Ruby.py
+++ b/configs/ruby/Ruby.py
@@ -130,15 +130,16 @@
dir_ranges = []
for r in system.mem_ranges:
mem_type = ObjectList.mem_list.get(options.mem_type)
-
dram_intf = MemConfig.create_mem_intf(mem_type, r, index,
options.num_dirs, int(math.log(options.num_dirs, 2)),
intlv_size, options.xor_low_bit)
-
mem_ctrl = m5.objects.DRAMCtrl(dram = dram_intf)
if options.access_backing_store:
mem_ctrl.kvm_map=False
mem_ctrls.append(mem_ctrl)
diff --git a/src/mem/DRAMCtrl.py b/src/mem/DRAMCtrl.py
index 0f70dff..b7b43dc 100644
--- a/src/mem/DRAMCtrl.py
+++ b/src/mem/DRAMCtrl.py
@@ -40,26 +40,12 @@
from m5.params import *
from m5.proxy import *
-from m5.objects.AbstractMemory import *
from m5.objects.QoSMemCtrl import *
Enum for memory scheduling algorithms, currently First-Come
First-Served and a First-Row Hit then First-Come First-Served
class MemSched(Enum): vals = ['fcfs', 'frfcfs']
-# Enum for the address mapping. With Ch, Ra, Ba, Ro and Co denoting
-# channel, rank, bank, row and column, respectively, and going from
-# MSB to LSB. Available are RoRaBaChCo and RoRaBaCoCh, that are
-# suitable for an open-page policy, optimising for sequential accesses
-# hitting in the open row. For a closed-page policy, RoCoRaBaCh
-# maximises parallelism.
-class AddrMap(Enum): vals = ['RoRaBaChCo', 'RoRaBaCoCh', 'RoCoRaBaCh']
-# Enum for the page policy, either open, open_adaptive, close, or
-# close_adaptive.
-class PageManage(Enum): vals = ['open', 'open_adaptive', 'close',
-
'close_adaptive']
-
DRAMCtrl is a single-channel single-ported DRAM controller model
that aims to model the most important system-level performance
effects of a DRAM without getting into too much detail of the DRAM
@@ -72,12 +58,11 @@
# bus in front of the controller for multiple ports
port = SlavePort("Slave port")
-
the basic configuration of the controller architecture, note
-
that each entry corresponds to a burst for the specific DRAM
-
configuration (e.g. x32 with burst length 8 is 32 bytes) and not
-
the cacheline size or request/packet size
- write_buffer_size = Param.Unsigned(64, "Number of write queue entries")
- read_buffer_size = Param.Unsigned(32, "Number of read queue entries")
@@ -93,15 +78,6 @@
# scheduler, address map and page policy
mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy")
-
addr_mapping = Param.AddrMap('RoRaBaCoCh', "Address mapping policy")
-
page_policy = Param.PageManage('open_adaptive', "Page management
policy")
-
enforce a limit on the number of accesses per row
-
max_accesses_per_row = Param.Unsigned(16, "Max accesses per row
before "
-
"closing");
-
size of DRAM Chip in Bytes
-
device_size = Param.MemorySize("Size of DRAM chip")
pipeline latency of the controller and PHY, split into a
frontend part and a backend part, with reads and writes serviced
@@ -109,1404 +85,3 @@
# serviced by the memory seeing the sum of the two
static_frontend_latency = Param.Latency("10ns", "Static frontend
latency")
static_backend_latency = Param.Latency("10ns", "Static backend
latency")
-
the physical organisation of the DRAM
- device_bus_width = Param.Unsigned("data bus width in bits for each
DRAM "\
-
"device/chip")
- burst_length = Param.Unsigned("Burst length (BL) in beats")
- device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\
-
"device/chip")
- devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
- ranks_per_channel = Param.Unsigned("Number of ranks per channel")
-
default to 0 bank groups per rank, indicating bank group architecture
-
is not used
-
update per memory class when bank group architecture is supported
- bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per
rank")
- banks_per_rank = Param.Unsigned("Number of banks per rank")
-
Enable DRAM powerdown states if True. This is False by default due to
-
performance being lower when enabled
- enable_dram_powerdown = Param.Bool(False, "Enable powerdown states")
-
For power modelling we need to know if the DRAM has a DLL or not
- dll = Param.Bool(True, "DRAM has DLL or not")
-
DRAMPower provides in addition to the core power, the possibility to
-
include RD/WR termination and IO power. This calculation assumes some
-
default values. The integration of DRAMPower with gem5 does not
include
-
IO and RD/WR termination power by default. This might be added as an
-
additional feature in the future.
-
timing behaviour and constraints - all in nanoseconds
-
the base clock period of the DRAM
- tCK = Param.Latency("Clock period")
-
the amount of time in nanoseconds from issuing an activate command
-
to the data being available in the row buffer for a read/write
- tRCD = Param.Latency("RAS to CAS delay")
-
the time from issuing a read/write command to seeing the actual data
- tCL = Param.Latency("CAS latency")
-
minimum time between a precharge and subsequent activate
- tRP = Param.Latency("Row precharge time")
-
minimum time between an activate and a precharge to the same row
- tRAS = Param.Latency("ACT to PRE delay")
-
minimum time between a write data transfer and a precharge
- tWR = Param.Latency("Write recovery time")
-
minimum time between a read and precharge command
- tRTP = Param.Latency("Read to precharge")
-
time to complete a burst transfer, typically the burst length
-
divided by two due to the DDR bus, but by making it a parameter
-
it is easier to also evaluate SDR memories like WideIO.
-
This parameter has to account for burst length.
-
Read/Write requests with data size larger than one full burst are
broken
access,
-
which could be greater than tBURST when the memory access time is
greater
-
than tBURST
- tBURST_MAX = Param.Latency(Self.tBURST, "Column access delay")
-
tBURST_MIN is the minimum delay between bursts, which could be less
than
-
tBURST when interleaving is supported
- tBURST_MIN = Param.Latency(Self.tBURST, "Minimum delay between bursts")
-
CAS-to-CAS delay for bursts to the same bank group
-
only utilized with bank group architectures; set to 0 for default
case
-
tBURST is equivalent to tCCD_S; no explicit parameter required
-
for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")
-
Write-to-Write delay for bursts to the same bank group
-
only utilized with bank group architectures; set to 0 for default
case
-
This will be used to enable different same bank group delays
-
for writes versus reads
- tCCD_L_WR = Param.Latency(Self.tCCD_L,
-
"Same bank group Write to Write delay")
-
time taken to complete one refresh cycle (N rows in all banks)
- tRFC = Param.Latency("Refresh cycle time")
-
refresh command interval, how often a "ref" command needs
-
to be sent. It is 7.8 us for a 64ms refresh requirement
- tREFI = Param.Latency("Refresh command interval")
-
write-to-read, same rank turnaround penalty
- tWTR = Param.Latency("Write to read, same rank switching time")
-
write-to-read, same rank turnaround penalty for same bank group
- tWTR_L = Param.Latency(Self.tWTR, "Write to read, same rank switching "
-
"time, same bank group")
-
read-to-write, same rank turnaround penalty
- tRTW = Param.Latency("Read to write, same rank switching time")
-
rank-to-rank bus delay penalty
-
this does not correlate to a memory timing parameter and encompasses:
-
1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD
-
different rank bus delay
- tCS = Param.Latency("Rank to rank switching time")
-
minimum precharge to precharge delay time
- tPPD = Param.Latency("0ns", "PRE to PRE delay")
-
maximum delay between two-cycle ACT command phases
- tAAD = Param.Latency(Self.tCK,
-
"Maximum delay between two-cycle ACT commands")
- two_cycle_activate = Param.Bool(False,
-
"Two cycles required to send activate")
-
minimum row activate to row activate delay time
- tRRD = Param.Latency("ACT to ACT delay")
-
only utilized with bank group architectures; set to 0 for default
case
- tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")
-
time window in which a maximum number of activates are allowed
-
to take place, set to 0 to disable
- tXAW = Param.Latency("X activation window")
- activation_limit = Param.Unsigned("Max number of activates in window")
-
time to exit power-down mode
-
Exit power-down to next valid command delay
- tXP = Param.Latency("0ns", "Power-up Delay")
-
Exit Powerdown to commands requiring a locked DLL
- tXPDLL = Param.Latency("0ns", "Power-up Delay with locked DLL")
-
time to exit self-refresh mode
- tXS = Param.Latency("0ns", "Self-refresh exit latency")
-
time to exit self-refresh mode with locked DLL
- tXSDLL = Param.Latency("0ns", "Self-refresh exit latency DLL")
-
number of data beats per clock. with DDR, default is 2, one per edge
- beats_per_clock = Param.Unsigned(2, "Data beats per clock")
- data_clock_sync = Param.Bool(False, "Synchronization commands
required")
-
Currently rolled into other params
- ######################################################################
-
tRC - assumed to be tRAS + tRP
-
Power Behaviour and Constraints
-
DRAMs like LPDDR and WideIO have 2 external voltage domains. These
are
-
defined as VDD and VDD2. Each current is defined for each voltage
domain
-
separately. For example, current IDD0 is active-precharge current for
-
voltage domain VDD and current IDD02 is active-precharge current for
-
voltage domain VDD2.
-
By default all currents are set to 0mA. Users who are only
interested in
-
the performance of DRAMs can leave them at 0.
-
Operating 1 Bank Active-Precharge current
- IDD0 = Param.Current("0mA", "Active precharge current")
-
Operating 1 Bank Active-Precharge current multiple voltage Range
- IDD02 = Param.Current("0mA", "Active precharge current VDD2")
-
Precharge Power-down Current: Slow exit
- IDD2P0 = Param.Current("0mA", "Precharge Powerdown slow")
-
Precharge Power-down Current: Slow exit multiple voltage Range
- IDD2P02 = Param.Current("0mA", "Precharge Powerdown slow VDD2")
-
Precharge Power-down Current: Fast exit
- IDD2P1 = Param.Current("0mA", "Precharge Powerdown fast")
-
Precharge Power-down Current: Fast exit multiple voltage Range
- IDD2P12 = Param.Current("0mA", "Precharge Powerdown fast VDD2")
-
Precharge Standby current
- IDD2N = Param.Current("0mA", "Precharge Standby current")
-
Precharge Standby current multiple voltage range
- IDD2N2 = Param.Current("0mA", "Precharge Standby current VDD2")
-
Active Power-down current: slow exit
- IDD3P0 = Param.Current("0mA", "Active Powerdown slow")
-
Active Power-down current: slow exit multiple voltage range
- IDD3P02 = Param.Current("0mA", "Active Powerdown slow VDD2")
-
Active Power-down current : fast exit
- IDD3P1 = Param.Current("0mA", "Active Powerdown fast")
-
Active Power-down current : fast exit multiple voltage range
- IDD3P12 = Param.Current("0mA", "Active Powerdown fast VDD2")
-
Active Standby current
- IDD3N = Param.Current("0mA", "Active Standby current")
-
Active Standby current multiple voltage range
- IDD3N2 = Param.Current("0mA", "Active Standby current VDD2")
-
Burst Read Operating Current
- IDD4R = Param.Current("0mA", "READ current")
-
Burst Read Operating Current multiple voltage range
- IDD4R2 = Param.Current("0mA", "READ current VDD2")
-
Burst Write Operating Current
- IDD4W = Param.Current("0mA", "WRITE current")
-
Burst Write Operating Current multiple voltage range
- IDD4W2 = Param.Current("0mA", "WRITE current VDD2")
-
Refresh Current
- IDD5 = Param.Current("0mA", "Refresh current")
-
Refresh Current multiple voltage range
- IDD52 = Param.Current("0mA", "Refresh current VDD2")
-
Self-Refresh Current
- IDD6 = Param.Current("0mA", "Self-refresh Current")
-
Self-Refresh Current multiple voltage range
- IDD62 = Param.Current("0mA", "Self-refresh Current VDD2")
-
Main voltage range of the DRAM
- VDD = Param.Voltage("0V", "Main Voltage Range")
-
Second voltage range defined by some DRAMs
- VDD2 = Param.Voltage("0V", "2nd Voltage Range")
-# A single DDR3-1600 x64 channel (one command and address bus), with
-# timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in
-# an 8x8 configuration.
-class DDR3_1600_8x8(DRAMCtrl):
-
size of device in bytes
- device_size = '512MB'
-
8x8 configuration, 8 devices each with an 8-bit interface
- device_bus_width = 8
-
DDR3 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
- device_rowbuffer_size = '1kB'
-
8x8 configuration, so 8 devices
- devices_per_rank = 8
-
Use two ranks
- ranks_per_channel = 2
-
DDR3 has 8 banks in all configurations
- banks_per_rank = 8
-
800 MHz
- tCK = '1.25ns'
-
8 beats across an x64 interface translates to 4 clocks @ 800 MHz
- tBURST = '5ns'
-
DDR3-1600 11-11-11
- tRCD = '13.75ns'
- tCL = '13.75ns'
- tRP = '13.75ns'
- tRAS = '35ns'
- tRRD = '6ns'
- tXAW = '30ns'
- activation_limit = 4
- tRFC = '260ns'
- tWR = '15ns'
-
Greater of 4 CK or 7.5 ns
- tWTR = '7.5ns'
-
Greater of 4 CK or 7.5 ns
- tRTP = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
- tRTW = '2.5ns'
-
Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
<=85C, half for >85C
- tREFI = '7.8us'
-
active powerdown and precharge powerdown exit time
- tXP = '6ns'
-
self refresh exit time
- tXS = '270ns'
-
Current values from datasheet Die Rev E,J
- IDD0 = '55mA'
- IDD2N = '32mA'
- IDD3N = '38mA'
- IDD4W = '125mA'
- IDD4R = '157mA'
- IDD5 = '235mA'
- IDD3P1 = '38mA'
- IDD2P1 = '32mA'
- IDD6 = '20mA'
- VDD = '1.5V'
-# A single HMC-2500 x32 model based on:
-# [1] DRAMSpec: a high-level DRAM bank modelling tool
-# developed at the University of Kaiserslautern. This high level tool
-# uses RC (resistance-capacitance) and CV (capacitance-voltage) models to
-# estimate the DRAM bank latency and power numbers.
-# [2] High performance AXI-4.0 based interconnect for extensible smart
memory
-# cubes (E. Azarkhish et. al)
-# Assumed for the HMC model is a 30 nm technology node.
-# The modelled HMC consists of 4 Gbit layers which sum up to 2GB of memory
(4
-# layers).
-# Each layer has 16 vaults and each vault consists of 2 banks per layer.
-# In order to be able to use the same controller used for 2D DRAM
generations
-# for HMC, the following analogy is done:
-# Channel (DDR) => Vault (HMC)
-# device_size (DDR) => size of a single layer in a vault
-# ranks per channel (DDR) => number of layers
-# banks per rank (DDR) => banks per layer
-# devices per rank (DDR) => devices per layer ( 1 for HMC).
-# The parameters for which no input is available are inherited from the
DDR3
-# configuration.
-# This configuration includes the latencies from the DRAM to the logic
layer
-# of the HMC
-class HMC_2500_1x32(DDR3_1600_8x8):
-
size of device
-
two banks per device with each bank 4MB [2]
- device_size = '8MB'
-
1x32 configuration, 1 device with 32 TSVs [2]
- device_bus_width = 32
-
HMC is a BL8 device [2]
- burst_length = 8
-
Each device has a page (row buffer) size of 256 bytes [2]
- device_rowbuffer_size = '256B'
-
1x32 configuration, so 1 device [2]
- devices_per_rank = 1
-
4 layers so 4 ranks [2]
- ranks_per_channel = 4
-
HMC has 2 banks per layer [2]
-
Each layer represents a rank. With 4 layers and 8 banks in total,
each
-
layer has 2 banks; thus 2 banks per rank.
- banks_per_rank = 2
-
1250 MHz [2]
- tCK = '0.8ns'
-
8 beats across an x32 interface translates to 4 clocks @ 1250 MHz
- tBURST = '3.2ns'
-
Values using DRAMSpec HMC model [1]
- tRCD = '10.2ns'
- tCL = '9.9ns'
- tRP = '7.7ns'
- tRAS = '21.6ns'
-
tRRD depends on the power supply network for each vendor.
-
We assume a tRRD of a double bank approach to be equal to 4 clock
-
cycles (Assumption)
- tRRD = '3.2ns'
-
activation limit is set to 0 since there are only 2 banks per vault
-
layer.
- activation_limit = 0
-
Values using DRAMSpec HMC model [1]
- tRFC = '59ns'
- tWR = '8ns'
- tRTP = '4.9ns'
-
Default different rank bus delay assumed to 1 CK for TSVs, @1250 MHz
=
-
0.8 ns (Assumption)
- tCS = '0.8ns'
-
Value using DRAMSpec HMC model [1]
- tREFI = '3.9us'
-
The default page policy in the vault controllers is simple closed
page
-
[2] nevertheless 'close' policy opens and closes the row multiple
times
-
for bursts larger than 32 Bytes. For this reason we
use 'close_adaptive'
- page_policy = 'close_adaptive'
-
RoCoRaBaCh resembles the default address mapping in HMC
- addr_mapping = 'RoCoRaBaCh'
- min_writes_per_switch = 8
-
These parameters do not directly correlate with buffer_size in real
-
hardware. Nevertheless, their value has been tuned to achieve a
-
bandwidth similar to the cycle-accurate model in [2]
- write_buffer_size = 32
- read_buffer_size = 32
-
The static latency of the vault controllers is estimated to be
smaller
-
than a full DRAM channel controller
- static_backend_latency='4ns'
- static_frontend_latency='4ns'
-# A single DDR3-2133 x64 channel refining a selected subset of the
-# options for the DDR-1600 configuration, based on the same DDR3-1600
-# 4 Gbit datasheet (Micron MT41J512M8). Most parameters are kept
-# consistent across the two configurations.
-class DDR3_2133_8x8(DDR3_1600_8x8):
-
1066 MHz
- tCK = '0.938ns'
-
8 beats across an x64 interface translates to 4 clocks @ 1066 MHz
- tBURST = '3.752ns'
-
DDR3-2133 14-14-14
- tRCD = '13.09ns'
- tCL = '13.09ns'
- tRP = '13.09ns'
- tRAS = '33ns'
- tRRD = '5ns'
- tXAW = '25ns'
-
Current values from datasheet
- IDD0 = '70mA'
- IDD2N = '37mA'
- IDD3N = '44mA'
- IDD4W = '157mA'
- IDD4R = '191mA'
- IDD5 = '250mA'
- IDD3P1 = '44mA'
- IDD2P1 = '43mA'
- IDD6 ='20mA'
- VDD = '1.5V'
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A2G4)
-# in an 16x4 configuration.
-# Total channel capacity is 32GB
-# 16 devices/rank * 2 ranks/channel * 1GB/device = 32GB/channel
-class DDR4_2400_16x4(DRAMCtrl):
-
size of device
- device_size = '1GB'
-
16x4 configuration, 16 devices each with a 4-bit interface
- device_bus_width = 4
-
DDR4 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 512 byte (1K columns x4)
- device_rowbuffer_size = '512B'
-
16x4 configuration, so 16 devices
- devices_per_rank = 16
-
Match our DDR3 configurations which is dual rank
- ranks_per_channel = 2
-
DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
-
Set to 4 for x4 case
- bank_groups_per_rank = 4
-
DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
-
configurations). Currently we do not capture the additional
-
constraints incurred by the bank groups
- banks_per_rank = 16
-
override the default buffer sizes and go for something larger to
-
accommodate the larger bank count
- write_buffer_size = 128
- read_buffer_size = 64
-
1200 MHz
- tCK = '0.833ns'
-
8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
-
tBURST is equivalent to the CAS-to-CAS delay (tCCD)
-
With bank group architectures, tBURST represents the CAS-to-CAS
-
delay for bursts to different bank groups (tCCD_S)
- tBURST = '3.332ns'
-
@2400 data rate, tCCD_L is 6 CK
-
CAS-to-CAS delay for bursts to the same bank group
-
tBURST is equivalent to tCCD_S; no explicit parameter required
-
for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = '5ns';
-
DDR4-2400 17-17-17
- tRCD = '14.16ns'
- tCL = '14.16ns'
- tRP = '14.16ns'
- tRAS = '32ns'
-
RRD_S (different bank group) for 512B page is MAX(4 CK, 3.3ns)
- tRRD = '3.332ns'
-
RRD_L (same bank group) for 512B page is MAX(4 CK, 4.9ns)
- tRRD_L = '4.9ns';
-
tFAW for 512B page is MAX(16 CK, 13ns)
- tXAW = '13.328ns'
- activation_limit = 4
-
tRFC is 350ns
- tRFC = '350ns'
- tWR = '15ns'
-
Here using the average of WTR_S and WTR_L
- tWTR = '5ns'
-
Greater of 4 CK or 7.5 ns
- tRTP = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666
ns
- tRTW = '1.666ns'
-
Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns
- tCS = '1.666ns'
-
<=85C, half for >85C
- tREFI = '7.8us'
-
active powerdown and precharge powerdown exit time
- tXP = '6ns'
-
self refresh exit time
-
exit delay to ACT, PRE, PREALL, REF, SREF Enter, and PD Enter is:
-
tRFC + 10ns = 340ns
- tXS = '340ns'
-
Current values from datasheet
- IDD0 = '43mA'
- IDD02 = '3mA'
- IDD2N = '34mA'
- IDD3N = '38mA'
- IDD3N2 = '3mA'
- IDD4W = '103mA'
- IDD4R = '110mA'
- IDD5 = '250mA'
- IDD3P1 = '32mA'
- IDD2P1 = '25mA'
- IDD6 = '30mA'
- VDD = '1.2V'
- VDD2 = '2.5V'
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A1G8)
-# in an 8x8 configuration.
-# Total channel capacity is 16GB
-# 8 devices/rank * 2 ranks/channel * 1GB/device = 16GB/channel
-class DDR4_2400_8x8(DDR4_2400_16x4):
-
8x8 configuration, 8 devices each with an 8-bit interface
- device_bus_width = 8
-
Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
- device_rowbuffer_size = '1kB'
-
8x8 configuration, so 8 devices
- devices_per_rank = 8
-
RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
- tRRD_L = '4.9ns';
- tXAW = '21ns'
-
Current values from datasheet
- IDD0 = '48mA'
- IDD3N = '43mA'
- IDD4W = '123mA'
- IDD4R = '135mA'
- IDD3P1 = '37mA'
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A512M16)
-# in an 4x16 configuration.
-# Total channel capacity is 4GB
-# 4 devices/rank * 1 ranks/channel * 1GB/device = 4GB/channel
-class DDR4_2400_4x16(DDR4_2400_16x4):
-
4x16 configuration, 4 devices each with an 16-bit interface
- device_bus_width = 16
-
Each device has a page (row buffer) size of 2 Kbyte (1K columns x16)
- device_rowbuffer_size = '2kB'
-
4x16 configuration, so 4 devices
- devices_per_rank = 4
-
Single rank for x16
- ranks_per_channel = 1
-
DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
-
Set to 2 for x16 case
- bank_groups_per_rank = 2
-
DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
-
configurations). Currently we do not capture the additional
-
constraints incurred by the bank groups
- banks_per_rank = 8
-
RRD_S (different bank group) for 2K page is MAX(4 CK, 5.3ns)
- tRRD = '5.3ns'
-
RRD_L (same bank group) for 2K page is MAX(4 CK, 6.4ns)
- tRRD_L = '6.4ns';
- tXAW = '30ns'
-
Current values from datasheet
- IDD0 = '80mA'
- IDD02 = '4mA'
- IDD2N = '34mA'
- IDD3N = '47mA'
- IDD4W = '228mA'
- IDD4R = '243mA'
- IDD5 = '280mA'
- IDD3P1 = '41mA'
-# A single LPDDR2-S4 x32 interface (one command/address bus), with
-# default timings based on a LPDDR2-1066 4 Gbit part (Micron MT42L128M32D1)
-# in a 1x32 configuration.
-class LPDDR2_S4_1066_1x32(DRAMCtrl):
-
No DLL in LPDDR2
- dll = False
-
size of device
- device_size = '512MB'
-
1x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
LPDDR2_S4 is a BL4 and BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 1KB
-
(this depends on the memory density)
- device_rowbuffer_size = '1kB'
-
1x32 configuration, so 1 device
- devices_per_rank = 1
-
Use a single rank
- ranks_per_channel = 1
-
LPDDR2-S4 has 8 banks in all configurations
- banks_per_rank = 8
-
533 MHz
- tCK = '1.876ns'
-
Fixed at 15 ns
- tRCD = '15ns'
-
8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle time
- tCL = '15ns'
-
Pre-charge one bank 15 ns (all banks 18 ns)
- tRP = '15ns'
- tRAS = '42ns'
- tWR = '15ns'
- tRTP = '7.5ns'
-
8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz.
-
Note this is a BL8 DDR device.
-
Requests larger than 32 bytes are broken down into multiple requests
-
in the controller
- tBURST = '7.5ns'
-
LPDDR2-S4, 4 Gbit
- tRFC = '130ns'
- tREFI = '3.9us'
-
active powerdown and precharge powerdown exit time
- tXP = '7.5ns'
-
self refresh exit time
- tXS = '140ns'
-
Irrespective of speed grade, tWTR is 7.5 ns
- tWTR = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @533 MHz = 3.75 ns
- tRTW = '3.75ns'
-
Default different rank bus delay to 2 CK, @533 MHz = 3.75 ns
- tCS = '3.75ns'
-
Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
Irrespective of density, tFAW is 50 ns
- tXAW = '50ns'
- activation_limit = 4
-
Current values from datasheet
- IDD0 = '15mA'
- IDD02 = '70mA'
- IDD2N = '2mA'
- IDD2N2 = '30mA'
- IDD3N = '2.5mA'
- IDD3N2 = '30mA'
- IDD4W = '10mA'
- IDD4W2 = '190mA'
- IDD4R = '3mA'
- IDD4R2 = '220mA'
- IDD5 = '40mA'
- IDD52 = '150mA'
- IDD3P1 = '1.2mA'
- IDD3P12 = '8mA'
- IDD2P1 = '0.6mA'
- IDD2P12 = '0.8mA'
- IDD6 = '1mA'
- IDD62 = '3.2mA'
- VDD = '1.8V'
- VDD2 = '1.2V'
-# A single WideIO x128 interface (one command and address bus), with
-# default timings based on an estimated WIO-200 8 Gbit part.
-class WideIO_200_1x128(DRAMCtrl):
-
No DLL for WideIO
- dll = False
-
size of device
- device_size = '1024MB'
-
1x128 configuration, 1 device with a 128-bit interface
- device_bus_width = 128
-
This is a BL4 device
- burst_length = 4
-
Each device has a page (row buffer) size of 4KB
-
(this depends on the memory density)
- device_rowbuffer_size = '4kB'
-
1x128 configuration, so 1 device
- devices_per_rank = 1
-
Use one rank for a one-high die stack
- ranks_per_channel = 1
-
WideIO has 4 banks in all configurations
- banks_per_rank = 4
-
200 MHz
- tCK = '5ns'
-
WIO-200
- tRCD = '18ns'
- tCL = '18ns'
- tRP = '18ns'
- tRAS = '42ns'
- tWR = '15ns'
-
Read to precharge is same as the burst
- tRTP = '20ns'
-
4 beats across an x128 SDR interface translates to 4 clocks @ 200
MHz.
-
Note this is a BL4 SDR device.
- tBURST = '20ns'
-
WIO 8 Gb
- tRFC = '210ns'
-
WIO 8 Gb, <=85C, half for >85C
- tREFI = '3.9us'
-
Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns
- tWTR = '15ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @200 MHz = 10 ns
- tRTW = '10ns'
-
Default different rank bus delay to 2 CK, @200 MHz = 10 ns
- tCS = '10ns'
-
Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
Two instead of four activation window
- tXAW = '50ns'
- activation_limit = 2
-
The WideIO specification does not provide current information
-# A single LPDDR3 x32 interface (one command/address bus), with
-# default timings based on a LPDDR3-1600 4 Gbit part (Micron
-# EDF8132A1MC) in a 1x32 configuration.
-class LPDDR3_1600_1x32(DRAMCtrl):
-
No DLL for LPDDR3
- dll = False
-
size of device
- device_size = '512MB'
-
1x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
LPDDR3 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 4KB
- device_rowbuffer_size = '4kB'
-
1x32 configuration, so 1 device
- devices_per_rank = 1
-
Technically the datasheet is a dual-rank package, but for
-
comparison with the LPDDR2 config we stick to a single rank
- ranks_per_channel = 1
-
LPDDR3 has 8 banks in all configurations
- banks_per_rank = 8
-
800 MHz
- tCK = '1.25ns'
- tRCD = '18ns'
-
12 CK read latency, 6 CK write latency @ 800 MHz, 1.25 ns cycle time
- tCL = '15ns'
- tRAS = '42ns'
- tWR = '15ns'
-
Greater of 4 CK or 7.5 ns, 4 CK @ 800 MHz = 5 ns
- tRTP = '7.5ns'
-
Pre-charge one bank 18 ns (all banks 21 ns)
- tRP = '18ns'
-
8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz.
-
Note this is a BL8 DDR device.
-
Requests larger than 32 bytes are broken down into multiple requests
-
in the controller
- tBURST = '5ns'
-
LPDDR3, 4 Gb
- tRFC = '130ns'
- tREFI = '3.9us'
-
active powerdown and precharge powerdown exit time
- tXP = '7.5ns'
-
self refresh exit time
- tXS = '140ns'
-
Irrespective of speed grade, tWTR is 7.5 ns
- tWTR = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
- tRTW = '2.5ns'
-
Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
Irrespective of size, tFAW is 50 ns
- tXAW = '50ns'
- activation_limit = 4
-
Current values from datasheet
- IDD0 = '8mA'
- IDD02 = '60mA'
- IDD2N = '0.8mA'
- IDD2N2 = '26mA'
- IDD3N = '2mA'
- IDD3N2 = '34mA'
- IDD4W = '2mA'
- IDD4W2 = '190mA'
- IDD4R = '2mA'
- IDD4R2 = '230mA'
- IDD5 = '28mA'
- IDD52 = '150mA'
- IDD3P1 = '1.4mA'
- IDD3P12 = '11mA'
- IDD2P1 = '0.8mA'
- IDD2P12 = '1.8mA'
- IDD6 = '0.5mA'
- IDD62 = '1.8mA'
- VDD = '1.8V'
- VDD2 = '1.2V'
-# A single GDDR5 x64 interface, with
-# default timings based on a GDDR5-4000 1 Gbit part (SK Hynix
-# H5GQ1H24AFR) in a 2x32 configuration.
-class GDDR5_4000_2x32(DRAMCtrl):
-
size of device
- device_size = '128MB'
-
2x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
GDDR5 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 2Kbits (256Bytes)
- device_rowbuffer_size = '256B'
-
2x32 configuration, so 2 devices
- devices_per_rank = 2
-
assume single rank
- ranks_per_channel = 1
-
GDDR5 has 4 bank groups
- bank_groups_per_rank = 4
-
GDDR5 has 16 banks with 4 bank groups
- banks_per_rank = 16
-
1000 MHz
- tCK = '1ns'
-
8 beats across an x64 interface translates to 2 clocks @ 1000 MHz
-
Data bus runs @2000 Mhz => DDR ( data runs at 4000 MHz )
-
8 beats at 4000 MHz = 2 beats at 1000 MHz
-
tBURST is equivalent to the CAS-to-CAS delay (tCCD)
-
With bank group architectures, tBURST represents the CAS-to-CAS
-
delay for bursts to different bank groups (tCCD_S)
- tBURST = '2ns'
-
@1000MHz data rate, tCCD_L is 3 CK
-
CAS-to-CAS delay for bursts to the same bank group
-
tBURST is equivalent to tCCD_S; no explicit parameter required
-
for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = '3ns';
- tRCD = '12ns'
-
tCL is not directly found in datasheet and assumed equal tRCD
- tCL = '12ns'
- tRP = '12ns'
- tRAS = '28ns'
-
RRD_S (different bank group)
-
RRD_S is 5.5 ns in datasheet.
-
rounded to the next multiple of tCK
- tRRD = '6ns'
-
RRD_L (same bank group)
-
RRD_L is 5.5 ns in datasheet.
-
rounded to the next multiple of tCK
- tRRD_L = '6ns'
- tXAW = '23ns'
-
tXAW < 4 x tRRD.
-
Therefore, activation limit is set to 0
- activation_limit = 0
- tRFC = '65ns'
- tWR = '12ns'
-
Here using the average of WTR_S and WTR_L
- tWTR = '5ns'
-
Read-to-Precharge 2 CK
- tRTP = '2ns'
-
Assume 2 cycles
- tRTW = '2ns'
-# A single HBM x128 interface (one command and address bus), with
-# default timings based on data publicly released
-# ("HBM: Memory Solution for High Performance Processors", MemCon, 2014),
-# IDD measurement values, and by extrapolating data from other classes.
-# Architecture values based on published HBM spec
-# A 4H stack is defined, 2Gb per die for a total of 1GB of memory.
-class HBM_1000_4H_1x128(DRAMCtrl):
-
HBM gen1 supports up to 8 128-bit physical channels
-
Configuration defines a single channel, with the capacity
-
set to (full_ stack_capacity / 8) based on 2Gb dies
-
To use all 8 channels, set 'channels' parameter to 8 in
-
system configuration
-
128-bit interface legacy mode
- device_bus_width = 128
-
HBM supports BL4 and BL2 (legacy mode only)
- burst_length = 4
-
size of channel in bytes, 4H stack of 2Gb dies is 1GB per stack;
-
with 8 channels, 128MB per channel
- device_size = '128MB'
- device_rowbuffer_size = '2kB'
-
1x128 configuration
- devices_per_rank = 1
-
HBM does not have a CS pin; set rank to 1
- ranks_per_channel = 1
-
HBM has 8 or 16 banks depending on capacity
-
2Gb dies have 8 banks
- banks_per_rank = 8
-
depending on frequency, bank groups may be required
-
will always have 4 bank groups when enabled
-
current specifications do not define the minimum frequency for
-
bank group architecture
-
setting bank_groups_per_rank to 0 to disable until range is defined
- bank_groups_per_rank = 0
-
500 MHz for 1Gbps DDR data rate
- tCK = '2ns'
-
use values from IDD measurement in JEDEC spec
-
use tRP value for tRCD and tCL similar to other classes
- tRP = '15ns'
- tRCD = '15ns'
- tCL = '15ns'
- tRAS = '33ns'
-
BL2 and BL4 supported, default to BL4
-
DDR @ 500 MHz means 4 * 2ns / 2 = 4ns
- tBURST = '4ns'
-
value for 2Gb device from JEDEC spec
- tRFC = '160ns'
-
value for 2Gb device from JEDEC spec
- tREFI = '3.9us'
-
extrapolate the following from LPDDR configs, using ns values
-
to minimize burst length, prefetch differences
- tWR = '18ns'
- tRTP = '7.5ns'
- tWTR = '10ns'
-
start with 2 cycles turnaround, similar to other memory classes
-
could be more with variations across the stack
- tRTW = '4ns'
-
single rank device, set to 0
- tCS = '0ns'
-
from MemCon example, tRRD is 4ns with 2ns tCK
- tRRD = '4ns'
-
from MemCon example, tFAW is 30ns with 2ns tCK
- tXAW = '30ns'
- activation_limit = 4
-
4tCK
- tXP = '8ns'
-
start with tRFC + tXP -> 160ns + 8ns = 168ns
- tXS = '168ns'
-# A single HBM x64 interface (one command and address bus), with
-# default timings based on HBM gen1 and data publicly released
-# A 4H stack is defined, 8Gb per die for a total of 4GB of memory.
-# Note: This defines a pseudo-channel with a unique controller
-# instantiated per pseudo-channel
-# Stay at same IO rate (1Gbps) to maintain timing relationship with
-# HBM gen1 class (HBM_1000_4H_x128) where possible
-class HBM_1000_4H_1x64(HBM_1000_4H_1x128):
-
For HBM gen2 with pseudo-channel mode, configure 2X channels.
-
Configuration defines a single pseudo channel, with the capacity
-
set to (full_ stack_capacity / 16) based on 8Gb dies
-
To use all 16 pseudo channels, set 'channels' parameter to 16 in
-
system configuration
-
64-bit pseudo-channel interface
- device_bus_width = 64
-
HBM pseudo-channel only supports BL4
- burst_length = 4
-
size of channel in bytes, 4H stack of 8Gb dies is 4GB per stack;
-
with 16 channels, 256MB per channel
- device_size = '256MB'
-
page size is halved with pseudo-channel; maintaining the same number
-
of rows per pseudo-channel with 2X banks across 2 channels
- device_rowbuffer_size = '1kB'
-
HBM has 8 or 16 banks depending on capacity
-
Starting with 4Gb dies, 16 banks are defined
- banks_per_rank = 16
-
reset tRFC for larger, 8Gb device
-
use HBM1 4Gb value as a starting point
- tRFC = '260ns'
-
start with tRFC + tXP -> 260ns + 8ns = 268ns
- tXS = '268ns'
-
Default different rank bus delay to 2 CK, @1000 MHz = 2 ns
- tCS = '2ns'
- tREFI = '3.9us'
-
active powerdown and precharge powerdown exit time
- tXP = '10ns'
-
self refresh exit time
- tXS = '65ns'
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture
-# burst of 32, which means bursts can be interleaved
-class LPDDR5_5500_1x16_BG_BL32(DRAMCtrl):
-
Increase buffer size to account for more bank resources
- read_buffer_size = 64
-
Set page policy to better suit DMC Huxley
- page_policy = 'close_adaptive'
-
16-bit channel interface
- device_bus_width = 16
-
LPDDR5 is a BL16 or BL32 device
-
With BG mode, BL16 and BL32 are supported
-
Use BL32 for higher command bandwidth
- burst_length = 32
-
size of device in bytes
- device_size = '1GB'
-
2kB page with BG mode
- device_rowbuffer_size = '2kB'
-
Use a 1x16 configuration
- devices_per_rank = 1
-
Use a single rank
- ranks_per_channel = 1
-
LPDDR5 supports configurable bank options
-
8B : BL32, all frequencies
-
16B : BL32 or BL16, <=3.2Gbps
-
16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
-
Initial configuration will have 16 banks with Bank Group Arch
-
to maximize resources and enable higher data rates
- banks_per_rank = 16
- bank_groups_per_rank = 4
-
5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
- tCK = '1.455ns'
-
Greater of 2 CK or 18ns
- tRCD = '18ns'
-
Base RL is 16 CK @ 687.5 MHz = 23.28ns
- tCL = '23.280ns'
-
Greater of 2 CK or 18ns
- tRP = '18ns'
-
Greater of 3 CK or 42ns
- tRAS = '42ns'
-
Greater of 3 CK or 34ns
- tWR = '34ns'
-
active powerdown and precharge powerdown exit time
-
Greater of 3 CK or 7ns
- tXP = '7ns'
-
self refresh exit time (tRFCab + 7.5ns)
- tXS = '217.5ns'
-
Greater of 2 CK or 7.5 ns minus 2 CK
- tRTP = '4.59ns'
-
With BG architecture, burst of 32 transferred in two 16-beat
-
sub-bursts, with a 16-beat gap in between.
-
Each 16-beat sub-burst is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
-
tBURST is the delay to transfer the Bstof32 = 6 CK @ 687.5 MHz
- tBURST = '8.73ns'
-
can interleave a Bstof32 from another bank group at tBURST_MIN
-
16-beats is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
- tBURST_MIN = '2.91ns'
-
tBURST_MAX is the maximum burst delay for same bank group timing
-
this is 8 CK @ 687.5 MHz
- tBURST_MAX = '11.64ns'
-
8 CK @ 687.5 MHz
- tCCD_L = "11.64ns"
-
LPDDR5, 8 Gbit/channel for 280ns tRFCab
- tRFC = '210ns'
- tREFI = '3.9us'
-
Greater of 4 CK or 6.25 ns
- tWTR = '6.25ns'
-
Greater of 4 CK or 12 ns
- tWTR_L = '12ns'
-
Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL
-
tWCKDQ0/tCK will be 1 CK for most cases
-
For gem5 RL = WL and BL/n is already accounted for with tBURST
-
The result is that an additional 1 CK is required
- tRTW = '1.455ns'
-
Default different rank bus delay to 2 CK, @687.5 MHz = 2.91 ns
- tCS = '2.91ns'
-
2 CK
- tPPD = '2.91ns'
-
Greater of 2 CK or 5 ns
- tRRD = '5ns'
- tRRD_L = '5ns'
-
With Bank Group Arch mode tFAW is 20 ns
- tXAW = '20ns'
- activation_limit = 4
-
at 5Gbps, 4:1 WCK to CK ratio required
-
2 data beats per WCK (DDR) -> 8 per CK
- beats_per_clock = 8
-
2 cycles required to send activate command
-
2 command phases can be sent back-to-back or
-
with a gap up to tAAD = 8 CK
- two_cycle_activate = True
- tAAD = '11.640ns'
- data_clock_sync = True
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture, burst of 16
-class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32):
-
LPDDR5 is a BL16 or BL32 device
-
With BG mode, BL16 and BL32 are supported
-
Use BL16 for smaller access granularity
- burst_length = 16
-
For Bstof16 with BG arch, 2 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST = '2.91ns'
- tBURST_MIN = '2.91ns'
-
For Bstof16 with BG arch, 4 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST_MAX = '5.82ns'
-
4 CK @ 687.5 MHz
- tCCD_L = "5.82ns"
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 8-bank mode, burst of 32
-class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32):
-
4kB page with 8B mode
- device_rowbuffer_size = '4kB'
-
LPDDR5 supports configurable bank options
-
8B : BL32, all frequencies
-
16B : BL32 or BL16, <=3.2Gbps
-
16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
-
Select 8B
- banks_per_rank = 8
- bank_groups_per_rank = 0
-
For Bstof32 with 8B mode, 4 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST = '5.82ns'
- tBURST_MIN = '5.82ns'
- tBURST_MAX = '5.82ns'
-
Greater of 4 CK or 12 ns
- tWTR = '12ns'
-
Greater of 2 CK or 10 ns
- tRRD = '10ns'
-
With 8B mode tFAW is 40 ns
- tXAW = '40ns'
- activation_limit = 4
-
Reset BG arch timing for 8B mode
- tCCD_L = "0ns"
- tRRD_L = "0ns"
- tWTR_L = "0ns"
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture
-# burst of 32, which means bursts can be interleaved
-class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32):
-
6.4Gb/s DDR with 4:1 WCK:CK ratio for 800 MHz CK
- tCK = '1.25ns'
-
Base RL is 17 CK @ 800 MHz = 21.25ns
- tCL = '21.25ns'
-
With BG architecture, burst of 32 transferred in two 16-beat
-
sub-bursts, with a 16-beat gap in between.
-
Each 16-beat sub-burst is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
-
tBURST is the delay to transfer the Bstof32 = 6 CK @ 800 MHz
- tBURST = '7.5ns'
-
can interleave a Bstof32 from another bank group at tBURST_MIN
-
16-beats is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
- tBURST_MIN = '2.5ns'
-
tBURST_MAX is the maximum burst delay for same bank group timing
-
this is 8 CK @ 800 MHz
- tBURST_MAX = '10ns'
-
8 CK @ 800 MHz
- tCCD_L = "10ns"
-
Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL
-
tWCKDQ0/tCK will be 1 CK for most cases
-
For gem5 RL = WL and BL/n is already accounted for with tBURST
-
The result is that an additional 1 CK is required
- tRTW = '1.25ns'
-
Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
2 CK
- tPPD = '2.5ns'
-
2 command phases can be sent back-to-back or
-
with a gap up to tAAD = 8 CK
- tAAD = '10ns'
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on initial
-# JEDEC specification
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture, burst of 16
-class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32):
-
LPDDR5 is a BL16 or BL32 device
-
With BG mode, BL16 and BL32 are supported
-
Use BL16 for smaller access granularity
- burst_length = 16
-
For Bstof16 with BG arch, 2 CK @ 800 MHz with 4:1 clock ratio
- tBURST = '2.5ns'
- tBURST_MIN = '2.5ns'
-
For Bstof16 with BG arch, 4 CK @ 800 MHz with 4:1 clock ratio
- tBURST_MAX = '5ns'
-
4 CK @ 800 MHz
- tCCD_L = "5ns"
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 8-bank mode, burst of 32
-class LPDDR5_6400_1x16_8B_BL32(LPDDR5_6400_1x16_BG_BL32):
-
4kB page with 8B mode
- device_rowbuffer_size = '4kB'
-
LPDDR5 supports configurable bank options
-
8B : BL32, all frequencies
-
16B : BL32 or BL16, <=3.2Gbps
-
16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
-
Select 8B
- banks_per_rank = 8
- bank_groups_per_rank = 0
-
For Bstof32 with 8B mode, 4 CK @ 800 MHz with 4:1 clock ratio
- tBURST = '5ns'
- tBURST_MIN = '5ns'
- tBURST_MAX = '5ns'
-
Greater of 4 CK or 12 ns
- tWTR = '12ns'
-
Greater of 2 CK or 10 ns
- tRRD = '10ns'
-
With 8B mode tFAW is 40 ns
- tXAW = '40ns'
- activation_limit = 4
-
Reset BG arch timing for 8B mode
- tCCD_L = "0ns"
- tRRD_L = "0ns"
- tWTR_L = "0ns"
diff --git a/src/mem/DRAMInterface.py b/src/mem/DRAMInterface.py
new file mode 100644
index 0000000..f571920
--- /dev/null
+++ b/src/mem/DRAMInterface.py
@@ -0,0 +1,1473 @@
+# Copyright (c) 2012-2020 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Copyright (c) 2013 Amin Farmahini-Farahani
+# Copyright (c) 2015 University of Kaiserslautern
+# Copyright (c) 2015 The University of Bologna
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from m5.params import *
+from m5.proxy import *
+
+from m5.objects.AbstractMemory import AbstractMemory
+
+# Enum for the address mapping. With Ch, Ra, Ba, Ro and Co denoting
+# channel, rank, bank, row and column, respectively, and going from
+# MSB to LSB. Available are RoRaBaChCo and RoRaBaCoCh, that are
+# suitable for an open-page policy, optimising for sequential accesses
+# hitting in the open row. For a closed-page policy, RoCoRaBaCh
+# maximises parallelism.
+class AddrMap(Enum): vals = ['RoRaBaChCo', 'RoRaBaCoCh', 'RoCoRaBaCh']
+
+# Enum for the page policy, either open, open_adaptive, close, or
+# close_adaptive.
+class PageManage(Enum): vals = ['open', 'open_adaptive', 'close',
+class DRAMInterface(AbstractMemory):
- type = 'DRAMInterface'
- cxx_header = "mem/dram_ctrl.hh"
-
Allow the interface to set required controller buffer sizes
-
each entry corresponds to a burst for the specific DRAM
-
configuration (e.g. x32 with burst length 8 is 32 bytes) and not
-
the cacheline size or request/packet size
- write_buffer_size = Param.Unsigned(64, "Number of write queue entries")
- read_buffer_size = Param.Unsigned(32, "Number of read queue entries")
-
scheduler, address map and page policy
- addr_mapping = Param.AddrMap('RoRaBaCoCh', "Address mapping policy")
- page_policy = Param.PageManage('open_adaptive', "Page management
policy")
-
enforce a limit on the number of accesses per row
- max_accesses_per_row = Param.Unsigned(16, "Max accesses per row
before "
-
"closing");
-
size of DRAM Chip in Bytes
- device_size = Param.MemorySize("Size of DRAM chip")
-
the physical organisation of the DRAM
- device_bus_width = Param.Unsigned("data bus width in bits for each
DRAM "\
-
"device/chip")
- burst_length = Param.Unsigned("Burst length (BL) in beats")
- device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\
-
"device/chip")
- devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
- ranks_per_channel = Param.Unsigned("Number of ranks per channel")
-
default to 0 bank groups per rank, indicating bank group architecture
-
is not used
-
update per memory class when bank group architecture is supported
- bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per
rank")
- banks_per_rank = Param.Unsigned("Number of banks per rank")
-
Enable DRAM powerdown states if True. This is False by default due to
-
performance being lower when enabled
- enable_dram_powerdown = Param.Bool(False, "Enable powerdown states")
-
For power modelling we need to know if the DRAM has a DLL or not
- dll = Param.Bool(True, "DRAM has DLL or not")
-
DRAMPower provides in addition to the core power, the possibility to
-
include RD/WR termination and IO power. This calculation assumes some
-
default values. The integration of DRAMPower with gem5 does not
include
-
IO and RD/WR termination power by default. This might be added as an
-
additional feature in the future.
-
timing behaviour and constraints - all in nanoseconds
-
the base clock period of the DRAM
- tCK = Param.Latency("Clock period")
-
the amount of time in nanoseconds from issuing an activate command
-
to the data being available in the row buffer for a read/write
- tRCD = Param.Latency("RAS to CAS delay")
-
the time from issuing a read/write command to seeing the actual data
- tCL = Param.Latency("CAS latency")
-
minimum time between a precharge and subsequent activate
- tRP = Param.Latency("Row precharge time")
-
minimum time between an activate and a precharge to the same row
- tRAS = Param.Latency("ACT to PRE delay")
-
minimum time between a write data transfer and a precharge
- tWR = Param.Latency("Write recovery time")
-
minimum time between a read and precharge command
- tRTP = Param.Latency("Read to precharge")
-
time to complete a burst transfer, typically the burst length
-
divided by two due to the DDR bus, but by making it a parameter
-
it is easier to also evaluate SDR memories like WideIO.
-
This parameter has to account for burst length.
-
Read/Write requests with data size larger than one full burst are
broken
access,
-
which could be greater than tBURST when the memory access time is
greater
-
than tBURST
- tBURST_MAX = Param.Latency(Self.tBURST, "Column access delay")
-
tBURST_MIN is the minimum delay between bursts, which could be less
than
-
tBURST when interleaving is supported
- tBURST_MIN = Param.Latency(Self.tBURST, "Minimum delay between bursts")
-
CAS-to-CAS delay for bursts to the same bank group
-
only utilized with bank group architectures; set to 0 for default
case
-
tBURST is equivalent to tCCD_S; no explicit parameter required
-
for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")
-
Write-to-Write delay for bursts to the same bank group
-
only utilized with bank group architectures; set to 0 for default
case
-
This will be used to enable different same bank group delays
-
for writes versus reads
- tCCD_L_WR = Param.Latency(Self.tCCD_L,
-
"Same bank group Write to Write delay")
-
time taken to complete one refresh cycle (N rows in all banks)
- tRFC = Param.Latency("Refresh cycle time")
-
refresh command interval, how often a "ref" command needs
-
to be sent. It is 7.8 us for a 64ms refresh requirement
- tREFI = Param.Latency("Refresh command interval")
-
write-to-read, same rank turnaround penalty
- tWTR = Param.Latency("Write to read, same rank switching time")
-
write-to-read, same rank turnaround penalty for same bank group
- tWTR_L = Param.Latency(Self.tWTR, "Write to read, same rank switching "
-
"time, same bank group")
-
read-to-write, same rank turnaround penalty
- tRTW = Param.Latency("Read to write, same rank switching time")
-
rank-to-rank bus delay penalty
-
this does not correlate to a memory timing parameter and encompasses:
-
1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD
-
different rank bus delay
- tCS = Param.Latency("Rank to rank switching time")
-
minimum precharge to precharge delay time
- tPPD = Param.Latency("0ns", "PRE to PRE delay")
-
maximum delay between two-cycle ACT command phases
- tAAD = Param.Latency(Self.tCK,
-
"Maximum delay between two-cycle ACT commands")
- two_cycle_activate = Param.Bool(False,
-
"Two cycles required to send activate")
-
minimum row activate to row activate delay time
- tRRD = Param.Latency("ACT to ACT delay")
-
only utilized with bank group architectures; set to 0 for default
case
- tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")
-
time window in which a maximum number of activates are allowed
-
to take place, set to 0 to disable
- tXAW = Param.Latency("X activation window")
- activation_limit = Param.Unsigned("Max number of activates in window")
-
time to exit power-down mode
-
Exit power-down to next valid command delay
- tXP = Param.Latency("0ns", "Power-up Delay")
-
Exit Powerdown to commands requiring a locked DLL
- tXPDLL = Param.Latency("0ns", "Power-up Delay with locked DLL")
-
time to exit self-refresh mode
- tXS = Param.Latency("0ns", "Self-refresh exit latency")
-
time to exit self-refresh mode with locked DLL
- tXSDLL = Param.Latency("0ns", "Self-refresh exit latency DLL")
-
number of data beats per clock. with DDR, default is 2, one per edge
- beats_per_clock = Param.Unsigned(2, "Data beats per clock")
- data_clock_sync = Param.Bool(False, "Synchronization commands
required")
-
Currently rolled into other params
- ######################################################################
-
tRC - assumed to be tRAS + tRP
-
Power Behaviour and Constraints
-
DRAMs like LPDDR and WideIO have 2 external voltage domains. These
are
-
defined as VDD and VDD2. Each current is defined for each voltage
domain
-
separately. For example, current IDD0 is active-precharge current for
-
voltage domain VDD and current IDD02 is active-precharge current for
-
voltage domain VDD2.
-
By default all currents are set to 0mA. Users who are only
interested in
-
the performance of DRAMs can leave them at 0.
-
Operating 1 Bank Active-Precharge current
- IDD0 = Param.Current("0mA", "Active precharge current")
-
Operating 1 Bank Active-Precharge current multiple voltage Range
- IDD02 = Param.Current("0mA", "Active precharge current VDD2")
-
Precharge Power-down Current: Slow exit
- IDD2P0 = Param.Current("0mA", "Precharge Powerdown slow")
-
Precharge Power-down Current: Slow exit multiple voltage Range
- IDD2P02 = Param.Current("0mA", "Precharge Powerdown slow VDD2")
-
Precharge Power-down Current: Fast exit
- IDD2P1 = Param.Current("0mA", "Precharge Powerdown fast")
-
Precharge Power-down Current: Fast exit multiple voltage Range
- IDD2P12 = Param.Current("0mA", "Precharge Powerdown fast VDD2")
-
Precharge Standby current
- IDD2N = Param.Current("0mA", "Precharge Standby current")
-
Precharge Standby current multiple voltage range
- IDD2N2 = Param.Current("0mA", "Precharge Standby current VDD2")
-
Active Power-down current: slow exit
- IDD3P0 = Param.Current("0mA", "Active Powerdown slow")
-
Active Power-down current: slow exit multiple voltage range
- IDD3P02 = Param.Current("0mA", "Active Powerdown slow VDD2")
-
Active Power-down current : fast exit
- IDD3P1 = Param.Current("0mA", "Active Powerdown fast")
-
Active Power-down current : fast exit multiple voltage range
- IDD3P12 = Param.Current("0mA", "Active Powerdown fast VDD2")
-
Active Standby current
- IDD3N = Param.Current("0mA", "Active Standby current")
-
Active Standby current multiple voltage range
- IDD3N2 = Param.Current("0mA", "Active Standby current VDD2")
-
Burst Read Operating Current
- IDD4R = Param.Current("0mA", "READ current")
-
Burst Read Operating Current multiple voltage range
- IDD4R2 = Param.Current("0mA", "READ current VDD2")
-
Burst Write Operating Current
- IDD4W = Param.Current("0mA", "WRITE current")
-
Burst Write Operating Current multiple voltage range
- IDD4W2 = Param.Current("0mA", "WRITE current VDD2")
-
Refresh Current
- IDD5 = Param.Current("0mA", "Refresh current")
-
Refresh Current multiple voltage range
- IDD52 = Param.Current("0mA", "Refresh current VDD2")
-
Self-Refresh Current
- IDD6 = Param.Current("0mA", "Self-refresh Current")
-
Self-Refresh Current multiple voltage range
- IDD62 = Param.Current("0mA", "Self-refresh Current VDD2")
-
Main voltage range of the DRAM
- VDD = Param.Voltage("0V", "Main Voltage Range")
-
Second voltage range defined by some DRAMs
- VDD2 = Param.Voltage("0V", "2nd Voltage Range")
+# A single DDR3-1600 x64 channel (one command and address bus), with
+# timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in
+# an 8x8 configuration.
+class DDR3_1600_8x8(DRAMInterface):
-
size of device in bytes
- device_size = '512MB'
-
8x8 configuration, 8 devices each with an 8-bit interface
- device_bus_width = 8
-
DDR3 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
- device_rowbuffer_size = '1kB'
-
8x8 configuration, so 8 devices
- devices_per_rank = 8
-
Use two ranks
- ranks_per_channel = 2
-
DDR3 has 8 banks in all configurations
- banks_per_rank = 8
-
800 MHz
- tCK = '1.25ns'
-
8 beats across an x64 interface translates to 4 clocks @ 800 MHz
- tBURST = '5ns'
-
DDR3-1600 11-11-11
- tRCD = '13.75ns'
- tCL = '13.75ns'
- tRP = '13.75ns'
- tRAS = '35ns'
- tRRD = '6ns'
- tXAW = '30ns'
- activation_limit = 4
- tRFC = '260ns'
- tWR = '15ns'
-
Greater of 4 CK or 7.5 ns
- tWTR = '7.5ns'
-
Greater of 4 CK or 7.5 ns
- tRTP = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
- tRTW = '2.5ns'
-
Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
<=85C, half for >85C
- tREFI = '7.8us'
-
active powerdown and precharge powerdown exit time
- tXP = '6ns'
-
self refresh exit time
- tXS = '270ns'
-
Current values from datasheet Die Rev E,J
- IDD0 = '55mA'
- IDD2N = '32mA'
- IDD3N = '38mA'
- IDD4W = '125mA'
- IDD4R = '157mA'
- IDD5 = '235mA'
- IDD3P1 = '38mA'
- IDD2P1 = '32mA'
- IDD6 = '20mA'
- VDD = '1.5V'
+# A single HMC-2500 x32 model based on:
+# [1] DRAMSpec: a high-level DRAM bank modelling tool
+# developed at the University of Kaiserslautern. This high level tool
+# uses RC (resistance-capacitance) and CV (capacitance-voltage) models to
+# estimate the DRAM bank latency and power numbers.
+# [2] High performance AXI-4.0 based interconnect for extensible smart
memory
+# cubes (E. Azarkhish et. al)
+# Assumed for the HMC model is a 30 nm technology node.
+# The modelled HMC consists of 4 Gbit layers which sum up to 2GB of memory
(4
+# layers).
+# Each layer has 16 vaults and each vault consists of 2 banks per layer.
+# In order to be able to use the same controller used for 2D DRAM
generations
+# for HMC, the following analogy is done:
+# Channel (DDR) => Vault (HMC)
+# device_size (DDR) => size of a single layer in a vault
+# ranks per channel (DDR) => number of layers
+# banks per rank (DDR) => banks per layer
+# devices per rank (DDR) => devices per layer ( 1 for HMC).
+# The parameters for which no input is available are inherited from the
DDR3
+# configuration.
+# This configuration includes the latencies from the DRAM to the logic
layer
+# of the HMC
+class HMC_2500_1x32(DDR3_1600_8x8):
-
size of device
-
two banks per device with each bank 4MB [2]
- device_size = '8MB'
-
1x32 configuration, 1 device with 32 TSVs [2]
- device_bus_width = 32
-
HMC is a BL8 device [2]
- burst_length = 8
-
Each device has a page (row buffer) size of 256 bytes [2]
- device_rowbuffer_size = '256B'
-
1x32 configuration, so 1 device [2]
- devices_per_rank = 1
-
4 layers so 4 ranks [2]
- ranks_per_channel = 4
-
HMC has 2 banks per layer [2]
-
Each layer represents a rank. With 4 layers and 8 banks in total,
each
-
layer has 2 banks; thus 2 banks per rank.
- banks_per_rank = 2
-
1250 MHz [2]
- tCK = '0.8ns'
-
8 beats across an x32 interface translates to 4 clocks @ 1250 MHz
- tBURST = '3.2ns'
-
Values using DRAMSpec HMC model [1]
- tRCD = '10.2ns'
- tCL = '9.9ns'
- tRP = '7.7ns'
- tRAS = '21.6ns'
-
tRRD depends on the power supply network for each vendor.
-
We assume a tRRD of a double bank approach to be equal to 4 clock
-
cycles (Assumption)
- tRRD = '3.2ns'
-
activation limit is set to 0 since there are only 2 banks per vault
-
layer.
- activation_limit = 0
-
Values using DRAMSpec HMC model [1]
- tRFC = '59ns'
- tWR = '8ns'
- tRTP = '4.9ns'
-
Default different rank bus delay assumed to 1 CK for TSVs, @1250 MHz
=
-
0.8 ns (Assumption)
- tCS = '0.8ns'
-
Value using DRAMSpec HMC model [1]
- tREFI = '3.9us'
-
The default page policy in the vault controllers is simple closed
page
-
[2] nevertheless 'close' policy opens and closes the row multiple
times
-
for bursts larger than 32Bytes. For this reason we
use 'close_adaptive'
- page_policy = 'close_adaptive'
-
RoCoRaBaCh resembles the default address mapping in HMC
- addr_mapping = 'RoCoRaBaCh'
-
These parameters do not directly correlate with buffer_size in real
-
hardware. Nevertheless, their value has been tuned to achieve a
-
bandwidth similar to the cycle-accurate model in [2]
- write_buffer_size = 32
- read_buffer_size = 32
+# A single DDR3-2133 x64 channel refining a selected subset of the
+# options for the DDR-1600 configuration, based on the same DDR3-1600
+# 4 Gbit datasheet (Micron MT41J512M8). Most parameters are kept
+# consistent across the two configurations.
+class DDR3_2133_8x8(DDR3_1600_8x8):
-
1066 MHz
- tCK = '0.938ns'
-
8 beats across an x64 interface translates to 4 clocks @ 1066 MHz
- tBURST = '3.752ns'
-
DDR3-2133 14-14-14
- tRCD = '13.09ns'
- tCL = '13.09ns'
- tRP = '13.09ns'
- tRAS = '33ns'
- tRRD = '5ns'
- tXAW = '25ns'
-
Current values from datasheet
- IDD0 = '70mA'
- IDD2N = '37mA'
- IDD3N = '44mA'
- IDD4W = '157mA'
- IDD4R = '191mA'
- IDD5 = '250mA'
- IDD3P1 = '44mA'
- IDD2P1 = '43mA'
- IDD6 ='20mA'
- VDD = '1.5V'
+# A single DDR4-2400 x64 channel (one command and address bus), with
+# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A2G4)
+# in an 16x4 configuration.
+# Total channel capacity is 32GB
+# 16 devices/rank * 2 ranks/channel * 1GB/device = 32GB/channel
+class DDR4_2400_16x4(DRAMInterface):
-
size of device
- device_size = '1GB'
-
16x4 configuration, 16 devices each with a 4-bit interface
- device_bus_width = 4
-
DDR4 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 512 byte (1K columns x4)
- device_rowbuffer_size = '512B'
-
16x4 configuration, so 16 devices
- devices_per_rank = 16
-
Match our DDR3 configurations which is dual rank
- ranks_per_channel = 2
-
DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
-
Set to 4 for x4 case
- bank_groups_per_rank = 4
-
DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
-
configurations). Currently we do not capture the additional
-
constraints incurred by the bank groups
- banks_per_rank = 16
-
override the default buffer sizes and go for something larger to
-
accommodate the larger bank count
- write_buffer_size = 128
- read_buffer_size = 64
-
1200 MHz
- tCK = '0.833ns'
-
8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
-
tBURST is equivalent to the CAS-to-CAS delay (tCCD)
-
With bank group architectures, tBURST represents the CAS-to-CAS
-
delay for bursts to different bank groups (tCCD_S)
- tBURST = '3.332ns'
-
@2400 data rate, tCCD_L is 6 CK
-
CAS-to-CAS delay for bursts to the same bank group
-
tBURST is equivalent to tCCD_S; no explicit parameter required
-
for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = '5ns';
-
DDR4-2400 17-17-17
- tRCD = '14.16ns'
- tCL = '14.16ns'
- tRP = '14.16ns'
- tRAS = '32ns'
-
RRD_S (different bank group) for 512B page is MAX(4 CK, 3.3ns)
- tRRD = '3.332ns'
-
RRD_L (same bank group) for 512B page is MAX(4 CK, 4.9ns)
- tRRD_L = '4.9ns';
-
tFAW for 512B page is MAX(16 CK, 13ns)
- tXAW = '13.328ns'
- activation_limit = 4
-
tRFC is 350ns
- tRFC = '350ns'
- tWR = '15ns'
-
Here using the average of WTR_S and WTR_L
- tWTR = '5ns'
-
Greater of 4 CK or 7.5 ns
- tRTP = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666
ns
- tRTW = '1.666ns'
-
Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns
- tCS = '1.666ns'
-
<=85C, half for >85C
- tREFI = '7.8us'
-
active powerdown and precharge powerdown exit time
- tXP = '6ns'
-
self refresh exit time
-
exit delay to ACT, PRE, PREALL, REF, SREF Enter, and PD Enter is:
-
tRFC + 10ns = 340ns
- tXS = '340ns'
-
Current values from datasheet
- IDD0 = '43mA'
- IDD02 = '3mA'
- IDD2N = '34mA'
- IDD3N = '38mA'
- IDD3N2 = '3mA'
- IDD4W = '103mA'
- IDD4R = '110mA'
- IDD5 = '250mA'
- IDD3P1 = '32mA'
- IDD2P1 = '25mA'
- IDD6 = '30mA'
- VDD = '1.2V'
- VDD2 = '2.5V'
+# A single DDR4-2400 x64 channel (one command and address bus), with
+# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A1G8)
+# in an 8x8 configuration.
+# Total channel capacity is 16GB
+# 8 devices/rank * 2 ranks/channel * 1GB/device = 16GB/channel
+class DDR4_2400_8x8(DDR4_2400_16x4):
-
8x8 configuration, 8 devices each with an 8-bit interface
- device_bus_width = 8
-
Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
- device_rowbuffer_size = '1kB'
-
8x8 configuration, so 8 devices
- devices_per_rank = 8
-
RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
- tRRD_L = '4.9ns';
- tXAW = '21ns'
-
Current values from datasheet
- IDD0 = '48mA'
- IDD3N = '43mA'
- IDD4W = '123mA'
- IDD4R = '135mA'
- IDD3P1 = '37mA'
+# A single DDR4-2400 x64 channel (one command and address bus), with
+# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A512M16)
+# in an 4x16 configuration.
+# Total channel capacity is 4GB
+# 4 devices/rank * 1 ranks/channel * 1GB/device = 4GB/channel
+class DDR4_2400_4x16(DDR4_2400_16x4):
-
4x16 configuration, 4 devices each with a 16-bit interface
- device_bus_width = 16
-
Each device has a page (row buffer) size of 2 Kbyte (1K columns x16)
- device_rowbuffer_size = '2kB'
-
4x16 configuration, so 4 devices
- devices_per_rank = 4
-
Single rank for x16
- ranks_per_channel = 1
-
DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
-
Set to 2 for x16 case
- bank_groups_per_rank = 2
-
DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
-
configurations). Currently we do not capture the additional
-
constraints incurred by the bank groups
- banks_per_rank = 8
-
RRD_S (different bank group) for 2K page is MAX(4 CK, 5.3ns)
- tRRD = '5.3ns'
-
RRD_L (same bank group) for 2K page is MAX(4 CK, 6.4ns)
- tRRD_L = '6.4ns';
- tXAW = '30ns'
-
Current values from datasheet
- IDD0 = '80mA'
- IDD02 = '4mA'
- IDD2N = '34mA'
- IDD3N = '47mA'
- IDD4W = '228mA'
- IDD4R = '243mA'
- IDD5 = '280mA'
- IDD3P1 = '41mA'
+# A single LPDDR2-S4 x32 interface (one command/address bus), with
+# default timings based on a LPDDR2-1066 4 Gbit part (Micron MT42L128M32D1)
+# in a 1x32 configuration.
+class LPDDR2_S4_1066_1x32(DRAMInterface):
-
No DLL in LPDDR2
- dll = False
-
size of device
- device_size = '512MB'
-
1x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
LPDDR2_S4 is a BL4 and BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 1KB
-
(this depends on the memory density)
- device_rowbuffer_size = '1kB'
-
1x32 configuration, so 1 device
- devices_per_rank = 1
-
Use a single rank
- ranks_per_channel = 1
-
LPDDR2-S4 has 8 banks in all configurations
- banks_per_rank = 8
-
533 MHz
- tCK = '1.876ns'
-
Fixed at 15 ns
- tRCD = '15ns'
-
8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle time
- tCL = '15ns'
-
Pre-charge one bank 15 ns (all banks 18 ns)
- tRP = '15ns'
- tRAS = '42ns'
- tWR = '15ns'
- tRTP = '7.5ns'
-
8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz.
-
Note this is a BL8 DDR device.
-
Requests larger than 32 bytes are broken down into multiple requests
-
in the controller
- tBURST = '7.5ns'
-
LPDDR2-S4, 4 Gbit
- tRFC = '130ns'
- tREFI = '3.9us'
-
active powerdown and precharge powerdown exit time
- tXP = '7.5ns'
-
self refresh exit time
- tXS = '140ns'
-
Irrespective of speed grade, tWTR is 7.5 ns
- tWTR = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @533 MHz = 3.75 ns
- tRTW = '3.75ns'
-
Default different rank bus delay to 2 CK, @533 MHz = 3.75 ns
- tCS = '3.75ns'
-
Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
Irrespective of density, tFAW is 50 ns
- tXAW = '50ns'
- activation_limit = 4
-
Current values from datasheet
- IDD0 = '15mA'
- IDD02 = '70mA'
- IDD2N = '2mA'
- IDD2N2 = '30mA'
- IDD3N = '2.5mA'
- IDD3N2 = '30mA'
- IDD4W = '10mA'
- IDD4W2 = '190mA'
- IDD4R = '3mA'
- IDD4R2 = '220mA'
- IDD5 = '40mA'
- IDD52 = '150mA'
- IDD3P1 = '1.2mA'
- IDD3P12 = '8mA'
- IDD2P1 = '0.6mA'
- IDD2P12 = '0.8mA'
- IDD6 = '1mA'
- IDD62 = '3.2mA'
- VDD = '1.8V'
- VDD2 = '1.2V'
+# A single WideIO x128 interface (one command and address bus), with
+# default timings based on an estimated WIO-200 8 Gbit part.
+class WideIO_200_1x128(DRAMInterface):
-
No DLL for WideIO
- dll = False
-
size of device
- device_size = '1024MB'
-
1x128 configuration, 1 device with a 128-bit interface
- device_bus_width = 128
-
This is a BL4 device
- burst_length = 4
-
Each device has a page (row buffer) size of 4KB
-
(this depends on the memory density)
- device_rowbuffer_size = '4kB'
-
1x128 configuration, so 1 device
- devices_per_rank = 1
-
Use one rank for a one-high die stack
- ranks_per_channel = 1
-
WideIO has 4 banks in all configurations
- banks_per_rank = 4
-
200 MHz
- tCK = '5ns'
-
WIO-200
- tRCD = '18ns'
- tCL = '18ns'
- tRP = '18ns'
- tRAS = '42ns'
- tWR = '15ns'
-
Read to precharge is same as the burst
- tRTP = '20ns'
-
4 beats across an x128 SDR interface translates to 4 clocks @ 200
MHz.
-
Note this is a BL4 SDR device.
- tBURST = '20ns'
-
WIO 8 Gb
- tRFC = '210ns'
-
WIO 8 Gb, <=85C, half for >85C
- tREFI = '3.9us'
-
Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns
- tWTR = '15ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @200 MHz = 10 ns
- tRTW = '10ns'
-
Default different rank bus delay to 2 CK, @200 MHz = 10 ns
- tCS = '10ns'
-
Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
Two instead of four activation window
- tXAW = '50ns'
- activation_limit = 2
-
The WideIO specification does not provide current information
+# A single LPDDR3 x32 interface (one command/address bus), with
+# default timings based on a LPDDR3-1600 4 Gbit part (Micron
+# EDF8132A1MC) in a 1x32 configuration.
+class LPDDR3_1600_1x32(DRAMInterface):
-
No DLL for LPDDR3
- dll = False
-
size of device
- device_size = '512MB'
-
1x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
LPDDR3 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 4KB
- device_rowbuffer_size = '4kB'
-
1x32 configuration, so 1 device
- devices_per_rank = 1
-
Technically the datasheet is a dual-rank package, but for
-
comparison with the LPDDR2 config we stick to a single rank
- ranks_per_channel = 1
-
LPDDR3 has 8 banks in all configurations
- banks_per_rank = 8
-
800 MHz
- tCK = '1.25ns'
- tRCD = '18ns'
-
12 CK read latency, 6 CK write latency @ 800 MHz, 1.25 ns cycle time
- tCL = '15ns'
- tRAS = '42ns'
- tWR = '15ns'
-
Greater of 4 CK or 7.5 ns, 4 CK @ 800 MHz = 5 ns
- tRTP = '7.5ns'
-
Pre-charge one bank 18 ns (all banks 21 ns)
- tRP = '18ns'
-
8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz.
-
Note this is a BL8 DDR device.
-
Requests larger than 32 bytes are broken down into multiple requests
-
in the controller
- tBURST = '5ns'
-
LPDDR3, 4 Gb
- tRFC = '130ns'
- tREFI = '3.9us'
-
active powerdown and precharge powerdown exit time
- tXP = '7.5ns'
-
self refresh exit time
- tXS = '140ns'
-
Irrespective of speed grade, tWTR is 7.5 ns
- tWTR = '7.5ns'
-
Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
- tRTW = '2.5ns'
-
Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
Irrespective of size, tFAW is 50 ns
- tXAW = '50ns'
- activation_limit = 4
-
Current values from datasheet
- IDD0 = '8mA'
- IDD02 = '60mA'
- IDD2N = '0.8mA'
- IDD2N2 = '26mA'
- IDD3N = '2mA'
- IDD3N2 = '34mA'
- IDD4W = '2mA'
- IDD4W2 = '190mA'
- IDD4R = '2mA'
- IDD4R2 = '230mA'
- IDD5 = '28mA'
- IDD52 = '150mA'
- IDD3P1 = '1.4mA'
- IDD3P12 = '11mA'
- IDD2P1 = '0.8mA'
- IDD2P12 = '1.8mA'
- IDD6 = '0.5mA'
- IDD62 = '1.8mA'
- VDD = '1.8V'
- VDD2 = '1.2V'
+# A single GDDR5 x64 interface, with
+# default timings based on a GDDR5-4000 1 Gbit part (SK Hynix
+# H5GQ1H24AFR) in a 2x32 configuration.
+class GDDR5_4000_2x32(DRAMInterface):
-
size of device
- device_size = '128MB'
-
2x32 configuration, 2 devices each with a 32-bit interface
- device_bus_width = 32
-
GDDR5 is a BL8 device
- burst_length = 8
-
Each device has a page (row buffer) size of 2Kbits (256Bytes)
- device_rowbuffer_size = '256B'
-
2x32 configuration, so 2 devices
- devices_per_rank = 2
-
assume single rank
- ranks_per_channel = 1
-
GDDR5 has 4 bank groups
- bank_groups_per_rank = 4
-
GDDR5 has 16 banks with 4 bank groups
- banks_per_rank = 16
-
1000 MHz
- tCK = '1ns'
-
8 beats across an x64 interface translates to 2 clocks @ 1000 MHz
-
Data bus runs @2000 Mhz => DDR ( data runs at 4000 MHz )
-
8 beats at 4000 MHz = 2 beats at 1000 MHz
-
tBURST is equivalent to the CAS-to-CAS delay (tCCD)
-
With bank group architectures, tBURST represents the CAS-to-CAS
-
delay for bursts to different bank groups (tCCD_S)
- tBURST = '2ns'
-
@1000MHz data rate, tCCD_L is 3 CK
-
CAS-to-CAS delay for bursts to the same bank group
-
tBURST is equivalent to tCCD_S; no explicit parameter required
-
for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = '3ns';
- tRCD = '12ns'
-
tCL is not directly found in datasheet and assumed equal tRCD
- tCL = '12ns'
- tRP = '12ns'
- tRAS = '28ns'
-
RRD_S (different bank group)
-
RRD_S is 5.5 ns in datasheet.
-
rounded to the next multiple of tCK
- tRRD = '6ns'
-
RRD_L (same bank group)
-
RRD_L is 5.5 ns in datasheet.
-
rounded to the next multiple of tCK
- tRRD_L = '6ns'
- tXAW = '23ns'
-
tXAW < 4 x tRRD.
-
Therefore, activation limit is set to 0
- activation_limit = 0
- tRFC = '65ns'
- tWR = '12ns'
-
Here using the average of WTR_S and WTR_L
- tWTR = '5ns'
-
Read-to-Precharge 2 CK
- tRTP = '2ns'
-
Assume 2 cycles
- tRTW = '2ns'
+# A single HBM x128 interface (one command and address bus), with
+# default timings based on data publicly released
+# ("HBM: Memory Solution for High Performance Processors", MemCon, 2014),
+# IDD measurement values, and by extrapolating data from other classes.
+# Architecture values based on published HBM spec
+# A 4H stack is defined, 2Gb per die for a total of 1GB of memory.
+class HBM_1000_4H_1x128(DRAMInterface):
-
HBM gen1 supports up to 8 128-bit physical channels
-
Configuration defines a single channel, with the capacity
-
set to (full stack capacity / 8) based on 2Gb dies
-
To use all 8 channels, set 'channels' parameter to 8 in
-
system configuration
-
128-bit interface legacy mode
- device_bus_width = 128
-
HBM supports BL4 and BL2 (legacy mode only)
- burst_length = 4
-
size of channel in bytes, 4H stack of 2Gb dies is 1GB per stack;
-
with 8 channels, 128MB per channel
- device_size = '128MB'
- device_rowbuffer_size = '2kB'
-
1x128 configuration
- devices_per_rank = 1
-
HBM does not have a CS pin; set rank to 1
- ranks_per_channel = 1
-
HBM has 8 or 16 banks depending on capacity
-
2Gb dies have 8 banks
- banks_per_rank = 8
-
depending on frequency, bank groups may be required
-
will always have 4 bank groups when enabled
-
current specifications do not define the minimum frequency for
-
bank group architecture
-
setting bank_groups_per_rank to 0 to disable until range is defined
- bank_groups_per_rank = 0
-
500 MHz for 1Gbps DDR data rate
- tCK = '2ns'
-
use values from IDD measurement in JEDEC spec
-
use tRP value for tRCD and tCL similar to other classes
- tRP = '15ns'
- tRCD = '15ns'
- tCL = '15ns'
- tRAS = '33ns'
-
BL2 and BL4 supported, default to BL4
-
DDR @ 500 MHz means 4 * 2ns / 2 = 4ns
- tBURST = '4ns'
-
value for 2Gb device from JEDEC spec
- tRFC = '160ns'
-
value for 2Gb device from JEDEC spec
- tREFI = '3.9us'
-
extrapolate the following from LPDDR configs, using ns values
-
to minimize burst length, prefetch differences
- tWR = '18ns'
- tRTP = '7.5ns'
- tWTR = '10ns'
-
start with 2 cycles turnaround, similar to other memory classes
-
could be more with variations across the stack
- tRTW = '4ns'
-
single rank device, set to 0
- tCS = '0ns'
-
from MemCon example, tRRD is 4ns with 2ns tCK
- tRRD = '4ns'
-
from MemCon example, tFAW is 30ns with 2ns tCK
- tXAW = '30ns'
- activation_limit = 4
-
4tCK
- tXP = '8ns'
-
start with tRFC + tXP -> 160ns + 8ns = 168ns
- tXS = '168ns'
+# A single HBM x64 interface (one command and address bus), with
+# default timings based on HBM gen1 and data publicly released
+# A 4H stack is defined, 8Gb per die for a total of 4GB of memory.
+# Note: This defines a pseudo-channel with a unique controller
+# instantiated per pseudo-channel
+# Stay at same IO rate (1Gbps) to maintain timing relationship with
+# HBM gen1 class (HBM_1000_4H_x128) where possible
+class HBM_1000_4H_1x64(HBM_1000_4H_1x128):
-
For HBM gen2 with pseudo-channel mode, configure 2X channels.
-
Configuration defines a single pseudo channel, with the capacity
-
set to (full stack capacity / 16) based on 8Gb dies
-
To use all 16 pseudo channels, set 'channels' parameter to 16 in
-
system configuration
-
64-bit pseudo-channel interface
- device_bus_width = 64
-
HBM pseudo-channel only supports BL4
- burst_length = 4
-
size of channel in bytes, 4H stack of 8Gb dies is 4GB per stack;
-
with 16 channels, 256MB per channel
- device_size = '256MB'
-
page size is halved with pseudo-channel; maintaining the same number
-
of rows per pseudo-channel with 2X banks across 2 channels
- device_rowbuffer_size = '1kB'
-
HBM has 8 or 16 banks depending on capacity
-
Starting with 4Gb dies, 16 banks are defined
- banks_per_rank = 16
-
reset tRFC for larger, 8Gb device
-
use HBM1 4Gb value as a starting point
- tRFC = '260ns'
-
start with tRFC + tXP -> 260ns + 8ns = 268ns
- tXS = '268ns'
-
Default different rank bus delay to 2 CK, @1000 MHz = 2 ns
- tCS = '2ns'
- tREFI = '3.9us'
-
active powerdown and precharge powerdown exit time
- tXP = '10ns'
-
self refresh exit time
- tXS = '65ns'
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture
+# burst of 32, which means bursts can be interleaved
+class LPDDR5_5500_1x16_BG_BL32(DRAMInterface):
+
-
Increase buffer size to account for more bank resources
- read_buffer_size = 64
-
Set page policy to better suit DMC Huxley
- page_policy = 'close_adaptive'
-
16-bit channel interface
- device_bus_width = 16
-
LPDDR5 is a BL16 or BL32 device
-
With BG mode, BL16 and BL32 are supported
-
Use BL32 for higher command bandwidth
- burst_length = 32
-
size of device in bytes
- device_size = '1GB'
-
2kB page with BG mode
- device_rowbuffer_size = '2kB'
-
Use a 1x16 configuration
- devices_per_rank = 1
-
Use a single rank
- ranks_per_channel = 1
-
LPDDR5 supports configurable bank options
-
8B : BL32, all frequencies
-
16B : BL32 or BL16, <=3.2Gbps
-
16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
-
Initial configuration will have 16 banks with Bank Group Arch
-
to maximize resources and enable higher data rates
- banks_per_rank = 16
- bank_groups_per_rank = 4
-
5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
- tCK = '1.455ns'
-
Greater of 2 CK or 18ns
- tRCD = '18ns'
-
Base RL is 16 CK @ 687.5 MHz = 23.28ns
- tCL = '23.280ns'
-
Greater of 2 CK or 18ns
- tRP = '18ns'
-
Greater of 3 CK or 42ns
- tRAS = '42ns'
-
Greater of 3 CK or 34ns
- tWR = '34ns'
-
active powerdown and precharge powerdown exit time
-
Greater of 3 CK or 7ns
- tXP = '7ns'
-
self refresh exit time (tRFCab + 7.5ns)
- tXS = '217.5ns'
-
Greater of 2 CK or 7.5 ns minus 2 CK
- tRTP = '4.59ns'
-
With BG architecture, burst of 32 transferred in two 16-beat
-
sub-bursts, with a 16-beat gap in between.
-
Each 16-beat sub-burst is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
-
tBURST is the delay to transfer the Bstof32 = 6 CK @ 687.5 MHz
- tBURST = '8.73ns'
-
can interleave a Bstof32 from another bank group at tBURST_MIN
-
16-beats is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
- tBURST_MIN = '2.91ns'
-
tBURST_MAX is the maximum burst delay for same bank group timing
-
this is 8 CK @ 687.5 MHz
- tBURST_MAX = '11.64ns'
-
8 CK @ 687.5 MHz
- tCCD_L = "11.64ns"
-
LPDDR5, 8 Gbit/channel for 210ns tRFCab
- tRFC = '210ns'
- tREFI = '3.9us'
-
Greater of 4 CK or 6.25 ns
- tWTR = '6.25ns'
-
Greater of 4 CK or 12 ns
- tWTR_L = '12ns'
-
Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL
-
tWCKDQ0/tCK will be 1 CK for most cases
-
For gem5 RL = WL and BL/n is already accounted for with tBURST
-
Result is an additional 1 CK is required
- tRTW = '1.455ns'
-
Default different rank bus delay to 2 CK, @687.5 MHz = 2.91 ns
- tCS = '2.91ns'
-
2 CK
- tPPD = '2.91ns'
-
Greater of 2 CK or 5 ns
- tRRD = '5ns'
- tRRD_L = '5ns'
-
With Bank Group Arch mode tFAW is 20 ns
- tXAW = '20ns'
- activation_limit = 4
-
at 5Gbps, 4:1 WCK to CK ratio required
-
2 data beats per WCK (DDR) -> 8 per CK
- beats_per_clock = 8
-
2 cycles required to send activate command
-
2 command phases can be sent back-to-back or
-
with a gap up to tAAD = 8 CK
- two_cycle_activate = True
- tAAD = '11.640ns'
- data_clock_sync = True
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture, burst of 16
+class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32):
+
-
LPDDR5 is a BL16 or BL32 device
-
With BG mode, BL16 and BL32 are supported
-
Use BL16 for smaller access granularity
- burst_length = 16
-
For Bstof16 with BG arch, 2 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST = '2.91ns'
- tBURST_MIN = '2.91ns'
-
For Bstof16 with BG arch, 4 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST_MAX = '5.82ns'
-
4 CK @ 687.5 MHz
- tCCD_L = "5.82ns"
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 8-bank mode, burst of 32
+class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32):
+
-
4kB page with 8B mode
- device_rowbuffer_size = '4kB'
-
LPDDR5 supports configurable bank options
-
8B : BL32, all frequencies
-
16B : BL32 or BL16, <=3.2Gbps
-
16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
-
Select 8B
- banks_per_rank = 8
- bank_groups_per_rank = 0
-
For Bstof32 with 8B mode, 4 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST = '5.82ns'
- tBURST_MIN = '5.82ns'
- tBURST_MAX = '5.82ns'
-
Greater of 4 CK or 12 ns
- tWTR = '12ns'
-
Greater of 2 CK or 10 ns
- tRRD = '10ns'
-
With 8B mode tFAW is 40 ns
- tXAW = '40ns'
- activation_limit = 4
-
Reset BG arch timing for 8B mode
- tCCD_L = "0ns"
- tRRD_L = "0ns"
- tWTR_L = "0ns"
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture
+# burst of 32, which means bursts can be interleaved
+class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32):
+
-
6.4Gb/s DDR with 4:1 WCK:CK ratio for 800 MHz CK
- tCK = '1.25ns'
-
Base RL is 17 CK @ 800 MHz = 21.25ns
- tCL = '21.25ns'
-
With BG architecture, burst of 32 transferred in two 16-beat
-
sub-bursts, with a 16-beat gap in between.
-
Each 16-beat sub-burst is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
-
tBURST is the delay to transfer the Bstof32 = 6 CK @ 800 MHz
- tBURST = '7.5ns'
-
can interleave a Bstof32 from another bank group at tBURST_MIN
-
16-beats is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
- tBURST_MIN = '2.5ns'
-
tBURST_MAX is the maximum burst delay for same bank group timing
-
this is 8 CK @ 800 MHz
- tBURST_MAX = '10ns'
-
8 CK @ 800 MHz
- tCCD_L = "10ns"
-
Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL
-
tWCKDQ0/tCK will be 1 CK for most cases
-
For gem5 RL = WL and BL/n is already accounted for with tBURST
-
Result is an additional 1 CK is required
- tRTW = '1.25ns'
-
Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
2 CK
- tPPD = '2.5ns'
-
2 command phases can be sent back-to-back or
-
with a gap up to tAAD = 8 CK
- tAAD = '10ns'
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on initial
+# JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture, burst of 16
+class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32):
+
-
LPDDR5 is a BL16 or BL32 device
-
With BG mode, BL16 and BL32 are supported
-
Use BL16 for smaller access granularity
- burst_length = 16
-
For Bstof16 with BG arch, 2 CK @ 800 MHz with 4:1 clock ratio
- tBURST = '2.5ns'
- tBURST_MIN = '2.5ns'
-
For Bstof16 with BG arch, 4 CK @ 800 MHz with 4:1 clock ratio
- tBURST_MAX = '5ns'
-
4 CK @ 800 MHz
- tCCD_L = "5ns"
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 8-bank mode, burst of 32
+class LPDDR5_6400_1x16_8B_BL32(LPDDR5_6400_1x16_BG_BL32):
+
-
4kB page with 8B mode
- device_rowbuffer_size = '4kB'
-
LPDDR5 supports configurable bank options
-
8B : BL32, all frequencies
-
16B : BL32 or BL16, <=3.2Gbps
-
16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
-
Select 8B
- banks_per_rank = 8
- bank_groups_per_rank = 0
-
For Bstof32 with 8B mode, 4 CK @ 800 MHz with 4:1 clock ratio
- tBURST = '5ns'
- tBURST_MIN = '5ns'
- tBURST_MAX = '5ns'
-
Greater of 4 CK or 12 ns
- tWTR = '12ns'
-
Greater of 2 CK or 10 ns
- tRRD = '10ns'
-
With 8B mode tFAW is 40 ns
- tXAW = '40ns'
- activation_limit = 4
-
Reset BG arch timing for 8B mode
- tCCD_L = "0ns"
- tRRD_L = "0ns"
- tWTR_L = "0ns"
diff --git a/src/mem/SConscript b/src/mem/SConscript
index 2fe179d..ceeed98 100644
--- a/src/mem/SConscript
+++ b/src/mem/SConscript
@@ -1,6 +1,6 @@
-- mode:python --
-# Copyright (c) 2018-2019 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
All rights reserved
The license below extends only to copyright in the software and shall
@@ -47,6 +47,7 @@
SimObject('AddrMapper.py')
SimObject('Bridge.py')
SimObject('DRAMCtrl.py')
+SimObject('DRAMInterface.py')
SimObject('ExternalMaster.py')
SimObject('ExternalSlave.py')
SimObject('MemObject.py')
diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc
index b646581..4055505 100644
--- a/src/mem/dram_ctrl.cc
+++ b/src/mem/dram_ctrl.cc
@@ -47,6 +47,7 @@
#include "debug/DRAMState.hh"
#include "debug/Drain.hh"
#include "debug/QOS.hh"
+#include "params/DRAMInterface.hh"
#include "sim/system.hh"
using namespace std;
@@ -58,12 +59,13 @@
retryRdReq(false), retryWrReq(false),
nextReqEvent([this]{ processNextReqEvent(); }, name()),
respondEvent([this]{ processRespondEvent(); }, name()),
- readBufferSize(p->read_buffer_size),
- writeBufferSize(p->write_buffer_size),
- dram(p->dram),
- readBufferSize(dram->readBufferSize),
- writeBufferSize(dram->writeBufferSize),
writeHighThreshold(writeBufferSize * p->write_high_thresh_perc /
100.0),
writeLowThreshold(writeBufferSize * p->write_low_thresh_perc / 100.0),
minWritesPerSwitch(p->min_writes_per_switch),
- writesThisTime(0), readsThisTime(0), tCS(p->tCS),
-
writesThisTime(0), readsThisTime(0),
memSchedPolicy(p->mem_sched_policy),
frontendLatency(p->static_frontend_latency),
backendLatency(p->static_backend_latency),
@@ -74,37 +76,23 @@
readQueue.resize(p->qos_priorities);
writeQueue.resize(p->qos_priorities);
-
dram->setCtrl(this);
-
// perform a basic check of the write thresholds
if (p->write_low_thresh_perc >= p->write_high_thresh_perc)
fatal("Write buffer low threshold %d must be smaller than the "
"high threshold %d\n", p->write_low_thresh_perc,
p->write_high_thresh_perc);
- // determine the rows per bank by looking at the total capacity
- uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
- DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
-
AbstractMemory::size());
- // create a DRAM interface
- // will only populate the ranks if DRAM is configured
- dram = new DRAMInterface(*this, p, capacity, range);
- DPRINTF(DRAM, "Created DRAM interface \n");
}
void
DRAMCtrl::init()
{
-
MemCtrl::init();
-
if (!port.isConnected()) {
fatal("DRAMCtrl %s is unconnected!\n", name());
} else {
port.sendRangeChange();
}
-
dram->init(range);
-
}
void
@@ -114,8 +102,6 @@
isTimingMode = system()->isTimingMode();
if (isTimingMode) {
-
dram->startupRanks();
-
// shift the bus busy time sufficiently far ahead that we never
// have to worry about negative values when computing the time for
// the next request, this will add an insignificant bubble at the
@@ -133,7 +119,7 @@
"is responding");
// do the actual memory access and turn the packet into a response
-
dram->access(pkt);
Tick latency = 0;
if (pkt->hasData()) {
@@ -263,7 +249,7 @@
// address of first DRAM packet is kept unaliged. Subsequent DRAM
packets
// are aligned to burst size boundaries. This is to ensure we
accurately
// check read packets against packets in write queue.
- const Addr base_addr = getCtrlAddr(pkt->getAddr());
-
const Addr base_addr = dram->getCtrlAddr(pkt->getAddr());
Addr addr = base_addr;
unsigned pktsServicedByWrQ = 0;
BurstHelper* burst_helper = NULL;
@@ -363,7 +349,7 @@
// if the request size is larger than burst size, the pkt is split into
// multiple DRAM packets
- const Addr base_addr = getCtrlAddr(pkt->getAddr());
-
const Addr base_addr = dram->getCtrlAddr(pkt->getAddr());
Addr addr = base_addr;
uint32_t burstSize = dram->bytesPerBurst();
for (int cnt = 0; cnt < pktCount; ++cnt) {
@@ -526,7 +512,7 @@
DRAMPacket* dram_pkt = respQueue.front();
// media specific checks and functions when read response is complete
- dram->respondEventDRAM(dram_pkt->rank);
-
dram->respondEvent(dram_pkt->rank);
if (dram_pkt->burstHelper) {
// it is a split packet
@@ -727,12 +713,12 @@
void
DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency)
{
- DPRINTF(DRAM, "Responding to Address %lld.. ",pkt->getAddr());
-
DPRINTF(DRAM, "Responding to Address %lld.. \n",pkt->getAddr());
bool needsResponse = pkt->needsResponse();
// do the actual memory access which also turns the packet into a
// response
-
dram->access(pkt);
// turn packet around to go back to requester if response expected
if (needsResponse) {
@@ -877,9 +863,9 @@
// if not, shift to next burst window
Tick act_at;
if (twoCycleActivate)
@@ -997,7 +983,7 @@
// Issuing an explicit PRE command
// Verify that we have command bandwidth to issue the precharge
// if not, shift to next burst window
@@ -1096,9 +1082,9 @@
// verify that we have command bandwidth to issue the burst
// if not, shift to next burst window
if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) >
clkResyncDelay))
@@ -1196,7 +1182,7 @@
bool got_more_hits = false;
bool got_bank_conflict = false;
@@ -1267,6 +1253,7 @@
// Update latency stats
stats.totMemAccLat += dram_pkt->readyTime - dram_pkt->entryTime;
stats.totQLat += cmd_at - dram_pkt->entryTime;
@@ -1350,13 +1337,9 @@
// Update latency stats
stats.masterReadTotalLat[dram_pkt->masterId()] +=
dram_pkt->readyTime - dram_pkt->entryTime;
-
stats.bytesRead += dram->bytesPerBurst();
-
stats.totBusLat += dram->burstDelay();
stats.masterReadBytes[dram_pkt->masterId()] += dram_pkt->size;
} else {
++writesThisTime;
-
stats.bytesWritten += dram->bytesPerBurst();
stats.masterWriteBytes[dram_pkt->masterId()] += dram_pkt->size;
stats.masterWriteTotalLat[dram_pkt->masterId()] +=
dram_pkt->readyTime - dram_pkt->entryTime;
@@ -1458,8 +1441,9 @@
// Figure out which read request goes next
// If we are changing command type, incorporate the minimum
case
0);
-
// bus turnaround delay which will be rank to rank delay
-
to_read = chooseNext((*queue), switched_cmd_type ?
-
dram->rankDelay() : 0);
if (to_read != queue->end()) {
// candidate read found
@@ -1538,7 +1522,8 @@
// If we are changing command type, incorporate the minimum
// bus turnaround delay
to_write = chooseNext((*queue),
tCS) : 0);
-
switched_cmd_type ? std::min(dram->minRdToWr(),
-
dram->rankDelay()) : 0);
if (to_write != queue->end()) {
write_found = true;
@@ -1611,11 +1596,8 @@
}
}
-DRAMInterface::DRAMInterface(DRAMCtrl& _ctrl,
- : AbstractMemory(_p),
addrMapping(_p->addr_mapping),
burstSize((_p->devices_per_rank * _p->burst_length *
_p->device_bus_width) / 8),
@@ -1630,7 +1612,7 @@
bankGroupsPerRank(_p->bank_groups_per_rank),
bankGroupArch(_p->bank_groups_per_rank > 0),
banksPerRank(_p->banks_per_rank), rowsPerBank(0),
-
tCK(_p->tCK), tCS(_p->tCS), tCL(_p->tCL), tBURST(_p->tBURST),
tBURST_MIN(_p->tBURST_MIN), tBURST_MAX(_p->tBURST_MAX),
tRTW(_p->tRTW),
tCCD_L_WR(_p->tCCD_L_WR), tCCD_L(_p->tCCD_L), tRCD(_p->tRCD),
tRP(_p->tRP), tRAS(_p->tRAS), tWR(_p->tWR), tRTP(_p->tRTP),
@@ -1646,13 +1628,15 @@
wrToRdDly(tCL + tBURST + _p->tWTR), rdToWrDly(tBURST + tRTW),
wrToRdDlySameBG(tCL + _p->tBURST_MAX + _p->tWTR_L),
rdToWrDlySameBG(tRTW + _p->tBURST_MAX),
-
rankToRankDly(tCS + tBURST),
pageMgmt(_p->page_policy),
maxAccessesPerRow(_p->max_accesses_per_row),
timeStampOffset(0), activeRank(0),
enableDRAMPowerdown(_p->enable_dram_powerdown),
lastStatsResetTick(0),
-
stats(*this),
-
readBufferSize(_p->read_buffer_size),
-
writeBufferSize(_p->write_buffer_size)
{
fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, "
"must be a power of two\n", burstSize);
@@ -1664,7 +1648,7 @@
for (int i = 0; i < ranksPerChannel; i++) {
DPRINTF(DRAM, "Creating DRAM rank %d \n", i);
@@ -1672,6 +1656,11 @@
uint64_t deviceCapacity = deviceSize / (1024 * 1024) * devicesPerRank *
ranksPerChannel;
@@ -1726,8 +1715,10 @@
}
void
-DRAMInterface::init(AddrRange range)
+DRAMInterface::init()
{
@@ -1749,7 +1740,7 @@
// channel striping has to be done at a granularity that
// is equal or larger to a cache line
large "
"as the cache line size\n", name());
}
@@ -1766,10 +1757,12 @@
}
void
-DRAMInterface::startupRanks()
+DRAMInterface::startup()
{
- // timestamp offset should be in clock cycles for DRAMPower
- timeStampOffset = divCeil(curTick(), tCK);
-
if (system()->isTimingMode()) {
-
// timestamp offset should be in clock cycles for DRAMPower
-
timeStampOffset = divCeil(curTick(), tCK);
-
}
for (auto r : ranks) {
r->startup(curTick() + tREFI - tRP);
@@ -1815,7 +1808,7 @@
}
void
-DRAMInterface::respondEventDRAM(uint8_t rank)
+DRAMInterface::respondEvent(uint8_t rank)
{
Rank& rank_ref = *ranks[rank];
@@ -1956,7 +1949,7 @@
std::max(ranks[i]->banks[j].preAllowedAt, curTick()) +
tRP;
// When is the earliest the R/W burst can issue?
ranks[i]->banks[j].rdAllowedAt :
ranks[i]->banks[j].wrAllowedAt;
Tick col_at = std::max(col_allowed_at, act_at + tRCD);
@@ -1996,9 +1989,15 @@
return make_pair(bank_mask, hidden_bank_prep);
}
-DRAMInterface::Rank::Rank(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p, int
_rank,
-
DRAMInterface& _dram)
- : EventManager(&_ctrl), ctrl(_ctrl), dram(_dram),
+DRAMInterface*
+DRAMInterfaceParams::create()
+{
- return new DRAMInterface(this);
+}
+DRAMInterface::Rank::Rank(const DRAMInterfaceParams* _p,
-
stats(_dram, *this)
{
for (int b = 0; b < _p->banks_per_rank; b++) {
banks[b].bank = b;
@@ -2062,8 +2061,10 @@
DRAMInterface::Rank::isQueueEmpty() const
{
// check commands in Q based on current bus direction
0));
@@ -2187,7 +2188,7 @@
// if a request is at the moment being handled and this request is
// accessing the current rank then wait for it to finish
if ((rank == dram.activeRank)
@@ -2262,7 +2263,7 @@
// or have outstanding ACT,RD/WR,Auto-PRE sequence scheduled
// should have outstanding precharge or read response event
assert(prechargeEvent.scheduled() ||
@@ -2322,8 +2323,8 @@
assert(!powerEvent.scheduled());
-
if ((dram.ctrl->drainState() == DrainState::Draining) ||
-
(dram.ctrl->drainState() == DrainState::Drained)) {
// if draining, do not re-enter low-power mode.
// simply go to IDLE and wait
schedulePowerEvent(PWR_IDLE, curTick());
@@ -2548,10 +2549,10 @@
}
// completed refresh event, ensure next request is scheduled
@@ -2610,8 +2611,8 @@
// bypass auto-refresh and go straight to SREF, where memory
// will issue refresh immediately upon entry
if (pwrStatePostRefresh == PWR_PRE_PDN && isQueueEmpty() &&
-
(dram.ctrl->drainState() != DrainState::Draining) &&
-
(dram.ctrl->drainState() != DrainState::Drained) &&
dram.enableDRAMPowerdown) {
DPRINTF(DRAMState, "Rank %d bypassing refresh and
transitioning "
"to self refresh at %11u tick\n", rank, curTick());
@@ -2712,7 +2713,7 @@
bool
DRAMInterface::Rank::forceSelfRefreshExit() const {
return (readEntries != 0) ||
-
(dram.ctrl->inWriteBusState(true) && (writeEntries != 0));
}
DRAMCtrl::CtrlStats::CtrlStats(DRAMCtrl &_ctrl)
@@ -2723,15 +2724,15 @@
ADD_STAT(writeReqs, "Number of write requests accepted"),
ADD_STAT(readBursts,
-
"Number of controller read bursts, "
"including those serviced by the write queue"),
ADD_STAT(writeBursts,
-
"Number of controller write bursts, "
"including those merged in the write queue"),
ADD_STAT(servicedByWrQ,
queue"),
ADD_STAT(mergedWrBursts,
one"),
ADD_STAT(neitherReadNorWriteReqs,
"Number of requests that are neither read nor write"),
@@ -2739,9 +2740,6 @@
ADD_STAT(avgRdQLen, "Average read queue length when enqueuing"),
ADD_STAT(avgWrQLen, "Average write queue length when enqueuing"),
retry"),
ADD_STAT(numWrRetry, "Number of times write queue was full causing
retry"),
@@ -2756,22 +2754,13 @@
ADD_STAT(wrPerTurnAround,
"Writes before turning the bus around for reads"),
-
ADD_STAT(bytesRead, "Total number of bytes read from memory"),
ADD_STAT(bytesReadWrQ, "Total number of bytes read from write queue"),
-
ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"),
ADD_STAT(bytesReadSys, "Total read bytes from the system interface
side"),
ADD_STAT(bytesWrittenSys,
"Total written bytes from the system interface side"),
-
ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiByte/s"),
-
ADD_STAT(avgWrBW, "Average achieved write bandwidth in MiByte/s"),
ADD_STAT(avgRdBWSys, "Average system read bandwidth in MiByte/s"),
ADD_STAT(avgWrBWSys, "Average system write bandwidth in MiByte/s"),
-
ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
-
ADD_STAT(busUtil, "Data bus utilization in percentage"),
-
ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"),
-
ADD_STAT(busUtilWrite, "Data bus utilization in percentage for
writes"),
ADD_STAT(totGap, "Total gap between requests"),
ADD_STAT(avgGap, "Average gap between requests"),
@@ -2803,12 +2792,11 @@
{
using namespace Stats;
-
assert(ctrl._system);
-
const auto max_masters = ctrl._system->maxMasters();
-
avgBusLat.precision(2);
readPktSize.init(ceilLog2(ctrl.dram->bytesPerBurst()) + 1);
writePktSize.init(ceilLog2(ctrl.dram->bytesPerBurst()) + 1);
@@ -2823,14 +2811,9 @@
.init(ctrl.writeBufferSize)
.flags(nozero);
-
avgRdBW.precision(2);
-
avgWrBW.precision(2);
avgRdBWSys.precision(2);
avgWrBWSys.precision(2);
-
peakBW.precision(2);
-
busUtil.precision(2);
avgGap.precision(2);
-
busUtilWrite.precision(2);
// per-master bytes read and written to memory
masterReadBytes
@@ -2862,9 +2845,6 @@
.flags(nonan)
.precision(2);
-
busUtilRead
-
.precision(2);
-
masterWriteRate
.flags(nozero | nonan)
.precision(12);
@@ -2878,7 +2858,7 @@
.precision(2);
for (int i = 0; i < max_masters; i++) {
-
const std::string master = ctrl.system()->getMasterName(i);
masterReadBytes.subname(i, master);
masterReadRate.subname(i, master);
masterWriteBytes.subname(i, master);
@@ -2892,22 +2872,11 @@
}
// Formula stats
-
avgBusLat = totBusLat / (readBursts - servicedByWrQ);
-
avgRdBW = (bytesRead / 1000000) / simSeconds;
-
avgWrBW = (bytesWritten / 1000000) / simSeconds;
avgRdBWSys = (bytesReadSys / 1000000) / simSeconds;
avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds;
-
peakBW = (SimClock::Frequency / ctrl.dram->burstDataDelay()) *
-
ctrl.dram->bytesPerBurst() / 1000000;
-
busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
avgGap = totGap / (readReqs + writeReqs);
-
busUtilRead = avgRdBW / peakBW * 100;
-
busUtilWrite = avgWrBW / peakBW * 100;
-
masterReadRate = masterReadBytes / simSeconds;
masterWriteRate = masterWriteBytes / simSeconds;
masterReadAvgLat = masterReadTotalLat / masterReadAccesses;
@@ -2920,8 +2889,8 @@
dram.lastStatsResetTick = curTick();
}
-DRAMInterface::DRAMStats::DRAMStats(DRAMCtrl &_ctrl, DRAMInterface &_dram)
- : Stats::Group(&_ctrl, csprintf("dram").c_str()),
+DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram)
-
: Stats::Group(&_dram),
dram(_dram),
ADD_STAT(readBursts, "Number of DRAM read bursts"),
@@ -2931,10 +2900,13 @@
ADD_STAT(perBankWrBursts, "Per bank write bursts"),
ADD_STAT(totQLat, "Total ticks spent queuing"),
-
ADD_STAT(totBusLat, "Total ticks spent in databus transfers"),
ADD_STAT(totMemAccLat,
"Total ticks spent from burst creation until serviced "
"by the DRAM"),
-
ADD_STAT(avgQLat, "Average queueing delay per DRAM burst"),
-
ADD_STAT(avgBusLat, "Average bus latency per DRAM burst"),
ADD_STAT(avgMemAccLat, "Average memory access latency per DRAM burst"),
ADD_STAT(readRowHits, "Number of row buffer hits during reads"),
@@ -2947,6 +2919,12 @@
ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"),
ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiBytes/s"),
ADD_STAT(avgWrBW, "Average DRAM write bandwidth in MiBytes/s"),
-
ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
-
ADD_STAT(busUtil, "Data bus utilization in percentage"),
-
ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"),
-
ADD_STAT(busUtilWrite, "Data bus utilization in percentage for
writes"),
-
ADD_STAT(pageHitRate, "Row buffer hit rate, read and write combined")
{
@@ -2958,6 +2936,7 @@
using namespace Stats;
avgQLat.precision(2);
-
avgBusLat.precision(2);
avgMemAccLat.precision(2);
readRowHitRate.precision(2);
@@ -2971,10 +2950,16 @@
dram.maxAccessesPerRow : dram.rowBufferSize)
.flags(nozero);
-
peakBW.precision(2);
-
busUtil.precision(2);
-
busUtilWrite.precision(2);
-
busUtilRead.precision(2);
-
pageHitRate.precision(2);
// Formula stats
avgQLat = totQLat / readBursts;
-
avgBusLat = totBusLat / readBursts;
avgMemAccLat = totMemAccLat / readBursts;
readRowHitRate = (readRowHits / readBursts) * 100;
@@ -2982,13 +2967,19 @@
avgRdBW = (bytesRead / 1000000) / simSeconds;
avgWrBW = (bytesWritten / 1000000) / simSeconds;
-
peakBW = (SimClock::Frequency / dram.burstDataDelay()) *
-
dram.bytesPerBurst() / 1000000;
-
busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
-
busUtilRead = avgRdBW / peakBW * 100;
-
busUtilWrite = avgWrBW / peakBW * 100;
pageHitRate = (writeRowHits + readRowHits) /
(writeBursts + readBursts) * 100;
}
-DRAMInterface::RankStats::RankStats(DRAMCtrl &_ctrl, Rank &_rank)
- : Stats::Group(&_ctrl, csprintf("dram_rank%d", _rank.rank).c_str()),
+DRAMInterface::RankStats::RankStats(DRAMInterface &_dram, Rank &_rank)
-
: Stats::Group(&_dram, csprintf("rank%d", _rank.rank).c_str()),
rank(_rank),
ADD_STAT(actEnergy, "Energy for activate commands per rank (pJ)"),
@@ -3047,7 +3038,7 @@
DRAMCtrl::recvFunctional(PacketPtr pkt)
{
// rely on the abstract memory
- dram->functionalAccess(pkt);
}
Port &
@@ -3093,6 +3084,7 @@
// if we switched to timing mode, kick things into action,
// and behave as if we restored from a checkpoint
startup();
@@ -3112,7 +3104,7 @@
DRAMCtrl::MemoryPort::getAddrRanges() const
{
AddrRangeList ranges;
- ranges.push_back(ctrl.getAddrRange());
- ranges.push_back(ctrl.dram->getAddrRange());
return ranges;
}
diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh
index dc030b1..417e935 100644
--- a/src/mem/dram_ctrl.hh
+++ b/src/mem/dram_ctrl.hh
@@ -55,12 +55,15 @@
#include "enums/AddrMap.hh"
#include "enums/MemSched.hh"
#include "enums/PageManage.hh"
+#include "mem/abstract_mem.hh"
#include "mem/drampower.hh"
#include "mem/qos/mem_ctrl.hh"
#include "mem/qport.hh"
#include "params/DRAMCtrl.hh"
#include "sim/eventq.hh"
+class DRAMInterfaceParams;
+
/**
- A basic class to track the bank state, i.e. what row is
- currently open (if any), when is the bank free to accept a new
@@ -242,7 +245,7 @@
- The DRAMInterface includes a class for individual ranks
- and per rank functions.
/
-class DRAMInterface : public SimObject
+class DRAMInterface : public AbstractMemory
{
private:
/*
@@ -342,7 +345,7 @@
class Rank;
struct RankStats : public Stats::Group
{
@@ -408,13 +411,6 @@
*/
class Rank : public EventManager
{
@@ -534,10 +530,10 @@
*/
Tick lastBurstTick;
}
@@ -659,15 +655,16 @@
* @param next Memory Command
* @return true if timeStamp of Command 1 < timeStamp of Command 2
*/
- static bool sortTime(const Command& cmd, const Command& cmd_next)
- static bool
- sortTime(const Command& cmd, const Command& cmd_next)
{
return cmd.timeStamp < cmd_next.timeStamp;
-
DRAMCtrl* ctrl;
/**
- Memory controller configuration initialized based on parameter
@@ -698,6 +695,7 @@
- DRAM timing requirements
*/
const Tick M5_CLASS_VAR_USED tCK;
-
const Tick tCS;
const Tick tCL;
const Tick tBURST;
const Tick tBURST_MIN;
@@ -781,7 +779,7 @@
struct DRAMStats : public Stats::Group
{
@@ -798,10 +796,12 @@
// Latencies summed over all requests
Stats::Scalar totQLat;
@@ -817,6 +817,11 @@
// Average bandwidth
Stats::Formula avgRdBW;
Stats::Formula avgWrBW;
@@ -828,16 +833,28 @@
std::vector<Rank*> ranks;
public:
- /**
-
* Buffer sizes for read and write queues in the controller
-
* These are passed to the controller on instantiation
-
* Defining them here allows for buffers to be resized based
-
* on memory type / configuration.
-
*/
- const uint32_t readBufferSize;
- const uint32_t writeBufferSize;
- /** Setting a pointer to the controller */
- void setCtrl(DRAMCtrl* _ctrl) { ctrl = _ctrl; }
-
/**
* Initialize the DRAM interface and verify parameters
-
void init() override;
/**
- Iterate through dram ranks and instantiate per rank startup routine
*/
-
void startup() override;
/**
- Iterate through dram ranks to exit self-refresh in order to drain
@@ -861,15 +878,26 @@
void suspend();
/**
-
* Get an address in a dense range which starts from 0. The input
-
* address is the physical address of the request in an address
-
* space that contains other SimObjects apart from this
-
* controller.
-
*
-
* @param addr The input address which should be in the addrRange
-
* @return An address in the continuous range [0, max)
-
*/
-
Addr getCtrlAddr(Addr addr) { return range.getOffset(addr); }
-
/**
- @return number of bytes in a burst for this interface
*/
- uint32_t bytesPerBurst() const { return burstSize; };
- uint32_t numRanks() const { return ranksPerChannel; };
- Tick burstDataDelay() const
- Tick
- burstDataDelay() const
{
return (burstInterleave ? tBURST_MAX / 2 : tBURST);
}
@@ -893,7 +922,14 @@
*
- @return additional bus turnaround required for read-to-write
*/
- Tick minRdToWr() const { return tRTW; };
-
Tick minRdToWr() const { return tRTW; }
-
/**
-
* Determine the required delay for an access to a different rank
-
*
-
* @return required rank to rank delay
-
*/
-
Tick rankDelay() const { return tCS; }
/*
*/
- bool burstReady(uint8_t rank) const
- bool
- burstReady(uint8_t rank) const
{
return ranks[rank]->inRefIdleState();
}
@@ -979,7 +1016,7 @@
*
- @param rank Specifies rank associated with read burst
*/
- void respondEventDRAM(uint8_t rank);
- DRAMInterface(const DRAMInterfaceParams* _p);
};
/**
@@ -1141,20 +1177,6 @@
void accessAndRespond(PacketPtr pkt, Tick static_latency);
/**
-
* Get an address in a dense range which starts from 0. The input
-
* address is the physical address of the request in an address
-
* space that contains other SimObjects apart from this
-
* controller.
-
*
-
* @param addr The input address which should be in the addrRange
-
* @return An address in the continuous range [0, max)
-
*/
-
Addr getCtrlAddr(Addr addr)
-
{
-
return range.getOffset(addr);
-
}
-
/**
- The memory scheduler/arbiter - picks which request needs to
- go next, based on the specified policy such as FCFS or FR-FCFS
- and moves it to the head of the queue.
@@ -1237,6 +1259,11 @@
std::unordered_multiset<Tick> burstTicks;
/**
-
* Basic memory timing parameters initialized based on parameter
-
* values. These will be used across memory interfaces.
-
*/
- const Tick tCS;
- /**
- Memory controller configuration initialized based on parameter
- values.
*/
@@ -1310,10 +1331,6 @@
// Average queue lengths
Stats::Average avgRdQLen;
Stats::Average avgWrQLen;
-
// Latencies summed over all requests
-
Stats::Scalar totBusLat;
-
// Average latencies per request
-
Stats::Formula avgBusLat;
Stats::Scalar numRdRetry;
Stats::Scalar numWrRetry;
@@ -1324,21 +1341,12 @@
Stats::Histogram rdPerTurnAround;
Stats::Histogram wrPerTurnAround;
-
Stats::Scalar bytesRead;
Stats::Scalar bytesReadWrQ;
-
Stats::Scalar bytesWritten;
Stats::Scalar bytesReadSys;
Stats::Scalar bytesWrittenSys;
// Average bandwidth
-
Stats::Formula avgRdBW;
-
Stats::Formula avgWrBW;
Stats::Formula avgRdBWSys;
Stats::Formula avgWrBWSys;
-
Stats::Formula peakBW;
-
// bus utilization
-
Stats::Formula busUtil;
-
Stats::Formula busUtilRead;
-
Stats::Formula busUtilWrite;
Stats::Scalar totGap;
Stats::Formula avgGap;
@@ -1367,11 +1375,6 @@
CtrlStats stats;
/**
-
* Create pointer to interface to the actual media
-
*/
-
DRAMInterface* dram;
-
/**
- Upstream caches need this packet until true is returned, so
- hold it for deletion until a subsequent call
*/
@@ -1449,13 +1452,6 @@
void restartScheduler(Tick tick) { schedule(nextReqEvent, tick); }
/**
-
* Determine the required delay for an access to a different rank
-
*
-
* @return required rank to rank delay
-
*/
-
Tick rankDelay() const { return tCS; }
-
/**
- Check the current direction of the memory channel
- @param next_state Check either the current or next bus state
diff --git a/src/mem/drampower.cc b/src/mem/drampower.cc
index 13551a0..96dcb55 100644
--- a/src/mem/drampower.cc
+++ b/src/mem/drampower.cc
@@ -40,13 +40,13 @@
#include "base/intmath.hh"
#include "sim/core.hh"
-DRAMPower::DRAMPower(const DRAMCtrlParams* p, bool include_io) :
+DRAMPower::DRAMPower(const DRAMInterfaceParams* p, bool include_io) :
powerlib(libDRAMPower(getMemSpec(p), include_io))
{
}
Data::MemArchitectureSpec
-DRAMPower::getArchParams(const DRAMCtrlParams* p)
+DRAMPower::getArchParams(const DRAMInterfaceParams* p)
{
Data::MemArchitectureSpec archSpec;
archSpec.burstLength = p->burst_length;
@@ -68,7 +68,7 @@
}
Data::MemTimingSpec
-DRAMPower::getTimingParams(const DRAMCtrlParams* p)
+DRAMPower::getTimingParams(const DRAMInterfaceParams* p)
{
// Set the values that are used for power calculations and ignore
// the ones only used by the controller functionality in DRAMPower
@@ -100,7 +100,7 @@
}
Data::MemPowerSpec
-DRAMPower::getPowerParams(const DRAMCtrlParams* p)
+DRAMPower::getPowerParams(const DRAMInterfaceParams* p)
{
// All DRAMPower currents are in mA
Data::MemPowerSpec powerSpec;
@@ -132,7 +132,7 @@
}
Data::MemorySpecification
-DRAMPower::getMemSpec(const DRAMCtrlParams* p)
+DRAMPower::getMemSpec(const DRAMInterfaceParams* p)
{
Data::MemorySpecification memSpec;
memSpec.memArchSpec = getArchParams(p);
@@ -142,7 +142,18 @@
}
bool
-DRAMPower::hasTwoVDD(const DRAMCtrlParams* p)
+DRAMPower::hasTwoVDD(const DRAMInterfaceParams* p)
{
return p->VDD2 == 0 ? false : true;
}
+
+uint8_t
+DRAMPower::getDataRate(const DRAMInterfaceParams* p)
+{
- uint32_t burst_cycles = divCeil(p->tBURST_MAX, p->tCK);
- uint8_t data_rate = p->burst_length / burst_cycles;
- // 4 for GDDR5
- if (data_rate != 1 && data_rate != 2 && data_rate != 4 && data_rate !=
#include "libdrampower/LibDRAMPower.h"
-#include "params/DRAMCtrl.hh"
+#include "params/DRAMInterface.hh"
/**
- static Data::MemArchitectureSpec getArchParams(const DRAMCtrlParams*
p);
- static Data::MemTimingSpec getTimingParams(const DRAMCtrlParams* p);
- static Data::MemPowerSpec getPowerParams(const DRAMCtrlParams* p);
-
static Data::MemPowerSpec getPowerParams(const DRAMInterfaceParams* p);
-
/**
-
* Determine data rate, either one or two.
-
*/
-
static uint8_t getDataRate(const DRAMInterfaceParams* p);
/**
- Determine if DRAM has two voltage domains (or one)
*/
- static bool hasTwoVDD(const DRAMCtrlParams* p);
- static Data::MemorySpecification getMemSpec(const DRAMCtrlParams* p);
- static Data::MemorySpecification getMemSpec(const DRAMInterfaceParams*
p);
public:
// Instance of DRAMPower Library
libDRAMPower powerlib;
- DRAMPower(const DRAMCtrlParams* p, bool include_io);
- DRAMPower(const DRAMInterfaceParams* p, bool include_io);
};
diff --git a/src/mem/qos/QoSMemCtrl.py b/src/mem/qos/QoSMemCtrl.py
index 1cd3f0b..f55105b 100644
--- a/src/mem/qos/QoSMemCtrl.py
+++ b/src/mem/qos/QoSMemCtrl.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
All rights reserved.
The license below extends only to copyright in the software and shall
@@ -34,18 +34,21 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from m5.params import *
-from m5.objects.AbstractMemory import AbstractMemory
+from m5.proxy import *
+from m5.objects.ClockedObject import ClockedObject
from m5.objects.QoSTurnaround import *
QoS Queue Selection policy used to select packets among same-QoS queues
class QoSQPolicy(Enum): vals = ["fifo", "lifo", "lrg"]
-class QoSMemCtrl(AbstractMemory):
+class QoSMemCtrl(ClockedObject):
type = 'QoSMemCtrl'
cxx_header = "mem/qos/mem_ctrl.hh"
cxx_class = 'QoS::MemCtrl'
abstract = True
diff --git a/src/mem/qos/QoSMemSinkCtrl.py b/src/mem/qos/QoSMemSinkCtrl.py
index 6c4f263..fafac64 100644
--- a/src/mem/qos/QoSMemSinkCtrl.py
+++ b/src/mem/qos/QoSMemSinkCtrl.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
All rights reserved.
The license below extends only to copyright in the software and shall
@@ -37,6 +37,7 @@
from m5.params import *
from m5.objects.QoSMemCtrl import *
+from m5.objects.QoSMemSinkInterface import *
class QoSMemSinkCtrl(QoSMemCtrl):
type = 'QoSMemSinkCtrl'
@@ -44,6 +45,10 @@
cxx_class = "QoS::MemSinkCtrl"
port = ResponsePort("Response ports")
@@ -59,5 +64,3 @@
# response latency - time to issue a response once a request is
serviced
response_latency = Param.Latency("20ns", "Memory response latency")
diff --git a/src/mem/qos/QoSMemSinkInterface.py
b/src/mem/qos/QoSMemSinkInterface.py
new file mode 100644
index 0000000..5c79f64
--- /dev/null
+++ b/src/mem/qos/QoSMemSinkInterface.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2020 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects.AbstractMemory import AbstractMemory
+
+class QoSMemSinkInterface(AbstractMemory):
- type = 'QoSMemSinkInterface'
- cxx_header = "mem/qos/mem_sink.hh"
diff --git a/src/mem/qos/SConscript b/src/mem/qos/SConscript
index f8601b6..1d90f9c 100644
--- a/src/mem/qos/SConscript
+++ b/src/mem/qos/SConscript
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
All rights reserved
The license below extends only to copyright in the software and shall
@@ -37,6 +37,7 @@
SimObject('QoSMemCtrl.py')
SimObject('QoSMemSinkCtrl.py')
+SimObject('QoSMemSinkInterface.py')
SimObject('QoSPolicy.py')
SimObject('QoSTurnaround.py')
diff --git a/src/mem/qos/mem_ctrl.cc b/src/mem/qos/mem_ctrl.cc
index 50e6035..190960b 100644
--- a/src/mem/qos/mem_ctrl.cc
+++ b/src/mem/qos/mem_ctrl.cc
@@ -1,5 +1,5 @@
/*
-
- Copyright (c) 2017-2019 ARM Limited
- : ClockedObject(p),
policy(p->qos_policy),
turnPolicy(p->qos_turnaround_policy),
queuePolicy(QueuePolicy::create(p)),
@@ -51,7 +51,8 @@
qosSyncroScheduler(p->qos_syncro_scheduler),
totalReadQueueSize(0), totalWriteQueueSize(0),
busState(READ), busStateNext(READ),
- stats(*this),
- _system(p->system)
{
// Set the priority policy
if (policy) {
@@ -77,12 +78,6 @@
{}
void
-MemCtrl::init()
-{
- AbstractMemory::init();
-}
-void
MemCtrl::logRequest(BusState dir, MasterID m_id, uint8_t qos,
Addr addr, uint64_t entries)
{
diff --git a/src/mem/qos/mem_ctrl.hh b/src/mem/qos/mem_ctrl.hh
index 0e29fcc..5d7c9d6 100644
--- a/src/mem/qos/mem_ctrl.hh
+++ b/src/mem/qos/mem_ctrl.hh
@@ -1,5 +1,5 @@
/*
-
- Copyright (c) 2019 ARM Limited
-
- Copyright (c) 2020 ARM Limited
- All rights reserved
- The license below extends only to copyright in the software and shall
@@ -36,10 +36,10 @@
*/
#include "debug/QOS.hh"
-#include "mem/abstract_mem.hh"
-#include "mem/qos/q_policy.hh"
#include "mem/qos/policy.hh"
+#include "mem/qos/q_policy.hh"
#include "params/QoSMemCtrl.hh"
+#include "sim/clocked_object.hh"
#include "sim/system.hh"
#include <unordered_map>
@@ -56,7 +56,7 @@
- which support QoS - it provides access to a set of QoS
- scheduling policies
/
-class MemCtrl: public AbstractMemory
+class MemCtrl : public ClockedObject
{
public:
/* Bus Direction */
@@ -151,6 +151,9 @@
Stats::Scalar numStayWriteState;
} stats;
-
/** Pointer to the System object */
-
System* _system;
-
/**
* Initializes dynamically counters and
* statistics for a given Master
@@ -266,11 +269,6 @@
virtual ~MemCtrl();
/**
template<typename Queues>
diff --git a/src/mem/qos/mem_sink.cc b/src/mem/qos/mem_sink.cc
index 1f104e4..dbdf548 100644
--- a/src/mem/qos/mem_sink.cc
+++ b/src/mem/qos/mem_sink.cc
@@ -1,5 +1,5 @@
/*
-
- Copyright (c) 2018 ARM Limited
-
- Copyright (c) 2018-2020 ARM Limited
- All rights reserved
- The license below extends only to copyright in the software and shall
@@ -40,6 +40,7 @@
#include "debug/Drain.hh"
#include "debug/QOS.hh"
#include "mem_sink.hh"
+#include "params/QoSMemSinkInterface.hh"
#include "sim/system.hh"
namespace QoS {
@@ -50,12 +51,15 @@
memoryPacketSize(p->memory_packet_size),
readBufferSize(p->read_buffer_size),
writeBufferSize(p->write_buffer_size), port(name() + ".port", *this),
-
interface(p->interface),
retryRdReq(false), retryWrReq(false), nextRequest(0),
nextReqEvent(this)
{
// Resize read and write queue to allocate space
// for configured QoS priorities
readQueue.resize(numPriorities());
writeQueue.resize(numPriorities());
-
interface->setMemCtrl(this);
}
MemSinkCtrl::~MemSinkCtrl()
@@ -92,7 +96,7 @@
"%s Should not see packets where cache is responding\n",
func);
- interface->access(pkt);
return responseLatency;
}
@@ -101,7 +105,7 @@
{
pkt->pushLabel(name());
-
interface->functionalAccess(pkt);
pkt->popLabel();
}
@@ -279,7 +283,7 @@
// Do the actual memory access which also turns the packet
// into a response
-
interface->access(pkt);
// Log the response
logResponse(pkt->isRead()? READ : WRITE,
@@ -351,7 +355,7 @@
MemSinkCtrl::MemoryPort::getAddrRanges() const
{
AddrRangeList ranges;
- ranges.push_back(memory.getAddrRange());
- ranges.push_back(memory.interface->getAddrRange());
return ranges;
}
@@ -390,3 +394,13 @@
return new QoS::MemSinkCtrl(this);
}
+QoSMemSinkInterface::QoSMemSinkInterface(const QoSMemSinkInterfaceParams*
_p)
- : AbstractMemory(_p)
+{
+}
+QoSMemSinkInterface*
+QoSMemSinkInterfaceParams::create()
+{
- return new QoSMemSinkInterface(this);
+}
diff --git a/src/mem/qos/mem_sink.hh b/src/mem/qos/mem_sink.hh
index 9a51269..5f6c1be 100644
--- a/src/mem/qos/mem_sink.hh
+++ b/src/mem/qos/mem_sink.hh
@@ -1,5 +1,5 @@
/*
-
- Copyright (c) 2018 ARM Limited
-
- Copyright (c) 2018-2020 ARM Limited
- All rights reserved
- The license below extends only to copyright in the software and shall
@@ -41,10 +41,14 @@
#ifndef MEM_QOS_MEM_SINK_HH
#define MEM_QOS_MEM_SINK_HH
+#include "mem/abstract_mem.hh"
#include "mem/qos/mem_ctrl.hh"
#include "mem/qport.hh"
#include "params/QoSMemSinkCtrl.hh"
+class QoSMemSinkInterfaceParams;
+class QoSMemSinkInterface;
+
namespace QoS {
/**
@@ -163,6 +167,11 @@
/** Memory slave port */
MemoryPort port;
@@ -244,4 +253,17 @@
} // namespace QoS
+class QoSMemSinkInterface : public AbstractMemory
+{
-
public:
-
/** Setting a pointer to the interface */
-
void setMemCtrl(QoS::MemSinkCtrl* _ctrl) { ctrl = _ctrl; };
-
/** Pointer to the controller */
-
QoS::MemSinkCtrl* ctrl;
-
QoSMemSinkInterface(const QoSMemSinkInterfaceParams* _p);
+};
-
#endif /* MEM_QOS_MEM_SINK_HH */
diff --git a/tests/gem5/configs/base_config.py
b/tests/gem5/configs/base_config.py
index b5bddf4..cbea768 100644
--- a/tests/gem5/configs/base_config.py
+++ b/tests/gem5/configs/base_config.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012-2013, 2017-2018 ARM Limited
+# Copyright (c) 2012-2013, 2017-2018, 2020 ARM Limited
All rights reserved.
The license below extends only to copyright in the software and shall
@@ -220,7 +220,12 @@
super(BaseSESystem, self).init_system(system)
def create_system(self):
-
if issubclass(self.mem_class, m5.objects.DRAMInterface):
-
mem_ctrl = DRAMCtrl()
-
mem_ctrl.dram = self.mem_class()
-
else:
-
mem_ctrl = self.mem_class()
-
system = System(physmem = mem_ctrl,
membus = SystemXBar(),
mem_mode = self.mem_mode,
multi_thread = (self.num_threads > 1))
@@ -272,8 +277,16 @@
else:
# create the memory controllers and connect them, stick with
# the physmem name to avoid bumping all the reference stats
-
if issubclass(self.mem_class, m5.objects.DRAMInterface):
-
mem_ctrls = []
-
for r in system.mem_ranges:
-
mem_ctrl = DRAMCtrl()
-
mem_ctrl.dram = self.mem_class(range = r)
-
mem_ctrls.append(mem_ctrl)
-
system.physmem = mem_ctrls
-
else:
-
system.physmem = [self.mem_class(range = r)
-
for r in system.mem_ranges]
for i in range(len(system.physmem)):
system.physmem[i].port = system.membus.master
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/28968
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I6a368b845d574a713c7196c5671188ca8c1dc5e8
Gerrit-Change-Number: 28968
Gerrit-PatchSet: 13
Gerrit-Owner: Wendy Elsasser <wendy.elsasser(a)arm.com>
Gerrit-Reviewer: Daniel Carvalho <odanrc(a)yahoo.com.br>
Gerrit-Reviewer: Jason Lowe-Power <power.jg(a)gmail.com>
Gerrit-Reviewer: John Alsop <johnathan.alsop(a)amd.com>
Gerrit-Reviewer: Matthew Poremba <matthew.poremba(a)amd.com>
Gerrit-Reviewer: Nikos Nikoleris <nikos.nikoleris(a)arm.com>
Gerrit-Reviewer: Srikant Bharadwaj <srikant.bharadwaj(a)amd.com>
Gerrit-Reviewer: kokoro <noreply+kokoro(a)google.com>
Gerrit-MessageType: merged
Jason Lowe-Power has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/28968 )
Change subject: mem: Make MemCtrl a ClockedObject
......................................................................
mem: Make MemCtrl a ClockedObject
Made DRAMCtrl a ClockedObject, with DRAMInterface
defined as an AbstractMemory. The address
ranges are now defined per interface. Currently
the model only includes a DRAMInterface but this
can be expanded for other media types.
The controller object includes a parameter to the
interface, which is setup when gem5 is configured.
Change-Id: I6a368b845d574a713c7196c5671188ca8c1dc5e8
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28968
Reviewed-by: Jason Lowe-Power <power.jg(a)gmail.com>
Maintainer: Jason Lowe-Power <power.jg(a)gmail.com>
Tested-by: kokoro <noreply+kokoro(a)google.com>
---
M configs/common/MemConfig.py
M configs/dram/low_power_sweep.py
M configs/dram/sweep.py
M configs/example/memcheck.py
M configs/learning_gem5/part1/simple.py
M configs/learning_gem5/part1/two_level.py
M configs/learning_gem5/part2/simple_cache.py
M configs/learning_gem5/part2/simple_memobj.py
M configs/learning_gem5/part3/simple_ruby.py
M configs/ruby/Ruby.py
M src/mem/DRAMCtrl.py
A src/mem/DRAMInterface.py
M src/mem/SConscript
M src/mem/dram_ctrl.cc
M src/mem/dram_ctrl.hh
M src/mem/drampower.cc
M src/mem/drampower.hh
M src/mem/qos/QoSMemCtrl.py
M src/mem/qos/QoSMemSinkCtrl.py
A src/mem/qos/QoSMemSinkInterface.py
M src/mem/qos/SConscript
M src/mem/qos/mem_ctrl.cc
M src/mem/qos/mem_ctrl.hh
M src/mem/qos/mem_sink.cc
M src/mem/qos/mem_sink.hh
M tests/gem5/configs/base_config.py
26 files changed, 1,913 insertions(+), 1,736 deletions(-)
Approvals:
Jason Lowe-Power: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass
diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py
index b530145..1ace875 100644
--- a/configs/common/MemConfig.py
+++ b/configs/common/MemConfig.py
@@ -40,7 +40,7 @@
from common import ObjectList
from common import HMC
-def create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits, intlv_size,\
+def create_mem_intf(intf, r, i, nbr_mem_ctrls, intlv_bits, intlv_size,
xor_low_bit):
"""
Helper function for creating a single memory controller from the given
@@ -63,32 +63,32 @@
# Create an instance so we can figure out the address
# mapping and row-buffer size
- ctrl = cls()
+ interface = intf()
# Only do this for DRAMs
- if issubclass(cls, m5.objects.DRAMCtrl):
+ if issubclass(intf, m5.objects.DRAMInterface):
# If the channel bits are appearing after the column
# bits, we need to add the appropriate number of bits
# for the row buffer size
- if ctrl.addr_mapping.value == 'RoRaBaChCo':
+ if interface.addr_mapping.value == 'RoRaBaChCo':
# This computation only really needs to happen
# once, but as we rely on having an instance we
# end up having to repeat it for each and every
# one
- rowbuffer_size = ctrl.device_rowbuffer_size.value * \
- ctrl.devices_per_rank.value
+ rowbuffer_size = interface.device_rowbuffer_size.value * \
+ interface.devices_per_rank.value
intlv_low_bit = int(math.log(rowbuffer_size, 2))
# We got all we need to configure the appropriate address
# range
- ctrl.range = m5.objects.AddrRange(r.start, size = r.size(),
+ interface.range = m5.objects.AddrRange(r.start, size = r.size(),
intlvHighBit = \
intlv_low_bit + intlv_bits - 1,
xorHighBit = xor_high_bit,
intlvBits = intlv_bits,
intlvMatch = i)
- return ctrl
+ return interface
def config_mem(options, system):
"""
@@ -148,10 +148,10 @@
if 2 ** intlv_bits != nbr_mem_ctrls:
fatal("Number of memory channels must be a power of 2")
- cls = ObjectList.mem_list.get(opt_mem_type)
+ intf = ObjectList.mem_list.get(opt_mem_type)
mem_ctrls = []
- if opt_elastic_trace_en and not issubclass(cls,
m5.objects.SimpleMemory):
+ if opt_elastic_trace_en and not issubclass(intf,
m5.objects.SimpleMemory):
fatal("When elastic trace is enabled, configure mem-type as "
"simple-mem.")
@@ -162,36 +162,53 @@
intlv_size = max(opt_mem_channels_intlv, system.cache_line_size.value)
# For every range (most systems will only have one), create an
- # array of controllers and set their parameters to match their
- # address mapping in the case of a DRAM
+ # array of memory interfaces and set their parameters to match
+ # their address mapping in the case of a DRAM
for r in system.mem_ranges:
for i in range(nbr_mem_ctrls):
- mem_ctrl = create_mem_ctrl(cls, r, i, nbr_mem_ctrls,
intlv_bits,
+ # Create the DRAM interface
+ dram_intf = create_mem_intf(intf, r, i, nbr_mem_ctrls,
intlv_bits,
intlv_size, opt_xor_low_bit)
+
# Set the number of ranks based on the command-line
# options if it was explicitly set
- if issubclass(cls, m5.objects.DRAMCtrl) and opt_mem_ranks:
- mem_ctrl.ranks_per_channel = opt_mem_ranks
+ if issubclass(intf, m5.objects.DRAMInterface) and
opt_mem_ranks:
+ dram_intf.ranks_per_channel = opt_mem_ranks
# Enable low-power DRAM states if option is set
- if issubclass(cls, m5.objects.DRAMCtrl):
- mem_ctrl.enable_dram_powerdown = opt_dram_powerdown
+ if issubclass(intf, m5.objects.DRAMInterface):
+ dram_intf.enable_dram_powerdown = opt_dram_powerdown
if opt_elastic_trace_en:
- mem_ctrl.latency = '1ns'
+ dram_intf.latency = '1ns'
print("For elastic trace, over-riding Simple Memory "
"latency to 1ns.")
+ # Create the controller that will drive the interface
+ if opt_mem_type == "HMC_2500_1x32":
+ # The static latency of the vault controllers is estimated
+ # to be smaller than a full DRAM channel controller
+ mem_ctrl = m5.objects.DRAMCtrl(min_writes_per_switch = 8,
+ static_backend_latency
= '4ns',
+ static_frontend_latency
= '4ns')
+ else:
+ mem_ctrl = m5.objects.DRAMCtrl()
+
+ # Hookup the controller to the interface and add to the list
+ mem_ctrl.dram = dram_intf
mem_ctrls.append(mem_ctrl)
+ # Create a controller and connect the interfaces to a controller
+ for i in range(len(mem_ctrls)):
+ if opt_mem_type == "HMC_2500_1x32":
+ # Connect the controllers to the membus
+ mem_ctrls[i].port = xbar[i/4].master
+ # Set memory device size. There is an independent controller
for
+ # each vault. All vaults are same size.
+ mem_ctrls[i].dram.device_size = options.hmc_dev_vault_size
+ else:
+ # Connect the controllers to the membus
+ mem_ctrls[i].port = xbar.master
+
subsystem.mem_ctrls = mem_ctrls
- # Connect the controllers to the membus
- for i in range(len(subsystem.mem_ctrls)):
- if opt_mem_type == "HMC_2500_1x32":
- subsystem.mem_ctrls[i].port = xbar[i/4].master
- # Set memory device size. There is an independent controller
for
- # each vault. All vaults are same size.
- subsystem.mem_ctrls[i].device_size = options.hmc_dev_vault_size
- else:
- subsystem.mem_ctrls[i].port = xbar.master
diff --git a/configs/dram/low_power_sweep.py
b/configs/dram/low_power_sweep.py
index 9a62393..0da2b93 100644
--- a/configs/dram/low_power_sweep.py
+++ b/configs/dram/low_power_sweep.py
@@ -111,14 +111,19 @@
# Sanity check for memory controller class.
if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl):
- fatal("This script assumes the memory is a DRAMCtrl subclass")
+ fatal("This script assumes the controller is a DRAMCtrl subclass")
+if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface):
+ fatal("This script assumes the memory is a DRAMInterface subclass")
# There is no point slowing things down by saving any data.
-system.mem_ctrls[0].null = True
+system.mem_ctrls[0].dram.null = True
+
+# enable DRAM low power states
+system.mem_ctrls[0].dram.enable_dram_powerdown = True
# Set the address mapping based on input argument
-system.mem_ctrls[0].addr_mapping = args.addr_map
-system.mem_ctrls[0].page_policy = args.page_policy
+system.mem_ctrls[0].dram.addr_mapping = args.addr_map
+system.mem_ctrls[0].dram.page_policy = args.page_policy
# We create a traffic generator state for each param combination we want to
# test. Each traffic generator state is specified in the config file and
the
@@ -132,22 +137,22 @@
cfg_file = open(cfg_file_path, 'w')
# Get the number of banks
-nbr_banks = int(system.mem_ctrls[0].banks_per_rank.value)
+nbr_banks = int(system.mem_ctrls[0].dram.banks_per_rank.value)
# determine the burst size in bytes
-burst_size = int((system.mem_ctrls[0].devices_per_rank.value *
- system.mem_ctrls[0].device_bus_width.value *
- system.mem_ctrls[0].burst_length.value) / 8)
+burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
+ system.mem_ctrls[0].dram.device_bus_width.value *
+ system.mem_ctrls[0].dram.burst_length.value) / 8)
# next, get the page size in bytes (the rowbuffer size is already in bytes)
-page_size = system.mem_ctrls[0].devices_per_rank.value * \
- system.mem_ctrls[0].device_rowbuffer_size.value
+page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
+ system.mem_ctrls[0].dram.device_rowbuffer_size.value
# Inter-request delay should be such that we can hit as many transitions
# to/from low power states as possible to. We provide a min and max itt to
the
# traffic generator and it randomises in the range. The parameter is in
# seconds and we need it in ticks (ps).
-itt_min = system.mem_ctrls[0].tBURST.value * 1000000000000
+itt_min = system.mem_ctrls[0].dram.tBURST.value * 1000000000000
#The itt value when set to (tRAS + tRP + tCK) covers the case where
# a read command is delayed beyond the delay from ACT to PRE_PDN entry of
the
@@ -155,9 +160,9 @@
# between a write and power down entry will be tRCD + tCL + tWR + tRP +
tCK.
# As we use this delay as a unit and create multiples of it as bigger
delays
# for the sweep, this parameter works for reads, writes and mix of them.
-pd_entry_time = (system.mem_ctrls[0].tRAS.value +
- system.mem_ctrls[0].tRP.value +
- system.mem_ctrls[0].tCK.value) * 1000000000000
+pd_entry_time = (system.mem_ctrls[0].dram.tRAS.value +
+ system.mem_ctrls[0].dram.tRP.value +
+ system.mem_ctrls[0].dram.tCK.value) * 1000000000000
# We sweep itt max using the multipliers specified by the user.
itt_max_str = args.itt_list.strip().split()
diff --git a/configs/dram/sweep.py b/configs/dram/sweep.py
index a340b46..a771c5c 100644
--- a/configs/dram/sweep.py
+++ b/configs/dram/sweep.py
@@ -116,13 +116,15 @@
# the following assumes that we are using the native DRAM
# controller, check to be sure
if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl):
- fatal("This script assumes the memory is a DRAMCtrl subclass")
+ fatal("This script assumes the controller is a DRAMCtrl subclass")
+if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface):
+ fatal("This script assumes the memory is a DRAMInterface subclass")
# there is no point slowing things down by saving any data
-system.mem_ctrls[0].null = True
+system.mem_ctrls[0].dram.null = True
# Set the address mapping based on input argument
-system.mem_ctrls[0].addr_mapping = options.addr_map
+system.mem_ctrls[0].dram.addr_mapping = options.addr_map
# stay in each state for 0.25 ms, long enough to warm things up, and
# short enough to avoid hitting a refresh
@@ -133,21 +135,21 @@
# the DRAM maximum bandwidth to ensure that it is saturated
# get the number of banks
-nbr_banks = system.mem_ctrls[0].banks_per_rank.value
+nbr_banks = system.mem_ctrls[0].dram.banks_per_rank.value
# determine the burst length in bytes
-burst_size = int((system.mem_ctrls[0].devices_per_rank.value *
- system.mem_ctrls[0].device_bus_width.value *
- system.mem_ctrls[0].burst_length.value) / 8)
+burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
+ system.mem_ctrls[0].dram.device_bus_width.value *
+ system.mem_ctrls[0].dram.burst_length.value) / 8)
# next, get the page size in bytes
-page_size = system.mem_ctrls[0].devices_per_rank.value * \
- system.mem_ctrls[0].device_rowbuffer_size.value
+page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
+ system.mem_ctrls[0].dram.device_rowbuffer_size.value
# match the maximum bandwidth of the memory, the parameter is in seconds
# and we need it in ticks (ps)
-itt = getattr(system.mem_ctrls[0].tBURST_MIN, 'value',
- system.mem_ctrls[0].tBURST.value) * 1000000000000
+itt = getattr(system.mem_ctrls[0].dram.tBURST_MIN, 'value',
+ system.mem_ctrls[0].dram.tBURST.value) * 1000000000000
# assume we start at 0
max_addr = mem_range.end
diff --git a/configs/example/memcheck.py b/configs/example/memcheck.py
index 6d80d60..6bccd54 100644
--- a/configs/example/memcheck.py
+++ b/configs/example/memcheck.py
@@ -217,7 +217,7 @@
proto_tester = TrafficGen(config_file = cfg_file_path)
# Set up the system along with a DRAM controller
-system = System(physmem = DDR3_1600_8x8())
+system = System(physmem = DRAMCtrl(dram = DDR3_1600_8x8()))
system.voltage_domain = VoltageDomain(voltage = '1V')
diff --git a/configs/learning_gem5/part1/simple.py
b/configs/learning_gem5/part1/simple.py
index ef73a06..cfd15be 100644
--- a/configs/learning_gem5/part1/simple.py
+++ b/configs/learning_gem5/part1/simple.py
@@ -77,8 +77,9 @@
system.cpu.interrupts[0].int_slave = system.membus.master
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
# Connect the system up to the membus
diff --git a/configs/learning_gem5/part1/two_level.py
b/configs/learning_gem5/part1/two_level.py
index 564c785..0dbcfc7 100644
--- a/configs/learning_gem5/part1/two_level.py
+++ b/configs/learning_gem5/part1/two_level.py
@@ -132,8 +132,9 @@
system.system_port = system.membus.slave
# Create a DDR3 memory controller
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
# Create a process for a simple "Hello World" application
diff --git a/configs/learning_gem5/part2/simple_cache.py
b/configs/learning_gem5/part2/simple_cache.py
index 8d98d92..fbea73d 100644
--- a/configs/learning_gem5/part2/simple_cache.py
+++ b/configs/learning_gem5/part2/simple_cache.py
@@ -76,8 +76,9 @@
system.cpu.interrupts[0].int_slave = system.membus.master
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
# Connect the system up to the membus
diff --git a/configs/learning_gem5/part2/simple_memobj.py
b/configs/learning_gem5/part2/simple_memobj.py
index d30977c..e792eb9 100644
--- a/configs/learning_gem5/part2/simple_memobj.py
+++ b/configs/learning_gem5/part2/simple_memobj.py
@@ -74,8 +74,9 @@
system.cpu.interrupts[0].int_slave = system.membus.master
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
# Connect the system up to the membus
diff --git a/configs/learning_gem5/part3/simple_ruby.py
b/configs/learning_gem5/part3/simple_ruby.py
index c47ee7e..7f70a8c 100644
--- a/configs/learning_gem5/part3/simple_ruby.py
+++ b/configs/learning_gem5/part3/simple_ruby.py
@@ -68,8 +68,9 @@
system.cpu = [TimingSimpleCPU() for i in range(2)]
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
# create the interrupt controller for the CPU and connect to the membus
for cpu in system.cpu:
diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py
index 9bceaa3..9f400a8 100644
--- a/configs/ruby/Ruby.py
+++ b/configs/ruby/Ruby.py
@@ -130,15 +130,16 @@
dir_ranges = []
for r in system.mem_ranges:
mem_type = ObjectList.mem_list.get(options.mem_type)
- mem_ctrl = MemConfig.create_mem_ctrl(mem_type, r, index,
+ dram_intf = MemConfig.create_mem_intf(mem_type, r, index,
options.num_dirs, int(math.log(options.num_dirs, 2)),
intlv_size, options.xor_low_bit)
+ mem_ctrl = m5.objects.DRAMCtrl(dram = dram_intf)
if options.access_backing_store:
mem_ctrl.kvm_map=False
mem_ctrls.append(mem_ctrl)
- dir_ranges.append(mem_ctrl.range)
+ dir_ranges.append(mem_ctrl.dram.range)
if crossbar != None:
mem_ctrl.port = crossbar.master
diff --git a/src/mem/DRAMCtrl.py b/src/mem/DRAMCtrl.py
index 0f70dff..b7b43dc 100644
--- a/src/mem/DRAMCtrl.py
+++ b/src/mem/DRAMCtrl.py
@@ -40,26 +40,12 @@
from m5.params import *
from m5.proxy import *
-from m5.objects.AbstractMemory import *
from m5.objects.QoSMemCtrl import *
# Enum for memory scheduling algorithms, currently First-Come
# First-Served and a First-Row Hit then First-Come First-Served
class MemSched(Enum): vals = ['fcfs', 'frfcfs']
-# Enum for the address mapping. With Ch, Ra, Ba, Ro and Co denoting
-# channel, rank, bank, row and column, respectively, and going from
-# MSB to LSB. Available are RoRaBaChCo and RoRaBaCoCh, that are
-# suitable for an open-page policy, optimising for sequential accesses
-# hitting in the open row. For a closed-page policy, RoCoRaBaCh
-# maximises parallelism.
-class AddrMap(Enum): vals = ['RoRaBaChCo', 'RoRaBaCoCh', 'RoCoRaBaCh']
-
-# Enum for the page policy, either open, open_adaptive, close, or
-# close_adaptive.
-class PageManage(Enum): vals = ['open', 'open_adaptive', 'close',
- 'close_adaptive']
-
# DRAMCtrl is a single-channel single-ported DRAM controller model
# that aims to model the most important system-level performance
# effects of a DRAM without getting into too much detail of the DRAM
@@ -72,12 +58,11 @@
# bus in front of the controller for multiple ports
port = SlavePort("Slave port")
- # the basic configuration of the controller architecture, note
- # that each entry corresponds to a burst for the specific DRAM
- # configuration (e.g. x32 with burst length 8 is 32 bytes) and not
- # the cacheline size or request/packet size
- write_buffer_size = Param.Unsigned(64, "Number of write queue entries")
- read_buffer_size = Param.Unsigned(32, "Number of read queue entries")
+ # Interface to volatile, DRAM media
+ dram = Param.DRAMInterface("DRAM interface")
+
+ # read and write buffer depths are set in the interface
+ # the controller will read these values when instantiated
# threshold in percent for when to forcefully trigger writes and
# start emptying the write buffer
@@ -93,15 +78,6 @@
# scheduler, address map and page policy
mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy")
- addr_mapping = Param.AddrMap('RoRaBaCoCh', "Address mapping policy")
- page_policy = Param.PageManage('open_adaptive', "Page management
policy")
-
- # enforce a limit on the number of accesses per row
- max_accesses_per_row = Param.Unsigned(16, "Max accesses per row
before "
- "closing");
-
- # size of DRAM Chip in Bytes
- device_size = Param.MemorySize("Size of DRAM chip")
# pipeline latency of the controller and PHY, split into a
# frontend part and a backend part, with reads and writes serviced
@@ -109,1404 +85,3 @@
# serviced by the memory seeing the sum of the two
static_frontend_latency = Param.Latency("10ns", "Static frontend
latency")
static_backend_latency = Param.Latency("10ns", "Static backend
latency")
-
- # the physical organisation of the DRAM
- device_bus_width = Param.Unsigned("data bus width in bits for each
DRAM "\
- "device/chip")
- burst_length = Param.Unsigned("Burst lenght (BL) in beats")
- device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\
- "device/chip")
- devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
- ranks_per_channel = Param.Unsigned("Number of ranks per channel")
-
- # default to 0 bank groups per rank, indicating bank group architecture
- # is not used
- # update per memory class when bank group architecture is supported
- bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per
rank")
- banks_per_rank = Param.Unsigned("Number of banks per rank")
-
- # Enable DRAM powerdown states if True. This is False by default due to
- # performance being lower when enabled
- enable_dram_powerdown = Param.Bool(False, "Enable powerdown states")
-
- # For power modelling we need to know if the DRAM has a DLL or not
- dll = Param.Bool(True, "DRAM has DLL or not")
-
- # DRAMPower provides in addition to the core power, the possibility to
- # include RD/WR termination and IO power. This calculation assumes some
- # default values. The integration of DRAMPower with gem5 does not
include
- # IO and RD/WR termination power by default. This might be added as an
- # additional feature in the future.
-
- # timing behaviour and constraints - all in nanoseconds
-
- # the base clock period of the DRAM
- tCK = Param.Latency("Clock period")
-
- # the amount of time in nanoseconds from issuing an activate command
- # to the data being available in the row buffer for a read/write
- tRCD = Param.Latency("RAS to CAS delay")
-
- # the time from issuing a read/write command to seeing the actual data
- tCL = Param.Latency("CAS latency")
-
- # minimum time between a precharge and subsequent activate
- tRP = Param.Latency("Row precharge time")
-
- # minimum time between an activate and a precharge to the same row
- tRAS = Param.Latency("ACT to PRE delay")
-
- # minimum time between a write data transfer and a precharge
- tWR = Param.Latency("Write recovery time")
-
- # minimum time between a read and precharge command
- tRTP = Param.Latency("Read to precharge")
-
- # time to complete a burst transfer, typically the burst length
- # divided by two due to the DDR bus, but by making it a parameter
- # it is easier to also evaluate SDR memories like WideIO.
- # This parameter has to account for burst length.
- # Read/Write requests with data size larger than one full burst are
broken
- # down into multiple requests in the controller
- # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
- # With bank group architectures, tBURST represents the CAS-to-CAS
- # delay for bursts to different bank groups (tCCD_S)
- tBURST = Param.Latency("Burst duration "
- "(typically burst length / 2 cycles)")
-
- # tBURST_MAX is the column array cycle delay required before next
access,
- # which could be greater than tBURST when the memory access time is
greater
- # than tBURST
- tBURST_MAX = Param.Latency(Self.tBURST, "Column access delay")
-
- # tBURST_MIN is the minimum delay between bursts, which could be less
than
- # tBURST when interleaving is supported
- tBURST_MIN = Param.Latency(Self.tBURST, "Minimim delay between bursts")
-
- # CAS-to-CAS delay for bursts to the same bank group
- # only utilized with bank group architectures; set to 0 for default
case
- # tBURST is equivalent to tCCD_S; no explicit parameter required
- # for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")
-
- # Write-to-Write delay for bursts to the same bank group
- # only utilized with bank group architectures; set to 0 for default
case
- # This will be used to enable different same bank group delays
- # for writes versus reads
- tCCD_L_WR = Param.Latency(Self.tCCD_L,
- "Same bank group Write to Write delay")
-
- # time taken to complete one refresh cycle (N rows in all banks)
- tRFC = Param.Latency("Refresh cycle time")
-
- # refresh command interval, how often a "ref" command needs
- # to be sent. It is 7.8 us for a 64ms refresh requirement
- tREFI = Param.Latency("Refresh command interval")
-
- # write-to-read, same rank turnaround penalty
- tWTR = Param.Latency("Write to read, same rank switching time")
-
- # write-to-read, same rank turnaround penalty for same bank group
- tWTR_L = Param.Latency(Self.tWTR, "Write to read, same rank switching "
- "time, same bank group")
-
- # read-to-write, same rank turnaround penalty
- tRTW = Param.Latency("Read to write, same rank switching time")
-
- # rank-to-rank bus delay penalty
- # this does not correlate to a memory timing parameter and encompasses:
- # 1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD
- # different rank bus delay
- tCS = Param.Latency("Rank to rank switching time")
-
- # minimum precharge to precharge delay time
- tPPD = Param.Latency("0ns", "PRE to PRE delay")
-
- # maximum delay between two-cycle ACT command phases
- tAAD = Param.Latency(Self.tCK,
- "Maximum delay between two-cycle ACT commands")
-
- two_cycle_activate = Param.Bool(False,
- "Two cycles required to send activate")
-
- # minimum row activate to row activate delay time
- tRRD = Param.Latency("ACT to ACT delay")
-
- # only utilized with bank group architectures; set to 0 for default
case
- tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")
-
- # time window in which a maximum number of activates are allowed
- # to take place, set to 0 to disable
- tXAW = Param.Latency("X activation window")
- activation_limit = Param.Unsigned("Max number of activates in window")
-
- # time to exit power-down mode
- # Exit power-down to next valid command delay
- tXP = Param.Latency("0ns", "Power-up Delay")
-
- # Exit Powerdown to commands requiring a locked DLL
- tXPDLL = Param.Latency("0ns", "Power-up Delay with locked DLL")
-
- # time to exit self-refresh mode
- tXS = Param.Latency("0ns", "Self-refresh exit latency")
-
- # time to exit self-refresh mode with locked DLL
- tXSDLL = Param.Latency("0ns", "Self-refresh exit latency DLL")
-
- # number of data beats per clock. with DDR, default is 2, one per edge
- beats_per_clock = Param.Unsigned(2, "Data beats per clock")
-
- data_clock_sync = Param.Bool(False, "Synchronization commands
required")
-
- # Currently rolled into other params
- ######################################################################
-
- # tRC - assumed to be tRAS + tRP
-
- # Power Behaviour and Constraints
- # DRAMs like LPDDR and WideIO have 2 external voltage domains. These
are
- # defined as VDD and VDD2. Each current is defined for each voltage
domain
- # separately. For example, current IDD0 is active-precharge current for
- # voltage domain VDD and current IDD02 is active-precharge current for
- # voltage domain VDD2.
- # By default all currents are set to 0mA. Users who are only
interested in
- # the performance of DRAMs can leave them at 0.
-
- # Operating 1 Bank Active-Precharge current
- IDD0 = Param.Current("0mA", "Active precharge current")
-
- # Operating 1 Bank Active-Precharge current multiple voltage Range
- IDD02 = Param.Current("0mA", "Active precharge current VDD2")
-
- # Precharge Power-down Current: Slow exit
- IDD2P0 = Param.Current("0mA", "Precharge Powerdown slow")
-
- # Precharge Power-down Current: Slow exit multiple voltage Range
- IDD2P02 = Param.Current("0mA", "Precharge Powerdown slow VDD2")
-
- # Precharge Power-down Current: Fast exit
- IDD2P1 = Param.Current("0mA", "Precharge Powerdown fast")
-
- # Precharge Power-down Current: Fast exit multiple voltage Range
- IDD2P12 = Param.Current("0mA", "Precharge Powerdown fast VDD2")
-
- # Precharge Standby current
- IDD2N = Param.Current("0mA", "Precharge Standby current")
-
- # Precharge Standby current multiple voltage range
- IDD2N2 = Param.Current("0mA", "Precharge Standby current VDD2")
-
- # Active Power-down current: slow exit
- IDD3P0 = Param.Current("0mA", "Active Powerdown slow")
-
- # Active Power-down current: slow exit multiple voltage range
- IDD3P02 = Param.Current("0mA", "Active Powerdown slow VDD2")
-
- # Active Power-down current : fast exit
- IDD3P1 = Param.Current("0mA", "Active Powerdown fast")
-
- # Active Power-down current : fast exit multiple voltage range
- IDD3P12 = Param.Current("0mA", "Active Powerdown fast VDD2")
-
- # Active Standby current
- IDD3N = Param.Current("0mA", "Active Standby current")
-
- # Active Standby current multiple voltage range
- IDD3N2 = Param.Current("0mA", "Active Standby current VDD2")
-
- # Burst Read Operating Current
- IDD4R = Param.Current("0mA", "READ current")
-
- # Burst Read Operating Current multiple voltage range
- IDD4R2 = Param.Current("0mA", "READ current VDD2")
-
- # Burst Write Operating Current
- IDD4W = Param.Current("0mA", "WRITE current")
-
- # Burst Write Operating Current multiple voltage range
- IDD4W2 = Param.Current("0mA", "WRITE current VDD2")
-
- # Refresh Current
- IDD5 = Param.Current("0mA", "Refresh current")
-
- # Refresh Current multiple voltage range
- IDD52 = Param.Current("0mA", "Refresh current VDD2")
-
- # Self-Refresh Current
- IDD6 = Param.Current("0mA", "Self-refresh Current")
-
- # Self-Refresh Current multiple voltage range
- IDD62 = Param.Current("0mA", "Self-refresh Current VDD2")
-
- # Main voltage range of the DRAM
- VDD = Param.Voltage("0V", "Main Voltage Range")
-
- # Second voltage range defined by some DRAMs
- VDD2 = Param.Voltage("0V", "2nd Voltage Range")
-
-# A single DDR3-1600 x64 channel (one command and address bus), with
-# timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in
-# an 8x8 configuration.
-class DDR3_1600_8x8(DRAMCtrl):
- # size of device in bytes
- device_size = '512MB'
-
- # 8x8 configuration, 8 devices each with an 8-bit interface
- device_bus_width = 8
-
- # DDR3 is a BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
- device_rowbuffer_size = '1kB'
-
- # 8x8 configuration, so 8 devices
- devices_per_rank = 8
-
- # Use two ranks
- ranks_per_channel = 2
-
- # DDR3 has 8 banks in all configurations
- banks_per_rank = 8
-
- # 800 MHz
- tCK = '1.25ns'
-
- # 8 beats across an x64 interface translates to 4 clocks @ 800 MHz
- tBURST = '5ns'
-
- # DDR3-1600 11-11-11
- tRCD = '13.75ns'
- tCL = '13.75ns'
- tRP = '13.75ns'
- tRAS = '35ns'
- tRRD = '6ns'
- tXAW = '30ns'
- activation_limit = 4
- tRFC = '260ns'
-
- tWR = '15ns'
-
- # Greater of 4 CK or 7.5 ns
- tWTR = '7.5ns'
-
- # Greater of 4 CK or 7.5 ns
- tRTP = '7.5ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
- tRTW = '2.5ns'
-
- # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
- # <=85C, half for >85C
- tREFI = '7.8us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '6ns'
-
- # self refresh exit time
- tXS = '270ns'
-
- # Current values from datasheet Die Rev E,J
- IDD0 = '55mA'
- IDD2N = '32mA'
- IDD3N = '38mA'
- IDD4W = '125mA'
- IDD4R = '157mA'
- IDD5 = '235mA'
- IDD3P1 = '38mA'
- IDD2P1 = '32mA'
- IDD6 = '20mA'
- VDD = '1.5V'
-
-# A single HMC-2500 x32 model based on:
-# [1] DRAMSpec: a high-level DRAM bank modelling tool
-# developed at the University of Kaiserslautern. This high level tool
-# uses RC (resistance-capacitance) and CV (capacitance-voltage) models to
-# estimate the DRAM bank latency and power numbers.
-# [2] High performance AXI-4.0 based interconnect for extensible smart
memory
-# cubes (E. Azarkhish et. al)
-# Assumed for the HMC model is a 30 nm technology node.
-# The modelled HMC consists of 4 Gbit layers which sum up to 2GB of memory
(4
-# layers).
-# Each layer has 16 vaults and each vault consists of 2 banks per layer.
-# In order to be able to use the same controller used for 2D DRAM
generations
-# for HMC, the following analogy is done:
-# Channel (DDR) => Vault (HMC)
-# device_size (DDR) => size of a single layer in a vault
-# ranks per channel (DDR) => number of layers
-# banks per rank (DDR) => banks per layer
-# devices per rank (DDR) => devices per layer ( 1 for HMC).
-# The parameters for which no input is available are inherited from the
DDR3
-# configuration.
-# This configuration includes the latencies from the DRAM to the logic
layer
-# of the HMC
-class HMC_2500_1x32(DDR3_1600_8x8):
- # size of device
- # two banks per device with each bank 4MB [2]
- device_size = '8MB'
-
- # 1x32 configuration, 1 device with 32 TSVs [2]
- device_bus_width = 32
-
- # HMC is a BL8 device [2]
- burst_length = 8
-
- # Each device has a page (row buffer) size of 256 bytes [2]
- device_rowbuffer_size = '256B'
-
- # 1x32 configuration, so 1 device [2]
- devices_per_rank = 1
-
- # 4 layers so 4 ranks [2]
- ranks_per_channel = 4
-
- # HMC has 2 banks per layer [2]
- # Each layer represents a rank. With 4 layers and 8 banks in total,
each
- # layer has 2 banks; thus 2 banks per rank.
- banks_per_rank = 2
-
- # 1250 MHz [2]
- tCK = '0.8ns'
-
- # 8 beats across an x32 interface translates to 4 clocks @ 1250 MHz
- tBURST = '3.2ns'
-
- # Values using DRAMSpec HMC model [1]
- tRCD = '10.2ns'
- tCL = '9.9ns'
- tRP = '7.7ns'
- tRAS = '21.6ns'
-
- # tRRD depends on the power supply network for each vendor.
- # We assume a tRRD of a double bank approach to be equal to 4 clock
- # cycles (Assumption)
- tRRD = '3.2ns'
-
- # activation limit is set to 0 since there are only 2 banks per vault
- # layer.
- activation_limit = 0
-
- # Values using DRAMSpec HMC model [1]
- tRFC = '59ns'
- tWR = '8ns'
- tRTP = '4.9ns'
-
- # Default different rank bus delay assumed to 1 CK for TSVs, @1250 MHz
=
- # 0.8 ns (Assumption)
- tCS = '0.8ns'
-
- # Value using DRAMSpec HMC model [1]
- tREFI = '3.9us'
-
- # The default page policy in the vault controllers is simple closed
page
- # [2] nevertheless 'close' policy opens and closes the row multiple
times
- # for bursts larger than 32Bytes. For this reason we
use 'close_adaptive'
- page_policy = 'close_adaptive'
-
- # RoCoRaBaCh resembles the default address mapping in HMC
- addr_mapping = 'RoCoRaBaCh'
- min_writes_per_switch = 8
-
- # These parameters do not directly correlate with buffer_size in real
- # hardware. Nevertheless, their value has been tuned to achieve a
- # bandwidth similar to the cycle-accurate model in [2]
- write_buffer_size = 32
- read_buffer_size = 32
-
- # The static latency of the vault controllers is estimated to be
smaller
- # than a full DRAM channel controller
- static_backend_latency='4ns'
- static_frontend_latency='4ns'
-
-# A single DDR3-2133 x64 channel refining a selected subset of the
-# options for the DDR-1600 configuration, based on the same DDR3-1600
-# 4 Gbit datasheet (Micron MT41J512M8). Most parameters are kept
-# consistent across the two configurations.
-class DDR3_2133_8x8(DDR3_1600_8x8):
- # 1066 MHz
- tCK = '0.938ns'
-
- # 8 beats across an x64 interface translates to 4 clocks @ 1066 MHz
- tBURST = '3.752ns'
-
- # DDR3-2133 14-14-14
- tRCD = '13.09ns'
- tCL = '13.09ns'
- tRP = '13.09ns'
- tRAS = '33ns'
- tRRD = '5ns'
- tXAW = '25ns'
-
- # Current values from datasheet
- IDD0 = '70mA'
- IDD2N = '37mA'
- IDD3N = '44mA'
- IDD4W = '157mA'
- IDD4R = '191mA'
- IDD5 = '250mA'
- IDD3P1 = '44mA'
- IDD2P1 = '43mA'
- IDD6 ='20mA'
- VDD = '1.5V'
-
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A2G4)
-# in a 16x4 configuration.
-# Total channel capacity is 32GB
-# 16 devices/rank * 2 ranks/channel * 1GB/device = 32GB/channel
-class DDR4_2400_16x4(DRAMCtrl):
- # size of device
- device_size = '1GB'
-
- # 16x4 configuration, 16 devices each with a 4-bit interface
- device_bus_width = 4
-
- # DDR4 is a BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 512 byte (1K columns x4)
- device_rowbuffer_size = '512B'
-
- # 16x4 configuration, so 16 devices
- devices_per_rank = 16
-
- # Match our DDR3 configurations which is dual rank
- ranks_per_channel = 2
-
- # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
- # Set to 4 for x4 case
- bank_groups_per_rank = 4
-
- # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
- # configurations). Currently we do not capture the additional
- # constraints incurred by the bank groups
- banks_per_rank = 16
-
- # override the default buffer sizes and go for something larger to
- # accommodate the larger bank count
- write_buffer_size = 128
- read_buffer_size = 64
-
- # 1200 MHz
- tCK = '0.833ns'
-
- # 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
- # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
- # With bank group architectures, tBURST represents the CAS-to-CAS
- # delay for bursts to different bank groups (tCCD_S)
- tBURST = '3.332ns'
-
- # @2400 data rate, tCCD_L is 6 CK
- # CAS-to-CAS delay for bursts to the same bank group
- # tBURST is equivalent to tCCD_S; no explicit parameter required
- # for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = '5ns';
-
- # DDR4-2400 17-17-17
- tRCD = '14.16ns'
- tCL = '14.16ns'
- tRP = '14.16ns'
- tRAS = '32ns'
-
- # RRD_S (different bank group) for 512B page is MAX(4 CK, 3.3ns)
- tRRD = '3.332ns'
-
- # RRD_L (same bank group) for 512B page is MAX(4 CK, 4.9ns)
- tRRD_L = '4.9ns';
-
- # tFAW for 512B page is MAX(16 CK, 13ns)
- tXAW = '13.328ns'
- activation_limit = 4
- # tRFC is 350ns
- tRFC = '350ns'
-
- tWR = '15ns'
-
- # Here using the average of WTR_S and WTR_L
- tWTR = '5ns'
-
- # Greater of 4 CK or 7.5 ns
- tRTP = '7.5ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666
ns
- tRTW = '1.666ns'
-
- # Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns
- tCS = '1.666ns'
-
- # <=85C, half for >85C
- tREFI = '7.8us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '6ns'
-
- # self refresh exit time
- # exit delay to ACT, PRE, PREALL, REF, SREF Enter, and PD Enter is:
- # tRFC + 10ns = 340ns
- tXS = '340ns'
-
- # Current values from datasheet
- IDD0 = '43mA'
- IDD02 = '3mA'
- IDD2N = '34mA'
- IDD3N = '38mA'
- IDD3N2 = '3mA'
- IDD4W = '103mA'
- IDD4R = '110mA'
- IDD5 = '250mA'
- IDD3P1 = '32mA'
- IDD2P1 = '25mA'
- IDD6 = '30mA'
- VDD = '1.2V'
- VDD2 = '2.5V'
-
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A1G8)
-# in an 8x8 configuration.
-# Total channel capacity is 16GB
-# 8 devices/rank * 2 ranks/channel * 1GB/device = 16GB/channel
-class DDR4_2400_8x8(DDR4_2400_16x4):
- # 8x8 configuration, 8 devices each with an 8-bit interface
- device_bus_width = 8
-
- # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
- device_rowbuffer_size = '1kB'
-
- # 8x8 configuration, so 8 devices
- devices_per_rank = 8
-
- # RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
- tRRD_L = '4.9ns';
-
- tXAW = '21ns'
-
- # Current values from datasheet
- IDD0 = '48mA'
- IDD3N = '43mA'
- IDD4W = '123mA'
- IDD4R = '135mA'
- IDD3P1 = '37mA'
-
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A512M16)
-# in a 4x16 configuration.
-# Total channel capacity is 4GB
-# 4 devices/rank * 1 ranks/channel * 1GB/device = 4GB/channel
-class DDR4_2400_4x16(DDR4_2400_16x4):
- # 4x16 configuration, 4 devices each with a 16-bit interface
- device_bus_width = 16
-
- # Each device has a page (row buffer) size of 2 Kbyte (1K columns x16)
- device_rowbuffer_size = '2kB'
-
- # 4x16 configuration, so 4 devices
- devices_per_rank = 4
-
- # Single rank for x16
- ranks_per_channel = 1
-
- # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
- # Set to 2 for x16 case
- bank_groups_per_rank = 2
-
- # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
- # configurations). Currently we do not capture the additional
- # constraints incurred by the bank groups
- banks_per_rank = 8
-
- # RRD_S (different bank group) for 2K page is MAX(4 CK, 5.3ns)
- tRRD = '5.3ns'
-
- # RRD_L (same bank group) for 2K page is MAX(4 CK, 6.4ns)
- tRRD_L = '6.4ns';
-
- tXAW = '30ns'
-
- # Current values from datasheet
- IDD0 = '80mA'
- IDD02 = '4mA'
- IDD2N = '34mA'
- IDD3N = '47mA'
- IDD4W = '228mA'
- IDD4R = '243mA'
- IDD5 = '280mA'
- IDD3P1 = '41mA'
-
-# A single LPDDR2-S4 x32 interface (one command/address bus), with
-# default timings based on a LPDDR2-1066 4 Gbit part (Micron MT42L128M32D1)
-# in a 1x32 configuration.
-class LPDDR2_S4_1066_1x32(DRAMCtrl):
- # No DLL in LPDDR2
- dll = False
-
- # size of device
- device_size = '512MB'
-
- # 1x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
- # LPDDR2_S4 is a BL4 and BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 1KB
- # (this depends on the memory density)
- device_rowbuffer_size = '1kB'
-
- # 1x32 configuration, so 1 device
- devices_per_rank = 1
-
- # Use a single rank
- ranks_per_channel = 1
-
- # LPDDR2-S4 has 8 banks in all configurations
- banks_per_rank = 8
-
- # 533 MHz
- tCK = '1.876ns'
-
- # Fixed at 15 ns
- tRCD = '15ns'
-
- # 8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle time
- tCL = '15ns'
-
- # Pre-charge one bank 15 ns (all banks 18 ns)
- tRP = '15ns'
-
- tRAS = '42ns'
- tWR = '15ns'
-
- tRTP = '7.5ns'
-
- # 8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz.
- # Note this is a BL8 DDR device.
- # Requests larger than 32 bytes are broken down into multiple requests
- # in the controller
- tBURST = '7.5ns'
-
- # LPDDR2-S4, 4 Gbit
- tRFC = '130ns'
- tREFI = '3.9us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '7.5ns'
-
- # self refresh exit time
- tXS = '140ns'
-
- # Irrespective of speed grade, tWTR is 7.5 ns
- tWTR = '7.5ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @533 MHz = 3.75 ns
- tRTW = '3.75ns'
-
- # Default different rank bus delay to 2 CK, @533 MHz = 3.75 ns
- tCS = '3.75ns'
-
- # Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
- # Irrespective of density, tFAW is 50 ns
- tXAW = '50ns'
- activation_limit = 4
-
- # Current values from datasheet
- IDD0 = '15mA'
- IDD02 = '70mA'
- IDD2N = '2mA'
- IDD2N2 = '30mA'
- IDD3N = '2.5mA'
- IDD3N2 = '30mA'
- IDD4W = '10mA'
- IDD4W2 = '190mA'
- IDD4R = '3mA'
- IDD4R2 = '220mA'
- IDD5 = '40mA'
- IDD52 = '150mA'
- IDD3P1 = '1.2mA'
- IDD3P12 = '8mA'
- IDD2P1 = '0.6mA'
- IDD2P12 = '0.8mA'
- IDD6 = '1mA'
- IDD62 = '3.2mA'
- VDD = '1.8V'
- VDD2 = '1.2V'
-
-# A single WideIO x128 interface (one command and address bus), with
-# default timings based on an estimated WIO-200 8 Gbit part.
-class WideIO_200_1x128(DRAMCtrl):
- # No DLL for WideIO
- dll = False
-
- # size of device
- device_size = '1024MB'
-
- # 1x128 configuration, 1 device with a 128-bit interface
- device_bus_width = 128
-
- # This is a BL4 device
- burst_length = 4
-
- # Each device has a page (row buffer) size of 4KB
- # (this depends on the memory density)
- device_rowbuffer_size = '4kB'
-
- # 1x128 configuration, so 1 device
- devices_per_rank = 1
-
- # Use one rank for a one-high die stack
- ranks_per_channel = 1
-
- # WideIO has 4 banks in all configurations
- banks_per_rank = 4
-
- # 200 MHz
- tCK = '5ns'
-
- # WIO-200
- tRCD = '18ns'
- tCL = '18ns'
- tRP = '18ns'
- tRAS = '42ns'
- tWR = '15ns'
- # Read to precharge is same as the burst
- tRTP = '20ns'
-
- # 4 beats across an x128 SDR interface translates to 4 clocks @ 200
MHz.
- # Note this is a BL4 SDR device.
- tBURST = '20ns'
-
- # WIO 8 Gb
- tRFC = '210ns'
-
- # WIO 8 Gb, <=85C, half for >85C
- tREFI = '3.9us'
-
- # Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns
- tWTR = '15ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @200 MHz = 10 ns
- tRTW = '10ns'
-
- # Default different rank bus delay to 2 CK, @200 MHz = 10 ns
- tCS = '10ns'
-
- # Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
- # Two instead of four activation window
- tXAW = '50ns'
- activation_limit = 2
-
- # The WideIO specification does not provide current information
-
-# A single LPDDR3 x32 interface (one command/address bus), with
-# default timings based on a LPDDR3-1600 4 Gbit part (Micron
-# EDF8132A1MC) in a 1x32 configuration.
-class LPDDR3_1600_1x32(DRAMCtrl):
- # No DLL for LPDDR3
- dll = False
-
- # size of device
- device_size = '512MB'
-
- # 1x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
- # LPDDR3 is a BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 4KB
- device_rowbuffer_size = '4kB'
-
- # 1x32 configuration, so 1 device
- devices_per_rank = 1
-
- # Technically the datasheet is a dual-rank package, but for
- # comparison with the LPDDR2 config we stick to a single rank
- ranks_per_channel = 1
-
- # LPDDR3 has 8 banks in all configurations
- banks_per_rank = 8
-
- # 800 MHz
- tCK = '1.25ns'
-
- tRCD = '18ns'
-
- # 12 CK read latency, 6 CK write latency @ 800 MHz, 1.25 ns cycle time
- tCL = '15ns'
-
- tRAS = '42ns'
- tWR = '15ns'
-
- # Greater of 4 CK or 7.5 ns, 4 CK @ 800 MHz = 5 ns
- tRTP = '7.5ns'
-
- # Pre-charge one bank 18 ns (all banks 21 ns)
- tRP = '18ns'
-
- # 8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz.
- # Note this is a BL8 DDR device.
- # Requests larger than 32 bytes are broken down into multiple requests
- # in the controller
- tBURST = '5ns'
-
- # LPDDR3, 4 Gb
- tRFC = '130ns'
- tREFI = '3.9us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '7.5ns'
-
- # self refresh exit time
- tXS = '140ns'
-
- # Irrespective of speed grade, tWTR is 7.5 ns
- tWTR = '7.5ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
- tRTW = '2.5ns'
-
- # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
- # Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
- # Irrespective of size, tFAW is 50 ns
- tXAW = '50ns'
- activation_limit = 4
-
- # Current values from datasheet
- IDD0 = '8mA'
- IDD02 = '60mA'
- IDD2N = '0.8mA'
- IDD2N2 = '26mA'
- IDD3N = '2mA'
- IDD3N2 = '34mA'
- IDD4W = '2mA'
- IDD4W2 = '190mA'
- IDD4R = '2mA'
- IDD4R2 = '230mA'
- IDD5 = '28mA'
- IDD52 = '150mA'
- IDD3P1 = '1.4mA'
- IDD3P12 = '11mA'
- IDD2P1 = '0.8mA'
- IDD2P12 = '1.8mA'
- IDD6 = '0.5mA'
- IDD62 = '1.8mA'
- VDD = '1.8V'
- VDD2 = '1.2V'
-
-# A single GDDR5 x64 interface, with
-# default timings based on a GDDR5-4000 1 Gbit part (SK Hynix
-# H5GQ1H24AFR) in a 2x32 configuration.
-class GDDR5_4000_2x32(DRAMCtrl):
- # size of device
- device_size = '128MB'
-
- # 2x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
- # GDDR5 is a BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 2Kbits (256Bytes)
- device_rowbuffer_size = '256B'
-
- # 2x32 configuration, so 2 devices
- devices_per_rank = 2
-
- # assume single rank
- ranks_per_channel = 1
-
- # GDDR5 has 4 bank groups
- bank_groups_per_rank = 4
-
- # GDDR5 has 16 banks with 4 bank groups
- banks_per_rank = 16
-
- # 1000 MHz
- tCK = '1ns'
-
- # 8 beats across an x64 interface translates to 2 clocks @ 1000 MHz
- # Data bus runs @2000 Mhz => DDR ( data runs at 4000 MHz )
- # 8 beats at 4000 MHz = 2 beats at 1000 MHz
- # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
- # With bank group architectures, tBURST represents the CAS-to-CAS
- # delay for bursts to different bank groups (tCCD_S)
- tBURST = '2ns'
-
- # @1000MHz data rate, tCCD_L is 3 CK
- # CAS-to-CAS delay for bursts to the same bank group
- # tBURST is equivalent to tCCD_S; no explicit parameter required
- # for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = '3ns';
-
- tRCD = '12ns'
-
- # tCL is not directly found in datasheet and assumed equal tRCD
- tCL = '12ns'
-
- tRP = '12ns'
- tRAS = '28ns'
-
- # RRD_S (different bank group)
- # RRD_S is 5.5 ns in datasheet.
- # rounded to the next multiple of tCK
- tRRD = '6ns'
-
- # RRD_L (same bank group)
- # RRD_L is 5.5 ns in datasheet.
- # rounded to the next multiple of tCK
- tRRD_L = '6ns'
-
- tXAW = '23ns'
-
- # tXAW < 4 x tRRD.
- # Therefore, activation limit is set to 0
- activation_limit = 0
-
- tRFC = '65ns'
- tWR = '12ns'
-
- # Here using the average of WTR_S and WTR_L
- tWTR = '5ns'
-
- # Read-to-Precharge 2 CK
- tRTP = '2ns'
-
- # Assume 2 cycles
- tRTW = '2ns'
-
-# A single HBM x128 interface (one command and address bus), with
-# default timings based on data publically released
-# ("HBM: Memory Solution for High Performance Processors", MemCon, 2014),
-# IDD measurement values, and by extrapolating data from other classes.
-# Architecture values based on published HBM spec
-# A 4H stack is defined, 2Gb per die for a total of 1GB of memory.
-class HBM_1000_4H_1x128(DRAMCtrl):
- # HBM gen1 supports up to 8 128-bit physical channels
- # Configuration defines a single channel, with the capacity
- # set to (full_stack_capacity / 8) based on 2Gb dies
- # To use all 8 channels, set 'channels' parameter to 8 in
- # system configuration
-
- # 128-bit interface legacy mode
- device_bus_width = 128
-
- # HBM supports BL4 and BL2 (legacy mode only)
- burst_length = 4
-
- # size of channel in bytes, 4H stack of 2Gb dies is 1GB per stack;
- # with 8 channels, 128MB per channel
- device_size = '128MB'
-
- device_rowbuffer_size = '2kB'
-
- # 1x128 configuration
- devices_per_rank = 1
-
- # HBM does not have a CS pin; set rank to 1
- ranks_per_channel = 1
-
- # HBM has 8 or 16 banks depending on capacity
- # 2Gb dies have 8 banks
- banks_per_rank = 8
-
- # depending on frequency, bank groups may be required
- # will always have 4 bank groups when enabled
- # current specifications do not define the minimum frequency for
- # bank group architecture
- # setting bank_groups_per_rank to 0 to disable until range is defined
- bank_groups_per_rank = 0
-
- # 500 MHz for 1Gbps DDR data rate
- tCK = '2ns'
-
- # use values from IDD measurement in JEDEC spec
- # use tRP value for tRCD and tCL similar to other classes
- tRP = '15ns'
- tRCD = '15ns'
- tCL = '15ns'
- tRAS = '33ns'
-
- # BL2 and BL4 supported, default to BL4
- # DDR @ 500 MHz means 4 * 2ns / 2 = 4ns
- tBURST = '4ns'
-
- # value for 2Gb device from JEDEC spec
- tRFC = '160ns'
-
- # value for 2Gb device from JEDEC spec
- tREFI = '3.9us'
-
- # extrapolate the following from LPDDR configs, using ns values
- # to minimize burst length, prefetch differences
- tWR = '18ns'
- tRTP = '7.5ns'
- tWTR = '10ns'
-
- # start with 2 cycles turnaround, similar to other memory classes
- # could be more with variations across the stack
- tRTW = '4ns'
-
- # single rank device, set to 0
- tCS = '0ns'
-
- # from MemCon example, tRRD is 4ns with 2ns tCK
- tRRD = '4ns'
-
- # from MemCon example, tFAW is 30ns with 2ns tCK
- tXAW = '30ns'
- activation_limit = 4
-
- # 4tCK
- tXP = '8ns'
-
- # start with tRFC + tXP -> 160ns + 8ns = 168ns
- tXS = '168ns'
-
-# A single HBM x64 interface (one command and address bus), with
-# default timings based on HBM gen1 and data publically released
-# A 4H stack is defined, 8Gb per die for a total of 4GB of memory.
-# Note: This defines a pseudo-channel with a unique controller
-# instantiated per pseudo-channel
-# Stay at same IO rate (1Gbps) to maintain timing relationship with
-# HBM gen1 class (HBM_1000_4H_x128) where possible
-class HBM_1000_4H_1x64(HBM_1000_4H_1x128):
- # For HBM gen2 with pseudo-channel mode, configure 2X channels.
- # Configuration defines a single pseudo channel, with the capacity
- # set to (full_stack_capacity / 16) based on 8Gb dies
- # To use all 16 pseudo channels, set 'channels' parameter to 16 in
- # system configuration
-
- # 64-bit pseudo-channel interface
- device_bus_width = 64
-
- # HBM pseudo-channel only supports BL4
- burst_length = 4
-
- # size of channel in bytes, 4H stack of 8Gb dies is 4GB per stack;
- # with 16 channels, 256MB per channel
- device_size = '256MB'
-
- # page size is halved with pseudo-channel; maintaining the same
number
- # of rows per pseudo-channel with 2X banks across 2 channels
- device_rowbuffer_size = '1kB'
-
- # HBM has 8 or 16 banks depending on capacity
- # Starting with 4Gb dies, 16 banks are defined
- banks_per_rank = 16
-
- # reset tRFC for larger, 8Gb device
- # use HBM1 4Gb value as a starting point
- tRFC = '260ns'
-
- # start with tRFC + tXP -> 260ns + 8ns = 268ns
- tXS = '268ns'
- # Default different rank bus delay to 2 CK, @1000 MHz = 2 ns
- tCS = '2ns'
- tREFI = '3.9us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '10ns'
-
- # self refresh exit time
- tXS = '65ns'
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture
-# burst of 32, which means bursts can be interleaved
-class LPDDR5_5500_1x16_BG_BL32(DRAMCtrl):
-
- # Increase buffer size to account for more bank resources
- read_buffer_size = 64
-
- # Set page policy to better suit DMC Huxley
- page_policy = 'close_adaptive'
-
- # 16-bit channel interface
- device_bus_width = 16
-
- # LPDDR5 is a BL16 or BL32 device
- # With BG mode, BL16 and BL32 are supported
- # Use BL32 for higher command bandwidth
- burst_length = 32
-
- # size of device in bytes
- device_size = '1GB'
-
- # 2kB page with BG mode
- device_rowbuffer_size = '2kB'
-
- # Use a 1x16 configuration
- devices_per_rank = 1
-
- # Use a single rank
- ranks_per_channel = 1
-
- # LPDDR5 supports configurable bank options
- # 8B : BL32, all frequencies
- # 16B : BL32 or BL16, <=3.2Gbps
- # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
- # Initial configuration will have 16 banks with Bank Group Arch
- # to maximize resources and enable higher data rates
- banks_per_rank = 16
- bank_groups_per_rank = 4
-
- # 5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
- tCK = '1.455ns'
-
- # Greater of 2 CK or 18ns
- tRCD = '18ns'
-
- # Base RL is 16 CK @ 687.5 MHz = 23.28ns
- tCL = '23.280ns'
-
- # Greater of 2 CK or 18ns
- tRP = '18ns'
-
- # Greater of 3 CK or 42ns
- tRAS = '42ns'
-
- # Greater of 3 CK or 34ns
- tWR = '34ns'
-
- # active powerdown and precharge powerdown exit time
- # Greater of 3 CK or 7ns
- tXP = '7ns'
-
- # self refresh exit time (tRFCab + 7.5ns)
- tXS = '217.5ns'
-
- # Greater of 2 CK or 7.5 ns minus 2 CK
- tRTP = '4.59ns'
-
- # With BG architecture, burst of 32 transferred in two 16-beat
- # sub-bursts, with a 16-beat gap in between.
- # Each 16-beat sub-burst is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
- # tBURST is the delay to transfer the Bstof32 = 6 CK @ 687.5 MHz
- tBURST = '8.73ns'
- # can interleave a Bstof32 from another bank group at tBURST_MIN
- # 16-beats is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
- tBURST_MIN = '2.91ns'
- # tBURST_MAX is the maximum burst delay for same bank group timing
- # this is 8 CK @ 687.5 MHz
- tBURST_MAX = '11.64ns'
-
- # 8 CK @ 687.5 MHz
- tCCD_L = "11.64ns"
-
- # LPDDR5, 8 Gbit/channel for 210ns tRFCab
- tRFC = '210ns'
- tREFI = '3.9us'
-
- # Greater of 4 CK or 6.25 ns
- tWTR = '6.25ns'
- # Greater of 4 CK or 12 ns
- tWTR_L = '12ns'
-
- # Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL
- # tWCKDQ0/tCK will be 1 CK for most cases
- # For gem5 RL = WL and BL/n is already accounted for with tBURST
- # The result is that an additional 1 CK is required
- tRTW = '1.455ns'
-
- # Default different rank bus delay to 2 CK, @687.5 MHz = 2.91 ns
- tCS = '2.91ns'
-
- # 2 CK
- tPPD = '2.91ns'
-
- # Greater of 2 CK or 5 ns
- tRRD = '5ns'
- tRRD_L = '5ns'
-
- # With Bank Group Arch mode tFAW is 20 ns
- tXAW = '20ns'
- activation_limit = 4
-
- # at 5Gbps, 4:1 WCK to CK ratio required
- # 2 data beats per WCK (DDR) -> 8 per CK
- beats_per_clock = 8
-
- # 2 cycles required to send activate command
- # 2 command phases can be sent back-to-back or
- # with a gap up to tAAD = 8 CK
- two_cycle_activate = True
- tAAD = '11.640ns'
-
- data_clock_sync = True
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture, burst of 16
-class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32):
-
- # LPDDR5 is a BL16 or BL32 device
- # With BG mode, BL16 and BL32 are supported
- # Use BL16 for smaller access granularity
- burst_length = 16
-
- # For Bstof16 with BG arch, 2 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST = '2.91ns'
- tBURST_MIN = '2.91ns'
- # For Bstof16 with BG arch, 4 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST_MAX = '5.82ns'
-
- # 4 CK @ 687.5 MHz
- tCCD_L = "5.82ns"
-
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 8-bank mode, burst of 32
-class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32):
-
- # 4kB page with 8B mode
- device_rowbuffer_size = '4kB'
-
- # LPDDR5 supports configurable bank options
- # 8B : BL32, all frequencies
- # 16B : BL32 or BL16, <=3.2Gbps
- # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
- # Select 8B
- banks_per_rank = 8
- bank_groups_per_rank = 0
-
- # For Bstof32 with 8B mode, 4 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST = '5.82ns'
- tBURST_MIN = '5.82ns'
- tBURST_MAX = '5.82ns'
-
- # Greater of 4 CK or 12 ns
- tWTR = '12ns'
-
- # Greater of 2 CK or 10 ns
- tRRD = '10ns'
-
- # With 8B mode tFAW is 40 ns
- tXAW = '40ns'
- activation_limit = 4
-
- # Reset BG arch timing for 8B mode
- tCCD_L = "0ns"
- tRRD_L = "0ns"
- tWTR_L = "0ns"
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture
-# burst of 32, which means bursts can be interleaved
-class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32):
-
- # 6.4Gb/s DDR with 4:1 WCK:CK ratio for 800 MHz CK
- tCK = '1.25ns'
-
- # Base RL is 17 CK @ 800 MHz = 21.25ns
- tCL = '21.25ns'
-
- # With BG architecture, burst of 32 transferred in two 16-beat
- # sub-bursts, with a 16-beat gap in between.
- # Each 16-beat sub-burst is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
- # tBURST is the delay to transfer the Bstof32 = 6 CK @ 800 MHz
- tBURST = '7.5ns'
- # can interleave a Bstof32 from another bank group at tBURST_MIN
- # 16-beats is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
- tBURST_MIN = '2.5ns'
- # tBURST_MAX is the maximum burst delay for same bank group timing
- # this is 8 CK @ 800 MHz
- tBURST_MAX = '10ns'
-
- # 8 CK @ 800 MHz
- tCCD_L = "10ns"
-
- # Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL
- # tWCKDQ0/tCK will be 1 CK for most cases
- # For gem5 RL = WL and BL/n is already accounted for with tBURST
- # The result is that an additional 1 CK is required
- tRTW = '1.25ns'
-
- # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
- # 2 CK
- tPPD = '2.5ns'
-
- # 2 command phases can be sent back-to-back or
- # with a gap up to tAAD = 8 CK
- tAAD = '10ns'
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on initial
-# JEDEC specification
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture, burst of 16
-class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32):
-
- # LPDDR5 is a BL16 or BL32 device
- # With BG mode, BL16 and BL32 are supported
- # Use BL16 for smaller access granularity
- burst_length = 16
-
- # For Bstof16 with BG arch, 2 CK @ 800 MHz with 4:1 clock ratio
- tBURST = '2.5ns'
- tBURST_MIN = '2.5ns'
- # For Bstof16 with BG arch, 4 CK @ 800 MHz with 4:1 clock ratio
- tBURST_MAX = '5ns'
-
- # 4 CK @ 800 MHz
- tCCD_L = "5ns"
-
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 8-bank mode, burst of 32
-class LPDDR5_6400_1x16_8B_BL32(LPDDR5_6400_1x16_BG_BL32):
-
- # 4kB page with 8B mode
- device_rowbuffer_size = '4kB'
-
- # LPDDR5 supports configurable bank options
- # 8B : BL32, all frequencies
- # 16B : BL32 or BL16, <=3.2Gbps
- # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
- # Select 8B
- banks_per_rank = 8
- bank_groups_per_rank = 0
-
- # For Bstof32 with 8B mode, 4 CK @ 800 MHz with 4:1 clock ratio
- tBURST = '5ns'
- tBURST_MIN = '5ns'
- tBURST_MAX = '5ns'
-
- # Greater of 4 CK or 12 ns
- tWTR = '12ns'
-
- # Greater of 2 CK or 10 ns
- tRRD = '10ns'
-
- # With 8B mode tFAW is 40 ns
- tXAW = '40ns'
- activation_limit = 4
-
- # Reset BG arch timing for 8B mode
- tCCD_L = "0ns"
- tRRD_L = "0ns"
- tWTR_L = "0ns"
diff --git a/src/mem/DRAMInterface.py b/src/mem/DRAMInterface.py
new file mode 100644
index 0000000..f571920
--- /dev/null
+++ b/src/mem/DRAMInterface.py
@@ -0,0 +1,1473 @@
+# Copyright (c) 2012-2020 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Copyright (c) 2013 Amin Farmahini-Farahani
+# Copyright (c) 2015 University of Kaiserslautern
+# Copyright (c) 2015 The University of Bologna
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.params import *
+from m5.proxy import *
+
+from m5.objects.AbstractMemory import AbstractMemory
+
+# Enum for the address mapping. With Ch, Ra, Ba, Ro and Co denoting
+# channel, rank, bank, row and column, respectively, and going from
+# MSB to LSB. Available are RoRaBaChCo and RoRaBaCoCh, that are
+# suitable for an open-page policy, optimising for sequential accesses
+# hitting in the open row. For a closed-page policy, RoCoRaBaCh
+# maximises parallelism.
+class AddrMap(Enum): vals = ['RoRaBaChCo', 'RoRaBaCoCh', 'RoCoRaBaCh']
+
+# Enum for the page policy, either open, open_adaptive, close, or
+# close_adaptive.
+class PageManage(Enum): vals = ['open', 'open_adaptive', 'close',
+ 'close_adaptive']
+
+class DRAMInterface(AbstractMemory):
+ type = 'DRAMInterface'
+ cxx_header = "mem/dram_ctrl.hh"
+
+ # Allow the interface to set required controller buffer sizes
+ # each entry corresponds to a burst for the specific DRAM
+ # configuration (e.g. x32 with burst length 8 is 32 bytes) and not
+ # the cacheline size or request/packet size
+ write_buffer_size = Param.Unsigned(64, "Number of write queue entries")
+ read_buffer_size = Param.Unsigned(32, "Number of read queue entries")
+
+ # scheduler, address map and page policy
+ addr_mapping = Param.AddrMap('RoRaBaCoCh', "Address mapping policy")
+ page_policy = Param.PageManage('open_adaptive', "Page management
policy")
+
+ # enforce a limit on the number of accesses per row
+ max_accesses_per_row = Param.Unsigned(16, "Max accesses per row
before "
+ "closing");
+
+ # size of DRAM Chip in Bytes
+ device_size = Param.MemorySize("Size of DRAM chip")
+ # the physical organisation of the DRAM
+ device_bus_width = Param.Unsigned("data bus width in bits for each
DRAM "\
+ "device/chip")
+ burst_length = Param.Unsigned("Burst length (BL) in beats")
+ device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\
+ "device/chip")
+ devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
+ ranks_per_channel = Param.Unsigned("Number of ranks per channel")
+
+ # default to 0 bank groups per rank, indicating bank group architecture
+ # is not used
+ # update per memory class when bank group architecture is supported
+ bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per
rank")
+ banks_per_rank = Param.Unsigned("Number of banks per rank")
+
+ # Enable DRAM powerdown states if True. This is False by default due to
+ # performance being lower when enabled
+ enable_dram_powerdown = Param.Bool(False, "Enable powerdown states")
+
+ # For power modelling we need to know if the DRAM has a DLL or not
+ dll = Param.Bool(True, "DRAM has DLL or not")
+
+ # DRAMPower provides in addition to the core power, the possibility to
+ # include RD/WR termination and IO power. This calculation assumes some
+ # default values. The integration of DRAMPower with gem5 does not
include
+ # IO and RD/WR termination power by default. This might be added as an
+ # additional feature in the future.
+
+ # timing behaviour and constraints - all in nanoseconds
+
+ # the base clock period of the DRAM
+ tCK = Param.Latency("Clock period")
+
+ # the amount of time in nanoseconds from issuing an activate command
+ # to the data being available in the row buffer for a read/write
+ tRCD = Param.Latency("RAS to CAS delay")
+
+ # the time from issuing a read/write command to seeing the actual data
+ tCL = Param.Latency("CAS latency")
+
+ # minimum time between a precharge and subsequent activate
+ tRP = Param.Latency("Row precharge time")
+
+ # minimum time between an activate and a precharge to the same row
+ tRAS = Param.Latency("ACT to PRE delay")
+
+ # minimum time between a write data transfer and a precharge
+ tWR = Param.Latency("Write recovery time")
+
+ # minimum time between a read and precharge command
+ tRTP = Param.Latency("Read to precharge")
+
+ # time to complete a burst transfer, typically the burst length
+ # divided by two due to the DDR bus, but by making it a parameter
+ # it is easier to also evaluate SDR memories like WideIO.
+ # This parameter has to account for burst length.
+ # Read/Write requests with data size larger than one full burst are
broken
+ # down into multiple requests in the controller
+ # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
+ # With bank group architectures, tBURST represents the CAS-to-CAS
+ # delay for bursts to different bank groups (tCCD_S)
+ tBURST = Param.Latency("Burst duration "
+ "(typically burst length / 2 cycles)")
+
+ # tBURST_MAX is the column array cycle delay required before next
access,
+ # which could be greater than tBURST when the memory access time is
greater
+ # than tBURST
+ tBURST_MAX = Param.Latency(Self.tBURST, "Column access delay")
+
+ # tBURST_MIN is the minimum delay between bursts, which could be less
than
+ # tBURST when interleaving is supported
+ tBURST_MIN = Param.Latency(Self.tBURST, "Minimum delay between bursts")
+
+ # CAS-to-CAS delay for bursts to the same bank group
+ # only utilized with bank group architectures; set to 0 for default
case
+ # tBURST is equivalent to tCCD_S; no explicit parameter required
+ # for CAS-to-CAS delay for bursts to different bank groups
+ tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")
+
+ # Write-to-Write delay for bursts to the same bank group
+ # only utilized with bank group architectures; set to 0 for default
case
+ # This will be used to enable different same bank group delays
+ # for writes versus reads
+ tCCD_L_WR = Param.Latency(Self.tCCD_L,
+ "Same bank group Write to Write delay")
+
+ # time taken to complete one refresh cycle (N rows in all banks)
+ tRFC = Param.Latency("Refresh cycle time")
+
+ # refresh command interval, how often a "ref" command needs
+ # to be sent. It is 7.8 us for a 64ms refresh requirement
+ tREFI = Param.Latency("Refresh command interval")
+
+ # write-to-read, same rank turnaround penalty
+ tWTR = Param.Latency("Write to read, same rank switching time")
+
+ # write-to-read, same rank turnaround penalty for same bank group
+ tWTR_L = Param.Latency(Self.tWTR, "Write to read, same rank switching "
+ "time, same bank group")
+
+ # read-to-write, same rank turnaround penalty
+ tRTW = Param.Latency("Read to write, same rank switching time")
+
+ # rank-to-rank bus delay penalty
+ # this does not correlate to a memory timing parameter and encompasses:
+ # 1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD
+ # different rank bus delay
+ tCS = Param.Latency("Rank to rank switching time")
+
+ # minimum precharge to precharge delay time
+ tPPD = Param.Latency("0ns", "PRE to PRE delay")
+
+ # maximum delay between two-cycle ACT command phases
+ tAAD = Param.Latency(Self.tCK,
+ "Maximum delay between two-cycle ACT commands")
+
+ two_cycle_activate = Param.Bool(False,
+ "Two cycles required to send activate")
+
+ # minimum row activate to row activate delay time
+ tRRD = Param.Latency("ACT to ACT delay")
+
+ # only utilized with bank group architectures; set to 0 for default
case
+ tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")
+
+ # time window in which a maximum number of activates are allowed
+ # to take place, set to 0 to disable
+ tXAW = Param.Latency("X activation window")
+ activation_limit = Param.Unsigned("Max number of activates in window")
+
+ # time to exit power-down mode
+ # Exit power-down to next valid command delay
+ tXP = Param.Latency("0ns", "Power-up Delay")
+
+ # Exit Powerdown to commands requiring a locked DLL
+ tXPDLL = Param.Latency("0ns", "Power-up Delay with locked DLL")
+
+ # time to exit self-refresh mode
+ tXS = Param.Latency("0ns", "Self-refresh exit latency")
+
+ # time to exit self-refresh mode with locked DLL
+ tXSDLL = Param.Latency("0ns", "Self-refresh exit latency DLL")
+
+ # number of data beats per clock. with DDR, default is 2, one per edge
+ beats_per_clock = Param.Unsigned(2, "Data beats per clock")
+
+ data_clock_sync = Param.Bool(False, "Synchronization commands
required")
+
+ # Currently rolled into other params
+ ######################################################################
+
+ # tRC - assumed to be tRAS + tRP
+
+ # Power Behaviour and Constraints
+ # DRAMs like LPDDR and WideIO have 2 external voltage domains. These
are
+ # defined as VDD and VDD2. Each current is defined for each voltage
domain
+ # separately. For example, current IDD0 is active-precharge current for
+ # voltage domain VDD and current IDD02 is active-precharge current for
+ # voltage domain VDD2.
+ # By default all currents are set to 0mA. Users who are only
interested in
+ # the performance of DRAMs can leave them at 0.
+
+ # Operating 1 Bank Active-Precharge current
+ IDD0 = Param.Current("0mA", "Active precharge current")
+
+ # Operating 1 Bank Active-Precharge current multiple voltage Range
+ IDD02 = Param.Current("0mA", "Active precharge current VDD2")
+
+ # Precharge Power-down Current: Slow exit
+ IDD2P0 = Param.Current("0mA", "Precharge Powerdown slow")
+
+ # Precharge Power-down Current: Slow exit multiple voltage Range
+ IDD2P02 = Param.Current("0mA", "Precharge Powerdown slow VDD2")
+
+ # Precharge Power-down Current: Fast exit
+ IDD2P1 = Param.Current("0mA", "Precharge Powerdown fast")
+
+ # Precharge Power-down Current: Fast exit multiple voltage Range
+ IDD2P12 = Param.Current("0mA", "Precharge Powerdown fast VDD2")
+
+ # Precharge Standby current
+ IDD2N = Param.Current("0mA", "Precharge Standby current")
+
+ # Precharge Standby current multiple voltage range
+ IDD2N2 = Param.Current("0mA", "Precharge Standby current VDD2")
+
+ # Active Power-down current: slow exit
+ IDD3P0 = Param.Current("0mA", "Active Powerdown slow")
+
+ # Active Power-down current: slow exit multiple voltage range
+ IDD3P02 = Param.Current("0mA", "Active Powerdown slow VDD2")
+
+ # Active Power-down current : fast exit
+ IDD3P1 = Param.Current("0mA", "Active Powerdown fast")
+
+ # Active Power-down current : fast exit multiple voltage range
+ IDD3P12 = Param.Current("0mA", "Active Powerdown fast VDD2")
+
+ # Active Standby current
+ IDD3N = Param.Current("0mA", "Active Standby current")
+
+ # Active Standby current multiple voltage range
+ IDD3N2 = Param.Current("0mA", "Active Standby current VDD2")
+
+ # Burst Read Operating Current
+ IDD4R = Param.Current("0mA", "READ current")
+
+ # Burst Read Operating Current multiple voltage range
+ IDD4R2 = Param.Current("0mA", "READ current VDD2")
+
+ # Burst Write Operating Current
+ IDD4W = Param.Current("0mA", "WRITE current")
+
+ # Burst Write Operating Current multiple voltage range
+ IDD4W2 = Param.Current("0mA", "WRITE current VDD2")
+
+ # Refresh Current
+ IDD5 = Param.Current("0mA", "Refresh current")
+
+ # Refresh Current multiple voltage range
+ IDD52 = Param.Current("0mA", "Refresh current VDD2")
+
+ # Self-Refresh Current
+ IDD6 = Param.Current("0mA", "Self-refresh Current")
+
+ # Self-Refresh Current multiple voltage range
+ IDD62 = Param.Current("0mA", "Self-refresh Current VDD2")
+
+ # Main voltage range of the DRAM
+ VDD = Param.Voltage("0V", "Main Voltage Range")
+
+ # Second voltage range defined by some DRAMs
+ VDD2 = Param.Voltage("0V", "2nd Voltage Range")
+
+# A single DDR3-1600 x64 channel (one command and address bus), with
+# timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in
+# an 8x8 configuration.
+class DDR3_1600_8x8(DRAMInterface):
+ # size of device in bytes
+ device_size = '512MB'
+
+ # 8x8 configuration, 8 devices each with an 8-bit interface
+ device_bus_width = 8
+
+ # DDR3 is a BL8 device
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
+ device_rowbuffer_size = '1kB'
+
+ # 8x8 configuration, so 8 devices
+ devices_per_rank = 8
+
+ # Use two ranks
+ ranks_per_channel = 2
+
+ # DDR3 has 8 banks in all configurations
+ banks_per_rank = 8
+
+ # 800 MHz
+ tCK = '1.25ns'
+
+ # 8 beats across an x64 interface translates to 4 clocks @ 800 MHz
+ tBURST = '5ns'
+
+ # DDR3-1600 11-11-11
+ tRCD = '13.75ns'
+ tCL = '13.75ns'
+ tRP = '13.75ns'
+ tRAS = '35ns'
+ tRRD = '6ns'
+ tXAW = '30ns'
+ activation_limit = 4
+ tRFC = '260ns'
+
+ tWR = '15ns'
+
+ # Greater of 4 CK or 7.5 ns
+ tWTR = '7.5ns'
+
+ # Greater of 4 CK or 7.5 ns
+ tRTP = '7.5ns'
+
+ # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
+ tRTW = '2.5ns'
+
+ # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
+ tCS = '2.5ns'
+
+ # <=85C, half for >85C
+ tREFI = '7.8us'
+
+ # active powerdown and precharge powerdown exit time
+ tXP = '6ns'
+
+ # self refresh exit time
+ tXS = '270ns'
+
+ # Current values from datasheet Die Rev E,J
+ IDD0 = '55mA'
+ IDD2N = '32mA'
+ IDD3N = '38mA'
+ IDD4W = '125mA'
+ IDD4R = '157mA'
+ IDD5 = '235mA'
+ IDD3P1 = '38mA'
+ IDD2P1 = '32mA'
+ IDD6 = '20mA'
+ VDD = '1.5V'
+
+# A single HMC-2500 x32 model based on:
+# [1] DRAMSpec: a high-level DRAM bank modelling tool
+# developed at the University of Kaiserslautern. This high level tool
+# uses RC (resistance-capacitance) and CV (capacitance-voltage) models to
+# estimate the DRAM bank latency and power numbers.
+# [2] High performance AXI-4.0 based interconnect for extensible smart
memory
+# cubes (E. Azarkhish et. al)
+# Assumed for the HMC model is a 30 nm technology node.
+# The modelled HMC consists of 4 Gbit layers which sum up to 2GB of memory
(4
+# layers).
+# Each layer has 16 vaults and each vault consists of 2 banks per layer.
+# In order to be able to use the same controller used for 2D DRAM
generations
+# for HMC, the following analogy is done:
+# Channel (DDR) => Vault (HMC)
+# device_size (DDR) => size of a single layer in a vault
+# ranks per channel (DDR) => number of layers
+# banks per rank (DDR) => banks per layer
+# devices per rank (DDR) => devices per layer ( 1 for HMC).
+# The parameters for which no input is available are inherited from the
DDR3
+# configuration.
+# This configuration includes the latencies from the DRAM to the logic
layer
+# of the HMC
+class HMC_2500_1x32(DDR3_1600_8x8):
+ # size of device
+ # two banks per device with each bank 4MB [2]
+ device_size = '8MB'
+
+ # 1x32 configuration, 1 device with 32 TSVs [2]
+ device_bus_width = 32
+
+ # HMC is a BL8 device [2]
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 256 bytes [2]
+ device_rowbuffer_size = '256B'
+
+ # 1x32 configuration, so 1 device [2]
+ devices_per_rank = 1
+
+ # 4 layers so 4 ranks [2]
+ ranks_per_channel = 4
+
+ # HMC has 2 banks per layer [2]
+ # Each layer represents a rank. With 4 layers and 8 banks in total,
each
+ # layer has 2 banks; thus 2 banks per rank.
+ banks_per_rank = 2
+
+ # 1250 MHz [2]
+ tCK = '0.8ns'
+
+ # 8 beats across an x32 interface translates to 4 clocks @ 1250 MHz
+ tBURST = '3.2ns'
+
+ # Values using DRAMSpec HMC model [1]
+ tRCD = '10.2ns'
+ tCL = '9.9ns'
+ tRP = '7.7ns'
+ tRAS = '21.6ns'
+
+ # tRRD depends on the power supply network for each vendor.
+ # We assume a tRRD of a double bank approach to be equal to 4 clock
+ # cycles (Assumption)
+ tRRD = '3.2ns'
+
+ # activation limit is set to 0 since there are only 2 banks per vault
+ # layer.
+ activation_limit = 0
+
+ # Values using DRAMSpec HMC model [1]
+ tRFC = '59ns'
+ tWR = '8ns'
+ tRTP = '4.9ns'
+
+ # Default different rank bus delay assumed to 1 CK for TSVs, @1250 MHz
=
+ # 0.8 ns (Assumption)
+ tCS = '0.8ns'
+
+ # Value using DRAMSpec HMC model [1]
+ tREFI = '3.9us'
+
+ # The default page policy in the vault controllers is simple closed
page
+ # [2] nevertheless 'close' policy opens and closes the row multiple
times
+ # for bursts larger than 32Bytes. For this reason we
use 'close_adaptive'
+ page_policy = 'close_adaptive'
+
+ # RoCoRaBaCh resembles the default address mapping in HMC
+ addr_mapping = 'RoCoRaBaCh'
+
+ # These parameters do not directly correlate with buffer_size in real
+ # hardware. Nevertheless, their value has been tuned to achieve a
+ # bandwidth similar to the cycle-accurate model in [2]
+ write_buffer_size = 32
+ read_buffer_size = 32
+
+# A single DDR3-2133 x64 channel refining a selected subset of the
+# options for the DDR3-1600 configuration, based on the same DDR3-1600
+# 4 Gbit datasheet (Micron MT41J512M8). Most parameters are kept
+# consistent across the two configurations.
+class DDR3_2133_8x8(DDR3_1600_8x8):
+ # 1066 MHz
+ tCK = '0.938ns'
+
+ # 8 beats across an x64 interface translates to 4 clocks @ 1066 MHz
+ tBURST = '3.752ns'
+
+ # DDR3-2133 14-14-14
+ tRCD = '13.09ns'
+ tCL = '13.09ns'
+ tRP = '13.09ns'
+ tRAS = '33ns'
+ tRRD = '5ns'
+ tXAW = '25ns'
+
+ # Current values from datasheet
+ IDD0 = '70mA'
+ IDD2N = '37mA'
+ IDD3N = '44mA'
+ IDD4W = '157mA'
+ IDD4R = '191mA'
+ IDD5 = '250mA'
+ IDD3P1 = '44mA'
+ IDD2P1 = '43mA'
+ IDD6 ='20mA'
+ VDD = '1.5V'
+
+# A single DDR4-2400 x64 channel (one command and address bus), with
+# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A2G4)
+# in a 16x4 configuration.
+# Total channel capacity is 32GB
+# 16 devices/rank * 2 ranks/channel * 1GB/device = 32GB/channel
+class DDR4_2400_16x4(DRAMInterface):
+ # size of device
+ device_size = '1GB'
+
+ # 16x4 configuration, 16 devices each with a 4-bit interface
+ device_bus_width = 4
+
+ # DDR4 is a BL8 device
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 512 byte (1K columns x4)
+ device_rowbuffer_size = '512B'
+
+ # 16x4 configuration, so 16 devices
+ devices_per_rank = 16
+
+ # Match our DDR3 configurations which is dual rank
+ ranks_per_channel = 2
+
+ # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
+ # Set to 4 for x4 case
+ bank_groups_per_rank = 4
+
+ # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
+ # configurations). Currently we do not capture the additional
+ # constraints incurred by the bank groups
+ banks_per_rank = 16
+
+ # override the default buffer sizes and go for something larger to
+ # accommodate the larger bank count
+ write_buffer_size = 128
+ read_buffer_size = 64
+
+ # 1200 MHz
+ tCK = '0.833ns'
+
+ # 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
+ # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
+ # With bank group architectures, tBURST represents the CAS-to-CAS
+ # delay for bursts to different bank groups (tCCD_S)
+ tBURST = '3.332ns'
+
+ # @2400 data rate, tCCD_L is 6 CK
+ # CAS-to-CAS delay for bursts to the same bank group
+ # tBURST is equivalent to tCCD_S; no explicit parameter required
+ # for CAS-to-CAS delay for bursts to different bank groups
+ tCCD_L = '5ns';
+
+ # DDR4-2400 17-17-17
+ tRCD = '14.16ns'
+ tCL = '14.16ns'
+ tRP = '14.16ns'
+ tRAS = '32ns'
+
+ # RRD_S (different bank group) for 512B page is MAX(4 CK, 3.3ns)
+ tRRD = '3.332ns'
+
+ # RRD_L (same bank group) for 512B page is MAX(4 CK, 4.9ns)
+ tRRD_L = '4.9ns';
+
+ # tFAW for 512B page is MAX(16 CK, 13ns)
+ tXAW = '13.328ns'
+ activation_limit = 4
+ # tRFC is 350ns
+ tRFC = '350ns'
+
+ tWR = '15ns'
+
+ # Here using the average of WTR_S and WTR_L
+ tWTR = '5ns'
+
+ # Greater of 4 CK or 7.5 ns
+ tRTP = '7.5ns'
+
+ # Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666
ns
+ tRTW = '1.666ns'
+
+ # Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns
+ tCS = '1.666ns'
+
+ # <=85C, half for >85C
+ tREFI = '7.8us'
+
+ # active powerdown and precharge powerdown exit time
+ tXP = '6ns'
+
+ # self refresh exit time
+ # exit delay to ACT, PRE, PREALL, REF, SREF Enter, and PD Enter is:
+ # tRFC + 10ns = 340ns
+ tXS = '340ns'
+
+ # Current values from datasheet
+ IDD0 = '43mA'
+ IDD02 = '3mA'
+ IDD2N = '34mA'
+ IDD3N = '38mA'
+ IDD3N2 = '3mA'
+ IDD4W = '103mA'
+ IDD4R = '110mA'
+ IDD5 = '250mA'
+ IDD3P1 = '32mA'
+ IDD2P1 = '25mA'
+ IDD6 = '30mA'
+ VDD = '1.2V'
+ VDD2 = '2.5V'
+
+# A single DDR4-2400 x64 channel (one command and address bus), with
+# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A1G8)
+# in an 8x8 configuration.
+# Total channel capacity is 16GB
+# 8 devices/rank * 2 ranks/channel * 1GB/device = 16GB/channel
+class DDR4_2400_8x8(DDR4_2400_16x4):
+ # 8x8 configuration, 8 devices each with an 8-bit interface
+ device_bus_width = 8
+
+ # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
+ device_rowbuffer_size = '1kB'
+
+ # 8x8 configuration, so 8 devices
+ devices_per_rank = 8
+
+ # RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
+ tRRD_L = '4.9ns';
+
+ tXAW = '21ns'
+
+ # Current values from datasheet
+ IDD0 = '48mA'
+ IDD3N = '43mA'
+ IDD4W = '123mA'
+ IDD4R = '135mA'
+ IDD3P1 = '37mA'
+
+# A single DDR4-2400 x64 channel (one command and address bus), with
+# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A512M16)
+# in an 4x16 configuration.
+# Total channel capacity is 4GB
+# 4 devices/rank * 1 ranks/channel * 1GB/device = 4GB/channel
+class DDR4_2400_4x16(DDR4_2400_16x4):
+ # 4x16 configuration, 4 devices each with a 16-bit interface
+ device_bus_width = 16
+
+ # Each device has a page (row buffer) size of 2 Kbyte (1K columns x16)
+ device_rowbuffer_size = '2kB'
+
+ # 4x16 configuration, so 4 devices
+ devices_per_rank = 4
+
+ # Single rank for x16
+ ranks_per_channel = 1
+
+ # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
+ # Set to 2 for x16 case
+ bank_groups_per_rank = 2
+
+ # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
+ # configurations). Currently we do not capture the additional
+ # constraints incurred by the bank groups
+ banks_per_rank = 8
+
+ # RRD_S (different bank group) for 2K page is MAX(4 CK, 5.3ns)
+ tRRD = '5.3ns'
+
+ # RRD_L (same bank group) for 2K page is MAX(4 CK, 6.4ns)
+ tRRD_L = '6.4ns';
+
+ tXAW = '30ns'
+
+ # Current values from datasheet
+ IDD0 = '80mA'
+ IDD02 = '4mA'
+ IDD2N = '34mA'
+ IDD3N = '47mA'
+ IDD4W = '228mA'
+ IDD4R = '243mA'
+ IDD5 = '280mA'
+ IDD3P1 = '41mA'
+
+# A single LPDDR2-S4 x32 interface (one command/address bus), with
+# default timings based on a LPDDR2-1066 4 Gbit part (Micron MT42L128M32D1)
+# in a 1x32 configuration.
+class LPDDR2_S4_1066_1x32(DRAMInterface):
+ # No DLL in LPDDR2
+ dll = False
+
+ # size of device
+ device_size = '512MB'
+
+ # 1x32 configuration, 1 device with a 32-bit interface
+ device_bus_width = 32
+
+ # LPDDR2_S4 is a BL4 and BL8 device
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 1KB
+ # (this depends on the memory density)
+ device_rowbuffer_size = '1kB'
+
+ # 1x32 configuration, so 1 device
+ devices_per_rank = 1
+
+ # Use a single rank
+ ranks_per_channel = 1
+
+ # LPDDR2-S4 has 8 banks in all configurations
+ banks_per_rank = 8
+
+ # 533 MHz
+ tCK = '1.876ns'
+
+ # Fixed at 15 ns
+ tRCD = '15ns'
+
+ # 8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle time
+ tCL = '15ns'
+
+ # Pre-charge one bank 15 ns (all banks 18 ns)
+ tRP = '15ns'
+
+ tRAS = '42ns'
+ tWR = '15ns'
+
+ tRTP = '7.5ns'
+
+ # 8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz.
+ # Note this is a BL8 DDR device.
+ # Requests larger than 32 bytes are broken down into multiple requests
+ # in the controller
+ tBURST = '7.5ns'
+
+ # LPDDR2-S4, 4 Gbit
+ tRFC = '130ns'
+ tREFI = '3.9us'
+
+ # active powerdown and precharge powerdown exit time
+ tXP = '7.5ns'
+
+ # self refresh exit time
+ tXS = '140ns'
+
+ # Irrespective of speed grade, tWTR is 7.5 ns
+ tWTR = '7.5ns'
+
+ # Default same rank rd-to-wr bus turnaround to 2 CK, @533 MHz = 3.75 ns
+ tRTW = '3.75ns'
+
+ # Default different rank bus delay to 2 CK, @533 MHz = 3.75 ns
+ tCS = '3.75ns'
+
+ # Activate to activate irrespective of density and speed grade
+ tRRD = '10.0ns'
+
+ # Irrespective of density, tFAW is 50 ns
+ tXAW = '50ns'
+ activation_limit = 4
+
+ # Current values from datasheet
+ IDD0 = '15mA'
+ IDD02 = '70mA'
+ IDD2N = '2mA'
+ IDD2N2 = '30mA'
+ IDD3N = '2.5mA'
+ IDD3N2 = '30mA'
+ IDD4W = '10mA'
+ IDD4W2 = '190mA'
+ IDD4R = '3mA'
+ IDD4R2 = '220mA'
+ IDD5 = '40mA'
+ IDD52 = '150mA'
+ IDD3P1 = '1.2mA'
+ IDD3P12 = '8mA'
+ IDD2P1 = '0.6mA'
+ IDD2P12 = '0.8mA'
+ IDD6 = '1mA'
+ IDD62 = '3.2mA'
+ VDD = '1.8V'
+ VDD2 = '1.2V'
+
+# A single WideIO x128 interface (one command and address bus), with
+# default timings based on an estimated WIO-200 8 Gbit part.
+class WideIO_200_1x128(DRAMInterface):
+ # No DLL for WideIO
+ dll = False
+
+ # size of device
+ device_size = '1024MB'
+
+ # 1x128 configuration, 1 device with a 128-bit interface
+ device_bus_width = 128
+
+ # This is a BL4 device
+ burst_length = 4
+
+ # Each device has a page (row buffer) size of 4KB
+ # (this depends on the memory density)
+ device_rowbuffer_size = '4kB'
+
+ # 1x128 configuration, so 1 device
+ devices_per_rank = 1
+
+ # Use one rank for a one-high die stack
+ ranks_per_channel = 1
+
+ # WideIO has 4 banks in all configurations
+ banks_per_rank = 4
+
+ # 200 MHz
+ tCK = '5ns'
+
+ # WIO-200
+ tRCD = '18ns'
+ tCL = '18ns'
+ tRP = '18ns'
+ tRAS = '42ns'
+ tWR = '15ns'
+ # Read to precharge is same as the burst
+ tRTP = '20ns'
+
+ # 4 beats across an x128 SDR interface translates to 4 clocks @ 200
MHz.
+ # Note this is a BL4 SDR device.
+ tBURST = '20ns'
+
+ # WIO 8 Gb
+ tRFC = '210ns'
+
+ # WIO 8 Gb, <=85C, half for >85C
+ tREFI = '3.9us'
+
+ # Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns
+ tWTR = '15ns'
+
+ # Default same rank rd-to-wr bus turnaround to 2 CK, @200 MHz = 10 ns
+ tRTW = '10ns'
+
+ # Default different rank bus delay to 2 CK, @200 MHz = 10 ns
+ tCS = '10ns'
+
+ # Activate to activate irrespective of density and speed grade
+ tRRD = '10.0ns'
+
+ # Two instead of four activation window
+ tXAW = '50ns'
+ activation_limit = 2
+
+ # The WideIO specification does not provide current information
+
+# A single LPDDR3 x32 interface (one command/address bus), with
+# default timings based on a LPDDR3-1600 4 Gbit part (Micron
+# EDF8132A1MC) in a 1x32 configuration.
+class LPDDR3_1600_1x32(DRAMInterface):
+ # No DLL for LPDDR3
+ dll = False
+
+ # size of device
+ device_size = '512MB'
+
+ # 1x32 configuration, 1 device with a 32-bit interface
+ device_bus_width = 32
+
+ # LPDDR3 is a BL8 device
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 4KB
+ device_rowbuffer_size = '4kB'
+
+ # 1x32 configuration, so 1 device
+ devices_per_rank = 1
+
+ # Technically the datasheet is a dual-rank package, but for
+ # comparison with the LPDDR2 config we stick to a single rank
+ ranks_per_channel = 1
+
+ # LPDDR3 has 8 banks in all configurations
+ banks_per_rank = 8
+
+ # 800 MHz
+ tCK = '1.25ns'
+
+ tRCD = '18ns'
+
+ # 12 CK read latency, 6 CK write latency @ 800 MHz, 1.25 ns cycle time
+ tCL = '15ns'
+
+ tRAS = '42ns'
+ tWR = '15ns'
+
+ # Greater of 4 CK or 7.5 ns, 4 CK @ 800 MHz = 5 ns
+ tRTP = '7.5ns'
+
+ # Pre-charge one bank 18 ns (all banks 21 ns)
+ tRP = '18ns'
+
+ # 8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz.
+ # Note this is a BL8 DDR device.
+ # Requests larger than 32 bytes are broken down into multiple requests
+ # in the controller
+ tBURST = '5ns'
+
+ # LPDDR3, 4 Gb
+ tRFC = '130ns'
+ tREFI = '3.9us'
+
+ # active powerdown and precharge powerdown exit time
+ tXP = '7.5ns'
+
+ # self refresh exit time
+ tXS = '140ns'
+
+ # Irrespective of speed grade, tWTR is 7.5 ns
+ tWTR = '7.5ns'
+
+ # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
+ tRTW = '2.5ns'
+
+ # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
+ tCS = '2.5ns'
+
+ # Activate to activate irrespective of density and speed grade
+ tRRD = '10.0ns'
+
+ # Irrespective of size, tFAW is 50 ns
+ tXAW = '50ns'
+ activation_limit = 4
+
+ # Current values from datasheet
+ IDD0 = '8mA'
+ IDD02 = '60mA'
+ IDD2N = '0.8mA'
+ IDD2N2 = '26mA'
+ IDD3N = '2mA'
+ IDD3N2 = '34mA'
+ IDD4W = '2mA'
+ IDD4W2 = '190mA'
+ IDD4R = '2mA'
+ IDD4R2 = '230mA'
+ IDD5 = '28mA'
+ IDD52 = '150mA'
+ IDD3P1 = '1.4mA'
+ IDD3P12 = '11mA'
+ IDD2P1 = '0.8mA'
+ IDD2P12 = '1.8mA'
+ IDD6 = '0.5mA'
+ IDD62 = '1.8mA'
+ VDD = '1.8V'
+ VDD2 = '1.2V'
+
+# A single GDDR5 x64 interface, with
+# default timings based on a GDDR5-4000 1 Gbit part (SK Hynix
+# H5GQ1H24AFR) in a 2x32 configuration.
+class GDDR5_4000_2x32(DRAMInterface):
+ # size of device
+ device_size = '128MB'
+
+ # 2x32 configuration, 1 device with a 32-bit interface
+ device_bus_width = 32
+
+ # GDDR5 is a BL8 device
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 2Kbits (256Bytes)
+ device_rowbuffer_size = '256B'
+
+ # 2x32 configuration, so 2 devices
+ devices_per_rank = 2
+
+ # assume single rank
+ ranks_per_channel = 1
+
+ # GDDR5 has 4 bank groups
+ bank_groups_per_rank = 4
+
+ # GDDR5 has 16 banks with 4 bank groups
+ banks_per_rank = 16
+
+ # 1000 MHz
+ tCK = '1ns'
+
+ # 8 beats across an x64 interface translates to 2 clocks @ 1000 MHz
+ # Data bus runs @2000 Mhz => DDR ( data runs at 4000 MHz )
+ # 8 beats at 4000 MHz = 2 beats at 1000 MHz
+ # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
+ # With bank group architectures, tBURST represents the CAS-to-CAS
+ # delay for bursts to different bank groups (tCCD_S)
+ tBURST = '2ns'
+
+ # @1000MHz data rate, tCCD_L is 3 CK
+ # CAS-to-CAS delay for bursts to the same bank group
+ # tBURST is equivalent to tCCD_S; no explicit parameter required
+ # for CAS-to-CAS delay for bursts to different bank groups
+ tCCD_L = '3ns';
+
+ tRCD = '12ns'
+
+ # tCL is not directly found in datasheet and assumed equal tRCD
+ tCL = '12ns'
+
+ tRP = '12ns'
+ tRAS = '28ns'
+
+ # RRD_S (different bank group)
+ # RRD_S is 5.5 ns in datasheet.
+ # rounded to the next multiple of tCK
+ tRRD = '6ns'
+
+ # RRD_L (same bank group)
+ # RRD_L is 5.5 ns in datasheet.
+ # rounded to the next multiple of tCK
+ tRRD_L = '6ns'
+
+ tXAW = '23ns'
+
+ # tXAW < 4 x tRRD.
+ # Therefore, activation limit is set to 0
+ activation_limit = 0
+
+ tRFC = '65ns'
+ tWR = '12ns'
+
+ # Here using the average of WTR_S and WTR_L
+ tWTR = '5ns'
+
+ # Read-to-Precharge 2 CK
+ tRTP = '2ns'
+
+ # Assume 2 cycles
+ tRTW = '2ns'
+
+# A single HBM x128 interface (one command and address bus), with
+# default timings based on data publicly released
+# ("HBM: Memory Solution for High Performance Processors", MemCon, 2014),
+# IDD measurement values, and by extrapolating data from other classes.
+# Architecture values based on published HBM spec
+# A 4H stack is defined, 2Gb per die for a total of 1GB of memory.
+class HBM_1000_4H_1x128(DRAMInterface):
+ # HBM gen1 supports up to 8 128-bit physical channels
+ # Configuration defines a single channel, with the capacity
+ # set to (full_stack_capacity / 8) based on 2Gb dies
+ # To use all 8 channels, set 'channels' parameter to 8 in
+ # system configuration
+
+ # 128-bit interface legacy mode
+ device_bus_width = 128
+
+ # HBM supports BL4 and BL2 (legacy mode only)
+ burst_length = 4
+
+ # size of channel in bytes, 4H stack of 2Gb dies is 1GB per stack;
+ # with 8 channels, 128MB per channel
+ device_size = '128MB'
+
+ device_rowbuffer_size = '2kB'
+
+ # 1x128 configuration
+ devices_per_rank = 1
+
+ # HBM does not have a CS pin; set rank to 1
+ ranks_per_channel = 1
+
+ # HBM has 8 or 16 banks depending on capacity
+ # 2Gb dies have 8 banks
+ banks_per_rank = 8
+
+ # depending on frequency, bank groups may be required
+ # will always have 4 bank groups when enabled
+ # current specifications do not define the minimum frequency for
+ # bank group architecture
+ # setting bank_groups_per_rank to 0 to disable until range is defined
+ bank_groups_per_rank = 0
+
+ # 500 MHz for 1Gbps DDR data rate
+ tCK = '2ns'
+
+ # use values from IDD measurement in JEDEC spec
+ # use tRP value for tRCD and tCL similar to other classes
+ tRP = '15ns'
+ tRCD = '15ns'
+ tCL = '15ns'
+ tRAS = '33ns'
+
+ # BL2 and BL4 supported, default to BL4
+ # DDR @ 500 MHz means 4 * 2ns / 2 = 4ns
+ tBURST = '4ns'
+
+ # value for 2Gb device from JEDEC spec
+ tRFC = '160ns'
+
+ # value for 2Gb device from JEDEC spec
+ tREFI = '3.9us'
+
+ # extrapolate the following from LPDDR configs, using ns values
+ # to minimize burst length, prefetch differences
+ tWR = '18ns'
+ tRTP = '7.5ns'
+ tWTR = '10ns'
+
+ # start with 2 cycles turnaround, similar to other memory classes
+ # could be more with variations across the stack
+ tRTW = '4ns'
+
+ # single rank device, set to 0
+ tCS = '0ns'
+
+ # from MemCon example, tRRD is 4ns with 2ns tCK
+ tRRD = '4ns'
+
+ # from MemCon example, tFAW is 30ns with 2ns tCK
+ tXAW = '30ns'
+ activation_limit = 4
+
+ # 4tCK
+ tXP = '8ns'
+
+ # start with tRFC + tXP -> 160ns + 8ns = 168ns
+ tXS = '168ns'
+
+# A single HBM x64 interface (one command and address bus), with
+# default timings based on HBM gen1 and data publicly released
+# A 4H stack is defined, 8Gb per die for a total of 4GB of memory.
+# Note: This defines a pseudo-channel with a unique controller
+# instantiated per pseudo-channel
+# Stay at same IO rate (1Gbps) to maintain timing relationship with
+# HBM gen1 class (HBM_1000_4H_1x128) where possible
+class HBM_1000_4H_1x64(HBM_1000_4H_1x128):
+ # For HBM gen2 with pseudo-channel mode, configure 2X channels.
+ # Configuration defines a single pseudo channel, with the capacity
+ # set to (full_stack_capacity / 16) based on 8Gb dies
+ # To use all 16 pseudo channels, set 'channels' parameter to 16 in
+ # system configuration
+
+ # 64-bit pseudo-channel interface
+ device_bus_width = 64
+
+ # HBM pseudo-channel only supports BL4
+ burst_length = 4
+
+ # size of channel in bytes, 4H stack of 8Gb dies is 4GB per stack;
+ # with 16 channels, 256MB per channel
+ device_size = '256MB'
+
+ # page size is halved with pseudo-channel; maintaining the same number
+ # of rows per pseudo-channel with 2X banks across 2 channels
+ device_rowbuffer_size = '1kB'
+
+ # HBM has 8 or 16 banks depending on capacity
+ # Starting with 4Gb dies, 16 banks are defined
+ banks_per_rank = 16
+
+ # reset tRFC for larger, 8Gb device
+ # use HBM1 4Gb value as a starting point
+ tRFC = '260ns'
+
+ # start with tRFC + tXP -> 260ns + 8ns = 268ns
+ tXS = '268ns'
+ # Default different rank bus delay to 2 CK, @1000 MHz = 2 ns
+ tCS = '2ns'
+ tREFI = '3.9us'
+
+ # active powerdown and precharge powerdown exit time
+ tXP = '10ns'
+
+ # self refresh exit time
+ # NOTE(review): tXS is already assigned '268ns' above in this class;
+ # this second assignment ('65ns', which is less than tRFC = 260ns)
+ # overrides it and looks like a leftover — one of the two should go
+ tXS = '65ns'
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture
+# burst of 32, which means bursts can be interleaved
+class LPDDR5_5500_1x16_BG_BL32(DRAMInterface):
+
+ # Increase buffer size to account for more bank resources
+ read_buffer_size = 64
+
+ # Set page policy to better suit DMC Huxley
+ page_policy = 'close_adaptive'
+
+ # 16-bit channel interface
+ device_bus_width = 16
+
+ # LPDDR5 is a BL16 or BL32 device
+ # With BG mode, BL16 and BL32 are supported
+ # Use BL32 for higher command bandwidth
+ burst_length = 32
+
+ # size of device in bytes
+ device_size = '1GB'
+
+ # 2kB page with BG mode
+ device_rowbuffer_size = '2kB'
+
+ # Use a 1x16 configuration
+ devices_per_rank = 1
+
+ # Use a single rank
+ ranks_per_channel = 1
+
+ # LPDDR5 supports configurable bank options
+ # 8B : BL32, all frequencies
+ # 16B : BL32 or BL16, <=3.2Gbps
+ # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
+ # Initial configuration will have 16 banks with Bank Group Arch
+ # to maximize resources and enable higher data rates
+ banks_per_rank = 16
+ bank_groups_per_rank = 4
+
+ # 5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
+ tCK = '1.455ns'
+
+ # Greater of 2 CK or 18ns
+ tRCD = '18ns'
+
+ # Base RL is 16 CK @ 687.5 MHz = 23.28ns
+ tCL = '23.280ns'
+
+ # Greater of 2 CK or 18ns
+ tRP = '18ns'
+
+ # Greater of 3 CK or 42ns
+ tRAS = '42ns'
+
+ # Greater of 3 CK or 34ns
+ tWR = '34ns'
+
+ # active powerdown and precharge powerdown exit time
+ # Greater of 3 CK or 7ns
+ tXP = '7ns'
+
+ # self refresh exit time (tRFCab + 7.5ns)
+ tXS = '217.5ns'
+
+ # Greater of 2 CK or 7.5 ns minus 2 CK
+ tRTP = '4.59ns'
+
+ # With BG architecture, burst of 32 transferred in two 16-beat
+ # sub-bursts, with a 16-beat gap in between.
+ # Each 16-beat sub-burst is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
+ # tBURST is the delay to transfer the Bstof32 = 6 CK @ 687.5 MHz
+ tBURST = '8.73ns'
+ # can interleave a Bstof32 from another bank group at tBURST_MIN
+ # 16-beats is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
+ tBURST_MIN = '2.91ns'
+ # tBURST_MAX is the maximum burst delay for same bank group timing
+ # this is 8 CK @ 687.5 MHz
+ tBURST_MAX = '11.64ns'
+
+ # 8 CK @ 687.5 MHz
+ tCCD_L = "11.64ns"
+
+ # LPDDR5, 8 Gbit/channel for 210ns tRFCab
+ tRFC = '210ns'
+ tREFI = '3.9us'
+
+ # Greater of 4 CK or 6.25 ns
+ tWTR = '6.25ns'
+ # Greater of 4 CK or 12 ns
+ tWTR_L = '12ns'
+
+ # Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL
+ # tWCKDQ0/tCK will be 1 CK for most cases
+ # For gem5 RL = WL and BL/n is already accounted for with tBURST
+ # The result is that an additional 1 CK is required
+ tRTW = '1.455ns'
+
+ # Default different rank bus delay to 2 CK, @687.5 MHz = 2.91 ns
+ tCS = '2.91ns'
+
+ # 2 CK
+ tPPD = '2.91ns'
+
+ # Greater of 2 CK or 5 ns
+ tRRD = '5ns'
+ tRRD_L = '5ns'
+
+ # With Bank Group Arch mode tFAW is 20 ns
+ tXAW = '20ns'
+ activation_limit = 4
+
+ # at 5Gbps, 4:1 WCK to CK ratio required
+ # 2 data beats per WCK (DDR) -> 8 per CK
+ beats_per_clock = 8
+
+ # 2 cycles required to send activate command
+ # 2 command phases can be sent back-to-back or
+ # with a gap up to tAAD = 8 CK
+ two_cycle_activate = True
+ tAAD = '11.640ns'
+
+ data_clock_sync = True
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture, burst of 16
+class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32):
+
+ # LPDDR5 is a BL16 or BL32 device
+ # With BG mode, BL16 and BL32 are supported
+ # Use BL16 for smaller access granularity
+ burst_length = 16
+
+ # For Bstof16 with BG arch, 2 CK @ 687.5 MHz with 4:1 clock ratio
+ tBURST = '2.91ns'
+ tBURST_MIN = '2.91ns'
+ # For Bstof16 with BG arch, 4 CK @ 687.5 MHz with 4:1 clock ratio
+ tBURST_MAX = '5.82ns'
+
+ # 4 CK @ 687.5 MHz
+ tCCD_L = "5.82ns"
+
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 8-bank mode, burst of 32
+class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32):
+
+ # 4kB page with 8B mode
+ device_rowbuffer_size = '4kB'
+
+ # LPDDR5 supports configurable bank options
+ # 8B : BL32, all frequencies
+ # 16B : BL32 or BL16, <=3.2Gbps
+ # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
+ # Select 8B
+ banks_per_rank = 8
+ bank_groups_per_rank = 0
+
+ # For Bstof32 with 8B mode, 4 CK @ 687.5 MHz with 4:1 clock ratio
+ tBURST = '5.82ns'
+ tBURST_MIN = '5.82ns'
+ tBURST_MAX = '5.82ns'
+
+ # Greater of 4 CK or 12 ns
+ tWTR = '12ns'
+
+ # Greater of 2 CK or 10 ns
+ tRRD = '10ns'
+
+ # With 8B mode tFAW is 40 ns
+ tXAW = '40ns'
+ activation_limit = 4
+
+ # Reset BG arch timing for 8B mode
+ tCCD_L = "0ns"
+ tRRD_L = "0ns"
+ tWTR_L = "0ns"
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture
+# burst of 32, which means bursts can be interleaved
+class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32):
+
+ # 6.4Gb/s DDR with 4:1 WCK:CK ratio for 800 MHz CK
+ tCK = '1.25ns'
+
+ # Base RL is 17 CK @ 800 MHz = 21.25ns
+ tCL = '21.25ns'
+
+ # With BG architecture, burst of 32 transferred in two 16-beat
+ # sub-bursts, with a 16-beat gap in between.
+ # Each 16-beat sub-burst is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
+ # tBURST is the delay to transfer the Bstof32 = 6 CK @ 800 MHz
+ tBURST = '7.5ns'
+ # can interleave a Bstof32 from another bank group at tBURST_MIN
+ # 16-beats is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
+ tBURST_MIN = '2.5ns'
+ # tBURST_MAX is the maximum burst delay for same bank group timing
+ # this is 8 CK @ 800 MHz
+ tBURST_MAX = '10ns'
+
+ # 8 CK @ 800 MHz
+ tCCD_L = "10ns"
+
+ # Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL
+ # tWCKDQ0/tCK will be 1 CK for most cases
+ # For gem5 RL = WL and BL/n is already accounted for with tBURST
+ # The result is that an additional 1 CK is required
+ tRTW = '1.25ns'
+
+ # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
+ tCS = '2.5ns'
+
+ # 2 CK
+ tPPD = '2.5ns'
+
+ # 2 command phases can be sent back-to-back or
+ # with a gap up to tAAD = 8 CK
+ tAAD = '10ns'
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on initial
+# JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture, burst of 16
+class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32):
+
+ # LPDDR5 is a BL16 or BL32 device
+ # With BG mode, BL16 and BL32 are supported
+ # Use BL16 for smaller access granularity
+ burst_length = 16
+
+ # For Bstof16 with BG arch, 2 CK @ 800 MHz with 4:1 clock ratio
+ tBURST = '2.5ns'
+ tBURST_MIN = '2.5ns'
+ # For Bstof16 with BG arch, 4 CK @ 800 MHz with 4:1 clock ratio
+ tBURST_MAX = '5ns'
+
+ # 4 CK @ 800 MHz
+ tCCD_L = "5ns"
+
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 8-bank mode, burst of 32
+class LPDDR5_6400_1x16_8B_BL32(LPDDR5_6400_1x16_BG_BL32):
+
+ # 4kB page with 8B mode
+ device_rowbuffer_size = '4kB'
+
+ # LPDDR5 supports configurable bank options
+ # 8B : BL32, all frequencies
+ # 16B : BL32 or BL16, <=3.2Gbps
+ # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
+ # Select 8B
+ banks_per_rank = 8
+ bank_groups_per_rank = 0
+
+ # For Bstof32 with 8B mode, 4 CK @ 800 MHz with 4:1 clock ratio
+ tBURST = '5ns'
+ tBURST_MIN = '5ns'
+ tBURST_MAX = '5ns'
+
+ # Greater of 4 CK or 12 ns
+ tWTR = '12ns'
+
+ # Greater of 2 CK or 10 ns
+ tRRD = '10ns'
+
+ # With 8B mode tFAW is 40 ns
+ tXAW = '40ns'
+ activation_limit = 4
+
+ # Reset BG arch timing for 8B mode
+ tCCD_L = "0ns"
+ tRRD_L = "0ns"
+ tWTR_L = "0ns"
diff --git a/src/mem/SConscript b/src/mem/SConscript
index 2fe179d..ceeed98 100644
--- a/src/mem/SConscript
+++ b/src/mem/SConscript
@@ -1,6 +1,6 @@
# -*- mode:python -*-
#
-# Copyright (c) 2018-2019 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
@@ -47,6 +47,7 @@
SimObject('AddrMapper.py')
SimObject('Bridge.py')
SimObject('DRAMCtrl.py')
+SimObject('DRAMInterface.py')
SimObject('ExternalMaster.py')
SimObject('ExternalSlave.py')
SimObject('MemObject.py')
diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc
index b646581..4055505 100644
--- a/src/mem/dram_ctrl.cc
+++ b/src/mem/dram_ctrl.cc
@@ -47,6 +47,7 @@
#include "debug/DRAMState.hh"
#include "debug/Drain.hh"
#include "debug/QOS.hh"
+#include "params/DRAMInterface.hh"
#include "sim/system.hh"
using namespace std;
@@ -58,12 +59,13 @@
retryRdReq(false), retryWrReq(false),
nextReqEvent([this]{ processNextReqEvent(); }, name()),
respondEvent([this]{ processRespondEvent(); }, name()),
- readBufferSize(p->read_buffer_size),
- writeBufferSize(p->write_buffer_size),
+ dram(p->dram),
+ readBufferSize(dram->readBufferSize),
+ writeBufferSize(dram->writeBufferSize),
writeHighThreshold(writeBufferSize * p->write_high_thresh_perc /
100.0),
writeLowThreshold(writeBufferSize * p->write_low_thresh_perc / 100.0),
minWritesPerSwitch(p->min_writes_per_switch),
- writesThisTime(0), readsThisTime(0), tCS(p->tCS),
+ writesThisTime(0), readsThisTime(0),
memSchedPolicy(p->mem_sched_policy),
frontendLatency(p->static_frontend_latency),
backendLatency(p->static_backend_latency),
@@ -74,37 +76,23 @@
readQueue.resize(p->qos_priorities);
writeQueue.resize(p->qos_priorities);
+ dram->setCtrl(this);
+
// perform a basic check of the write thresholds
if (p->write_low_thresh_perc >= p->write_high_thresh_perc)
fatal("Write buffer low threshold %d must be smaller than the "
"high threshold %d\n", p->write_low_thresh_perc,
p->write_high_thresh_perc);
-
- // determine the rows per bank by looking at the total capacity
- uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
-
- DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
- AbstractMemory::size());
-
- // create a DRAM interface
- // will only populate the ranks if DRAM is configured
- dram = new DRAMInterface(*this, p, capacity, range);
- DPRINTF(DRAM, "Created DRAM interface \n");
}
void
DRAMCtrl::init()
{
- MemCtrl::init();
-
if (!port.isConnected()) {
fatal("DRAMCtrl %s is unconnected!\n", name());
} else {
port.sendRangeChange();
}
-
- dram->init(range);
-
}
void
@@ -114,8 +102,6 @@
isTimingMode = system()->isTimingMode();
if (isTimingMode) {
- dram->startupRanks();
-
// shift the bus busy time sufficiently far ahead that we never
// have to worry about negative values when computing the time for
// the next request, this will add an insignificant bubble at the
@@ -133,7 +119,7 @@
"is responding");
// do the actual memory access and turn the packet into a response
- access(pkt);
+ dram->access(pkt);
Tick latency = 0;
if (pkt->hasData()) {
@@ -263,7 +249,7 @@
// address of first DRAM packet is kept unaliged. Subsequent DRAM
packets
// are aligned to burst size boundaries. This is to ensure we
accurately
// check read packets against packets in write queue.
- const Addr base_addr = getCtrlAddr(pkt->getAddr());
+ const Addr base_addr = dram->getCtrlAddr(pkt->getAddr());
Addr addr = base_addr;
unsigned pktsServicedByWrQ = 0;
BurstHelper* burst_helper = NULL;
@@ -363,7 +349,7 @@
// if the request size is larger than burst size, the pkt is split into
// multiple DRAM packets
- const Addr base_addr = getCtrlAddr(pkt->getAddr());
+ const Addr base_addr = dram->getCtrlAddr(pkt->getAddr());
Addr addr = base_addr;
uint32_t burstSize = dram->bytesPerBurst();
for (int cnt = 0; cnt < pktCount; ++cnt) {
@@ -526,7 +512,7 @@
DRAMPacket* dram_pkt = respQueue.front();
// media specific checks and functions when read response is complete
- dram->respondEventDRAM(dram_pkt->rank);
+ dram->respondEvent(dram_pkt->rank);
if (dram_pkt->burstHelper) {
// it is a split packet
@@ -727,12 +713,12 @@
void
DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency)
{
- DPRINTF(DRAM, "Responding to Address %lld.. ",pkt->getAddr());
+ DPRINTF(DRAM, "Responding to Address %lld.. \n",pkt->getAddr());
bool needsResponse = pkt->needsResponse();
// do the actual memory access which also turns the packet into a
// response
- access(pkt);
+ dram->access(pkt);
// turn packet around to go back to requester if response expected
if (needsResponse) {
@@ -877,9 +863,9 @@
// if not, shift to next burst window
Tick act_at;
if (twoCycleActivate)
- act_at = ctrl.verifyMultiCmd(act_tick, tAAD);
+ act_at = ctrl->verifyMultiCmd(act_tick, tAAD);
else
- act_at = ctrl.verifySingleCmd(act_tick);
+ act_at = ctrl->verifySingleCmd(act_tick);
DPRINTF(DRAM, "Activate at tick %d\n", act_at);
@@ -997,7 +983,7 @@
// Issuing an explicit PRE command
// Verify that we have command bandwidth to issue the precharge
// if not, shift to next burst window
- pre_at = ctrl.verifySingleCmd(pre_tick);
+ pre_at = ctrl->verifySingleCmd(pre_tick);
// enforce tPPD
for (int i = 0; i < banksPerRank; i++) {
rank_ref.banks[i].preAllowedAt = std::max(pre_at + tPPD,
@@ -1096,9 +1082,9 @@
// verify that we have command bandwidth to issue the burst
// if not, shift to next burst window
if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) >
clkResyncDelay))
- cmd_at = ctrl.verifyMultiCmd(cmd_at, tCK);
+ cmd_at = ctrl->verifyMultiCmd(cmd_at, tCK);
else
- cmd_at = ctrl.verifySingleCmd(cmd_at);
+ cmd_at = ctrl->verifySingleCmd(cmd_at);
// if we are interleaving bursts, ensure that
// 1) we don't double interleave on next burst issue
@@ -1196,7 +1182,7 @@
bool got_more_hits = false;
bool got_bank_conflict = false;
- for (uint8_t i = 0; i < ctrl.numPriorities(); ++i) {
+ for (uint8_t i = 0; i < ctrl->numPriorities(); ++i) {
auto p = queue[i].begin();
// keep on looking until we find a hit or reach the end of the
// queue
@@ -1267,6 +1253,7 @@
// Update latency stats
stats.totMemAccLat += dram_pkt->readyTime - dram_pkt->entryTime;
stats.totQLat += cmd_at - dram_pkt->entryTime;
+ stats.totBusLat += tBURST;
} else {
// Schedule write done event to decrement event count
// after the readyTime has been reached
@@ -1350,13 +1337,9 @@
// Update latency stats
stats.masterReadTotalLat[dram_pkt->masterId()] +=
dram_pkt->readyTime - dram_pkt->entryTime;
-
- stats.bytesRead += dram->bytesPerBurst();
- stats.totBusLat += dram->burstDelay();
stats.masterReadBytes[dram_pkt->masterId()] += dram_pkt->size;
} else {
++writesThisTime;
- stats.bytesWritten += dram->bytesPerBurst();
stats.masterWriteBytes[dram_pkt->masterId()] += dram_pkt->size;
stats.masterWriteTotalLat[dram_pkt->masterId()] +=
dram_pkt->readyTime - dram_pkt->entryTime;
@@ -1458,8 +1441,9 @@
// Figure out which read request goes next
// If we are changing command type, incorporate the minimum
- // bus turnaround delay which will be tCS (different rank)
case
- to_read = chooseNext((*queue), switched_cmd_type ? tCS :
0);
+ // bus turnaround delay which will be rank to rank delay
+ to_read = chooseNext((*queue), switched_cmd_type ?
+ dram->rankDelay() : 0);
if (to_read != queue->end()) {
// candidate read found
@@ -1538,7 +1522,8 @@
// If we are changing command type, incorporate the minimum
// bus turnaround delay
to_write = chooseNext((*queue),
- switched_cmd_type ? std::min(dram->minRdToWr(),
tCS) : 0);
+ switched_cmd_type ? std::min(dram->minRdToWr(),
+ dram->rankDelay()) : 0);
if (to_write != queue->end()) {
write_found = true;
@@ -1611,11 +1596,8 @@
}
}
-DRAMInterface::DRAMInterface(DRAMCtrl& _ctrl,
- const DRAMCtrlParams* _p,
- const uint64_t capacity,
- const AddrRange range)
- : SimObject(_p), ctrl(_ctrl),
+DRAMInterface::DRAMInterface(const DRAMInterfaceParams* _p)
+ : AbstractMemory(_p),
addrMapping(_p->addr_mapping),
burstSize((_p->devices_per_rank * _p->burst_length *
_p->device_bus_width) / 8),
@@ -1630,7 +1612,7 @@
bankGroupsPerRank(_p->bank_groups_per_rank),
bankGroupArch(_p->bank_groups_per_rank > 0),
banksPerRank(_p->banks_per_rank), rowsPerBank(0),
- tCK(_p->tCK), tCL(_p->tCL), tBURST(_p->tBURST),
+ tCK(_p->tCK), tCS(_p->tCS), tCL(_p->tCL), tBURST(_p->tBURST),
tBURST_MIN(_p->tBURST_MIN), tBURST_MAX(_p->tBURST_MAX),
tRTW(_p->tRTW),
tCCD_L_WR(_p->tCCD_L_WR), tCCD_L(_p->tCCD_L), tRCD(_p->tRCD),
tRP(_p->tRP), tRAS(_p->tRAS), tWR(_p->tWR), tRTP(_p->tRTP),
@@ -1646,13 +1628,15 @@
wrToRdDly(tCL + tBURST + _p->tWTR), rdToWrDly(tBURST + tRTW),
wrToRdDlySameBG(tCL + _p->tBURST_MAX + _p->tWTR_L),
rdToWrDlySameBG(tRTW + _p->tBURST_MAX),
- rankToRankDly(ctrl.rankDelay() + tBURST),
+ rankToRankDly(tCS + tBURST),
pageMgmt(_p->page_policy),
maxAccessesPerRow(_p->max_accesses_per_row),
timeStampOffset(0), activeRank(0),
enableDRAMPowerdown(_p->enable_dram_powerdown),
lastStatsResetTick(0),
- stats(_ctrl, *this)
+ stats(*this),
+ readBufferSize(_p->read_buffer_size),
+ writeBufferSize(_p->write_buffer_size)
{
fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, "
"must be a power of two\n", burstSize);
@@ -1664,7 +1648,7 @@
for (int i = 0; i < ranksPerChannel; i++) {
DPRINTF(DRAM, "Creating DRAM rank %d \n", i);
- Rank* rank = new Rank(ctrl, _p, i, *this);
+ Rank* rank = new Rank(_p, i, *this);
ranks.push_back(rank);
}
@@ -1672,6 +1656,11 @@
uint64_t deviceCapacity = deviceSize / (1024 * 1024) * devicesPerRank *
ranksPerChannel;
+ uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
+
+ DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
+ AbstractMemory::size());
+
// if actual DRAM size does not match memory capacity in system warn!
if (deviceCapacity != capacity / (1024 * 1024))
warn("DRAM device capacity (%d Mbytes) does not match the "
@@ -1726,8 +1715,10 @@
}
void
-DRAMInterface::init(AddrRange range)
+DRAMInterface::init()
{
+ AbstractMemory::init();
+
// a bit of sanity checks on the interleaving, save it for here to
// ensure that the system pointer is initialised
if (range.interleaved()) {
@@ -1749,7 +1740,7 @@
// channel striping has to be done at a granularity that
// is equal or larger to a cache line
- if (ctrl.system()->cacheLineSize() > range.granularity()) {
+ if (system()->cacheLineSize() > range.granularity()) {
fatal("Channel interleaving of %s must be at least as
large "
"as the cache line size\n", name());
}
@@ -1766,10 +1757,12 @@
}
void
-DRAMInterface::startupRanks()
+DRAMInterface::startup()
{
- // timestamp offset should be in clock cycles for DRAMPower
- timeStampOffset = divCeil(curTick(), tCK);
+ if (system()->isTimingMode()) {
+ // timestamp offset should be in clock cycles for DRAMPower
+ timeStampOffset = divCeil(curTick(), tCK);
+ }
for (auto r : ranks) {
r->startup(curTick() + tREFI - tRP);
@@ -1815,7 +1808,7 @@
}
void
-DRAMInterface::respondEventDRAM(uint8_t rank)
+DRAMInterface::respondEvent(uint8_t rank)
{
Rank& rank_ref = *ranks[rank];
@@ -1956,7 +1949,7 @@
std::max(ranks[i]->banks[j].preAllowedAt, curTick()) +
tRP;
// When is the earliest the R/W burst can issue?
- const Tick col_allowed_at = ctrl.inReadBusState(false) ?
+ const Tick col_allowed_at = ctrl->inReadBusState(false) ?
ranks[i]->banks[j].rdAllowedAt :
ranks[i]->banks[j].wrAllowedAt;
Tick col_at = std::max(col_allowed_at, act_at + tRCD);
@@ -1996,9 +1989,15 @@
return make_pair(bank_mask, hidden_bank_prep);
}
-DRAMInterface::Rank::Rank(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p, int
_rank,
- DRAMInterface& _dram)
- : EventManager(&_ctrl), ctrl(_ctrl), dram(_dram),
+DRAMInterface*
+DRAMInterfaceParams::create()
+{
+ return new DRAMInterface(this);
+}
+
+DRAMInterface::Rank::Rank(const DRAMInterfaceParams* _p,
+ int _rank, DRAMInterface& _dram)
+ : EventManager(&_dram), dram(_dram),
pwrStateTrans(PWR_IDLE), pwrStatePostRefresh(PWR_IDLE),
pwrStateTick(0), refreshDueAt(0), pwrState(PWR_IDLE),
refreshState(REF_IDLE), inLowPowerState(false), rank(_rank),
@@ -2011,7 +2010,7 @@
refreshEvent([this]{ processRefreshEvent(); }, name()),
powerEvent([this]{ processPowerEvent(); }, name()),
wakeUpEvent([this]{ processWakeUpEvent(); }, name()),
- stats(_ctrl, *this)
+ stats(_dram, *this)
{
for (int b = 0; b < _p->banks_per_rank; b++) {
banks[b].bank = b;
@@ -2062,8 +2061,10 @@
DRAMInterface::Rank::isQueueEmpty() const
{
// check commmands in Q based on current bus direction
- bool no_queued_cmds = (ctrl.inReadBusState(true) && (readEntries == 0))
- || (ctrl.inWriteBusState(true) && (writeEntries ==
0));
+ bool no_queued_cmds = (dram.ctrl->inReadBusState(true) &&
+ (readEntries == 0))
+ || (dram.ctrl->inWriteBusState(true) &&
+ (writeEntries == 0));
return no_queued_cmds;
}
@@ -2187,7 +2188,7 @@
// if a request is at the moment being handled and this request is
// accessing the current rank then wait for it to finish
if ((rank == dram.activeRank)
- && (ctrl.requestEventScheduled())) {
+ && (dram.ctrl->requestEventScheduled())) {
// hand control over to the request loop until it is
// evaluated next
DPRINTF(DRAM, "Refresh awaiting draining\n");
@@ -2262,7 +2263,7 @@
// or have outstanding ACT,RD/WR,Auto-PRE sequence scheduled
// should have outstanding precharge or read response event
assert(prechargeEvent.scheduled() ||
- ctrl.respondEventScheduled());
+ dram.ctrl->respondEventScheduled());
// will start refresh when pwrState transitions to IDLE
}
@@ -2322,8 +2323,8 @@
assert(!powerEvent.scheduled());
- if ((ctrl.drainState() == DrainState::Draining) ||
- (ctrl.drainState() == DrainState::Drained)) {
+ if ((dram.ctrl->drainState() == DrainState::Draining) ||
+ (dram.ctrl->drainState() == DrainState::Drained)) {
// if draining, do not re-enter low-power mode.
// simply go to IDLE and wait
schedulePowerEvent(PWR_IDLE, curTick());
@@ -2548,10 +2549,10 @@
}
// completed refresh event, ensure next request is scheduled
- if (!ctrl.requestEventScheduled()) {
+ if (!dram.ctrl->requestEventScheduled()) {
DPRINTF(DRAM, "Scheduling next request after refreshing"
" rank %d\n", rank);
- ctrl.restartScheduler(curTick());
+ dram.ctrl->restartScheduler(curTick());
}
}
@@ -2610,8 +2611,8 @@
// bypass auto-refresh and go straight to SREF, where memory
// will issue refresh immediately upon entry
if (pwrStatePostRefresh == PWR_PRE_PDN && isQueueEmpty() &&
- (ctrl.drainState() != DrainState::Draining) &&
- (ctrl.drainState() != DrainState::Drained) &&
+ (dram.ctrl->drainState() != DrainState::Draining) &&
+ (dram.ctrl->drainState() != DrainState::Drained) &&
dram.enableDRAMPowerdown) {
DPRINTF(DRAMState, "Rank %d bypassing refresh and
transitioning "
"to self refresh at %11u tick\n", rank, curTick());
@@ -2712,7 +2713,7 @@
bool
DRAMInterface::Rank::forceSelfRefreshExit() const {
return (readEntries != 0) ||
- (ctrl.inWriteBusState(true) && (writeEntries != 0));
+ (dram.ctrl->inWriteBusState(true) && (writeEntries != 0));
}
DRAMCtrl::CtrlStats::CtrlStats(DRAMCtrl &_ctrl)
@@ -2723,15 +2724,15 @@
ADD_STAT(writeReqs, "Number of write requests accepted"),
ADD_STAT(readBursts,
- "Number of DRAM read bursts, "
+ "Number of controller read bursts, "
"including those serviced by the write queue"),
ADD_STAT(writeBursts,
- "Number of DRAM write bursts, "
+ "Number of controller write bursts, "
"including those merged in the write queue"),
ADD_STAT(servicedByWrQ,
- "Number of DRAM read bursts serviced by the write queue"),
+ "Number of controller read bursts serviced by the write
queue"),
ADD_STAT(mergedWrBursts,
- "Number of DRAM write bursts merged with an existing one"),
+ "Number of controller write bursts merged with an existing
one"),
ADD_STAT(neitherReadNorWriteReqs,
"Number of requests that are neither read nor write"),
@@ -2739,9 +2740,6 @@
ADD_STAT(avgRdQLen, "Average read queue length when enqueuing"),
ADD_STAT(avgWrQLen, "Average write queue length when enqueuing"),
- ADD_STAT(totBusLat, "Total ticks spent in databus transfers"),
- ADD_STAT(avgBusLat, "Average bus latency per DRAM burst"),
-
ADD_STAT(numRdRetry, "Number of times read queue was full causing
retry"),
ADD_STAT(numWrRetry, "Number of times write queue was full causing
retry"),
@@ -2756,22 +2754,13 @@
ADD_STAT(wrPerTurnAround,
"Writes before turning the bus around for reads"),
- ADD_STAT(bytesRead, "Total number of bytes read from memory"),
ADD_STAT(bytesReadWrQ, "Total number of bytes read from write queue"),
- ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"),
ADD_STAT(bytesReadSys, "Total read bytes from the system interface
side"),
ADD_STAT(bytesWrittenSys,
"Total written bytes from the system interface side"),
- ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiByte/s"),
- ADD_STAT(avgWrBW, "Average achieved write bandwidth in MiByte/s"),
ADD_STAT(avgRdBWSys, "Average system read bandwidth in MiByte/s"),
ADD_STAT(avgWrBWSys, "Average system write bandwidth in MiByte/s"),
- ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
-
- ADD_STAT(busUtil, "Data bus utilization in percentage"),
- ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"),
- ADD_STAT(busUtilWrite, "Data bus utilization in percentage for
writes"),
ADD_STAT(totGap, "Total gap between requests"),
ADD_STAT(avgGap, "Average gap between requests"),
@@ -2803,12 +2792,11 @@
{
using namespace Stats;
- assert(ctrl._system);
- const auto max_masters = ctrl._system->maxMasters();
+ assert(ctrl.system());
+ const auto max_masters = ctrl.system()->maxMasters();
avgRdQLen.precision(2);
avgWrQLen.precision(2);
- avgBusLat.precision(2);
readPktSize.init(ceilLog2(ctrl.dram->bytesPerBurst()) + 1);
writePktSize.init(ceilLog2(ctrl.dram->bytesPerBurst()) + 1);
@@ -2823,14 +2811,9 @@
.init(ctrl.writeBufferSize)
.flags(nozero);
- avgRdBW.precision(2);
- avgWrBW.precision(2);
avgRdBWSys.precision(2);
avgWrBWSys.precision(2);
- peakBW.precision(2);
- busUtil.precision(2);
avgGap.precision(2);
- busUtilWrite.precision(2);
// per-master bytes read and written to memory
masterReadBytes
@@ -2862,9 +2845,6 @@
.flags(nonan)
.precision(2);
- busUtilRead
- .precision(2);
-
masterWriteRate
.flags(nozero | nonan)
.precision(12);
@@ -2878,7 +2858,7 @@
.precision(2);
for (int i = 0; i < max_masters; i++) {
- const std::string master = ctrl._system->getMasterName(i);
+ const std::string master = ctrl.system()->getMasterName(i);
masterReadBytes.subname(i, master);
masterReadRate.subname(i, master);
masterWriteBytes.subname(i, master);
@@ -2892,22 +2872,11 @@
}
// Formula stats
- avgBusLat = totBusLat / (readBursts - servicedByWrQ);
-
- avgRdBW = (bytesRead / 1000000) / simSeconds;
- avgWrBW = (bytesWritten / 1000000) / simSeconds;
avgRdBWSys = (bytesReadSys / 1000000) / simSeconds;
avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds;
- peakBW = (SimClock::Frequency / ctrl.dram->burstDataDelay()) *
- ctrl.dram->bytesPerBurst() / 1000000;
-
- busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
avgGap = totGap / (readReqs + writeReqs);
- busUtilRead = avgRdBW / peakBW * 100;
- busUtilWrite = avgWrBW / peakBW * 100;
-
masterReadRate = masterReadBytes / simSeconds;
masterWriteRate = masterWriteBytes / simSeconds;
masterReadAvgLat = masterReadTotalLat / masterReadAccesses;
@@ -2920,8 +2889,8 @@
dram.lastStatsResetTick = curTick();
}
-DRAMInterface::DRAMStats::DRAMStats(DRAMCtrl &_ctrl, DRAMInterface &_dram)
- : Stats::Group(&_ctrl, csprintf("dram").c_str()),
+DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram)
+ : Stats::Group(&_dram),
dram(_dram),
ADD_STAT(readBursts, "Number of DRAM read bursts"),
@@ -2931,10 +2900,13 @@
ADD_STAT(perBankWrBursts, "Per bank write bursts"),
ADD_STAT(totQLat, "Total ticks spent queuing"),
+ ADD_STAT(totBusLat, "Total ticks spent in databus transfers"),
ADD_STAT(totMemAccLat,
"Total ticks spent from burst creation until serviced "
"by the DRAM"),
+
ADD_STAT(avgQLat, "Average queueing delay per DRAM burst"),
+ ADD_STAT(avgBusLat, "Average bus latency per DRAM burst"),
ADD_STAT(avgMemAccLat, "Average memory access latency per DRAM burst"),
ADD_STAT(readRowHits, "Number of row buffer hits during reads"),
@@ -2947,6 +2919,12 @@
ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"),
ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiBytes/s"),
ADD_STAT(avgWrBW, "Average DRAM write bandwidth in MiBytes/s"),
+ ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
+
+ ADD_STAT(busUtil, "Data bus utilization in percentage"),
+ ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"),
+ ADD_STAT(busUtilWrite, "Data bus utilization in percentage for
writes"),
+
ADD_STAT(pageHitRate, "Row buffer hit rate, read and write combined")
{
@@ -2958,6 +2936,7 @@
using namespace Stats;
avgQLat.precision(2);
+ avgBusLat.precision(2);
avgMemAccLat.precision(2);
readRowHitRate.precision(2);
@@ -2971,10 +2950,16 @@
dram.maxAccessesPerRow : dram.rowBufferSize)
.flags(nozero);
+ peakBW.precision(2);
+ busUtil.precision(2);
+ busUtilWrite.precision(2);
+ busUtilRead.precision(2);
+
pageHitRate.precision(2);
// Formula stats
avgQLat = totQLat / readBursts;
+ avgBusLat = totBusLat / readBursts;
avgMemAccLat = totMemAccLat / readBursts;
readRowHitRate = (readRowHits / readBursts) * 100;
@@ -2982,13 +2967,19 @@
avgRdBW = (bytesRead / 1000000) / simSeconds;
avgWrBW = (bytesWritten / 1000000) / simSeconds;
+ peakBW = (SimClock::Frequency / dram.burstDataDelay()) *
+ dram.bytesPerBurst() / 1000000;
+
+ busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
+ busUtilRead = avgRdBW / peakBW * 100;
+ busUtilWrite = avgWrBW / peakBW * 100;
pageHitRate = (writeRowHits + readRowHits) /
(writeBursts + readBursts) * 100;
}
-DRAMInterface::RankStats::RankStats(DRAMCtrl &_ctrl, Rank &_rank)
- : Stats::Group(&_ctrl, csprintf("dram_rank%d", _rank.rank).c_str()),
+DRAMInterface::RankStats::RankStats(DRAMInterface &_dram, Rank &_rank)
+ : Stats::Group(&_dram, csprintf("rank%d", _rank.rank).c_str()),
rank(_rank),
ADD_STAT(actEnergy, "Energy for activate commands per rank (pJ)"),
@@ -3047,7 +3038,7 @@
DRAMCtrl::recvFunctional(PacketPtr pkt)
{
// rely on the abstract memory
- functionalAccess(pkt);
+ dram->functionalAccess(pkt);
}
Port &
@@ -3093,6 +3084,7 @@
// if we switched to timing mode, kick things into action,
// and behave as if we restored from a checkpoint
startup();
+ dram->startup();
} else if (isTimingMode && !system()->isTimingMode()) {
// if we switch from timing mode, stop the refresh events to
// not cause issues with KVM
@@ -3112,7 +3104,7 @@
DRAMCtrl::MemoryPort::getAddrRanges() const
{
AddrRangeList ranges;
- ranges.push_back(ctrl.getAddrRange());
+ ranges.push_back(ctrl.dram->getAddrRange());
return ranges;
}
diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh
index dc030b1..417e935 100644
--- a/src/mem/dram_ctrl.hh
+++ b/src/mem/dram_ctrl.hh
@@ -55,12 +55,15 @@
#include "enums/AddrMap.hh"
#include "enums/MemSched.hh"
#include "enums/PageManage.hh"
+#include "mem/abstract_mem.hh"
#include "mem/drampower.hh"
#include "mem/qos/mem_ctrl.hh"
#include "mem/qport.hh"
#include "params/DRAMCtrl.hh"
#include "sim/eventq.hh"
+class DRAMInterfaceParams;
+
/**
* A basic class to track the bank state, i.e. what row is
* currently open (if any), when is the bank free to accept a new
@@ -242,7 +245,7 @@
* The DRAMInterface includes a class for individual ranks
* and per rank functions.
*/
-class DRAMInterface : public SimObject
+class DRAMInterface : public AbstractMemory
{
private:
/**
@@ -342,7 +345,7 @@
class Rank;
struct RankStats : public Stats::Group
{
- RankStats(DRAMCtrl &ctrl, Rank &rank);
+ RankStats(DRAMInterface &dram, Rank &rank);
void regStats() override;
void resetStats() override;
@@ -408,13 +411,6 @@
*/
class Rank : public EventManager
{
- protected:
-
- /**
- * A reference to the parent DRAMCtrl instance
- */
- DRAMCtrl& ctrl;
-
private:
/**
@@ -534,10 +530,10 @@
*/
Tick lastBurstTick;
- Rank(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p, int _rank,
+ Rank(const DRAMInterfaceParams* _p, int _rank,
DRAMInterface& _dram);
- const std::string name() const { return csprintf("dram_%d", rank);
}
+ const std::string name() const { return csprintf("%d", rank); }
/**
* Kick off accounting for power and refresh states and
@@ -659,15 +655,16 @@
* @param next Memory Command
* @return true if timeStamp of Command 1 < timeStamp of Command 2
*/
- static bool sortTime(const Command& cmd, const Command& cmd_next)
+ static bool
+ sortTime(const Command& cmd, const Command& cmd_next)
{
return cmd.timeStamp < cmd_next.timeStamp;
- };
+ }
/**
- * A reference to the parent DRAMCtrl instance
+ * A pointer to the parent DRAMCtrl instance
*/
- DRAMCtrl& ctrl;
+ DRAMCtrl* ctrl;
/**
* Memory controller configuration initialized based on parameter
@@ -698,6 +695,7 @@
* DRAM timing requirements
*/
const Tick M5_CLASS_VAR_USED tCK;
+ const Tick tCS;
const Tick tCL;
const Tick tBURST;
const Tick tBURST_MIN;
@@ -781,7 +779,7 @@
struct DRAMStats : public Stats::Group
{
- DRAMStats(DRAMCtrl &ctrl, DRAMInterface &dram);
+ DRAMStats(DRAMInterface &dram);
void regStats() override;
void resetStats() override;
@@ -798,10 +796,12 @@
// Latencies summed over all requests
Stats::Scalar totQLat;
+ Stats::Scalar totBusLat;
Stats::Scalar totMemAccLat;
// Average latencies per request
Stats::Formula avgQLat;
+ Stats::Formula avgBusLat;
Stats::Formula avgMemAccLat;
// Row hit count and rate
@@ -817,6 +817,11 @@
// Average bandwidth
Stats::Formula avgRdBW;
Stats::Formula avgWrBW;
+ Stats::Formula peakBW;
+ // bus utilization
+ Stats::Formula busUtil;
+ Stats::Formula busUtilRead;
+ Stats::Formula busUtilWrite;
Stats::Formula pageHitRate;
};
@@ -828,16 +833,28 @@
std::vector<Rank*> ranks;
public:
+
+ /**
+ * Buffer sizes for read and write queues in the controller
+ * These are passed to the controller on instantiation
+ * Defining them here allows for buffers to be resized based
+ * on memory type / configuration.
+ */
+ const uint32_t readBufferSize;
+ const uint32_t writeBufferSize;
+
+ /** Setting a pointer to the controller */
+ void setCtrl(DRAMCtrl* _ctrl) { ctrl = _ctrl; }
+
/**
* Initialize the DRAM interface and verify parameters
- * @param range is the address range for this interface
*/
- void init(AddrRange range);
+ void init() override;
/**
* Iterate through dram ranks and instantiate per rank startup routine
*/
- void startupRanks();
+ void startup() override;
/**
* Iterate through dram ranks to exit self-refresh in order to drain
@@ -861,15 +878,26 @@
void suspend();
/**
+ * Get an address in a dense range which starts from 0. The input
+ * address is the physical address of the request in an address
+ * space that contains other SimObjects apart from this
+ * controller.
+ *
+     * @param addr The input address which should be in the addrRange
+     * @return An address in the contiguous range [0, max)
+ */
+ Addr getCtrlAddr(Addr addr) { return range.getOffset(addr); }
+
+ /**
* @return number of bytes in a burst for this interface
*/
- uint32_t bytesPerBurst() const { return burstSize; };
+ uint32_t bytesPerBurst() const { return burstSize; }
/**
*
* @return number of ranks per channel for this interface
*/
- uint32_t numRanks() const { return ranksPerChannel; };
+ uint32_t numRanks() const { return ranksPerChannel; }
/*
* @return time to send a burst of data
@@ -879,7 +907,8 @@
/*
* @return time to send a burst of data without gaps
*/
- Tick burstDataDelay() const
+ Tick
+ burstDataDelay() const
{
return (burstInterleave ? tBURST_MAX / 2 : tBURST);
}
@@ -893,7 +922,14 @@
*
* @return additional bus turnaround required for read-to-write
*/
- Tick minRdToWr() const { return tRTW; };
+ Tick minRdToWr() const { return tRTW; }
+
+ /**
+ * Determine the required delay for an access to a different rank
+ *
+ * @return required rank to rank delay
+ */
+ Tick rankDelay() const { return tCS; }
/*
* Function to calulate RAS cycle time for use within and
@@ -957,7 +993,8 @@
* This requires the DRAM to be in the
* REF IDLE state
*/
- bool burstReady(uint8_t rank) const
+ bool
+ burstReady(uint8_t rank) const
{
return ranks[rank]->inRefIdleState();
}
@@ -979,7 +1016,7 @@
*
* @param rank Specifies rank associated with read burst
*/
- void respondEventDRAM(uint8_t rank);
+ void respondEvent(uint8_t rank);
/**
* Check the refresh state to determine if refresh needs
@@ -989,8 +1026,7 @@
*/
void checkRefreshState(uint8_t rank);
- DRAMInterface(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p,
- uint64_t capacity, AddrRange range);
+ DRAMInterface(const DRAMInterfaceParams* _p);
};
/**
@@ -1141,20 +1177,6 @@
void accessAndRespond(PacketPtr pkt, Tick static_latency);
/**
- * Get an address in a dense range which starts from 0. The input
- * address is the physical address of the request in an address
- * space that contains other SimObjects apart from this
- * controller.
- *
- * @param addr The intput address which should be in the addrRange
- * @return An address in the continues range [0, max)
- */
- Addr getCtrlAddr(Addr addr)
- {
- return range.getOffset(addr);
- }
-
- /**
* The memory schduler/arbiter - picks which request needs to
* go next, based on the specified policy such as FCFS or FR-FCFS
* and moves it to the head of the queue.
@@ -1237,6 +1259,11 @@
std::unordered_multiset<Tick> burstTicks;
/**
+ * Create pointer to interface of the actual dram media
+ */
+ DRAMInterface* const dram;
+
+ /**
* The following are basic design parameters of the memory
* controller, and are initialized based on parameter values.
* The rowsPerBank is determined based on the capacity, number of
@@ -1251,12 +1278,6 @@
uint32_t readsThisTime;
/**
- * Basic memory timing parameters initialized based on parameter
- * values. These will be used across memory interfaces.
- */
- const Tick tCS;
-
- /**
* Memory controller configuration initialized based on parameter
* values.
*/
@@ -1310,10 +1331,6 @@
// Average queue lengths
Stats::Average avgRdQLen;
Stats::Average avgWrQLen;
- // Latencies summed over all requests
- Stats::Scalar totBusLat;
- // Average latencies per request
- Stats::Formula avgBusLat;
Stats::Scalar numRdRetry;
Stats::Scalar numWrRetry;
@@ -1324,21 +1341,12 @@
Stats::Histogram rdPerTurnAround;
Stats::Histogram wrPerTurnAround;
- Stats::Scalar bytesRead;
Stats::Scalar bytesReadWrQ;
- Stats::Scalar bytesWritten;
Stats::Scalar bytesReadSys;
Stats::Scalar bytesWrittenSys;
// Average bandwidth
- Stats::Formula avgRdBW;
- Stats::Formula avgWrBW;
Stats::Formula avgRdBWSys;
Stats::Formula avgWrBWSys;
- Stats::Formula peakBW;
- // bus utilization
- Stats::Formula busUtil;
- Stats::Formula busUtilRead;
- Stats::Formula busUtilWrite;
Stats::Scalar totGap;
Stats::Formula avgGap;
@@ -1367,11 +1375,6 @@
CtrlStats stats;
/**
- * Create pointer to interfasce to the actual media
- */
- DRAMInterface* dram;
-
- /**
* Upstream caches need this packet until true is returned, so
* hold it for deletion until a subsequent call
*/
@@ -1449,13 +1452,6 @@
void restartScheduler(Tick tick) { schedule(nextReqEvent, tick); }
/**
- * Determine the required delay for an access to a different rank
- *
- * @return required rank to rank delay
- */
- Tick rankDelay() const { return tCS; }
-
- /**
* Check the current direction of the memory channel
*
* @param next_state Check either the current or next bus state
diff --git a/src/mem/drampower.cc b/src/mem/drampower.cc
index 13551a0..96dcb55 100644
--- a/src/mem/drampower.cc
+++ b/src/mem/drampower.cc
@@ -40,13 +40,13 @@
#include "base/intmath.hh"
#include "sim/core.hh"
-DRAMPower::DRAMPower(const DRAMCtrlParams* p, bool include_io) :
+DRAMPower::DRAMPower(const DRAMInterfaceParams* p, bool include_io) :
powerlib(libDRAMPower(getMemSpec(p), include_io))
{
}
Data::MemArchitectureSpec
-DRAMPower::getArchParams(const DRAMCtrlParams* p)
+DRAMPower::getArchParams(const DRAMInterfaceParams* p)
{
Data::MemArchitectureSpec archSpec;
archSpec.burstLength = p->burst_length;
@@ -68,7 +68,7 @@
}
Data::MemTimingSpec
-DRAMPower::getTimingParams(const DRAMCtrlParams* p)
+DRAMPower::getTimingParams(const DRAMInterfaceParams* p)
{
// Set the values that are used for power calculations and ignore
// the ones only used by the controller functionality in DRAMPower
@@ -100,7 +100,7 @@
}
Data::MemPowerSpec
-DRAMPower::getPowerParams(const DRAMCtrlParams* p)
+DRAMPower::getPowerParams(const DRAMInterfaceParams* p)
{
// All DRAMPower currents are in mA
Data::MemPowerSpec powerSpec;
@@ -132,7 +132,7 @@
}
Data::MemorySpecification
-DRAMPower::getMemSpec(const DRAMCtrlParams* p)
+DRAMPower::getMemSpec(const DRAMInterfaceParams* p)
{
Data::MemorySpecification memSpec;
memSpec.memArchSpec = getArchParams(p);
@@ -142,7 +142,18 @@
}
bool
-DRAMPower::hasTwoVDD(const DRAMCtrlParams* p)
+DRAMPower::hasTwoVDD(const DRAMInterfaceParams* p)
{
return p->VDD2 == 0 ? false : true;
}
+
+uint8_t
+DRAMPower::getDataRate(const DRAMInterfaceParams* p)
+{
+ uint32_t burst_cycles = divCeil(p->tBURST_MAX, p->tCK);
+ uint8_t data_rate = p->burst_length / burst_cycles;
+ // 4 for GDDR5
+    if (data_rate != 1 && data_rate != 2 && data_rate != 4 && data_rate !=
8)
+        fatal("Got unexpected data rate %d, should be 1 or 2 or 4 or 8\n",
+              data_rate);
+ return data_rate;
+}
diff --git a/src/mem/drampower.hh b/src/mem/drampower.hh
index da24bca..da68a78 100644
--- a/src/mem/drampower.hh
+++ b/src/mem/drampower.hh
@@ -44,7 +44,7 @@
#define __MEM_DRAM_POWER_HH__
#include "libdrampower/LibDRAMPower.h"
-#include "params/DRAMCtrl.hh"
+#include "params/DRAMInterface.hh"
/**
* DRAMPower is a standalone tool which calculates the power consumed by a
@@ -57,38 +57,44 @@
/**
* Transform the architechture parameters defined in
- * DRAMCtrlParams to the memSpec of DRAMPower
+ * DRAMInterfaceParams to the memSpec of DRAMPower
*/
- static Data::MemArchitectureSpec getArchParams(const DRAMCtrlParams*
p);
+ static Data::MemArchitectureSpec getArchParams(
+ const DRAMInterfaceParams* p);
/**
- * Transforms the timing parameters defined in DRAMCtrlParams to
+ * Transforms the timing parameters defined in DRAMInterfaceParams to
* the memSpec of DRAMPower
*/
- static Data::MemTimingSpec getTimingParams(const DRAMCtrlParams* p);
+ static Data::MemTimingSpec getTimingParams(const DRAMInterfaceParams*
p);
/**
* Transforms the power and current parameters defined in
- * DRAMCtrlParam to the memSpec of DRAMPower
+ * DRAMInterfaceParams to the memSpec of DRAMPower
*/
- static Data::MemPowerSpec getPowerParams(const DRAMCtrlParams* p);
+ static Data::MemPowerSpec getPowerParams(const DRAMInterfaceParams* p);
+
+ /**
+     * Determine the data rate of the device: 1, 2, 4, or 8.
+ */
+ static uint8_t getDataRate(const DRAMInterfaceParams* p);
/**
* Determine if DRAM has two voltage domains (or one)
*/
- static bool hasTwoVDD(const DRAMCtrlParams* p);
+ static bool hasTwoVDD(const DRAMInterfaceParams* p);
/**
- * Return an instance of MemSpec based on the DRAMCtrlParams
+ * Return an instance of MemSpec based on the DRAMInterfaceParams
*/
- static Data::MemorySpecification getMemSpec(const DRAMCtrlParams* p);
+ static Data::MemorySpecification getMemSpec(const DRAMInterfaceParams*
p);
public:
// Instance of DRAMPower Library
libDRAMPower powerlib;
- DRAMPower(const DRAMCtrlParams* p, bool include_io);
+ DRAMPower(const DRAMInterfaceParams* p, bool include_io);
};
diff --git a/src/mem/qos/QoSMemCtrl.py b/src/mem/qos/QoSMemCtrl.py
index 1cd3f0b..f55105b 100644
--- a/src/mem/qos/QoSMemCtrl.py
+++ b/src/mem/qos/QoSMemCtrl.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -34,18 +34,21 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from m5.params import *
-from m5.objects.AbstractMemory import AbstractMemory
+from m5.proxy import *
+from m5.objects.ClockedObject import ClockedObject
from m5.objects.QoSTurnaround import *
# QoS Queue Selection policy used to select packets among same-QoS queues
class QoSQPolicy(Enum): vals = ["fifo", "lifo", "lrg"]
-class QoSMemCtrl(AbstractMemory):
+class QoSMemCtrl(ClockedObject):
type = 'QoSMemCtrl'
cxx_header = "mem/qos/mem_ctrl.hh"
cxx_class = 'QoS::MemCtrl'
abstract = True
+ system = Param.System(Parent.any, "System that the controller belongs
to.")
+
##### QoS support parameters ####
# Number of priorities in the system
diff --git a/src/mem/qos/QoSMemSinkCtrl.py b/src/mem/qos/QoSMemSinkCtrl.py
index 6c4f263..fafac64 100644
--- a/src/mem/qos/QoSMemSinkCtrl.py
+++ b/src/mem/qos/QoSMemSinkCtrl.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -37,6 +37,7 @@
from m5.params import *
from m5.objects.QoSMemCtrl import *
+from m5.objects.QoSMemSinkInterface import *
class QoSMemSinkCtrl(QoSMemCtrl):
type = 'QoSMemSinkCtrl'
@@ -44,6 +45,10 @@
cxx_class = "QoS::MemSinkCtrl"
port = ResponsePort("Response ports")
+
+ interface = Param.QoSMemSinkInterface(QoSMemSinkInterface(),
+ "Interface to memory")
+
# the basic configuration of the controller architecture, note
# that each entry corresponds to a burst for the specific DRAM
# configuration (e.g. x32 with burst length 8 is 32 bytes) and not
@@ -59,5 +64,3 @@
# response latency - time to issue a response once a request is
serviced
response_latency = Param.Latency("20ns", "Memory response latency")
-
-
diff --git a/src/mem/qos/QoSMemSinkInterface.py
b/src/mem/qos/QoSMemSinkInterface.py
new file mode 100644
index 0000000..5c79f64
--- /dev/null
+++ b/src/mem/qos/QoSMemSinkInterface.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2020 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects.AbstractMemory import AbstractMemory
+
+class QoSMemSinkInterface(AbstractMemory):
+ type = 'QoSMemSinkInterface'
+ cxx_header = "mem/qos/mem_sink.hh"
diff --git a/src/mem/qos/SConscript b/src/mem/qos/SConscript
index f8601b6..1d90f9c 100644
--- a/src/mem/qos/SConscript
+++ b/src/mem/qos/SConscript
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
@@ -37,6 +37,7 @@
SimObject('QoSMemCtrl.py')
SimObject('QoSMemSinkCtrl.py')
+SimObject('QoSMemSinkInterface.py')
SimObject('QoSPolicy.py')
SimObject('QoSTurnaround.py')
diff --git a/src/mem/qos/mem_ctrl.cc b/src/mem/qos/mem_ctrl.cc
index 50e6035..190960b 100644
--- a/src/mem/qos/mem_ctrl.cc
+++ b/src/mem/qos/mem_ctrl.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited
+ * Copyright (c) 2017-2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -42,7 +42,7 @@
namespace QoS {
MemCtrl::MemCtrl(const QoSMemCtrlParams * p)
- : AbstractMemory(p),
+ : ClockedObject(p),
policy(p->qos_policy),
turnPolicy(p->qos_turnaround_policy),
queuePolicy(QueuePolicy::create(p)),
@@ -51,7 +51,8 @@
qosSyncroScheduler(p->qos_syncro_scheduler),
totalReadQueueSize(0), totalWriteQueueSize(0),
busState(READ), busStateNext(READ),
- stats(*this)
+ stats(*this),
+ _system(p->system)
{
// Set the priority policy
if (policy) {
@@ -77,12 +78,6 @@
{}
void
-MemCtrl::init()
-{
- AbstractMemory::init();
-}
-
-void
MemCtrl::logRequest(BusState dir, MasterID m_id, uint8_t qos,
Addr addr, uint64_t entries)
{
diff --git a/src/mem/qos/mem_ctrl.hh b/src/mem/qos/mem_ctrl.hh
index 0e29fcc..5d7c9d6 100644
--- a/src/mem/qos/mem_ctrl.hh
+++ b/src/mem/qos/mem_ctrl.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited
+ * Copyright (c) 2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -36,10 +36,10 @@
*/
#include "debug/QOS.hh"
-#include "mem/abstract_mem.hh"
-#include "mem/qos/q_policy.hh"
#include "mem/qos/policy.hh"
+#include "mem/qos/q_policy.hh"
#include "params/QoSMemCtrl.hh"
+#include "sim/clocked_object.hh"
#include "sim/system.hh"
#include <unordered_map>
@@ -56,7 +56,7 @@
* which support QoS - it provides access to a set of QoS
* scheduling policies
*/
-class MemCtrl: public AbstractMemory
+class MemCtrl : public ClockedObject
{
public:
/** Bus Direction */
@@ -151,6 +151,9 @@
Stats::Scalar numStayWriteState;
} stats;
+ /** Pointer to the System object */
+ System* _system;
+
/**
* Initializes dynamically counters and
* statistics for a given Master
@@ -266,11 +269,6 @@
virtual ~MemCtrl();
/**
- * Initializes this object
- */
- void init() override;
-
- /**
* Gets the current bus state
*
* @return current bus state
@@ -346,6 +344,10 @@
* @return total number of priority levels
*/
uint8_t numPriorities() const { return _numPriorities; }
+
+ /** read the system pointer
+ * @return pointer to the system object */
+ System* system() const { return _system; }
};
template<typename Queues>
diff --git a/src/mem/qos/mem_sink.cc b/src/mem/qos/mem_sink.cc
index 1f104e4..dbdf548 100644
--- a/src/mem/qos/mem_sink.cc
+++ b/src/mem/qos/mem_sink.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited
+ * Copyright (c) 2018-2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -40,6 +40,7 @@
#include "debug/Drain.hh"
#include "debug/QOS.hh"
#include "mem_sink.hh"
+#include "params/QoSMemSinkInterface.hh"
#include "sim/system.hh"
namespace QoS {
@@ -50,12 +51,15 @@
memoryPacketSize(p->memory_packet_size),
readBufferSize(p->read_buffer_size),
writeBufferSize(p->write_buffer_size), port(name() + ".port", *this),
+ interface(p->interface),
retryRdReq(false), retryWrReq(false), nextRequest(0),
nextReqEvent(this)
{
// Resize read and write queue to allocate space
// for configured QoS priorities
readQueue.resize(numPriorities());
writeQueue.resize(numPriorities());
+
+ interface->setMemCtrl(this);
}
MemSinkCtrl::~MemSinkCtrl()
@@ -92,7 +96,7 @@
"%s Should not see packets where cache is responding\n",
__func__);
- access(pkt);
+ interface->access(pkt);
return responseLatency;
}
@@ -101,7 +105,7 @@
{
pkt->pushLabel(name());
- functionalAccess(pkt);
+ interface->functionalAccess(pkt);
pkt->popLabel();
}
@@ -279,7 +283,7 @@
// Do the actual memory access which also turns the packet
// into a response
- access(pkt);
+ interface->access(pkt);
// Log the response
logResponse(pkt->isRead()? READ : WRITE,
@@ -351,7 +355,7 @@
MemSinkCtrl::MemoryPort::getAddrRanges() const
{
AddrRangeList ranges;
- ranges.push_back(memory.getAddrRange());
+ ranges.push_back(memory.interface->getAddrRange());
return ranges;
}
@@ -390,3 +394,13 @@
return new QoS::MemSinkCtrl(this);
}
+QoSMemSinkInterface::QoSMemSinkInterface(const QoSMemSinkInterfaceParams*
_p)
+ : AbstractMemory(_p)
+{
+}
+
+QoSMemSinkInterface*
+QoSMemSinkInterfaceParams::create()
+{
+ return new QoSMemSinkInterface(this);
+}
diff --git a/src/mem/qos/mem_sink.hh b/src/mem/qos/mem_sink.hh
index 9a51269..5f6c1be 100644
--- a/src/mem/qos/mem_sink.hh
+++ b/src/mem/qos/mem_sink.hh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited
+ * Copyright (c) 2018-2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -41,10 +41,14 @@
#ifndef __MEM_QOS_MEM_SINK_HH__
#define __MEM_QOS_MEM_SINK_HH__
+#include "mem/abstract_mem.hh"
#include "mem/qos/mem_ctrl.hh"
#include "mem/qport.hh"
#include "params/QoSMemSinkCtrl.hh"
+class QoSMemSinkInterfaceParams;
+class QoSMemSinkInterface;
+
namespace QoS {
/**
@@ -163,6 +167,11 @@
/** Memory slave port */
MemoryPort port;
+ /**
+ * Create pointer to interface of actual media
+ */
+ QoSMemSinkInterface* const interface;
+
/** Read request pending */
bool retryRdReq;
@@ -244,4 +253,17 @@
} // namespace QoS
+class QoSMemSinkInterface : public AbstractMemory
+{
+ public:
+ /** Setting a pointer to the interface */
+    void setMemCtrl(QoS::MemSinkCtrl* _ctrl) { ctrl = _ctrl; }
+
+ /** Pointer to the controller */
+ QoS::MemSinkCtrl* ctrl;
+
+ QoSMemSinkInterface(const QoSMemSinkInterfaceParams* _p);
+};
+
+
#endif /* __MEM_QOS_MEM_SINK_HH__ */
diff --git a/tests/gem5/configs/base_config.py
b/tests/gem5/configs/base_config.py
index b5bddf4..cbea768 100644
--- a/tests/gem5/configs/base_config.py
+++ b/tests/gem5/configs/base_config.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012-2013, 2017-2018 ARM Limited
+# Copyright (c) 2012-2013, 2017-2018, 2020 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -220,7 +220,12 @@
super(BaseSESystem, self).init_system(system)
def create_system(self):
- system = System(physmem = self.mem_class(),
+ if issubclass(self.mem_class, m5.objects.DRAMInterface):
+ mem_ctrl = DRAMCtrl()
+ mem_ctrl.dram = self.mem_class()
+ else:
+ mem_ctrl = self.mem_class()
+ system = System(physmem = mem_ctrl,
membus = SystemXBar(),
mem_mode = self.mem_mode,
multi_thread = (self.num_threads > 1))
@@ -272,8 +277,16 @@
else:
# create the memory controllers and connect them, stick with
# the physmem name to avoid bumping all the reference stats
- system.physmem = [self.mem_class(range = r)
- for r in system.mem_ranges]
+ if issubclass(self.mem_class, m5.objects.DRAMInterface):
+ mem_ctrls = []
+ for r in system.mem_ranges:
+ mem_ctrl = DRAMCtrl()
+ mem_ctrl.dram = self.mem_class(range = r)
+ mem_ctrls.append(mem_ctrl)
+ system.physmem = mem_ctrls
+ else:
+ system.physmem = [self.mem_class(range = r)
+ for r in system.mem_ranges]
for i in range(len(system.physmem)):
system.physmem[i].port = system.membus.master
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/28968
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I6a368b845d574a713c7196c5671188ca8c1dc5e8
Gerrit-Change-Number: 28968
Gerrit-PatchSet: 13
Gerrit-Owner: Wendy Elsasser <wendy.elsasser(a)arm.com>
Gerrit-Reviewer: Daniel Carvalho <odanrc(a)yahoo.com.br>
Gerrit-Reviewer: Jason Lowe-Power <power.jg(a)gmail.com>
Gerrit-Reviewer: John Alsop <johnathan.alsop(a)amd.com>
Gerrit-Reviewer: Matthew Poremba <matthew.poremba(a)amd.com>
Gerrit-Reviewer: Nikos Nikoleris <nikos.nikoleris(a)arm.com>
Gerrit-Reviewer: Srikant Bharadwaj <srikant.bharadwaj(a)amd.com>
Gerrit-Reviewer: kokoro <noreply+kokoro(a)google.com>
Gerrit-MessageType: merged