gem5-dev@gem5.org

The gem5 Developer List

View all threads

[S] Change in gem5/gem5[develop]: dev-amdgpu: Add a few MQD attributes to GPUFS checkpoint

VR
VISHNU RAMADAS (Gerrit)
Thu, Apr 27, 2023 9:15 PM

VISHNU RAMADAS has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/70077?usp=email )

Change subject: dev-amdgpu: Add a few MQD attributes to GPUFS checkpoint
......................................................................

dev-amdgpu: Add a few MQD attributes to GPUFS checkpoint

During GPUFS checkpoint restore, doorbell callbacks are created based
on certain MQD attributes. These callbacks are required to create new
SDMA doorbells. If these attributes are not present in the checkpoint,
the restore hangs indefinitely waiting for ioctl calls that access these
doorbells to finish execution. This commit adds the attributes required
for checkpoint restore to proceed.

Change-Id: Id3d1b7a2627d4c50133d923096495957a233f675
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70077
Reviewed-by: Matt Sinclair mattdsinclair@gmail.com
Reviewed-by: Matthew Poremba matthew.poremba@amd.com
Maintainer: Matt Sinclair mattdsinclair@gmail.com
Tested-by: kokoro noreply+kokoro@google.com
Maintainer: Matthew Poremba matthew.poremba@amd.com

M src/dev/amdgpu/pm4_packet_processor.cc
1 file changed, 30 insertions(+), 0 deletions(-)

Approvals:
Matthew Poremba: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass
Matt Sinclair: Looks good to me, but someone else must approve; Looks
good to me, approved

diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc
index 071fe8b..3690113 100644
--- a/src/dev/amdgpu/pm4_packet_processor.cc
+++ b/src/dev/amdgpu/pm4_packet_processor.cc
@@ -1025,6 +1025,11 @@
     uint32_t pipe[num_queues];
     uint32_t queue[num_queues];
     bool privileged[num_queues];
+    uint32_t hqd_active[num_queues];
+    uint32_t hqd_vmid[num_queues];
+    Addr aql_rptr[num_queues];
+    uint32_t doorbell[num_queues];
+    uint32_t hqd_pq_control[num_queues];

     int i = 0;
     for (auto iter : queues) {
@@ -1048,6 +1053,11 @@
         pipe[i] = q->pipe();
         queue[i] = q->queue();
         privileged[i] = q->privileged();
+        hqd_active[i] = q->getMQD()->hqd_active;
+        hqd_vmid[i] = q->getMQD()->hqd_vmid;
+        aql_rptr[i] = q->getMQD()->aqlRptr;
+        doorbell[i] = q->getMQD()->doorbell;
+        hqd_pq_control[i] = q->getMQD()->hqd_pq_control;
         i++;
     }
    

@@ -1067,6 +1077,11 @@
     SERIALIZE_ARRAY(pipe, num_queues);
     SERIALIZE_ARRAY(queue, num_queues);
     SERIALIZE_ARRAY(privileged, num_queues);
+    SERIALIZE_ARRAY(hqd_active, num_queues);
+    SERIALIZE_ARRAY(hqd_vmid, num_queues);
+    SERIALIZE_ARRAY(aql_rptr, num_queues);
+    SERIALIZE_ARRAY(doorbell, num_queues);
+    SERIALIZE_ARRAY(hqd_pq_control, num_queues);
 }

 void
@@ -1093,6 +1108,11 @@
     uint32_t pipe[num_queues];
     uint32_t queue[num_queues];
     bool privileged[num_queues];
+    uint32_t hqd_active[num_queues];
+    uint32_t hqd_vmid[num_queues];
+    Addr aql_rptr[num_queues];
+    uint32_t doorbell[num_queues];
+    uint32_t hqd_pq_control[num_queues];

     UNSERIALIZE_ARRAY(id, num_queues);
     UNSERIALIZE_ARRAY(mqd_base, num_queues);
@@ -1109,6 +1129,11 @@
     UNSERIALIZE_ARRAY(pipe, num_queues);
     UNSERIALIZE_ARRAY(queue, num_queues);
     UNSERIALIZE_ARRAY(privileged, num_queues);
+    UNSERIALIZE_ARRAY(hqd_active, num_queues);
+    UNSERIALIZE_ARRAY(hqd_vmid, num_queues);
+    UNSERIALIZE_ARRAY(aql_rptr, num_queues);
+    UNSERIALIZE_ARRAY(doorbell, num_queues);
+    UNSERIALIZE_ARRAY(hqd_pq_control, num_queues);

     for (int i = 0; i < num_queues; i++) {
         QueueDesc *mqd = new QueueDesc();
@@ -1132,6 +1157,11 @@
         queues[id[i]]->processing(processing[i]);
         queues[id[i]]->ib(ib[i]);
         queues[id[i]]->setPkt(me[i], pipe[i], queue[i], privileged[i]);
+        queues[id[i]]->getMQD()->hqd_active = hqd_active[i];
+        queues[id[i]]->getMQD()->hqd_vmid = hqd_vmid[i];
+        queues[id[i]]->getMQD()->aqlRptr = aql_rptr[i];
+        queues[id[i]]->getMQD()->doorbell = doorbell[i];
+        queues[id[i]]->getMQD()->hqd_pq_control = hqd_pq_control[i];

         DPRINTF(PM4PacketProcessor, "PM4 queue %d, rptr: %p wptr: %p\n",
                 queues[id[i]]->id(), queues[id[i]]->rptr(),
    

--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/70077?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings

Gerrit-MessageType: merged
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Id3d1b7a2627d4c50133d923096495957a233f675
Gerrit-Change-Number: 70077
Gerrit-PatchSet: 2
Gerrit-Owner: VISHNU RAMADAS vramadas@wisc.edu
Gerrit-Reviewer: Matt Sinclair mattdsinclair.wisc@gmail.com
Gerrit-Reviewer: Matt Sinclair mattdsinclair@gmail.com
Gerrit-Reviewer: Matthew Poremba matthew.poremba@amd.com
Gerrit-Reviewer: VISHNU RAMADAS vramadas@wisc.edu
Gerrit-Reviewer: kokoro noreply+kokoro@google.com

VISHNU RAMADAS has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/70077?usp=email ) Change subject: dev-amdgpu: Add a few MQD attributes to GPUFS checkpoint ...................................................................... dev-amdgpu: Add a few MQD attributes to GPUFS checkpoint During GPUFS checkpoint restore, doorbells callbacks are created based on certain MQD attributes. These callbacks are required to create new SDMA doorbells. If these attributes are not present in the checkpoint, the restore hangs indefinitely waiting for ioctl calls that access these doorbells to finish execution. This commit adds the attributes required for checkpoint restore to proceed. Change-Id: Id3d1b7a2627d4c50133d923096495957a233f675 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70077 Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Reviewed-by: Matthew Poremba <matthew.poremba@amd.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com> Maintainer: Matthew Poremba <matthew.poremba@amd.com> --- M src/dev/amdgpu/pm4_packet_processor.cc 1 file changed, 30 insertions(+), 0 deletions(-) Approvals: Matthew Poremba: Looks good to me, approved; Looks good to me, approved kokoro: Regressions pass Matt Sinclair: Looks good to me, but someone else must approve; Looks good to me, approved diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc index 071fe8b..3690113 100644 --- a/src/dev/amdgpu/pm4_packet_processor.cc +++ b/src/dev/amdgpu/pm4_packet_processor.cc @@ -1025,6 +1025,11 @@ uint32_t pipe[num_queues]; uint32_t queue[num_queues]; bool privileged[num_queues]; + uint32_t hqd_active[num_queues]; + uint32_t hqd_vmid[num_queues]; + Addr aql_rptr[num_queues]; + uint32_t doorbell[num_queues]; + uint32_t hqd_pq_control[num_queues]; int i = 0; for (auto iter : queues) { @@ -1048,6 +1053,11 @@ pipe[i] = q->pipe(); queue[i] = q->queue(); privileged[i] = 
q->privileged(); + hqd_active[i] = q->getMQD()->hqd_active; + hqd_vmid[i] = q->getMQD()->hqd_vmid; + aql_rptr[i] = q->getMQD()->aqlRptr; + doorbell[i] = q->getMQD()->doorbell; + hqd_pq_control[i] = q->getMQD()->hqd_pq_control; i++; } @@ -1067,6 +1077,11 @@ SERIALIZE_ARRAY(pipe, num_queues); SERIALIZE_ARRAY(queue, num_queues); SERIALIZE_ARRAY(privileged, num_queues); + SERIALIZE_ARRAY(hqd_active, num_queues); + SERIALIZE_ARRAY(hqd_vmid, num_queues); + SERIALIZE_ARRAY(aql_rptr, num_queues); + SERIALIZE_ARRAY(doorbell, num_queues); + SERIALIZE_ARRAY(hqd_pq_control, num_queues); } void @@ -1093,6 +1108,11 @@ uint32_t pipe[num_queues]; uint32_t queue[num_queues]; bool privileged[num_queues]; + uint32_t hqd_active[num_queues]; + uint32_t hqd_vmid[num_queues]; + Addr aql_rptr[num_queues]; + uint32_t doorbell[num_queues]; + uint32_t hqd_pq_control[num_queues]; UNSERIALIZE_ARRAY(id, num_queues); UNSERIALIZE_ARRAY(mqd_base, num_queues); @@ -1109,6 +1129,11 @@ UNSERIALIZE_ARRAY(pipe, num_queues); UNSERIALIZE_ARRAY(queue, num_queues); UNSERIALIZE_ARRAY(privileged, num_queues); + UNSERIALIZE_ARRAY(hqd_active, num_queues); + UNSERIALIZE_ARRAY(hqd_vmid, num_queues); + UNSERIALIZE_ARRAY(aql_rptr, num_queues); + UNSERIALIZE_ARRAY(doorbell, num_queues); + UNSERIALIZE_ARRAY(hqd_pq_control, num_queues); for (int i = 0; i < num_queues; i++) { QueueDesc *mqd = new QueueDesc(); @@ -1132,6 +1157,11 @@ queues[id[i]]->processing(processing[i]); queues[id[i]]->ib(ib[i]); queues[id[i]]->setPkt(me[i], pipe[i], queue[i], privileged[i]); + queues[id[i]]->getMQD()->hqd_active = hqd_active[i]; + queues[id[i]]->getMQD()->hqd_vmid = hqd_vmid[i]; + queues[id[i]]->getMQD()->aqlRptr = aql_rptr[i]; + queues[id[i]]->getMQD()->doorbell = doorbell[i]; + queues[id[i]]->getMQD()->hqd_pq_control = hqd_pq_control[i]; DPRINTF(PM4PacketProcessor, "PM4 queue %d, rptr: %p wptr: %p\n", queues[id[i]]->id(), queues[id[i]]->rptr(), -- To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/70077?usp=email 
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings Gerrit-MessageType: merged Gerrit-Project: public/gem5 Gerrit-Branch: develop Gerrit-Change-Id: Id3d1b7a2627d4c50133d923096495957a233f675 Gerrit-Change-Number: 70077 Gerrit-PatchSet: 2 Gerrit-Owner: VISHNU RAMADAS <vramadas@wisc.edu> Gerrit-Reviewer: Matt Sinclair <mattdsinclair.wisc@gmail.com> Gerrit-Reviewer: Matt Sinclair <mattdsinclair@gmail.com> Gerrit-Reviewer: Matthew Poremba <matthew.poremba@amd.com> Gerrit-Reviewer: VISHNU RAMADAS <vramadas@wisc.edu> Gerrit-Reviewer: kokoro <noreply+kokoro@google.com>