GSP: Implements preliminary command synchronization via GPU interrupts.

Core: Added a comment to explain the logic for the RunLoop iterations.
2014-07-22 22:59:26 -04:00 · 2014-07-22 22:59:26 -04:00 · ec14ffe1cd
commit ec14ffe1cd
parent c48ab0bd9e
4 changed files with 140 additions and 29 deletions
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@ -26,21 +26,25 @@ ARM_Interface*  g_sys_core      = nullptr;  ///< ARM11 system (OS) core
 /// Run the core CPU loop
 void RunLoop() {
    for (;;){
-        g_app_core->Run(GPU::kFrameTicks);
+        // This function loops for 100 instructions in the CPU before trying to update hardware.
+        // This is a little bit faster than SingleStep, and should be pretty much equivalent. The
+        // number of instructions chosen is fairly arbitrary; however, a larger number will more
+        // drastically affect the frequency of GSP interrupts and likely break things. The point of
+        // this is to just loop in the CPU for more than 1 instruction to reduce overhead and make
+        // it a little bit faster...
+        g_app_core->Run(100);
        HW::Update();
+        if (HLE::g_reschedule) {
            Kernel::Reschedule();
        }
    }
+}

 /// Step the CPU one instruction
 void SingleStep() {
    g_app_core->Step();
-
-    // Update and reschedule after approx. 1 frame
-    u64 current_ticks = Core::g_app_core->GetTicks();
-    if ((current_ticks - g_last_ticks) >= GPU::kFrameTicks || HLE::g_reschedule) {
-        g_last_ticks = current_ticks;
    HW::Update();
+    if (HLE::g_reschedule) {
        Kernel::Reschedule();
    }
 }
--- a/src/core/hle/service/gsp.cpp
+++ b/src/core/hle/service/gsp.cpp
@ -21,6 +21,27 @@
 // Main graphics debugger object - TODO: Here is probably not the best place for this
 GraphicsDebugger g_debugger;

+/// GSP thread interrupt queue header
+struct GX_InterruptQueue {
+    union {
+        u32 hex;
+
+        // Index of last interrupt in the queue
+        BitField<0,8,u32>   index;
+
+        // Number of interrupts remaining to be processed by the userland code
+        BitField<8,8,u32>   number_interrupts;
+
+        // Error code - zero on success, otherwise an error has occurred
+        BitField<16,8,u32>  error_code;
+    };
+
+    u32 unk0;
+    u32 unk1;
+
+    GSP_GPU::GXInterruptId slot[0x34];   ///< Interrupt ID slots
+};
+
 /// GSP shared memory GX command buffer header
 union GX_CmdBufferHeader {
    u32 hex;
@ -45,20 +66,28 @@ namespace GSP_GPU {
 Handle g_event = 0;
 Handle g_shared_memory = 0;

-u32 g_thread_id = 0;
+u32 g_thread_id = 1;

 /// Gets a pointer to the start (header) of a command buffer in GSP shared memory
 static inline u8* GX_GetCmdBufferPointer(u32 thread_id, u32 offset=0) {
+    if (0 == g_shared_memory) return nullptr;
+
    return Kernel::GetSharedMemoryPointer(g_shared_memory, 0x800 + (thread_id * 0x200) + offset);
 }

+/// Gets a pointer to the interrupt queue header for the given thread in GSP shared memory
+static inline GX_InterruptQueue* GetInterruptQueue(u32 thread_id) {
+    return (GX_InterruptQueue*)Kernel::GetSharedMemoryPointer(g_shared_memory, sizeof(GX_InterruptQueue) * thread_id);
+}
+
 /// Finishes execution of a GSP command
 void GX_FinishCommand(u32 thread_id) {
    GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(thread_id);

    g_debugger.GXCommandProcessed(GX_GetCmdBufferPointer(thread_id, 0x20 + (header->index * 0x20)));

-    header->number_commands = header->number_commands - 1;
+    header->number_commands = 0;
+
    // TODO: Increment header->index?
 }

@ -134,33 +163,55 @@ void RegisterInterruptRelayQueue(Service::Interface* self) {
    u32* cmd_buff = Service::GetCommandBuffer();
    u32 flags = cmd_buff[1];
    g_event = cmd_buff[3];
+    g_shared_memory = Kernel::CreateSharedMemory("GSPSharedMem");

    _assert_msg_(GSP, (g_event != 0), "handle is not valid!");

-    Kernel::SetEventLocked(g_event, false);
-
-    // Hack - This function will permanently set the state of the GSP event such that GPU command
-    // synchronization barriers always passthrough. Correct solution would be to set this after the
-    // GPU as processed all queued up commands, but due to the emulator being single-threaded they
-    // will always be ready.
-    Kernel::SetPermanentLock(g_event, true);
-
-    cmd_buff[0] = 0;                // Result - no error
-    cmd_buff[2] = g_thread_id;      // ThreadID
+    cmd_buff[2] = g_thread_id++; // ThreadID
    cmd_buff[4] = g_shared_memory; // GSP shared memory
+
+    Kernel::SignalEvent(GSP_GPU::g_event); // TODO(bunnei): Is this correct?
 }

+/**
+ * Signals that the specified interrupt type has occurred to userland code
+ * @param interrupt_id ID of interrupt that is being signalled
+ */
+void SignalInterrupt(GXInterruptId interrupt_id) {
+    if (0 == GSP_GPU::g_event) {
+        WARN_LOG(GSP, "cannot synchronize until GSP event has been created!");
+        return;
+    }
+    if (0 == g_shared_memory) {
+        WARN_LOG(GSP, "cannot synchronize until GSP shared memory has been created!");
+        return;
+    }
+    for (int thread_id = 0; thread_id < 0x4; ++thread_id) {
+        GX_InterruptQueue* interrupt_queue = GetInterruptQueue(thread_id);
+        interrupt_queue->number_interrupts = interrupt_queue->number_interrupts + 1;
        
-/// This triggers handling of the GX command written to the command buffer in shared memory.
-void TriggerCmdReqQueue(Service::Interface* self) {
+        u8 next = interrupt_queue->index;
+        next += interrupt_queue->number_interrupts;
+        next = next % 0x34;
+
+        interrupt_queue->slot[next] = interrupt_id;
+        interrupt_queue->error_code = 0x0; // No error
+    }
+    Kernel::SignalEvent(GSP_GPU::g_event);
+}
+
+/// Executes the GSP command at the given index in the given thread's command buffer
+void ExecuteCommand(int thread_id, int command_index) {

    // Utility function to convert register ID to address
    auto WriteGPURegister = [](u32 id, u32 data) {
        GPU::Write<u32>(0x1EF00000 + 4 * id, data);
    };

-    GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(g_thread_id);
-    auto& command = *(const GXCommand*)GX_GetCmdBufferPointer(g_thread_id, 0x20 + (header->index * 0x20));
+    GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(thread_id);
+    auto& command = *(const GXCommand*)GX_GetCmdBufferPointer(thread_id, (command_index + 1) * 0x20);
+
+    NOTICE_LOG(GSP, "decoding command 0x%08X", (int)command.id.Value());

    switch (command.id) {

@ -186,6 +237,7 @@ void TriggerCmdReqQueue(Service::Interface* self) {
        g_debugger.CommandListCalled(params.address,
                                     (u32*)Memory::GetPointer(params.address),
                                     params.size);
+        SignalInterrupt(GXInterruptId::P3D);
        break;
    }

@ -208,6 +260,16 @@ void TriggerCmdReqQueue(Service::Interface* self) {

    // TODO: Check if texture copies are implemented correctly..
    case GXCommandId::SET_DISPLAY_TRANSFER:
+        // TODO(bunnei): Signalling all of these interrupts here is totally wrong, but it seems to
+        // work well enough for running demos. Need to figure out how these all work and trigger
+        // them correctly.
+        SignalInterrupt(GXInterruptId::PSC0);
+        SignalInterrupt(GXInterruptId::PSC1);
+        SignalInterrupt(GXInterruptId::PPF);
+        SignalInterrupt(GXInterruptId::P3D);
+        SignalInterrupt(GXInterruptId::DMA);
+        break;
+
    case GXCommandId::SET_TEXTURE_COPY:
    {
        auto& params = command.image_copy;
@ -233,8 +295,21 @@ void TriggerCmdReqQueue(Service::Interface* self) {
    default:
        ERROR_LOG(GSP, "unknown command 0x%08X", (int)command.id.Value());
    }
+}

-    GX_FinishCommand(g_thread_id);
+/// This triggers handling of the GX commands written to each thread's command buffer in shared memory.
+void TriggerCmdReqQueue(Service::Interface* self) {
+    // Iterate through each thread's command queue...
+    for (int thread_id = 0; thread_id < 0x4; ++thread_id) {
+        GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(thread_id);
+
+        // Iterate through each command...
+        for (int command_index = 0; command_index < header->number_commands; ++command_index) {
+            ExecuteCommand(thread_id, command_index);
+        }
+
+        GX_FinishCommand(thread_id);
+    }
 }

 const Interface::FunctionInfo FunctionTable[] = {
@ -275,7 +350,7 @@ const Interface::FunctionInfo FunctionTable[] = {

 Interface::Interface() {
    Register(FunctionTable, ARRAY_SIZE(FunctionTable));
-    g_shared_memory = Kernel::CreateSharedMemory("GSPSharedMem");
+    g_shared_memory = 0;
 }

 Interface::~Interface() {
--- a/src/core/hle/service/gsp.h
+++ b/src/core/hle/service/gsp.h
@ -29,6 +29,16 @@ enum class GXCommandId : u32 {
    SET_COMMAND_LIST_FIRST = 0x05,
 };

+enum class GXInterruptId : u8 {
+    PSC0    = 0x00,
+    PSC1    = 0x01,
+    PDC0    = 0x02, // Seems called every vertical screen line
+    PDC1    = 0x03, // Seems called every frame
+    PPF     = 0x04,
+    P3D     = 0x05,
+    DMA     = 0x06,
+};
+
 struct GXCommand {
    BitField<0, 8, GXCommandId> id;

@ -84,4 +94,10 @@ public:

 };

+/**
+ * Signals that the specified interrupt type has occurred to userland code
+ * @param interrupt_id ID of interrupt that is being signalled
+ */
+void SignalInterrupt(GXInterruptId interrupt_id);
+
 } // namespace
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@ -7,7 +7,11 @@

 #include "core/core.h"
 #include "core/mem_map.h"
+
+#include "core/hle/hle.h"
 #include "core/hle/kernel/thread.h"
+#include "core/hle/service/gsp.h"
+
 #include "core/hw/gpu.h"

 #include "video_core/video_core.h"
@ -17,6 +21,7 @@ namespace GPU {

 RegisterSet<u32, Regs> g_regs;

+u32 g_cur_line = 0;     ///< Current vertical screen line
 u64 g_last_ticks = 0;   ///< Last CPU ticks

 /**
@ -249,17 +254,28 @@ template void Write<u8>(u32 addr, const u8 data);
 void Update() {
    u64 current_ticks = Core::g_app_core->GetTicks();

-    // Fake a vertical blank
-    if ((current_ticks - g_last_ticks) >= kFrameTicks) {
+    // Synchronize line...
+    if ((current_ticks - g_last_ticks) >= GPU::kFrameTicks / 400) {
+        GSP_GPU::SignalInterrupt(GSP_GPU::GXInterruptId::PDC0);
+        g_cur_line++;
        g_last_ticks = current_ticks;
+    }
+
+    // Synchronize frame...
+    if (g_cur_line >= 400) {
+        g_cur_line = 0;
+        GSP_GPU::SignalInterrupt(GSP_GPU::GXInterruptId::PDC1);
        VideoCore::g_renderer->SwapBuffers();
        Kernel::WaitCurrentThread(WAITTYPE_VBLANK);
+        HLE::Reschedule(__func__);
    }
 }

 /// Initialize hardware
 void Init() {
+    g_cur_line = 0;
    g_last_ticks = Core::g_app_core->GetTicks();
+
 //    SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM);
    SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM);