From 0cb3bcfbb7081456dbe8bbe262350f85c7ebf3f7 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 27 Jan 2020 22:48:15 -0400 Subject: [PATCH 1/3] Maxwell3D: Correct query reporting. --- src/video_core/engines/maxwell_3d.cpp | 99 ++++++++++++++------------- src/video_core/engines/maxwell_3d.h | 16 +++-- 2 files changed, 61 insertions(+), 54 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7cea146f0..2a5855795 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -519,61 +519,66 @@ void Maxwell3D::ProcessFirmwareCall4() { regs.reg_array[0xd00] = 1; } -void Maxwell3D::ProcessQueryGet() { - const GPUVAddr sequence_address{regs.query.QueryAddress()}; - // Since the sequence address is given as a GPU VAddr, we have to convert it to an application - // VAddr before writing. - - // TODO(Subv): Support the other query units. - ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, - "Units other than CROP are unimplemented"); - - u64 result = 0; - - // TODO(Subv): Support the other query variables - switch (regs.query.query_get.select) { - case Regs::QuerySelect::Zero: - // This seems to actually write the query sequence to the query address. - result = regs.query.query_sequence; - break; - default: - result = 1; - UNIMPLEMENTED_MSG("Unimplemented query select type {}", - static_cast(regs.query.query_get.select.Value())); - } - - // TODO(Subv): Research and implement how query sync conditions work. - +void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { struct LongQueryResult { u64_le value; u64_le timestamp; }; static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); + const GPUVAddr sequence_address{regs.query.QueryAddress()}; + if (long_query) { + // Write the 128-bit result structure in long mode. 
Note: We emulate an infinitely fast + // GPU, this command may actually take a while to complete in real hardware due to GPU + // wait queues. + LongQueryResult query_result{}; + query_result.value = payload; + // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming + query_result.timestamp = system.CoreTiming().GetTicks(); + memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + } else { + memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload)); + } +} - switch (regs.query.query_get.mode) { - case Regs::QueryMode::Write: - case Regs::QueryMode::Write2: { - u32 sequence = regs.query.query_sequence; - if (regs.query.query_get.short_query) { - // Write the current query sequence to the sequence address. - // TODO(Subv): Find out what happens if you use a long query type but mark it as a short - // query. - memory_manager.Write<u32>(sequence_address, sequence); - } else { - // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast - // GPU, this command may actually take a while to complete in real hardware due to GPU - // wait queues. - LongQueryResult query_result{}; - query_result.value = result; - // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming - query_result.timestamp = system.CoreTiming().GetTicks(); - memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); - } +void Maxwell3D::ProcessQueryGet() { + // TODO(Subv): Support the other query units. 
+ ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, + "Units other than CROP are unimplemented"); + + switch (regs.query.query_get.operation) { + case Regs::QueryOperation::Release: { + u64 result = regs.query.query_sequence; + StampQueryResult(result, regs.query.query_get.short_query == 0); + break; + } + case Regs::QueryOperation::Acquire: { + // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU + // to write a value that matches the current payload. + UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); + break; + } + case Regs::QueryOperation::Counter: { + u64 result{}; + switch (regs.query.query_get.select) { + case Regs::QuerySelect::Zero: + result = 0; + break; + default: + result = 1; + UNIMPLEMENTED_MSG("Unimplemented query select type {}", + static_cast<u32>(regs.query.query_get.select.Value())); + } + StampQueryResult(result, regs.query.query_get.short_query == 0); + break; + } + case Regs::QueryOperation::Trap: { + UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); + break; + } + default: { + UNIMPLEMENTED_MSG("Unknown query operation"); break; } - default: - UNIMPLEMENTED_MSG("Query mode {} not implemented", - static_cast<u32>(regs.query.query_get.mode.Value())); } } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index e437bacb7..78e055765 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -71,12 +71,11 @@ public: static constexpr std::size_t MaxConstBuffers = 18; static constexpr std::size_t MaxConstBufferSize = 0x10000; - enum class QueryMode : u32 { - Write = 0, - Sync = 1, - // TODO(Subv): It is currently unknown what the difference between method 2 and method 0 - // is. 
- Write2 = 2, + enum class QueryOperation : u32 { + Release = 0, + Acquire = 1, + Counter = 2, + Trap = 3, }; enum class QueryUnit : u32 { @@ -1077,7 +1076,7 @@ public: u32 query_sequence; union { u32 raw; - BitField<0, 2, QueryMode> mode; + BitField<0, 2, QueryOperation> operation; BitField<4, 1, u32> fence; BitField<12, 4, QueryUnit> unit; BitField<16, 1, QuerySyncCondition> sync_cond; @@ -1409,6 +1408,9 @@ private: /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); + // Writes the query result accordingly + void StampQueryResult(u64 payload, bool long_query); + // Handles Conditional Rendering void ProcessQueryCondition(); From 8e9a4944dbbb4a22d149bb989faf32db0a979766 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 10 Feb 2020 10:32:51 -0400 Subject: [PATCH 2/3] GPU: Implement GPU Clock correctly. --- src/video_core/engines/maxwell_3d.cpp | 3 ++- src/video_core/gpu.cpp | 14 +++++++++++++- src/video_core/gpu.h | 2 ++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2a5855795..a7e1dee04 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -9,6 +9,7 @@ #include "core/core_timing.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" +#include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/textures/texture.h" @@ -533,7 +534,7 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { LongQueryResult query_result{}; query_result.value = payload; // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming - query_result.timestamp = system.CoreTiming().GetTicks(); + query_result.timestamp = system.GPU().GetTicks(); memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); } else { memory_manager.Write<u32>(sequence_address, 
static_cast<u32>(payload)); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 062ca83b8..4aca39faf 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/core_timing_util.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" @@ -122,6 +123,17 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { return true; } +// This values were reversed engineered by fincs from NVN +// The gpu clock is reported in units of 385/625 nanoseconds +constexpr u64 gpu_ticks_num = 384; +constexpr u64 gpu_ticks_den = 625; + +u64 GPU::GetTicks() const { + const u64 cpu_ticks = system.CoreTiming().GetTicks(); + const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); + return (nanoseconds * gpu_ticks_num) / gpu_ticks_den; +} + void GPU::FlushCommands() { renderer.Rasterizer().FlushCommands(); } @@ -340,7 +352,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { block.sequence = regs.semaphore_sequence; // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // CoreTiming - block.timestamp = system.CoreTiming().GetTicks(); + block.timestamp = GetTicks(); memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); } else { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b648317bb..07727210c 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -192,6 +192,8 @@ public: bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); + u64 GetTicks() const; + std::unique_lock<std::mutex> LockSync() { return std::unique_lock{sync_mutex}; } From d6ed31b9faeb2b1ee7c04098e614a73ca11869af Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Feb 2020 18:16:07 -0400 Subject: [PATCH 3/3] GPU: Address Feedback. 
--- src/video_core/engines/maxwell_3d.cpp | 7 ++----- src/video_core/gpu.cpp | 14 ++++++++------ 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index a7e1dee04..0b3e8749b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -531,10 +531,7 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast // GPU, this command may actually take a while to complete in real hardware due to GPU // wait queues. - LongQueryResult query_result{}; - query_result.value = payload; - // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming - query_result.timestamp = system.GPU().GetTicks(); + LongQueryResult query_result{payload, system.GPU().GetTicks()}; memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); } else { memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload)); @@ -548,7 +545,7 @@ void Maxwell3D::ProcessQueryGet() { switch (regs.query.query_get.operation) { case Regs::QueryOperation::Release: { - u64 result = regs.query.query_sequence; + const u64 result = regs.query.query_sequence; StampQueryResult(result, regs.query.query_get.short_query == 0); break; } diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 4aca39faf..4419ab735 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -123,15 +123,17 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { return true; } -// This values were reversed engineered by fincs from NVN -// The gpu clock is reported in units of 385/625 nanoseconds -constexpr u64 gpu_ticks_num = 384; -constexpr u64 gpu_ticks_den = 625; - u64 GPU::GetTicks() const { + // These values were reverse engineered by fincs from NVN + // The GPU clock advances 384 ticks every 625 nanoseconds + constexpr u64 
gpu_ticks_num = 384; + constexpr u64 gpu_ticks_den = 625; + const u64 cpu_ticks = system.CoreTiming().GetTicks(); const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); - return (nanoseconds * gpu_ticks_num) / gpu_ticks_den; + const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; + const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; + return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; } void GPU::FlushCommands() {