gl_buffer_cache: Rework to support internalized buffers

This commit is contained in:
ReinUsesLisp 2019-05-27 20:50:11 -03:00
parent f8ba72d491
commit 8155b12d3d
3 changed files with 176 additions and 67 deletions

View File

@ -7,90 +7,165 @@
#include <utility> #include <utility>
#include "common/alignment.h" #include "common/alignment.h"
#include "common/assert.h"
#include "core/core.h" #include "core/core.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL { namespace OpenGL {
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr, std::size_t size, namespace {
std::size_t alignment, GLuint buffer, GLintptr offset)
: RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, alignment{alignment}, constexpr GLuint EmptyBuffer = 0;
buffer{buffer}, offset{offset} {} constexpr GLintptr CachedBufferOffset = 0;
/// Creates an OpenGL buffer object and allocates a data store of `size` bytes for it.
/// @param size  Size in bytes passed to glNamedBufferData.
/// @param usage Usage hint passed to glNamedBufferData.
/// @return The OGLBuffer wrapping the newly created buffer.
OGLBuffer CreateBuffer(std::size_t size, GLenum usage) {
    OGLBuffer created;
    created.Create();

    // A null data pointer is passed, so no contents are uploaded here.
    const void* const no_initial_data = nullptr;
    glNamedBufferData(created.handle, size, no_initial_data, usage);

    return created;
}
} // Anonymous namespace
/// Constructs a cache entry: forwards `host_ptr` to RasterizerCacheObject and stores `cpu_addr`.
/// @param cpu_addr Value stored in the entry's cpu_addr member.
/// @param host_ptr Pointer handed to the RasterizerCacheObject base.
CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr)
    : RasterizerCacheObject(host_ptr),
      cpu_addr(cpu_addr) {
}
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
: RasterizerCache{rasterizer}, stream_buffer(size, true) {} : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
std::pair<GLuint, GLintptr> OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, OGLBufferCache::~OGLBufferCache() = default;
std::size_t alignment, bool cache) {
void OGLBufferCache::Unregister(const std::shared_ptr<CachedBufferEntry>& entry) {
std::lock_guard lock{mutex};
if (entry->IsInternalized()) {
internalized_entries.erase(entry->GetCacheAddr());
}
ReserveBuffer(entry);
RasterizerCache<std::shared_ptr<CachedBufferEntry>>::Unregister(entry);
}
OGLBufferCache::BufferInfo OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size,
std::size_t alignment, bool internalize) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
const auto host_ptr{memory_manager.GetPointer(gpu_addr)};
const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)};
if (!host_ptr) { if (!host_ptr) {
// Return a dummy buffer when host_ptr is invalid. return {EmptyBuffer, 0};
return {0, 0};
} }
// Cache management is a big overhead, so only cache entries with a given size. // Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games. // TODO: Figure out which size is the best for given games.
cache &= size >= 2048; if (!internalize && size < 0x800 &&
internalized_entries.find(cache_addr) == internalized_entries.end()) {
if (cache) { return StreamBufferUpload(host_ptr, size, alignment);
if (auto entry = TryGet(host_ptr); entry) {
if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
return {entry->GetBuffer(), entry->GetOffset()};
}
Unregister(entry);
}
} }
AlignBuffer(alignment); auto entry = TryGet(host_ptr);
const GLintptr uploaded_offset = buffer_offset; if (!entry) {
return FixedBufferUpload(gpu_addr, host_ptr, size, internalize);
std::memcpy(buffer_ptr, host_ptr, size);
buffer_ptr += size;
buffer_offset += size;
const GLuint buffer = stream_buffer.GetHandle();
if (cache) {
const VAddr cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr);
Register(std::make_shared<CachedBufferEntry>(cpu_addr, host_ptr, size, alignment, buffer,
uploaded_offset));
} }
return {buffer, uploaded_offset}; if (entry->GetSize() < size) {
GrowBuffer(entry, size);
}
return {entry->GetBuffer(), CachedBufferOffset};
} }
std::pair<GLuint, GLintptr> OGLBufferCache::UploadHostMemory(const void* raw_pointer, OGLBufferCache::BufferInfo OGLBufferCache::UploadHostMemory(const void* raw_pointer,
std::size_t size,
std::size_t alignment) {
return StreamBufferUpload(raw_pointer, size, alignment);
}
/// Maps the stream buffer and resets the running write offset to the start of the mapping.
/// @param max_size Size requested from the stream buffer, converted to GLsizeiptr.
/// @return The third value reported by stream_buffer.Map (the "invalidate" flag).
bool OGLBufferCache::Map(std::size_t max_size) {
    // NOTE(review): the literal 4 is presumably the mapping alignment - confirm against
    // OGLStreamBuffer::Map.
    const auto [mapped_ptr, base_offset, invalidated] =
        stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);

    buffer_ptr = mapped_ptr;
    buffer_offset_base = base_offset;
    buffer_offset = base_offset;
    return invalidated;
}
/// Unmaps the stream buffer, passing the distance the write offset advanced since the last Map().
void OGLBufferCache::Unmap() {
    const auto bytes_written = buffer_offset - buffer_offset_base;
    stream_buffer.Unmap(bytes_written);
}
OGLBufferCache::BufferInfo OGLBufferCache::StreamBufferUpload(const void* raw_pointer,
std::size_t size, std::size_t size,
std::size_t alignment) { std::size_t alignment) {
std::lock_guard lock{mutex};
AlignBuffer(alignment); AlignBuffer(alignment);
std::memcpy(buffer_ptr, raw_pointer, size);
const GLintptr uploaded_offset = buffer_offset; const GLintptr uploaded_offset = buffer_offset;
std::memcpy(buffer_ptr, raw_pointer, size);
buffer_ptr += size; buffer_ptr += size;
buffer_offset += size; buffer_offset += size;
return {stream_buffer.GetHandle(), uploaded_offset}; return {stream_buffer.GetHandle(), uploaded_offset};
} }
bool OGLBufferCache::Map(std::size_t max_size) { OGLBufferCache::BufferInfo OGLBufferCache::FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr,
bool invalidate; std::size_t size, bool internalize) {
std::tie(buffer_ptr, buffer_offset_base, invalidate) = if (internalize) {
stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); internalized_entries.emplace(ToCacheAddr(host_ptr));
buffer_offset = buffer_offset_base;
if (invalidate) {
InvalidateAll();
} }
return invalidate; auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
const auto cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr);
auto entry = GetUncachedBuffer(cpu_addr, host_ptr);
entry->SetSize(size);
entry->SetInternalState(internalize);
Register(entry);
if (entry->GetCapacity() < size) {
entry->SetCapacity(CreateBuffer(size, GL_STATIC_DRAW), size);
}
glNamedBufferSubData(entry->GetBuffer(), 0, static_cast<GLintptr>(size), host_ptr);
return {entry->GetBuffer(), CachedBufferOffset};
} }
// GrowBuffer: extends `entry` to cover `new_size` bytes. When the current capacity is too small
// a new GL buffer is created and the old contents are copied into it; afterwards the bytes past
// the previous size are uploaded from host memory and the entry is re-registered.
// NOTE(review): the next two lines interleave the removed Unmap() with the added GrowBuffer().
void OGLBufferCache::Unmap() { void OGLBufferCache::GrowBuffer(std::shared_ptr<CachedBufferEntry>& entry, std::size_t new_size) {
stream_buffer.Unmap(buffer_offset - buffer_offset_base); const auto old_size = static_cast<GLintptr>(entry->GetSize());
if (entry->GetCapacity() < new_size) {
const auto old_buffer = entry->GetBuffer();
OGLBuffer new_buffer = CreateBuffer(new_size, GL_STATIC_COPY);
// Copy bits from the old buffer to the new buffer.
glCopyNamedBufferSubData(old_buffer, new_buffer.handle, 0, 0, old_size);
// SetCapacity move-assigns the new buffer over the entry's previous OGLBuffer.
entry->SetCapacity(std::move(new_buffer), new_size);
}
// Upload the new bits.
const auto size_diff = static_cast<GLintptr>(new_size - old_size);
glNamedBufferSubData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size);
// Update entry's size in the object and in the cache.
entry->SetSize(new_size);
// NOTE(review): OGLBufferCache::Unregister also pushes the entry into buffer_reserve and erases
// internalized entries from internalized_entries, so after this Unregister/Register pair the
// still-registered entry also sits in the reserve - confirm this is intended.
Unregister(entry);
Register(entry);
}
/// Returns an entry that is not currently taken from the cache lookup path: a reserved entry for
/// `host_ptr` when TryGetReservedBuffer yields one, otherwise a freshly allocated entry.
/// @param cpu_addr Address passed to the constructor of a newly created entry.
/// @param host_ptr Host pointer used for the reserve lookup and for a newly created entry.
std::shared_ptr<CachedBufferEntry> OGLBufferCache::GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) {
    auto reserved = TryGetReservedBuffer(host_ptr);
    if (!reserved) {
        // Nothing in the reserve for this pointer: build a new entry.
        return std::make_shared<CachedBufferEntry>(cpu_addr, host_ptr);
    }
    return reserved;
}
/// Takes the most recently reserved entry for `host_ptr` out of buffer_reserve.
/// @param host_ptr Host pointer whose cache address keys the reserve lookup.
/// @return The reserved entry, or an empty shared_ptr when nothing is reserved for that address.
std::shared_ptr<CachedBufferEntry> OGLBufferCache::TryGetReservedBuffer(u8* host_ptr) {
    const auto it = buffer_reserve.find(ToCacheAddr(host_ptr));
    // The key outlives its entries: once every reserved buffer has been popped the vector is
    // still present but empty, and calling back() on it would be undefined behaviour.
    if (it == buffer_reserve.end() || it->second.empty()) {
        return {};
    }
    auto& reserve = it->second;
    // Move the pointer out instead of copying it; the slot is popped right after.
    auto entry = std::move(reserve.back());
    reserve.pop_back();
    return entry;
}
/// Appends `entry` to the buffer_reserve list keyed by the entry's cache address, where
/// TryGetReservedBuffer can later pick it up again.
/// NOTE(review): the closing line below also carries a brace from the removed revision.
void OGLBufferCache::ReserveBuffer(std::shared_ptr<CachedBufferEntry> entry) {
// The key is read via entry->GetCacheAddr(); std::move only casts, the actual move happens
// inside push_back.
buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry));
} }
void OGLBufferCache::AlignBuffer(std::size_t alignment) { void OGLBufferCache::AlignBuffer(std::size_t alignment) {

View File

@ -5,9 +5,12 @@
#pragma once #pragma once
#include <cstddef> #include <cstddef>
#include <map>
#include <memory> #include <memory>
#include <tuple> #include <tuple>
#include <unordered_set>
#include <utility> #include <utility>
#include <vector>
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_cache.h"
@ -20,8 +23,7 @@ class RasterizerOpenGL;
class CachedBufferEntry final : public RasterizerCacheObject { class CachedBufferEntry final : public RasterizerCacheObject {
public: public:
explicit CachedBufferEntry(VAddr cpu_addr, u8* host_ptr, std::size_t size, explicit CachedBufferEntry(VAddr cpu_addr, u8* host_ptr);
std::size_t alignment, GLuint buffer, GLintptr offset);
VAddr GetCpuAddr() const override { VAddr GetCpuAddr() const override {
return cpu_addr; return cpu_addr;
@ -35,55 +37,87 @@ public:
return size; return size;
} }
std::size_t GetAlignment() const { std::size_t GetCapacity() const {
return alignment; return capacity;
}
bool IsInternalized() const {
return is_internal;
} }
GLuint GetBuffer() const { GLuint GetBuffer() const {
return buffer; return buffer.handle;
} }
GLintptr GetOffset() const { void SetSize(std::size_t new_size) {
return offset; size = new_size;
}
void SetInternalState(bool is_internal_) {
is_internal = is_internal_;
}
void SetCapacity(OGLBuffer&& new_buffer, std::size_t new_capacity) {
capacity = new_capacity;
buffer = std::move(new_buffer);
} }
private: private:
VAddr cpu_addr{}; VAddr cpu_addr{};
std::size_t size{}; std::size_t size{};
std::size_t alignment{}; std::size_t capacity{};
bool is_internal{};
GLuint buffer{}; OGLBuffer buffer;
GLintptr offset{};
}; };
class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
using BufferInfo = std::pair<GLuint, GLintptr>;
public: public:
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size);
~OGLBufferCache();
void Unregister(const std::shared_ptr<CachedBufferEntry>& entry) override;
/// Uploads data from a guest GPU address. Returns the OpenGL buffer where it's located and its /// Uploads data from a guest GPU address. Returns the OpenGL buffer where it's located and its
/// offset. /// offset.
std::pair<GLuint, GLintptr> UploadMemory(GPUVAddr gpu_addr, std::size_t size, BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
std::size_t alignment = 4, bool cache = true); bool internalize = false);
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
std::pair<GLuint, GLintptr> UploadHostMemory(const void* raw_pointer, std::size_t size, BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
std::size_t alignment = 4); std::size_t alignment = 4);
bool Map(std::size_t max_size); bool Map(std::size_t max_size);
void Unmap(); void Unmap();
protected: protected:
void AlignBuffer(std::size_t alignment);
// We do not have to flush this cache as things in it are never modified by us. // We do not have to flush this cache as things in it are never modified by us.
void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
private: private:
OGLStreamBuffer stream_buffer; BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, std::size_t alignment);
BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size,
bool internalize);
void GrowBuffer(std::shared_ptr<CachedBufferEntry>& entry, std::size_t new_size);
std::shared_ptr<CachedBufferEntry> GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr);
std::shared_ptr<CachedBufferEntry> TryGetReservedBuffer(u8* host_ptr);
void ReserveBuffer(std::shared_ptr<CachedBufferEntry> entry);
void AlignBuffer(std::size_t alignment);
u8* buffer_ptr = nullptr; u8* buffer_ptr = nullptr;
GLintptr buffer_offset = 0; GLintptr buffer_offset = 0;
GLintptr buffer_offset_base = 0; GLintptr buffer_offset_base = 0;
OGLStreamBuffer stream_buffer;
std::unordered_set<CacheAddr> internalized_entries;
std::unordered_map<CacheAddr, std::vector<std::shared_ptr<CachedBufferEntry>>> buffer_reserve;
}; };
} // namespace OpenGL } // namespace OpenGL

View File

@ -790,7 +790,7 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
size = Common::AlignUp(size, sizeof(GLvec4)); size = Common::AlignUp(size, sizeof(GLvec4));
ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big");
const std::size_t alignment = device.GetUniformBufferAlignment(); const auto alignment = device.GetUniformBufferAlignment();
const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
bind_ubo_pushbuffer.Push(cbuf, offset, size); bind_ubo_pushbuffer.Push(cbuf, offset, size);
} }