mirror of
https://github.com/Lime3DS/Lime3DS.git
synced 2025-01-16 16:42:11 +01:00
rasterizer_cache: Fixes to (unaligned) texture downloads (#6697)
* rasterizer_cache: Header cleanup * gl_texture_runtime: Fix incorrect stride in single scanline downloads * texture_codec: Fix unaligned texture downloads
This commit is contained in:
parent
700c00f021
commit
e783b0d4a9
@ -42,11 +42,13 @@ add_library(video_core STATIC
|
|||||||
rasterizer_cache/rasterizer_cache.h
|
rasterizer_cache/rasterizer_cache.h
|
||||||
rasterizer_cache/rasterizer_cache_base.h
|
rasterizer_cache/rasterizer_cache_base.h
|
||||||
rasterizer_cache/sampler_params.h
|
rasterizer_cache/sampler_params.h
|
||||||
|
rasterizer_cache/slot_id.h
|
||||||
rasterizer_cache/surface_base.cpp
|
rasterizer_cache/surface_base.cpp
|
||||||
rasterizer_cache/surface_base.h
|
rasterizer_cache/surface_base.h
|
||||||
rasterizer_cache/surface_params.cpp
|
rasterizer_cache/surface_params.cpp
|
||||||
rasterizer_cache/surface_params.h
|
rasterizer_cache/surface_params.h
|
||||||
rasterizer_cache/texture_codec.h
|
rasterizer_cache/texture_codec.h
|
||||||
|
rasterizer_cache/texture_cube.h
|
||||||
rasterizer_cache/utils.cpp
|
rasterizer_cache/utils.cpp
|
||||||
rasterizer_cache/utils.h
|
rasterizer_cache/utils.h
|
||||||
renderer_opengl/frame_dumper_opengl.cpp
|
renderer_opengl/frame_dumper_opengl.cpp
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
#include "video_core/custom_textures/custom_tex_manager.h"
|
#include "video_core/custom_textures/custom_tex_manager.h"
|
||||||
#include "video_core/rasterizer_cache/rasterizer_cache_base.h"
|
#include "video_core/rasterizer_cache/rasterizer_cache_base.h"
|
||||||
|
#include "video_core/rasterizer_cache/surface_base.h"
|
||||||
#include "video_core/regs.h"
|
#include "video_core/regs.h"
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
#include "video_core/texture/texture_decode.h"
|
#include "video_core/texture/texture_decode.h"
|
||||||
@ -1212,7 +1213,7 @@ void RasterizerCache<T>::ClearAll(bool flush) {
|
|||||||
|
|
||||||
// Remove the whole cache without really looking at it.
|
// Remove the whole cache without really looking at it.
|
||||||
cached_pages -= flush_interval;
|
cached_pages -= flush_interval;
|
||||||
dirty_regions -= SurfaceInterval(0x0, 0xFFFFFFFF);
|
dirty_regions.clear();
|
||||||
page_table.clear();
|
page_table.clear();
|
||||||
remove_surfaces.clear();
|
remove_surfaces.clear();
|
||||||
}
|
}
|
||||||
|
@ -11,7 +11,8 @@
|
|||||||
#include <boost/icl/interval_map.hpp>
|
#include <boost/icl/interval_map.hpp>
|
||||||
#include <tsl/robin_map.h>
|
#include <tsl/robin_map.h>
|
||||||
#include "video_core/rasterizer_cache/sampler_params.h"
|
#include "video_core/rasterizer_cache/sampler_params.h"
|
||||||
#include "video_core/rasterizer_cache/surface_base.h"
|
#include "video_core/rasterizer_cache/surface_params.h"
|
||||||
|
#include "video_core/rasterizer_cache/texture_cube.h"
|
||||||
|
|
||||||
namespace Memory {
|
namespace Memory {
|
||||||
class MemorySystem;
|
class MemorySystem;
|
||||||
@ -70,12 +71,6 @@ class RasterizerCache {
|
|||||||
SurfaceId depth_id;
|
SurfaceId depth_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TextureCube {
|
|
||||||
SurfaceId surface_id;
|
|
||||||
std::array<SurfaceId, 6> face_ids;
|
|
||||||
std::array<u64, 6> ticks;
|
|
||||||
};
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager,
|
explicit RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager,
|
||||||
Runtime& runtime, Pica::Regs& regs, RendererBase& renderer);
|
Runtime& runtime, Pica::Regs& regs, RendererBase& renderer);
|
||||||
|
21
src/video_core/rasterizer_cache/slot_id.h
Normal file
21
src/video_core/rasterizer_cache/slot_id.h
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
// Copyright 2023 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once

#include "common/slot_vector.h"

namespace VideoCore {

/// Identifier used to refer to a surface; alias of Common::SlotId.
using SurfaceId = Common::SlotId;
/// Identifier used to refer to a sampler; alias of Common::SlotId.
using SamplerId = Common::SlotId;

/// Fake surface ID for null surfaces
constexpr SurfaceId NULL_SURFACE_ID{0};
/// Fake surface ID for null cube surfaces
constexpr SurfaceId NULL_SURFACE_CUBE_ID{1};
/// Fake sampler ID for null samplers
constexpr SamplerId NULL_SAMPLER_ID{0};

} // namespace VideoCore
|
@ -3,6 +3,7 @@
|
|||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <bit>
|
#include <bit>
|
||||||
#include <span>
|
#include <span>
|
||||||
@ -264,6 +265,7 @@ static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 en
|
|||||||
const u32 aligned_down_start_offset = Common::AlignDown(start_offset, tile_size);
|
const u32 aligned_down_start_offset = Common::AlignDown(start_offset, tile_size);
|
||||||
const u32 aligned_start_offset = Common::AlignUp(start_offset, tile_size);
|
const u32 aligned_start_offset = Common::AlignUp(start_offset, tile_size);
|
||||||
const u32 aligned_end_offset = Common::AlignDown(end_offset, tile_size);
|
const u32 aligned_end_offset = Common::AlignDown(end_offset, tile_size);
|
||||||
|
const u32 begin_pixel_index = aligned_down_start_offset * 8 / GetFormatBpp(format);
|
||||||
|
|
||||||
ASSERT(!morton_to_linear ||
|
ASSERT(!morton_to_linear ||
|
||||||
(aligned_start_offset == start_offset && aligned_end_offset == end_offset));
|
(aligned_start_offset == start_offset && aligned_end_offset == end_offset));
|
||||||
@ -271,12 +273,12 @@ static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 en
|
|||||||
// In OpenGL the texture origin is in the bottom left corner as opposed to other
|
// In OpenGL the texture origin is in the bottom left corner as opposed to other
|
||||||
// APIs that have it at the top left. To avoid flipping texture coordinates in
|
// APIs that have it at the top left. To avoid flipping texture coordinates in
|
||||||
// the shader we read/write the linear buffer from the bottom up
|
// the shader we read/write the linear buffer from the bottom up
|
||||||
u32 linear_offset = ((height - 8) * width) * aligned_bytes_per_pixel;
|
u32 x = (begin_pixel_index % (width * 8)) / 8;
|
||||||
|
u32 y = (begin_pixel_index / (width * 8)) * 8;
|
||||||
|
u32 linear_offset = ((height - 8 - y) * width + x) * aligned_bytes_per_pixel;
|
||||||
u32 tiled_offset = 0;
|
u32 tiled_offset = 0;
|
||||||
u32 x = 0;
|
|
||||||
u32 y = 0;
|
|
||||||
|
|
||||||
const auto LinearNextTile = [&] {
|
const auto linear_next_tile = [&] {
|
||||||
x = (x + 8) % width;
|
x = (x + 8) % width;
|
||||||
linear_offset += 8 * aligned_bytes_per_pixel;
|
linear_offset += 8 * aligned_bytes_per_pixel;
|
||||||
if (!x) {
|
if (!x) {
|
||||||
@ -300,7 +302,7 @@ static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 en
|
|||||||
std::min(aligned_start_offset, end_offset) - start_offset);
|
std::min(aligned_start_offset, end_offset) - start_offset);
|
||||||
|
|
||||||
tiled_offset += aligned_start_offset - start_offset;
|
tiled_offset += aligned_start_offset - start_offset;
|
||||||
LinearNextTile();
|
linear_next_tile();
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the copy spans multiple tiles, copy the fully aligned tiles in between.
|
// If the copy spans multiple tiles, copy the fully aligned tiles in between.
|
||||||
@ -313,7 +315,7 @@ static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 en
|
|||||||
auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size);
|
auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size);
|
||||||
MortonCopyTile<morton_to_linear, format, converted>(width, tiled_data, linear_data);
|
MortonCopyTile<morton_to_linear, format, converted>(width, tiled_data, linear_data);
|
||||||
tiled_offset += tile_size;
|
tiled_offset += tile_size;
|
||||||
LinearNextTile();
|
linear_next_tile();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
52
src/video_core/rasterizer_cache/texture_cube.h
Normal file
52
src/video_core/rasterizer_cache/texture_cube.h
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
// Copyright 2023 Citra Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common/hash.h"
|
||||||
|
#include "video_core/rasterizer_cache/slot_id.h"
|
||||||
|
#include "video_core/regs_texturing.h"
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
|
||||||
|
/// Bookkeeping record for a cube texture: one surface id for the cube itself
/// plus per-face arrays of six entries each.
struct TextureCube {
|
||||||
|
/// Id of the surface associated with this cube (presumably the cube texture itself — confirm).
SurfaceId surface_id;
|
||||||
|
/// One surface id per cube face (six entries).
std::array<SurfaceId, 6> face_ids;
|
||||||
|
/// One 64-bit tick value per cube face (presumably a last-modification tick — confirm against users).
std::array<u64, 6> ticks;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TextureCubeConfig {
|
||||||
|
PAddr px;
|
||||||
|
PAddr nx;
|
||||||
|
PAddr py;
|
||||||
|
PAddr ny;
|
||||||
|
PAddr pz;
|
||||||
|
PAddr nz;
|
||||||
|
u32 width;
|
||||||
|
u32 levels;
|
||||||
|
Pica::TexturingRegs::TextureFormat format;
|
||||||
|
|
||||||
|
bool operator==(const TextureCubeConfig& rhs) const {
|
||||||
|
return std::memcmp(this, &rhs, sizeof(TextureCubeConfig)) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator!=(const TextureCubeConfig& rhs) const {
|
||||||
|
return std::memcmp(this, &rhs, sizeof(TextureCubeConfig)) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const u64 Hash() const {
|
||||||
|
return Common::ComputeHash64(this, sizeof(TextureCubeConfig));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace VideoCore
|
||||||
|
|
||||||
|
namespace std {
|
||||||
|
template <>
|
||||||
|
struct hash<VideoCore::TextureCubeConfig> {
|
||||||
|
std::size_t operator()(const VideoCore::TextureCubeConfig& config) const noexcept {
|
||||||
|
return config.Hash();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace std
|
@ -5,24 +5,11 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <span>
|
#include <span>
|
||||||
#include "common/hash.h"
|
|
||||||
#include "common/math_util.h"
|
#include "common/math_util.h"
|
||||||
#include "common/slot_vector.h"
|
|
||||||
#include "common/vector_math.h"
|
#include "common/vector_math.h"
|
||||||
#include "video_core/regs_texturing.h"
|
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
||||||
using SurfaceId = Common::SlotId;
|
|
||||||
using SamplerId = Common::SlotId;
|
|
||||||
|
|
||||||
/// Fake surface ID for null surfaces
|
|
||||||
constexpr SurfaceId NULL_SURFACE_ID{0};
|
|
||||||
/// Fake surface ID for null cube surfaces
|
|
||||||
constexpr SurfaceId NULL_SURFACE_CUBE_ID{1};
|
|
||||||
/// Fake sampler ID for null samplers
|
|
||||||
constexpr SamplerId NULL_SAMPLER_ID{0};
|
|
||||||
|
|
||||||
struct Offset {
|
struct Offset {
|
||||||
u32 x = 0;
|
u32 x = 0;
|
||||||
u32 y = 0;
|
u32 y = 0;
|
||||||
@ -79,30 +66,6 @@ struct StagingData {
|
|||||||
std::span<u8> mapped;
|
std::span<u8> mapped;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TextureCubeConfig {
|
|
||||||
PAddr px;
|
|
||||||
PAddr nx;
|
|
||||||
PAddr py;
|
|
||||||
PAddr ny;
|
|
||||||
PAddr pz;
|
|
||||||
PAddr nz;
|
|
||||||
u32 width;
|
|
||||||
u32 levels;
|
|
||||||
Pica::TexturingRegs::TextureFormat format;
|
|
||||||
|
|
||||||
bool operator==(const TextureCubeConfig& rhs) const {
|
|
||||||
return std::memcmp(this, &rhs, sizeof(TextureCubeConfig)) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator!=(const TextureCubeConfig& rhs) const {
|
|
||||||
return std::memcmp(this, &rhs, sizeof(TextureCubeConfig)) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
const u64 Hash() const {
|
|
||||||
return Common::ComputeHash64(this, sizeof(TextureCubeConfig));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
class SurfaceParams;
|
class SurfaceParams;
|
||||||
|
|
||||||
u32 MipLevels(u32 width, u32 height, u32 max_level);
|
u32 MipLevels(u32 width, u32 height, u32 max_level);
|
||||||
@ -134,12 +97,3 @@ void DecodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr en
|
|||||||
std::span<u8> source, std::span<u8> dest, bool convert = false);
|
std::span<u8> source, std::span<u8> dest, bool convert = false);
|
||||||
|
|
||||||
} // namespace VideoCore
|
} // namespace VideoCore
|
||||||
|
|
||||||
namespace std {
|
|
||||||
template <>
|
|
||||||
struct hash<VideoCore::TextureCubeConfig> {
|
|
||||||
std::size_t operator()(const VideoCore::TextureCubeConfig& config) const noexcept {
|
|
||||||
return config.Hash();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
} // namespace std
|
|
||||||
|
@ -484,20 +484,19 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download,
|
|||||||
|
|
||||||
bool Surface::DownloadWithoutFbo(const VideoCore::BufferTextureCopy& download,
|
bool Surface::DownloadWithoutFbo(const VideoCore::BufferTextureCopy& download,
|
||||||
const VideoCore::StagingData& staging) {
|
const VideoCore::StagingData& staging) {
|
||||||
const bool is_full_download = download.texture_rect == GetRect();
|
if (driver->IsOpenGLES()) {
|
||||||
const bool has_sub_image = driver->HasArbGetTextureSubImage();
|
|
||||||
if (driver->IsOpenGLES() || (!is_full_download && !has_sub_image)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const GLuint old_tex = OpenGLState::GetCurState().texture_units[0].texture_2d;
|
|
||||||
const auto& tuple = runtime->GetFormatTuple(pixel_format);
|
const auto& tuple = runtime->GetFormatTuple(pixel_format);
|
||||||
|
const u32 unscaled_width = download.texture_rect.GetWidth();
|
||||||
|
|
||||||
glActiveTexture(GL_TEXTURE0);
|
glPixelStorei(GL_PACK_ROW_LENGTH, unscaled_width);
|
||||||
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride));
|
|
||||||
SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
|
SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
|
||||||
|
|
||||||
// Prefer glGetTextureSubImage in most cases since it's the fastest and most convenient option
|
// Prefer glGetTextureSubImage in most cases since it's the fastest and most convenient option
|
||||||
|
const bool is_full_download = download.texture_rect == GetRect();
|
||||||
|
const bool has_sub_image = driver->HasArbGetTextureSubImage();
|
||||||
if (has_sub_image) {
|
if (has_sub_image) {
|
||||||
const GLsizei buf_size = static_cast<GLsizei>(staging.mapped.size());
|
const GLsizei buf_size = static_cast<GLsizei>(staging.mapped.size());
|
||||||
glGetTextureSubImage(Handle(0), download.texture_level, download.texture_rect.left,
|
glGetTextureSubImage(Handle(0), download.texture_level, download.texture_rect.left,
|
||||||
@ -505,16 +504,19 @@ bool Surface::DownloadWithoutFbo(const VideoCore::BufferTextureCopy& download,
|
|||||||
download.texture_rect.GetHeight(), 1, tuple.format, tuple.type,
|
download.texture_rect.GetHeight(), 1, tuple.format, tuple.type,
|
||||||
buf_size, staging.mapped.data());
|
buf_size, staging.mapped.data());
|
||||||
return true;
|
return true;
|
||||||
|
} else if (is_full_download) {
|
||||||
|
// This should only trigger for full texture downloads in oldish intel drivers
|
||||||
|
// that only support up to 4.3
|
||||||
|
OpenGLState state = OpenGLState::GetCurState();
|
||||||
|
state.texture_units[0].texture_2d = Handle(0);
|
||||||
|
state.Apply();
|
||||||
|
|
||||||
|
glGetTexImage(GL_TEXTURE_2D, download.texture_level, tuple.format, tuple.type,
|
||||||
|
staging.mapped.data());
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
return false;
|
||||||
// This should only trigger for full texture downloads in oldish intel drivers
|
|
||||||
// that only support up to 4.3
|
|
||||||
glBindTexture(GL_TEXTURE_2D, Handle(0));
|
|
||||||
glGetTexImage(GL_TEXTURE_2D, download.texture_level, tuple.format, tuple.type,
|
|
||||||
staging.mapped.data());
|
|
||||||
glBindTexture(GL_TEXTURE_2D, old_tex);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Surface::Attach(GLenum target, u32 level, u32 layer, bool scaled) {
|
void Surface::Attach(GLenum target, u32 level, u32 layer, bool scaled) {
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
#include "video_core/rasterizer_cache/framebuffer_base.h"
|
#include "video_core/rasterizer_cache/framebuffer_base.h"
|
||||||
#include "video_core/rasterizer_cache/rasterizer_cache_base.h"
|
#include "video_core/rasterizer_cache/rasterizer_cache_base.h"
|
||||||
|
#include "video_core/rasterizer_cache/surface_base.h"
|
||||||
#include "video_core/renderer_opengl/gl_blit_helper.h"
|
#include "video_core/renderer_opengl/gl_blit_helper.h"
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user