dolphin/Source/Core/VideoBackends/Software/TextureSampler.cpp
Pokechu22 fa9c43cfaa Software: Handle texture wrapping more accurately
This fixes various texture offsetting issues with negative texture coordinates (bringing the software renderer in line with the hardware renderers).  It also handles the invalid wrap mode accurately (as was done for the hardware renderers in the previous commit).  Lastly, it handles wrapping with non-power-of-2 texture sizes in a hardware-accurate way (which is somewhat broken looking, as games aren't supposed to use wrapping with non-power-of-2 sizes); this has not been done for the hardware renderers.
2021-07-20 19:23:23 -07:00

262 lines
8.3 KiB
C++

// Copyright 2009 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "VideoBackends/Software/TextureSampler.h"
#include <algorithm>
#include <cmath>
#include "Common/CommonTypes.h"
#include "Common/MsgHandler.h"
#include "Core/HW/Memmap.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/TextureDecoder.h"
#define ALLOW_MIPMAP 1
namespace TextureSampler
{
static inline void WrapCoord(int* coordp, WrapMode wrap_mode, int image_size)
{
int coord = *coordp;
switch (wrap_mode)
{
case WrapMode::Clamp:
coord = std::clamp(coord, 0, image_size - 1);
break;
case WrapMode::Repeat:
// Per YAGCD's info on TX_SETMODE1_I0 (et al.), mirror "requires the texture size to be a power
// of two. (wrapping is implemented by a logical AND (SIZE-1))". So though this doesn't wrap
// nicely for non-power-of-2 sizes, that's how hardware does it.
coord = coord & (image_size - 1);
break;
case WrapMode::Mirror:
{
// YAGCD doesn't mention this, but this seems to be the check used to implement mirroring.
// With power-of-2 sizes, this correctly checks if it's an even-numbered repeat or an
// odd-numbered one, and thus can decide whether to reflect. It fails in unusual ways
// with non-power-of-2 sizes, but seems to match what happens on actual hardware.
if ((coord & image_size) != 0)
coord = ~coord;
coord = coord & (image_size - 1);
break;
}
default:
// Hardware testing indicates that wrap_mode set to 3 behaves the same as clamp.
PanicAlertFmt("Invalid wrap mode: {}", wrap_mode);
coord = std::clamp(coord, 0, image_size - 1);
break;
}
*coordp = coord;
}
static inline void SetTexel(const u8* inTexel, u32* outTexel, u32 fract)
{
outTexel[0] = inTexel[0] * fract;
outTexel[1] = inTexel[1] * fract;
outTexel[2] = inTexel[2] * fract;
outTexel[3] = inTexel[3] * fract;
}
static inline void AddTexel(const u8* inTexel, u32* outTexel, u32 fract)
{
outTexel[0] += inTexel[0] * fract;
outTexel[1] += inTexel[1] * fract;
outTexel[2] += inTexel[2] * fract;
outTexel[3] += inTexel[3] * fract;
}
void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8* sample)
{
int baseMip = 0;
bool mipLinear = false;
#if (ALLOW_MIPMAP)
const FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
const TexMode0& tm0 = texUnit.texMode0[texmap & 3];
const s32 lodFract = lod & 0xf;
if (lod > 0 && SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0))
{
// use mipmap
baseMip = lod >> 4;
mipLinear = (lodFract && tm0.mipmap_filter == MipMode::Linear);
// if using nearest mip filter and lodFract >= 0.5 round up to next mip
if (tm0.mipmap_filter == MipMode::Point && lodFract >= 8)
baseMip++;
}
if (mipLinear)
{
u8 sampledTex[4];
u32 texel[4];
SampleMip(s, t, baseMip, linear, texmap, sampledTex);
SetTexel(sampledTex, texel, (16 - lodFract));
SampleMip(s, t, baseMip + 1, linear, texmap, sampledTex);
AddTexel(sampledTex, texel, lodFract);
sample[0] = (u8)(texel[0] >> 4);
sample[1] = (u8)(texel[1] >> 4);
sample[2] = (u8)(texel[2] >> 4);
sample[3] = (u8)(texel[3] >> 4);
}
else
#endif
{
SampleMip(s, t, baseMip, linear, texmap, sample);
}
}
void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
{
const FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
const u8 subTexmap = texmap & 3;
const TexMode0& tm0 = texUnit.texMode0[subTexmap];
const TexImage0& ti0 = texUnit.texImage0[subTexmap];
const TexTLUT& texTlut = texUnit.texTlut[subTexmap];
const TextureFormat texfmt = ti0.format;
const TLUTFormat tlutfmt = texTlut.tlut_format;
const u8* imageSrc;
const u8* imageSrcOdd = nullptr;
if (texUnit.texImage1[subTexmap].cache_manually_managed)
{
imageSrc = &texMem[texUnit.texImage1[subTexmap].tmem_even * TMEM_LINE_SIZE];
if (texfmt == TextureFormat::RGBA8)
imageSrcOdd = &texMem[texUnit.texImage2[subTexmap].tmem_odd * TMEM_LINE_SIZE];
}
else
{
const u32 imageBase = texUnit.texImage3[subTexmap].image_base << 5;
imageSrc = Memory::GetPointer(imageBase);
}
int image_width_minus_1 = ti0.width;
int image_height_minus_1 = ti0.height;
const int tlutAddress = texTlut.tmem_offset << 9;
const u8* tlut = &texMem[tlutAddress];
// reduce sample location and texture size to mip level
// move texture pointer to mip location
if (mip)
{
int mipWidth = image_width_minus_1 + 1;
int mipHeight = image_height_minus_1 + 1;
const int fmtWidth = TexDecoder_GetBlockWidthInTexels(texfmt);
const int fmtHeight = TexDecoder_GetBlockHeightInTexels(texfmt);
const int fmtDepth = TexDecoder_GetTexelSizeInNibbles(texfmt);
image_width_minus_1 >>= mip;
image_height_minus_1 >>= mip;
s >>= mip;
t >>= mip;
while (mip)
{
mipWidth = std::max(mipWidth, fmtWidth);
mipHeight = std::max(mipHeight, fmtHeight);
const u32 size = (mipWidth * mipHeight * fmtDepth) >> 1;
imageSrc += size;
mipWidth >>= 1;
mipHeight >>= 1;
mip--;
}
}
if (linear)
{
// offset linear sampling
s -= 64;
t -= 64;
// integer part of sample location
int imageS = s >> 7;
int imageT = t >> 7;
// linear sampling
int imageSPlus1 = imageS + 1;
const int fractS = s & 0x7f;
int imageTPlus1 = imageT + 1;
const int fractT = t & 0x7f;
u8 sampledTex[4];
u32 texel[4];
WrapCoord(&imageS, tm0.wrap_s, image_width_minus_1 + 1);
WrapCoord(&imageT, tm0.wrap_t, image_height_minus_1 + 1);
WrapCoord(&imageSPlus1, tm0.wrap_s, image_width_minus_1 + 1);
WrapCoord(&imageTPlus1, tm0.wrap_t, image_height_minus_1 + 1);
if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].cache_manually_managed))
{
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, image_width_minus_1, texfmt,
tlut, tlutfmt);
SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, image_width_minus_1, texfmt,
tlut, tlutfmt);
AddTexel(sampledTex, texel, (fractS) * (128 - fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, image_width_minus_1, texfmt,
tlut, tlutfmt);
AddTexel(sampledTex, texel, (128 - fractS) * (fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, image_width_minus_1,
texfmt, tlut, tlutfmt);
AddTexel(sampledTex, texel, (fractS) * (fractT));
}
else
{
TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageS, imageT,
image_width_minus_1);
SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT));
TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageSPlus1, imageT,
image_width_minus_1);
AddTexel(sampledTex, texel, (fractS) * (128 - fractT));
TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageS, imageTPlus1,
image_width_minus_1);
AddTexel(sampledTex, texel, (128 - fractS) * (fractT));
TexDecoder_DecodeTexelRGBA8FromTmem(sampledTex, imageSrc, imageSrcOdd, imageSPlus1,
imageTPlus1, image_width_minus_1);
AddTexel(sampledTex, texel, (fractS) * (fractT));
}
sample[0] = (u8)(texel[0] >> 14);
sample[1] = (u8)(texel[1] >> 14);
sample[2] = (u8)(texel[2] >> 14);
sample[3] = (u8)(texel[3] >> 14);
}
else
{
// integer part of sample location
int imageS = s >> 7;
int imageT = t >> 7;
// nearest neighbor sampling
WrapCoord(&imageS, tm0.wrap_s, image_width_minus_1 + 1);
WrapCoord(&imageT, tm0.wrap_t, image_height_minus_1 + 1);
if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].cache_manually_managed))
TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, image_width_minus_1, texfmt, tlut,
tlutfmt);
else
TexDecoder_DecodeTexelRGBA8FromTmem(sample, imageSrc, imageSrcOdd, imageS, imageT,
image_width_minus_1);
}
}
} // namespace TextureSampler