mirror of
https://github.com/Lime3DS/Lime3DS.git
synced 2025-01-16 08:32:11 +01:00
renderer/vulkan: Emulate custom border colors in shaders when unavailable. (#6878)
This commit is contained in:
parent
f3d92dd3b8
commit
6ddf4b241f
@ -12,8 +12,16 @@ set(HASH_FILES
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_util.cpp"
|
||||
"${VIDEO_CORE}/renderer_opengl/gl_shader_util.h"
|
||||
"${VIDEO_CORE}/renderer_vulkan/vk_shader_gen.cpp"
|
||||
"${VIDEO_CORE}/renderer_vulkan/vk_shader_gen.h"
|
||||
"${VIDEO_CORE}/renderer_vulkan/vk_shader_gen_spv.cpp"
|
||||
"${VIDEO_CORE}/renderer_vulkan/vk_shader_gen_spv.h"
|
||||
"${VIDEO_CORE}/renderer_vulkan/vk_shader_util.cpp"
|
||||
"${VIDEO_CORE}/renderer_vulkan/vk_shader_util.h"
|
||||
"${VIDEO_CORE}/shader/shader.cpp"
|
||||
"${VIDEO_CORE}/shader/shader.h"
|
||||
"${VIDEO_CORE}/shader/shader_uniforms.cpp"
|
||||
"${VIDEO_CORE}/shader/shader_uniforms.h"
|
||||
"${VIDEO_CORE}/pica.cpp"
|
||||
"${VIDEO_CORE}/pica.h"
|
||||
"${VIDEO_CORE}/regs_framebuffer.h"
|
||||
|
@ -599,6 +599,17 @@ void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) {
|
||||
SyncTextureLodBias(2);
|
||||
break;
|
||||
|
||||
// Texture borders
|
||||
case PICA_REG_INDEX(texturing.texture0.border_color):
|
||||
SyncTextureBorderColor(0);
|
||||
break;
|
||||
case PICA_REG_INDEX(texturing.texture1.border_color):
|
||||
SyncTextureBorderColor(1);
|
||||
break;
|
||||
case PICA_REG_INDEX(texturing.texture2.border_color):
|
||||
SyncTextureBorderColor(2);
|
||||
break;
|
||||
|
||||
// Clipping plane
|
||||
case PICA_REG_INDEX(rasterizer.clip_coef[0]):
|
||||
case PICA_REG_INDEX(rasterizer.clip_coef[1]):
|
||||
@ -821,6 +832,16 @@ void RasterizerAccelerated::SyncTextureLodBias(int tex_index) {
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncTextureBorderColor(int tex_index) {
|
||||
const auto pica_textures = regs.texturing.GetTextures();
|
||||
const auto params = pica_textures[tex_index].config;
|
||||
const Common::Vec4f border_color = ColorRGBA8(params.border_color.raw);
|
||||
if (border_color != uniform_block_data.data.tex_border_color[tex_index]) {
|
||||
uniform_block_data.data.tex_border_color[tex_index] = border_color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncClipCoef() {
|
||||
const auto raw_clip_coef = regs.rasterizer.GetClipCoef();
|
||||
const Common::Vec4f new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(),
|
||||
|
@ -97,6 +97,9 @@ protected:
|
||||
/// Syncs the texture LOD bias to match the PICA register
|
||||
void SyncTextureLodBias(int tex_index);
|
||||
|
||||
/// Syncs the texture border color to match the PICA registers
|
||||
void SyncTextureBorderColor(int tex_index);
|
||||
|
||||
/// Syncs the clip coefficients to match the PICA register
|
||||
void SyncClipCoef();
|
||||
|
||||
|
@ -409,7 +409,9 @@ bool Instance::CreateDevice() {
|
||||
const bool has_extended_dynamic_state =
|
||||
add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, is_arm || is_qualcomm,
|
||||
"it is broken on Qualcomm and ARM drivers");
|
||||
const bool has_custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
||||
const bool has_custom_border_color =
|
||||
add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, is_qualcomm,
|
||||
"it is broken on most Qualcomm driver versions");
|
||||
const bool has_index_type_uint8 = add_extension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME);
|
||||
const bool has_pipeline_creation_cache_control =
|
||||
add_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME);
|
||||
|
@ -69,6 +69,17 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) {
|
||||
|
||||
state.texture2_use_coord1.Assign(regs.texturing.main_config.texture2_use_coord1 != 0);
|
||||
|
||||
const auto pica_textures = regs.texturing.GetTextures();
|
||||
for (u32 tex_index = 0; tex_index < 3; tex_index++) {
|
||||
const auto config = pica_textures[tex_index].config;
|
||||
state.texture_border_color[tex_index].enable_s.Assign(
|
||||
!instance.IsCustomBorderColorSupported() &&
|
||||
config.wrap_s == TexturingRegs::TextureConfig::WrapMode::ClampToBorder);
|
||||
state.texture_border_color[tex_index].enable_t.Assign(
|
||||
!instance.IsCustomBorderColorSupported() &&
|
||||
config.wrap_t == TexturingRegs::TextureConfig::WrapMode::ClampToBorder);
|
||||
}
|
||||
|
||||
// Emulate logic op in the shader if not supported. This is mostly for mobile GPUs
|
||||
const bool emulate_logic_op = instance.NeedsLogicOpEmulation() &&
|
||||
!Pica::g_state.regs.framebuffer.output_merger.alphablend_enable;
|
||||
@ -284,54 +295,6 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) {
|
||||
stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
|
||||
}
|
||||
|
||||
static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) {
|
||||
const auto& state = config.state;
|
||||
switch (texture_unit) {
|
||||
case 0:
|
||||
// Only unit 0 respects the texturing type
|
||||
switch (state.texture0_type) {
|
||||
case TexturingRegs::TextureConfig::Texture2D:
|
||||
return "textureLod(tex0, texcoord0, getLod(texcoord0 * "
|
||||
"vec2(textureSize(tex0, 0))) + tex_lod_bias[0])";
|
||||
case TexturingRegs::TextureConfig::Projection2D:
|
||||
// TODO (wwylele): find the exact LOD formula for projection texture
|
||||
return "textureProj(tex0, vec3(texcoord0, texcoord0_w))";
|
||||
case TexturingRegs::TextureConfig::TextureCube:
|
||||
return "texture(tex_cube, vec3(texcoord0, texcoord0_w))";
|
||||
case TexturingRegs::TextureConfig::Shadow2D:
|
||||
return "shadowTexture(texcoord0, texcoord0_w)";
|
||||
case TexturingRegs::TextureConfig::ShadowCube:
|
||||
return "shadowTextureCube(texcoord0, texcoord0_w)";
|
||||
case TexturingRegs::TextureConfig::Disabled:
|
||||
return "vec4(0.0)";
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type);
|
||||
UNIMPLEMENTED();
|
||||
return "texture(tex0, texcoord0)";
|
||||
}
|
||||
case 1:
|
||||
return "textureLod(tex1, texcoord1, getLod(texcoord1 * "
|
||||
"vec2(textureSize(tex1, 0))) + tex_lod_bias[1])";
|
||||
case 2:
|
||||
if (state.texture2_use_coord1)
|
||||
return "textureLod(tex2, texcoord1, getLod(texcoord1 * "
|
||||
"vec2(textureSize(tex2, 0))) + tex_lod_bias[2])";
|
||||
else
|
||||
return "textureLod(tex2, texcoord2, getLod(texcoord2 * "
|
||||
"vec2(textureSize(tex2, 0))) + tex_lod_bias[2])";
|
||||
case 3:
|
||||
if (state.proctex.enable) {
|
||||
return "ProcTex()";
|
||||
} else {
|
||||
LOG_DEBUG(Render_OpenGL, "Using Texture3 without enabling it");
|
||||
return "vec4(0.0)";
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes the specified TEV stage source component(s)
|
||||
static void AppendSource(std::string& out, const PicaFSConfig& config,
|
||||
TevStageConfig::Source source, std::string_view index_name) {
|
||||
@ -347,16 +310,16 @@ static void AppendSource(std::string& out, const PicaFSConfig& config,
|
||||
out += "secondary_fragment_color";
|
||||
break;
|
||||
case Source::Texture0:
|
||||
out += SampleTexture(config, 0);
|
||||
out += "sampleTexUnit0()";
|
||||
break;
|
||||
case Source::Texture1:
|
||||
out += SampleTexture(config, 1);
|
||||
out += "sampleTexUnit1()";
|
||||
break;
|
||||
case Source::Texture2:
|
||||
out += SampleTexture(config, 2);
|
||||
out += "sampleTexUnit2()";
|
||||
break;
|
||||
case Source::Texture3:
|
||||
out += SampleTexture(config, 3);
|
||||
out += "sampleTexUnit3()";
|
||||
break;
|
||||
case Source::PreviousBuffer:
|
||||
out += "combiner_buffer";
|
||||
@ -656,7 +619,7 @@ static void WriteLighting(std::string& out, const PicaFSConfig& config) {
|
||||
|
||||
// Compute fragment normals and tangents
|
||||
const auto perturbation = [&] {
|
||||
return fmt::format("2.0 * ({}).rgb - 1.0", SampleTexture(config, lighting.bump_selector));
|
||||
return fmt::format("2.0 * (sampleTexUnit{}()).rgb - 1.0", lighting.bump_selector);
|
||||
};
|
||||
|
||||
switch (lighting.bump_mode) {
|
||||
@ -700,7 +663,7 @@ static void WriteLighting(std::string& out, const PicaFSConfig& config) {
|
||||
"vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n";
|
||||
|
||||
if (lighting.enable_shadow) {
|
||||
std::string shadow_texture = SampleTexture(config, lighting.shadow_selector);
|
||||
std::string shadow_texture = fmt::format("sampleTexUnit{}()", lighting.shadow_selector);
|
||||
if (lighting.shadow_invert) {
|
||||
out += fmt::format("vec4 shadow = vec4(1.0) - {};\n", shadow_texture);
|
||||
} else {
|
||||
@ -1310,6 +1273,7 @@ float mix2(vec4 s, vec2 a) {
|
||||
|
||||
vec4 shadowTexture(vec2 uv, float w) {
|
||||
)";
|
||||
|
||||
if (!config.state.shadow_texture_orthographic) {
|
||||
out += "uv /= w;";
|
||||
}
|
||||
@ -1344,9 +1308,7 @@ vec4 shadowTextureCube(vec2 uv, float w) {
|
||||
uv = -c.xy;
|
||||
if (c.z > 0.0) uv.x = -uv.x;
|
||||
}
|
||||
)";
|
||||
out += "uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias));";
|
||||
out += R"(
|
||||
uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias));
|
||||
vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5);
|
||||
vec2 coord_floor = floor(coord);
|
||||
vec2 f = coord - coord_floor;
|
||||
@ -1409,10 +1371,92 @@ vec4 shadowTextureCube(vec2 uv, float w) {
|
||||
CompareShadow(pixels.w, z));
|
||||
return vec4(mix2(s, f));
|
||||
}
|
||||
)";
|
||||
)";
|
||||
|
||||
if (config.state.proctex.enable)
|
||||
if (config.state.proctex.enable) {
|
||||
AppendProcTexSampler(out, config);
|
||||
}
|
||||
|
||||
// Emit one GLSL helper per texture unit: `vec4 sampleTexUnitN()`.
//  - Unit 0 returns vec4(0.0) when its texture type is Disabled.
//  - Unit 3 returns ProcTex() when proctex is enabled, vec4(0.0) otherwise.
//  - Units 0-2 first return tex_border_color[N] when border emulation is enabled for an
//    axis and the coordinate on that axis lies outside [0, 1]; otherwise they sample.
for (u32 texture_unit = 0; texture_unit < 4; texture_unit++) {
    out += fmt::format("vec4 sampleTexUnit{}() {{", texture_unit);
    if (texture_unit == 0 && state.texture0_type == TexturingRegs::TextureConfig::Disabled) {
        out += "return vec4(0.0);}";
        continue;
    } else if (texture_unit == 3) {
        if (state.proctex.enable) {
            out += "return ProcTex();}";
        } else {
            out += "return vec4(0.0);}";
        }
        continue;
    }

    // Unit 2 may be configured to read unit 1's coordinates.
    u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 1 : texture_unit;
    if (config.state.texture_border_color[texture_unit].enable_s) {
        out += fmt::format(R"(
            if (texcoord{}.x < 0 || texcoord{}.x > 1) {{
                return tex_border_color[{}];
            }}
        )",
                           texcoord_num, texcoord_num, texture_unit);
    }
    if (config.state.texture_border_color[texture_unit].enable_t) {
        out += fmt::format(R"(
            if (texcoord{}.y < 0 || texcoord{}.y > 1) {{
                return tex_border_color[{}];
            }}
        )",
                           texcoord_num, texcoord_num, texture_unit);
    }
    // TODO: 3D border?

    switch (texture_unit) {
    case 0:
        // Only unit 0 respects the texturing type
        switch (state.texture0_type) {
        case TexturingRegs::TextureConfig::Texture2D:
            out += "return textureLod(tex0, texcoord0, getLod(texcoord0 * "
                   "vec2(textureSize(tex0, 0))) + tex_lod_bias[0]);";
            break;
        case TexturingRegs::TextureConfig::Projection2D:
            // TODO (wwylele): find the exact LOD formula for projection texture
            out += "return textureProj(tex0, vec3(texcoord0, texcoord0_w));";
            break;
        case TexturingRegs::TextureConfig::TextureCube:
            out += "return texture(tex_cube, vec3(texcoord0, texcoord0_w));";
            break;
        case TexturingRegs::TextureConfig::Shadow2D:
            out += "return shadowTexture(texcoord0, texcoord0_w);";
            break;
        case TexturingRegs::TextureConfig::ShadowCube:
            out += "return shadowTextureCube(texcoord0, texcoord0_w);";
            break;
        default:
            LOG_CRITICAL(HW_GPU, "Unhandled texture type {:x}", state.texture0_type);
            UNIMPLEMENTED();
            out += "return texture(tex0, texcoord0);";
            break;
        }
        // Leave the outer switch here. Without this break, unit 0 fell through into
        // `case 1` and appended a second, unreachable tex1 return to sampleTexUnit0().
        break;
    case 1:
        out += "return textureLod(tex1, texcoord1, getLod(texcoord1 * vec2(textureSize(tex1, "
               "0))) + tex_lod_bias[1]);";
        break;
    case 2:
        // The LOD bias is always unit 2's own (index 2), whichever coordinate set is
        // selected; only the texcoord source depends on texture2_use_coord1.
        if (state.texture2_use_coord1) {
            out += "return textureLod(tex2, texcoord1, getLod(texcoord1 * "
                   "vec2(textureSize(tex2, 0))) + tex_lod_bias[2]);";
        } else {
            out += "return textureLod(tex2, texcoord2, getLod(texcoord2 * "
                   "vec2(textureSize(tex2, 0))) + tex_lod_bias[2]);";
        }
        break;
    default:
        UNREACHABLE();
        break;
    }

    out += "}";
}
|
||||
|
||||
// We round the interpolated primary color to the nearest 1/255th
|
||||
// This maintains the PICA's 8 bits of precision
|
||||
|
@ -57,6 +57,11 @@ struct PicaFSConfigState {
|
||||
BitField<28, 1, u32> shadow_texture_orthographic;
|
||||
};
|
||||
|
||||
// Border color emulation flags, one entry for each of texture units 0-2.
// The PicaFSConfig constructor sets a flag when that axis uses ClampToBorder wrapping and
// the instance reports no custom border color support; the shader generators then emit
// an early return of tex_border_color[unit] for coordinates outside [0, 1] on that axis.
union {
|
||||
// Emulate the border color along the S (x) axis.
BitField<0, 1, u32> enable_s;
|
||||
// Emulate the border color along the T (y) axis.
BitField<1, 1, u32> enable_t;
|
||||
} texture_border_color[3];
|
||||
|
||||
std::array<TevStageConfigRaw, 6> tev_stages;
|
||||
|
||||
struct {
|
||||
|
@ -21,8 +21,8 @@ FragmentModule::FragmentModule(Core::TelemetrySession& telemetry_, const PicaFSC
|
||||
DefineArithmeticTypes();
|
||||
DefineUniformStructs();
|
||||
DefineInterface();
|
||||
if (config.state.proctex.enable) {
|
||||
DefineProcTexSampler();
|
||||
for (u32 i = 0; i < NUM_TEX_UNITS; i++) {
|
||||
DefineTexSampler(i);
|
||||
}
|
||||
DefineEntryPoint();
|
||||
}
|
||||
@ -225,7 +225,8 @@ void FragmentModule::WriteLighting() {
|
||||
|
||||
// Compute fragment normals and tangents
|
||||
const auto perturbation = [&]() -> Id {
|
||||
const Id texel{SampleTexture(lighting.bump_selector)};
|
||||
const Id texel{
|
||||
OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[lighting.bump_selector])};
|
||||
const Id texel_rgb{OpVectorShuffle(vec_ids.Get(3), texel, texel, 0, 1, 2)};
|
||||
const Id rgb_mul_two{OpVectorTimesScalar(vec_ids.Get(3), texel_rgb, ConstF32(2.f))};
|
||||
return OpFSub(vec_ids.Get(3), rgb_mul_two, ConstF32(1.f, 1.f, 1.f));
|
||||
@ -284,7 +285,7 @@ void FragmentModule::WriteLighting() {
|
||||
|
||||
Id shadow{ConstF32(1.f, 1.f, 1.f, 1.f)};
|
||||
if (lighting.enable_shadow) {
|
||||
shadow = SampleTexture(lighting.shadow_selector);
|
||||
shadow = OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[lighting.shadow_selector]);
|
||||
if (lighting.shadow_invert) {
|
||||
shadow = OpFSub(vec_ids.Get(4), ConstF32(1.f, 1.f, 1.f, 1.f), shadow);
|
||||
}
|
||||
@ -710,89 +711,6 @@ void FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func)
|
||||
}
|
||||
}
|
||||
|
||||
Id FragmentModule::SampleTexture(u32 texture_unit) {
|
||||
const PicaFSConfigState& state = config.state;
|
||||
const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};
|
||||
|
||||
// PICA's LOD formula for 2D textures.
|
||||
// This LOD formula is the same as the LOD lower limit defined in OpenGL.
|
||||
// f(x, y) >= max{m_u, m_v, m_w}
|
||||
// (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
|
||||
const auto sample_lod = [this, texture_unit](Id tex_id, Id texcoord_id) {
|
||||
const Id sampled_image{OpLoad(TypeSampledImage(image2d_id), tex_id)};
|
||||
const Id tex_image{OpImage(image2d_id, sampled_image)};
|
||||
const Id tex_size{OpImageQuerySizeLod(ivec_ids.Get(2), tex_image, ConstS32(0))};
|
||||
const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id)};
|
||||
const Id coord{OpFMul(vec_ids.Get(2), texcoord, OpConvertSToF(vec_ids.Get(2), tex_size))};
|
||||
const Id abs_dfdx_coord{OpFAbs(vec_ids.Get(2), OpDPdx(vec_ids.Get(2), coord))};
|
||||
const Id abs_dfdy_coord{OpFAbs(vec_ids.Get(2), OpDPdy(vec_ids.Get(2), coord))};
|
||||
const Id d{OpFMax(vec_ids.Get(2), abs_dfdx_coord, abs_dfdy_coord)};
|
||||
const Id dx_dy_max{
|
||||
OpFMax(f32_id, OpCompositeExtract(f32_id, d, 0), OpCompositeExtract(f32_id, d, 1))};
|
||||
const Id lod{OpLog2(f32_id, dx_dy_max)};
|
||||
const Id lod_bias{GetShaderDataMember(f32_id, ConstS32(28), ConstU32(texture_unit))};
|
||||
const Id biased_lod{OpFAdd(f32_id, lod, lod_bias)};
|
||||
return OpImageSampleExplicitLod(vec_ids.Get(4), sampled_image, texcoord,
|
||||
spv::ImageOperandsMask::Lod, biased_lod);
|
||||
};
|
||||
|
||||
const auto sample = [this](Id tex_id, bool projection) {
|
||||
const Id image_type = tex_id.value == tex_cube_id.value ? image_cube_id : image2d_id;
|
||||
const Id sampled_image{OpLoad(TypeSampledImage(image_type), tex_id)};
|
||||
const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)};
|
||||
const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
|
||||
const Id coord{OpCompositeConstruct(vec_ids.Get(3),
|
||||
OpCompositeExtract(f32_id, texcoord0, 0),
|
||||
OpCompositeExtract(f32_id, texcoord0, 1), texcoord0_w)};
|
||||
if (projection) {
|
||||
return OpImageSampleProjImplicitLod(vec_ids.Get(4), sampled_image, coord);
|
||||
} else {
|
||||
return OpImageSampleImplicitLod(vec_ids.Get(4), sampled_image, coord);
|
||||
}
|
||||
};
|
||||
|
||||
switch (texture_unit) {
|
||||
case 0:
|
||||
// Only unit 0 respects the texturing type
|
||||
switch (state.texture0_type) {
|
||||
case Pica::TexturingRegs::TextureConfig::Texture2D:
|
||||
return sample_lod(tex0_id, texcoord0_id);
|
||||
case Pica::TexturingRegs::TextureConfig::Projection2D:
|
||||
return sample(tex0_id, true);
|
||||
case Pica::TexturingRegs::TextureConfig::TextureCube:
|
||||
return sample(tex_cube_id, false);
|
||||
case Pica::TexturingRegs::TextureConfig::Shadow2D:
|
||||
return SampleShadow();
|
||||
// case Pica::TexturingRegs::TextureConfig::ShadowCube:
|
||||
// return "shadowTextureCube(texcoord0, texcoord0_w)";
|
||||
case Pica::TexturingRegs::TextureConfig::Disabled:
|
||||
return zero_vec;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unhandled texture type {:x}", state.texture0_type);
|
||||
UNIMPLEMENTED();
|
||||
return zero_vec;
|
||||
}
|
||||
case 1:
|
||||
return sample_lod(tex1_id, texcoord1_id);
|
||||
case 2:
|
||||
if (state.texture2_use_coord1) {
|
||||
return sample_lod(tex2_id, texcoord1_id);
|
||||
} else {
|
||||
return sample_lod(tex2_id, texcoord2_id);
|
||||
}
|
||||
case 3:
|
||||
if (state.proctex.enable) {
|
||||
return OpFunctionCall(vec_ids.Get(4), proctex_func);
|
||||
} else {
|
||||
LOG_DEBUG(Render_Vulkan, "Using Texture3 without enabling it");
|
||||
return zero_vec;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return void_id;
|
||||
}
|
||||
}
|
||||
|
||||
Id FragmentModule::CompareShadow(Id pixel, Id z) {
|
||||
const Id pixel_d24{OpShiftRightLogical(u32_id, pixel, ConstS32(8))};
|
||||
const Id pixel_s8{OpConvertUToF(f32_id, OpBitwiseAnd(u32_id, pixel, ConstU32(255u)))};
|
||||
@ -802,7 +720,7 @@ Id FragmentModule::CompareShadow(Id pixel, Id z) {
|
||||
}
|
||||
|
||||
Id FragmentModule::SampleShadow() {
|
||||
const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)};
|
||||
const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord_id[0])};
|
||||
const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
|
||||
const Id abs_min_w{OpFMul(f32_id, OpFMin(f32_id, OpFAbs(f32_id, texcoord0_w), ConstF32(1.f)),
|
||||
ConstF32(16777215.f))};
|
||||
@ -941,11 +859,145 @@ Id FragmentModule::AppendProcTexCombineAndMap(ProcTexCombiner combiner, Id u, Id
|
||||
return ProcTexLookupLUT(offset, combined);
|
||||
}
|
||||
|
||||
void FragmentModule::DefineProcTexSampler() {
|
||||
void FragmentModule::DefineTexSampler(u32 texture_unit) {
|
||||
const PicaFSConfigState& state = config.state;
|
||||
|
||||
const Id func_type{TypeFunction(vec_ids.Get(4))};
|
||||
proctex_func = OpFunction(vec_ids.Get(4), spv::FunctionControlMask::MaskNone, func_type);
|
||||
sample_tex_unit_func[texture_unit] =
|
||||
OpFunction(vec_ids.Get(4), spv::FunctionControlMask::MaskNone, func_type);
|
||||
AddLabel(OpLabel());
|
||||
|
||||
const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};
|
||||
|
||||
if (texture_unit == 0 && state.texture0_type == TexturingRegs::TextureConfig::Disabled) {
|
||||
OpReturnValue(zero_vec);
|
||||
OpFunctionEnd();
|
||||
return;
|
||||
}
|
||||
|
||||
if (texture_unit == 3) {
|
||||
if (state.proctex.enable) {
|
||||
OpReturnValue(ProcTexSampler());
|
||||
} else {
|
||||
OpReturnValue(zero_vec);
|
||||
}
|
||||
OpFunctionEnd();
|
||||
return;
|
||||
}
|
||||
|
||||
const Id border_label{OpLabel()};
|
||||
const Id not_border_label{OpLabel()};
|
||||
|
||||
u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 1 : texture_unit;
|
||||
const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[texcoord_num])};
|
||||
|
||||
auto& texture_border_color = state.texture_border_color[texture_unit];
|
||||
if (texture_border_color.enable_s || texture_border_color.enable_t) {
|
||||
const Id texcoord_s{OpCompositeExtract(f32_id, texcoord, 0)};
|
||||
const Id texcoord_t{OpCompositeExtract(f32_id, texcoord, 1)};
|
||||
|
||||
const Id s_lt_zero{OpFOrdLessThan(bool_id, texcoord_s, ConstF32(0.0f))};
|
||||
const Id s_gt_one{OpFOrdGreaterThan(bool_id, texcoord_s, ConstF32(1.0f))};
|
||||
const Id t_lt_zero{OpFOrdLessThan(bool_id, texcoord_t, ConstF32(0.0f))};
|
||||
const Id t_gt_one{OpFOrdGreaterThan(bool_id, texcoord_t, ConstF32(1.0f))};
|
||||
|
||||
Id cond{};
|
||||
if (texture_border_color.enable_s && texture_border_color.enable_t) {
|
||||
cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(4), s_lt_zero, s_gt_one,
|
||||
t_lt_zero, t_gt_one));
|
||||
} else if (texture_border_color.enable_s) {
|
||||
cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(2), s_lt_zero, s_gt_one));
|
||||
} else if (texture_border_color.enable_t) {
|
||||
cond = OpAny(bool_id, OpCompositeConstruct(bvec_ids.Get(2), t_lt_zero, t_gt_one));
|
||||
}
|
||||
|
||||
OpSelectionMerge(not_border_label, spv::SelectionControlMask::MaskNone);
|
||||
OpBranchConditional(cond, border_label, not_border_label);
|
||||
|
||||
AddLabel(border_label);
|
||||
const Id border_color{
|
||||
GetShaderDataMember(vec_ids.Get(4), ConstS32(29), ConstU32(texture_unit))};
|
||||
OpReturnValue(border_color);
|
||||
|
||||
AddLabel(not_border_label);
|
||||
}
|
||||
|
||||
// PICA's LOD formula for 2D textures.
|
||||
// This LOD formula is the same as the LOD lower limit defined in OpenGL.
|
||||
// f(x, y) >= max{m_u, m_v, m_w}
|
||||
// (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
|
||||
const auto sample_lod = [&](Id tex_id) {
|
||||
const Id sampled_image{OpLoad(TypeSampledImage(image2d_id), tex_id)};
|
||||
const Id tex_image{OpImage(image2d_id, sampled_image)};
|
||||
const Id tex_size{OpImageQuerySizeLod(ivec_ids.Get(2), tex_image, ConstS32(0))};
|
||||
const Id coord{OpFMul(vec_ids.Get(2), texcoord, OpConvertSToF(vec_ids.Get(2), tex_size))};
|
||||
const Id abs_dfdx_coord{OpFAbs(vec_ids.Get(2), OpDPdx(vec_ids.Get(2), coord))};
|
||||
const Id abs_dfdy_coord{OpFAbs(vec_ids.Get(2), OpDPdy(vec_ids.Get(2), coord))};
|
||||
const Id d{OpFMax(vec_ids.Get(2), abs_dfdx_coord, abs_dfdy_coord)};
|
||||
const Id dx_dy_max{
|
||||
OpFMax(f32_id, OpCompositeExtract(f32_id, d, 0), OpCompositeExtract(f32_id, d, 1))};
|
||||
const Id lod{OpLog2(f32_id, dx_dy_max)};
|
||||
const Id lod_bias{GetShaderDataMember(f32_id, ConstS32(28), ConstU32(texture_unit))};
|
||||
const Id biased_lod{OpFAdd(f32_id, lod, lod_bias)};
|
||||
return OpImageSampleExplicitLod(vec_ids.Get(4), sampled_image, texcoord,
|
||||
spv::ImageOperandsMask::Lod, biased_lod);
|
||||
};
|
||||
|
||||
const auto sample_3d = [&](Id tex_id, bool projection) {
|
||||
const Id image_type = tex_id.value == tex_cube_id.value ? image_cube_id : image2d_id;
|
||||
const Id sampled_image{OpLoad(TypeSampledImage(image_type), tex_id)};
|
||||
const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
|
||||
const Id coord{OpCompositeConstruct(vec_ids.Get(3), OpCompositeExtract(f32_id, texcoord, 0),
|
||||
OpCompositeExtract(f32_id, texcoord, 1), texcoord0_w)};
|
||||
if (projection) {
|
||||
return OpImageSampleProjImplicitLod(vec_ids.Get(4), sampled_image, coord);
|
||||
} else {
|
||||
return OpImageSampleImplicitLod(vec_ids.Get(4), sampled_image, coord);
|
||||
}
|
||||
};
|
||||
|
||||
Id ret_val{void_id};
|
||||
switch (texture_unit) {
|
||||
case 0:
|
||||
// Only unit 0 respects the texturing type
|
||||
switch (state.texture0_type) {
|
||||
case Pica::TexturingRegs::TextureConfig::Texture2D:
|
||||
ret_val = sample_lod(tex0_id);
|
||||
break;
|
||||
case Pica::TexturingRegs::TextureConfig::Projection2D:
|
||||
ret_val = sample_3d(tex0_id, true);
|
||||
break;
|
||||
case Pica::TexturingRegs::TextureConfig::TextureCube:
|
||||
ret_val = sample_3d(tex_cube_id, false);
|
||||
break;
|
||||
case Pica::TexturingRegs::TextureConfig::Shadow2D:
|
||||
ret_val = SampleShadow();
|
||||
// case Pica::TexturingRegs::TextureConfig::ShadowCube:
|
||||
// return "shadowTextureCube(texcoord0, texcoord0_w)";
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unhandled texture type {:x}", state.texture0_type);
|
||||
UNIMPLEMENTED();
|
||||
ret_val = zero_vec;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
ret_val = sample_lod(tex1_id);
|
||||
break;
|
||||
case 2:
|
||||
ret_val = sample_lod(tex2_id);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
break;
|
||||
}
|
||||
|
||||
OpReturnValue(ret_val);
|
||||
OpFunctionEnd();
|
||||
}
|
||||
|
||||
Id FragmentModule::ProcTexSampler() {
|
||||
// Define noise tables at the beginning of the function
|
||||
if (config.state.proctex.noise_enable) {
|
||||
noise1d_table =
|
||||
@ -957,24 +1009,11 @@ void FragmentModule::DefineProcTexSampler() {
|
||||
|
||||
Id uv{};
|
||||
if (config.state.proctex.coord < 3) {
|
||||
Id texcoord_id{};
|
||||
switch (config.state.proctex.coord.Value()) {
|
||||
case 0:
|
||||
texcoord_id = texcoord0_id;
|
||||
break;
|
||||
case 1:
|
||||
texcoord_id = texcoord1_id;
|
||||
break;
|
||||
case 2:
|
||||
texcoord_id = texcoord2_id;
|
||||
break;
|
||||
}
|
||||
|
||||
const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id)};
|
||||
const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[config.state.proctex.coord.Value()])};
|
||||
uv = OpFAbs(vec_ids.Get(2), texcoord);
|
||||
} else {
|
||||
LOG_CRITICAL(Render_Vulkan, "Unexpected proctex.coord >= 3");
|
||||
uv = OpFAbs(vec_ids.Get(2), OpLoad(vec_ids.Get(2), texcoord0_id));
|
||||
uv = OpFAbs(vec_ids.Get(2), OpLoad(vec_ids.Get(2), texcoord_id[0]));
|
||||
}
|
||||
|
||||
// This LOD formula is the same as the LOD upper limit defined in OpenGL.
|
||||
@ -1058,8 +1097,7 @@ void FragmentModule::DefineProcTexSampler() {
|
||||
final_color = OpCompositeInsert(vec_ids.Get(4), final_alpha, final_color, 3);
|
||||
}
|
||||
|
||||
OpReturnValue(final_color);
|
||||
OpFunctionEnd();
|
||||
return final_color;
|
||||
}
|
||||
|
||||
Id FragmentModule::Byteround(Id variable_id, u32 size) {
|
||||
@ -1226,13 +1264,13 @@ Id FragmentModule::AppendSource(TevStageConfig::Source source, s32 index) {
|
||||
case Source::SecondaryFragmentColor:
|
||||
return secondary_fragment_color;
|
||||
case Source::Texture0:
|
||||
return SampleTexture(0);
|
||||
return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[0]);
|
||||
case Source::Texture1:
|
||||
return SampleTexture(1);
|
||||
return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[1]);
|
||||
case Source::Texture2:
|
||||
return SampleTexture(2);
|
||||
return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[2]);
|
||||
case Source::Texture3:
|
||||
return SampleTexture(3);
|
||||
return OpFunctionCall(vec_ids.Get(4), sample_tex_unit_func[3]);
|
||||
case Source::PreviousBuffer:
|
||||
return combiner_buffer;
|
||||
case Source::Constant:
|
||||
@ -1428,9 +1466,9 @@ void FragmentModule::DefineEntryPoint() {
|
||||
|
||||
const Id main_type{TypeFunction(TypeVoid())};
|
||||
const Id main_func{OpFunction(TypeVoid(), spv::FunctionControlMask::MaskNone, main_type)};
|
||||
AddEntryPoint(spv::ExecutionModel::Fragment, main_func, "main", primary_color_id, texcoord0_id,
|
||||
texcoord1_id, texcoord2_id, texcoord0_w_id, normquat_id, view_id, color_id,
|
||||
gl_frag_coord_id, gl_frag_depth_id);
|
||||
AddEntryPoint(spv::ExecutionModel::Fragment, main_func, "main", primary_color_id,
|
||||
texcoord_id[0], texcoord_id[1], texcoord_id[2], texcoord0_w_id, normquat_id,
|
||||
view_id, color_id, gl_frag_coord_id, gl_frag_depth_id);
|
||||
AddExecutionMode(main_func, spv::ExecutionMode::OriginUpperLeft);
|
||||
AddExecutionMode(main_func, spv::ExecutionMode::DepthReplacing);
|
||||
}
|
||||
@ -1443,21 +1481,25 @@ void FragmentModule::DefineUniformStructs() {
|
||||
const Id light_src_array_id{TypeArray(light_src_struct_id, ConstU32(NUM_LIGHTS))};
|
||||
const Id lighting_lut_array_id{TypeArray(ivec_ids.Get(4), ConstU32(NUM_LIGHTING_SAMPLERS / 4))};
|
||||
const Id const_color_array_id{TypeArray(vec_ids.Get(4), ConstU32(NUM_TEV_STAGES))};
|
||||
const Id border_color_array_id{TypeArray(vec_ids.Get(4), ConstU32(NUM_NON_PROC_TEX_UNITS))};
|
||||
|
||||
const Id shader_data_struct_id{TypeStruct(
|
||||
i32_id, i32_id, f32_id, f32_id, f32_id, f32_id, i32_id, i32_id, i32_id, i32_id, i32_id,
|
||||
i32_id, i32_id, i32_id, i32_id, i32_id, f32_id, i32_id, u32_id, lighting_lut_array_id,
|
||||
vec_ids.Get(3), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(3),
|
||||
light_src_array_id, const_color_array_id, vec_ids.Get(4), vec_ids.Get(3), vec_ids.Get(4))};
|
||||
const Id shader_data_struct_id{
|
||||
TypeStruct(i32_id, i32_id, f32_id, f32_id, f32_id, f32_id, i32_id, i32_id, i32_id, i32_id,
|
||||
i32_id, i32_id, i32_id, i32_id, i32_id, i32_id, f32_id, i32_id, u32_id,
|
||||
lighting_lut_array_id, vec_ids.Get(3), vec_ids.Get(2), vec_ids.Get(2),
|
||||
vec_ids.Get(2), vec_ids.Get(3), light_src_array_id, const_color_array_id,
|
||||
vec_ids.Get(4), vec_ids.Get(3), border_color_array_id, vec_ids.Get(4))};
|
||||
|
||||
constexpr std::array light_src_offsets{0u, 16u, 32u, 48u, 64u, 80u, 92u, 96u};
|
||||
constexpr std::array shader_data_offsets{
|
||||
0u, 4u, 8u, 12u, 16u, 20u, 24u, 28u, 32u, 36u, 40u, 44u, 48u, 52u, 56u,
|
||||
60u, 64u, 68u, 72u, 80u, 176u, 192u, 200u, 208u, 224u, 240u, 1136u, 1232u, 1248u, 1264u};
|
||||
constexpr std::array shader_data_offsets{0u, 4u, 8u, 12u, 16u, 20u, 24u, 28u,
|
||||
32u, 36u, 40u, 44u, 48u, 52u, 56u, 60u,
|
||||
64u, 68u, 72u, 80u, 176u, 192u, 200u, 208u,
|
||||
224u, 240u, 1136u, 1232u, 1248u, 1264u, 1312u};
|
||||
|
||||
Decorate(lighting_lut_array_id, spv::Decoration::ArrayStride, 16u);
|
||||
Decorate(light_src_array_id, spv::Decoration::ArrayStride, 112u);
|
||||
Decorate(const_color_array_id, spv::Decoration::ArrayStride, 16u);
|
||||
Decorate(border_color_array_id, spv::Decoration::ArrayStride, 16u);
|
||||
for (u32 i = 0; i < static_cast<u32>(light_src_offsets.size()); i++) {
|
||||
MemberDecorate(light_src_struct_id, i, spv::Decoration::Offset, light_src_offsets[i]);
|
||||
}
|
||||
@ -1475,9 +1517,9 @@ void FragmentModule::DefineUniformStructs() {
|
||||
void FragmentModule::DefineInterface() {
|
||||
// Define interface block
|
||||
primary_color_id = DefineInput(vec_ids.Get(4), 1);
|
||||
texcoord0_id = DefineInput(vec_ids.Get(2), 2);
|
||||
texcoord1_id = DefineInput(vec_ids.Get(2), 3);
|
||||
texcoord2_id = DefineInput(vec_ids.Get(2), 4);
|
||||
texcoord_id[0] = DefineInput(vec_ids.Get(2), 2);
|
||||
texcoord_id[1] = DefineInput(vec_ids.Get(2), 3);
|
||||
texcoord_id[2] = DefineInput(vec_ids.Get(2), 4);
|
||||
texcoord0_w_id = DefineInput(f32_id, 5);
|
||||
normquat_id = DefineInput(vec_ids.Get(4), 6);
|
||||
view_id = DefineInput(vec_ids.Get(3), 7);
|
||||
|
@ -30,6 +30,8 @@ class FragmentModule : public Sirit::Module {
|
||||
static constexpr u32 NUM_TEV_STAGES = 6;
|
||||
static constexpr u32 NUM_LIGHTS = 8;
|
||||
static constexpr u32 NUM_LIGHTING_SAMPLERS = 24;
|
||||
static constexpr u32 NUM_TEX_UNITS = 4;
|
||||
static constexpr u32 NUM_NON_PROC_TEX_UNITS = 3;
|
||||
|
||||
public:
|
||||
explicit FragmentModule(Core::TelemetrySession& telemetry, const PicaFSConfig& config);
|
||||
@ -57,15 +59,15 @@ private:
|
||||
/// Writes the code to emulate the specified TEV stage
|
||||
void WriteTevStage(s32 index);
|
||||
|
||||
/// Defines the tex3 proctex sampling function
|
||||
void DefineProcTexSampler();
|
||||
/// Defines the basic texture sampling functions for a unit
|
||||
void DefineTexSampler(u32 texture_unit);
|
||||
|
||||
/// Function for sampling the procedurally generated texture unit.
|
||||
Id ProcTexSampler();
|
||||
|
||||
/// Writes the if-statement condition used to evaluate alpha testing.
|
||||
void WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func);
|
||||
|
||||
/// Samples the current fragment texel from the provided texture unit
|
||||
[[nodiscard]] Id SampleTexture(u32 texture_unit);
|
||||
|
||||
/// Samples the current fragment texel from shadow plane
|
||||
[[nodiscard]] Id SampleShadow();
|
||||
|
||||
@ -237,9 +239,7 @@ private:
|
||||
Id shader_data_id{};
|
||||
|
||||
Id primary_color_id{};
|
||||
Id texcoord0_id{};
|
||||
Id texcoord1_id{};
|
||||
Id texcoord2_id{};
|
||||
Id texcoord_id[NUM_NON_PROC_TEX_UNITS]{};
|
||||
Id texcoord0_w_id{};
|
||||
Id normquat_id{};
|
||||
Id view_id{};
|
||||
@ -276,7 +276,7 @@ private:
|
||||
Id alpha_results_2{};
|
||||
Id alpha_results_3{};
|
||||
|
||||
Id proctex_func{};
|
||||
Id sample_tex_unit_func[NUM_TEX_UNITS]{};
|
||||
Id noise1d_table{};
|
||||
Id noise2d_table{};
|
||||
Id lut_offsets{};
|
||||
|
@ -67,6 +67,7 @@ layout ({}std140) uniform shader_data {{
|
||||
vec4 const_color[NUM_TEV_STAGES];
|
||||
vec4 tev_combiner_buffer_color;
|
||||
vec3 tex_lod_bias;
|
||||
vec4 tex_border_color[3];
|
||||
vec4 clip_coef;
|
||||
}};
|
||||
)";
|
||||
|
@ -64,10 +64,11 @@ struct UniformData {
|
||||
alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages
|
||||
alignas(16) Common::Vec4f tev_combiner_buffer_color;
|
||||
alignas(16) Common::Vec3f tex_lod_bias;
|
||||
alignas(16) Common::Vec4f tex_border_color[3];
|
||||
alignas(16) Common::Vec4f clip_coef;
|
||||
};
|
||||
|
||||
static_assert(sizeof(UniformData) == 0x500,
|
||||
static_assert(sizeof(UniformData) == 0x530,
|
||||
"The size of the UniformData does not match the structure in the shader");
|
||||
static_assert(sizeof(UniformData) < 16384,
|
||||
"UniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
Loading…
x
Reference in New Issue
Block a user