Use generic versions of the Convert* functions rather than generic local functions ("lambdas").

This is some real monkey's paw shit.
This commit is contained in:
riperiperi 2020-05-27 00:13:04 +01:00
parent 85d0327542
commit aa43dcfbe8

@ -1,5 +1,6 @@
using Ryujinx.Common; using Ryujinx.Common;
using System; using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics; using System.Runtime.Intrinsics;
using static Ryujinx.Graphics.Texture.BlockLinearConstants; using static Ryujinx.Graphics.Texture.BlockLinearConstants;
@ -9,7 +10,7 @@ namespace Ryujinx.Graphics.Texture
{ {
private const int HostStrideAlignment = 4; private const int HostStrideAlignment = 4;
public static Span<byte> ConvertBlockLinearToLinear( private static unsafe Span<byte> ConvertBlockLinearToLinear<T>(
int width, int width,
int height, int height,
int depth, int depth,
@ -17,13 +18,14 @@ namespace Ryujinx.Graphics.Texture
int layers, int layers,
int blockWidth, int blockWidth,
int blockHeight, int blockHeight,
int bytesPerPixel,
int gobBlocksInY, int gobBlocksInY,
int gobBlocksInZ, int gobBlocksInZ,
int gobBlocksInTileX, int gobBlocksInTileX,
SizeInfo sizeInfo, SizeInfo sizeInfo,
ReadOnlySpan<byte> data) ReadOnlySpan<byte> data) where T : unmanaged
{ {
int bytesPerPixel = Unsafe.SizeOf<T>();
int outSize = GetTextureSize( int outSize = GetTextureSize(
width, width,
height, height,
@ -89,77 +91,89 @@ namespace Ryujinx.Graphics.Texture
mipGobBlocksInZ, mipGobBlocksInZ,
bytesPerPixel); bytesPerPixel);
unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged fixed (byte* outputPtr = output, dataPtr = data)
{ {
fixed (byte* outputPtr = output, dataPtr = data) byte* outPtr = outputPtr + outOffs;
for (int layer = 0; layer < layers; layer++)
{ {
byte* outPtr = outputPtr + outOffs; byte* inBaseOffset = dataPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level));
for (int layer = 0; layer < layers; layer++)
for (int z = 0; z < d; z++)
{ {
byte* inBaseOffset = dataPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level)); layoutConverter.SetZ(z);
for (int y = 0; y < h; y++)
for (int z = 0; z < d; z++)
{ {
layoutConverter.SetZ(z); layoutConverter.SetY(y);
for (int y = 0; y < h; y++)
for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64)
{ {
layoutConverter.SetY(y); byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x);
byte* offset2 = offset + 0x20;
byte* offset3 = offset + 0x100;
byte* offset4 = offset + 0x120;
for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64) Vector128<byte> value = *(Vector128<byte>*)offset;
{ Vector128<byte> value2 = *(Vector128<byte>*)offset2;
byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x); Vector128<byte> value3 = *(Vector128<byte>*)offset3;
byte* offset2 = offset + 0x20; Vector128<byte> value4 = *(Vector128<byte>*)offset4;
byte* offset3 = offset + 0x100;
byte* offset4 = offset + 0x120;
Vector128<byte> value = *(Vector128<byte>*)offset; *(Vector128<byte>*)outPtr = value;
Vector128<byte> value2 = *(Vector128<byte>*)offset2; *(Vector128<byte>*)(outPtr + 16) = value2;
Vector128<byte> value3 = *(Vector128<byte>*)offset3; *(Vector128<byte>*)(outPtr + 32) = value3;
Vector128<byte> value4 = *(Vector128<byte>*)offset4; *(Vector128<byte>*)(outPtr + 48) = value4;
*(Vector128<byte>*)outPtr = value;
*(Vector128<byte>*)(outPtr + 16) = value2;
*(Vector128<byte>*)(outPtr + 32) = value3;
*(Vector128<byte>*)(outPtr + 48) = value4;
}
for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16)
{
byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x);
*(Vector128<byte>*)outPtr = *(Vector128<byte>*)offset;
}
for (int x = xStart; x < w; x++, outPtr += bytesPerPixel)
{
byte* offset = inBaseOffset + layoutConverter.GetOffset(x);
*(T*)outPtr = *(T*)offset;
}
outPtr += outStrideGap;
} }
for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16)
{
byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x);
*(Vector128<byte>*)outPtr = *(Vector128<byte>*)offset;
}
for (int x = xStart; x < w; x++, outPtr += bytesPerPixel)
{
byte* offset = inBaseOffset + layoutConverter.GetOffset(x);
*(T*)outPtr = *(T*)offset;
}
outPtr += outStrideGap;
} }
} }
outOffs += stride * h * d * layers;
} }
return true; outOffs += stride * h * d * layers;
} }
bool _ = bytesPerPixel switch
{
1 => Convert<byte>(output, data),
2 => Convert<ushort>(output, data),
4 => Convert<uint>(output, data),
8 => Convert<ulong>(output, data),
12 => Convert<Bpp12Pixel>(output, data),
16 => Convert<Vector128<byte>>(output, data),
_ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.")
};
} }
return output; return output;
} }
/// <summary>
/// Non-generic entry point for the block linear to linear conversion.
/// Picks the generic <c>ConvertBlockLinearToLinear&lt;T&gt;</c> instantiation that matches
/// <paramref name="bytesPerPixel"/> and forwards every other argument to it unchanged.
/// </summary>
/// <param name="width">Forwarded unchanged to the generic implementation.</param>
/// <param name="height">Forwarded unchanged to the generic implementation.</param>
/// <param name="depth">Forwarded unchanged to the generic implementation.</param>
/// <param name="levels">Forwarded unchanged to the generic implementation.</param>
/// <param name="layers">Forwarded unchanged to the generic implementation.</param>
/// <param name="blockWidth">Forwarded unchanged to the generic implementation.</param>
/// <param name="blockHeight">Forwarded unchanged to the generic implementation.</param>
/// <param name="bytesPerPixel">
/// Selects the element type used by the generic implementation.
/// Supported values are 1, 2, 4, 8, 12 and 16.
/// </param>
/// <param name="gobBlocksInY">Forwarded unchanged to the generic implementation.</param>
/// <param name="gobBlocksInZ">Forwarded unchanged to the generic implementation.</param>
/// <param name="gobBlocksInTileX">Forwarded unchanged to the generic implementation.</param>
/// <param name="sizeInfo">Forwarded unchanged to the generic implementation.</param>
/// <param name="data">Input bytes, forwarded unchanged to the generic implementation.</param>
/// <returns>The span returned by the selected generic implementation.</returns>
/// <exception cref="NotSupportedException">
/// <paramref name="bytesPerPixel"/> is not one of the supported values.
/// </exception>
public static Span<byte> ConvertBlockLinearToLinear(
    int width,
    int height,
    int depth,
    int levels,
    int layers,
    int blockWidth,
    int blockHeight,
    int bytesPerPixel,
    int gobBlocksInY,
    int gobBlocksInZ,
    int gobBlocksInTileX,
    SizeInfo sizeInfo,
    ReadOnlySpan<byte> data)
{
    // The generic implementation derives its pixel size from the size of T, so each
    // supported size maps to a type of that size.
    // NOTE(review): Bpp12Pixel is presumably a 12-byte struct - confirm against its declaration.
    return bytesPerPixel switch
    {
        1 => ConvertBlockLinearToLinear<byte>(width, height, depth, levels, layers, blockWidth, blockHeight, gobBlocksInY, gobBlocksInZ, gobBlocksInTileX, sizeInfo, data),
        2 => ConvertBlockLinearToLinear<ushort>(width, height, depth, levels, layers, blockWidth, blockHeight, gobBlocksInY, gobBlocksInZ, gobBlocksInTileX, sizeInfo, data),
        4 => ConvertBlockLinearToLinear<uint>(width, height, depth, levels, layers, blockWidth, blockHeight, gobBlocksInY, gobBlocksInZ, gobBlocksInTileX, sizeInfo, data),
        8 => ConvertBlockLinearToLinear<ulong>(width, height, depth, levels, layers, blockWidth, blockHeight, gobBlocksInY, gobBlocksInZ, gobBlocksInTileX, sizeInfo, data),
        12 => ConvertBlockLinearToLinear<Bpp12Pixel>(width, height, depth, levels, layers, blockWidth, blockHeight, gobBlocksInY, gobBlocksInZ, gobBlocksInTileX, sizeInfo, data),
        16 => ConvertBlockLinearToLinear<Vector128<byte>>(width, height, depth, levels, layers, blockWidth, blockHeight, gobBlocksInY, gobBlocksInZ, gobBlocksInTileX, sizeInfo, data),
        // C# interpolation holes are written {expr}; a leading '$' would be emitted literally.
        _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.")
    };
}
public static Span<byte> ConvertLinearStridedToLinear( public static Span<byte> ConvertLinearStridedToLinear(
int width, int width,
int height, int height,
@ -191,7 +205,7 @@ namespace Ryujinx.Graphics.Texture
return output; return output;
} }
public static Span<byte> ConvertLinearToBlockLinear( private static unsafe Span<byte> ConvertLinearToBlockLinear<T>(
int width, int width,
int height, int height,
int depth, int depth,
@ -199,13 +213,14 @@ namespace Ryujinx.Graphics.Texture
int layers, int layers,
int blockWidth, int blockWidth,
int blockHeight, int blockHeight,
int bytesPerPixel,
int gobBlocksInY, int gobBlocksInY,
int gobBlocksInZ, int gobBlocksInZ,
int gobBlocksInTileX, int gobBlocksInTileX,
SizeInfo sizeInfo, SizeInfo sizeInfo,
ReadOnlySpan<byte> data) ReadOnlySpan<byte> data) where T : unmanaged
{ {
int bytesPerPixel = Unsafe.SizeOf<T>();
Span<byte> output = new byte[sizeInfo.TotalSize]; Span<byte> output = new byte[sizeInfo.TotalSize];
int inOffs = 0; int inOffs = 0;
@ -261,78 +276,90 @@ namespace Ryujinx.Graphics.Texture
mipGobBlocksInZ, mipGobBlocksInZ,
bytesPerPixel); bytesPerPixel);
unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged fixed (byte* outputPtr = output, dataPtr = data)
{ {
fixed (byte* outputPtr = output, dataPtr = data) byte* inPtr = dataPtr + inOffs;
for (int layer = 0; layer < layers; layer++)
{ {
byte* inPtr = dataPtr + inOffs; byte* outBaseOffset = outputPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level));
for (int layer = 0; layer < layers; layer++)
for (int z = 0; z < d; z++)
{ {
byte* outBaseOffset = outputPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level)); layoutConverter.SetZ(z);
for (int y = 0; y < h; y++)
for (int z = 0; z < d; z++)
{ {
layoutConverter.SetZ(z); layoutConverter.SetY(y);
for (int y = 0; y < h; y++)
for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64)
{ {
layoutConverter.SetY(y); byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x);
byte* offset2 = offset + 0x20;
byte* offset3 = offset + 0x100;
byte* offset4 = offset + 0x120;
for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64) Vector128<byte> value = *(Vector128<byte>*)inPtr;
{ Vector128<byte> value2 = *(Vector128<byte>*)(inPtr + 16);
byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x); Vector128<byte> value3 = *(Vector128<byte>*)(inPtr + 32);
byte* offset2 = offset + 0x20; Vector128<byte> value4 = *(Vector128<byte>*)(inPtr + 48);
byte* offset3 = offset + 0x100;
byte* offset4 = offset + 0x120;
Vector128<byte> value = *(Vector128<byte>*)inPtr; *(Vector128<byte>*)offset = value;
Vector128<byte> value2 = *(Vector128<byte>*)(inPtr + 16); *(Vector128<byte>*)offset2 = value2;
Vector128<byte> value3 = *(Vector128<byte>*)(inPtr + 32); *(Vector128<byte>*)offset3 = value3;
Vector128<byte> value4 = *(Vector128<byte>*)(inPtr + 48); *(Vector128<byte>*)offset4 = value4;
*(Vector128<byte>*)offset = value;
*(Vector128<byte>*)offset2 = value2;
*(Vector128<byte>*)offset3 = value3;
*(Vector128<byte>*)offset4 = value4;
}
for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16)
{
byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x);
*(Vector128<byte>*)offset = *(Vector128<byte>*)inPtr;
}
for (int x = xStart; x < w; x++, inPtr += bytesPerPixel)
{
byte* offset = outBaseOffset + layoutConverter.GetOffset(x);
*(T*)offset = *(T*)inPtr;
}
inPtr += inStrideGap;
} }
for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16)
{
byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x);
*(Vector128<byte>*)offset = *(Vector128<byte>*)inPtr;
}
for (int x = xStart; x < w; x++, inPtr += bytesPerPixel)
{
byte* offset = outBaseOffset + layoutConverter.GetOffset(x);
*(T*)offset = *(T*)inPtr;
}
inPtr += inStrideGap;
} }
} }
inOffs += stride * h * d * layers;
} }
return true; inOffs += stride * h * d * layers;
} }
bool _ = bytesPerPixel switch
{
1 => Convert<byte>(output, data),
2 => Convert<ushort>(output, data),
4 => Convert<uint>(output, data),
8 => Convert<ulong>(output, data),
12 => Convert<Bpp12Pixel>(output, data),
16 => Convert<Vector128<byte>>(output, data),
_ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.")
};
} }
return output; return output;
} }
/// <summary>
/// Non-generic entry point for the linear to block linear conversion.
/// Picks the generic <c>ConvertLinearToBlockLinear&lt;T&gt;</c> instantiation that matches
/// <paramref name="bytesPerPixel"/> and forwards every other argument to it unchanged.
/// </summary>
/// <param name="width">Forwarded unchanged to the generic implementation.</param>
/// <param name="height">Forwarded unchanged to the generic implementation.</param>
/// <param name="depth">Forwarded unchanged to the generic implementation.</param>
/// <param name="levels">Forwarded unchanged to the generic implementation.</param>
/// <param name="layers">Forwarded unchanged to the generic implementation.</param>
/// <param name="blockWidth">Forwarded unchanged to the generic implementation.</param>
/// <param name="blockHeight">Forwarded unchanged to the generic implementation.</param>
/// <param name="bytesPerPixel">
/// Selects the element type used by the generic implementation.
/// Supported values are 1, 2, 4, 8, 12 and 16.
/// </param>
/// <param name="gobBlocksInY">Forwarded unchanged to the generic implementation.</param>
/// <param name="gobBlocksInZ">Forwarded unchanged to the generic implementation.</param>
/// <param name="gobBlocksInTileX">Forwarded unchanged to the generic implementation.</param>
/// <param name="sizeInfo">Forwarded unchanged to the generic implementation.</param>
/// <param name="data">Input bytes, forwarded unchanged to the generic implementation.</param>
/// <returns>The span returned by the selected generic implementation.</returns>
/// <exception cref="NotSupportedException">
/// <paramref name="bytesPerPixel"/> is not one of the supported values.
/// </exception>
public static Span<byte> ConvertLinearToBlockLinear(
    int width,
    int height,
    int depth,
    int levels,
    int layers,
    int blockWidth,
    int blockHeight,
    int bytesPerPixel,
    int gobBlocksInY,
    int gobBlocksInZ,
    int gobBlocksInTileX,
    SizeInfo sizeInfo,
    ReadOnlySpan<byte> data)
{
    // The generic implementation derives its pixel size from the size of T, so each
    // supported size maps to a type of that size.
    // NOTE(review): Bpp12Pixel is presumably a 12-byte struct - confirm against its declaration.
    return bytesPerPixel switch
    {
        1 => ConvertLinearToBlockLinear<byte>(width, height, depth, levels, layers, blockWidth, blockHeight, gobBlocksInY, gobBlocksInZ, gobBlocksInTileX, sizeInfo, data),
        2 => ConvertLinearToBlockLinear<ushort>(width, height, depth, levels, layers, blockWidth, blockHeight, gobBlocksInY, gobBlocksInZ, gobBlocksInTileX, sizeInfo, data),
        4 => ConvertLinearToBlockLinear<uint>(width, height, depth, levels, layers, blockWidth, blockHeight, gobBlocksInY, gobBlocksInZ, gobBlocksInTileX, sizeInfo, data),
        8 => ConvertLinearToBlockLinear<ulong>(width, height, depth, levels, layers, blockWidth, blockHeight, gobBlocksInY, gobBlocksInZ, gobBlocksInTileX, sizeInfo, data),
        12 => ConvertLinearToBlockLinear<Bpp12Pixel>(width, height, depth, levels, layers, blockWidth, blockHeight, gobBlocksInY, gobBlocksInZ, gobBlocksInTileX, sizeInfo, data),
        16 => ConvertLinearToBlockLinear<Vector128<byte>>(width, height, depth, levels, layers, blockWidth, blockHeight, gobBlocksInY, gobBlocksInZ, gobBlocksInTileX, sizeInfo, data),
        // C# interpolation holes are written {expr}; a leading '$' would be emitted literally.
        _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.")
    };
}
public static Span<byte> ConvertLinearToLinearStrided( public static Span<byte> ConvertLinearToLinearStrided(
int width, int width,
int height, int height,