Distinctive Derivative Differences

Pesky Problems with Procedural UVs

The “Solution”

// build UVs directly from the direction of the normalized normal
float3 normal = normalize(i.normal);
// atan2 returns a value between -pi and pi,
// so dividing by pi * 2 gives -0.5 to 0.5
// (this value jumps from 0.5 to -0.5 where atan2 wraps around)
float phi = atan2(normal.z, normal.x) / (UNITY_PI * 2.0);
// acos(normal.y) would be 0.0 at the top and pi at the bottom;
// negating y flips that so 0.0 is at the bottom, which aligns with
// Unity's OpenGL style texture UVs
float theta = acos(-normal.y) / UNITY_PI;
float2 uv = float2(phi, theta);
fixed4 col = tex2D(_MainTex, uv);
// horizontal coordinate: atan2 returns a value between -pi and pi,
// so dividing by pi * 2 gives -0.5 to 0.5
float phi = atan2(normal.z, normal.x) / (UNITY_PI * 2.0);
// the same coordinate wrapped into the 0.0 to 1.0 range
float phi_frac = frac(phi);
// pick whichever version of the coordinate changes the least across
// neighbouring pixels; the small bias prefers the first 'UV set' (phi)
float phi_best = (fwidth(phi) - 0.0001 < fwidth(phi_frac)) ? phi : phi_frac;
// acos(normal.y) would be 0.0 at the top and pi at the bottom;
// negating y flips that so 0.0 is at the bottom, which aligns with
// Unity's OpenGL style texture UVs
float theta = acos(-normal.y) / UNITY_PI;
float2 uv = float2(phi_best, theta);
fixed4 col = tex2D(_MainTex, uv);

Screen Space Partial Differences

// Mip level estimate following OpenGL's reference mip map level
// calculation, converted to HLSL.
//   texture_coord: uv * texture resolution
//   returns: 0.5 * log2 of the larger squared screen-space gradient
//            magnitude, clamped so the result is never below 0.0
float CalcMipLevel(float2 texture_coord)
{
    // screen-space rate of change of the coordinate along x and y
    float2 grad_x = ddx(texture_coord);
    float2 grad_y = ddy(texture_coord);
    // squared length of each gradient; keep the larger of the two
    float len_sqr_x = dot(grad_x, grad_x);
    float len_sqr_y = dot(grad_y, grad_y);
    float len_sqr_max = max(len_sqr_x, len_sqr_y);
    // 0.5 * log2(x) is equivalent to log2(sqrt(x))
    float mip = 0.5 * log2(len_sqr_max);
    return max(0.0, mip);
}

A Coarse Look at a Fine Problem

Newer APIs & Unity

Everything Is Fine

Explicit LOD

// explicit LOD example
// horizontal coordinate: atan2 returns a value between -pi and pi,
// so dividing by pi * 2 gives -0.5 to 0.5
float phi = atan2(normal.z, normal.x) / (UNITY_PI * 2.0);
// the same coordinate wrapped into the 0.0 to 1.0 range
float phi_frac = frac(phi);
// acos(normal.y) would be 0.0 at the top and pi at the bottom;
// negating y flips that so 0.0 is at the bottom, which aligns with
// Unity's OpenGL style texture UVs
float theta = acos(-normal.y) / UNITY_PI;
// primary uv (phi) and secondary uv (phi_frac) share the same theta
float2 uvA = float2(phi, theta);
float2 uvB = float2(phi_frac, theta);
// mip level of each uv set
// _TextureName_TexelSize.zw is the texture resolution
float mipA = CalcMipLevel(uvA * _MainTex_TexelSize.zw);
float mipB = CalcMipLevel(uvB * _MainTex_TexelSize.zw);
// use the lower mip level of the two uv sets
float mipLevel = min(mipA, mipB);
// sample texture with explicit mip level
// the z component is 0.0 because it does nothing
fixed4 col = tex2Dlod(_MainTex, float4(uvA, 0.0, mipLevel));

Explicit Gradients

// explicit gradients example
// horizontal coordinate: atan2 returns a value between -pi and pi,
// so dividing by pi * 2 gives -0.5 to 0.5
float phi = atan2(normal.z, normal.x) / (UNITY_PI * 2.0);
// the same coordinate wrapped into the 0.0 to 1.0 range
float phi_frac = frac(phi);
// acos(normal.y) would be 0.0 at the top and pi at the bottom;
// negating y flips that so 0.0 is at the bottom, which aligns with
// Unity's OpenGL style texture UVs
float theta = acos(-normal.y) / UNITY_PI;
// the uv itself is built without doing anything special
float2 uv = float2(phi, theta);
// screen-space derivatives of both versions of the horizontal coordinate
float phi_dx = ddx(phi);
float phi_dy = ddy(phi);
float phi_frac_dx = ddx(phi_frac);
float phi_frac_dy = ddy(phi_frac);
// per axis, keep whichever derivative has the smaller absolute value;
// the small bias prefers the phi derivative
float best_dx = (abs(phi_dx) - 0.0001 < abs(phi_frac_dx)) ? phi_dx : phi_frac_dx;
float best_dy = (abs(phi_dy) - 0.0001 < abs(phi_frac_dy)) ? phi_dy : phi_frac_dy;
// theta's derivatives are used as-is
float2 dx = float2(best_dx, ddx(theta));
float2 dy = float2(best_dy, ddy(theta));
// sample the texture using our own derivatives
fixed4 col = tex2Dgrad(_MainTex, uv, dx, dy);

Coarse Emulation

// coarse emulation
// atan returns a value between -pi and pi
// so we divide by pi * 2 to get -0.5 to 0.5
float phi = atan2(normal.z, normal.x) / (UNITY_PI * 2.0);
// 0.0 to 1.0 range
float phi_frac = frac(phi);
// acos returns 0.0 at the top, pi at the bottom
// so we flip the y to align with Unity's OpenGL style
// texture UVs so 0.0 is at the bottom
float theta = acos(-normal.y) / UNITY_PI;
// get derivatives for phi and phi_frac
float phi_dx = ddx(phi);
float phi_dy = ddy(phi);
float phi_frac_dx = ddx(phi_frac);
float phi_frac_dy = ddy(phi_frac);
// get position within quad
// .xy is swizzled explicitly so the conversion does not depend on how
// many components vpos has
// NOTE(review): vpos is declared outside this snippet - confirm its type
int2 pixel_quad_pos = uint2(vpos.xy) % 2;
// get direction within quad: -1.0 or 1.0 per axis
float2 pixel_quad_dir = float2(pixel_quad_pos) * 2.0 - 1.0;
// get the derivatives of the "other" pixel column / row in the quad:
// (value - derivative * direction) reconstructs the other column / row
// value, and taking its derivative along the opposite axis gives that
// column's / row's derivative
float phi_dxy = ddx(phi - phi_dy * pixel_quad_dir.y);
float phi_dyx = ddy(phi - phi_dx * pixel_quad_dir.x);
float phi_frac_dxy = ddx(phi_frac - phi_frac_dy * pixel_quad_dir.y);
float phi_frac_dyx = ddy(phi_frac - phi_frac_dx * pixel_quad_dir.x);
// check which column / row in the quad this is and use alternate
// derivatives if it's not the column / row coarse would use
if (pixel_quad_pos.x == 1)
{
    phi_dy = phi_dyx;
    phi_frac_dy = phi_frac_dyx;
}
if (pixel_quad_pos.y == 1)
{
    phi_dx = phi_dxy;
    phi_frac_dx = phi_frac_dxy;
}
// fwidth equivalents using the "coarse" derivatives
float phi_fw = abs(phi_dx) + abs(phi_dy);
float phi_frac_fw = abs(phi_frac_dx) + abs(phi_frac_dy);
// construct uvs like Tarini's method
float2 uv = float2(
    // uses a small bias to prefer the first 'UV set'
    phi_fw - 0.0001 < phi_frac_fw ? phi : phi_frac,
    theta);
fixed4 col = tex2D(_MainTex, uv);

Least Worst Quad Derivatives

Nvidia 2080 Super
iPhone SE 2020
// least worst quad derivatives
// atan returns a value between -pi and pi
// so we divide by pi * 2 to get -0.5 to 0.5
float phi = atan2(normal.z, normal.x) / (UNITY_PI * 2.0);
// 0.0 to 1.0 range
float phi_frac = frac(phi);
// acos returns 0.0 at the top, pi at the bottom
// so we flip the y to align with Unity's OpenGL style
// texture UVs so 0.0 is at the bottom
float theta = acos(-normal.y) / UNITY_PI;
// get derivatives for phi and phi_frac
float phi_dx = ddx(phi);
float phi_dy = ddy(phi);
float phi_frac_dx = ddx(phi_frac);
float phi_frac_dy = ddy(phi_frac);
// get position within quad
// .xy is swizzled explicitly so the conversion does not depend on how
// many components vpos has
// NOTE(review): vpos is declared outside this snippet - confirm its type
int2 pixel_quad_pos = uint2(vpos.xy) % 2;
// get direction within quad: -1.0 or 1.0 per axis
float2 pixel_quad_dir = float2(pixel_quad_pos) * 2.0 - 1.0;
// get the derivatives of the "other" pixel column / row in the quad:
// (value - derivative * direction) reconstructs the other column / row
// value, and taking its derivative along the opposite axis gives that
// column's / row's derivative
float phi_dxy = ddx(phi - phi_dy * pixel_quad_dir.y);
float phi_dyx = ddy(phi - phi_dx * pixel_quad_dir.x);
float phi_frac_dxy = ddx(phi_frac - phi_frac_dy * pixel_quad_dir.y);
float phi_frac_dyx = ddy(phi_frac - phi_frac_dx * pixel_quad_dir.x);
// get the worst derivatives for the entire quad
// (from here on these four values hold magnitudes, so they are >= 0.0)
phi_dx = max(abs(phi_dx), abs(phi_dxy));
phi_dy = max(abs(phi_dy), abs(phi_dyx));
phi_frac_dx = max(abs(phi_frac_dx), abs(phi_frac_dxy));
phi_frac_dy = max(abs(phi_frac_dy), abs(phi_frac_dyx));
// fwidth equivalents using the worst derivatives
// (no abs() needed: the values above are already non-negative)
float phi_fw = phi_dx + phi_dy;
float phi_frac_fw = phi_frac_dx + phi_frac_dy;
// construct uvs like Tarini's method
float2 uv = float2(
    // uses a small bias to prefer the first 'UV set'
    phi_fw - 0.0001 < phi_frac_fw ? phi : phi_frac,
    theta);
fixed4 col = tex2D(_MainTex, uv);

In Quad Communication

Communicative Neighbors

pixel quad phi values and indices
P0 fine derivatives & coarse derivatives of phi
// get the screen-space partial derivatives of phi
// (ddx: change along x, ddy: change along y)
float phi_dx = ddx(phi);
float phi_dy = ddy(phi);
fine derivatives for each column and row of phi
// get position within the quad
// (0 or 1 per axis, assuming vpos is non-negative)
int2 pixel_quad_pos = int2(vpos.xy) % 2;
// -1.0 or 1.0 value depending on which row or column this is
float2 pixel_quad_dir = float2(pixel_quad_pos) * 2.0 - 1.0;
// get the "other" pixel column / row values in the quad by
// adding or subtracting the derivatives from the current value
// (uses pixel_quad_dir as declared above)
float phi_other_x = phi - phi_dx * pixel_quad_dir.x;
float phi_other_y = phi - phi_dy * pixel_quad_dir.y;
Applying derivatives to the current value to get the value of the other column or row. Top represents phi, bottom left is phi_other_x, bottom right is phi_other_y.
// get the derivatives of the "other" pixel column / row in the quad:
// phi_other_x_dy is the y derivative of the other column's value,
// phi_other_y_dx is the x derivative of the other row's value
float phi_other_x_dy = ddy(phi_other_x);
float phi_other_y_dx = ddx(phi_other_y);
Getting the derivatives of the mirrored values gives each pixel access to all four derivatives in the quad. Top shows phi_dx & phi_dy derivatives, bottom shows phi_other_x_dy & phi_other_y_dx derivatives.

Coarse Communication

Conclusion

Shader Code

Additional Thoughts

Shader Graph

Performance Numbers

  • 1k x 512 DXT1
    none — 25.8 μs
    tarini — 26.3 μs
    lod — 26.5 μs
    gradients — 26.7 μs
    quad comm — 26.6 μs
  • 2k x 1k DXT1
    none — 26.5 μs
    tarini — 26.8 μs
    lod — 27.0 μs
    gradients — 28.2 μs
    quad comm — 27.2 μs
  • 4k x 2k DXT1
    none — 27.1 μs
    tarini — 27.8 μs
    lod — 26.7 μs
    gradients — 36.5 μs
    quad comm — 27.9 μs
  • 8k x 4k RGBA32
    none — 44.0 μs
    tarini — 44.3 μs
    lod — 29.5 μs
    gradients — 63.2 μs
    quad comm — 44.5 μs
  • 2k x 1k DXT1
    tarini — 183.84 μs
    gradients — 196.96 μs
    quad comm — 201.60 μs
  • 8k x 4k RGBA32
    tarini — 354.40 μs
    gradients — 352.16 μs
    quad comm — 354.24 μs
  • 2k x 1k DXT1
    tarini — 12.159 μs
    gradients — 12.621 μs
    quad comm — 18.744 μs
  • 8k x 4k RGBA32
    tarini — 25.603 μs
    gradients — 23.883 μs
    quad comm — 24.330 μs
  • 8k x 4k RGBA32
    none — 338 μs
    tarini — 361 μs
    lod — 418 μs
    gradients — 266 μs
    quad comm — 291 μs

ShaderToy Examples

Further reading

--

--

Tech Artist & Graphics Programmer lately focused on Unity VR game dev. https://ko-fi.com/bgolus

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store