/*
==========================================================================================
Cg Acceleration Research
Optimized: Edgar Velázquez Armendáriz - edgar [at] graphics [dot] cornell [dot] edu
Original: Eugene Lee (el77 [at] cornell [dot] edu)
------------------------------------------------------------------------------------------
Reachability.cg
Computes 5x5 reachability.
==========================================================================================
*/
// Output record of the vertex shaders in this file and input record of the
// fragment shaders that consume their interpolated values.
struct vertexInfo {
// Position output; the vertex shaders write ModelViewProj * input position
// here. Bound to the POSITION semantic.
float4 pos : POSITION;
// Eight half4 slots. The vertex shaders in this file pack two 2D texture
// coordinates into each slot (one in .xy, one in .zw), so a single record
// can carry up to 16 sample positions.
half4 texCoords[8]; // To hold all the interpolated texture coordinates
};
/**
* Vertex shader for the neighbor reach, interpolates coordinates
*
* # 8 instructions, 1 R-regs
*/
void NeighborReachVert(
    uniform float4x4 ModelViewProj : state.matrix.mvp,
    in half2 uv : TEXCOORD0,
    in float4 pos : POSITION,
    out vertexInfo OUT)
{
    // Project the incoming vertex with the model-view-projection matrix.
    OUT.pos = mul(ModelViewProj, pos);

    // TEXTURE_RECTANGLE coordinates are not normalized, so the neighbour
    // offsets below are plain integer displacements added to uv. They are
    // emitted here so the fragment shader receives them already interpolated.

    // Slot 0: (-1, 0) in .xy and (1, 0) in .zw.
    OUT.texCoords[0] = half4(uv + half2(-1, 0), uv + half2(1, 0));

    // Slot 1: (0, 1) in .xy; .zw is intentionally left unwritten.
    OUT.texCoords[1].xy = uv + half2(0, 1);
}
/**
* Vertex shader for the reachability that performs the multiple texture
* coordinates interpolation in advance.
*
* # 19 instructions, 2 R-regs
*/
void ReachabilityVert(
    uniform float4x4 ModelViewProj : state.matrix.mvp,
    in half2 uv : TEXCOORD0,
    in float4 pos : POSITION,
    out vertexInfo OUT)
{
    // Project the incoming vertex with the model-view-projection matrix.
    OUT.pos = mul(ModelViewProj, pos);

    // TEXTURE_RECTANGLE coordinates are not normalized, so each neighbour is
    // addressed by adding an integer offset to uv. Two coordinates are packed
    // per slot (.xy first, .zw second), walking the 5x3 window row by row.

    // Row y = 0: x = -2 .. 2
    OUT.texCoords[0] = half4(uv + half2(-2, 0), uv + half2(-1, 0));
    OUT.texCoords[1] = half4(uv,                uv + half2( 1, 0));

    // Slot 2 straddles rows: (2, 0) then (-2, 1)
    OUT.texCoords[2] = half4(uv + half2( 2, 0), uv + half2(-2, 1));

    // Row y = 1: x = -1 .. 2
    OUT.texCoords[3] = half4(uv + half2(-1, 1), uv + half2( 0, 1));
    OUT.texCoords[4] = half4(uv + half2( 1, 1), uv + half2( 2, 1));

    // Row y = 2: x = -2 .. 2
    OUT.texCoords[5] = half4(uv + half2(-2, 2), uv + half2(-1, 2));
    OUT.texCoords[6] = half4(uv + half2( 0, 2), uv + half2( 1, 2));

    // Last coordinate (2, 2) only needs .xy; .zw is left unwritten.
    OUT.texCoords[7].xy = uv + half2(2, 2);
}
/**
* Vertex shader for the reachability copy that performs the multiple texture
* coordinates interpolation in advance.
*
* # 17 instructions, 2 R-regs
*/
void CopyReachabilityVert(
    uniform float4x4 ModelViewProj : state.matrix.mvp,
    in half2 uv : TEXCOORD0,
    in float4 pos : POSITION,
    out vertexInfo OUT)
{
    // Project the incoming vertex with the model-view-projection matrix.
    OUT.pos = mul(ModelViewProj, pos);

    // TEXTURE_RECTANGLE coordinates are not normalized, so each neighbour is
    // addressed by adding an integer offset to uv. Two coordinates are packed
    // per slot (.xy first, .zw second), covering the two rows above the
    // current texel (y = -2 and y = -1).

    // Row y = -2: x = -2 .. 2
    OUT.texCoords[0] = half4(uv + half2(-2, -2), uv + half2(-1, -2));
    OUT.texCoords[1] = half4(uv + half2( 0, -2), uv + half2( 1, -2));

    // Slot 2 straddles rows: (2, -2) then (-2, -1)
    OUT.texCoords[2] = half4(uv + half2( 2, -2), uv + half2(-2, -1));

    // Row y = -1: x = -1 .. 2
    OUT.texCoords[3] = half4(uv + half2(-1, -1), uv + half2( 0, -1));
    OUT.texCoords[4] = half4(uv + half2( 1, -1), uv + half2( 2, -1));
}
// New: # 19 instructions, 2 R-regs, 2 H-regs
// Original: # 28 instructions, 1 R-regs, 2 H-regs
/**
 * Fragment shader that looks up, for the current texel, one table value per
 * direct neighbour (left, right and the texel at offset (0, 1)).
 *
 * The green channel of pixelClass is read at the current position and at the
 * three neighbour coordinates supplied in IN.texCoords, scaled by 255 and
 * rounded. Pairs of those values are then used as 2D coordinates into the
 * three neighbour tables.
 *
 * Returns:
 *   x - neighborTableRL(left, origin).x   / 255
 *   y - neighborTableLR(origin, right).x  / 255
 *   z - neighborTableVER(below, origin).x / 255
 *   w - 1 when the origin class value is >= 14.5, otherwise 0 (not divided)
 */
half4 NeighborReach(in half2 pos : WPOS,
                    in vertexInfo IN,
                    const uniform samplerRECT pixelClass,
                    const uniform samplerRECT neighborTableLR,
                    const uniform samplerRECT neighborTableRL,
                    const uniform samplerRECT neighborTableVER) : COLOR
{
    // Gather the four class values in one vector so the scale-and-round is
    // done once: (origin, left, right, below).
    const half4 classes = round(half4(
        texRECT(pixelClass, pos).g,                 //  0, 0
        texRECT(pixelClass, IN.texCoords[0].xy).g,  // -1, 0
        texRECT(pixelClass, IN.texCoords[0].zw).g,  //  1, 0
        texRECT(pixelClass, IN.texCoords[1].xy).g   //  0, 1
    ) * 255);

    const half origin = classes.x;
    const half left   = classes.y;
    const half right  = classes.z;
    const half below  = classes.w;

    half4 result;
    result.x = texRECT(neighborTableRL,  half2(left,   origin)).x;
    result.y = texRECT(neighborTableLR,  half2(origin, right)).x;
    result.z = texRECT(neighborTableVER, half2(below,  origin)).x;
    // Flag derived from the origin class alone.
    result.w = step(14.5, origin);

    // Only the three table values are rescaled; the flag in w stays 0 or 1.
    return result / half4(255.0h.xxx, 1);
}
/**
 * Combines two values through chainTable: the pair (a, b) is used as the 2D
 * lookup coordinate and the table's x channel is returned.
 */
half ReachChain(samplerRECT chainTable, half a, half b)
{
    return texRECT(chainTable, half2(a, b)).x;
}

/**
 * Evaluates two alternative chains, (a0, b0) and (a1, b1), and merges the two
 * results through orTable (the pair of chain results is the lookup coordinate;
 * the table's x channel is returned).
 */
half ReachEither(samplerRECT orTable, samplerRECT chainTable,
                 half a0, half b0, half a1, half b1)
{
    half2 paths;
    paths.x = ReachChain(chainTable, a0, b0);
    paths.y = ReachChain(chainTable, a1, b1);
    return texRECT(orTable, paths).x;
}

// New: # 140 instructions, 13 R-regs, 2 H-regs
// Original: # 165 instructions, 10 R-regs, 3 H-regs
// (figures recorded by the original authors; not re-measured after cleanup)
/**
 * Fragment shader that computes reachability from the current texel to the
 * 14 other texels of the 5x3 window x = -2..2, y = 0..2 and packs the result
 * as bit flags.
 *
 * neighborTable is sampled at the coordinates prepared by ReachabilityVert
 * and scaled to 0..255. As used below, its .r channel is the link toward -x,
 * .g the link toward +x and .b the link toward +y of the sampled texel
 * (presumably the output of NeighborReach - confirm against the host code).
 * Longer paths are built by chaining an already computed reachability value
 * with the next link (chainTable), and two alternative paths to the same
 * texel are merged with orTable.
 *
 * A target counts as reachable when its value is >= 8. Output layout, before
 * the final division by 255:
 *   r - always 0 here
 *   g - weights 4, 8, 16 for (-2,0) (-1,0) (1,0);
 *       weights 32, 64, 128 for (2,0) (-2,1) (-1,1)
 *   b - weights 1, 2, 4, 8 for (0,1) (1,1) (2,1) (-2,2);
 *       weights 16, 32, 64, 128 for (-1,2) (0,2) (1,2) (2,2)
 *   a - alpha of the centre neighborTable sample (0..255 scale)
 *
 * pos and pixelClass are not read by this shader; they are kept so the
 * parameter list seen by the host application does not change.
 */
half4 Reachability(in half2 pos : WPOS,
                   in vertexInfo IN,
                   const uniform samplerRECT neighborTable,
                   const uniform samplerRECT pixelClass,
                   const uniform samplerRECT orTable,
                   const uniform samplerRECT chainTable) : COLOR
{
    half4 color = half4(0, 0, 0, 0);

    // Reachability values, grouped into vectors so each group can be
    // thresholded and weighted with a single step/dot pair. The suffix lists
    // the window indices held in x, y, z (, w); index 2 is the centre texel.
    half3 reachability013;
    half3 reachability456;
    half4 reachability789A;
    half4 reachabilityBCDE;

    // Link values of every window texel that is actually needed. The samples
    // at (-2, 2) and (2, 2) are deliberately not fetched: no path below uses
    // their outgoing links.
    const half3 nr00 = round(texRECT(neighborTable, IN.texCoords[0].xy).rgb * 255); // -2, 0
    const half3 nr01 = round(texRECT(neighborTable, IN.texCoords[0].zw).rgb * 255); // -1, 0
    const half4 nr02 = round(texRECT(neighborTable, IN.texCoords[1].xy) * 255);     //  0, 0
    const half3 nr03 = round(texRECT(neighborTable, IN.texCoords[1].zw).rgb * 255); //  1, 0
    const half3 nr04 = round(texRECT(neighborTable, IN.texCoords[2].xy).rgb * 255); //  2, 0
    const half3 nr05 = round(texRECT(neighborTable, IN.texCoords[2].zw).rgb * 255); // -2, 1
    const half3 nr06 = round(texRECT(neighborTable, IN.texCoords[3].xy).rgb * 255); // -1, 1
    const half3 nr07 = round(texRECT(neighborTable, IN.texCoords[3].zw).rgb * 255); //  0, 1
    const half3 nr08 = round(texRECT(neighborTable, IN.texCoords[4].xy).rgb * 255); //  1, 1
    const half3 nr09 = round(texRECT(neighborTable, IN.texCoords[4].zw).rgb * 255); //  2, 1
    const half3 nr11 = round(texRECT(neighborTable, IN.texCoords[5].zw).rgb * 255); // -1, 2
    const half3 nr12 = round(texRECT(neighborTable, IN.texCoords[6].xy).rgb * 255); //  0, 2
    const half3 nr13 = round(texRECT(neighborTable, IN.texCoords[6].zw).rgb * 255); //  1, 2

    // ROW 0
    reachability013.y = nr02.r;                                        // -1, 0
    reachability013.x = ReachChain(chainTable, nr02.r, nr01.r);        // -2, 0
    reachability013.z = nr02.g;                                        //  1, 0
    reachability456.x = ReachChain(chainTable, nr02.g, nr03.g);        //  2, 0
    // Accumulated early, between lookups, to mask texture latency
    color.g += dot(step(8.0h.xxx, reachability013), half3(4,8,16));

    // ROW 1
    reachability789A.x = nr02.b;                                       //  0, 1
    reachability456.z = ReachEither(orTable, chainTable,               // -1, 1
                                    nr02.r, nr01.b,
                                    nr02.b, nr07.r);
    reachability456.y = ReachEither(orTable, chainTable,               // -2, 1
                                    reachability013.x, nr00.b,
                                    reachability456.z, nr06.r);
    // To mask latency
    color.g += dot(step(8.0h.xxx, reachability456), half3(32,64,128));
    reachability789A.y = ReachEither(orTable, chainTable,              //  1, 1
                                     nr02.g, nr03.b,
                                     nr02.b, nr07.g);
    reachability789A.z = ReachEither(orTable, chainTable,              //  2, 1
                                     reachability456.x, nr04.b,
                                     reachability789A.y, nr08.g);

    // ROW 2
    reachabilityBCDE.y = ReachChain(chainTable, nr02.b, nr07.b);       //  0, 2
    reachabilityBCDE.x = ReachEither(orTable, chainTable,              // -1, 2
                                     reachability456.z, nr06.b,
                                     reachabilityBCDE.y, nr12.r);
    reachability789A.w = ReachEither(orTable, chainTable,              // -2, 2
                                     reachability456.y, nr05.b,
                                     reachabilityBCDE.x, nr11.r);
    // To mask latency
    color.b += dot(step(8.0h.xxxx, reachability789A), half4(1,2,4,8));
    reachabilityBCDE.z = ReachEither(orTable, chainTable,              //  1, 2
                                     reachability789A.y, nr08.b,
                                     reachabilityBCDE.y, nr12.g);
    reachabilityBCDE.w = ReachEither(orTable, chainTable,              //  2, 2
                                     reachability789A.z, nr09.b,
                                     reachabilityBCDE.z, nr13.g);
    // To mask latency
    color.b += dot(step(8.0h.xxxx, reachabilityBCDE), half4(16,32,64,128));

    // Carry the centre texel's alpha through unchanged (rescaled below).
    color.a = nr02.a;
    return color / 255.0;
}
// New: # 66 instructions, 2 R-regs, 4 H-regs
// # 70 instructions, 2 R-regs, 5 H-regs - with if branch
// Original: # 114 instructions, 3 R-regs, 4 H-regs
/**
 * Fragment shader that completes the reachability bit flags of the current
 * texel by copying bits from the ten texels in the two rows above it
 * (y = -2 and y = -1), using the coordinates prepared by
 * CopyReachabilityVert.
 *
 * The current texel of `reachability` is read and scaled to 0..255. Then:
 *   - red receives eight bits, one from the blue channel of each of the
 *     first eight neighbours: bit 7 of (-2,-2), bit 6 of (-1,-2),
 *     bit 5 of (0,-2), bit 4 of (1,-2), bit 3 of (2,-2), bit 2 of (-2,-1),
 *     bit 1 of (-1,-1) and bit 0 of (0,-1), stored with weights
 *     1, 2, 4, ..., 128 respectively;
 *   - green receives weight 1 from bit 7 of the green channel at (1,-1) and
 *     weight 2 from bit 6 of the green channel at (2,-1).
 * If the current texel's alpha is non-zero, r, g and b are all forced to 255.
 *
 * Returns the rgb result divided by 255.
 *
 * pixelClass is not read by this shader; it is kept so the parameter list
 * seen by the host application does not change.
 */
half3 CopyReachability(in half2 pos : WPOS,
                       in vertexInfo IN,
                       const uniform samplerRECT pixelClass,
                       const uniform samplerRECT reachability) : COLOR
{
    half4 outColor = round(texRECT(reachability, pos) * 255);

    // The eight blue-channel samples are processed four at a time so the
    // bit extraction runs on whole vectors.

    // First block: (-2,-2) (-1,-2) (0,-2) (1,-2)
    half4 neighborA;
    neighborA.x = texRECT(reachability, IN.texCoords[0].xy).b;
    neighborA.y = texRECT(reachability, IN.texCoords[0].zw).b;
    neighborA.z = texRECT(reachability, IN.texCoords[1].xy).b;
    neighborA.w = texRECT(reachability, IN.texCoords[1].zw).b;
    // Multiply and round
    neighborA = round(neighborA * 255);
    // Strip the bits above the one of interest: y keeps bits 0-6,
    // z keeps bits 0-6 (reduced to 0-5 below), w keeps bits 0-4.
    neighborA.yzw = fmod(neighborA.yzw, half3(128, 128, 32));
    // One extra fmod so z keeps only bits 0-5
    neighborA.z = fmod(neighborA.z, 64);
    // Test bits 7, 6, 5 and 4 at once
    neighborA = step(half4(128, 64, 32, 16), neighborA);
    outColor.r += dot(neighborA, half4(1,2,4,8));

    // Second block: (2,-2) (-2,-1) (-1,-1) (0,-1)
    half4 neighborB;
    neighborB.x = texRECT(reachability, IN.texCoords[2].xy).b;
    neighborB.y = texRECT(reachability, IN.texCoords[2].zw).b;
    neighborB.z = texRECT(reachability, IN.texCoords[3].xy).b;
    neighborB.w = texRECT(reachability, IN.texCoords[3].zw).b;
    // Multiply and round
    neighborB = round(neighborB * 255);
    // Strip the bits above the one of interest
    neighborB = fmod(neighborB, half4(16, 8, 4, 2));
    // Test bits 3, 2, 1 and 0 at once (0.9 instead of 1 as the last threshold)
    neighborB = step(half4(8, 4, 2, 0.9), neighborB);
    outColor.r += dot(neighborB, half4(16,32,64,128));

    // (round(tex*255)) < 128 ? 0 : 1
    half4 neighbor;
    neighbor = round(texRECT(reachability, IN.texCoords[4].xy) * 255); // 1,-1
    outColor.g += step(128, neighbor.g);

    // (round(tex*255)) mod 128 < 64 ? 0 : 2
    // The (2,-1) coordinate lives in texCoords[4].zw; texCoords[0].zw is
    // (-1,-2) and would read the wrong texel.
    neighbor = round(texRECT(reachability, IN.texCoords[4].zw) * 255); // 2,-1
    neighbor.g -= 128 * step(128, neighbor.g);
    outColor.g += step(64, neighbor.g) * 2;

    // A non-zero alpha on the current texel overrides everything: all bits set.
    if(outColor.a > 0) {
        outColor.rgb = half3(255, 255, 255);
    }
    return outColor.rgb / 255.0;
}