```/*
==========================================================================================
Cg Acceleration Research

Optimized:  Edgar Velázquez Armendáriz - edgar [at] graphics [dot] cornell [dot] edu
Original:   Eugene Lee (el77 [at] cornell [dot] edu)
------------------------------------------------------------------------------------------
Reachability.cg

Computes 5x5 reachability.
==========================================================================================
*/

// Data handed from the vertex programs in this file to the fragment programs.
struct vertexInfo {
// Clip-space vertex position (written as mul(ModelViewProj, pos)).
float4 pos              : POSITION;
// Each half4 carries two 2D texture coordinates: one in .xy and one in .zw.
// The vertex programs fill only as many entries as their pass needs.
half4  texCoords[8];    // To hold all the interpolated texture coordinates
};

/**
 * Vertex shader for the neighbor-reach pass.
 *
 * Emits the clip-space position plus the texture coordinates of the
 * left (-1, 0), right (1, 0) and bottom (0, 1) neighbors so that the
 * fragment program receives them already interpolated.
 *
 * Author's measurement on the unpacked form: # 8 instructions, 1 R-regs
 */
void NeighborReachVert(
    uniform float4x4 ModelViewProj      : state.matrix.mvp,
    in  half2 uv                        : TEXCOORD0,
    in  float4 pos                      : POSITION,
    out vertexInfo OUT)
{
    // Using TEXTURE_RECTANGLE, coords are not normalized: a neighbor is
    // exactly one unit away.
    const half4 center = uv.xyxy;

    // Two offsets per half4: .xy holds the first, .zw the second.
    OUT.texCoords[0]    = center + half4(-1, 0,  1, 0);   // (-1, 0) | ( 1, 0)
    OUT.texCoords[1].xy = uv + half2( 0, 1);              // ( 0, 1)

    // Transformed position of the vertex into clip coordinates
    OUT.pos = mul(ModelViewProj, pos);
}

/**
 * Vertex shader for the reachability pass.
 *
 * Precomputes the 15 texture coordinates covering offsets x = -2..2 for
 * rows y = 0, 1 and 2, packed two per half4 (.xy then .zw), so that the
 * fragment program does not have to derive them per pixel.
 *
 * Author's measurement on the unpacked form: # 19 instructions, 2 R-regs
 */
void ReachabilityVert(
    uniform float4x4 ModelViewProj      : state.matrix.mvp,
    in  half2 uv                        : TEXCOORD0,
    in  float4 pos                      : POSITION,
    out vertexInfo OUT)
{
    // Using TEXTURE_RECTANGLE, coords are not normalized: offsets are in texels.
    const half4 center = uv.xyxy;

    // Row y = 0
    OUT.texCoords[0]    = center + half4(-2, 0, -1, 0);    // (-2, 0) | (-1, 0)
    OUT.texCoords[1]    = half4(uv, uv + half2( 1, 0));    // ( 0, 0) | ( 1, 0)
    OUT.texCoords[2]    = center + half4( 2, 0, -2, 1);    // ( 2, 0) | (-2, 1)

    // Row y = 1 (continues from texCoords[2].zw)
    OUT.texCoords[3]    = center + half4(-1, 1,  0, 1);    // (-1, 1) | ( 0, 1)
    OUT.texCoords[4]    = center + half4( 1, 1,  2, 1);    // ( 1, 1) | ( 2, 1)

    // Row y = 2
    OUT.texCoords[5]    = center + half4(-2, 2, -1, 2);    // (-2, 2) | (-1, 2)
    OUT.texCoords[6]    = center + half4( 0, 2,  1, 2);    // ( 0, 2) | ( 1, 2)
    OUT.texCoords[7].xy = uv + half2( 2, 2);               // ( 2, 2)

    // Transformed position of the vertex into clip coordinates
    OUT.pos = mul(ModelViewProj, pos);
}

/**
 * Vertex shader for the reachability copy pass.
 *
 * Precomputes the 10 texture coordinates covering offsets x = -2..2 for
 * rows y = -2 and y = -1, packed two per half4 (.xy then .zw).
 *
 * Author's measurement on the unpacked form: # 17 instructions, 2 R-regs
 */
void CopyReachabilityVert(
    uniform float4x4 ModelViewProj      : state.matrix.mvp,
    in  half2 uv                        : TEXCOORD0,
    in  float4 pos                      : POSITION,
    out vertexInfo OUT)
{
    // Using TEXTURE_RECTANGLE, coords are not normalized: offsets are in texels.
    const half4 center = uv.xyxy;

    // Row y = -2
    OUT.texCoords[0] = center + half4(-2, -2, -1, -2);    // (-2,-2) | (-1,-2)
    OUT.texCoords[1] = center + half4( 0, -2,  1, -2);    // ( 0,-2) | ( 1,-2)
    OUT.texCoords[2] = center + half4( 2, -2, -2, -1);    // ( 2,-2) | (-2,-1)

    // Row y = -1 (continues from texCoords[2].zw)
    OUT.texCoords[3] = center + half4(-1, -1,  0, -1);    // (-1,-1) | ( 0,-1)
    OUT.texCoords[4] = center + half4( 1, -1,  2, -1);    // ( 1,-1) | ( 2,-1)

    // Transformed position of the vertex into clip coordinates
    OUT.pos = mul(ModelViewProj, pos);
}

// New:      # 19 instructions, 2 R-regs, 2 H-regs
// Original: # 28 instructions, 1 R-regs, 2 H-regs
/**
 * Fragment shader computing the per-pixel reach towards its direct
 * neighbors.
 *
 * Reads the green channel of pixelClass at the pixel itself and at its
 * left, right and bottom neighbors, rescales those values to 0..255 and
 * uses them as 2D indices into the three lookup tables.
 *
 * Output: x = neighborTableRL[left, origin], y = neighborTableLR[origin, right],
 *         z = neighborTableVER[bottom, origin], each divided by 255;
 *         w = 1 when the origin class is >= 14.5, else 0 (not rescaled).
 */
half4 NeighborReach(in half2 pos : WPOS,
                    in  vertexInfo IN,
                    const uniform samplerRECT pixelClass,
                    const uniform samplerRECT neighborTableLR,
                    const uniform samplerRECT neighborTableRL,
                    const uniform samplerRECT neighborTableVER) : COLOR
{
    // Gather the four class values in one vector so they can be rescaled
    // and rounded with a single vector operation.
    half4 classes = half4(texRECT(pixelClass, pos).g,                    //  0, 0
                          texRECT(pixelClass, IN.texCoords[0].xy).g,     // -1, 0
                          texRECT(pixelClass, IN.texCoords[0].zw).g,     //  1, 0
                          texRECT(pixelClass, IN.texCoords[1].xy).g);    //  0, 1
    classes = round(classes * 255);

    const half origin = classes.x;
    const half left   = classes.y;
    const half right  = classes.z;
    const half bottom = classes.w;

    half4 reach;
    reach.x = texRECT(neighborTableRL,  half2(left,   origin)).x;
    reach.y = texRECT(neighborTableLR,  half2(origin, right)).x;
    reach.z = texRECT(neighborTableVER, half2(bottom, origin)).x;
    reach.w = step(14.5, origin);

    // Only the three table results are scaled down; the flag in w stays 0/1.
    return reach / half4(255.0h.xxx, 1);
}

// New:      # 140 instructions, 13 R-regs, 2 H-regs
// Original: # 165 instructions, 10 R-regs, 3 H-regs
/**
 * Fragment shader computing the reachability of the pixels in the current
 * row and the two rows below it (offsets x = -2..2, y = 0..2).
 *
 * neighborTable is sampled at the coordinates prepared by ReachabilityVert
 * and rescaled to 0..255. NOTE(review): it presumably holds the output of
 * NeighborReach (r = reach to the left, g = to the right, b = to the
 * bottom, a = origin flag) -- confirm against the host code.
 *
 * Slots are named after their hexadecimal index in a 5x3 grid:
 *
 *     y = 0:   0  1  .  3  4        ('.' is the origin itself)
 *     y = 1:   5  6  7  8  9
 *     y = 2:   A  B  C  D  E
 *
 * A slot's value is obtained by chaining the reach of an already computed
 * adjacent slot with one more step (chainTable), and, where two routes
 * exist, merging both routes (orTable). A slot is flagged when its value
 * is >= 8.
 *
 * Output (each channel divided by 255):
 *   g : bits 4, 8, 16 = slots 0, 1, 3;  bits 32, 64, 128 = slots 4, 5, 6
 *   b : bits 1, 2, 4, 8 = slots 7, 8, 9, A;  bits 16..128 = slots B, C, D, E
 *   a : alpha of the origin sample, passed through
 *   r : left at 0
 *
 * pos and pixelClass are not used by this pass; they are kept so that the
 * entry-point interface stays unchanged.
 */
half4 Reachability(in half2 pos : WPOS,
                   in vertexInfo IN,
                   const uniform samplerRECT neighborTable,
                   const uniform samplerRECT pixelClass,
                   const uniform samplerRECT orTable,
                   const uniform samplerRECT chainTable) : COLOR
{
    half4 color = half4(0, 0, 0, 0);

    // Replaces "half reachability[15]": the slots are grouped in vectors so
    // the thresholding can be done with one step/dot per group.
    half3 reachability013;
    half3 reachability456;
    half4 reachability789A;
    half4 reachabilityBCDE;

    // Scratch pair used as the 2D index into orTable.
    half2 argument;

    const half3 nr00 = round(texRECT(neighborTable, IN.texCoords[0].xy).rgb * 255);     // -2, 0
    const half3 nr01 = round(texRECT(neighborTable, IN.texCoords[0].zw).rgb * 255);     // -1, 0
    const half4 nr02 = round(texRECT(neighborTable, IN.texCoords[1].xy) * 255);         //  0, 0
    const half3 nr03 = round(texRECT(neighborTable, IN.texCoords[1].zw).rgb * 255);     //  1, 0
    const half3 nr04 = round(texRECT(neighborTable, IN.texCoords[2].xy).rgb * 255);     //  2, 0

    const half3 nr05 = round(texRECT(neighborTable, IN.texCoords[2].zw).rgb * 255);     // -2, 1
    const half3 nr06 = round(texRECT(neighborTable, IN.texCoords[3].xy).rgb * 255);     // -1, 1
    const half3 nr07 = round(texRECT(neighborTable, IN.texCoords[3].zw).rgb * 255);     //  0, 1
    const half3 nr08 = round(texRECT(neighborTable, IN.texCoords[4].xy).rgb * 255);     //  1, 1
    const half3 nr09 = round(texRECT(neighborTable, IN.texCoords[4].zw).rgb * 255);     //  2, 1

    // The corner samples (-2, 2) and (2, 2) are never read by the chains
    // below, so they are not fetched.
    const half3 nr11 = round(texRECT(neighborTable, IN.texCoords[5].zw).rgb * 255);     // -1, 2
    const half3 nr12 = round(texRECT(neighborTable, IN.texCoords[6].xy).rgb * 255);     //  0, 2
    const half3 nr13 = round(texRECT(neighborTable, IN.texCoords[6].zw).rgb * 255);     //  1, 2

    // ROW 0
    reachability013.y = nr02.r;                                             // slot 1
    reachability013.x = texRECT(chainTable, half2(nr02.r, nr01.r)).x;       // slot 0

    reachability013.z = nr02.g;                                             // slot 3
    reachability456.x = texRECT(chainTable, half2(nr02.g, nr03.g)).x;       // slot 4

    // To mask latency
    color.g += dot(step(8.0h.xxx, reachability013),  half3(4,8,16));

    // ROW 1
    reachability789A.x = nr02.b;                                            // slot 7

    // slot 6: via slot 1 or via slot 7
    argument.x = texRECT(chainTable, half2(nr02.r, nr01.b)).x;
    argument.y = texRECT(chainTable, half2(nr02.b, nr07.r)).x;
    reachability456.z = texRECT(orTable, argument).x;

    // slot 5: via slot 0 or via slot 6
    argument.x = texRECT(chainTable, half2(reachability013.x, nr00.b)).x;
    argument.y = texRECT(chainTable, half2(reachability456.z, nr06.r)).x;
    reachability456.y = texRECT(orTable, argument).x;

    // To mask latency
    color.g += dot(step(8.0h.xxx, reachability456),  half3(32,64,128));

    // slot 8: via slot 3 or via slot 7
    argument.x = texRECT(chainTable, half2(nr02.g, nr03.b)).x;
    argument.y = texRECT(chainTable, half2(nr02.b, nr07.g)).x;
    reachability789A.y = texRECT(orTable, argument).x;

    // slot 9: via slot 4 or via slot 8
    argument.x = texRECT(chainTable, half2(reachability456.x, nr04.b)).x;
    argument.y = texRECT(chainTable, half2(reachability789A.y, nr08.g)).x;
    reachability789A.z = texRECT(orTable, argument).x;

    // ROW 2
    // slot C: straight down through slot 7
    reachabilityBCDE.y = texRECT(chainTable, half2(nr02.b, nr07.b)).x;

    // slot B: via slot 6 or via slot C
    argument.x = texRECT(chainTable, half2(reachability456.z, nr06.b)).x;
    argument.y = texRECT(chainTable, half2(reachabilityBCDE.y, nr12.r)).x;
    reachabilityBCDE.x = texRECT(orTable, argument).x;

    // slot A: via slot 5 or via slot B
    argument.x = texRECT(chainTable, half2(reachability456.y, nr05.b)).x;
    argument.y = texRECT(chainTable, half2(reachabilityBCDE.x, nr11.r)).x;
    reachability789A.w = texRECT(orTable, argument).x;

    // To mask latency
    color.b += dot(step(8.0h.xxxx, reachability789A), half4(1,2,4,8));

    // slot D: via slot 8 or via slot C
    argument.x = texRECT(chainTable, half2(reachability789A.y, nr08.b)).x;
    argument.y = texRECT(chainTable, half2(reachabilityBCDE.y, nr12.g)).x;
    reachabilityBCDE.z = texRECT(orTable, argument).x;

    // slot E: via slot 9 or via slot D
    argument.x = texRECT(chainTable, half2(reachability789A.z, nr09.b)).x;
    argument.y = texRECT(chainTable, half2(reachabilityBCDE.z, nr13.g)).x;
    reachabilityBCDE.w = texRECT(orTable, argument).x;

    // To mask latency
    color.b += dot(step(8.0h.xxxx, reachabilityBCDE), half4(16,32,64,128));

    // Pass the origin's alpha through unchanged (it was scaled by 255 above
    // and is scaled back by the division below).
    color.a = nr02.a;

    return color / 255.0;
}

// New:         # 66 instructions, 2 R-regs, 4 H-regs
//              # 70 instructions, 2 R-regs, 5 H-regs - with if branch
// Original:    # 114 instructions, 3 R-regs, 4 H-regs
/**
 * Fragment shader that completes a pixel's reachability mask with the bits
 * contributed by the pixels in the two rows above it.
 *
 * Starts from the pixel's own value in the reachability texture (scaled to
 * 0..255) and, for each of the ten neighbors at offsets x = -2..2,
 * y = -2..-1 (coordinates prepared by CopyReachabilityVert), extracts one
 * bit from that neighbor's packed value:
 *
 *   offset    source bit      destination
 *   (-2,-2)   b & 128         r bit 1
 *   (-1,-2)   b & 64          r bit 2
 *   ( 0,-2)   b & 32          r bit 4
 *   ( 1,-2)   b & 16          r bit 8
 *   ( 2,-2)   b & 8           r bit 16
 *   (-2,-1)   b & 4           r bit 32
 *   (-1,-1)   b & 2           r bit 64
 *   ( 0,-1)   b & 1           r bit 128
 *   ( 1,-1)   g & 128         g bit 1
 *   ( 2,-1)   g & 64          g bit 2
 *
 * If the pixel's own alpha is non-zero the result is forced to white.
 * The returned rgb is divided by 255.
 *
 * pixelClass is not used by this pass; it is kept so that the entry-point
 * interface stays unchanged.
 */
half3 CopyReachability(in half2 pos : WPOS,
                       in vertexInfo IN,
                       const uniform samplerRECT pixelClass,
                       const uniform samplerRECT reachability) : COLOR
{
    half4 outColor = round(texRECT(reachability, pos) * 255);

    // Parallelize: handle four neighbors at a time in one half4.

    // First block
    half4 neighborA;
    neighborA.x = texRECT(reachability, IN.texCoords[0].xy).b;      // -2,-2
    neighborA.y = texRECT(reachability, IN.texCoords[0].zw).b;      // -1,-2
    neighborA.z = texRECT(reachability, IN.texCoords[1].xy).b;      //  0,-2
    neighborA.w = texRECT(reachability, IN.texCoords[1].zw).b;      //  1,-2

    // Multiply and round
    neighborA = round(neighborA * 255);

    // First fmod operations: drop the bits above the one of interest
    // (.x needs none, its bit of interest is the top one).
    neighborA.yzw = fmod(neighborA.yzw, half3(128, 128, 32));

    // One extra fmod: .z must also lose bit 64
    neighborA.z = fmod(neighborA.z, 64);

    // Values at once: what remains is >= the bit's weight iff the bit is set
    neighborA = step(half4(128, 64, 32, 16), neighborA);
    outColor.r += dot(neighborA, half4(1,2,4,8));

    // Second block
    half4 neighborB;
    neighborB.x = texRECT(reachability, IN.texCoords[2].xy).b;      //  2,-2
    neighborB.y = texRECT(reachability, IN.texCoords[2].zw).b;      // -2,-1
    neighborB.z = texRECT(reachability, IN.texCoords[3].xy).b;      // -1,-1
    neighborB.w = texRECT(reachability, IN.texCoords[3].zw).b;      //  0,-1

    // Multiply and round
    neighborB = round(neighborB * 255);

    // Fmod operations
    neighborB = fmod(neighborB, half4(16, 8, 4, 2));

    // Values at once (0.9 instead of 1 for the lowest bit, as in the
    // original code)
    neighborB = step(half4(8, 4, 2, 0.9), neighborB);
    outColor.r += dot(neighborB, half4(16,32,64,128));

    // (round(tex*255)) < 128 ? 0 : 1
    half4 neighbor;
    neighbor = round(texRECT(reachability, IN.texCoords[4].xy) * 255);      //  1,-1
    outColor.g += step(128, neighbor.g);

    // (round(tex*255)) mod 128 < 64 ? 0 : 2
    // Fixed: this sample must come from texCoords[4].zw, the (2,-1) offset.
    // texCoords[0].zw is (-1,-2), which was already consumed above.
    neighbor = round(texRECT(reachability, IN.texCoords[4].zw) * 255);      //  2,-1
    neighbor.g -= 128 * step(128, neighbor.g);
    outColor.g += step(64, neighbor.g) * 2;

    // A non-zero alpha on the pixel itself overrides the mask with white.
    if(outColor.a > 0) {
        outColor.rgb = half3(255, 255, 255);
    }

    return outColor.rgb / 255.0;
}
```