/*
==========================================================================================
 Cg Acceleration Research
 
 Optimized: Edgar Velázquez-Armendáriz - edgar [at] graphics [dot] cornell [dot] edu
 Original:  Eugene Lee (el77 [at] cornell [dot] edu)
------------------------------------------------------------------------------------------
 Interpolation5.cg

 Performs edge-respecting 5x5 interpolation.
==========================================================================================
*/


// This version: # 214 instructions, 25 R-regs, 9 H-regs
// Original:     # 325 instructions, 2 R-regs, 9 H-regs
// Time:         6.3 ms
// Edge-respecting 5x5 weighted average of colorImage around the fragment.
//
// The 24 neighbors are visited in row-major order (top-left to bottom-right,
// skipping the center) and stored in three banks of eight:
//   rNeighbors[0..7], gNeighbors[0..7], bNeighbors[0..7].
// Bit k of the red / green / blue channel of the reachability texel (read as
// an 8-bit integer) gates entry k of the corresponding bank.  A neighbor also
// contributes only if its blue channel is >= 0.0001.
//
// Kernel weights (center handled separately):
//      1  1  1  1  1
//      1  4  8  4  1
//      1  8 32  8  1
//      1  4  8  4  1
//      1  1  1  1  1
//
// Parameters:
//   pos            - window position of the fragment (WPOS).
//   reachability   - per-pixel bit masks selecting which neighbors may be used.
//   colorImage     - source color image; the blue channel doubles as the
//                    "sample present" test.
//   prioritySeqnum - priority value read at pos when the accumulated weight
//                    is above 64/255.
//   priorityTable  - lookup table indexed by the accumulated weight, used
//                    otherwise.
//
// Returns: rgb = weighted average color, a = priority.  The fragment is
// discarded when the accumulated weight is below 1.
half4 Interpolation5(in half2 pos : WPOS,
    const uniform samplerRECT reachability,
    const uniform samplerRECT colorImage,
    const uniform samplerRECT prioritySeqnum,
    const uniform samplerRECT priorityTable) : COLOR
{
    half modulator, centerModulator, weight = 0;
    half3 selfColor;
    half3 averageColor = float3(0, 0, 0);
    // Reachability masks as integer values in [0, 255], one per channel.
    half3 reach = round(texRECT(reachability, pos).rgb * 255);

    half3 rNeighbors[8];
    half3 gNeighbors[8];
    half3 bNeighbors[8];

    half3 rWeights[8];
    half3 gWeights[8];
    half3 bWeights[8];

    // Row 0 weights
    rWeights[0] = 1h;
    rWeights[1] = 1h;
    rWeights[2] = 1h;
    rWeights[3] = 1h;
    rWeights[4] = 1h;

    // Row 1 weights
    rWeights[5] = 1h;
    rWeights[6] = 4h;
    rWeights[7] = 8h;
    gWeights[0] = 4h;
    gWeights[1] = 1h;

    // Row 2 weights
    gWeights[2] = 1h;
    gWeights[3] = 8h;
    // (the center pixel sits here; it is accumulated separately with weight 32)
    gWeights[4] = 8h;
    gWeights[5] = 1h;

    // Row 3 weights
    gWeights[6] = 1h;
    gWeights[7] = 4h;
    bWeights[0] = 8h;
    bWeights[1] = 4h;
    bWeights[2] = 1h;

    // Row 4 weights
    bWeights[3] = 1h;
    bWeights[4] = 1h;
    bWeights[5] = 1h;
    bWeights[6] = 1h;
    bWeights[7] = 1h;

    // Neighbor fetches, row-major over the 5x5 window (center excluded).
    rNeighbors[0] = texRECT(colorImage, pos.xy + half2(-2, -2)).rgb;
    rNeighbors[1] = texRECT(colorImage, pos.xy + half2(-1, -2)).rgb;
    rNeighbors[2] = texRECT(colorImage, pos.xy + half2( 0, -2)).rgb;
    rNeighbors[3] = texRECT(colorImage, pos.xy + half2( 1, -2)).rgb;
    rNeighbors[4] = texRECT(colorImage, pos.xy + half2( 2, -2)).rgb;
    rNeighbors[5] = texRECT(colorImage, pos.xy + half2(-2, -1)).rgb;
    rNeighbors[6] = texRECT(colorImage, pos.xy + half2(-1, -1)).rgb;
    rNeighbors[7] = texRECT(colorImage, pos.xy + half2( 0, -1)).rgb;

    gNeighbors[0] = texRECT(colorImage, pos.xy + half2( 1, -1)).rgb;
    gNeighbors[1] = texRECT(colorImage, pos.xy + half2( 2, -1)).rgb;
    // First texel of the center row: offset (-2, 0).  It must not repeat
    // the (2, 0) fetch performed for gNeighbors[5].
    gNeighbors[2] = texRECT(colorImage, pos.xy + half2(-2,  0)).rgb;
    gNeighbors[3] = texRECT(colorImage, pos.xy + half2(-1,  0)).rgb;
    gNeighbors[4] = texRECT(colorImage, pos.xy + half2( 1,  0)).rgb;
    gNeighbors[5] = texRECT(colorImage, pos.xy + half2( 2,  0)).rgb;
    gNeighbors[6] = texRECT(colorImage, pos.xy + half2(-2,  1)).rgb;
    gNeighbors[7] = texRECT(colorImage, pos.xy + half2(-1,  1)).rgb;

    bNeighbors[0] = texRECT(colorImage, pos.xy + half2( 0,  1)).rgb;
    bNeighbors[1] = texRECT(colorImage, pos.xy + half2( 1,  1)).rgb;
    bNeighbors[2] = texRECT(colorImage, pos.xy + half2( 2,  1)).rgb;
    bNeighbors[3] = texRECT(colorImage, pos.xy + half2(-2,  2)).rgb;
    bNeighbors[4] = texRECT(colorImage, pos.xy + half2(-1,  2)).rgb;
    bNeighbors[5] = texRECT(colorImage, pos.xy + half2( 0,  2)).rgb;
    bNeighbors[6] = texRECT(colorImage, pos.xy + half2( 1,  2)).rgb;
    bNeighbors[7] = texRECT(colorImage, pos.xy + half2( 2,  2)).rgb;


    // Bit extraction: step(2^k, fmod(v, 2^(k+1))) is 1 when bit k of v is set.
    // Four bits are processed per vector operation.

    // Data for reach.r: 2x4 fmod, 2x4 step operations with those results
    const half4 reachRfmod1 = fmod(reach.rrrr, half4(2,4,8,16));
    const half4 reachRfmod2 = fmod(reach.rrrr, half4(32,64,128,256));
    const half4 reachRstep1 = step(half4(1,2,4,8),      reachRfmod1);
    const half4 reachRstep2 = step(half4(16,32,64,128), reachRfmod2);

    // Data for reach.g: 2x4 fmod, 2x4 step operations with those results
    const half4 reachGfmod1 = fmod(reach.gggg, half4(2,4,8,16));
    const half4 reachGfmod2 = fmod(reach.gggg, half4(32,64,128,256));
    const half4 reachGstep1 = step(half4(1,2,4,8),      reachGfmod1);
    const half4 reachGstep2 = step(half4(16,32,64,128), reachGfmod2);

    // Data for reach.b: 2x4 fmod, 2x4 step operations with those results
    const half4 reachBfmod1 = fmod(reach.bbbb, half4(2,4,8,16));
    const half4 reachBfmod2 = fmod(reach.bbbb, half4(32,64,128,256));
    const half4 reachBstep1 = step(half4(1,2,4,8),      reachBfmod1);
    const half4 reachBstep2 = step(half4(16,32,64,128), reachBfmod2);

    // Presence test: 1 where the neighbor's blue channel is >= 0.0001.

    // Data for the rNeighbors.b
    const half4 rNeighborsStep1 = step(0.0001.xxxx, 
        half4(rNeighbors[0].b, rNeighbors[1].b, rNeighbors[2].b, rNeighbors[3].b));
    const half4 rNeighborsStep2 = step(0.0001.xxxx, 
        half4(rNeighbors[4].b, rNeighbors[5].b, rNeighbors[6].b, rNeighbors[7].b));

    // Data for the gNeighbors.b
    const half4 gNeighborsStep1 = step(0.0001.xxxx, 
        half4(gNeighbors[0].b, gNeighbors[1].b, gNeighbors[2].b, gNeighbors[3].b));
    const half4 gNeighborsStep2 = step(0.0001.xxxx, 
        half4(gNeighbors[4].b, gNeighbors[5].b, gNeighbors[6].b, gNeighbors[7].b));

    // Data for the bNeighbors.b
    const half4 bNeighborsStep1 = step(0.0001.xxxx, 
        half4(bNeighbors[0].b, bNeighbors[1].b, bNeighbors[2].b, bNeighbors[3].b));
    const half4 bNeighborsStep2 = step(0.0001.xxxx, 
        half4(bNeighbors[4].b, bNeighbors[5].b, bNeighbors[6].b, bNeighbors[7].b));


    // A neighbor contributes only when it is both present and reachable.

    // R - modulators
    const half4 rModulator1 = rNeighborsStep1 * reachRstep1;
    const half4 rModulator2 = rNeighborsStep2 * reachRstep2;

    // G - modulators
    const half4 gModulator1 = gNeighborsStep1 * reachGstep1;
    const half4 gModulator2 = gNeighborsStep2 * reachGstep2;

    // B - modulators
    const half4 bModulator1 = bNeighborsStep1 * reachBstep1;
    const half4 bModulator2 = bNeighborsStep2 * reachBstep2;



    // ****** ROW 0 ******

    modulator = rModulator1.x;
    averageColor += modulator * rNeighbors[0] * rWeights[0];
    weight += modulator * rWeights[0].x;

    modulator = rModulator1.y;
    averageColor += modulator * rNeighbors[1] * rWeights[1];
    weight += modulator * rWeights[1].x;

    modulator = rModulator1.z;
    averageColor += modulator * rNeighbors[2] * rWeights[2];
    weight += modulator * rWeights[2].x;

    modulator = rModulator1.w;
    averageColor += modulator * rNeighbors[3] * rWeights[3];
    weight += modulator * rWeights[3].x;

    modulator = rModulator2.x;
    averageColor += modulator * rNeighbors[4] * rWeights[4];
    weight += modulator * rWeights[4].x;
    

    // ****** ROW 1 ******

    modulator = rModulator2.y;
    averageColor += modulator * rNeighbors[5] * rWeights[5];
    weight += modulator * rWeights[5].x;

    modulator = rModulator2.z;
    averageColor += modulator * rNeighbors[6] * rWeights[6];
    weight += modulator * rWeights[6].x;

    modulator = rModulator2.w;
    averageColor += modulator * rNeighbors[7] * rWeights[7];
    weight += modulator * rWeights[7].x;

    modulator = gModulator1.x;
    averageColor += modulator * gNeighbors[0] * gWeights[0];
    weight += modulator * gWeights[0].x;

    modulator = gModulator1.y;
    averageColor += modulator * gNeighbors[1] * gWeights[1];
    weight += modulator * gWeights[1].x;
    

    // ****** ROW 2 ******
    
    modulator = gModulator1.z;
    averageColor += modulator * gNeighbors[2] * gWeights[2];
    weight += modulator * gWeights[2].x;

    modulator = gModulator1.w;
    averageColor += modulator * gNeighbors[3] * gWeights[3];
    weight += modulator * gWeights[3].x;
    

    // Center pixel: not gated by reachability, only by its own presence test.
    selfColor = texRECT(colorImage, pos).rgb;
    centerModulator = step(0.0001, selfColor.b);
    averageColor += centerModulator * selfColor * 32;
    weight += centerModulator * 32;

    
    modulator = gModulator2.x;
    averageColor += modulator * gNeighbors[4] * gWeights[4];
    weight += modulator * gWeights[4].x;

    modulator = gModulator2.y;
    averageColor += modulator * gNeighbors[5] * gWeights[5];
    weight += modulator * gWeights[5].x;
    

    // ****** ROW 3 ******
    
    modulator = gModulator2.z;
    averageColor += modulator * gNeighbors[6] * gWeights[6];
    weight += modulator * gWeights[6].x;

    modulator = gModulator2.w;
    averageColor += modulator * gNeighbors[7] * gWeights[7];
    weight += modulator * gWeights[7].x;

    modulator = bModulator1.x;
    averageColor += modulator * bNeighbors[0] * bWeights[0];
    weight += modulator * bWeights[0].x;

    modulator = bModulator1.y;
    averageColor += modulator * bNeighbors[1] * bWeights[1];
    weight += modulator * bWeights[1].x;

    modulator = bModulator1.z;
    averageColor += modulator * bNeighbors[2] * bWeights[2];
    weight += modulator * bWeights[2].x;
    
    
    // ****** ROW 4 ******  
    
    modulator = bModulator1.w;
    averageColor += modulator * bNeighbors[3] * bWeights[3];
    weight += modulator * bWeights[3].x;

    modulator = bModulator2.x;
    averageColor += modulator * bNeighbors[4] * bWeights[4];
    weight += modulator * bWeights[4].x;

    modulator = bModulator2.y;
    averageColor += modulator * bNeighbors[5] * bWeights[5];
    weight += modulator * bWeights[5].x;

    modulator = bModulator2.z;
    averageColor += modulator * bNeighbors[6] * bWeights[6];
    weight += modulator * bWeights[6].x;

    modulator = bModulator2.w;
    averageColor += modulator * bNeighbors[7] * bWeights[7];
    weight += modulator * bWeights[7].x;


    // Discards pixels without samples in the 5x5 neighborhood
    if (weight < 1) discard;
    

    half4 outColor;

    outColor.rgb = averageColor / weight;
    // Normalized weight; adding centerModulator saturates it to 1 whenever
    // the center pixel itself passed the presence test.
    outColor.a = saturate(weight / 255.0 + centerModulator);

    // Priority calculation
    const half pWeight = outColor.a;
    half priority;

    // When the normalized weight is above 64/255, keep the previously
    // established priority value for this pixel; otherwise look the
    // priority up in the table, indexed by the (de-normalized) weight.
    if (pWeight > 64/255.0) {       // The value was already normalized!
        priority = texRECT(prioritySeqnum, pos).r;
    }
    else {
        priority = texRECT(priorityTable, half2(pWeight * 255 + 0.5, 0.5)).r;
    }
    outColor.a = priority;


    return outColor;
}