/*
==========================================================================================
 Cg Acceleration Research

 Optimized: Edgar Velázquez-Armendáriz - edgar [at] graphics [dot] cornell [dot] edu
 Original:  Eugene Lee (el77 [at] cornell [dot] edu), 
            Alex Liberman (al262 [at] cornell [dot] edu)
------------------------------------------------------------------------------------------
 PixelClass.cg

 Classifies pixels into empty, simple, and complex pixels. Also handles ordering of edges.
==========================================================================================
*/


// PixelClassPointCull
//
// Unified fragment program that performs both the pixel classify and the
// point cull in a single pass, using multiple render targets (MRT):
//   OUT[0] = PixelClass
//   OUT[1] = PointCull
//
// Instruction counts recorded by the authors (re-measure after any change):
//   # 82 instructions, 3 R-regs, 2 H-regs, no if-code
//   # 84 instructions, 2 R-regs, 3 H-regs, if code fp40
// Original, separate programs:
//   -PixelClass:   # 94 instructions, 3 R-regs, 1 H-regs
//   -PointCull:    # 4 instructions, 2 R-regs, 0 H-regs
//
// Parameters:
//   pixelPos          - window position of the fragment (WPOS).
//   edgeIntersections - edge intersection codes. For the current texel, .r is
//                       read as the top edge and .g as the left edge; the
//                       bottom edge is .r of the texel at y - 1 and the right
//                       edge is .g of the texel at x + 1.
//   subPixelLocations - sampled once at pixelPos. The .ba channels (scaled by
//                       255) feed the edge-ordering lookup; .rgb feed OUT[1].
//   bitExtract        - lookup table addressed with (intersection code, 1);
//                       only .r is used.
//   emptyOrder        - lookup table addressed with (emptyIndex, sub-pixel
//                       mask); only .r is used. It selects the order of the
//                       two edges and whether OUT[0].b is cleared.
//
// Outputs:
//   OUT[0] - exactly two intersected edges: (packed edge pair, flag) / 255,
//            where the flag is 255 unless the emptyOrder value is >= 2.
//            More than two intersections: (1, 15/255, 1).
//            Fewer than two intersections: (0, 0, 1).
//            (Presumably these correspond to the simple / complex / empty
//            pixel classes named in the file header -- confirm with callers.)
//   OUT[1] - (colorSample.r, colorSample.g, colorSample.b * OUT[0].b).
void PixelClassPointCull(   in half2 pixelPos                               : WPOS, 
                            const uniform samplerRECT edgeIntersections,
                            const uniform samplerRECT subPixelLocations,
                            const uniform samplerRECT bitExtract,
                            const uniform samplerRECT emptyOrder,
                            out half3 OUT[2]                                : COLOR0)
{
    // Edge identifiers stored alongside each intersection.
    const static float BOTTOM = 0;
    const static float RIGHT  = 1;
    const static float TOP    = 2;
    const static float LEFT   = 3;

    // All four intersections are kept in a single half4 vector:
    //   - Top:    x
    //   - Bottom: y
    //   - Left:   z
    //   - Right:  w
    half4 Intersections;

    // Fetch the four intersections around the pixel.
    Intersections.xz = texRECT(edgeIntersections, pixelPos).rg;                             // Top, Left
    Intersections.y  = texRECT(edgeIntersections, half2(pixelPos.x, pixelPos.y - 1)).r;     // Bottom
    Intersections.w  = texRECT(edgeIntersections, half2(pixelPos.x + 1, pixelPos.y)).g;     // Right

    // Shared by the edge-ordering lookup and by the point cull output.
    const half4 colorSample = texRECT(subPixelLocations, pixelPos);

    // Scale by 255 to recover the integer intersection codes, for all four
    // edges at once (original note: the codes end up in [0, 8]).
    Intersections = round(Intersections * 255.0);

    // Number of edges with a non-zero intersection code. After round() each
    // component is an integer, so step() against a tiny threshold yields 1
    // for every non-zero component and the dot product sums them.
    half intersectionCount = dot( step(0.001953125h.xxxx, Intersections), 1.0h.xxxx );

    if (intersectionCount == 2) {

        // Each hit is stored as a (bits, edge) pair: "bits" comes from the
        // bitExtract lookup and "edge" is LEFT = 3, TOP = 2, RIGHT = 1,
        // BOTTOM = 0. Every hit after the first shifts the previous pair from
        // .zw into .xy, so with exactly two hits .xy holds the first one found
        // and .zw the second (test order: top, bottom, left, right).
        //
        // Zero-initialized so the shift never reads undefined components when
        // the first hit is not the top edge.
        float4 edgeIntersection = float4(0, 0, 0, 0);

        // Top intersection
        if (Intersections.x > 0) {
            edgeIntersection.z = texRECT(bitExtract, float2(Intersections.x, 1)).r;
            edgeIntersection.w = TOP;
        }

        // Bottom intersection
        if (Intersections.y > 0) {
            edgeIntersection.xy = edgeIntersection.zw;
            edgeIntersection.z = texRECT(bitExtract, float2(Intersections.y, 1)).r;
            edgeIntersection.w = BOTTOM;
        }

        // Left intersection
        if (Intersections.z > 0) {
            edgeIntersection.xy = edgeIntersection.zw;
            edgeIntersection.z = texRECT(bitExtract, float2(Intersections.z, 1)).r;
            edgeIntersection.w = LEFT;
        }

        // Right intersection
        if (Intersections.w > 0) {
            edgeIntersection.xy = edgeIntersection.zw;
            edgeIntersection.z = texRECT(bitExtract, float2(Intersections.w, 1)).r;
            edgeIntersection.w = RIGHT;
        }

        // Blue starts at 255 and is cleared below when the ordering value is >= 2.
        half3 tColor = float3(0, 0, 255);
        half2 subPixelMask = round(colorSample.ba * 255);

        // A zero in the first mask component selects row 16 of emptyOrder.
        if (subPixelMask.x == 0) {
            subPixelMask.y = 16;
        }

        // Edge ordering. The dot product is the vectorized form of
        //   edgeIntersection.y * 256 + edgeIntersection.w * 64 +
        //   edgeIntersection.x * 8   + edgeIntersection.z
        float emptyIndex = dot(edgeIntersection, half4(8,256,1,64));
        float t = texRECT(emptyOrder, float2(emptyIndex, subPixelMask.y)).r;

        if (t >= 2) {
            tColor.b = 0;
        }

        // t == 1 or t == 3 packs the second hit as the leading pair;
        // any other value keeps the order in which the hits were found.
        if (t == 1 || t == 3) {
            tColor.rg = edgeIntersection.zw * float2(8,4) + edgeIntersection.xy;
        }
        else {
            tColor.rg = edgeIntersection.xy * float2(8,4) + edgeIntersection.zw;
        }

        // Pixel class info, scaled back by 1/255 for output.
        OUT[0] = tColor / 255.0;

    }
    else {
        // More than two intersections -> (1, 15/255, 1); fewer -> (0, 0, 1).
        OUT[0] = half3( (intersectionCount > 2 ? half2(1, 15/255.0) : half2(0,0) ),1);
    }

    // Point cull: pass through the sample's .rg and scale its .b by the
    // blue channel just written to the pixel class output.
    OUT[1] = half3(colorSample.rg, colorSample.b * OUT[0].b);
}