/*
==========================================================================================
 Cg Acceleration Research

 Optimized:        Edgar Velázquez-Armendáriz - edgar [at] graphics [dot] cornell [dot] edu
 Original version: Eugene Lee (el77 [at] cornell [dot] edu), 
                   Alex Liberman (al262 [at] cornell [dot] edu)
------------------------------------------------------------------------------------------
 EdgeRasters.cg

 Rasterizes edges on an image with 8x8 subpixel accuracy.
==========================================================================================
*/


// vp40: # 40 instructions, 3 R-regs
// orig: # 66 instructions, 4 R-regs
struct VertexOutput {
    // Values produced by EdgeRastersVP for each vertex of an edge.

    // Clip-space position of the vertex (x is forced to -1e38 for culled edges).
    float4 position : POSITION;
    // Image-space endpoints of the edge packed as (x0, y0, x1, y1),
    // stored in the same order for both vertices of the edge.
    float4 edgeVertices : TEXCOORD1;
};


VertexOutput EdgeRastersVP(float4 position : POSITION, 
                           float3 edgeVO : TEXCOORD0,
                           const uniform float width, 
                           const uniform float height,
                           const uniform float4x4 modelViewProjMatrix) {

    // Vertex program for edge rasterization.
    //
    //   position             - object-space position of this vertex of the edge
    //   edgeVO               - object-space position of the other endpoint of the edge
    //   width, height        - factors used to scale normalized coordinates to image space
    //   modelViewProjMatrix  - transform from object space to homogeneous clip space
    //
    // Returns the (slightly displaced) clip-space position of this vertex together
    // with both image-space endpoints of the edge in a canonical order.
    //
    // Instruction counts noted by the original authors: 66 with scalar code,
    // 49 and then 41 after vectorizing the image-space mapping, 40 after also
    // vectorizing the mapping back to clip space.

    // Both endpoints in homogeneous clip space.
    float4 clipA = mul(modelViewProjMatrix, position);
    float4 clipB = mul(modelViewProjMatrix, float4(edgeVO.xyz, 1.0f));

    // Both endpoints in image space, computed with a single float4 expression:
    // perspective divide, then * 0.5 + 0.5, then scale by (width, height).
    float4 imageAB = ((( float4(clipA.xy, clipB.xy) / float4(clipA.ww, clipB.ww) ) * 0.5)
        + 0.5f.xxxx) * float4(width, height, width, height);
    float2 imageA = imageAB.xy;
    float2 imageB = imageAB.zw;

    // Vector of length 2 (in image-space units) pointing from this vertex
    // towards the other endpoint.
    float2 shift = normalize(imageB - imageA) * 2;

    // A "small" edge has both endpoints inside the same unit cell of image space.
    float2 cellA = floor(imageA);
    float2 cellB = floor(imageB);
    bool isSmallEdge = (cellA.x == cellB.x && cellA.y == cellB.y);

    // Displace this vertex by -shift in image space and map the result back to
    // homogeneous clip space (the inverse of the mapping used above).
    clipA.xy = ((imageA - shift) / float2(width, height) - 0.5f.xx) * clipA.ww / 0.5f.xx;

    VertexOutput result;
    result.position = clipA;

    // Small edges are culled by moving the vertex to a huge negative x.
    if (isSmallEdge) {
        result.position.x = -1e38;
    }

    // Emit the endpoints in the same order for both vertices of an edge (smaller x
    // first, ties broken by smaller y) so that the value reaching the fragment
    // program after interpolation is correct.
    bool thisVertexFirst = ((imageA.x == imageB.x) && (imageA.y < imageB.y)) || (imageA.x < imageB.x);
    result.edgeVertices = thisVertexFirst ? float4(imageA.xy, imageB.xy)
                                          : float4(imageB.xy, imageA.xy);

    return result;
}


// fp40: # 23 instructions, 3 R-regs, 2 H-regs
// orig: # 45 instructions, 2 R-regs, 1 H-regs
half3 EdgeRastersFP( in float3 position : WPOS, 
                     in float4 edgeVertices : TEXCOORD1,
                     uniform samplerRECT depthImage,
                     uniform float BIAS) : COLOR0 
{
    // Fragment program for edge rasterization.
    //
    //   position      - window position of the fragment (WPOS)
    //   edgeVertices  - image-space endpoints of the edge packed as (x0, y0, x1, y1)
    //   depthImage    - not referenced in this program
    //   BIAS          - not referenced in this program
    //
    // Returns (2^kx, 2^ky, 0) / 255, where kx and ky are floor(7 * offset) of the
    // edge's crossing point along a horizontal and a vertical pixel boundary.
    // A component is 0 when the edge does not cross that boundary inside the pixel
    // or when the crossing lies outside the segment (t outside [0, 1]).
    //
    // Instruction counts noted by the original authors: vectorizing the pixel
    // bounds left the count unchanged; vectorizing the intersection computation
    // and the test reduced it from 44 to 23.

    // Endpoints of the edge.
    float2 edgeVertex0 = edgeVertices.xy;
    float2 edgeVertex1 = edgeVertices.zw;

    // Bounds of the pixel: (left, right, top, bottom) =
    // (x - 0.5, x + 0.5, y - 0.5, y + 0.5).
    float4 lrtb = position.xxyy + float4(-0.5, 0.5, -0.5, 0.5);

    // The edge is parametrized as P(t) = edgeVertex0 + t * edgeDirection.
    float2 edgeDirection = edgeVertex1 - edgeVertex0;

    float2 intersectionXY;
    float2 tLeftTop;
    float2 xyLocation;
    half2 pXY;

    // Parameter t at which the edge reaches x = lrtb.x and y = lrtb.w.
    // NOTE(review): lrtb.w is the "bottom" bound (y + 0.5) although the variable
    // is named tLeftTop; confirm which horizontal boundary is intended.
    // NOTE(review): a component of edgeDirection is zero for axis-aligned edges,
    // which makes this a division by zero; confirm the resulting inf/NaN is
    // rejected by the test below on the target profile.
    tLeftTop = (lrtb.xw - edgeVertex0) / edgeDirection;

    // x of the crossing with the horizontal boundary, y of the crossing with the
    // vertical boundary (hence the .yx swizzle of the parameters).
    intersectionXY = edgeDirection * tLeftTop.yx + edgeVertex0;

    // 1 where the crossing lies within the pixel bounds and within the segment,
    // 0 otherwise (evaluated per component).
    pXY = (!((intersectionXY > lrtb.yw) || (intersectionXY < lrtb.xz) || (tLeftTop.yx < 0.0f.xx) || (tLeftTop.yx > 1.0f.xx)));

    // Quantize the offset from the (left, top) corner to an integer k and encode
    // it as 2^k; rejected crossings are zeroed by pXY.
    xyLocation = exp2(floor((intersectionXY - lrtb.xz) * 7)) * pXY;

    return float3(xyLocation, 0) / 255.0;
}