/*
==========================================================================================
 Cg Acceleration Research

 Edgar Velázquez Armendáriz - edgar [at] graphics [dot] cornell [dot] edu
------------------------------------------------------------------------------------------
 setImageId.cg

 Set Image ID shaders.
==========================================================================================
*/

/**
 * Simple vertex input/output structure: a position bound to the POSITION
 * semantic and a half precision color bound to the COLOR semantic.
 */
struct vertSimpleData {
    float4 pos      : POSITION;     // Vertex position
    half4  col      : COLOR;        // Vertex color
};




/**
 * Fragment program that forwards the image ID as the output color.
 *
 * IMPORTANT: the 24 bit image ID arrives in the RGB channels of the incoming color, but
 * it has to end up in the GBA channels of the output, because the R channel contains
 * the age. A swizzled write performs that shift; this program never writes OUT.r.
 *
 * fp40: # 1 instructions, 0 R-regs, 0 H-regs
 */
void copyColorFrag( in  half4 IN        : COLOR,
                    out half4 OUT       : COLOR )
{
    // The three bytes of the image ID, as they come in the interpolated color
    half3 imageId = IN.rgb;

    // Shifted one channel to the right, so that R (the age) is left alone
    OUT.gba = imageId;
}


// Vertex program that receives the point ID encoded in the x,y position. The ID is decoded
// into a linear index, and the index is converted into the homogeneous clip space position
// of the matching cell of the point cloud texture. The incoming color is passed through.
//
// Instruction counts measured before the color copy was moved out of the branch:
// vp40:    # 29 instructions, 2 R-regs (MIMD branching)
//          # 23 instructions, 2 R-regs (regular code)
void colorCopyVert( uniform float LEN,          // The length of the point cloud texture
                    in  vertSimpleData IN,
                    out vertSimpleData OUT ) 
{

    // The original layout of the pointid_flags that was read as vertices is
    //
    // R  - flags
    // G  - LSB of pointid
    // B
    // A  - MSB

    // The color is copied on every path, so that no output of the program is ever left
    // unwritten, not even for the vertices that are thrown away below.
    OUT.col = IN.col;

    // A pixel with no point ID and flags = 0x10 means that no point was mapped there. That
    // translates into an incoming vertex (16,0)
    if ( any(IN.pos.xy != float2(16,0)) ) {

        // First the index is reconstructed: x is scaled down so that its integer part is
        // the LSB of the point ID, and y is scaled up to the weight of the upper bytes
        float2 scaled = float2(1/256.0, 256.0) * IN.pos.xy;

        // The data written to the pointid_flags texture was meant to be unsigned bytes,
        // but the vertices are interpreted as SIGNED shorts, so any number above 0x7FFF is
        // read as a negative number. The y part has no such problem, because its range
        // will not get that high until there are around 8 million points. The LSB does
        // get there, so the negative results are corrected by adding 256 to them.
        // Using floor and 256 is valid because all the numbers have a flags field,
        // therefore the division of IN.pos.x by 256 always has a fractional part, which
        // moves all the negative results one unit behind.
        float index = floor(scaled.x) + scaled.y + (scaled.x < 0 ? 256 : 0);

        // modf returns the fractional part of index/LEN, which is stored in x, and writes
        // the integral part into y
        float2 cell;
        cell.x = modf(index/LEN, cell.y);

        // The regions without points will have an index equal to zero. However, in the
        // real implementation the points used are 32 and above, so writing trash data to
        // point 0 will not be a problem, and it is one less test for this shader

        // Calculates the homogeneous xy coordinates
        cell = (1/LEN - 1.0).xx + float2(2.0, 2/LEN) * cell;

        // z is always 0 and w is always 1. By now cell contains numbers in the
        // range [-1, 1] for valid values
        OUT.pos = float4(cell, 0, 1);

    }
    else {

        // Unmapped pixel: the vertex is sent to a position outside of the [-1, 1] range
        // that the valid points use
        OUT.pos = float4(-2,-2,-2,1);
    }


}