/*
==========================================================================================
Cg Acceleration Research
Edgar Velázquez Armendáriz - edgar [at] graphics [dot] cornell [dot] edu
------------------------------------------------------------------------------------------
setImageId.cg
Set Image ID shaders.
==========================================================================================
*/
/**
* Simple vertex input/output structure: a position bound to the POSITION
* semantic and a color bound to the COLOR semantic. colorCopyVert uses this
* same layout for both its input and its output.
*/
struct vertSimpleData {
// Vertex position (POSITION semantic), full precision.
float4 pos : POSITION;
// Vertex color (COLOR semantic), half precision.
half4 col : COLOR;
};
/**
 * Fragment program that stores the image ID carried by the incoming color.
 *
 * IMPORTANT: the 24-bit image ID arrives in the RGB channels of the input
 * color, but it has to be written into the GBA channels of the output because
 * the R channel contains the age. A swizzled assignment performs the shift;
 * OUT.r is intentionally not written by this program.
 *
 * fp40: # 1 instructions, 0 R-regs, 0 H-regs
 */
void copyColorFrag( in  half4 IN  : COLOR,
                    out half4 OUT : COLOR )
{
    // xyzw and rgba name the same components: (g, b, a) <- (r, g, b).
    OUT.yzw = IN.xyz;
}
// Vertex program that scatters each incoming vertex to the location addressed
// by its point ID.
//
// The point ID arrives encoded in the x,y position of the vertex. It is decoded
// into a linear index, split into a (column, row) pair for a layout that is LEN
// entries wide, and then mapped to homogeneous clip space so that the vertex
// falls on the center of that entry. The incoming color is forwarded unchanged.
//
// Vertices equal to the sentinel (16,0) carry no point; they are moved to
// (-2,-2,-2,1), which lies outside the [-1, 1] range produced for valid points.
//
// Instruction counts reported by the original author. They were measured
// before the color copy was hoisted out of the branch and have not been
// re-measured:
// vp40: # 29 instructions, 2 R-regs (MIMD branching)
//       # 23 instructions, 2 R-regs (regular code)
void colorCopyVert( uniform float LEN, // The length of the point cloud texture
                    in vertSimpleData IN,
                    out vertSimpleData OUT )
{
    // Forward the color on every path. Previously it was written only inside
    // the valid-point branch, which left OUT.col undefined for the sentinel
    // vertex.
    OUT.col = IN.col;

    // The original layout of the pointid_flags data that is read as vertices is
    //
    //   R - flags
    //   G - LSB of pointid
    //   B
    //   A - MSB
    //
    // A pixel with no point ID and flags = 0x10 means that no point was mapped
    // there. That translates into an incoming vertex (16,0).
    if ( any(IN.pos.xy != float2(16,0)) ) {
        // First reconstruct the index: x is divided by 256 and y is multiplied
        // by 256.
        float2 tmp = float2(1/256.0, 256.0) * IN.pos.xy;

        // The data written to the pointid_flags texture was meant to be
        // unsigned bytes, but the vertices are interpreted as SIGNED shorts, so
        // any value above 0x7FFF is read as a negative number. The y part is
        // not a problem, because its range will not get that high until there
        // are around 8 million points. The x part, which holds the LSB, does
        // hit this case, so 256 is added back for negative values only.
        //
        // floor() together with 256 is used here (the original note describes
        // the alternative as adding 0xFF to the integer part). According to the
        // original author this is valid because every value has a flags field,
        // so IN.pos.x / 256 always has a fractional part, and it reduces the
        // instruction count from 23 to 20.
        float index = floor(tmp.x) + tmp.y + (tmp.x < 0 ? 256 : 0);

        // DEBUG!
        //index = IN.col.r * 255.0f + IN.col.g*255.0*256.0 + IN.col.b*255.0*256.0*256.0;

        // Both the fractional and the integral part of index/LEN are needed,
        // and modf provides them in one call: the fractional part is stored in
        // x and the integral part in y.
        float2 intFrac;
        intFrac.x = modf(index/LEN, intFrac.y);

        // The regions without points will have an index equal to zero.
        // However, in the real implementation the points used are 32 and
        // above, so writing trash data to point 0 is not a problem, and it
        // saves one test in this shader.

        // Calculate the homogeneous xy coordinates:
        //   x = 2 * frac       + 1/LEN - 1
        //   y = 2 * int / LEN  + 1/LEN - 1
        intFrac = (1/LEN - 1.0).xx + float2(2.0, 2/LEN) * intFrac;

        // Just copy the position results; z is always 0 and w is 1. By now
        // intFrac contains numbers in the range [-1, 1] for valid values.
        OUT.pos = float4(intFrac, 0, 1);
    }
    else {
        // No point mapped here: move the vertex outside the [-1, 1] range.
        OUT.pos = float4(-2,-2,-2,1);
    }
}