/* ==========================================================================================
   Cg Acceleration Research
   Edgar Velázquez Armendáriz - edgar [at] graphics [dot] cornell [dot] edu
   ------------------------------------------------------------------------------------------
   setImageId.cg

   Set Image ID shaders.
   ========================================================================================== */

/**
 * Simple vertex input/output structure: a clip-space position plus a colour.
 */
struct vertSimpleData {
    float4 pos : POSITION;
    half4  col : COLOR;
};

/**
 * Fragment program to copy the image ID as colour.
 *
 * IMPORTANT: although the 24-bit image ID is passed in as an RGB colour, it has to be
 * written into the GBA channels, because the R channel contains the age. A swizzled
 * write is used for that.
 *
 * NOTE(review): OUT.r is never written here. Presumably the host keeps the existing
 * R (age) value intact, e.g. through a colour write mask -- confirm against the caller.
 *
 * fp40: # 1 instructions, 0 R-regs, 0 H-regs
 */
void copyColorFrag(
    in  half4 IN  : COLOR,
    out half4 OUT : COLOR )
{
    OUT.gba = IN.rgb;
}

/**
 * Vertex program that scatters a colour to the location addressed by a point ID.
 *
 * The point ID arrives encoded in the x,y components of the vertex position, so it has
 * to be decoded into a linear index first and then turned into homogeneous clip-space
 * coordinates.
 *
 * LEN  The length of the point cloud texture.
 * IN   Incoming vertex: pos.xy carries the packed flags/point ID, col the colour to copy.
 * OUT  Resulting vertex: clip-space position of the addressed location and the colour.
 *
 * Instruction counts reported by the original author (re-measure after any change):
 * vp40: # 29 instructions, 2 R-regs (MIMD branching)
 *       # 23 instructions, 2 R-regs (regular code)
 */
void colorCopyVert(
    uniform float LEN,          // The length of the point cloud texture
    in  vertSimpleData IN,
    out vertSimpleData OUT )
{
    // The colour is forwarded on every path so that no output is left undefined, even
    // for the vertices that are thrown away below.
    OUT.col = IN.col;

    // The original layout of the pointid_flags that was read as vertices is
    //
    //   R - flags
    //   G - LSB of pointid
    //   B
    //   A - MSB
    //
    // A pixel with no point ID and flags = 0x10 means that no point was mapped there.
    // That translates into an incoming vertex (16,0).
    if ( any(IN.pos.xy != float2(16,0)) )
    {
        // First reconstruct the index: x/256 moves the flags byte into the fractional
        // part and leaves the pointid LSB as the integer part, while y*256 shifts the
        // two upper pointid bytes above that LSB.
        float2 tmp = float2(1/256.0, 256.0) * IN.pos.xy;

        // This is interesting: the data written to the pointid_flags texture was meant
        // to be unsigned bytes. However, the vertices are interpreted as SIGNED shorts,
        // so any number above 0x7FFF is interpreted as a negative number. The y part is
        // not a problem, because its range will never get that high until there are
        // around 8 million points. The LSB, however, hits this case all the time, so to
        // convert that byte back to the value needed, 256 is added to the integer part
        // of the result, only for the negative numbers.
        //
        // floor together with 256 is used here; according to the original author this
        // is valid because every number has a flags field, therefore the division of
        // IN.pos.x by 256 always has a fractional part, moving all the results one unit
        // behind. This way the instruction count is reduced from 23 to 20 instructions.
        float index = floor(tmp.x) + tmp.y + (tmp.x < 0 ? 256 : 0);

        // Both the fractional and the integer part of index/LEN are needed, and modf
        // yields them in a single Cg call: the fractional part is stored in x and the
        // integral part in y.
        float2 intFrac;
        intFrac.x = modf(index/LEN, intFrac.y);

        // The regions without points will have an index equal to zero. However, in the
        // real implementation the points used are 32 and above, so writing trash data
        // to point 0 will not be a problem, and it is one less test for this shader.

        // Calculates the homogeneous xy coordinates:
        //   x = 2*frac      + 1/LEN - 1
        //   y = 2*int / LEN + 1/LEN - 1
        // NOTE(review): this looks like the texel-centre mapping for a LEN x LEN
        // target -- confirm against the render target set up by the host.
        intFrac = (1/LEN - 1.0).xx + float2(2.0, 2/LEN) * intFrac;

        // Just copy the position results. z is always 0 and w 1. By now intFrac
        // contains numbers in the range [-1, 1] for valid values.
        OUT.pos = float4(intFrac, 0, 1);
    }
    else
    {
        // No point mapped here: move the vertex outside the [-1, 1] clip volume.
        OUT.pos = float4(-2,-2,-2,1);
    }
}