/*
==========================================================================================
Cg Acceleration Research
Edgar Velázquez Armendáriz - edgar [at] graphics [dot] cornell [dot] edu
------------------------------------------------------------------------------------------
setImageId.cg

Set Image ID shaders.
==========================================================================================
*/

/**
 * Simple vertex input/output structure: a position plus a color.
 */
struct vertSimpleData {
    float4 pos : POSITION;
    half4  col : COLOR;
};

/**
 * Fragment program to copy the image ID as color.
 *
 * IMPORTANT: Although the 24 bit image ID is passed in as an RGB color, it has to be
 * written into the GBA channels, because the R channel contains the age, so a swizzle
 * is used.
 *
 * NOTE(review): OUT.r is intentionally never written here; presumably the render state
 * masks the R channel so the stored age survives -- confirm against the host code.
 *
 * fp40: # 1 instructions, 0 R-regs, 0 H-regs
 *
 * @param IN   interpolated color carrying the image ID in its rgb channels
 * @param OUT  output color; only the gba channels are assigned
 */
void copyColorFrag(in  half4 IN  : COLOR,
                   out half4 OUT : COLOR)
{
    OUT.gba = IN.rgb;
}

// This shader receives the point ID encoded in the x,y position, so it has to be
// decoded into a linear index and then transformed into homogeneous clip space.
//
// Instruction counts as reported by the original author (they may differ slightly now
// that the color is written on every path):
// vp40: # 29 instructions, 2 R-regs (MIMD branching)
//       # 23 instructions, 2 R-regs (regular code)
//
// LEN  - the length of the point cloud texture
// IN   - incoming vertex; pos.xy carries the packed pointid_flags, col the image ID color
// OUT  - clip-space position of the point's texel and the copied color
void colorCopyVert(uniform float          LEN,
                   in      vertSimpleData IN,
                   out     vertSimpleData OUT)
{
    // Copy the color on every path so that the output is never left undefined, even
    // for the vertices that are rejected below.
    OUT.col = IN.col;

    // The original layout of the pointid_flags that was read as vertices is
    //
    // R - flags
    // G - LSB of pointid
    // B
    // A - MSB
    //
    // A pixel with no point ID and flags = 0x10 means that no point was mapped there.
    // That translates into an incoming vertex (16,0).
    if (any(IN.pos.xy != float2(16, 0))) {
        // First reconstruct the index: x/256 moves the flags byte into the fractional
        // part, y*256 shifts the upper bytes above the LSB.
        float2 tmp = float2(1 / 256.0, 256.0) * IN.pos.xy;

        // This is interesting: the data written to the pointid_flags texture was meant
        // to be unsigned bytes, the raw representation of the pointid. However, the
        // vertices are interpreted as SIGNED shorts, so any number above 0x7FFF is
        // interpreted as a negative number. The y-part is not a problem, because the
        // range will never get that high until there are around 8 million points. But
        // the LSB has a lot of this trouble, so to convert that byte to the needed
        // format a correction is added to the integer part of the result, only for the
        // negative numbers.
        //
        // floor and 256 are used (instead of truncation and 0xFF); this is valid
        // because all the numbers have a flags field, therefore the division of
        // IN.pos.x by 256 will always have a fractional part, moving all the negative
        // results one unit behind. According to the original author this reduces the
        // instruction count from 23 to 20 instructions.
        float index = floor(tmp.x) + tmp.y + (tmp.x < 0 ? 256 : 0);

        // DEBUG!
        // index = IN.col.r * 255.0f + IN.col.g*255.0*256.0 + IN.col.b*255.0*256.0*256.0;

        // Both the fractional and the integer part are needed, and modf provides them
        // in one Cg call. The fractional part is stored in x, and the integral part
        // will be in y.
        float2 intFrac;
        intFrac.x = modf(index / LEN, intFrac.y);

        // The regions without points will have an index equal to zero. However, in the
        // real implementation the points used are 32 and above, so writing trash data
        // to point 0 will not be a problem, and it is one less test for this shader.

        // Calculates the homogeneous xy coordinates.
        intFrac = (1 / LEN - 1.0).xx + float2(2.0, 2 / LEN) * intFrac;

        // Just copy the position results. z is always 0 and w 1. And by now intFrac
        // contains numbers in the range [-1, 1] for valid values.
        OUT.pos = float4(intFrac, 0, 1);
    }
    else {
        // No point was mapped here: emit a position with every coordinate beyond the
        // [-w, w] range so the vertex falls outside the clip volume.
        OUT.pos = float4(-2, -2, -2, 1);
    }
}