/*
==========================================================================================
 Cg Acceleration Research

 Optimized:        Edgar Velázquez-Armendáriz - edgar [at] graphics [dot] cornell [dot] edu
 Original version: Eugene Lee (el77 [at] cornell [dot] edu),
                   Alex Liberman (al262 [at] cornell [dot] edu)
------------------------------------------------------------------------------------------
 EdgeRasters.cg

 Rasterizes edges on an image with 8x8 subpixel accuracy.
==========================================================================================
*/

// Instruction/register counts reported by the authors:
// vp40: # 40 instructions, 3 R-regs
// orig: # 66 instructions, 4 R-regs

/*
 * Output of the vertex program.
 *
 *   position     - clip-space position of the emitted vertex.
 *   edgeVertices - both edge endpoints in image space, packed as
 *                  (first.xy, second.xy); see EdgeRastersVP for the ordering.
 */
struct VertexOutput
{
    float4 position     : POSITION;
    float4 edgeVertices : TEXCOORD1;
};

/*
 * Vertex program: projects both endpoints of an edge to image space, pushes
 * the emitted vertex outwards along the edge, and forwards the ordered
 * endpoint pair to the fragment program.
 *
 *   position            - this vertex of the edge (multiplied by
 *                         modelViewProjMatrix as-is).
 *   edgeVO              - the other endpoint of the edge; only xyz is read and
 *                         w is taken as 1.
 *   width, height       - scale factors mapping normalized coordinates in
 *                         [0, 1] to image space (presumably the render-target
 *                         size in pixels - confirm against the caller).
 *   modelViewProjMatrix - transform into homogeneous clip space.
 *
 * Returns a VertexOutput whose position is this vertex moved 2 image-space
 * units away from the other endpoint (z and w are left untouched) and whose
 * edgeVertices holds both image-space endpoints in a fixed order. When both
 * endpoints fall inside the same pixel, position.x is overwritten with -1e38
 * to cull the edge.
 */
VertexOutput EdgeRastersVP(float4 position : POSITION,
                           float3 edgeVO   : TEXCOORD0,
                           const uniform float width,
                           const uniform float height,
                           const uniform float4x4 modelViewProjMatrix)
{
    VertexOutput output;
    float2 iVertex0, iVertex1, direction;
    float4 tVertex0, tVertex1;

    // transform each vertex into homogeneous clip-space
    tVertex0 = mul(modelViewProjMatrix, position);
    tVertex1 = mul(modelViewProjMatrix, float4(edgeVO.xyz, 1.0f));

    // transform each vertex into image space:
    // perspective divide, remap [-1, 1] -> [0, 1], then scale by (width, height).
    // NOTE(review): a clip-space w of 0 makes this divide by zero - confirm the
    // caller never submits such vertices.

    // IMPROVEMENT 2: 1 vectorization, new vectors (41 ins)
    float4 iVertexTmp = ((( float4(tVertex0.xy, tVertex1.xy) / float4(tVertex0.ww, tVertex1.ww) ) * 0.5) + 0.5f.xxxx) * float4(width, height, width, height);
    iVertex0 = iVertexTmp.xy;
    iVertex1 = iVertexTmp.zw;

    // IMPROVEMENT 1: 2 vectorizations, no new vectors (49 ins)
    //iVertex0 = (((tVertex0 / tVertex0.ww) * 0.5) + 0.5f.xx) * float2(width, height);
    //iVertex1 = (((tVertex1 / tVertex1.ww) * 0.5) + 0.5f.xx) * float2(width, height);

    // ORIGINAL: One by one (66 ins)
    //iVertex0.x = ((tVertex0.x / tVertex0.w) * 0.5 + 0.5) * width;
    //iVertex0.y = ((tVertex0.y / tVertex0.w) * 0.5 + 0.5) * height;
    //iVertex1.x = ((tVertex1.x / tVertex1.w) * 0.5 + 0.5) * width;
    //iVertex1.y = ((tVertex1.y / tVertex1.w) * 0.5 + 0.5) * height;

    // Image-space edge direction, scaled to length 2.
    // NOTE(review): when both endpoints coincide this normalizes a zero
    // vector; such edges are also flagged by `p` below, but the displaced
    // y coordinate is still computed from this value - confirm this is benign
    // on the target hardware.
    direction = normalize(iVertex1 - iVertex0) * 2;

    // these are small edges: both endpoints lie in the same pixel
    bool p = (floor(iVertex0.x) == floor(iVertex1.x) && floor(iVertex0.y) == floor(iVertex1.y));

    // transform vertex back to homogeneous clip-space, after moving it
    // `direction` away from the other endpoint (inverse of the mapping above)

    // IMPROVEMENT 3: vectorize (40 ins)
    tVertex0.xy = ((iVertex0 - direction) / float2(width, height) - 0.5f.xx) * tVertex0.ww / 0.5f.xx;

    // ORIGINAL: 41 ins after improvement 2
    //tVertex0.x = ((iVertex0.x - direction.x) / width - 0.5) * tVertex0.w / 0.5;
    //tVertex0.y = ((iVertex0.y - direction.y) / height - 0.5) * tVertex0.w / 0.5;

    output.position = tVertex0;
    //output.position.z -= 0.15*output.position.z;

    // cull out small edges
    if(p)
        output.position.x = -1e38;

    // order the edges so that the slope is the same for both vertices of an edge
    // (so that it is passed correctly into the fragment program after interpolation):
    // the endpoint with the smaller x goes first; equal x is broken by smaller y
    if(((iVertex0.x == iVertex1.x) && (iVertex0.y < iVertex1.y)) || (iVertex0.x < iVertex1.x))
        output.edgeVertices = float4(iVertex0.xy, iVertex1.xy);
    else
        output.edgeVertices = float4(iVertex1.xy, iVertex0.xy);

    return output;
}

// Instruction/register counts reported by the authors:
// fp40: # 23 instructions, 3 R-regs, 2 H-regs
// orig: # 45 instructions, 2 R-regs, 1 H-regs

/*
 * Fragment program: intersects the edge with two boundary lines of the
 * current pixel and encodes where each crossing falls along the pixel side.
 *
 *   position     - window position of the fragment (WPOS); only x and y are read.
 *   edgeVertices - image-space endpoints (first.xy, second.xy) from EdgeRastersVP.
 *   depthImage   - not read by this program; kept so the interface is unchanged.
 *   BIAS         - not read by this program; kept so the interface is unchanged.
 *
 * Returns a colour whose x and y channels are exp2(k) / 255 with
 * k = floor(7 * offset of the crossing from lrtb.xz), or 0 when the
 * corresponding crossing is rejected; the z channel is always 0.
 * (Presumably each channel ends up as a single set bit of an 8-bit value -
 * confirm against the render-target format.)
 */
half3 EdgeRastersFP(in float3 position     : WPOS,
                    in float4 edgeVertices : TEXCOORD1,
                    uniform samplerRECT depthImage,
                    uniform float BIAS) : COLOR0
{
    // vertices for the edge
    float2 edgeVertex0 = edgeVertices.xy;
    float2 edgeVertex1 = edgeVertices.zw;

    // find the bounding positions of the pixel
    // IMPROVEMENT 1: Vectorize the offsets
    // No instruction count change
    // Scalar equivalent:
    //   left   = position.x - 0.5;   right  = position.x + 0.5;
    //   top    = position.y - 0.5;   bottom = position.y + 0.5;
    float4 lrtb = position.xxyy + float4(-0.5, 0.5, -0.5, 0.5); // left, right, top, bottom

    // parametrize the line, to P0 + t * direction
    float2 edgeDirection = edgeVertex1 - edgeVertex0;

    // IMPROVEMENT 2: After Improvement 1, vectorize the computation and the test
    // Instruction count: from 44 to 23
    float2 intersectionXY;
    float2 tLeftTop;
    float2 xyLocation;
    half2 pXY;

    // tLeftTop.x: parameter at which the line reaches x = lrtb.x;
    // tLeftTop.y: parameter at which the line reaches y = lrtb.w.
    // NOTE(review): the name says "top" (lrtb.z) but the swizzle selects
    // lrtb.w, while the offset below is measured from lrtb.xz - confirm which
    // pixel side is intended.
    // NOTE(review): a zero component in edgeDirection (axis-aligned edge)
    // makes this a division by zero; a resulting NaN fails every comparison
    // in the negated test below and is therefore not rejected - confirm.
    tLeftTop = (lrtb.xw - edgeVertex0) / edgeDirection;

    // x of the crossing with the horizontal boundary, y of the crossing with
    // the vertical boundary (hence the swapped parameters)
    intersectionXY = edgeDirection * tLeftTop.yx + edgeVertex0;

    // 1 where the crossing lies within the pixel bounds and within the
    // segment (0 <= t <= 1), 0 otherwise
    pXY = (!((intersectionXY > lrtb.yw) || (intersectionXY < lrtb.xz) || (tLeftTop.yx < 0.0f.xx) || (tLeftTop.yx > 1.0f.xx)));

    // quantize the offset inside the pixel and encode it as a power of two;
    // rejected crossings are zeroed by pXY
    xyLocation = exp2(floor((intersectionXY - lrtb.xz) * 7)) * pXY;

    return float3(xyLocation, 0) / 255.0;
}