diff --exclude CVS -ur gpubench-1.0/download.cpp gpub2/download.cpp --- gpubench-1.0/download.cpp 2004-06-26 17:27:14.000000000 -0700 +++ gpub2/download.cpp 2005-02-11 12:10:10.000000000 -0800 @@ -27,11 +27,6 @@ exit(1); } -char singleFetch[] = "!!ARBfp1.0\n" -"TEMP R0;\n" -"TEX R0, fragment.texcoord[0], texture[0], RECT;\n" -"ADD result.color, R0, R0;\n" -"END\n"; int main (int argc, char *argv[]) { @@ -45,20 +40,6 @@ int max = 512; int min = 512; int components = 4; - - int nvtype[5] = {0, - GL_FLOAT_R32_NV, - GL_FLOAT_RG32_NV, - GL_FLOAT_RGB32_NV, - GL_FLOAT_RGBA32_NV}; - int atitype[5] = {0, - GL_ALPHA_FLOAT32_ATI, - GL_LUMINANCE_ALPHA_FLOAT32_ATI, - GL_RGB_FLOAT32_ATI, - GL_RGBA_FLOAT32_ATI}; - - int format[5] = {0, GL_RED, GL_LUMINANCE_ALPHA, - GL_RGB, GL_RGBA}; int opt; int opt_index; @@ -72,6 +53,19 @@ GL_DEPTH_BUFFER_BIT); CHECK_GL(); + char singleFetch[2048]; + + strcpy( singleFetch, "!!ARBfp1.0\n"); + strcat( singleFetch, "TEMP R0;\n"); + + (gpubench_is3DLABS()) ? + strcat( singleFetch, "TEX R0, fragment.texcoord[0], texture[0], 2D;\n") + : strcat( singleFetch, "TEX R0, fragment.texcoord[0], texture[0], RECT;\n"); + + strcat( singleFetch, "ADD result.color, R0, R0;\n"); + strcat( singleFetch, "END\n"); + + static struct option long_options[] = { {"min", 1, 0, 'm'}, {"max", 1, 0, 'x'}, @@ -111,14 +105,27 @@ assert(texdata); if (gpubench_isATI()) { - glTexImage2D (GL_TEXTURE_RECTANGLE_EXT, 0, atitype[components], + + glTexImage2D (GL_TEXTURE_RECTANGLE_EXT, 0, atitype[components], max, max, 0, format[components], GL_FLOAT, texdata); - } else { + + glEnable(GL_TEXTURE_RECTANGLE_EXT); + + } else if ( gpubench_isNV()) { + glTexImage2D (GL_TEXTURE_RECTANGLE_EXT, 0, nvtype[components], max, max, 0, format[components], GL_FLOAT, texdata); - } - glEnable(GL_TEXTURE_RECTANGLE_EXT); + glEnable(GL_TEXTURE_RECTANGLE_EXT); + + } else { + + glTexImage2D (GL_TEXTURE_2D, 0, labstype[components], + max, max, 0, format[components], GL_FLOAT, texdata); + + glEnable(GL_TEXTURE_2D); + } + CHECK_GL(); p = gpubench_loadShaderFromString (singleFetch); @@ -143,20 +150,41 @@ float MBs = i*i*4*components / (1024.0f * 1024.0f); - gpubench_resetTimer(); - passes = 0; - for (j=0; j<300; j++) { - glTexSubImage2D (GL_TEXTURE_RECTANGLE_EXT, - 0, 0, 0, i, i, format[components], GL_FLOAT, texdata); - glBegin(GL_POINTS); - glTexCoord2f(0.5f, 0.5f); - glVertex3f(0.0f, 0.0f, 0.0f); - glEnd(); - passes++; + if ( gpubench_is3DLABS()) { + + gpubench_resetTimer(); + passes = 0; + for (j=0; j<300; j++) { + + glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, i, i, format[components], GL_FLOAT, texdata); + glBegin(GL_POINTS); + glTexCoord2f(0.5f, 0.5f); + glVertex3f(0.0f, 0.0f, 0.0f); + glEnd(); + passes++; + } + glFinish(); + + msec = gpubench_getTimer(); + } + else { + + gpubench_resetTimer(); + passes = 0; + for (j=0; j<300; j++) { + + glTexSubImage2D (GL_TEXTURE_RECTANGLE_EXT, 0, 0, 0, i, i, format[components], GL_FLOAT, texdata); + glBegin(GL_POINTS); + glTexCoord2f(0.5f, 0.5f); + glVertex3f(0.0f, 0.0f, 0.0f); + glEnd(); + passes++; + } + glFinish(); + + msec = gpubench_getTimer(); } - glFinish(); - msec = gpubench_getTimer(); if (!silent) printf("(* %d x %d *)\n", i, i); printf("%d\t%f\n", @@ -172,18 +200,38 @@ float MBs = i*i*4*components / (1024.0f * 1024.0f); passes = 0; - gpubench_resetTimer(); - for (j=0; j<300; j++) { - glTexSubImage2D (GL_TEXTURE_RECTANGLE_EXT, - 0, 0, 0, i, i, format[components], GL_FLOAT, texdata); - glBegin(GL_POINTS); - glTexCoord2f(0.5f, 0.5f); - glVertex3f(0.0f, 0.0f, 0.0f); - glEnd(); - passes++; + + if ( gpubench_is3DLABS()) { + + gpubench_resetTimer(); + for (j=0; j<300; j++) { + + glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, i, i, format[components], GL_FLOAT, texdata); + glBegin(GL_POINTS); + glTexCoord2f(0.5f, 0.5f); + glVertex3f(0.0f, 0.0f, 0.0f); + glEnd(); + passes++; + } + glFinish(); + msec = gpubench_getTimer(); + } + else { + + gpubench_resetTimer(); + for (j=0; j<300; j++) { + + glTexSubImage2D (GL_TEXTURE_RECTANGLE_EXT, 0, 0, 0, i, i, format[components], GL_FLOAT, texdata); + glBegin(GL_POINTS); + glTexCoord2f(0.5f, 0.5f); + glVertex3f(0.0f, 0.0f, 0.0f); + glEnd(); + passes++; + } + glFinish(); + msec = gpubench_getTimer(); } - glFinish(); - msec = gpubench_getTimer(); + if (!silent) printf("(* %d x %d *)\n", i, i); printf("%f\t%f\n", diff --exclude CVS -ur gpubench-1.0/fetchcosts.cpp gpub2/fetchcosts.cpp --- gpubench-1.0/fetchcosts.cpp 2004-07-22 22:18:27.000000000 -0700 +++ gpub2/fetchcosts.cpp 2005-02-11 12:01:04.000000000 -0800 @@ -79,10 +79,8 @@ char tmpStr[1024]; sprintf(fpStr, ""); - int totalFetches = numFetchesPerLevel*(numDependentLevels+1); - if (totalFetches > numInstr) { if (verbose) fprintf(stderr, "More fetches required than instructions allowed. Needed %d, only allowed %d.\n", totalFetches, numInstr); @@ -91,10 +89,8 @@ return; } - strcat(fpStr, "!!ARBfp1.0\n" "PARAM C0=program.env[0];\n"); - // output TEMP registers i = 0; @@ -125,7 +121,9 @@ // another special case, a single TEX fetch if (numInstr == 1) { - strcat(fpStr, "TEX result.color, fragment.texcoord[0], texture[0], RECT;\n"); + (gpubench_is3DLABS()) ? + strcat(fpStr, "TEX result.color, fragment.texcoord[0], texture[0], 2D;\n") + : strcat(fpStr, "TEX result.color, fragment.texcoord[0], texture[0], RECT;\n"); strcat(fpStr, "END\n"); return; } @@ -136,7 +134,9 @@ int curInstr = 0; int curLevel = 0; - strcat(fpStr, "TEX R0, fragment.texcoord[0], texture[0], RECT;\n"); + (gpubench_is3DLABS()) ? + strcat(fpStr, "TEX R0, fragment.texcoord[0], texture[0], 2D;\n") + : strcat(fpStr, "TEX R0, fragment.texcoord[0], texture[0], RECT;\n"); curInstr++; // emit instructions for all the texture fetches. Place each one in its own register @@ -156,12 +156,16 @@ // all the reads as we hope. If they get smarter later on we'll have to // change. srcTex = (useMultiTexture) ? i : 0; - sprintf(tmpStr, "TEX R%d, fragment.texcoord[0], texture[%d], RECT;\n", i, srcTex); + (gpubench_is3DLABS()) ? + sprintf(tmpStr, "TEX R%d, fragment.texcoord[0], texture[%d], 2D;\n", i, srcTex) + : sprintf(tmpStr, "TEX R%d, fragment.texcoord[0], texture[%d], RECT;\n", i, srcTex); strcat(fpStr, tmpStr); } else { srcTex = (useMultiTexture) ? i : 0; - sprintf(tmpStr, "TEX R%d, R%d, texture[%d], RECT;\n", i, srcReg, srcTex); + (gpubench_is3DLABS()) ? + sprintf(tmpStr, "TEX R%d, R%d, texture[%d], 2D;\n", i, srcReg, srcTex) + : sprintf(tmpStr, "TEX R%d, R%d, texture[%d], RECT;\n", i, srcReg, srcTex); strcat(fpStr, tmpStr); } @@ -220,6 +224,9 @@ strcpy(fpStr, ""); } + // debug purpose + // printf("%s\n\n", fpStr); + } @@ -384,7 +391,15 @@ float* texData0 = (float*)malloc(sizeof(float) * components * size * size); float* texData1 = (float*)malloc(sizeof(float) * components * size * size); - for (int i=0;i 1) - texData0[index+1] = texData1[index+1] = (float)i; + texData0[index+1] = texData1[index+1] = (float)i * norm_factor; } else { // random access on all dependent textures - texData0[index] = (float)(rand() % (size+1)); - texData1[index] = (float)(rand() % (size+1)); + texData0[index] = (float)(rand() % (size+1)) * norm_factor; + texData1[index] = (float)(rand() % (size+1)) * norm_factor; if (components > 1) { - texData0[index+1] = (float)(rand() % (size+1)); - texData1[index+1] = (float)(rand() % (size+1)); + texData0[index+1] = (float)(rand() % (size+1)) * norm_factor; + texData1[index+1] = (float)(rand() % (size+1)) * norm_factor; } + + printf( "%f %f", texData1[index], texData0[index]); } if (components > 2) @@ -419,24 +436,26 @@ texData0[index+3] = texData1[index+3] = 1.0f; } } - - int nvtype[5] = {0, - GL_FLOAT_R32_NV, - GL_FLOAT_RG32_NV, - GL_FLOAT_RGB32_NV, - GL_FLOAT_RGBA32_NV}; - int atitype[5] = {0, - GL_ALPHA_FLOAT32_ATI, - GL_LUMINANCE_ALPHA_FLOAT32_ATI, - GL_RGB_FLOAT32_ATI, - GL_RGBA_FLOAT32_ATI}; - int format[5] = {0, GL_RED, GL_LUMINANCE_ALPHA, - GL_RGB, GL_RGBA}; + int textureType; + int componentsType; + + if ( gpubench_isNV()) { - int textureType = (gpubench_isNV()) ? GL_TEXTURE_RECTANGLE_NV : GL_TEXTURE_RECTANGLE_EXT; + textureType = GL_TEXTURE_RECTANGLE_NV; + componentsType = nvtype[components]; + } + else if (gpubench_isATI()){ + + textureType = GL_TEXTURE_RECTANGLE_EXT; + componentsType = atitype[components]; + } + else { // 3DLabs + textureType = GL_TEXTURE_2D; + componentsType = labstype[components]; + } GLuint* texId = (GLuint*)malloc(sizeof(GLuint)*uniqueTextures); @@ -445,7 +464,7 @@ for (i=0;i( size, size, chunksize, renderTriangleNotQuad, numIterations, + (accessType == SINGLE) ? GPUBENCH_ACCESS_SINGLE : GPUBENCH_ACCESS_SEQ); + } + else if (gpubench_isATI()){ + + msec = gpubench_render( size, size, chunksize, renderTriangleNotQuad, numIterations, + (accessType == SINGLE) ? GPUBENCH_ACCESS_SINGLE : GPUBENCH_ACCESS_SEQ); + } + else { // 3DLabs + + msec = gpubench_render( size, size, chunksize, renderTriangleNotQuad, numIterations, + (accessType == SINGLE) ? GPUBENCH_ACCESS_SINGLE : GPUBENCH_ACCESS_SEQ); + } gpubench_releaseShader(); diff --exclude CVS -ur gpubench-1.0/floatbandwidth.cpp gpub2/floatbandwidth.cpp --- gpubench-1.0/floatbandwidth.cpp 2004-07-28 18:33:12.000000000 -0700 +++ gpub2/floatbandwidth.cpp 2005-02-11 10:08:20.000000000 -0800 @@ -13,29 +13,9 @@ // type of access pattern in nodep/dependent access tests. typedef enum {ACCESS_SINGLE, ACCESS_SEQUENTIAL, ACCESS_RANDOM, ACCESS_STRIDED} AccessType; - - static PFNGLACTIVETEXTUREARBPROC glActiveTextureARB; static PFNGLMULTITEXCOORD2FARBPROC glMultiTexCoord2fARB; -#define GL_ALPHA_FLOAT32_ATI 0x8816 -#define GL_LUMINANCE_ALPHA_FLOAT32_ATI 0x8819 -#define GL_RGB_FLOAT32_ATI 0x8815 - -static int nvtype[5] = {0, - GL_FLOAT_R32_NV, - GL_FLOAT_RG32_NV, - GL_FLOAT_RGB32_NV, - GL_FLOAT_RGBA32_NV}; -static int atitype[5] = {0, - GL_ALPHA_FLOAT32_ATI, - GL_LUMINANCE_ALPHA_FLOAT32_ATI, - GL_RGB_FLOAT32_ATI, - GL_RGBA_FLOAT32_ATI}; - -static int format[5] = {0, GL_RED, GL_LUMINANCE_ALPHA, - GL_RGB, GL_RGBA}; - // should be 2, but right now ATI card doesn't seem to work #define NUM_INDEX_TEX_COMPONENTS 4 @@ -78,7 +58,9 @@ } if (isDependent) { - strcat(shaderStr, "TEX t0.xy, fragment.texcoord[0], texture[0], RECT;\n"); + ( gpubench_is3DLABS()) ? + strcat(shaderStr, "TEX t0.xy, fragment.texcoord[0], texture[0], 2D;\n") + : strcat(shaderStr, "TEX t0.xy, fragment.texcoord[0], texture[0], RECT;\n"); firstReg = 1; firstTexture = 1; strcpy(coordStr, "t0"); @@ -107,7 +89,11 @@ int destReg = (limitRegUsage) ? firstReg + (i % 2) : firstReg + i; int srcTex = (multiTexture) ? firstTexture + i : firstTexture; - sprintf(tempStr, "TEX t%d%s, %s, texture[%d], RECT;\n", destReg, mask, coordStr, srcTex); + + ( gpubench_is3DLABS()) ? + sprintf(tempStr, "TEX t%d%s, %s, texture[%d], 2D;\n", destReg, mask, coordStr, srcTex) + : sprintf(tempStr, "TEX t%d%s, %s, texture[%d], RECT;\n", destReg, mask, coordStr, srcTex); + strcat(shaderStr, tempStr); // have to use the value otherwise dead code elim will remove the fetch @@ -167,9 +153,8 @@ strcat(shaderStr, "END\n"); - //printf("%s\n\n", shaderStr); + // printf("%s\n\n", shaderStr); //exit(1); - } @@ -182,13 +167,34 @@ void createIndexTexture(float* data, GLuint handle, int sizex, int sizey) { - int textureType = (gpubench_isNV()) ? GL_TEXTURE_RECTANGLE_NV : GL_TEXTURE_RECTANGLE_EXT; - int components = NUM_INDEX_TEX_COMPONENTS; + int components = NUM_INDEX_TEX_COMPONENTS; glActiveTextureARB(GL_TEXTURE0_ARB); - glBindTexture(textureType, handle); - glTexImage2D(textureType, 0, gpubench_isNV() ? nvtype[components] : atitype[components], + + // Vendor specific + int textureType; + if ( gpubench_isNV()) { + + textureType = GL_TEXTURE_RECTANGLE_NV; + glBindTexture( textureType, handle); + glTexImage2D(textureType, 0, nvtype[components], sizex, sizey, 0, format[components], GL_FLOAT, data); + } + else if ( gpubench_isATI()) { + + textureType = GL_TEXTURE_RECTANGLE_EXT; + glBindTexture( textureType, handle); + glTexImage2D(textureType, 0, atitype[components], + sizex, sizey, 0, format[components], GL_FLOAT, data); + } + else { // gpubench_is3DLABS() + + textureType = GL_TEXTURE_2D; + glBindTexture( textureType, handle); + glTexImage2D( textureType, 0, labstype[components], + sizex, sizey, 0, format[components], GL_FLOAT, data); + } + glTexParameterf(textureType, GL_TEXTURE_WRAP_S, GL_CLAMP); glTexParameterf(textureType, GL_TEXTURE_WRAP_T, GL_CLAMP); glTexParameterf(textureType, GL_TEXTURE_MAG_FILTER, GL_NEAREST); @@ -238,27 +244,49 @@ return 0; } - +template void createIndexTexture(GLuint handle, int sizex, int sizey, int accessType) { int x, y; int components = NUM_INDEX_TEX_COMPONENTS; float* textureData = (float*)malloc(sizeof(float) * components * sizex * sizey); + // 3DLabs uses normalized texture coordinates + float norm_factor_x; + float norm_factor_y; + float norm_size_x; + float norm_size_y; + if ( GPUBench_Arch == GPUBENCH_ARCH_3DLABS) { + + norm_factor_x = (float)1.0 / sizex; + norm_factor_y = (float)1.0 / sizey; + norm_size_x = (float)1.0; + norm_size_y = (float)1.0; + } + else { // Nvidia, ATI + + norm_factor_x = (float)1.0; + norm_factor_y = (float)1.0; + norm_size_x = (float)sizex; + norm_size_y = (float)sizey; + } + // initialize the index texture. Values dependent // on the type of access pattern requested. for (int i=0;i(texHandles[0], max, max, accessType); + } + else if ( gpubench_isATI()) { + createIndexTexture(texHandles[0], max, max, accessType ); + } + else { // 3DLabs + createIndexTexture(texHandles[0], max, max, accessType ); + } + } textureData = (float*)malloc( sizeof(float) * components * max * max ); - // initialize texture data - for (i=0;i(size, max, chunksize, renderTriangleNotQuad, num_iter, + (accessType == ACCESS_SINGLE) ? GPUBENCH_ACCESS_SINGLE : GPUBENCH_ACCESS_SEQ); + } + else if ( gpubench_isATI()) { + timeMs = gpubench_render(size, max, chunksize, renderTriangleNotQuad, num_iter, + (accessType == ACCESS_SINGLE) ? GPUBENCH_ACCESS_SINGLE : GPUBENCH_ACCESS_SEQ); + } + else { // 3DLabs + timeMs = gpubench_render(size, max, chunksize, renderTriangleNotQuad, num_iter, + (accessType == ACCESS_SINGLE) ? GPUBENCH_ACCESS_SINGLE : GPUBENCH_ACCESS_SEQ); + } // size, fetches, time, gb/sec totalBytes = 4.0f*i*components*size*size*num_iter; @@ -703,7 +784,15 @@ if (accessType == ACCESS_RANDOM) { // have to do this again the random case so all the random indices fall // within the active part of the texture - createIndexTexture(texHandles[0], size, size, accessType); + if ( gpubench_isNV()) { + createIndexTexture(texHandles[0], max, max, accessType); + } + else if ( gpubench_isATI()) { + createIndexTexture(texHandles[0], max, max, accessType ); + } + else { // 3DLabs + createIndexTexture(texHandles[0], max, max, accessType ); + } } float timeMs, totalBytes; @@ -712,14 +801,34 @@ if (!onlyMaxFetch) { gpubench_bindShader(indexFetchHandle); - timeMs = gpubench_render(size, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + + if ( gpubench_isNV()) { + timeMs = gpubench_render(size, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + } + else if ( gpubench_isATI()) { + timeMs = gpubench_render(size, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + } + else { // 3DLabs + timeMs = gpubench_render(size, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + } + totalBytes = 4.0f*NUM_INDEX_TEX_COMPONENTS*size*size*num_iter; outputResultLine(size, 0, timeMs, totalBytes, num_iter, doDependent, accessType); } for (i=start;i<=numFetches;i++) { gpubench_bindShader(fpHandles[i-1]); - timeMs = gpubench_render(size, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + + if ( gpubench_isNV()) { + timeMs = gpubench_render(size, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + } + else if ( gpubench_isATI()) { + timeMs = gpubench_render(size, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + } + else { // 3DLabs + timeMs = gpubench_render(size, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + } + totalBytes = 4.0f*(i*components + NUM_INDEX_TEX_COMPONENTS)*size*size*num_iter; outputResultLine(size, i, timeMs, totalBytes, num_iter, doDependent, accessType); } @@ -732,6 +841,8 @@ } else { + printf("Using this branch.\n\n"); + for (int skipx=minskipx; skipx <= maxskipx; skipx++) { for (int skipy=minskipy; skipy <= maxskipy; skipy++) { @@ -745,7 +856,17 @@ float totalBytes; gpubench_bindShader(fpHandles[numFetches-1]); - timeMs = gpubench_render(size, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + + if ( gpubench_isNV()) { + timeMs = gpubench_render(size, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + } + else if ( gpubench_isATI()) { + timeMs = gpubench_render(size, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + } + else { // 3DLabs + timeMs = gpubench_render(size, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + } + totalBytes = 4.0f*(numFetches*components + NUM_INDEX_TEX_COMPONENTS)*size*size*num_iter; outputResultLineStrided(size, numFetches, timeMs, totalBytes, num_iter, skipx, skipy, samplesx, samplesy); } diff --exclude CVS -ur gpubench-1.0/fpfilltest.cpp gpub2/fpfilltest.cpp --- gpubench-1.0/fpfilltest.cpp 2004-08-03 13:19:55.000000000 -0700 +++ gpub2/fpfilltest.cpp 2005-02-11 12:00:48.000000000 -0800 @@ -13,12 +13,6 @@ #define GL_LUMINANCE_ALPHA_FLOAT32_ATI 0x8819 #define GL_RGB_FLOAT32_ATI 0x8815 -char singleFetch[] = "!!ARBfp1.0\n" -"TEMP R0;\n" -"TEX R0, fragment.texcoord[0], texture[0], RECT;\n" -"ADD R0, R0, R0;\n" -"ADD result.color, R0, R0;\n" -"END\n"; typedef void (APIENTRY * PFNGLDRAWBUFFERS) (GLsizei n, const GLenum *bufs); static PFNGLDRAWBUFFERS glDrawBuffersATI; @@ -53,7 +47,7 @@ strcat(fpStr,"END\n"); - //fprintf(stderr, fpStr); + fprintf(stderr, fpStr); } @@ -85,36 +79,40 @@ exit(0); } +template void dotest (int size, int wsize, int components, int renderTriangleNotQuad, int usemrt, int chunksize) { + int i, j, k, passes; int textureType; + int internalFormat; float time; float *texData; float foffset; + // program handle static int p = 0; - int nvtype[5] = {0, - GL_FLOAT_R32_NV, - GL_FLOAT_RG32_NV, - GL_FLOAT_RGB32_NV, - GL_FLOAT_RGBA32_NV}; - int atitype[5] = {0, - GL_ALPHA_FLOAT32_ATI, - GL_LUMINANCE_ALPHA_FLOAT32_ATI, - GL_RGB_FLOAT32_ATI, - GL_RGBA_FLOAT32_ATI}; - - int format[5] = {0, GL_RED, GL_LUMINANCE_ALPHA, - GL_RGB, GL_RGBA}; assert(components > 0 && components < 5); - textureType = (gpubench_isNV()) ? - GL_TEXTURE_RECTANGLE_NV : GL_TEXTURE_RECTANGLE_EXT; + if ( GPUBENCH_ARCH_NV == GPUBench_Arch) { + + textureType = GL_TEXTURE_RECTANGLE_NV; + internalFormat = usemrt ? nvtype[1] : nvtype[components]; + } + else if ( GPUBENCH_ARCH_ATI == GPUBench_Arch) { + + textureType = GL_TEXTURE_RECTANGLE_EXT; + internalFormat = usemrt ? atitype[1] : atitype[components]; + } + else { // 3DLabs + + textureType = GL_TEXTURE_2D; + internalFormat = usemrt ? labstype[1] : labstype[components]; + } texData = (float *) malloc (size*size*4*sizeof(float)); for (i=0; i max ) usage(progname); + gpubench_createWindow(20, 20, NULL, "FP FillTest"); GETPROCADDR(PFNGLACTIVETEXTUREARBPROC, glActiveTextureARB); - GETPROCADDR(PFNGLDRAWBUFFERS, glDrawBuffersATI); + + // MRT not supported on 3DLabs at the moment + if ( ! gpubench_is3DLABS()) { + + GETPROCADDR(PFNGLDRAWBUFFERS, glDrawBuffersATI); + } + else { + + usemrt = 0; + } if (usemrt) gpubench_createFloatPbufferAuxComponents(max, max, 0, components-1, 1); @@ -384,6 +447,7 @@ glClear (GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); CHECK_GL(); + if (usemrt) { int bufferList[4]; @@ -417,9 +481,23 @@ fflush(stdout); - for (i=min; i<=max; exp ? (i*=step) : (i+=step)) - dotest(i, max, components, renderTriangleNotQuad, usemrt, - chunksize); + if ( gpubench_isNV()) { + + for (i=min; i<=max; exp ? (i*=step) : (i+=step)) + dotest(i, max, components, renderTriangleNotQuad, usemrt, + chunksize); + } + else if ( gpubench_isATI()) { + + for (i=min; i<=max; exp ? (i*=step) : (i+=step)) + dotest(i, max, components, renderTriangleNotQuad, usemrt, + chunksize); + } else { // 3DLabs + + for (i=min; i<=max; exp ? (i*=step) : (i+=step)) + dotest(i, max, components, renderTriangleNotQuad, usemrt, + chunksize); + } gpubench_destroyPbuffer(); diff --exclude CVS -ur gpubench-1.0/gid.cpp gpub2/gid.cpp --- gpubench-1.0/gid.cpp 2004-06-25 11:52:17.000000000 -0700 +++ gpub2/gid.cpp 2005-02-03 21:13:10.000000000 -0800 @@ -4,6 +4,10 @@ #include "getopt.h" #include "gpubench.h" +#define GL_MAX_TEXTURE_COORDS_ARB 0x8871 +#define GL_MAX_TEXTURE_IMAGE_UNITS_ARB 0x8872 + + void print_extensions(int ugly) { char *s; int i; @@ -30,6 +34,30 @@ free(s); } +void printstring(GLenum e, const char *label) { + if (label) + printf ("%s: ", label); + printf ("%s\n", glGetString(e)); +} + +void printpvint (GLenum e, const char *label) { + int i = -1; + if (label) + printf ("%s", label); + gpubench_getprogramiv(GL_FRAGMENT_PROGRAM_ARB, e, &i); + CHECK_GL(); + printf ("%d\n", i); +} + +void printint (GLenum e, const char *label) { + int i = -1; + if (label) + printf ("%s", label); + glGetIntegerv(e, &i); + CHECK_GL(); + printf ("%d\n", i); +} + int main(int argc, char *argv[]) { @@ -43,23 +71,31 @@ {"version", 0, 0, 'v'}, {"extensions", 0, 0, 'e'}, {"driver", 0, 0, 'd'}, + {"program", 0, 0, 'p'}, {"ugly", 0, 0, 'n'}, {0, 0, 0, 0} }; gpubench_createWindow (100, 100, NULL, "gid"); - while ((opt = getopt_long(argc, argv, "wrvedn", + while ((opt = getopt_long(argc, argv, "wrvedpn", long_options, &opt_index)) != EOF) { switch (opt) { case 'w': - printf ("%s\n", glGetString (GL_VENDOR)); + printstring(GL_VENDOR, NULL); return 0; case 'r': - printf ("%s\n", glGetString (GL_RENDERER)); + printstring(GL_RENDERER, NULL); return 0; case 'v': - printf ("%s\n", glGetString (GL_VERSION)); + printstring(GL_VERSION, NULL); + return 0; + case 'p': + printpvint(GL_MAX_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB, "ALU Instructions: "); + printpvint(GL_MAX_PROGRAM_NATIVE_TEX_INSTRUCTIONS_ARB, "TEX Instructions: "); + printpvint(GL_MAX_PROGRAM_NATIVE_TEX_INDIRECTIONS_ARB, "TEX Indirections: "); + printint(GL_MAX_TEXTURE_IMAGE_UNITS_ARB, "MAX_TEXTURE_IMAGE_UNITS: "); + printint(GL_MAX_TEXTURE_COORDS_ARB, "MAX_TEXTURE_COORDS: "); return 0; case 'e': print_extensions(print_ugly); @@ -77,6 +113,11 @@ printf ("GL_RENDERER: %s\n", glGetString (GL_RENDERER)); printf ("GL_VERSION: %s\n", glGetString (GL_VERSION)); printf ("Driver Version: %s\n", gpubench_getDriverVersion()); + printpvint(GL_MAX_PROGRAM_ALU_INSTRUCTIONS_ARB, "ALU Instructions: "); + printpvint(GL_MAX_PROGRAM_TEX_INSTRUCTIONS_ARB, "TEX Instructions: "); + printpvint(GL_MAX_PROGRAM_TEX_INDIRECTIONS_ARB, "TEX Indirections: "); + printint(GL_MAX_TEXTURE_IMAGE_UNITS_ARB, "MAX_TEXTURE_IMAGE_UNITS: "); + printint(GL_MAX_TEXTURE_COORDS_ARB, "MAX_TEXTURE_COORDS: "); printf ("GL_EXTENSIONS: \n"); print_extensions(print_ugly); diff --exclude CVS -ur gpubench-1.0/glprogram_ati.cpp gpub2/glprogram_ati.cpp --- gpubench-1.0/glprogram_ati.cpp 2004-07-12 13:32:48.000000000 -0700 +++ gpub2/glprogram_ati.cpp 2005-02-11 10:49:26.000000000 -0800 @@ -78,14 +78,15 @@ /* check if there were any errors with the fragment program */ if (glGetError() != GL_NO_ERROR) { + GLint ePos; char* error = (char*) glGetString(GL_PROGRAM_ERROR_STRING_ARB); glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &ePos); /* taking this out helps perl digest the output without stalling */ - /*fprintf(stderr, "Shader error: %s\n", error); + fprintf(stderr, "Shader error: %s\n", error); fprintf(stderr, "FSHADER::Load(): Error loading the program at location %d: %.30s\n", ePos, source + ePos); - fflush(stderr);*/ + fflush(stderr); gpubench_setError(GPUBENCH_ERROR_SHADER_LOAD, "Parsing program"); return GPUBENCH_BOGUS_PROGRAM; @@ -94,6 +95,7 @@ /* make sure that the program has been loaded properly */ if (!glIsProgramARB) GET_PROC(glIsProgramARB); if(!glIsProgramARB(prog_id)) { + gpubench_setError(GPUBENCH_ERROR_SHADER_LOAD, "Invalid program id, something messed up"); return GPUBENCH_BOGUS_PROGRAM; } @@ -156,7 +158,7 @@ fprintf(stderr, "Shader error: %s\n", error); fprintf(stderr, "FSHADER::Load(): Error loading the program at location %d: %.30s\n", ePos, source + ePos); - fflush(stderr); + fflush(stderr); free(source); gpubench_setError(GPUBENCH_ERROR_SHADER_LOAD, "Parse failed"); @@ -178,9 +180,13 @@ void bind_fragment_program_arb (shader_t shader) { + + CHECK_GL(); + if (!glBindProgramARB) GET_PROC(glBindProgramARB); glEnable(GL_FRAGMENT_PROGRAM_ARB); CHECK_GL(); + glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader); CHECK_GL(); diff --exclude CVS -ur gpubench-1.0/glwindow.cpp gpub2/glwindow.cpp --- gpubench-1.0/glwindow.cpp 2004-06-25 11:52:17.000000000 -0700 +++ gpub2/glwindow.cpp 2005-02-04 20:51:06.000000000 -0800 @@ -269,6 +269,15 @@ return (voidop) wglGetProcAddress(name); } +void gpubench_getprogramiv(GLenum p, GLenum e, int *i) { + PFNGLGETPROGRAMIVARBPROC glGetProgramiv; + glGetProgramiv = (PFNGLGETPROGRAMIVARBPROC) + glGetProcAddress("glGetProgramivARB"); + assert(glGetProgramiv); + glGetProgramiv(p, e, i); + return; +} + void gpubench_showWindow(void) { ShowWindow(hwnd, SW_SHOWNORMAL); } @@ -438,9 +447,8 @@ status = wglChoosePixelFormatARB(hdc, iAttributes, fAttributes, 1, &pixelformat, &numFormats); - if ( numFormats && !status ) - { - MessageBox( NULL, "wglChoosePixelFormatARB failed", "Error", MB_OK ); + if ( numFormats && !status ) { + MessageBox( NULL, "wglChoosePixelFormatARB failed.", "Error", MB_OK ); return FALSE; } diff --exclude CVS -ur gpubench-1.0/gpubench.cpp gpub2/gpubench.cpp --- gpubench-1.0/gpubench.cpp 2004-07-27 14:22:42.000000000 -0700 +++ gpub2/gpubench.cpp 2005-02-11 11:11:10.000000000 -0800 @@ -25,29 +25,28 @@ // initializes gpubench for detected architecture static int gpubench_detectArch() { - const char* vendor; + const char* vendor; if (gpubench_arch != GPUBENCH_ARCH_UNKNOWN) return gpubench_arch; strcpy(gpubench_errorStr, ""); - vendor = (const char *) glGetString(GL_VENDOR); - - if (vendor == NULL) { - fprintf(stderr, "NULL vendor string. Is there a window? Create gl window first.\n"); - exit(1); - } - + vendor = (const char *) glGetString (GL_VENDOR); + if (strstr(vendor, "NVIDIA") != NULL) gpubench_arch = GPUBENCH_ARCH_NV; else if (strstr(vendor, "ATI") != NULL) gpubench_arch = GPUBENCH_ARCH_ATI; + else if (strstr(vendor, "3Dlabs") != NULL) { + gpubench_arch = GPUBENCH_ARCH_3DLABS; + // gpubench_arch = GPUBENCH_ARCH_ATI; + } else { fprintf(stderr,"Unknown vendor, quitting.\n"); exit(1); } - + return gpubench_arch; } @@ -64,6 +63,10 @@ return (gpubench_detectArch() == GPUBENCH_ARCH_NV); } +int gpubench_is3DLABS() { + + return (gpubench_detectArch() == GPUBENCH_ARCH_3DLABS); +} const char *gpubench_getDriverVersion(void) { @@ -309,153 +312,6 @@ -float gpubench_render(int size, - int winSize, - int chunksize, - int renderTriangleNotQuad, - int iterations, - int accessType) -{ - - - int i, j, k; - - float foffset; - - if ( (chunksize != 0) && (size % chunksize != 0) ) { - - fprintf(stderr, "Error: Chunksize should be multiple of render target size, not running test.\n"); - return 0.0f; - } - - int wasNotEnabled = glIsEnabled(GL_SCISSOR_TEST); - - glViewport (0, 0, size, size); - glEnable(GL_SCISSOR_TEST); - glScissor( 0, 0, size, size); - - int tex = (accessType == GPUBENCH_ACCESS_SEQ) ? 1 : 0; - - foffset = 2.0f * winSize / size - 1.0f; - - - /* force load of all texture data onto card */ - glBegin(GL_QUADS); - glVertex2f (-1.0f, -1.0f); - glVertex2f (-1.0f, 1.0f); - glVertex2f (1.0f, 1.0f); - glVertex2f (1.0f, -1.0f); - glEnd(); - - glFinish(); - - - gpubench_resetTimer(); - - if (renderTriangleNotQuad) { - - if (chunksize) { - - for (i=0; i (y)) ? (x) : (y)) +// Additional defines + +// For 3DLabs +#define GL_RGBA32F_ARB 0x8814 +#define GL_RGB32F_ARB 0x8815 +#define GL_ALPHA32F_ARB 0x8816 +#define GL_LUMINANCE32F_ARB 0x8818 + +// For ATI +#define GL_ALPHA_FLOAT32_ATI 0x8816 +#define GL_LUMINANCE_ALPHA_FLOAT32_ATI 0x8819 +#define GL_RGB_FLOAT32_ATI 0x8815 + + + +static int nvtype[5] = {0, + GL_FLOAT_R32_NV, + GL_FLOAT_RG32_NV, + GL_FLOAT_RGB32_NV, + GL_FLOAT_RGBA32_NV}; + +static int atitype[5] = {0, + GL_ALPHA_FLOAT32_ATI, + GL_LUMINANCE_ALPHA_FLOAT32_ATI, + GL_RGB_FLOAT32_ATI, + GL_RGBA_FLOAT32_ATI}; + +static int labstype[5] = {0, + GL_ALPHA32F_ARB, + GL_LUMINANCE32F_ARB, + GL_RGB32F_ARB, + GL_RGBA32F_ARB }; + +static int format[5] = {0, + GL_RED, + GL_LUMINANCE_ALPHA, + GL_RGB, + GL_RGBA}; typedef int shader_t; @@ -32,6 +70,7 @@ int gpubench_isATI(); int gpubench_isNV(); +int gpubench_is3DLABS(); const char *gpubench_getDriverVersion(void); @@ -56,9 +95,6 @@ int gpubench_verifyFrameBuffer(float* correct, int xmin, int ymin, int xsize, int ysize, int verbose); -/* generic render big primitive method */ -float gpubench_render(int size, int winSize, int chunksize, - int renderTriangleNotQuad, int iterations, int zeroCoord); shader_t gpubench_loadShaderFromString(const char *fname); shader_t gpubench_loadShader(const char *fname); @@ -66,6 +102,7 @@ void gpubench_bindShader(shader_t shader); void gpubench_releaseShader(); +void gpubench_getprogramiv(GLenum p, GLenum e, int *i); /* added on 6/24/04, but not completely functional yet */ int gpubench_checkError(); @@ -138,4 +175,173 @@ void set_fragment_variable(shader_t shader, int num, float x, float y, float z, float w); +/* generic render big primitive method */ +template +float +gpubench_render(int size, + int winSize, + int chunksize, + int renderTriangleNotQuad, + int iterations, + int accessType) +{ + int i, j, k; + + float foffset; + + if ( (chunksize != 0) && (size % chunksize != 0) ) { + + fprintf(stderr, "Error: Chunksize should be multiple of render target size, not running test.\n"); + return 0.0f; + } + + int wasNotEnabled = glIsEnabled(GL_SCISSOR_TEST); + + glViewport (0, 0, size, size); + glEnable(GL_SCISSOR_TEST); + glScissor( 0, 0, size, size); + + foffset = 2.0f * winSize / size - 1.0f; + + /* force load of all texture data onto card */ + glBegin(GL_QUADS); + glVertex2f (-1.0f, -1.0f); + glVertex2f (-1.0f, 1.0f); + glVertex2f (1.0f, 1.0f); + glVertex2f (1.0f, -1.0f); + glEnd(); + + glFinish(); + + float norm_size = 0.0; + float double_norm_size = 0.0; + + // if GPUBENCH_ACCESS_SINGLE constant texture coordinates (0.0, 0.0) + // otherwise compute correct coordinates + // Distinction while 3DLabs uses texture coordinates in [0,1] + // -- even for floating point textures + // Static branching in template instances + if (accessType == GPUBENCH_ACCESS_SEQ) { + + if ( GPUBENCH_ARCH_3DLABS == GPUBench_Arch) { + + norm_size = (float) 1.0; + } + else { // Nvidia, ATI + + norm_size = (float) size; + } + + //double_norm_size = (float) 2.0 * norm_size; + } + + gpubench_resetTimer(); + + if (renderTriangleNotQuad) { + + if (chunksize) { + + for (i=0; i 0 && components < 5); assert(numInstr > 0); - // 10K should be enough ;-) - fpStr = (char*)malloc(1024*10); + // 100K should be enough ;-) + fpStr = (char*)malloc(1024*100); while (numInstr > 0) { @@ -279,8 +281,21 @@ gpubench_setShaderConst(fpHandle, 1, .5f, .5f, .5f, .5f); - timeMs = gpubench_render(testSize, fbSize, 0, renderTriangleNotQuad, numIterations, GPUBENCH_ACCESS_SEQ); + if (gpubench_isNV()){ + + timeMs = gpubench_render(testSize, fbSize, 0, renderTriangleNotQuad, numIterations, GPUBENCH_ACCESS_SEQ); + } + else if (gpubench_isATI()){ + + timeMs = gpubench_render(testSize, fbSize, 0, renderTriangleNotQuad, numIterations, GPUBENCH_ACCESS_SEQ); + } + else { // 3DLabs + + timeMs = gpubench_render(testSize, fbSize, 0, renderTriangleNotQuad, numIterations, GPUBENCH_ACCESS_SEQ); + } + // printf( "Time elapsed = %f\n\n", timeMs); + printf ("%d %4.4f %s %4d %4d\n", testSize, gpubench_gigaInstrPerSec( (float)(testSize*testSize)*numInstr*numIterations, timeMs), diff --exclude CVS -ur gpubench-1.0/outputbandwidth.cpp gpub2/outputbandwidth.cpp --- gpubench-1.0/outputbandwidth.cpp 2004-07-22 01:16:15.000000000 -0700 +++ gpub2/outputbandwidth.cpp 2005-02-11 08:55:52.000000000 -0800 @@ -285,14 +285,19 @@ for (i=min; i<=max; exp ? (i*=step) : (i+=step)) { + if (gpubench_isNV()){ - msec = gpubench_render(i, - max, - chunksize, - renderTriangleNotQuad, - num_iter, - GPUBENCH_ACCESS_SEQ); - + msec = gpubench_render(i, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + } + else if (gpubench_isATI()){ + + msec = gpubench_render(i, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + } + else { // 3DLabs + + msec = gpubench_render(i, max, chunksize, renderTriangleNotQuad, num_iter, GPUBENCH_ACCESS_SEQ); + } + //glReadBuffer(GL_FRONT_LEFT); //gpubench_printFrameBuffer(0,0,1,1,components); //glReadBuffer(GL_AUX0); diff --exclude CVS -ur gpubench-1.0/readback.cpp gpub2/readback.cpp --- gpubench-1.0/readback.cpp 2004-08-05 00:36:46.000000000 -0700 +++ gpub2/readback.cpp 2005-02-06 19:11:32.000000000 -0800 @@ -123,11 +123,11 @@ char *name; void (*checkdata) (unsigned char *src, unsigned char *dst, int n); void (*checkdata_float) (float *src, float *dst, int n); -} Format; +} Format_Readback; -Format format[] = {{GL_RGBA, "GL_RGBA ", checkrgba, checkrgba_float}, - {GL_ABGR_EXT, "GL_ABGR_EXT ", checkabgr, checkabgr_float}, - {GL_BGRA, "GL_BGRA ", checkbgra, checkbgra_float}, +Format_Readback format_rb[] = {{GL_RGBA, "GL_RGBA ", checkrgba, checkrgba_float}, + {GL_ABGR_EXT, "GL_ABGR_EXT ", checkabgr, checkabgr_float}, + {GL_BGRA, "GL_BGRA ", checkbgra, checkbgra_float}, }; @@ -211,7 +211,7 @@ glDrawPixels(WIDTH, HEIGHT, GL_RGBA, dofloat?GL_FLOAT:GL_UNSIGNED_BYTE, pix); - glReadPixels(0, 0, WIDTH, HEIGHT, format[which_format].id, + glReadPixels(0, 0, WIDTH, HEIGHT, format_rb[which_format].id, dofloat?GL_FLOAT:GL_UNSIGNED_BYTE, ptr); glDrawPixels(WIDTH, HEIGHT, GL_RGBA, @@ -221,7 +221,7 @@ gpubench_resetTimer(); - glReadPixels(0, 0, WIDTH, HEIGHT, format[which_format].id, + glReadPixels(0, 0, WIDTH, HEIGHT, format_rb[which_format].id, dofloat?GL_FLOAT:GL_UNSIGNED_BYTE, ptr); //memcpy(buf, ptr, WIDTH*HEIGHT*4*elemsize); @@ -229,17 +229,17 @@ msec += gpubench_getTimer(); if (dofloat) - format[which_format].checkdata_float((float *) pix, + format_rb[which_format].checkdata_float((float *) pix, (float *) ptr, WIDTH*HEIGHT); else - format[which_format].checkdata((unsigned char *) pix, + format_rb[which_format].checkdata((unsigned char *) pix, (unsigned char *) ptr, WIDTH*HEIGHT); } printf ("\t%s Mpix/sec: %3.2f MB/sec: %3.2f\n", - format[which_format].name, + format_rb[which_format].name, WIDTH*HEIGHT/msec/1000*loopcount, WIDTH*HEIGHT/msec/1024*1000/1024*loopcount*(dofloat?16:4)); } diff --exclude CVS -ur gpubench-1.0/saxpy.cpp gpub2/saxpy.cpp --- gpubench-1.0/saxpy.cpp 2004-07-24 01:20:16.000000000 -0700 +++ gpub2/saxpy.cpp 2005-02-06 19:11:52.000000000 -0800 @@ -20,21 +20,6 @@ #define GL_RGB_FLOAT32_ATI 0x8815 -static int nvtype[5] = {0, - GL_FLOAT_R32_NV, - GL_FLOAT_RG32_NV, - GL_FLOAT_RGB32_NV, - GL_FLOAT_RGBA32_NV}; -static int atitype[5] = {0, - GL_ALPHA_FLOAT32_ATI, - GL_LUMINANCE_ALPHA_FLOAT32_ATI, - GL_RGB_FLOAT32_ATI, - GL_RGBA_FLOAT32_ATI}; - -static int format[5] = {0, GL_RED, GL_LUMINANCE_ALPHA, - GL_RGB, GL_RGBA}; - - #define ALPHA 1.5f void usage(char *name) {