|
PAPI
5.0.1.0
|
00001 /* 00002 * File: byte_profile.c 00003 * CVS: $Id$ 00004 * Author: Dan Terpstra 00005 * terpstra@cs.utk.edu 00006 * Mods: Maynard Johnson 00007 * maynardj@us.ibm.com 00008 * Mods: <your name here> 00009 * <your email address> 00010 */ 00011 00012 /* This file profiles multiple events with byte level address resolution. 00013 It's patterned after code suggested by John Mellor-Crummey, Rob Fowler, 00014 and Nathan Tallent. 00015 It is intended to illustrate the use of Multiprofiling on a very tight 00016 block of code at byte level resolution of the instruction addresses. 00017 */ 00018 00019 #include "papi_test.h" 00020 #include "prof_utils.h" 00021 #define PROFILE_ALL 00022 00023 static const PAPI_hw_info_t *hw_info; 00024 00025 static int num_events = 0; 00026 00027 #define N (1 << 23) 00028 #define T (10) 00029 00030 double aa[N], bb[N]; 00031 double s = 0, s2 = 0; 00032 00033 static void 00034 cleara( double a[N] ) 00035 { 00036 int i; 00037 00038 for ( i = 0; i < N; i++ ) { 00039 a[i] = 0; 00040 } 00041 } 00042 00043 static int 00044 my_dummy( int i ) 00045 { 00046 return ( i + 1 ); 00047 } 00048 00049 static void 00050 my_main( ) 00051 { 00052 int i, j; 00053 00054 for ( j = 0; j < T; j++ ) { 00055 for ( i = 0; i < N; i++ ) { 00056 bb[i] = 0; 00057 } 00058 cleara( aa ); 00059 memset( aa, 0, sizeof ( aa ) ); 00060 for ( i = 0; i < N; i++ ) { 00061 s += aa[i] * bb[i]; 00062 s2 += aa[i] * aa[i] + bb[i] * bb[i]; 00063 } 00064 } 00065 } 00066 00067 static int 00068 do_profile( caddr_t start, unsigned long plength, unsigned scale, int thresh, 00069 int bucket, unsigned int mask ) { 00070 00071 int i, retval; 00072 unsigned long blength; 00073 int num_buckets,j=0; 00074 00075 int num_bufs = num_events; 00076 int event = num_events; 00077 00078 int events[MAX_TEST_EVENTS]; 00079 char header[BUFSIZ]; 00080 00081 strncpy(header,"address\t\t",BUFSIZ); 00082 00083 //= "address\t\t\tcyc\tins\tfp_ins\n"; 00084 00085 for(i=0;i<MAX_TEST_EVENTS;i++) { 00086 if (mask & test_events[i].mask) { 00087 events[j]=test_events[i].event; 00088 00089 if (events[j]==PAPI_TOT_CYC) { 00090 strncat(header,"\tcyc",BUFSIZ-1); 00091 } 00092 if (events[j]==PAPI_TOT_INS) { 00093 strncat(header,"\tins",BUFSIZ-1); 00094 } 00095 if (events[j]==PAPI_FP_INS) { 00096 strncat(header,"\tfp_ins",BUFSIZ-1); 00097 } 00098 if (events[j]==PAPI_FP_OPS) { 00099 strncat(header,"\tfp_ops",BUFSIZ-1); 00100 } 00101 if (events[j]==PAPI_L2_TCM) { 00102 strncat(header,"\tl2_tcm",BUFSIZ-1); 00103 } 00104 00105 j++; 00106 00107 } 00108 } 00109 00110 strncat(header,"\n",BUFSIZ-1); 00111 00112 00113 00114 blength = prof_size( plength, scale, bucket, &num_buckets ); 00115 prof_alloc( num_bufs, blength ); 00116 00117 if ( !TESTS_QUIET ) 00118 printf( "Overall event counts:\n" ); 00119 00120 for ( i = 0; i < num_events; i++ ) { 00121 if ( ( retval = 00122 PAPI_profil( profbuf[i], ( unsigned int ) blength, start, scale, 00123 EventSet, events[i], thresh, 00124 PAPI_PROFIL_POSIX | bucket ) ) != PAPI_OK ) { 00125 if (retval == PAPI_EINVAL) { 00126 test_warn( __FILE__, __LINE__, "Trying to profile with derived event", 1); 00127 num_events=i; 00128 break; 00129 } 00130 else { 00131 printf("Failed with event %d 0x%x\n",i,events[i]); 00132 test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); 00133 } 00134 } 00135 } 00136 00137 if ( ( retval = PAPI_start( EventSet ) ) != PAPI_OK ) 00138 test_fail( __FILE__, __LINE__, "PAPI_start", retval ); 00139 00140 my_main( ); 00141 00142 if ( ( retval = PAPI_stop( EventSet, values[0] ) ) != PAPI_OK ) 00143 test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); 00144 00145 if ( !TESTS_QUIET ) { 00146 printf( TAB1, "PAPI_TOT_CYC:", ( values[0] )[--event] ); 00147 if ( strcmp( hw_info->model_string, "POWER6" ) != 0 ) { 00148 printf( TAB1, "PAPI_TOT_INS:", ( values[0] )[--event] ); 00149 } 00150 #if defined(__powerpc__) 00151 printf( TAB1, "PAPI_FP_INS", ( values[0] )[--event] ); 00152 #else 00153 if ( strcmp( hw_info->model_string, "Intel Pentium III" ) != 0 ) { 00154 printf( TAB1, "PAPI_FP_OPS:", ( values[0] )[--event] ); 00155 printf( TAB1, "PAPI_L2_TCM:", ( values[0] )[--event] ); 00156 } 00157 #endif 00158 } 00159 00160 for ( i = 0; i < num_events; i++ ) { 00161 if ( ( retval = 00162 PAPI_profil( profbuf[i], ( unsigned int ) blength, start, scale, 00163 EventSet, events[i], 0, 00164 PAPI_PROFIL_POSIX ) ) != PAPI_OK ) 00165 test_fail( __FILE__, __LINE__, "PAPI_profil", retval ); 00166 } 00167 00168 prof_head( blength, bucket, num_buckets, header ); 00169 prof_out( start, num_events, bucket, num_buckets, scale ); 00170 retval = prof_check( num_bufs, bucket, num_buckets ); 00171 for ( i = 0; i < num_bufs; i++ ) { 00172 free( profbuf[i] ); 00173 } 00174 return retval; 00175 } 00176 00177 00178 00179 int 00180 main( int argc, char **argv ) 00181 { 00182 long length; 00183 int mask; 00184 int retval; 00185 const PAPI_exe_info_t *prginfo; 00186 caddr_t start, end; 00187 00188 prof_init( argc, argv, &prginfo ); 00189 00190 hw_info = PAPI_get_hardware_info( ); 00191 if ( hw_info == NULL ) 00192 test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); 00193 00194 mask = MASK_TOT_CYC | MASK_TOT_INS | MASK_FP_OPS | MASK_L2_TCM; 00195 00196 #if defined(__powerpc__) 00197 if ( strcmp( hw_info->model_string, "POWER6" ) == 0 ) 00198 mask = MASK_TOT_CYC | MASK_FP_INS; 00199 else 00200 mask = MASK_TOT_CYC | MASK_TOT_INS | MASK_FP_INS; 00201 #endif 00202 00203 #if defined(ITANIUM2) 00204 mask = MASK_TOT_CYC | MASK_FP_OPS | MASK_L2_TCM | MASK_L1_DCM; 00205 #endif 00206 EventSet = add_test_events( &num_events, &mask, 0 ); 00207 values = allocate_test_space( 1, num_events ); 00208 00209 /* profile the cleara and my_main address space */ 00210 start = ( caddr_t ) cleara; 00211 end = ( caddr_t ) my_dummy; 00212 00213 /* Itanium and PowerPC64 processors return function descriptors instead 00214 * of function addresses. You must dereference the descriptor to get the address. 00215 */ 00216 #if defined(ITANIUM1) || defined(ITANIUM2) || defined(__powerpc64__) 00217 start = ( caddr_t ) ( ( ( struct fdesc * ) start )->ip ); 00218 end = ( caddr_t ) ( ( ( struct fdesc * ) end )->ip ); 00219 #endif 00220 00221 /* call dummy so it doesn't get optimized away */ 00222 retval = my_dummy( 1 ); 00223 00224 length = end - start; 00225 if ( length < 0 ) 00226 test_fail( __FILE__, __LINE__, "Profile length < 0!", ( int ) length ); 00227 00228 prof_print_address 00229 ( "Test case byte_profile: Multi-event profiling at byte resolution.\n", 00230 prginfo ); 00231 prof_print_prof_info( start, end, THRESHOLD, event_name ); 00232 00233 retval = 00234 do_profile( start, ( unsigned ) length, 00235 FULL_SCALE * 2, THRESHOLD, 00236 PAPI_PROFIL_BUCKET_32, mask ); 00237 00238 remove_test_events( &EventSet, mask ); 00239 00240 if ( retval ) 00241 test_pass( __FILE__, values, 1 ); 00242 else 00243 test_fail( __FILE__, __LINE__, "No information in buffers", 1 ); 00244 return 1; 00245 } 00246 00247 00248 00249