|
PAPI
5.0.1.0
|
00001 /****************************/ 00002 /* THIS IS OPEN SOURCE CODE */ 00003 /****************************/ 00004 00005 /* 00006 * File: extras.c 00007 * Author: Philip Mucci 00008 * mucci@cs.utk.edu 00009 * Mods: dan terpstra 00010 * terpstra@cs.utk.edu 00011 * Mods: Haihang You 00012 * you@cs.utk.edu 00013 * Mods: Kevin London 00014 * london@cs.utk.edu 00015 * Mods: Maynard Johnson 00016 * maynardj@us.ibm.com 00017 */ 00018 00019 /* This file contains portable routines to do things that we wish the 00020 vendors did in the kernel extensions or performance libraries. */ 00021 00022 #include "papi.h" 00023 #include "papi_internal.h" 00024 #include "papi_vector.h" 00025 #include "papi_memory.h" 00026 #include "extras.h" 00027 #include "threads.h" 00028 00029 #if (!defined(HAVE_FFSLL) || defined(__bgp__)) 00030 int ffsll( long long lli ); 00031 #endif 00032 00033 /****************/ 00034 /* BEGIN LOCALS */ 00035 /****************/ 00036 00037 static unsigned int _rnum = DEADBEEF; 00038 00039 /**************/ 00040 /* END LOCALS */ 00041 /**************/ 00042 00043 inline_static unsigned short 00044 random_ushort( void ) 00045 { 00046 return ( unsigned short ) ( _rnum = 1664525 * _rnum + 1013904223 ); 00047 } 00048 00049 00050 /* compute the amount by which to increment the bucket. 00051 value is the current value of the bucket 00052 this routine is used by all three profiling cases 00053 it is inlined for speed 00054 */ 00055 inline_static int 00056 profil_increment( long long value, 00057 int flags, long long excess, long long threshold ) 00058 { 00059 int increment = 1; 00060 00061 if ( flags == PAPI_PROFIL_POSIX ) { 00062 return ( 1 ); 00063 } 00064 00065 if ( flags & PAPI_PROFIL_RANDOM ) { 00066 if ( random_ushort( ) <= ( USHRT_MAX / 4 ) ) 00067 return ( 0 ); 00068 } 00069 00070 if ( flags & PAPI_PROFIL_COMPRESS ) { 00071 /* We're likely to ignore the sample if buf[address] gets big. */ 00072 if ( random_ushort( ) < value ) { 00073 return ( 0 ); 00074 } 00075 } 00076 00077 if ( flags & PAPI_PROFIL_WEIGHTED ) { /* Increment is between 1 and 255 */ 00078 if ( excess <= ( long long ) 1 ) 00079 increment = 1; 00080 else if ( excess > threshold ) 00081 increment = 255; 00082 else { 00083 threshold = threshold / ( long long ) 255; 00084 increment = ( int ) ( excess / threshold ); 00085 } 00086 } 00087 return ( increment ); 00088 } 00089 00090 00091 static void 00092 posix_profil( caddr_t address, PAPI_sprofil_t * prof, 00093 int flags, long long excess, long long threshold ) 00094 { 00095 unsigned short *buf16; 00096 unsigned int *buf32; 00097 unsigned long long *buf64; 00098 unsigned long indx; 00099 unsigned long long lloffset; 00100 00101 /* SPECIAL CASE: if starting address is 0 and scale factor is 2 00102 then all counts go into first bin. 00103 */ 00104 if ( ( prof->pr_off == 0 ) && ( prof->pr_scale == 0x2 ) ) 00105 indx = 0; 00106 else { 00107 /* compute the profile buffer offset by: 00108 - subtracting the profiling base address from the pc address 00109 - multiplying by the scaling factor 00110 - dividing by max scale (65536, or 2^^16) 00111 - dividing by implicit 2 (2^^1 for a total of 2^^17), for even addresses 00112 NOTE: 131072 is a valid scale value. It produces byte resolution of addresses 00113 */ 00114 lloffset = 00115 ( unsigned long long ) ( ( address - prof->pr_off ) * 00116 prof->pr_scale ); 00117 indx = ( unsigned long ) ( lloffset >> 17 ); 00118 } 00119 00120 /* confirm addresses within specified range */ 00121 if ( address >= prof->pr_off ) { 00122 /* test first for 16-bit buckets; this should be the fast case */ 00123 if ( flags & PAPI_PROFIL_BUCKET_16 ) { 00124 if ( ( indx * sizeof ( short ) ) < prof->pr_size ) { 00125 buf16 = prof->pr_base; 00126 buf16[indx] = 00127 ( unsigned short ) ( ( unsigned short ) buf16[indx] + 00128 profil_increment( buf16[indx], flags, 00129 excess, 00130 threshold ) ); 00131 PRFDBG( "posix_profil_16() bucket %lu = %u\n", indx, 00132 buf16[indx] ); 00133 } 00134 } 00135 /* next, look for the 32-bit case */ 00136 else if ( flags & PAPI_PROFIL_BUCKET_32 ) { 00137 if ( ( indx * sizeof ( int ) ) < prof->pr_size ) { 00138 buf32 = prof->pr_base; 00139 buf32[indx] = ( unsigned int ) buf32[indx] + 00140 ( unsigned int ) profil_increment( buf32[indx], flags, 00141 excess, threshold ); 00142 PRFDBG( "posix_profil_32() bucket %lu = %u\n", indx, 00143 buf32[indx] ); 00144 } 00145 } 00146 /* finally, fall through to the 64-bit case */ 00147 else { 00148 if ( ( indx * sizeof ( long long ) ) < prof->pr_size ) { 00149 buf64 = prof->pr_base; 00150 buf64[indx] = ( unsigned long long ) buf64[indx] + 00151 ( unsigned long long ) profil_increment( ( long long ) 00152 buf64[indx], flags, 00153 excess, 00154 threshold ); 00155 PRFDBG( "posix_profil_64() bucket %lu = %lld\n", indx, 00156 buf64[indx] ); 00157 } 00158 } 00159 } 00160 } 00161 00162 void 00163 _papi_hwi_dispatch_profile( EventSetInfo_t * ESI, caddr_t pc, 00164 long long over, int profile_index ) 00165 { 00166 EventSetProfileInfo_t *profile = &ESI->profile; 00167 PAPI_sprofil_t *sprof; 00168 caddr_t offset = 0; 00169 caddr_t best_offset = 0; 00170 int count; 00171 int best_index = -1; 00172 int i; 00173 00174 PRFDBG( "handled IP 0x%p\n", pc ); 00175 00176 sprof = profile->prof[profile_index]; 00177 count = profile->count[profile_index]; 00178 00179 for ( i = 0; i < count; i++ ) { 00180 offset = sprof[i].pr_off; 00181 if ( ( offset < pc ) && ( offset > best_offset ) ) { 00182 best_index = i; 00183 best_offset = offset; 00184 } 00185 } 00186 00187 if ( best_index == -1 ) 00188 best_index = 0; 00189 00190 posix_profil( pc, &sprof[best_index], profile->flags, over, 00191 profile->threshold[profile_index] ); 00192 } 00193 00194 /* if isHardware is true, then the processor is using hardware overflow, 00195 else it is using software overflow. Use this parameter instead of 00196 _papi_hwi_system_info.supports_hw_overflow is in CRAY some processors 00197 may use hardware overflow, some may use software overflow. 00198 00199 overflow_bit: if the component can get the overflow bit when overflow 00200 occurs, then this should be passed by the component; 00201 00202 If both genOverflowBit and isHardwareSupport are true, that means 00203 the component doesn't know how to get the overflow bit from the 00204 kernel directly, so we generate the overflow bit in this function 00205 since this function can access the ESI->overflow struct; 00206 (The component can only set genOverflowBit parameter to true if the 00207 hardware doesn't support multiple hardware overflow. If the 00208 component supports multiple hardware overflow and you don't know how 00209 to get the overflow bit, then I don't know how to deal with this 00210 situation). 00211 */ 00212 00213 int 00214 _papi_hwi_dispatch_overflow_signal( void *papiContext, caddr_t address, 00215 int *isHardware, long long overflow_bit, 00216 int genOverflowBit, ThreadInfo_t ** t, 00217 int cidx ) 00218 { 00219 int retval, event_counter, i, overflow_flag, pos; 00220 int papi_index, j; 00221 int profile_index = 0; 00222 long long overflow_vector; 00223 00224 long long temp[_papi_hwd[cidx]->cmp_info.num_cntrs], over; 00225 long long latest = 0; 00226 ThreadInfo_t *thread; 00227 EventSetInfo_t *ESI; 00228 _papi_hwi_context_t *ctx = ( _papi_hwi_context_t * ) papiContext; 00229 00230 OVFDBG( "enter\n" ); 00231 00232 if ( *t ) 00233 thread = *t; 00234 else 00235 *t = thread = _papi_hwi_lookup_thread( 0 ); 00236 00237 if ( thread != NULL ) { 00238 ESI = thread->running_eventset[cidx]; 00239 00240 if ( ( ESI == NULL ) || ( ( ESI->state & PAPI_OVERFLOWING ) == 0 ) ) { 00241 OVFDBG( "Either no eventset or eventset not set to overflow.\n" ); 00242 #ifdef ANY_THREAD_GETS_SIGNAL 00243 _papi_hwi_broadcast_signal( thread->tid ); 00244 #endif 00245 return ( PAPI_OK ); 00246 } 00247 00248 if ( ESI->CmpIdx != cidx ) 00249 return ( PAPI_ENOCMP ); 00250 00251 if ( ESI->master != thread ) { 00252 PAPIERROR 00253 ( "eventset->thread 0x%lx vs. current thread 0x%lx mismatch", 00254 ESI->master, thread ); 00255 return ( PAPI_EBUG ); 00256 } 00257 00258 if ( isHardware ) { 00259 if ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) { 00260 ESI->state |= PAPI_PAUSED; 00261 *isHardware = 1; 00262 } else 00263 *isHardware = 0; 00264 } 00265 /* Get the latest counter value */ 00266 event_counter = ESI->overflow.event_counter; 00267 00268 overflow_flag = 0; 00269 overflow_vector = 0; 00270 00271 if ( !( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) ) { 00272 retval = _papi_hwi_read( thread->context[cidx], ESI, ESI->sw_stop ); 00273 if ( retval < PAPI_OK ) 00274 return ( retval ); 00275 for ( i = 0; i < event_counter; i++ ) { 00276 papi_index = ESI->overflow.EventIndex[i]; 00277 latest = ESI->sw_stop[papi_index]; 00278 temp[i] = -1; 00279 00280 if ( latest >= ( long long ) ESI->overflow.deadline[i] ) { 00281 OVFDBG 00282 ( "dispatch_overflow() latest %lld, deadline %lld, threshold %d\n", 00283 latest, ESI->overflow.deadline[i], 00284 ESI->overflow.threshold[i] ); 00285 pos = ESI->EventInfoArray[papi_index].pos[0]; 00286 overflow_vector ^= ( long long ) 1 << pos; 00287 temp[i] = latest - ESI->overflow.deadline[i]; 00288 overflow_flag = 1; 00289 /* adjust the deadline */ 00290 ESI->overflow.deadline[i] = 00291 latest + ESI->overflow.threshold[i]; 00292 } 00293 } 00294 } else if ( genOverflowBit ) { 00295 /* we had assumed the overflow event can't be derived event */ 00296 papi_index = ESI->overflow.EventIndex[0]; 00297 00298 /* suppose the pos is the same as the counter number 00299 * (this is not true in Itanium, but itanium doesn't 00300 * need us to generate the overflow bit 00301 */ 00302 pos = ESI->EventInfoArray[papi_index].pos[0]; 00303 overflow_vector = ( long long ) 1 << pos; 00304 } else 00305 overflow_vector = overflow_bit; 00306 00307 if ( ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) || overflow_flag ) { 00308 if ( ESI->state & PAPI_PROFILING ) { 00309 int k = 0; 00310 while ( overflow_vector ) { 00311 i = ffsll( overflow_vector ) - 1; 00312 for ( j = 0; j < event_counter; j++ ) { 00313 papi_index = ESI->overflow.EventIndex[j]; 00314 /* This loop is here ONLY because Pentium 4 can have tagged * 00315 * events that contain more than one counter without being * 00316 * derived. You've gotta scan all terms to make sure you * 00317 * find the one to profile. */ 00318 for ( k = 0, pos = 0; k < PAPI_EVENTS_IN_DERIVED_EVENT && pos >= 0; 00319 k++ ) { 00320 pos = ESI->EventInfoArray[papi_index].pos[k]; 00321 if ( i == pos ) { 00322 profile_index = j; 00323 goto foundit; 00324 } 00325 } 00326 } 00327 if ( j == event_counter ) { 00328 PAPIERROR 00329 ( "BUG! overflow_vector is 0, dropping interrupt" ); 00330 return ( PAPI_EBUG ); 00331 } 00332 00333 foundit: 00334 if ( ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) ) 00335 over = 0; 00336 else 00337 over = temp[profile_index]; 00338 _papi_hwi_dispatch_profile( ESI, address, over, 00339 profile_index ); 00340 overflow_vector ^= ( long long ) 1 << i; 00341 } 00342 /* do not use overflow_vector after this place */ 00343 } else { 00344 ESI->overflow.handler( ESI->EventSetIndex, ( void * ) address, 00345 overflow_vector, ctx->ucontext ); 00346 } 00347 } 00348 ESI->state &= ~( PAPI_PAUSED ); 00349 } 00350 #ifdef ANY_THREAD_GETS_SIGNAL 00351 else { 00352 OVFDBG( "I haven't been noticed by PAPI before\n" ); 00353 _papi_hwi_broadcast_signal( ( *_papi_hwi_thread_id_fn ) ( ) ); 00354 } 00355 #endif 00356 return ( PAPI_OK ); 00357 } 00358 00359 #include <sys/time.h> 00360 #include <errno.h> 00361 #include <string.h> 00362 00363 int _papi_hwi_using_signal[PAPI_NSIG]; 00364 00365 int 00366 _papi_hwi_start_timer( int timer, int signal, int ns ) 00367 { 00368 struct itimerval value; 00369 int us = ns / 1000; 00370 00371 if ( us == 0 ) 00372 us = 1; 00373 00374 #ifdef ANY_THREAD_GETS_SIGNAL 00375 _papi_hwi_lock( INTERNAL_LOCK ); 00376 if ( ( _papi_hwi_using_signal[signal] - 1 ) ) { 00377 INTDBG( "itimer already installed\n" ); 00378 _papi_hwi_unlock( INTERNAL_LOCK ); 00379 return ( PAPI_OK ); 00380 } 00381 _papi_hwi_unlock( INTERNAL_LOCK ); 00382 #else 00383 ( void ) signal; /*unused */ 00384 #endif 00385 00386 value.it_interval.tv_sec = 0; 00387 value.it_interval.tv_usec = us; 00388 value.it_value.tv_sec = 0; 00389 value.it_value.tv_usec = us; 00390 00391 INTDBG( "Installing itimer %d, with %d us interval\n", timer, us ); 00392 if ( setitimer( timer, &value, NULL ) < 0 ) { 00393 PAPIERROR( "setitimer errno %d", errno ); 00394 return ( PAPI_ESYS ); 00395 } 00396 00397 return ( PAPI_OK ); 00398 } 00399 00400 int 00401 _papi_hwi_start_signal( int signal, int need_context, int cidx ) 00402 { 00403 struct sigaction action; 00404 00405 _papi_hwi_lock( INTERNAL_LOCK ); 00406 _papi_hwi_using_signal[signal]++; 00407 if ( _papi_hwi_using_signal[signal] - 1 ) { 00408 INTDBG( "_papi_hwi_using_signal is now %d\n", 00409 _papi_hwi_using_signal[signal] ); 00410 _papi_hwi_unlock( INTERNAL_LOCK ); 00411 return ( PAPI_OK ); 00412 } 00413 00414 memset( &action, 0x00, sizeof ( struct sigaction ) ); 00415 action.sa_flags = SA_RESTART; 00416 action.sa_sigaction = 00417 ( void ( * )( int, siginfo_t *, void * ) ) _papi_hwd[cidx]-> 00418 dispatch_timer; 00419 if ( need_context ) 00420 #if (defined(_BGL) /*|| defined (__bgp__)*/) 00421 action.sa_flags |= SIGPWR; 00422 #else 00423 action.sa_flags |= SA_SIGINFO; 00424 #endif 00425 00426 INTDBG( "installing signal handler\n" ); 00427 if ( sigaction( signal, &action, NULL ) < 0 ) { 00428 PAPIERROR( "sigaction errno %d", errno ); 00429 _papi_hwi_unlock( INTERNAL_LOCK ); 00430 return ( PAPI_ESYS ); 00431 } 00432 00433 INTDBG( "_papi_hwi_using_signal[%d] is now %d.\n", signal, 00434 _papi_hwi_using_signal[signal] ); 00435 _papi_hwi_unlock( INTERNAL_LOCK ); 00436 00437 return ( PAPI_OK ); 00438 } 00439 00440 int 00441 _papi_hwi_stop_signal( int signal ) 00442 { 00443 _papi_hwi_lock( INTERNAL_LOCK ); 00444 if ( --_papi_hwi_using_signal[signal] == 0 ) { 00445 INTDBG( "removing signal handler\n" ); 00446 if ( sigaction( signal, NULL, NULL ) == -1 ) { 00447 PAPIERROR( "sigaction errno %d", errno ); 00448 _papi_hwi_unlock( INTERNAL_LOCK ); 00449 return ( PAPI_ESYS ); 00450 } 00451 } 00452 00453 INTDBG( "_papi_hwi_using_signal[%d] is now %d\n", signal, 00454 _papi_hwi_using_signal[signal] ); 00455 _papi_hwi_unlock( INTERNAL_LOCK ); 00456 00457 return ( PAPI_OK ); 00458 } 00459 00460 int 00461 _papi_hwi_stop_timer( int timer, int signal ) 00462 { 00463 #ifdef ANY_THREAD_GETS_SIGNAL 00464 _papi_hwi_lock( INTERNAL_LOCK ); 00465 if ( _papi_hwi_using_signal[signal] > 1 ) { 00466 INTDBG( "itimer in use by another thread\n" ); 00467 _papi_hwi_unlock( INTERNAL_LOCK ); 00468 return ( PAPI_OK ); 00469 } 00470 _papi_hwi_unlock( INTERNAL_LOCK ); 00471 #else 00472 ( void ) signal; /*unused */ 00473 #endif 00474 00475 INTDBG( "turning off timer\n" ); 00476 if ( setitimer( timer, NULL, NULL ) == -1 ) { 00477 PAPIERROR( "setitimer errno %d", errno ); 00478 return ( PAPI_ESYS ); 00479 } 00480 00481 return ( PAPI_OK ); 00482 } 00483 00484 00485 00486 #if (!defined(HAVE_FFSLL) || defined(__bgp__)) 00487 /* find the first set bit in long long */ 00488 00489 int 00490 ffsll( long long lli ) 00491 { 00492 int i, num, t, tmpint, len; 00493 00494 num = sizeof ( long long ) / sizeof ( int ); 00495 if ( num == 1 ) 00496 return ( ffs( ( int ) lli ) ); 00497 len = sizeof ( int ) * CHAR_BIT; 00498 00499 for ( i = 0; i < num; i++ ) { 00500 tmpint = ( int ) ( ( ( lli >> len ) << len ) ^ lli ); 00501 00502 t = ffs( tmpint ); 00503 if ( t ) { 00504 return ( t + i * len ); 00505 } 00506 lli = lli >> len; 00507 } 00508 return PAPI_OK; 00509 } 00510 #endif