PAPI  5.0.1.0
extras.c
Go to the documentation of this file.
00001 /****************************/
00002 /* THIS IS OPEN SOURCE CODE */
00003 /****************************/
00004 
00005 /* 
00006 * File:    extras.c
00007 * Author:  Philip Mucci
00008 *          mucci@cs.utk.edu
00009 * Mods:    dan terpstra
00010 *          terpstra@cs.utk.edu
00011 * Mods:    Haihang You
00012 *          you@cs.utk.edu
00013 * Mods:    Kevin London
00014 *          london@cs.utk.edu
00015 * Mods:    Maynard Johnson
00016 *          maynardj@us.ibm.com
00017 */
00018 
00019 /* This file contains portable routines to do things that we wish the
00020 vendors did in the kernel extensions or performance libraries. */
00021 
00022 #include "papi.h"
00023 #include "papi_internal.h"
00024 #include "papi_vector.h"
00025 #include "papi_memory.h"
00026 #include "extras.h"
00027 #include "threads.h"
00028 
00029 #if (!defined(HAVE_FFSLL) || defined(__bgp__))
00030 int ffsll( long long lli );
00031 #endif
00032 
00033 /****************/
00034 /* BEGIN LOCALS */
00035 /****************/
00036 
/* State of the pseudo-random generator implemented by random_ushort(),
   which overwrites it with the next value of the sequence on every call. */
static unsigned int _rnum = DEADBEEF;
00038 
00039 /**************/
00040 /* END LOCALS */
00041 /**************/
00042 
00043 inline_static unsigned short
00044 random_ushort( void )
00045 {
00046     return ( unsigned short ) ( _rnum = 1664525 * _rnum + 1013904223 );
00047 }
00048 
00049 
00050 /* compute the amount by which to increment the bucket.
00051    value is the current value of the bucket
00052    this routine is used by all three profiling cases
00053    it is inlined for speed
00054 */
00055 inline_static int
00056 profil_increment( long long value,
00057                   int flags, long long excess, long long threshold )
00058 {
00059     int increment = 1;
00060 
00061     if ( flags == PAPI_PROFIL_POSIX ) {
00062         return ( 1 );
00063     }
00064 
00065     if ( flags & PAPI_PROFIL_RANDOM ) {
00066         if ( random_ushort(  ) <= ( USHRT_MAX / 4 ) )
00067             return ( 0 );
00068     }
00069 
00070     if ( flags & PAPI_PROFIL_COMPRESS ) {
00071         /* We're likely to ignore the sample if buf[address] gets big. */
00072         if ( random_ushort(  ) < value ) {
00073             return ( 0 );
00074         }
00075     }
00076 
00077     if ( flags & PAPI_PROFIL_WEIGHTED ) {   /* Increment is between 1 and 255 */
00078         if ( excess <= ( long long ) 1 )
00079             increment = 1;
00080         else if ( excess > threshold )
00081             increment = 255;
00082         else {
00083             threshold = threshold / ( long long ) 255;
00084             increment = ( int ) ( excess / threshold );
00085         }
00086     }
00087     return ( increment );
00088 }
00089 
00090 
00091 static void
00092 posix_profil( caddr_t address, PAPI_sprofil_t * prof,
00093               int flags, long long excess, long long threshold )
00094 {
00095     unsigned short *buf16;
00096     unsigned int *buf32;
00097     unsigned long long *buf64;
00098     unsigned long indx;
00099     unsigned long long lloffset;
00100 
00101     /* SPECIAL CASE: if starting address is 0 and scale factor is 2
00102        then all counts go into first bin.
00103      */
00104     if ( ( prof->pr_off == 0 ) && ( prof->pr_scale == 0x2 ) )
00105         indx = 0;
00106     else {
00107         /* compute the profile buffer offset by:
00108            - subtracting the profiling base address from the pc address
00109            - multiplying by the scaling factor
00110            - dividing by max scale (65536, or 2^^16) 
00111            - dividing by implicit 2 (2^^1 for a total of 2^^17), for even addresses
00112            NOTE: 131072 is a valid scale value. It produces byte resolution of addresses
00113          */
00114         lloffset =
00115             ( unsigned long long ) ( ( address - prof->pr_off ) *
00116                                      prof->pr_scale );
00117         indx = ( unsigned long ) ( lloffset >> 17 );
00118     }
00119 
00120     /* confirm addresses within specified range */
00121     if ( address >= prof->pr_off ) {
00122         /* test first for 16-bit buckets; this should be the fast case */
00123         if ( flags & PAPI_PROFIL_BUCKET_16 ) {
00124             if ( ( indx * sizeof ( short ) ) < prof->pr_size ) {
00125                 buf16 = prof->pr_base;
00126                 buf16[indx] =
00127                     ( unsigned short ) ( ( unsigned short ) buf16[indx] +
00128                                          profil_increment( buf16[indx], flags,
00129                                                            excess,
00130                                                            threshold ) );
00131                 PRFDBG( "posix_profil_16() bucket %lu = %u\n", indx,
00132                         buf16[indx] );
00133             }
00134         }
00135         /* next, look for the 32-bit case */
00136         else if ( flags & PAPI_PROFIL_BUCKET_32 ) {
00137             if ( ( indx * sizeof ( int ) ) < prof->pr_size ) {
00138                 buf32 = prof->pr_base;
00139                 buf32[indx] = ( unsigned int ) buf32[indx] +
00140                     ( unsigned int ) profil_increment( buf32[indx], flags,
00141                                                        excess, threshold );
00142                 PRFDBG( "posix_profil_32() bucket %lu = %u\n", indx,
00143                         buf32[indx] );
00144             }
00145         }
00146         /* finally, fall through to the 64-bit case */
00147         else {
00148             if ( ( indx * sizeof ( long long ) ) < prof->pr_size ) {
00149                 buf64 = prof->pr_base;
00150                 buf64[indx] = ( unsigned long long ) buf64[indx] +
00151                     ( unsigned long long ) profil_increment( ( long long )
00152                                                              buf64[indx], flags,
00153                                                              excess,
00154                                                              threshold );
00155                 PRFDBG( "posix_profil_64() bucket %lu = %lld\n", indx,
00156                         buf64[indx] );
00157             }
00158         }
00159     }
00160 }
00161 
00162 void
00163 _papi_hwi_dispatch_profile( EventSetInfo_t * ESI, caddr_t pc,
00164                             long long over, int profile_index )
00165 {
00166     EventSetProfileInfo_t *profile = &ESI->profile;
00167     PAPI_sprofil_t *sprof;
00168     caddr_t offset = 0;
00169     caddr_t best_offset = 0;
00170     int count;
00171     int best_index = -1;
00172     int i;
00173 
00174     PRFDBG( "handled IP 0x%p\n", pc );
00175 
00176     sprof = profile->prof[profile_index];
00177     count = profile->count[profile_index];
00178 
00179     for ( i = 0; i < count; i++ ) {
00180         offset = sprof[i].pr_off;
00181         if ( ( offset < pc ) && ( offset > best_offset ) ) {
00182             best_index = i;
00183             best_offset = offset;
00184         }
00185     }
00186 
00187     if ( best_index == -1 )
00188         best_index = 0;
00189 
00190     posix_profil( pc, &sprof[best_index], profile->flags, over,
00191                   profile->threshold[profile_index] );
00192 }
00193 
00194 /* if isHardware is true, then the processor is using hardware overflow,
00195    else it is using software overflow. Use this parameter instead of 
00196    _papi_hwi_system_info.supports_hw_overflow is in CRAY some processors
00197    may use hardware overflow, some may use software overflow.
00198 
00199    overflow_bit: if the component can get the overflow bit when overflow
00200                  occurs, then this should be passed by the component;
00201 
00202    If both genOverflowBit and isHardwareSupport are true, that means
00203      the component doesn't know how to get the overflow bit from the
00204      kernel directly, so we generate the overflow bit in this function 
00205     since this function can access the ESI->overflow struct;
00206    (The component can only set genOverflowBit parameter to true if the
00207      hardware doesn't support multiple hardware overflow. If the
00208      component supports multiple hardware overflow and you don't know how 
00209      to get the overflow bit, then I don't know how to deal with this 
00210      situation).
00211 */
00212 
00213 int
00214 _papi_hwi_dispatch_overflow_signal( void *papiContext, caddr_t address,
00215                    int *isHardware, long long overflow_bit,
00216                    int genOverflowBit, ThreadInfo_t ** t,
00217                    int cidx )
00218 {
00219     int retval, event_counter, i, overflow_flag, pos;
00220     int papi_index, j;
00221     int profile_index = 0;
00222     long long overflow_vector;
00223 
00224     long long temp[_papi_hwd[cidx]->cmp_info.num_cntrs], over;
00225     long long latest = 0;
00226     ThreadInfo_t *thread;
00227     EventSetInfo_t *ESI;
00228     _papi_hwi_context_t *ctx = ( _papi_hwi_context_t * ) papiContext;
00229 
00230     OVFDBG( "enter\n" );
00231 
00232     if ( *t )
00233         thread = *t;
00234     else
00235         *t = thread = _papi_hwi_lookup_thread( 0 );
00236 
00237     if ( thread != NULL ) {
00238         ESI = thread->running_eventset[cidx];
00239 
00240         if ( ( ESI == NULL ) || ( ( ESI->state & PAPI_OVERFLOWING ) == 0 ) ) {
00241             OVFDBG( "Either no eventset or eventset not set to overflow.\n" );
00242 #ifdef ANY_THREAD_GETS_SIGNAL
00243             _papi_hwi_broadcast_signal( thread->tid );
00244 #endif
00245             return ( PAPI_OK );
00246         }
00247 
00248         if ( ESI->CmpIdx != cidx )
00249             return ( PAPI_ENOCMP );
00250 
00251         if ( ESI->master != thread ) {
00252             PAPIERROR
00253                 ( "eventset->thread 0x%lx vs. current thread 0x%lx mismatch",
00254                   ESI->master, thread );
00255             return ( PAPI_EBUG );
00256         }
00257 
00258         if ( isHardware ) {
00259             if ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) {
00260                 ESI->state |= PAPI_PAUSED;
00261                 *isHardware = 1;
00262             } else
00263                 *isHardware = 0;
00264         }
00265         /* Get the latest counter value */
00266         event_counter = ESI->overflow.event_counter;
00267 
00268         overflow_flag = 0;
00269         overflow_vector = 0;
00270 
00271         if ( !( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) ) {
00272             retval = _papi_hwi_read( thread->context[cidx], ESI, ESI->sw_stop );
00273             if ( retval < PAPI_OK )
00274                 return ( retval );
00275             for ( i = 0; i < event_counter; i++ ) {
00276                 papi_index = ESI->overflow.EventIndex[i];
00277                 latest = ESI->sw_stop[papi_index];
00278                 temp[i] = -1;
00279 
00280                 if ( latest >= ( long long ) ESI->overflow.deadline[i] ) {
00281                     OVFDBG
00282                         ( "dispatch_overflow() latest %lld, deadline %lld, threshold %d\n",
00283                           latest, ESI->overflow.deadline[i],
00284                           ESI->overflow.threshold[i] );
00285                     pos = ESI->EventInfoArray[papi_index].pos[0];
00286                     overflow_vector ^= ( long long ) 1 << pos;
00287                     temp[i] = latest - ESI->overflow.deadline[i];
00288                     overflow_flag = 1;
00289                     /* adjust the deadline */
00290                     ESI->overflow.deadline[i] =
00291                         latest + ESI->overflow.threshold[i];
00292                 }
00293             }
00294         } else if ( genOverflowBit ) {
00295             /* we had assumed the overflow event can't be derived event */
00296             papi_index = ESI->overflow.EventIndex[0];
00297 
00298             /* suppose the pos is the same as the counter number
00299              * (this is not true in Itanium, but itanium doesn't 
00300              * need us to generate the overflow bit
00301              */
00302             pos = ESI->EventInfoArray[papi_index].pos[0];
00303             overflow_vector = ( long long ) 1 << pos;
00304         } else
00305             overflow_vector = overflow_bit;
00306 
00307         if ( ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) || overflow_flag ) {
00308             if ( ESI->state & PAPI_PROFILING ) {
00309                 int k = 0;
00310                 while ( overflow_vector ) {
00311                     i = ffsll( overflow_vector ) - 1;
00312                     for ( j = 0; j < event_counter; j++ ) {
00313                         papi_index = ESI->overflow.EventIndex[j];
00314                         /* This loop is here ONLY because Pentium 4 can have tagged *
00315                          * events that contain more than one counter without being  *
00316                          * derived. You've gotta scan all terms to make sure you    *
00317                          * find the one to profile. */
00318                         for ( k = 0, pos = 0; k < PAPI_EVENTS_IN_DERIVED_EVENT && pos >= 0;
00319                               k++ ) {
00320                             pos = ESI->EventInfoArray[papi_index].pos[k];
00321                             if ( i == pos ) {
00322                                 profile_index = j;
00323                                 goto foundit;
00324                             }
00325                         }
00326                     }
00327                     if ( j == event_counter ) {
00328                         PAPIERROR
00329                             ( "BUG! overflow_vector is 0, dropping interrupt" );
00330                         return ( PAPI_EBUG );
00331                     }
00332 
00333                   foundit:
00334                     if ( ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) )
00335                         over = 0;
00336                     else
00337                         over = temp[profile_index];
00338                     _papi_hwi_dispatch_profile( ESI, address, over,
00339                                                 profile_index );
00340                     overflow_vector ^= ( long long ) 1 << i;
00341                 }
00342                 /* do not use overflow_vector after this place */
00343             } else {
00344                 ESI->overflow.handler( ESI->EventSetIndex, ( void * ) address,
00345                                        overflow_vector, ctx->ucontext );
00346             }
00347         }
00348         ESI->state &= ~( PAPI_PAUSED );
00349     }
00350 #ifdef ANY_THREAD_GETS_SIGNAL
00351     else {
00352         OVFDBG( "I haven't been noticed by PAPI before\n" );
00353         _papi_hwi_broadcast_signal( ( *_papi_hwi_thread_id_fn ) (  ) );
00354     }
00355 #endif
00356     return ( PAPI_OK );
00357 }
00358 
00359 #include <sys/time.h>
00360 #include <errno.h>
00361 #include <string.h>
00362 
/* Per-signal use count, indexed by signal number: incremented by
   _papi_hwi_start_signal() and decremented by _papi_hwi_stop_signal(). */
int _papi_hwi_using_signal[PAPI_NSIG];
00364 
00365 int
00366 _papi_hwi_start_timer( int timer, int signal, int ns )
00367 {
00368     struct itimerval value;
00369     int us = ns / 1000;
00370 
00371     if ( us == 0 )
00372         us = 1;
00373 
00374 #ifdef ANY_THREAD_GETS_SIGNAL
00375     _papi_hwi_lock( INTERNAL_LOCK );
00376     if ( ( _papi_hwi_using_signal[signal] - 1 ) ) {
00377         INTDBG( "itimer already installed\n" );
00378         _papi_hwi_unlock( INTERNAL_LOCK );
00379         return ( PAPI_OK );
00380     }
00381     _papi_hwi_unlock( INTERNAL_LOCK );
00382 #else
00383     ( void ) signal;         /*unused */
00384 #endif
00385 
00386     value.it_interval.tv_sec = 0;
00387     value.it_interval.tv_usec = us;
00388     value.it_value.tv_sec = 0;
00389     value.it_value.tv_usec = us;
00390 
00391     INTDBG( "Installing itimer %d, with %d us interval\n", timer, us );
00392     if ( setitimer( timer, &value, NULL ) < 0 ) {
00393         PAPIERROR( "setitimer errno %d", errno );
00394         return ( PAPI_ESYS );
00395     }
00396 
00397     return ( PAPI_OK );
00398 }
00399 
00400 int
00401 _papi_hwi_start_signal( int signal, int need_context, int cidx )
00402 {
00403     struct sigaction action;
00404 
00405     _papi_hwi_lock( INTERNAL_LOCK );
00406     _papi_hwi_using_signal[signal]++;
00407     if ( _papi_hwi_using_signal[signal] - 1 ) {
00408         INTDBG( "_papi_hwi_using_signal is now %d\n",
00409                 _papi_hwi_using_signal[signal] );
00410         _papi_hwi_unlock( INTERNAL_LOCK );
00411         return ( PAPI_OK );
00412     }
00413 
00414     memset( &action, 0x00, sizeof ( struct sigaction ) );
00415     action.sa_flags = SA_RESTART;
00416     action.sa_sigaction =
00417         ( void ( * )( int, siginfo_t *, void * ) ) _papi_hwd[cidx]->
00418         dispatch_timer;
00419     if ( need_context )
00420 #if (defined(_BGL) /*|| defined (__bgp__)*/)
00421         action.sa_flags |= SIGPWR;
00422 #else
00423         action.sa_flags |= SA_SIGINFO;
00424 #endif
00425 
00426     INTDBG( "installing signal handler\n" );
00427     if ( sigaction( signal, &action, NULL ) < 0 ) {
00428         PAPIERROR( "sigaction errno %d", errno );
00429         _papi_hwi_unlock( INTERNAL_LOCK );
00430         return ( PAPI_ESYS );
00431     }
00432 
00433     INTDBG( "_papi_hwi_using_signal[%d] is now %d.\n", signal,
00434             _papi_hwi_using_signal[signal] );
00435     _papi_hwi_unlock( INTERNAL_LOCK );
00436 
00437     return ( PAPI_OK );
00438 }
00439 
00440 int
00441 _papi_hwi_stop_signal( int signal )
00442 {
00443     _papi_hwi_lock( INTERNAL_LOCK );
00444     if ( --_papi_hwi_using_signal[signal] == 0 ) {
00445         INTDBG( "removing signal handler\n" );
00446         if ( sigaction( signal, NULL, NULL ) == -1 ) {
00447             PAPIERROR( "sigaction errno %d", errno );
00448             _papi_hwi_unlock( INTERNAL_LOCK );
00449             return ( PAPI_ESYS );
00450         }
00451     }
00452 
00453     INTDBG( "_papi_hwi_using_signal[%d] is now %d\n", signal,
00454             _papi_hwi_using_signal[signal] );
00455     _papi_hwi_unlock( INTERNAL_LOCK );
00456 
00457     return ( PAPI_OK );
00458 }
00459 
00460 int
00461 _papi_hwi_stop_timer( int timer, int signal )
00462 {
00463 #ifdef ANY_THREAD_GETS_SIGNAL
00464     _papi_hwi_lock( INTERNAL_LOCK );
00465     if ( _papi_hwi_using_signal[signal] > 1 ) {
00466         INTDBG( "itimer in use by another thread\n" );
00467         _papi_hwi_unlock( INTERNAL_LOCK );
00468         return ( PAPI_OK );
00469     }
00470     _papi_hwi_unlock( INTERNAL_LOCK );
00471 #else
00472     ( void ) signal;         /*unused */
00473 #endif
00474 
00475     INTDBG( "turning off timer\n" );
00476     if ( setitimer( timer, NULL, NULL ) == -1 ) {
00477         PAPIERROR( "setitimer errno %d", errno );
00478         return ( PAPI_ESYS );
00479     }
00480 
00481     return ( PAPI_OK );
00482 }
00483 
00484 
00485 
#if (!defined(HAVE_FFSLL) || defined(__bgp__))
/* find the first set bit in long long

   Returns the 1-based position of the least significant set bit of lli,
   or 0 when no bit is set. The value is examined one int-sized chunk at a
   time, starting with the least significant chunk, using ffs().
*/

int
ffsll( long long lli )
{
    /* work on an unsigned copy: shifting a negative signed value is not
       portable */
    unsigned long long bits = ( unsigned long long ) lli;
    int i, num, t, len;

    num = sizeof ( long long ) / sizeof ( int );
    if ( num == 1 )
        return ( ffs( ( int ) lli ) );
    len = sizeof ( int ) * CHAR_BIT;

    for ( i = 0; i < num; i++ ) {
        /* the conversion keeps exactly the low int-sized chunk */
        unsigned int chunk = ( unsigned int ) bits;

        t = ffs( ( int ) chunk );
        if ( t ) {
            return ( t + i * len );
        }
        bits >>= len;
    }
    return ( 0 );
}
#endif
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines