|
PAPI
5.0.1.0
|
00001 /* This file performs the following test: each OMP thread measures flops 00002 for its provided tasks, and compares this to expected flop counts, each 00003 thread having been provided with a random amount of work, such that the 00004 time and order that they complete their measurements varies. 00005 Specifically tested is the case where the value returned for some threads 00006 actually corresponds to that for another thread reading its counter values 00007 at the same time. 00008 00009 - It is based on zero_omp.c but ignored much of its functionality. 00010 - It attempts to use the following two counters. It may use less 00011 depending on hardware counter resource limitations. These are counted 00012 in the default counting domain and default granularity, depending on 00013 the platform. Usually this is the user domain (PAPI_DOM_USER) and 00014 thread context (PAPI_GRN_THR). 00015 00016 + PAPI_FP_INS 00017 + PAPI_TOT_CYC 00018 00019 Each thread inside the Thread routine: 00020 - Do prework (MAX_FLOPS - flops) 00021 - Get cyc. 00022 - Get us. 00023 - Start counters 00024 - Do flops 00025 - Stop and read counters 00026 - Get us. 00027 - Get cyc. 00028 - Return flops 00029 */ 00030 00031 #include "papi_test.h" 00032 00033 #ifdef _OPENMP 00034 #include <omp.h> 00035 #else 00036 #error "This compiler does not understand OPENMP" 00037 #endif 00038 00039 const int MAX_FLOPS = NUM_FLOPS; 00040 00041 extern int TESTS_QUIET; /* Declared in test_utils.c */ 00042 const PAPI_hw_info_t *hw_info = NULL; 00043 00044 long long 00045 Thread( int n ) 00046 { 00047 int retval, num_tests = 1; 00048 int EventSet1 = PAPI_NULL; 00049 int PAPI_event, mask1; 00050 int num_events1; 00051 long long flops; 00052 long long **values; 00053 long long elapsed_us, elapsed_cyc; 00054 char event_name[PAPI_MAX_STR_LEN]; 00055 00056 /* printf("Thread(n=%d) 0x%x started\n", n, omp_get_thread_num()); */ 00057 num_events1 = 2; 00058 00059 /* add PAPI_TOT_CYC and one of the events in PAPI_FP_INS, PAPI_FP_OPS or 00060 PAPI_TOT_INS, depending on the availability of the event on the 00061 platform */ 00062 EventSet1 = add_two_events( &num_events1, &PAPI_event, &mask1 ); 00063 00064 retval = PAPI_event_code_to_name( PAPI_event, event_name ); 00065 if ( retval != PAPI_OK ) 00066 test_fail( __FILE__, __LINE__, "PAPI_event_code_to_name", retval ); 00067 00068 values = allocate_test_space( num_tests, num_events1 ); 00069 00070 do_flops( MAX_FLOPS - n ); /* prework for balance */ 00071 00072 elapsed_us = PAPI_get_real_usec( ); 00073 00074 elapsed_cyc = PAPI_get_real_cyc( ); 00075 00076 retval = PAPI_start( EventSet1 ); 00077 if ( retval != PAPI_OK ) 00078 test_fail( __FILE__, __LINE__, "PAPI_start", retval ); 00079 00080 do_flops( n ); 00081 00082 retval = PAPI_stop( EventSet1, values[0] ); 00083 if ( retval != PAPI_OK ) 00084 test_fail( __FILE__, __LINE__, "PAPI_stop", retval ); 00085 00086 flops = ( values[0] )[0]; 00087 00088 elapsed_us = PAPI_get_real_usec( ) - elapsed_us; 00089 00090 elapsed_cyc = PAPI_get_real_cyc( ) - elapsed_cyc; 00091 00092 remove_test_events( &EventSet1, mask1 ); 00093 00094 if ( !TESTS_QUIET ) { 00095 /*printf("Thread 0x%x %-12s : \t%lld\t%d\n", omp_get_thread_num(), event_name, 00096 (values[0])[0], n); */ 00097 #if 0 00098 printf( "Thread 0x%x PAPI_TOT_CYC: \t%lld\n", omp_get_thread_num( ), 00099 values[0][0] ); 00100 printf( "Thread 0x%x Real usec : \t%lld\n", omp_get_thread_num( ), 00101 elapsed_us ); 00102 printf( "Thread 0x%x Real cycles : \t%lld\n", omp_get_thread_num( ), 00103 elapsed_cyc ); 00104 #endif 00105 } 00106 00107 /* It is illegal for the threads to exit in OpenMP */ 00108 /* test_pass(__FILE__,0,0); */ 00109 free_test_space( values, num_tests ); 00110 00111 PAPI_unregister_thread( ); 00112 /* printf("Thread 0x%x finished\n", omp_get_thread_num()); */ 00113 return flops; 00114 } 00115 00116 int 00117 main( int argc, char **argv ) 00118 { 00119 int tid, retval; 00120 int maxthr = omp_get_max_threads( ); 00121 int flopper = 0; 00122 long long *flops = calloc( maxthr, sizeof ( long long ) ); 00123 long long *flopi = calloc( maxthr, sizeof ( long long ) ); 00124 00125 tests_quiet( argc, argv ); /* Set TESTS_QUIET variable */ 00126 00127 if ( maxthr < 2 ) 00128 test_skip( __FILE__, __LINE__, "omp_get_num_threads < 2", PAPI_EINVAL ); 00129 00130 if ( ( flops == NULL ) || ( flopi == NULL ) ) 00131 test_fail( __FILE__, __LINE__, "calloc", PAPI_ENOMEM ); 00132 00133 retval = PAPI_library_init( PAPI_VER_CURRENT ); 00134 if ( retval != PAPI_VER_CURRENT ) 00135 test_fail( __FILE__, __LINE__, "PAPI_library_init", retval ); 00136 00137 hw_info = PAPI_get_hardware_info( ); 00138 if ( hw_info == NULL ) 00139 test_fail( __FILE__, __LINE__, "PAPI_get_hardware_info", 2 ); 00140 00141 retval = 00142 PAPI_thread_init( ( unsigned 00143 long ( * )( void ) ) ( omp_get_thread_num ) ); 00144 if ( retval != PAPI_OK ) 00145 if ( retval == PAPI_ECMP ) 00146 test_skip( __FILE__, __LINE__, "PAPI_thread_init", retval ); 00147 else 00148 test_fail( __FILE__, __LINE__, "PAPI_thread_init", retval ); 00149 00150 flopper = Thread( 65536 ) / 65536; 00151 printf( "flopper=%d\n", flopper ); 00152 00153 for ( int i = 0; i < 100000; i++ ) 00154 #pragma omp parallel private(tid) 00155 { 00156 tid = omp_get_thread_num( ); 00157 flopi[tid] = rand( ) * 3; 00158 flops[tid] = Thread( ( flopi[tid] / flopper ) % MAX_FLOPS ); 00159 #pragma omp barrier 00160 #pragma omp master 00161 if ( flops[tid] < flopi[tid] ) { 00162 printf( "test iteration=%d\n", i ); 00163 for ( int j = 0; j < omp_get_num_threads( ); j++ ) { 00164 printf( "Thread 0x%x Value %6lld %c %6lld", j, flops[j], 00165 ( flops[j] < flopi[j] ) ? '<' : '=', flopi[j] ); 00166 for ( int k = 0; k < omp_get_num_threads( ); k++ ) 00167 if ( ( k != j ) && ( flops[k] == flops[j] ) ) 00168 printf( " == Thread 0x%x!", k ); 00169 printf( "\n" ); 00170 } 00171 test_fail( __FILE__, __LINE__, "value returned for thread", 00172 PAPI_EBUG ); 00173 } 00174 } 00175 00176 test_pass( __FILE__, NULL, 0 ); 00177 exit( 0 ); 00178 }