|
PAPI
5.0.1.0
|
00001 /**************************************************************************** 00002 *C 00003 *C matrix-hl.f 00004 *C An example of matrix-matrix multiplication and using PAPI high level 00005 *C to look at the performance. written by Kevin London 00006 *C March 2000 00007 *C Added to c tests to check stop 00008 *C**************************************************************************** 00009 */ 00010 #include "papi_test.h" 00011 #include <stdlib.h> 00012 00013 int 00014 main( int argc, char **argv ) 00015 { 00016 00017 #define NROWS1 175 00018 #define NCOLS1 225 00019 #define NROWS2 NCOLS1 00020 #define NCOLS2 150 00021 double p[NROWS1][NCOLS1], q[NROWS2][NCOLS2], r[NROWS1][NCOLS2]; 00022 int i, j, k, num_events, retval; 00023 /* PAPI standardized event to be monitored */ 00024 int event[2]; 00025 /* PAPI values of the counters */ 00026 long long values[2], tmp; 00027 extern int TESTS_QUIET; 00028 00029 tests_quiet( argc, argv ); 00030 00031 /* Setup default values */ 00032 num_events = 0; 00033 00034 /* See how many hardware events at one time are supported 00035 * This also initializes the PAPI library */ 00036 num_events = PAPI_num_counters( ); 00037 if ( num_events < 2 ) { 00038 printf( "This example program requries the architecture to " 00039 "support 2 simultaneous hardware events...shutting down.\n" ); 00040 test_skip( __FILE__, __LINE__, "PAPI_num_counters", 1 ); 00041 } 00042 00043 if ( !TESTS_QUIET ) 00044 printf( "Number of hardware counters supported: %d\n", num_events ); 00045 00046 if ( PAPI_query_event( PAPI_FP_OPS ) == PAPI_OK ) 00047 event[0] = PAPI_FP_OPS; 00048 else if ( PAPI_query_event( PAPI_FP_INS ) == PAPI_OK ) 00049 event[0] = PAPI_FP_INS; 00050 else 00051 event[0] = PAPI_TOT_INS; 00052 00053 /* Time used */ 00054 event[1] = PAPI_TOT_CYC; 00055 00056 /* matrix 1: read in the matrix values */ 00057 for ( i = 0; i < NROWS1; i++ ) 00058 for ( j = 0; j < NCOLS1; j++ ) 00059 p[i][j] = i * j * 1.0; 00060 00061 for ( i = 0; i < NROWS2; i++ ) 00062 for ( j = 0; j < NCOLS2; j++ ) 00063 q[i][j] = i * j * 1.0; 00064 00065 for ( i = 0; i < NROWS1; i++ ) 00066 for ( j = 0; j < NCOLS2; j++ ) 00067 r[i][j] = i * j * 1.0; 00068 00069 /* Set up the counters */ 00070 num_events = 2; 00071 retval = PAPI_start_counters( event, num_events ); 00072 if ( retval != PAPI_OK ) 00073 test_fail( __FILE__, __LINE__, "PAPI_start_counters", retval ); 00074 00075 /* Clear the counter values */ 00076 retval = PAPI_read_counters( values, num_events ); 00077 if ( retval != PAPI_OK ) 00078 test_fail( __FILE__, __LINE__, "PAPI_read_counters", retval ); 00079 00080 /* Compute the matrix-matrix multiplication */ 00081 for ( i = 0; i < NROWS1; i++ ) 00082 for ( j = 0; j < NCOLS2; j++ ) 00083 for ( k = 0; k < NCOLS1; k++ ) 00084 r[i][j] = r[i][j] + p[i][k] * q[k][j]; 00085 00086 /* Stop the counters and put the results in the array values */ 00087 retval = PAPI_stop_counters( values, num_events ); 00088 if ( retval != PAPI_OK ) 00089 test_fail( __FILE__, __LINE__, "PAPI_stop_counters", retval ); 00090 00091 /* Make sure the compiler does not optimize away the multiplication 00092 * with dummy(r); 00093 */ 00094 dummy( r ); 00095 00096 if ( !TESTS_QUIET ) { 00097 if ( event[0] == PAPI_TOT_INS ) { 00098 printf( TAB1, "TOT Instructions:", values[0] ); 00099 } else { 00100 printf( TAB1, "FP Instructions:", values[0] ); 00101 } 00102 printf( TAB1, "Cycles:", values[1] ); 00103 } 00104 00105 /* 00106 * Intel Core overreports flops by 50% when using -O 00107 * Use -O2 or -O3 to produce the expected # of flops 00108 */ 00109 00110 if ( event[0] == PAPI_FP_INS ) { 00111 /* Compare measured FLOPS to expected value */ 00112 tmp = 00113 2 * ( long long ) ( NROWS1 ) * ( long long ) ( NCOLS2 ) * 00114 ( long long ) ( NCOLS1 ); 00115 if ( abs( ( int ) values[0] - ( int ) tmp ) > ( double ) tmp * 0.05 ) { 00116 /* Maybe we are counting FMAs? */ 00117 tmp = tmp / 2; 00118 if ( abs( ( int ) values[0] - ( int ) tmp ) > 00119 ( double ) tmp * 0.05 ) { 00120 printf( "\n" TAB1, "Expected operation count: ", 2 * tmp ); 00121 printf( TAB1, "Or possibly (using FMA): ", tmp ); 00122 printf( TAB1, "Instead I got: ", values[0] ); 00123 test_fail( __FILE__, __LINE__, 00124 "Unexpected FLOP count (check vector operations)", 00125 1 ); 00126 } 00127 } 00128 } 00129 test_pass( __FILE__, 0, 0 ); 00130 return ( PAPI_EMISC ); 00131 }