|
PAPI
5.0.1.0
|
00001 /****************************/ 00002 /* THIS IS OPEN SOURCE CODE */ 00003 /****************************/ 00004 00005 /* 00006 * File: x86_cpuid_info.c 00007 * Author: Dan Terpstra 00008 * terpstra@eecs.utk.edu 00009 * complete rewrite of linux-memory.c to conform to latest docs 00010 * and convert Intel to a table driven implementation. 00011 * Now also supports multiple TLB descriptors 00012 */ 00013 00014 #include <string.h> 00015 #include <stdio.h> 00016 #include "papi.h" 00017 #include "papi_internal.h" 00018 00019 00020 static void init_mem_hierarchy( PAPI_mh_info_t * mh_info ); 00021 static int init_amd( PAPI_mh_info_t * mh_info, int *levels ); 00022 static short int _amd_L2_L3_assoc( unsigned short int pattern ); 00023 static int init_intel( PAPI_mh_info_t * mh_info , int *levels); 00024 00025 static inline void 00026 cpuid( unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d ) 00027 { 00028 unsigned int op = *a; 00029 // .byte 0x53 == push ebx. it's universal for 32 and 64 bit 00030 // .byte 0x5b == pop ebx. 00031 // Some gcc's (4.1.2 on Core2) object to pairing push/pop and ebx in 64 bit mode. 00032 // Using the opcode directly avoids this problem. 00033 __asm__ __volatile__( ".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b":"=a"( *a ), "=S"( *b ), "=c"( *c ), 00034 "=d" 00035 ( *d ) 00036 : "a"( op ) ); 00037 } 00038 00039 int 00040 _x86_cache_info( PAPI_mh_info_t * mh_info ) 00041 { 00042 int retval = 0; 00043 union 00044 { 00045 struct 00046 { 00047 unsigned int ax, bx, cx, dx; 00048 } e; 00049 char vendor[20]; /* leave room for terminator bytes */ 00050 } reg; 00051 00052 /* Don't use cpu_type to determine the processor. 00053 * get the information directly from the chip. 00054 */ 00055 reg.e.ax = 0; /* function code 0: vendor string */ 00056 /* The vendor string is composed of EBX:EDX:ECX. 00057 * by swapping the register addresses in the call below, 00058 * the string is correctly composed in the char array. 00059 */ 00060 cpuid( ®.e.ax, ®.e.bx, ®.e.dx, ®.e.cx ); 00061 reg.vendor[16] = 0; 00062 MEMDBG( "Vendor: %s\n", ®.vendor[4] ); 00063 00064 init_mem_hierarchy( mh_info ); 00065 00066 if ( !strncmp( "GenuineIntel", ®.vendor[4], 12 ) ) { 00067 init_intel( mh_info, &mh_info->levels); 00068 } else if ( !strncmp( "AuthenticAMD", ®.vendor[4], 12 ) ) { 00069 init_amd( mh_info, &mh_info->levels ); 00070 } else { 00071 MEMDBG( "Unsupported cpu type; Not Intel or AMD x86\n" ); 00072 return PAPI_ENOIMPL; 00073 } 00074 00075 /* This works only because an empty cache element is initialized to 0 */ 00076 MEMDBG( "Detected L1: %d L2: %d L3: %d\n", 00077 mh_info->level[0].cache[0].size + mh_info->level[0].cache[1].size, 00078 mh_info->level[1].cache[0].size + mh_info->level[1].cache[1].size, 00079 mh_info->level[2].cache[0].size + mh_info->level[2].cache[1].size ); 00080 return retval; 00081 } 00082 00083 static void 00084 init_mem_hierarchy( PAPI_mh_info_t * mh_info ) 00085 { 00086 int i, j; 00087 PAPI_mh_level_t *L = mh_info->level; 00088 00089 /* initialize entire memory hierarchy structure to benign values */ 00090 for ( i = 0; i < PAPI_MAX_MEM_HIERARCHY_LEVELS; i++ ) { 00091 for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) { 00092 L[i].tlb[j].type = PAPI_MH_TYPE_EMPTY; 00093 L[i].tlb[j].num_entries = 0; 00094 L[i].tlb[j].associativity = 0; 00095 L[i].cache[j].type = PAPI_MH_TYPE_EMPTY; 00096 L[i].cache[j].size = 0; 00097 L[i].cache[j].line_size = 0; 00098 L[i].cache[j].num_lines = 0; 00099 L[i].cache[j].associativity = 0; 00100 } 00101 } 00102 } 00103 00104 static short int 00105 _amd_L2_L3_assoc( unsigned short int pattern ) 00106 { 00107 /* From "CPUID Specification" #25481 Rev 2.28, April 2008 */ 00108 short int assoc[16] = 00109 { 0, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, SHRT_MAX }; 00110 if ( pattern > 0xF ) 00111 return -1; 00112 return ( assoc[pattern] ); 00113 } 00114 00115 /* Cache configuration for AMD Athlon/Duron */ 00116 static int 00117 init_amd( PAPI_mh_info_t * mh_info, int *num_levels ) 00118 { 00119 union 00120 { 00121 struct 00122 { 00123 unsigned int ax, bx, cx, dx; 00124 } e; 00125 unsigned char byt[16]; 00126 } reg; 00127 int i, j, levels = 0; 00128 PAPI_mh_level_t *L = mh_info->level; 00129 00130 /* 00131 * Layout of CPU information taken from : 00132 * "CPUID Specification" #25481 Rev 2.28, April 2008 for most current info. 00133 */ 00134 00135 MEMDBG( "Initializing AMD memory info\n" ); 00136 /* AMD level 1 cache info */ 00137 reg.e.ax = 0x80000005; /* extended function code 5: L1 Cache and TLB Identifiers */ 00138 cpuid( ®.e.ax, ®.e.bx, ®.e.cx, ®.e.dx ); 00139 00140 MEMDBG( "e.ax=0x%8.8x e.bx=0x%8.8x e.cx=0x%8.8x e.dx=0x%8.8x\n", 00141 reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx ); 00142 MEMDBG 00143 ( ":\neax: %x %x %x %x\nebx: %x %x %x %x\necx: %x %x %x %x\nedx: %x %x %x %x\n", 00144 reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4], 00145 reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9], 00146 reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14], 00147 reg.byt[15] ); 00148 00149 /* NOTE: We assume L1 cache and TLB always exists */ 00150 /* L1 TLB info */ 00151 00152 /* 4MB memory page information; half the number of entries as 2MB */ 00153 L[0].tlb[0].type = PAPI_MH_TYPE_INST; 00154 L[0].tlb[0].num_entries = reg.byt[0] / 2; 00155 L[0].tlb[0].page_size = 4096 << 10; 00156 L[0].tlb[0].associativity = reg.byt[1]; 00157 00158 L[0].tlb[1].type = PAPI_MH_TYPE_DATA; 00159 L[0].tlb[1].num_entries = reg.byt[2] / 2; 00160 L[0].tlb[1].page_size = 4096 << 10; 00161 L[0].tlb[1].associativity = reg.byt[3]; 00162 00163 /* 2MB memory page information */ 00164 L[0].tlb[2].type = PAPI_MH_TYPE_INST; 00165 L[0].tlb[2].num_entries = reg.byt[0]; 00166 L[0].tlb[2].page_size = 2048 << 10; 00167 L[0].tlb[2].associativity = reg.byt[1]; 00168 00169 L[0].tlb[3].type = PAPI_MH_TYPE_DATA; 00170 L[0].tlb[3].num_entries = reg.byt[2]; 00171 L[0].tlb[3].page_size = 2048 << 10; 00172 L[0].tlb[3].associativity = reg.byt[3]; 00173 00174 /* 4k page information */ 00175 L[0].tlb[4].type = PAPI_MH_TYPE_INST; 00176 L[0].tlb[4].num_entries = reg.byt[4]; 00177 L[0].tlb[4].page_size = 4 << 10; 00178 L[0].tlb[4].associativity = reg.byt[5]; 00179 00180 L[0].tlb[5].type = PAPI_MH_TYPE_DATA; 00181 L[0].tlb[5].num_entries = reg.byt[6]; 00182 L[0].tlb[5].page_size = 4 << 10; 00183 L[0].tlb[5].associativity = reg.byt[7]; 00184 00185 for ( i = 0; i < PAPI_MH_MAX_LEVELS; i++ ) { 00186 if ( L[0].tlb[i].associativity == 0xff ) 00187 L[0].tlb[i].associativity = SHRT_MAX; 00188 } 00189 00190 /* L1 D-cache info */ 00191 L[0].cache[0].type = 00192 PAPI_MH_TYPE_DATA | PAPI_MH_TYPE_WB | PAPI_MH_TYPE_PSEUDO_LRU; 00193 L[0].cache[0].size = reg.byt[11] << 10; 00194 L[0].cache[0].associativity = reg.byt[10]; 00195 L[0].cache[0].line_size = reg.byt[8]; 00196 /* Byt[9] is "Lines per tag" */ 00197 /* Is that == lines per cache? */ 00198 /* L[0].cache[1].num_lines = reg.byt[9]; */ 00199 if ( L[0].cache[0].line_size ) 00200 L[0].cache[0].num_lines = L[0].cache[0].size / L[0].cache[0].line_size; 00201 MEMDBG( "D-Cache Line Count: %d; Computed: %d\n", reg.byt[9], 00202 L[0].cache[0].num_lines ); 00203 00204 /* L1 I-cache info */ 00205 L[0].cache[1].type = PAPI_MH_TYPE_INST; 00206 L[0].cache[1].size = reg.byt[15] << 10; 00207 L[0].cache[1].associativity = reg.byt[14]; 00208 L[0].cache[1].line_size = reg.byt[12]; 00209 /* Byt[13] is "Lines per tag" */ 00210 /* Is that == lines per cache? */ 00211 /* L[0].cache[1].num_lines = reg.byt[13]; */ 00212 if ( L[0].cache[1].line_size ) 00213 L[0].cache[1].num_lines = L[0].cache[1].size / L[0].cache[1].line_size; 00214 MEMDBG( "I-Cache Line Count: %d; Computed: %d\n", reg.byt[13], 00215 L[0].cache[1].num_lines ); 00216 00217 for ( i = 0; i < 2; i++ ) { 00218 if ( L[0].cache[i].associativity == 0xff ) 00219 L[0].cache[i].associativity = SHRT_MAX; 00220 } 00221 00222 /* AMD L2/L3 Cache and L2 TLB info */ 00223 /* NOTE: For safety we assume L2 and L3 cache and TLB may not exist */ 00224 00225 reg.e.ax = 0x80000006; /* extended function code 6: L2/L3 Cache and L2 TLB Identifiers */ 00226 cpuid( ®.e.ax, ®.e.bx, ®.e.cx, ®.e.dx ); 00227 00228 MEMDBG( "e.ax=0x%8.8x e.bx=0x%8.8x e.cx=0x%8.8x e.dx=0x%8.8x\n", 00229 reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx ); 00230 MEMDBG 00231 ( ":\neax: %x %x %x %x\nebx: %x %x %x %x\necx: %x %x %x %x\nedx: %x %x %x %x\n", 00232 reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4], 00233 reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9], 00234 reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14], 00235 reg.byt[15] ); 00236 00237 /* L2 TLB info */ 00238 00239 if ( reg.byt[0] | reg.byt[1] ) { /* Level 2 ITLB exists */ 00240 /* 4MB ITLB page information; half the number of entries as 2MB */ 00241 L[1].tlb[0].type = PAPI_MH_TYPE_INST; 00242 L[1].tlb[0].num_entries = 00243 ( ( ( short ) ( reg.byt[1] & 0xF ) << 8 ) + reg.byt[0] ) / 2; 00244 L[1].tlb[0].page_size = 4096 << 10; 00245 L[1].tlb[0].associativity = 00246 _amd_L2_L3_assoc( ( reg.byt[1] & 0xF0 ) >> 4 ); 00247 00248 /* 2MB ITLB page information */ 00249 L[1].tlb[2].type = PAPI_MH_TYPE_INST; 00250 L[1].tlb[2].num_entries = L[1].tlb[0].num_entries * 2; 00251 L[1].tlb[2].page_size = 2048 << 10; 00252 L[1].tlb[2].associativity = L[1].tlb[0].associativity; 00253 } 00254 00255 if ( reg.byt[2] | reg.byt[3] ) { /* Level 2 DTLB exists */ 00256 /* 4MB DTLB page information; half the number of entries as 2MB */ 00257 L[1].tlb[1].type = PAPI_MH_TYPE_DATA; 00258 L[1].tlb[1].num_entries = 00259 ( ( ( short ) ( reg.byt[3] & 0xF ) << 8 ) + reg.byt[2] ) / 2; 00260 L[1].tlb[1].page_size = 4096 << 10; 00261 L[1].tlb[1].associativity = 00262 _amd_L2_L3_assoc( ( reg.byt[3] & 0xF0 ) >> 4 ); 00263 00264 /* 2MB DTLB page information */ 00265 L[1].tlb[3].type = PAPI_MH_TYPE_DATA; 00266 L[1].tlb[3].num_entries = L[1].tlb[1].num_entries * 2; 00267 L[1].tlb[3].page_size = 2048 << 10; 00268 L[1].tlb[3].associativity = L[1].tlb[1].associativity; 00269 } 00270 00271 /* 4k page information */ 00272 if ( reg.byt[4] | reg.byt[5] ) { /* Level 2 ITLB exists */ 00273 L[1].tlb[4].type = PAPI_MH_TYPE_INST; 00274 L[1].tlb[4].num_entries = 00275 ( ( short ) ( reg.byt[5] & 0xF ) << 8 ) + reg.byt[4]; 00276 L[1].tlb[4].page_size = 4 << 10; 00277 L[1].tlb[4].associativity = 00278 _amd_L2_L3_assoc( ( reg.byt[5] & 0xF0 ) >> 4 ); 00279 } 00280 if ( reg.byt[6] | reg.byt[7] ) { /* Level 2 DTLB exists */ 00281 L[1].tlb[5].type = PAPI_MH_TYPE_DATA; 00282 L[1].tlb[5].num_entries = 00283 ( ( short ) ( reg.byt[7] & 0xF ) << 8 ) + reg.byt[6]; 00284 L[1].tlb[5].page_size = 4 << 10; 00285 L[1].tlb[5].associativity = 00286 _amd_L2_L3_assoc( ( reg.byt[7] & 0xF0 ) >> 4 ); 00287 } 00288 00289 /* AMD Level 2 cache info */ 00290 if ( reg.e.cx ) { 00291 L[1].cache[0].type = 00292 PAPI_MH_TYPE_UNIFIED | PAPI_MH_TYPE_WT | PAPI_MH_TYPE_PSEUDO_LRU; 00293 L[1].cache[0].size = ( int ) ( ( reg.e.cx & 0xffff0000 ) >> 6 ); /* right shift by 16; multiply by 2^10 */ 00294 L[1].cache[0].associativity = 00295 _amd_L2_L3_assoc( ( reg.byt[9] & 0xF0 ) >> 4 ); 00296 L[1].cache[0].line_size = reg.byt[8]; 00297 /* L[1].cache[0].num_lines = reg.byt[9]&0xF; */ 00298 if ( L[1].cache[0].line_size ) 00299 L[1].cache[0].num_lines = 00300 L[1].cache[0].size / L[1].cache[0].line_size; 00301 MEMDBG( "U-Cache Line Count: %d; Computed: %d\n", reg.byt[9] & 0xF, 00302 L[1].cache[0].num_lines ); 00303 } 00304 00305 /* AMD Level 3 cache info (shared across cores) */ 00306 if ( reg.e.dx ) { 00307 L[2].cache[0].type = 00308 PAPI_MH_TYPE_UNIFIED | PAPI_MH_TYPE_WT | PAPI_MH_TYPE_PSEUDO_LRU; 00309 L[2].cache[0].size = ( int ) ( reg.e.dx & 0xfffc0000 ) << 1; /* in blocks of 512KB (2^19) */ 00310 L[2].cache[0].associativity = 00311 _amd_L2_L3_assoc( ( reg.byt[13] & 0xF0 ) >> 4 ); 00312 L[2].cache[0].line_size = reg.byt[12]; 00313 /* L[2].cache[0].num_lines = reg.byt[13]&0xF; */ 00314 if ( L[2].cache[0].line_size ) 00315 L[2].cache[0].num_lines = 00316 L[2].cache[0].size / L[2].cache[0].line_size; 00317 MEMDBG( "U-Cache Line Count: %d; Computed: %d\n", reg.byt[13] & 0xF, 00318 L[1].cache[0].num_lines ); 00319 } 00320 for ( i = 0; i < PAPI_MAX_MEM_HIERARCHY_LEVELS; i++ ) { 00321 for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) { 00322 /* Compute the number of levels of hierarchy actually used */ 00323 if ( L[i].tlb[j].type != PAPI_MH_TYPE_EMPTY || 00324 L[i].cache[j].type != PAPI_MH_TYPE_EMPTY ) 00325 levels = i + 1; 00326 } 00327 } 00328 *num_levels = levels; 00329 return PAPI_OK; 00330 } 00331 00332 /* 00333 * The data from this table now comes from figure 3-17 in 00334 * the Intel Architectures Software Reference Manual 2A 00335 * (cpuid instruction section) 00336 * 00337 * Pretviously the information was provided by 00338 * "Intel® Processor Identification and the CPUID Instruction", 00339 * Application Note, AP-485, Nov 2008, 241618-033 00340 * Updated to AP-485, Aug 2009, 241618-036 00341 * 00342 * The following data structure and its instantiation trys to 00343 * capture all the information in Section 2.1.3 of the above 00344 * document. Not all of it is used by PAPI, but it could be. 00345 * As the above document is revised, this table should be 00346 * updated. 00347 */ 00348 00349 #define TLB_SIZES 3 /* number of different page sizes for a single TLB descriptor */ 00350 struct _intel_cache_info 00351 { 00352 int descriptor; /* 0x00 - 0xFF: register descriptor code */ 00353 int level; /* 1 to PAPI_MH_MAX_LEVELS */ 00354 int type; /* Empty, instr, data, vector, unified | TLB */ 00355 int size[TLB_SIZES]; /* cache or TLB page size(s) in kB */ 00356 int associativity; /* SHRT_MAX == fully associative */ 00357 int sector; /* 1 if cache is sectored; else 0 */ 00358 int line_size; /* for cache */ 00359 int entries; /* for TLB */ 00360 }; 00361 00362 static struct _intel_cache_info intel_cache[] = { 00363 // 0x01 00364 {.descriptor = 0x01, 00365 .level = 1, 00366 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST, 00367 .size[0] = 4, 00368 .associativity = 4, 00369 .entries = 32, 00370 }, 00371 // 0x02 00372 {.descriptor = 0x02, 00373 .level = 1, 00374 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST, 00375 .size[0] = 4096, 00376 .associativity = SHRT_MAX, 00377 .entries = 2, 00378 }, 00379 // 0x03 00380 {.descriptor = 0x03, 00381 .level = 1, 00382 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA, 00383 .size[0] = 4, 00384 .associativity = 4, 00385 .entries = 64, 00386 }, 00387 // 0x04 00388 {.descriptor = 0x04, 00389 .level = 1, 00390 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA, 00391 .size[0] = 4096, 00392 .associativity = 4, 00393 .entries = 8, 00394 }, 00395 // 0x05 00396 {.descriptor = 0x05, 00397 .level = 1, 00398 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA, 00399 .size[0] = 4096, 00400 .associativity = 4, 00401 .entries = 32, 00402 }, 00403 // 0x06 00404 {.descriptor = 0x06, 00405 .level = 1, 00406 .type = PAPI_MH_TYPE_INST, 00407 .size[0] = 8, 00408 .associativity = 4, 00409 .line_size = 32, 00410 }, 00411 // 0x08 00412 {.descriptor = 0x08, 00413 .level = 1, 00414 .type = PAPI_MH_TYPE_INST, 00415 .size[0] = 16, 00416 .associativity = 4, 00417 .line_size = 32, 00418 }, 00419 // 0x09 00420 {.descriptor = 0x09, 00421 .level = 1, 00422 .type = PAPI_MH_TYPE_INST, 00423 .size[0] = 32, 00424 .associativity = 4, 00425 .line_size = 64, 00426 }, 00427 // 0x0A 00428 {.descriptor = 0x0A, 00429 .level = 1, 00430 .type = PAPI_MH_TYPE_DATA, 00431 .size[0] = 8, 00432 .associativity = 2, 00433 .line_size = 32, 00434 }, 00435 // 0x0B 00436 {.descriptor = 0x0B, 00437 .level = 1, 00438 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST, 00439 .size[0] = 4096, 00440 .associativity = 4, 00441 .entries = 4, 00442 }, 00443 // 0x0C 00444 {.descriptor = 0x0C, 00445 .level = 1, 00446 .type = PAPI_MH_TYPE_DATA, 00447 .size[0] = 16, 00448 .associativity = 4, 00449 .line_size = 32, 00450 }, 00451 // 0x0D 00452 {.descriptor = 0x0D, 00453 .level = 1, 00454 .type = PAPI_MH_TYPE_DATA, 00455 .size[0] = 16, 00456 .associativity = 4, 00457 .line_size = 64, 00458 }, 00459 // 0x0E 00460 {.descriptor = 0x0E, 00461 .level = 1, 00462 .type = PAPI_MH_TYPE_DATA, 00463 .size[0] = 24, 00464 .associativity = 6, 00465 .line_size = 64, 00466 }, 00467 // 0x21 00468 {.descriptor = 0x21, 00469 .level = 2, 00470 .type = PAPI_MH_TYPE_UNIFIED, 00471 .size[0] = 256, 00472 .associativity = 8, 00473 .line_size = 64, 00474 }, 00475 // 0x22 00476 {.descriptor = 0x22, 00477 .level = 3, 00478 .type = PAPI_MH_TYPE_UNIFIED, 00479 .size[0] = 512, 00480 .associativity = 4, 00481 .sector = 1, 00482 .line_size = 64, 00483 }, 00484 // 0x23 00485 {.descriptor = 0x23, 00486 .level = 3, 00487 .type = PAPI_MH_TYPE_UNIFIED, 00488 .size[0] = 1024, 00489 .associativity = 8, 00490 .sector = 1, 00491 .line_size = 64, 00492 }, 00493 // 0x25 00494 {.descriptor = 0x25, 00495 .level = 3, 00496 .type = PAPI_MH_TYPE_UNIFIED, 00497 .size[0] = 2048, 00498 .associativity = 8, 00499 .sector = 1, 00500 .line_size = 64, 00501 }, 00502 // 0x29 00503 {.descriptor = 0x29, 00504 .level = 3, 00505 .type = PAPI_MH_TYPE_UNIFIED, 00506 .size[0] = 4096, 00507 .associativity = 8, 00508 .sector = 1, 00509 .line_size = 64, 00510 }, 00511 // 0x2C 00512 {.descriptor = 0x2C, 00513 .level = 1, 00514 .type = PAPI_MH_TYPE_DATA, 00515 .size[0] = 32, 00516 .associativity = 8, 00517 .line_size = 64, 00518 }, 00519 // 0x30 00520 {.descriptor = 0x30, 00521 .level = 1, 00522 .type = PAPI_MH_TYPE_INST, 00523 .size[0] = 32, 00524 .associativity = 8, 00525 .line_size = 64, 00526 }, 00527 // 0x39 00528 {.descriptor = 0x39, 00529 .level = 2, 00530 .type = PAPI_MH_TYPE_UNIFIED, 00531 .size[0] = 128, 00532 .associativity = 4, 00533 .sector = 1, 00534 .line_size = 64, 00535 }, 00536 // 0x3A 00537 {.descriptor = 0x3A, 00538 .level = 2, 00539 .type = PAPI_MH_TYPE_UNIFIED, 00540 .size[0] = 192, 00541 .associativity = 6, 00542 .sector = 1, 00543 .line_size = 64, 00544 }, 00545 // 0x3B 00546 {.descriptor = 0x3B, 00547 .level = 2, 00548 .type = PAPI_MH_TYPE_UNIFIED, 00549 .size[0] = 128, 00550 .associativity = 2, 00551 .sector = 1, 00552 .line_size = 64, 00553 }, 00554 // 0x3C 00555 {.descriptor = 0x3C, 00556 .level = 2, 00557 .type = PAPI_MH_TYPE_UNIFIED, 00558 .size[0] = 256, 00559 .associativity = 4, 00560 .sector = 1, 00561 .line_size = 64, 00562 }, 00563 // 0x3D 00564 {.descriptor = 0x3D, 00565 .level = 2, 00566 .type = PAPI_MH_TYPE_UNIFIED, 00567 .size[0] = 384, 00568 .associativity = 6, 00569 .sector = 1, 00570 .line_size = 64, 00571 }, 00572 // 0x3E 00573 {.descriptor = 0x3E, 00574 .level = 2, 00575 .type = PAPI_MH_TYPE_UNIFIED, 00576 .size[0] = 512, 00577 .associativity = 4, 00578 .sector = 1, 00579 .line_size = 64, 00580 }, 00581 // 0x40: no last level cache (??) 00582 // 0x41 00583 {.descriptor = 0x41, 00584 .level = 2, 00585 .type = PAPI_MH_TYPE_UNIFIED, 00586 .size[0] = 128, 00587 .associativity = 4, 00588 .line_size = 32, 00589 }, 00590 // 0x42 00591 {.descriptor = 0x42, 00592 .level = 2, 00593 .type = PAPI_MH_TYPE_UNIFIED, 00594 .size[0] = 256, 00595 .associativity = 4, 00596 .line_size = 32, 00597 }, 00598 // 0x43 00599 {.descriptor = 0x43, 00600 .level = 2, 00601 .type = PAPI_MH_TYPE_UNIFIED, 00602 .size[0] = 512, 00603 .associativity = 4, 00604 .line_size = 32, 00605 }, 00606 // 0x44 00607 {.descriptor = 0x44, 00608 .level = 2, 00609 .type = PAPI_MH_TYPE_UNIFIED, 00610 .size[0] = 1024, 00611 .associativity = 4, 00612 .line_size = 32, 00613 }, 00614 // 0x45 00615 {.descriptor = 0x45, 00616 .level = 2, 00617 .type = PAPI_MH_TYPE_UNIFIED, 00618 .size[0] = 2048, 00619 .associativity = 4, 00620 .line_size = 32, 00621 }, 00622 // 0x46 00623 {.descriptor = 0x46, 00624 .level = 3, 00625 .type = PAPI_MH_TYPE_UNIFIED, 00626 .size[0] = 4096, 00627 .associativity = 4, 00628 .line_size = 64, 00629 }, 00630 // 0x47 00631 {.descriptor = 0x47, 00632 .level = 3, 00633 .type = PAPI_MH_TYPE_UNIFIED, 00634 .size[0] = 8192, 00635 .associativity = 8, 00636 .line_size = 64, 00637 }, 00638 // 0x48 00639 {.descriptor = 0x48, 00640 .level = 2, 00641 .type = PAPI_MH_TYPE_UNIFIED, 00642 .size[0] = 3072, 00643 .associativity = 12, 00644 .line_size = 64, 00645 }, 00646 // 0x49 NOTE: for family 0x0F model 0x06 this is level 3 00647 {.descriptor = 0x49, 00648 .level = 2, 00649 .type = PAPI_MH_TYPE_UNIFIED, 00650 .size[0] = 4096, 00651 .associativity = 16, 00652 .line_size = 64, 00653 }, 00654 // 0x4A 00655 {.descriptor = 0x4A, 00656 .level = 3, 00657 .type = PAPI_MH_TYPE_UNIFIED, 00658 .size[0] = 6144, 00659 .associativity = 12, 00660 .line_size = 64, 00661 }, 00662 // 0x4B 00663 {.descriptor = 0x4B, 00664 .level = 3, 00665 .type = PAPI_MH_TYPE_UNIFIED, 00666 .size[0] = 8192, 00667 .associativity = 16, 00668 .line_size = 64, 00669 }, 00670 // 0x4C 00671 {.descriptor = 0x4C, 00672 .level = 3, 00673 .type = PAPI_MH_TYPE_UNIFIED, 00674 .size[0] = 12288, 00675 .associativity = 12, 00676 .line_size = 64, 00677 }, 00678 // 0x4D 00679 {.descriptor = 0x4D, 00680 .level = 3, 00681 .type = PAPI_MH_TYPE_UNIFIED, 00682 .size[0] = 16384, 00683 .associativity = 16, 00684 .line_size = 64, 00685 }, 00686 // 0x4E 00687 {.descriptor = 0x4E, 00688 .level = 2, 00689 .type = PAPI_MH_TYPE_UNIFIED, 00690 .size[0] = 6144, 00691 .associativity = 24, 00692 .line_size = 64, 00693 }, 00694 // 0x4F 00695 {.descriptor = 0x4F, 00696 .level = 1, 00697 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST, 00698 .size[0] = 4, 00699 .associativity = SHRT_MAX, 00700 .entries = 32, 00701 }, 00702 // 0x50 00703 {.descriptor = 0x50, 00704 .level = 1, 00705 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST, 00706 .size = {4, 2048, 4096}, 00707 .associativity = SHRT_MAX, 00708 .entries = 64, 00709 }, 00710 // 0x51 00711 {.descriptor = 0x51, 00712 .level = 1, 00713 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST, 00714 .size = {4, 2048, 4096}, 00715 .associativity = SHRT_MAX, 00716 .entries = 128, 00717 }, 00718 // 0x52 00719 {.descriptor = 0x52, 00720 .level = 1, 00721 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST, 00722 .size = {4, 2048, 4096}, 00723 .associativity = SHRT_MAX, 00724 .entries = 256, 00725 }, 00726 // 0x55 00727 {.descriptor = 0x55, 00728 .level = 1, 00729 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST, 00730 .size = {2048, 4096, 0}, 00731 .associativity = SHRT_MAX, 00732 .entries = 7, 00733 }, 00734 // 0x56 00735 {.descriptor = 0x56, 00736 .level = 1, 00737 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA, 00738 .size[0] = 4096, 00739 .associativity = 4, 00740 .entries = 16, 00741 }, 00742 // 0x57 00743 {.descriptor = 0x57, 00744 .level = 1, 00745 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA, 00746 .size[0] = 4, 00747 .associativity = 4, 00748 .entries = 16, 00749 }, 00750 // 0x59 00751 {.descriptor = 0x59, 00752 .level = 1, 00753 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA, 00754 .size[0] = 4, 00755 .associativity = SHRT_MAX, 00756 .entries = 16, 00757 }, 00758 // 0x5A 00759 {.descriptor = 0x5A, 00760 .level = 1, 00761 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA, 00762 .size = {2048, 4096, 0}, 00763 .associativity = 4, 00764 .entries = 32, 00765 }, 00766 // 0x5B 00767 {.descriptor = 0x5B, 00768 .level = 1, 00769 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA, 00770 .size = {4, 4096, 0}, 00771 .associativity = SHRT_MAX, 00772 .entries = 64, 00773 }, 00774 // 0x5C 00775 {.descriptor = 0x5C, 00776 .level = 1, 00777 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA, 00778 .size = {4, 4096, 0}, 00779 .associativity = SHRT_MAX, 00780 .entries = 128, 00781 }, 00782 // 0x5D 00783 {.descriptor = 0x5D, 00784 .level = 1, 00785 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA, 00786 .size = {4, 4096, 0}, 00787 .associativity = SHRT_MAX, 00788 .entries = 256, 00789 }, 00790 // 0x60 00791 {.descriptor = 0x60, 00792 .level = 1, 00793 .type = PAPI_MH_TYPE_DATA, 00794 .size[0] = 16, 00795 .associativity = 8, 00796 .sector = 1, 00797 .line_size = 64, 00798 }, 00799 // 0x66 00800 {.descriptor = 0x66, 00801 .level = 1, 00802 .type = PAPI_MH_TYPE_DATA, 00803 .size[0] = 8, 00804 .associativity = 4, 00805 .sector = 1, 00806 .line_size = 64, 00807 }, 00808 // 0x67 00809 {.descriptor = 0x67, 00810 .level = 1, 00811 .type = PAPI_MH_TYPE_DATA, 00812 .size[0] = 16, 00813 .associativity = 4, 00814 .sector = 1, 00815 .line_size = 64, 00816 }, 00817 // 0x68 00818 {.descriptor = 0x68, 00819 .level = 1, 00820 .type = PAPI_MH_TYPE_DATA, 00821 .size[0] = 32, 00822 .associativity = 4, 00823 .sector = 1, 00824 .line_size = 64, 00825 }, 00826 // 0x70 00827 {.descriptor = 0x70, 00828 .level = 1, 00829 .type = PAPI_MH_TYPE_TRACE, 00830 .size[0] = 12, 00831 .associativity = 8, 00832 }, 00833 // 0x71 00834 {.descriptor = 0x71, 00835 .level = 1, 00836 .type = PAPI_MH_TYPE_TRACE, 00837 .size[0] = 16, 00838 .associativity = 8, 00839 }, 00840 // 0x72 00841 {.descriptor = 0x72, 00842 .level = 1, 00843 .type = PAPI_MH_TYPE_TRACE, 00844 .size[0] = 32, 00845 .associativity = 8, 00846 }, 00847 // 0x73 00848 {.descriptor = 0x73, 00849 .level = 1, 00850 .type = PAPI_MH_TYPE_TRACE, 00851 .size[0] = 64, 00852 .associativity = 8, 00853 }, 00854 // 0x78 00855 {.descriptor = 0x78, 00856 .level = 2, 00857 .type = PAPI_MH_TYPE_UNIFIED, 00858 .size[0] = 1024, 00859 .associativity = 4, 00860 .line_size = 64, 00861 }, 00862 // 0x79 00863 {.descriptor = 0x79, 00864 .level = 2, 00865 .type = PAPI_MH_TYPE_UNIFIED, 00866 .size[0] = 128, 00867 .associativity = 8, 00868 .sector = 1, 00869 .line_size = 64, 00870 }, 00871 // 0x7A 00872 {.descriptor = 0x7A, 00873 .level = 2, 00874 .type = PAPI_MH_TYPE_UNIFIED, 00875 .size[0] = 256, 00876 .associativity = 8, 00877 .sector = 1, 00878 .line_size = 64, 00879 }, 00880 // 0x7B 00881 {.descriptor = 0x7B, 00882 .level = 2, 00883 .type = PAPI_MH_TYPE_UNIFIED, 00884 .size[0] = 512, 00885 .associativity = 8, 00886 .sector = 1, 00887 .line_size = 64, 00888 }, 00889 // 0x7C 00890 {.descriptor = 0x7C, 00891 .level = 2, 00892 .type = PAPI_MH_TYPE_UNIFIED, 00893 .size[0] = 1024, 00894 .associativity = 8, 00895 .sector = 1, 00896 .line_size = 64, 00897 }, 00898 // 0x7D 00899 {.descriptor = 0x7D, 00900 .level = 2, 00901 .type = PAPI_MH_TYPE_UNIFIED, 00902 .size[0] = 2048, 00903 .associativity = 8, 00904 .line_size = 64, 00905 }, 00906 // 0x7F 00907 {.descriptor = 0x7F, 00908 .level = 2, 00909 .type = PAPI_MH_TYPE_UNIFIED, 00910 .size[0] = 512, 00911 .associativity = 2, 00912 .line_size = 64, 00913 }, 00914 // 0x80 00915 {.descriptor = 0x80, 00916 .level = 2, 00917 .type = PAPI_MH_TYPE_UNIFIED, 00918 .size[0] = 512, 00919 .associativity = 8, 00920 .line_size = 64, 00921 }, 00922 // 0x82 00923 {.descriptor = 0x82, 00924 .level = 2, 00925 .type = PAPI_MH_TYPE_UNIFIED, 00926 .size[0] = 256, 00927 .associativity = 8, 00928 .line_size = 32, 00929 }, 00930 // 0x83 00931 {.descriptor = 0x83, 00932 .level = 2, 00933 .type = PAPI_MH_TYPE_UNIFIED, 00934 .size[0] = 512, 00935 .associativity = 8, 00936 .line_size = 32, 00937 }, 00938 // 0x84 00939 {.descriptor = 0x84, 00940 .level = 2, 00941 .type = PAPI_MH_TYPE_UNIFIED, 00942 .size[0] = 1024, 00943 .associativity = 8, 00944 .line_size = 32, 00945 }, 00946 // 0x85 00947 {.descriptor = 0x85, 00948 .level = 2, 00949 .type = PAPI_MH_TYPE_UNIFIED, 00950 .size[0] = 2048, 00951 .associativity = 8, 00952 .line_size = 32, 00953 }, 00954 // 0x86 00955 {.descriptor = 0x86, 00956 .level = 2, 00957 .type = PAPI_MH_TYPE_UNIFIED, 00958 .size[0] = 512, 00959 .associativity = 4, 00960 .line_size = 64, 00961 }, 00962 // 0x87 00963 {.descriptor = 0x87, 00964 .level = 2, 00965 .type = PAPI_MH_TYPE_UNIFIED, 00966 .size[0] = 1024, 00967 .associativity = 8, 00968 .line_size = 64, 00969 }, 00970 // 0xB0 00971 {.descriptor = 0xB0, 00972 .level = 1, 00973 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST, 00974 .size[0] = 4, 00975 .associativity = 4, 00976 .entries = 128, 00977 }, 00978 // 0xB1 NOTE: This is currently the only instance where .entries 00979 // is dependent on .size. It's handled as a code exception. 00980 // If other instances appear in the future, the structure 00981 // should probably change to accomodate it. 00982 {.descriptor = 0xB1, 00983 .level = 1, 00984 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST, 00985 .size = {2048, 4096, 0}, 00986 .associativity = 4, 00987 .entries = 8, /* or 4 if size = 4096 */ 00988 }, 00989 // 0xB2 00990 {.descriptor = 0xB2, 00991 .level = 1, 00992 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST, 00993 .size[0] = 4, 00994 .associativity = 4, 00995 .entries = 64, 00996 }, 00997 // 0xB3 00998 {.descriptor = 0xB3, 00999 .level = 1, 01000 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA, 01001 .size[0] = 4, 01002 .associativity = 4, 01003 .entries = 128, 01004 }, 01005 // 0xB4 01006 {.descriptor = 0xB4, 01007 .level = 1, 01008 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA, 01009 .size[0] = 4, 01010 .associativity = 4, 01011 .entries = 256, 01012 }, 01013 // 0xBA 01014 {.descriptor = 0xBA, 01015 .level = 1, 01016 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA, 01017 .size[0] = 4, 01018 .associativity = 4, 01019 .entries = 64, 01020 }, 01021 // 0xC0 01022 {.descriptor = 0xBA, 01023 .level = 1, 01024 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA, 01025 .size = {4,4096}, 01026 .associativity = 4, 01027 .entries = 8, 01028 }, 01029 // 0xCA 01030 {.descriptor = 0xCA, 01031 .level = 2, 01032 .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_UNIFIED, 01033 .size[0] = 4, 01034 .associativity = 4, 01035 .entries = 512, 01036 }, 01037 // 0xD0 01038 {.descriptor = 0xD0, 01039 .level = 3, 01040 .type = PAPI_MH_TYPE_UNIFIED, 01041 .size[0] = 512, 01042 .associativity = 4, 01043 .line_size = 64, 01044 }, 01045 // 0xD1 01046 {.descriptor = 0xD1, 01047 .level = 3, 01048 .type = PAPI_MH_TYPE_UNIFIED, 01049 .size[0] = 1024, 01050 .associativity = 4, 01051 .line_size = 64, 01052 }, 01053 // 0xD2 01054 {.descriptor = 0xD2, 01055 .level = 3, 01056 .type = PAPI_MH_TYPE_UNIFIED, 01057 .size[0] = 2048, 01058 .associativity = 4, 01059 .line_size = 64, 01060 }, 01061 // 0xD6 01062 {.descriptor = 0xD6, 01063 .level = 3, 01064 .type = PAPI_MH_TYPE_UNIFIED, 01065 .size[0] = 1024, 01066 .associativity = 8, 01067 .line_size = 64, 01068 }, 01069 // 0xD7 01070 {.descriptor = 0xD7, 01071 .level = 3, 01072 .type = PAPI_MH_TYPE_UNIFIED, 01073 .size[0] = 2048, 01074 .associativity = 8, 01075 .line_size = 64, 01076 }, 01077 // 0xD8 01078 {.descriptor = 0xD8, 01079 .level = 3, 01080 .type = PAPI_MH_TYPE_UNIFIED, 01081 .size[0] = 4096, 01082 .associativity = 8, 01083 .line_size = 64, 01084 }, 01085 // 0xDC 01086 {.descriptor = 0xDC, 01087 .level = 3, 01088 .type = PAPI_MH_TYPE_UNIFIED, 01089 .size[0] = 1536, 01090 .associativity = 12, 01091 .line_size = 64, 01092 }, 01093 // 0xDD 01094 {.descriptor = 0xDD, 01095 .level = 3, 01096 .type = PAPI_MH_TYPE_UNIFIED, 01097 .size[0] = 3072, 01098 .associativity = 12, 01099 .line_size = 64, 01100 }, 01101 // 0xDE 01102 {.descriptor = 0xDE, 01103 .level = 3, 01104 .type = PAPI_MH_TYPE_UNIFIED, 01105 .size[0] = 6144, 01106 .associativity = 12, 01107 .line_size = 64, 01108 }, 01109 // 0xE2 01110 {.descriptor = 0xE2, 01111 .level = 3, 01112 .type = PAPI_MH_TYPE_UNIFIED, 01113 .size[0] = 2048, 01114 .associativity = 16, 01115 .line_size = 64, 01116 }, 01117 // 0xE3 01118 {.descriptor = 0xE3, 01119 .level = 3, 01120 .type = PAPI_MH_TYPE_UNIFIED, 01121 .size[0] = 4096, 01122 .associativity = 16, 01123 .line_size = 64, 01124 }, 01125 // 0xE4 01126 {.descriptor = 0xE4, 01127 .level = 3, 01128 .type = PAPI_MH_TYPE_UNIFIED, 01129 .size[0] = 8192, 01130 .associativity = 16, 01131 .line_size = 64, 01132 }, 01133 // 0xEA 01134 {.descriptor = 0xEA, 01135 .level = 3, 01136 .type = PAPI_MH_TYPE_UNIFIED, 01137 .size[0] = 12288, 01138 .associativity = 24, 01139 .line_size = 64, 01140 }, 01141 // 0xEB 01142 {.descriptor = 0xEB, 01143 .level = 3, 01144 .type = PAPI_MH_TYPE_UNIFIED, 01145 .size[0] = 18432, 01146 .associativity = 24, 01147 .line_size = 64, 01148 }, 01149 // 0xEC 01150 {.descriptor = 0xEC, 01151 .level = 3, 01152 .type = PAPI_MH_TYPE_UNIFIED, 01153 .size[0] = 24576, 01154 .associativity = 24, 01155 .line_size = 64, 01156 }, 01157 // 0xF0 01158 {.descriptor = 0xF0, 01159 .level = 1, 01160 .type = PAPI_MH_TYPE_PREF, 01161 .size[0] = 64, 01162 }, 01163 // 0xF1 01164 {.descriptor = 0xF1, 01165 .level = 1, 01166 .type = PAPI_MH_TYPE_PREF, 01167 .size[0] = 128, 01168 }, 01169 }; 01170 01171 #ifdef DEBUG 01172 static void 01173 print_intel_cache_table( ) 01174 { 01175 int i, j, k = 01176 ( int ) ( sizeof ( intel_cache ) / 01177 sizeof ( struct _intel_cache_info ) ); 01178 for ( i = 0; i < k; i++ ) { 01179 printf( "%d.\tDescriptor: 0x%x\n", i, intel_cache[i].descriptor ); 01180 printf( "\t Level: %d\n", intel_cache[i].level ); 01181 printf( "\t Type: %d\n", intel_cache[i].type ); 01182 printf( "\t Size(s): " ); 01183 for ( j = 0; j < TLB_SIZES; j++ ) 01184 printf( "%d, ", intel_cache[i].size[j] ); 01185 printf( "\n" ); 01186 printf( "\t Assoc: %d\n", intel_cache[i].associativity ); 01187 printf( "\t Sector: %d\n", intel_cache[i].sector ); 01188 printf( "\t Line Size: %d\n", intel_cache[i].line_size ); 01189 printf( "\t Entries: %d\n", intel_cache[i].entries ); 01190 printf( "\n" ); 01191 } 01192 } 01193 #endif 01194 01195 /* Given a specific cache descriptor, this routine decodes the information from a table 01196 * of such descriptors and fills out one or more records in a PAPI data structure. 01197 * Called only by init_intel() 01198 */ 01199 static void 01200 intel_decode_descriptor( struct _intel_cache_info *d, PAPI_mh_level_t * L ) 01201 { 01202 int i, next; 01203 int level = d->level - 1; 01204 PAPI_mh_tlb_info_t *t; 01205 PAPI_mh_cache_info_t *c; 01206 01207 if ( d->descriptor == 0x49 ) { /* special case */ 01208 unsigned int r_eax, r_ebx, r_ecx, r_edx; 01209 r_eax = 0x1; /* function code 1: family & model */ 01210 cpuid( &r_eax, &r_ebx, &r_ecx, &r_edx ); 01211 /* override table for Family F, model 6 only */ 01212 if ( ( r_eax & 0x0FFF3FF0 ) == 0xF60 ) 01213 level = 3; 01214 } 01215 if ( d->type & PAPI_MH_TYPE_TLB ) { 01216 for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) { 01217 if ( L[level].tlb[next].type == PAPI_MH_TYPE_EMPTY ) 01218 break; 01219 } 01220 /* expand TLB entries for multiple possible page sizes */ 01221 for ( i = 0; i < TLB_SIZES && next < PAPI_MH_MAX_LEVELS && d->size[i]; 01222 i++, next++ ) { 01223 // printf("Level %d Descriptor: %x TLB type %x next: %d, i: %d\n", level, d->descriptor, d->type, next, i); 01224 t = &L[level].tlb[next]; 01225 t->type = PAPI_MH_CACHE_TYPE( d->type ); 01226 t->num_entries = d->entries; 01227 t->page_size = d->size[i] << 10; /* minimum page size in KB */ 01228 t->associativity = d->associativity; 01229 /* another special case */ 01230 if ( d->descriptor == 0xB1 && d->size[i] == 4096 ) 01231 t->num_entries = d->entries / 2; 01232 } 01233 } else { 01234 for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) { 01235 if ( L[level].cache[next].type == PAPI_MH_TYPE_EMPTY ) 01236 break; 01237 } 01238 // printf("Level %d Descriptor: %x Cache type %x next: %d\n", level, d->descriptor, d->type, next); 01239 c = &L[level].cache[next]; 01240 c->type = PAPI_MH_CACHE_TYPE( d->type ); 01241 c->size = d->size[0] << 10; /* convert from KB to bytes */ 01242 c->associativity = d->associativity; 01243 if ( d->line_size ) { 01244 c->line_size = d->line_size; 01245 c->num_lines = c->size / c->line_size; 01246 } 01247 } 01248 } 01249 01250 static inline void 01251 cpuid2 ( unsigned int* eax, unsigned int* ebx, 01252 unsigned int* ecx, unsigned int* edx, 01253 unsigned int index, unsigned int ecx_in ) 01254 { 01255 unsigned int a,b,c,d; 01256 __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b" 01257 : "=a" (a), "=S" (b), "=c" (c), "=d" (d) \ 01258 : "0" (index), "2"(ecx_in) ); 01259 *eax = a; *ebx = b; *ecx = c; *edx = d; 01260 } 01261 01262 01263 static int 01264 init_intel_leaf4( PAPI_mh_info_t * mh_info, int *num_levels ) 01265 { 01266 01267 unsigned int eax, ebx, ecx, edx; 01268 unsigned int maxidx, ecx_in; 01269 int next; 01270 01271 int cache_type,cache_level,cache_selfinit,cache_fullyassoc; 01272 int cache_linesize,cache_partitions,cache_ways,cache_sets; 01273 01274 PAPI_mh_cache_info_t *c; 01275 01276 *num_levels=0; 01277 01278 cpuid2(&eax,&ebx,&ecx,&edx, 0, 0); 01279 maxidx = eax; 01280 01281 if (maxidx<4) { 01282 MEMDBG("Warning! CPUID Index 4 not supported!\n"); 01283 return PAPI_ENOSUPP; 01284 } 01285 01286 ecx_in=0; 01287 while(1) { 01288 cpuid2(&eax,&ebx,&ecx,&edx, 4, ecx_in); 01289 01290 01291 01292 /* decoded as per table 3-12 in Intel Software Developer's Manual Volume 2A */ 01293 01294 cache_type=eax&0x1f; 01295 if (cache_type==0) break; 01296 01297 cache_level=(eax>>5)&0x3; 01298 cache_selfinit=(eax>>8)&0x1; 01299 cache_fullyassoc=(eax>>9)&0x1; 01300 01301 cache_linesize=(ebx&0xfff)+1; 01302 cache_partitions=((ebx>>12)&0x3ff)+1; 01303 cache_ways=((ebx>>22)&0x3ff)+1; 01304 01305 cache_sets=(ecx)+1; 01306 01307 /* should we export this info? 01308 01309 cache_maxshare=((eax>>14)&0xfff)+1; 01310 cache_maxpackage=((eax>>26)&0x3f)+1; 01311 01312 cache_wb=(edx)&1; 01313 cache_inclusive=(edx>>1)&1; 01314 cache_indexing=(edx>>2)&1; 01315 */ 01316 01317 if (cache_level>*num_levels) *num_levels=cache_level; 01318 01319 /* find next slot available to hold cache info */ 01320 for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) { 01321 if ( mh_info->level[cache_level-1].cache[next].type == PAPI_MH_TYPE_EMPTY ) break; 01322 } 01323 01324 c=&(mh_info->level[cache_level-1].cache[next]); 01325 01326 switch(cache_type) { 01327 case 1: MEMDBG("L%d Data Cache\n",cache_level); 01328 c->type=PAPI_MH_TYPE_DATA; 01329 break; 01330 case 2: MEMDBG("L%d Instruction Cache\n",cache_level); 01331 c->type=PAPI_MH_TYPE_INST; 01332 break; 01333 case 3: MEMDBG("L%d Unified Cache\n",cache_level); 01334 c->type=PAPI_MH_TYPE_UNIFIED; 01335 break; 01336 } 01337 01338 if (cache_selfinit) { MEMDBG("\tSelf-init\n"); } 01339 if (cache_fullyassoc) { MEMDBG("\tFully Associtative\n"); } 01340 01341 //MEMDBG("\tMax logical processors sharing cache: %d\n",cache_maxshare); 01342 //MEMDBG("\tMax logical processors sharing package: %d\n",cache_maxpackage); 01343 01344 MEMDBG("\tCache linesize: %d\n",cache_linesize); 01345 01346 MEMDBG("\tCache partitions: %d\n",cache_partitions); 01347 MEMDBG("\tCache associaticity: %d\n",cache_ways); 01348 01349 MEMDBG("\tCache sets: %d\n",cache_sets); 01350 MEMDBG("\tCache size = %dkB\n", 01351 (cache_ways*cache_partitions*cache_linesize*cache_sets)/1024); 01352 01353 //MEMDBG("\tWBINVD/INVD acts on lower caches: %d\n",cache_wb); 01354 //MEMDBG("\tCache is not inclusive: %d\n",cache_inclusive); 01355 //MEMDBG("\tComplex cache indexing: %d\n",cache_indexing); 01356 01357 c->line_size=cache_linesize; 01358 if (cache_fullyassoc) { 01359 c->associativity=SHRT_MAX; 01360 } 01361 else { 01362 c->associativity=cache_ways; 01363 } 01364 c->size=(cache_ways*cache_partitions*cache_linesize*cache_sets); 01365 c->num_lines=cache_ways*cache_partitions*cache_sets; 01366 01367 ecx_in++; 01368 } 01369 return PAPI_OK; 01370 } 01371 01372 static int 01373 init_intel_leaf2( PAPI_mh_info_t * mh_info , int *num_levels) 01374 { 01375 /* cpuid() returns memory copies of 4 32-bit registers 01376 * this union allows them to be accessed as either registers 01377 * or individual bytes. Remember that Intel is little-endian. 01378 */ 01379 union 01380 { 01381 struct 01382 { 01383 unsigned int ax, bx, cx, dx; 01384 } e; 01385 unsigned char descrip[16]; 01386 } reg; 01387 01388 int r; /* register boundary index */ 01389 int b; /* byte index into a register */ 01390 int i; /* byte index into the descrip array */ 01391 int t; /* table index into the static descriptor table */ 01392 int count; /* how many times to call cpuid; from eax:lsb */ 01393 int size; /* size of the descriptor table */ 01394 int last_level = 0; /* how many levels in the cache hierarchy */ 01395 01396 int need_leaf4=0; 01397 01398 /* All of Intel's cache info is in 1 call to cpuid 01399 * however it is a table lookup :( 01400 */ 01401 MEMDBG( "Initializing Intel Cache and TLB descriptors\n" ); 01402 01403 #ifdef DEBUG 01404 if ( ISLEVEL( DEBUG_MEMORY ) ) 01405 print_intel_cache_table( ); 01406 #endif 01407 01408 reg.e.ax = 0x2; /* function code 2: cache descriptors */ 01409 cpuid( ®.e.ax, ®.e.bx, ®.e.cx, ®.e.dx ); 01410 01411 MEMDBG( "e.ax=0x%8.8x e.bx=0x%8.8x e.cx=0x%8.8x e.dx=0x%8.8x\n", 01412 reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx ); 01413 MEMDBG 01414 ( ":\nd0: %x %x %x %x\nd1: %x %x %x %x\nd2: %x %x %x %x\nd3: %x %x %x %x\n", 01415 reg.descrip[0], reg.descrip[1], reg.descrip[2], reg.descrip[3], 01416 reg.descrip[4], reg.descrip[5], reg.descrip[6], reg.descrip[7], 01417 reg.descrip[8], reg.descrip[9], reg.descrip[10], reg.descrip[11], 01418 reg.descrip[12], reg.descrip[13], reg.descrip[14], reg.descrip[15] ); 01419 01420 count = reg.descrip[0]; /* # times to repeat CPUID call. Not implemented. */ 01421 size = ( sizeof ( intel_cache ) / sizeof ( struct _intel_cache_info ) ); /* # descriptors */ 01422 MEMDBG( "Repeat cpuid(2,...) %d times. If not 1, code is broken.\n", 01423 count ); 01424 if (count!=1) { 01425 fprintf(stderr,"Warning: Unhandled cpuid count of %d\n",count); 01426 } 01427 01428 for ( r = 0; r < 4; r++ ) { /* walk the registers */ 01429 if ( ( reg.descrip[r * 4 + 3] & 0x80 ) == 0 ) { /* only process if high order bit is 0 */ 01430 for ( b = 3; b >= 0; b-- ) { /* walk the descriptor bytes from high to low */ 01431 i = r * 4 + b; /* calculate an index into the array of descriptors */ 01432 if ( i ) { /* skip the low order byte in eax [0]; it's the count (see above) */ 01433 if ( reg.descrip[i] == 0xff ) { 01434 MEMDBG("Warning! PAPI x86_cache: must implement cpuid leaf 4\n"); 01435 need_leaf4=1; 01436 return PAPI_ENOSUPP; 01437 /* we might continue instead */ 01438 /* in order to get TLB info */ 01439 /* continue; */ 01440 } 01441 for ( t = 0; t < size; t++ ) { /* walk the descriptor table */ 01442 if ( reg.descrip[i] == intel_cache[t].descriptor ) { /* find match */ 01443 if ( intel_cache[t].level > last_level ) 01444 last_level = intel_cache[t].level; 01445 intel_decode_descriptor( &intel_cache[t], 01446 mh_info->level ); 01447 } 01448 } 01449 } 01450 } 01451 } 01452 } 01453 MEMDBG( "# of Levels: %d\n", last_level ); 01454 *num_levels=last_level; 01455 if (need_leaf4) { 01456 return PAPI_ENOSUPP; 01457 } 01458 return PAPI_OK; 01459 } 01460 01461 01462 static int 01463 init_intel( PAPI_mh_info_t * mh_info, int *levels ) 01464 { 01465 01466 int result; 01467 int num_levels; 01468 01469 /* try using the oldest leaf2 method first */ 01470 result=init_intel_leaf2(mh_info, &num_levels); 01471 01472 if (result!=PAPI_OK) { 01473 /* All Core2 and newer also support leaf4 detection */ 01474 /* Starting with Westmere *only* leaf4 is supported */ 01475 result=init_intel_leaf4(mh_info, &num_levels); 01476 } 01477 01478 *levels=num_levels; 01479 return PAPI_OK; 01480 } 01481 01482 01483 /* Returns 1 if hypervisor detected */ 01484 /* Returns 0 if none found. */ 01485 int 01486 _x86_detect_hypervisor(char *vendor_name) 01487 { 01488 unsigned int eax, ebx, ecx, edx; 01489 char hyper_vendor_id[13]; 01490 01491 cpuid2(&eax, &ebx, &ecx, &edx,0x1,0); 01492 /* This is the hypervisor bit, ecx bit 31 */ 01493 if (ecx&0x80000000) { 01494 /* There are various values in the 0x4000000X range */ 01495 /* It is questionable how standard they are */ 01496 /* For now we just return the name. */ 01497 cpuid2(&eax, &ebx, &ecx, &edx, 0x40000000,0); 01498 memcpy(hyper_vendor_id + 0, &ebx, 4); 01499 memcpy(hyper_vendor_id + 4, &ecx, 4); 01500 memcpy(hyper_vendor_id + 8, &edx, 4); 01501 hyper_vendor_id[12] = '\0'; 01502 strncpy(vendor_name,hyper_vendor_id,PAPI_MAX_STR_LEN); 01503 return 1; 01504 } 01505 else { 01506 strncpy(vendor_name,"none",PAPI_MAX_STR_LEN); 01507 } 01508 return 0; 01509 } 01510 01511 01512 01513 01514