PAPI  5.0.1.0
x86_cpuid_info.c
Go to the documentation of this file.
00001 /****************************/
00002 /* THIS IS OPEN SOURCE CODE */
00003 /****************************/
00004 
00005 /* 
00006 * File:    x86_cpuid_info.c
00007 * Author:  Dan Terpstra
00008 *          terpstra@eecs.utk.edu
00009 *          complete rewrite of linux-memory.c to conform to latest docs
00010 *          and convert Intel to a table driven implementation.
00011 *          Now also supports multiple TLB descriptors
00012 */
00013 
00014 #include <string.h>
00015 #include <stdio.h>
00016 #include "papi.h"
00017 #include "papi_internal.h"
00018 
00019 
/* Forward declarations of the file-local helpers called by _x86_cache_info()
 * and init_amd() before their definitions appear. */
00020 static void init_mem_hierarchy( PAPI_mh_info_t * mh_info );
00021 static int init_amd( PAPI_mh_info_t * mh_info, int *levels );
00022 static short int _amd_L2_L3_assoc( unsigned short int pattern );
00023 static int init_intel( PAPI_mh_info_t * mh_info , int *levels);
00024 
/*
 * Execute the CPUID instruction.
 *
 * On entry *a holds the function code, which is loaded into EAX.
 * On return *a, *b, *c and *d receive EAX, EBX, ECX and EDX respectively.
 * Only EAX is supplied as an input operand; ECX is not set before the
 * instruction executes.
 */
00025 static inline void
00026 cpuid( unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d )
00027 {
00028     unsigned int op = *a;
00029     // .byte 0x53 == push ebx. it's universal for 32 and 64 bit
00030     // .byte 0x5b == pop ebx.
00031     // Some gcc's (4.1.2 on Core2) object to pairing push/pop and ebx in 64 bit mode.
00032     // Using the opcode directly avoids this problem.
    // EBX is saved and restored around the instruction; its CPUID result is
    // copied into ESI first, which is why *b is bound to the "S" constraint.
    // NOTE(review): the raw push/pop moves the stack pointer without the
    // compiler's knowledge -- confirm this is safe with the x86-64 red zone.
00033   __asm__ __volatile__( ".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b":"=a"( *a ), "=S"( *b ), "=c"( *c ),
00034                           "=d"
00035                           ( *d )
00036   :                   "a"( op ) );
00037 }
00038 
00039 int
00040 _x86_cache_info( PAPI_mh_info_t * mh_info )
00041 {
00042     int retval = 0;
00043     union
00044     {
00045         struct
00046         {
00047             unsigned int ax, bx, cx, dx;
00048         } e;
00049         char vendor[20];               /* leave room for terminator bytes */
00050     } reg;
00051 
00052     /* Don't use cpu_type to determine the processor.
00053      * get the information directly from the chip.
00054      */
00055     reg.e.ax = 0;            /* function code 0: vendor string */
00056     /* The vendor string is composed of EBX:EDX:ECX.
00057      * by swapping the register addresses in the call below,
00058      * the string is correctly composed in the char array.
00059      */
00060     cpuid( &reg.e.ax, &reg.e.bx, &reg.e.dx, &reg.e.cx );
00061     reg.vendor[16] = 0;
00062     MEMDBG( "Vendor: %s\n", &reg.vendor[4] );
00063 
00064     init_mem_hierarchy( mh_info );
00065 
00066     if ( !strncmp( "GenuineIntel", &reg.vendor[4], 12 ) ) {
00067             init_intel( mh_info, &mh_info->levels);
00068     } else if ( !strncmp( "AuthenticAMD", &reg.vendor[4], 12 ) ) {
00069       init_amd( mh_info, &mh_info->levels );
00070     } else {
00071         MEMDBG( "Unsupported cpu type; Not Intel or AMD x86\n" );
00072         return PAPI_ENOIMPL;
00073     }
00074 
00075     /* This works only because an empty cache element is initialized to 0 */
00076     MEMDBG( "Detected L1: %d L2: %d  L3: %d\n",
00077             mh_info->level[0].cache[0].size + mh_info->level[0].cache[1].size,
00078             mh_info->level[1].cache[0].size + mh_info->level[1].cache[1].size,
00079             mh_info->level[2].cache[0].size + mh_info->level[2].cache[1].size );
00080     return retval;
00081 }
00082 
00083 static void
00084 init_mem_hierarchy( PAPI_mh_info_t * mh_info )
00085 {
00086     int i, j;
00087     PAPI_mh_level_t *L = mh_info->level;
00088 
00089     /* initialize entire memory hierarchy structure to benign values */
00090     for ( i = 0; i < PAPI_MAX_MEM_HIERARCHY_LEVELS; i++ ) {
00091         for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) {
00092             L[i].tlb[j].type = PAPI_MH_TYPE_EMPTY;
00093             L[i].tlb[j].num_entries = 0;
00094             L[i].tlb[j].associativity = 0;
00095             L[i].cache[j].type = PAPI_MH_TYPE_EMPTY;
00096             L[i].cache[j].size = 0;
00097             L[i].cache[j].line_size = 0;
00098             L[i].cache[j].num_lines = 0;
00099             L[i].cache[j].associativity = 0;
00100         }
00101     }
00102 }
00103 
/*
 * Translate the 4-bit associativity code used in AMD's L2/L3 cache and
 * L2 TLB CPUID fields into a number of ways.
 * From "CPUID Specification" #25481 Rev 2.28, April 2008.
 *
 * pattern: encoded associativity (only 0x0 - 0xF are meaningful)
 * returns: the decoded way count, SHRT_MAX for code 0xF, or -1 for a
 *          code that has no table entry or lies outside 0x0 - 0xF
 */
static short int
_amd_L2_L3_assoc( unsigned short int pattern )
{
    switch ( pattern ) {
    case 0x0:
        return 0;
    case 0x1:
        return 1;
    case 0x2:
        return 2;
    case 0x4:
        return 4;
    case 0x6:
        return 8;
    case 0x8:
        return 16;
    case 0xA:
        return 32;
    case 0xB:
        return 48;
    case 0xC:
        return 64;
    case 0xD:
        return 96;
    case 0xE:
        return 128;
    case 0xF:
        return SHRT_MAX;
    default:
        /* codes 0x3, 0x5, 0x7, 0x9 and anything above 0xF */
        return -1;
    }
}
00114 
00115 /* Cache configuration for AMD Athlon/Duron */
00116 static int
00117 init_amd( PAPI_mh_info_t * mh_info, int *num_levels )
00118 {
00119     union
00120     {
00121         struct
00122         {
00123             unsigned int ax, bx, cx, dx;
00124         } e;
00125         unsigned char byt[16];
00126     } reg;
00127     int i, j, levels = 0;
00128     PAPI_mh_level_t *L = mh_info->level;
00129 
00130     /*
00131      * Layout of CPU information taken from :
00132      * "CPUID Specification" #25481 Rev 2.28, April 2008 for most current info.
00133      */
00134 
00135     MEMDBG( "Initializing AMD memory info\n" );
00136     /* AMD level 1 cache info */
00137     reg.e.ax = 0x80000005;   /* extended function code 5: L1 Cache and TLB Identifiers */
00138     cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
00139 
00140     MEMDBG( "e.ax=0x%8.8x e.bx=0x%8.8x e.cx=0x%8.8x e.dx=0x%8.8x\n",
00141             reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
00142     MEMDBG
00143         ( ":\neax: %x %x %x %x\nebx: %x %x %x %x\necx: %x %x %x %x\nedx: %x %x %x %x\n",
00144           reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4],
00145           reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9],
00146           reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14],
00147           reg.byt[15] );
00148 
00149     /* NOTE: We assume L1 cache and TLB always exists */
00150     /* L1 TLB info */
00151 
00152     /* 4MB memory page information; half the number of entries as 2MB */
00153     L[0].tlb[0].type = PAPI_MH_TYPE_INST;
00154     L[0].tlb[0].num_entries = reg.byt[0] / 2;
00155     L[0].tlb[0].page_size = 4096 << 10;
00156     L[0].tlb[0].associativity = reg.byt[1];
00157 
00158     L[0].tlb[1].type = PAPI_MH_TYPE_DATA;
00159     L[0].tlb[1].num_entries = reg.byt[2] / 2;
00160     L[0].tlb[1].page_size = 4096 << 10;
00161     L[0].tlb[1].associativity = reg.byt[3];
00162 
00163     /* 2MB memory page information */
00164     L[0].tlb[2].type = PAPI_MH_TYPE_INST;
00165     L[0].tlb[2].num_entries = reg.byt[0];
00166     L[0].tlb[2].page_size = 2048 << 10;
00167     L[0].tlb[2].associativity = reg.byt[1];
00168 
00169     L[0].tlb[3].type = PAPI_MH_TYPE_DATA;
00170     L[0].tlb[3].num_entries = reg.byt[2];
00171     L[0].tlb[3].page_size = 2048 << 10;
00172     L[0].tlb[3].associativity = reg.byt[3];
00173 
00174     /* 4k page information */
00175     L[0].tlb[4].type = PAPI_MH_TYPE_INST;
00176     L[0].tlb[4].num_entries = reg.byt[4];
00177     L[0].tlb[4].page_size = 4 << 10;
00178     L[0].tlb[4].associativity = reg.byt[5];
00179 
00180     L[0].tlb[5].type = PAPI_MH_TYPE_DATA;
00181     L[0].tlb[5].num_entries = reg.byt[6];
00182     L[0].tlb[5].page_size = 4 << 10;
00183     L[0].tlb[5].associativity = reg.byt[7];
00184 
00185     for ( i = 0; i < PAPI_MH_MAX_LEVELS; i++ ) {
00186         if ( L[0].tlb[i].associativity == 0xff )
00187             L[0].tlb[i].associativity = SHRT_MAX;
00188     }
00189 
00190     /* L1 D-cache info */
00191     L[0].cache[0].type =
00192         PAPI_MH_TYPE_DATA | PAPI_MH_TYPE_WB | PAPI_MH_TYPE_PSEUDO_LRU;
00193     L[0].cache[0].size = reg.byt[11] << 10;
00194     L[0].cache[0].associativity = reg.byt[10];
00195     L[0].cache[0].line_size = reg.byt[8];
00196     /* Byt[9] is "Lines per tag" */
00197     /* Is that == lines per cache? */
00198     /* L[0].cache[1].num_lines = reg.byt[9]; */
00199     if ( L[0].cache[0].line_size )
00200         L[0].cache[0].num_lines = L[0].cache[0].size / L[0].cache[0].line_size;
00201     MEMDBG( "D-Cache Line Count: %d; Computed: %d\n", reg.byt[9],
00202             L[0].cache[0].num_lines );
00203 
00204     /* L1 I-cache info */
00205     L[0].cache[1].type = PAPI_MH_TYPE_INST;
00206     L[0].cache[1].size = reg.byt[15] << 10;
00207     L[0].cache[1].associativity = reg.byt[14];
00208     L[0].cache[1].line_size = reg.byt[12];
00209     /* Byt[13] is "Lines per tag" */
00210     /* Is that == lines per cache? */
00211     /* L[0].cache[1].num_lines = reg.byt[13]; */
00212     if ( L[0].cache[1].line_size )
00213         L[0].cache[1].num_lines = L[0].cache[1].size / L[0].cache[1].line_size;
00214     MEMDBG( "I-Cache Line Count: %d; Computed: %d\n", reg.byt[13],
00215             L[0].cache[1].num_lines );
00216 
00217     for ( i = 0; i < 2; i++ ) {
00218         if ( L[0].cache[i].associativity == 0xff )
00219             L[0].cache[i].associativity = SHRT_MAX;
00220     }
00221 
00222     /* AMD L2/L3 Cache and L2 TLB info */
00223     /* NOTE: For safety we assume L2 and L3 cache and TLB may not exist */
00224 
00225     reg.e.ax = 0x80000006;   /* extended function code 6: L2/L3 Cache and L2 TLB Identifiers */
00226     cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
00227 
00228     MEMDBG( "e.ax=0x%8.8x e.bx=0x%8.8x e.cx=0x%8.8x e.dx=0x%8.8x\n",
00229             reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
00230     MEMDBG
00231         ( ":\neax: %x %x %x %x\nebx: %x %x %x %x\necx: %x %x %x %x\nedx: %x %x %x %x\n",
00232           reg.byt[0], reg.byt[1], reg.byt[2], reg.byt[3], reg.byt[4],
00233           reg.byt[5], reg.byt[6], reg.byt[7], reg.byt[8], reg.byt[9],
00234           reg.byt[10], reg.byt[11], reg.byt[12], reg.byt[13], reg.byt[14],
00235           reg.byt[15] );
00236 
00237     /* L2 TLB info */
00238 
00239     if ( reg.byt[0] | reg.byt[1] ) {    /* Level 2 ITLB exists */
00240         /* 4MB ITLB page information; half the number of entries as 2MB */
00241         L[1].tlb[0].type = PAPI_MH_TYPE_INST;
00242         L[1].tlb[0].num_entries =
00243             ( ( ( short ) ( reg.byt[1] & 0xF ) << 8 ) + reg.byt[0] ) / 2;
00244         L[1].tlb[0].page_size = 4096 << 10;
00245         L[1].tlb[0].associativity =
00246             _amd_L2_L3_assoc( ( reg.byt[1] & 0xF0 ) >> 4 );
00247 
00248         /* 2MB ITLB page information */
00249         L[1].tlb[2].type = PAPI_MH_TYPE_INST;
00250         L[1].tlb[2].num_entries = L[1].tlb[0].num_entries * 2;
00251         L[1].tlb[2].page_size = 2048 << 10;
00252         L[1].tlb[2].associativity = L[1].tlb[0].associativity;
00253     }
00254 
00255     if ( reg.byt[2] | reg.byt[3] ) {    /* Level 2 DTLB exists */
00256         /* 4MB DTLB page information; half the number of entries as 2MB */
00257         L[1].tlb[1].type = PAPI_MH_TYPE_DATA;
00258         L[1].tlb[1].num_entries =
00259             ( ( ( short ) ( reg.byt[3] & 0xF ) << 8 ) + reg.byt[2] ) / 2;
00260         L[1].tlb[1].page_size = 4096 << 10;
00261         L[1].tlb[1].associativity =
00262             _amd_L2_L3_assoc( ( reg.byt[3] & 0xF0 ) >> 4 );
00263 
00264         /* 2MB DTLB page information */
00265         L[1].tlb[3].type = PAPI_MH_TYPE_DATA;
00266         L[1].tlb[3].num_entries = L[1].tlb[1].num_entries * 2;
00267         L[1].tlb[3].page_size = 2048 << 10;
00268         L[1].tlb[3].associativity = L[1].tlb[1].associativity;
00269     }
00270 
00271     /* 4k page information */
00272     if ( reg.byt[4] | reg.byt[5] ) {    /* Level 2 ITLB exists */
00273         L[1].tlb[4].type = PAPI_MH_TYPE_INST;
00274         L[1].tlb[4].num_entries =
00275             ( ( short ) ( reg.byt[5] & 0xF ) << 8 ) + reg.byt[4];
00276         L[1].tlb[4].page_size = 4 << 10;
00277         L[1].tlb[4].associativity =
00278             _amd_L2_L3_assoc( ( reg.byt[5] & 0xF0 ) >> 4 );
00279     }
00280     if ( reg.byt[6] | reg.byt[7] ) {    /* Level 2 DTLB exists */
00281         L[1].tlb[5].type = PAPI_MH_TYPE_DATA;
00282         L[1].tlb[5].num_entries =
00283             ( ( short ) ( reg.byt[7] & 0xF ) << 8 ) + reg.byt[6];
00284         L[1].tlb[5].page_size = 4 << 10;
00285         L[1].tlb[5].associativity =
00286             _amd_L2_L3_assoc( ( reg.byt[7] & 0xF0 ) >> 4 );
00287     }
00288 
00289     /* AMD Level 2 cache info */
00290     if ( reg.e.cx ) {
00291         L[1].cache[0].type =
00292             PAPI_MH_TYPE_UNIFIED | PAPI_MH_TYPE_WT | PAPI_MH_TYPE_PSEUDO_LRU;
00293         L[1].cache[0].size = ( int ) ( ( reg.e.cx & 0xffff0000 ) >> 6 );    /* right shift by 16; multiply by 2^10 */
00294         L[1].cache[0].associativity =
00295             _amd_L2_L3_assoc( ( reg.byt[9] & 0xF0 ) >> 4 );
00296         L[1].cache[0].line_size = reg.byt[8];
00297 /*      L[1].cache[0].num_lines = reg.byt[9]&0xF; */
00298         if ( L[1].cache[0].line_size )
00299             L[1].cache[0].num_lines =
00300                 L[1].cache[0].size / L[1].cache[0].line_size;
00301         MEMDBG( "U-Cache Line Count: %d; Computed: %d\n", reg.byt[9] & 0xF,
00302                 L[1].cache[0].num_lines );
00303     }
00304 
00305     /* AMD Level 3 cache info (shared across cores) */
00306     if ( reg.e.dx ) {
00307         L[2].cache[0].type =
00308             PAPI_MH_TYPE_UNIFIED | PAPI_MH_TYPE_WT | PAPI_MH_TYPE_PSEUDO_LRU;
00309         L[2].cache[0].size = ( int ) ( reg.e.dx & 0xfffc0000 ) << 1;    /* in blocks of 512KB (2^19) */
00310         L[2].cache[0].associativity =
00311             _amd_L2_L3_assoc( ( reg.byt[13] & 0xF0 ) >> 4 );
00312         L[2].cache[0].line_size = reg.byt[12];
00313 /*      L[2].cache[0].num_lines = reg.byt[13]&0xF; */
00314         if ( L[2].cache[0].line_size )
00315             L[2].cache[0].num_lines =
00316                 L[2].cache[0].size / L[2].cache[0].line_size;
00317         MEMDBG( "U-Cache Line Count: %d; Computed: %d\n", reg.byt[13] & 0xF,
00318                 L[1].cache[0].num_lines );
00319     }
00320     for ( i = 0; i < PAPI_MAX_MEM_HIERARCHY_LEVELS; i++ ) {
00321         for ( j = 0; j < PAPI_MH_MAX_LEVELS; j++ ) {
00322             /* Compute the number of levels of hierarchy actually used */
00323             if ( L[i].tlb[j].type != PAPI_MH_TYPE_EMPTY ||
00324                  L[i].cache[j].type != PAPI_MH_TYPE_EMPTY )
00325                 levels = i + 1;
00326         }
00327     }
00328     *num_levels = levels;
00329     return PAPI_OK;
00330 }
00331 
00332    /*
00333     * The data from this table now comes from figure 3-17 in
00334     *  the Intel Architectures Software Reference Manual 2A
00335     *  (cpuid instruction section)
00336     * 
00337     * Previously the information was provided by
00338     * "Intel® Processor Identification and the CPUID Instruction",
00339     * Application Note, AP-485, Nov 2008, 241618-033
00340     * Updated to AP-485, Aug 2009, 241618-036
00341     *
00342     * The following data structure and its instantiation tries to
00343     * capture all the information in Section 2.1.3 of the above
00344     * document. Not all of it is used by PAPI, but it could be.
00345     * As the above document is revised, this table should be
00346     * updated.
00347     */
00348 
/* Upper bound on the page sizes a single TLB descriptor row can list;
 * it dimensions the size[] member below. */
00349 #define TLB_SIZES 3          /* number of different page sizes for a single TLB descriptor */
/*
 * One row of the Intel CPUID descriptor table (intel_cache[]).
 * Each row describes the cache or TLB identified by one descriptor byte.
 * Members a row does not set explicitly are zero, because the table uses
 * designated initializers.
 */
00350 struct _intel_cache_info
00351 {
00352     int descriptor;                    /* 0x00 - 0xFF: register descriptor code */
    /* NOTE(review): this file walks mh_info->level[] with the bound
     * PAPI_MAX_MEM_HIERARCHY_LEVELS -- confirm which limit applies here. */
00353     int level;                         /* 1 to PAPI_MH_MAX_LEVELS */
00354     int type;                          /* Empty, instr, data, vector, unified | TLB */
    /* Cache rows use only size[0]; TLB rows may list several page sizes,
     * with unused trailing entries left at 0. */
00355     int size[TLB_SIZES];               /* cache or  TLB page size(s) in kB */
00356     int associativity;                 /* SHRT_MAX == fully associative */
00357     int sector;                        /* 1 if cache is sectored; else 0 */
00358     int line_size;                     /* for cache */
00359     int entries;                       /* for TLB */
00360 };
00361 
00362 static struct _intel_cache_info intel_cache[] = {
00363 // 0x01
00364     {.descriptor = 0x01,
00365      .level = 1,
00366      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST,
00367      .size[0] = 4,
00368      .associativity = 4,
00369      .entries = 32,
00370      },
00371 // 0x02
00372     {.descriptor = 0x02,
00373      .level = 1,
00374      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST,
00375      .size[0] = 4096,
00376      .associativity = SHRT_MAX,
00377      .entries = 2,
00378      },
00379 // 0x03
00380     {.descriptor = 0x03,
00381      .level = 1,
00382      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA,
00383      .size[0] = 4,
00384      .associativity = 4,
00385      .entries = 64,
00386      },
00387 // 0x04
00388     {.descriptor = 0x04,
00389      .level = 1,
00390      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA,
00391      .size[0] = 4096,
00392      .associativity = 4,
00393      .entries = 8,
00394      },
00395 // 0x05
00396     {.descriptor = 0x05,
00397      .level = 1,
00398      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA,
00399      .size[0] = 4096,
00400      .associativity = 4,
00401      .entries = 32,
00402      },
00403 // 0x06
00404     {.descriptor = 0x06,
00405      .level = 1,
00406      .type = PAPI_MH_TYPE_INST,
00407      .size[0] = 8,
00408      .associativity = 4,
00409      .line_size = 32,
00410      },
00411 // 0x08
00412     {.descriptor = 0x08,
00413      .level = 1,
00414      .type = PAPI_MH_TYPE_INST,
00415      .size[0] = 16,
00416      .associativity = 4,
00417      .line_size = 32,
00418      },
00419 // 0x09
00420     {.descriptor = 0x09,
00421      .level = 1,
00422      .type = PAPI_MH_TYPE_INST,
00423      .size[0] = 32,
00424      .associativity = 4,
00425      .line_size = 64,
00426      },
00427 // 0x0A
00428     {.descriptor = 0x0A,
00429      .level = 1,
00430      .type = PAPI_MH_TYPE_DATA,
00431      .size[0] = 8,
00432      .associativity = 2,
00433      .line_size = 32,
00434      },
00435 // 0x0B
00436     {.descriptor = 0x0B,
00437      .level = 1,
00438      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST,
00439      .size[0] = 4096,
00440      .associativity = 4,
00441      .entries = 4,
00442      },   
00443 // 0x0C
00444     {.descriptor = 0x0C,
00445      .level = 1,
00446      .type = PAPI_MH_TYPE_DATA,
00447      .size[0] = 16,
00448      .associativity = 4,
00449      .line_size = 32,
00450      },
00451 // 0x0D
00452     {.descriptor = 0x0D,
00453      .level = 1,
00454      .type = PAPI_MH_TYPE_DATA,
00455      .size[0] = 16,
00456      .associativity = 4,
00457      .line_size = 64,
00458      },
00459 // 0x0E
00460     {.descriptor = 0x0E,
00461      .level = 1,
00462      .type = PAPI_MH_TYPE_DATA,
00463      .size[0] = 24,
00464      .associativity = 6,
00465      .line_size = 64,
00466      },   
00467 // 0x21
00468     {.descriptor = 0x21,
00469      .level = 2,
00470      .type = PAPI_MH_TYPE_UNIFIED,
00471      .size[0] = 256,
00472      .associativity = 8,
00473      .line_size = 64,
00474      },
00475 // 0x22
00476     {.descriptor = 0x22,
00477      .level = 3,
00478      .type = PAPI_MH_TYPE_UNIFIED,
00479      .size[0] = 512,
00480      .associativity = 4,
00481      .sector = 1,
00482      .line_size = 64,
00483      },
00484 // 0x23
00485     {.descriptor = 0x23,
00486      .level = 3,
00487      .type = PAPI_MH_TYPE_UNIFIED,
00488      .size[0] = 1024,
00489      .associativity = 8,
00490      .sector = 1,
00491      .line_size = 64,
00492      },
00493 // 0x25
00494     {.descriptor = 0x25,
00495      .level = 3,
00496      .type = PAPI_MH_TYPE_UNIFIED,
00497      .size[0] = 2048,
00498      .associativity = 8,
00499      .sector = 1,
00500      .line_size = 64,
00501      },
00502 // 0x29
00503     {.descriptor = 0x29,
00504      .level = 3,
00505      .type = PAPI_MH_TYPE_UNIFIED,
00506      .size[0] = 4096,
00507      .associativity = 8,
00508      .sector = 1,
00509      .line_size = 64,
00510      },
00511 // 0x2C
00512     {.descriptor = 0x2C,
00513      .level = 1,
00514      .type = PAPI_MH_TYPE_DATA,
00515      .size[0] = 32,
00516      .associativity = 8,
00517      .line_size = 64,
00518      },
00519 // 0x30
00520     {.descriptor = 0x30,
00521      .level = 1,
00522      .type = PAPI_MH_TYPE_INST,
00523      .size[0] = 32,
00524      .associativity = 8,
00525      .line_size = 64,
00526      },
00527 // 0x39
00528     {.descriptor = 0x39,
00529      .level = 2,
00530      .type = PAPI_MH_TYPE_UNIFIED,
00531      .size[0] = 128,
00532      .associativity = 4,
00533      .sector = 1,
00534      .line_size = 64,
00535      },
00536 // 0x3A
00537     {.descriptor = 0x3A,
00538      .level = 2,
00539      .type = PAPI_MH_TYPE_UNIFIED,
00540      .size[0] = 192,
00541      .associativity = 6,
00542      .sector = 1,
00543      .line_size = 64,
00544      },
00545 // 0x3B
00546     {.descriptor = 0x3B,
00547      .level = 2,
00548      .type = PAPI_MH_TYPE_UNIFIED,
00549      .size[0] = 128,
00550      .associativity = 2,
00551      .sector = 1,
00552      .line_size = 64,
00553      },
00554 // 0x3C
00555     {.descriptor = 0x3C,
00556      .level = 2,
00557      .type = PAPI_MH_TYPE_UNIFIED,
00558      .size[0] = 256,
00559      .associativity = 4,
00560      .sector = 1,
00561      .line_size = 64,
00562      },
00563 // 0x3D
00564     {.descriptor = 0x3D,
00565      .level = 2,
00566      .type = PAPI_MH_TYPE_UNIFIED,
00567      .size[0] = 384,
00568      .associativity = 6,
00569      .sector = 1,
00570      .line_size = 64,
00571      },
00572 // 0x3E
00573     {.descriptor = 0x3E,
00574      .level = 2,
00575      .type = PAPI_MH_TYPE_UNIFIED,
00576      .size[0] = 512,
00577      .associativity = 4,
00578      .sector = 1,
00579      .line_size = 64,
00580      },
00581 // 0x40: no last level cache (??)
00582 // 0x41
00583     {.descriptor = 0x41,
00584      .level = 2,
00585      .type = PAPI_MH_TYPE_UNIFIED,
00586      .size[0] = 128,
00587      .associativity = 4,
00588      .line_size = 32,
00589      },
00590 // 0x42
00591     {.descriptor = 0x42,
00592      .level = 2,
00593      .type = PAPI_MH_TYPE_UNIFIED,
00594      .size[0] = 256,
00595      .associativity = 4,
00596      .line_size = 32,
00597      },
00598 // 0x43
00599     {.descriptor = 0x43,
00600      .level = 2,
00601      .type = PAPI_MH_TYPE_UNIFIED,
00602      .size[0] = 512,
00603      .associativity = 4,
00604      .line_size = 32,
00605      },
00606 // 0x44
00607     {.descriptor = 0x44,
00608      .level = 2,
00609      .type = PAPI_MH_TYPE_UNIFIED,
00610      .size[0] = 1024,
00611      .associativity = 4,
00612      .line_size = 32,
00613      },
00614 // 0x45
00615     {.descriptor = 0x45,
00616      .level = 2,
00617      .type = PAPI_MH_TYPE_UNIFIED,
00618      .size[0] = 2048,
00619      .associativity = 4,
00620      .line_size = 32,
00621      },
00622 // 0x46
00623     {.descriptor = 0x46,
00624      .level = 3,
00625      .type = PAPI_MH_TYPE_UNIFIED,
00626      .size[0] = 4096,
00627      .associativity = 4,
00628      .line_size = 64,
00629      },
00630 // 0x47
00631     {.descriptor = 0x47,
00632      .level = 3,
00633      .type = PAPI_MH_TYPE_UNIFIED,
00634      .size[0] = 8192,
00635      .associativity = 8,
00636      .line_size = 64,
00637      },
00638 // 0x48
00639     {.descriptor = 0x48,
00640      .level = 2,
00641      .type = PAPI_MH_TYPE_UNIFIED,
00642      .size[0] = 3072,
00643      .associativity = 12,
00644      .line_size = 64,
00645      },
00646 // 0x49 NOTE: for family 0x0F model 0x06 this is level 3
00647     {.descriptor = 0x49,
00648      .level = 2,
00649      .type = PAPI_MH_TYPE_UNIFIED,
00650      .size[0] = 4096,
00651      .associativity = 16,
00652      .line_size = 64,
00653      },
00654 // 0x4A
00655     {.descriptor = 0x4A,
00656      .level = 3,
00657      .type = PAPI_MH_TYPE_UNIFIED,
00658      .size[0] = 6144,
00659      .associativity = 12,
00660      .line_size = 64,
00661      },
00662 // 0x4B
00663     {.descriptor = 0x4B,
00664      .level = 3,
00665      .type = PAPI_MH_TYPE_UNIFIED,
00666      .size[0] = 8192,
00667      .associativity = 16,
00668      .line_size = 64,
00669      },
00670 // 0x4C
00671     {.descriptor = 0x4C,
00672      .level = 3,
00673      .type = PAPI_MH_TYPE_UNIFIED,
00674      .size[0] = 12288,
00675      .associativity = 12,
00676      .line_size = 64,
00677      },
00678 // 0x4D
00679     {.descriptor = 0x4D,
00680      .level = 3,
00681      .type = PAPI_MH_TYPE_UNIFIED,
00682      .size[0] = 16384,
00683      .associativity = 16,
00684      .line_size = 64,
00685      },
00686 // 0x4E
00687     {.descriptor = 0x4E,
00688      .level = 2,
00689      .type = PAPI_MH_TYPE_UNIFIED,
00690      .size[0] = 6144,
00691      .associativity = 24,
00692      .line_size = 64,
00693      },
00694 // 0x4F
00695     {.descriptor = 0x4F,
00696      .level = 1,
00697      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST,
00698      .size[0] = 4,
00699      .associativity = SHRT_MAX,
00700      .entries = 32,
00701      },
00702 // 0x50
00703     {.descriptor = 0x50,
00704      .level = 1,
00705      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST,
00706      .size = {4, 2048, 4096},
00707      .associativity = SHRT_MAX,
00708      .entries = 64,
00709      },
00710 // 0x51
00711     {.descriptor = 0x51,
00712      .level = 1,
00713      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST,
00714      .size = {4, 2048, 4096},
00715      .associativity = SHRT_MAX,
00716      .entries = 128,
00717      },
00718 // 0x52
00719     {.descriptor = 0x52,
00720      .level = 1,
00721      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST,
00722      .size = {4, 2048, 4096},
00723      .associativity = SHRT_MAX,
00724      .entries = 256,
00725      },
00726 // 0x55
00727     {.descriptor = 0x55,
00728      .level = 1,
00729      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST,
00730      .size = {2048, 4096, 0},
00731      .associativity = SHRT_MAX,
00732      .entries = 7,
00733      },
00734 // 0x56
00735     {.descriptor = 0x56,
00736      .level = 1,
00737      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA,
00738      .size[0] = 4096,
00739      .associativity = 4,
00740      .entries = 16,
00741      },
00742 // 0x57
00743     {.descriptor = 0x57,
00744      .level = 1,
00745      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA,
00746      .size[0] = 4,
00747      .associativity = 4,
00748      .entries = 16,
00749      },
00750 // 0x59
00751     {.descriptor = 0x59,
00752      .level = 1,
00753      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA,
00754      .size[0] = 4,
00755      .associativity = SHRT_MAX,
00756      .entries = 16,
00757      },   
00758 // 0x5A
00759     {.descriptor = 0x5A,
00760      .level = 1,
00761      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA,
00762      .size = {2048, 4096, 0},
00763      .associativity = 4,
00764      .entries = 32,
00765      },
00766 // 0x5B
00767     {.descriptor = 0x5B,
00768      .level = 1,
00769      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA,
00770      .size = {4, 4096, 0},
00771      .associativity = SHRT_MAX,
00772      .entries = 64,
00773      },
00774 // 0x5C
00775     {.descriptor = 0x5C,
00776      .level = 1,
00777      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA,
00778      .size = {4, 4096, 0},
00779      .associativity = SHRT_MAX,
00780      .entries = 128,
00781      },
00782 // 0x5D
00783     {.descriptor = 0x5D,
00784      .level = 1,
00785      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA,
00786      .size = {4, 4096, 0},
00787      .associativity = SHRT_MAX,
00788      .entries = 256,
00789      },
00790 // 0x60
00791     {.descriptor = 0x60,
00792      .level = 1,
00793      .type = PAPI_MH_TYPE_DATA,
00794      .size[0] = 16,
00795      .associativity = 8,
00796      .sector = 1,
00797      .line_size = 64,
00798      },
00799 // 0x66
00800     {.descriptor = 0x66,
00801      .level = 1,
00802      .type = PAPI_MH_TYPE_DATA,
00803      .size[0] = 8,
00804      .associativity = 4,
00805      .sector = 1,
00806      .line_size = 64,
00807      },
00808 // 0x67
00809     {.descriptor = 0x67,
00810      .level = 1,
00811      .type = PAPI_MH_TYPE_DATA,
00812      .size[0] = 16,
00813      .associativity = 4,
00814      .sector = 1,
00815      .line_size = 64,
00816      },
00817 // 0x68
00818     {.descriptor = 0x68,
00819      .level = 1,
00820      .type = PAPI_MH_TYPE_DATA,
00821      .size[0] = 32,
00822      .associativity = 4,
00823      .sector = 1,
00824      .line_size = 64,
00825      },
00826 // 0x70
00827     {.descriptor = 0x70,
00828      .level = 1,
00829      .type = PAPI_MH_TYPE_TRACE,
00830      .size[0] = 12,
00831      .associativity = 8,
00832      },
00833 // 0x71
00834     {.descriptor = 0x71,
00835      .level = 1,
00836      .type = PAPI_MH_TYPE_TRACE,
00837      .size[0] = 16,
00838      .associativity = 8,
00839      },
00840 // 0x72
00841     {.descriptor = 0x72,
00842      .level = 1,
00843      .type = PAPI_MH_TYPE_TRACE,
00844      .size[0] = 32,
00845      .associativity = 8,
00846      },
00847 // 0x73
00848     {.descriptor = 0x73,
00849      .level = 1,
00850      .type = PAPI_MH_TYPE_TRACE,
00851      .size[0] = 64,
00852      .associativity = 8,
00853      },
00854 // 0x78
00855     {.descriptor = 0x78,
00856      .level = 2,
00857      .type = PAPI_MH_TYPE_UNIFIED,
00858      .size[0] = 1024,
00859      .associativity = 4,
00860      .line_size = 64,
00861      },
00862 // 0x79
00863     {.descriptor = 0x79,
00864      .level = 2,
00865      .type = PAPI_MH_TYPE_UNIFIED,
00866      .size[0] = 128,
00867      .associativity = 8,
00868      .sector = 1,
00869      .line_size = 64,
00870      },
00871 // 0x7A
00872     {.descriptor = 0x7A,
00873      .level = 2,
00874      .type = PAPI_MH_TYPE_UNIFIED,
00875      .size[0] = 256,
00876      .associativity = 8,
00877      .sector = 1,
00878      .line_size = 64,
00879      },
00880 // 0x7B
00881     {.descriptor = 0x7B,
00882      .level = 2,
00883      .type = PAPI_MH_TYPE_UNIFIED,
00884      .size[0] = 512,
00885      .associativity = 8,
00886      .sector = 1,
00887      .line_size = 64,
00888      },
00889 // 0x7C
00890     {.descriptor = 0x7C,
00891      .level = 2,
00892      .type = PAPI_MH_TYPE_UNIFIED,
00893      .size[0] = 1024,
00894      .associativity = 8,
00895      .sector = 1,
00896      .line_size = 64,
00897      },
00898 // 0x7D
00899     {.descriptor = 0x7D,
00900      .level = 2,
00901      .type = PAPI_MH_TYPE_UNIFIED,
00902      .size[0] = 2048,
00903      .associativity = 8,
00904      .line_size = 64,
00905      },
00906 // 0x7F
00907     {.descriptor = 0x7F,
00908      .level = 2,
00909      .type = PAPI_MH_TYPE_UNIFIED,
00910      .size[0] = 512,
00911      .associativity = 2,
00912      .line_size = 64,
00913      },
00914 // 0x80
00915     {.descriptor = 0x80,
00916      .level = 2,
00917      .type = PAPI_MH_TYPE_UNIFIED,
00918      .size[0] = 512,
00919      .associativity = 8,
00920      .line_size = 64,
00921      },   
00922 // 0x82
00923     {.descriptor = 0x82,
00924      .level = 2,
00925      .type = PAPI_MH_TYPE_UNIFIED,
00926      .size[0] = 256,
00927      .associativity = 8,
00928      .line_size = 32,
00929      },
00930 // 0x83
00931     {.descriptor = 0x83,
00932      .level = 2,
00933      .type = PAPI_MH_TYPE_UNIFIED,
00934      .size[0] = 512,
00935      .associativity = 8,
00936      .line_size = 32,
00937      },
00938 // 0x84
00939     {.descriptor = 0x84,
00940      .level = 2,
00941      .type = PAPI_MH_TYPE_UNIFIED,
00942      .size[0] = 1024,
00943      .associativity = 8,
00944      .line_size = 32,
00945      },
00946 // 0x85
00947     {.descriptor = 0x85,
00948      .level = 2,
00949      .type = PAPI_MH_TYPE_UNIFIED,
00950      .size[0] = 2048,
00951      .associativity = 8,
00952      .line_size = 32,
00953      },
00954 // 0x86
00955     {.descriptor = 0x86,
00956      .level = 2,
00957      .type = PAPI_MH_TYPE_UNIFIED,
00958      .size[0] = 512,
00959      .associativity = 4,
00960      .line_size = 64,
00961      },
00962 // 0x87
00963     {.descriptor = 0x87,
00964      .level = 2,
00965      .type = PAPI_MH_TYPE_UNIFIED,
00966      .size[0] = 1024,
00967      .associativity = 8,
00968      .line_size = 64,
00969      },
00970 // 0xB0
00971     {.descriptor = 0xB0,
00972      .level = 1,
00973      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST,
00974      .size[0] = 4,
00975      .associativity = 4,
00976      .entries = 128,
00977      },
00978 // 0xB1 NOTE: This is currently the only instance where .entries
00979 //      is dependent on .size. It's handled as a code exception.
00980 //      If other instances appear in the future, the structure
00981 //      should probably change to accommodate it.
00982     {.descriptor = 0xB1,
00983      .level = 1,
00984      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST,
00985      .size = {2048, 4096, 0},
00986      .associativity = 4,
00987      .entries = 8,           /* or 4 if size = 4096 */
00988      },
00989 // 0xB2
00990     {.descriptor = 0xB2,
00991      .level = 1,
00992      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_INST,
00993      .size[0] = 4,
00994      .associativity = 4,
00995      .entries = 64,
00996      },
00997 // 0xB3
00998     {.descriptor = 0xB3,
00999      .level = 1,
01000      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA,
01001      .size[0] = 4,
01002      .associativity = 4,
01003      .entries = 128,
01004      },
01005 // 0xB4
01006     {.descriptor = 0xB4,
01007      .level = 1,
01008      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA,
01009      .size[0] = 4,
01010      .associativity = 4,
01011      .entries = 256,
01012      },
01013 // 0xBA
01014     {.descriptor = 0xBA,
01015      .level = 1,
01016      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA,
01017      .size[0] = 4,
01018      .associativity = 4,
01019      .entries = 64,
01020      },   
01021 // 0xC0
01022     {.descriptor = 0xBA,
01023      .level = 1,
01024      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_DATA,
01025      .size = {4,4096},
01026      .associativity = 4,
01027      .entries = 8,
01028      },      
01029 // 0xCA
01030     {.descriptor = 0xCA,
01031      .level = 2,
01032      .type = PAPI_MH_TYPE_TLB | PAPI_MH_TYPE_UNIFIED,
01033      .size[0] = 4,
01034      .associativity = 4,
01035      .entries = 512,
01036      },
01037 // 0xD0
01038     {.descriptor = 0xD0,
01039      .level = 3,
01040      .type = PAPI_MH_TYPE_UNIFIED,
01041      .size[0] = 512,
01042      .associativity = 4,
01043      .line_size = 64,
01044      },
01045 // 0xD1
01046     {.descriptor = 0xD1,
01047      .level = 3,
01048      .type = PAPI_MH_TYPE_UNIFIED,
01049      .size[0] = 1024,
01050      .associativity = 4,
01051      .line_size = 64,
01052      },
01053 // 0xD2
01054     {.descriptor = 0xD2,
01055      .level = 3,
01056      .type = PAPI_MH_TYPE_UNIFIED,
01057      .size[0] = 2048,
01058      .associativity = 4,
01059      .line_size = 64,
01060      },
01061 // 0xD6
01062     {.descriptor = 0xD6,
01063      .level = 3,
01064      .type = PAPI_MH_TYPE_UNIFIED,
01065      .size[0] = 1024,
01066      .associativity = 8,
01067      .line_size = 64,
01068      },
01069 // 0xD7
01070     {.descriptor = 0xD7,
01071      .level = 3,
01072      .type = PAPI_MH_TYPE_UNIFIED,
01073      .size[0] = 2048,
01074      .associativity = 8,
01075      .line_size = 64,
01076      },
01077 // 0xD8
01078     {.descriptor = 0xD8,
01079      .level = 3,
01080      .type = PAPI_MH_TYPE_UNIFIED,
01081      .size[0] = 4096,
01082      .associativity = 8,
01083      .line_size = 64,
01084      },
01085 // 0xDC
01086     {.descriptor = 0xDC,
01087      .level = 3,
01088      .type = PAPI_MH_TYPE_UNIFIED,
01089      .size[0] = 1536,
01090      .associativity = 12,
01091      .line_size = 64,
01092      },
01093 // 0xDD
01094     {.descriptor = 0xDD,
01095      .level = 3,
01096      .type = PAPI_MH_TYPE_UNIFIED,
01097      .size[0] = 3072,
01098      .associativity = 12,
01099      .line_size = 64,
01100      },
01101 // 0xDE
01102     {.descriptor = 0xDE,
01103      .level = 3,
01104      .type = PAPI_MH_TYPE_UNIFIED,
01105      .size[0] = 6144,
01106      .associativity = 12,
01107      .line_size = 64,
01108      },
01109 // 0xE2
01110     {.descriptor = 0xE2,
01111      .level = 3,
01112      .type = PAPI_MH_TYPE_UNIFIED,
01113      .size[0] = 2048,
01114      .associativity = 16,
01115      .line_size = 64,
01116      },
01117 // 0xE3
01118     {.descriptor = 0xE3,
01119      .level = 3,
01120      .type = PAPI_MH_TYPE_UNIFIED,
01121      .size[0] = 4096,
01122      .associativity = 16,
01123      .line_size = 64,
01124      },
01125 // 0xE4
01126     {.descriptor = 0xE4,
01127      .level = 3,
01128      .type = PAPI_MH_TYPE_UNIFIED,
01129      .size[0] = 8192,
01130      .associativity = 16,
01131      .line_size = 64,
01132      },
01133 // 0xEA
01134     {.descriptor = 0xEA,
01135      .level = 3,
01136      .type = PAPI_MH_TYPE_UNIFIED,
01137      .size[0] = 12288,
01138      .associativity = 24,
01139      .line_size = 64,
01140      },
01141 // 0xEB
01142     {.descriptor = 0xEB,
01143      .level = 3,
01144      .type = PAPI_MH_TYPE_UNIFIED,
01145      .size[0] = 18432,
01146      .associativity = 24,
01147      .line_size = 64,
01148      },
01149 // 0xEC
01150     {.descriptor = 0xEC,
01151      .level = 3,
01152      .type = PAPI_MH_TYPE_UNIFIED,
01153      .size[0] = 24576,
01154      .associativity = 24,
01155      .line_size = 64,
01156      },
01157 // 0xF0
01158     {.descriptor = 0xF0,
01159      .level = 1,
01160      .type = PAPI_MH_TYPE_PREF,
01161      .size[0] = 64,
01162      },
01163 // 0xF1
01164     {.descriptor = 0xF1,
01165      .level = 1,
01166      .type = PAPI_MH_TYPE_PREF,
01167      .size[0] = 128,
01168      },
01169 };
01170 
#ifdef DEBUG
/* Debug helper: write every record of the static intel_cache descriptor
 * table to stdout, one field per line, followed by a blank line.
 */
static void
print_intel_cache_table(  )
{
    const int num_entries =
        ( int ) ( sizeof ( intel_cache ) /
                  sizeof ( struct _intel_cache_info ) );
    int idx;

    for ( idx = 0; idx < num_entries; idx++ ) {
        int slot;

        printf( "%d.\tDescriptor: 0x%x\n", idx, intel_cache[idx].descriptor );
        printf( "\t  Level:     %d\n", intel_cache[idx].level );
        printf( "\t  Type:      %d\n", intel_cache[idx].type );

        /* one value per possible size slot, all on a single line */
        printf( "\t  Size(s):   " );
        slot = 0;
        while ( slot < TLB_SIZES ) {
            printf( "%d, ", intel_cache[idx].size[slot] );
            slot++;
        }
        printf( "\n" );

        printf( "\t  Assoc:     %d\n", intel_cache[idx].associativity );
        printf( "\t  Sector:    %d\n", intel_cache[idx].sector );
        printf( "\t  Line Size: %d\n", intel_cache[idx].line_size );
        printf( "\t  Entries:   %d\n", intel_cache[idx].entries );
        printf( "\n" );
    }
}
#endif
01194 
01195 /* Given a specific cache descriptor, this routine decodes the information from a table
01196  * of such descriptors and fills out one or more records in a PAPI data structure.
01197  * Called only by init_intel()
01198  */
01199 static void
01200 intel_decode_descriptor( struct _intel_cache_info *d, PAPI_mh_level_t * L )
01201 {
01202     int i, next;
01203     int level = d->level - 1;
01204     PAPI_mh_tlb_info_t *t;
01205     PAPI_mh_cache_info_t *c;
01206 
01207     if ( d->descriptor == 0x49 ) {  /* special case */
01208         unsigned int r_eax, r_ebx, r_ecx, r_edx;
01209         r_eax = 0x1;         /* function code 1: family & model */
01210         cpuid( &r_eax, &r_ebx, &r_ecx, &r_edx );
01211         /* override table for Family F, model 6 only */
01212         if ( ( r_eax & 0x0FFF3FF0 ) == 0xF60 )
01213             level = 3;
01214     }
01215     if ( d->type & PAPI_MH_TYPE_TLB ) {
01216         for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
01217             if ( L[level].tlb[next].type == PAPI_MH_TYPE_EMPTY )
01218                 break;
01219         }
01220         /* expand TLB entries for multiple possible page sizes */
01221         for ( i = 0; i < TLB_SIZES && next < PAPI_MH_MAX_LEVELS && d->size[i];
01222               i++, next++ ) {
01223 //          printf("Level %d Descriptor: %x TLB type %x next: %d, i: %d\n", level, d->descriptor, d->type, next, i);
01224             t = &L[level].tlb[next];
01225             t->type = PAPI_MH_CACHE_TYPE( d->type );
01226             t->num_entries = d->entries;
01227             t->page_size = d->size[i] << 10;    /* minimum page size in KB */
01228             t->associativity = d->associativity;
01229             /* another special case */
01230             if ( d->descriptor == 0xB1 && d->size[i] == 4096 )
01231                 t->num_entries = d->entries / 2;
01232         }
01233     } else {
01234         for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
01235             if ( L[level].cache[next].type == PAPI_MH_TYPE_EMPTY )
01236                 break;
01237         }
01238 //      printf("Level %d Descriptor: %x Cache type %x next: %d\n", level, d->descriptor, d->type, next);
01239         c = &L[level].cache[next];
01240         c->type = PAPI_MH_CACHE_TYPE( d->type );
01241         c->size = d->size[0] << 10; /* convert from KB to bytes */
01242         c->associativity = d->associativity;
01243         if ( d->line_size ) {
01244             c->line_size = d->line_size;
01245             c->num_lines = c->size / c->line_size;
01246         }
01247     }
01248 }
01249 
/* Execute CPUID for leaf 'index' with sub-leaf 'ecx_in' and store the
 * resulting EAX, EBX, ECX and EDX values through the four pointers.
 *
 * EBX/RBX is not named as an operand; instead it is swapped with a
 * compiler-chosen scratch register around the instruction, so its value
 * is preserved for the surrounding code. Unlike a push/pop pair, the swap
 * never writes below the stack pointer, so it cannot corrupt the x86-64
 * red zone when this function is inlined into a leaf function.
 */
static inline void
cpuid2 ( unsigned int* eax, unsigned int* ebx, 
                    unsigned int* ecx, unsigned int* edx, 
                    unsigned int index, unsigned int ecx_in )
{
  unsigned int a,b,c,d;

#if defined(__x86_64__)
  /* 64-bit swap: a 32-bit xchg would clear the upper half of rbx */
  __asm__ __volatile__ ("xchgq %%rbx, %q1\n\t"
                        "cpuid\n\t"
                        "xchgq %%rbx, %q1"
        : "=a" (a), "=&r" (b), "=c" (c), "=d" (d)
        : "0" (index), "2" (ecx_in) );
#else
  __asm__ __volatile__ ("xchgl %%ebx, %k1\n\t"
                        "cpuid\n\t"
                        "xchgl %%ebx, %k1"
        : "=a" (a), "=&r" (b), "=c" (c), "=d" (d)
        : "0" (index), "2" (ecx_in) );
#endif

  *eax = a; *ebx = b; *ecx = c; *edx = d;
}
01261 
01262 
01263 static int
01264 init_intel_leaf4( PAPI_mh_info_t * mh_info, int *num_levels )
01265 {
01266 
01267   unsigned int eax, ebx, ecx, edx;
01268   unsigned int maxidx, ecx_in;
01269   int next;
01270 
01271   int cache_type,cache_level,cache_selfinit,cache_fullyassoc;
01272   int cache_linesize,cache_partitions,cache_ways,cache_sets;
01273 
01274   PAPI_mh_cache_info_t *c;
01275 
01276   *num_levels=0;
01277 
01278   cpuid2(&eax,&ebx,&ecx,&edx, 0, 0);
01279   maxidx = eax;
01280   
01281   if (maxidx<4) {
01282     MEMDBG("Warning!  CPUID Index 4 not supported!\n");
01283     return PAPI_ENOSUPP;
01284   }
01285 
01286   ecx_in=0;
01287   while(1) {
01288     cpuid2(&eax,&ebx,&ecx,&edx, 4, ecx_in);
01289 
01290 
01291     
01292     /* decoded as per table 3-12 in Intel Software Developer's Manual Volume 2A */
01293      
01294     cache_type=eax&0x1f;
01295     if (cache_type==0) break;     
01296      
01297     cache_level=(eax>>5)&0x3;
01298     cache_selfinit=(eax>>8)&0x1;
01299     cache_fullyassoc=(eax>>9)&0x1;
01300 
01301     cache_linesize=(ebx&0xfff)+1;
01302     cache_partitions=((ebx>>12)&0x3ff)+1;
01303     cache_ways=((ebx>>22)&0x3ff)+1;
01304        
01305     cache_sets=(ecx)+1;
01306 
01307     /* should we export this info?
01308 
01309     cache_maxshare=((eax>>14)&0xfff)+1;
01310     cache_maxpackage=((eax>>26)&0x3f)+1;
01311      
01312     cache_wb=(edx)&1;
01313     cache_inclusive=(edx>>1)&1;
01314     cache_indexing=(edx>>2)&1;
01315     */
01316 
01317     if (cache_level>*num_levels) *num_levels=cache_level;
01318 
01319     /* find next slot available to hold cache info */
01320     for ( next = 0; next < PAPI_MH_MAX_LEVELS - 1; next++ ) {
01321         if ( mh_info->level[cache_level-1].cache[next].type == PAPI_MH_TYPE_EMPTY ) break;
01322     }
01323 
01324     c=&(mh_info->level[cache_level-1].cache[next]);
01325 
01326     switch(cache_type) {
01327       case 1: MEMDBG("L%d Data Cache\n",cache_level); 
01328     c->type=PAPI_MH_TYPE_DATA;
01329     break;
01330       case 2: MEMDBG("L%d Instruction Cache\n",cache_level); 
01331     c->type=PAPI_MH_TYPE_INST;
01332     break;
01333       case 3: MEMDBG("L%d Unified Cache\n",cache_level); 
01334     c->type=PAPI_MH_TYPE_UNIFIED;
01335     break;
01336     }
01337      
01338     if (cache_selfinit) { MEMDBG("\tSelf-init\n"); }
01339     if (cache_fullyassoc) { MEMDBG("\tFully Associtative\n"); }
01340      
01341     //MEMDBG("\tMax logical processors sharing cache: %d\n",cache_maxshare);
01342     //MEMDBG("\tMax logical processors sharing package: %d\n",cache_maxpackage);
01343      
01344     MEMDBG("\tCache linesize: %d\n",cache_linesize);
01345 
01346     MEMDBG("\tCache partitions: %d\n",cache_partitions);
01347     MEMDBG("\tCache associaticity: %d\n",cache_ways);
01348 
01349     MEMDBG("\tCache sets: %d\n",cache_sets);
01350     MEMDBG("\tCache size = %dkB\n",
01351        (cache_ways*cache_partitions*cache_linesize*cache_sets)/1024);
01352 
01353     //MEMDBG("\tWBINVD/INVD acts on lower caches: %d\n",cache_wb);
01354     //MEMDBG("\tCache is not inclusive: %d\n",cache_inclusive);
01355     //MEMDBG("\tComplex cache indexing: %d\n",cache_indexing);
01356 
01357     c->line_size=cache_linesize;
01358     if (cache_fullyassoc) {
01359        c->associativity=SHRT_MAX;
01360     }
01361     else {
01362        c->associativity=cache_ways;
01363     }
01364     c->size=(cache_ways*cache_partitions*cache_linesize*cache_sets);
01365     c->num_lines=cache_ways*cache_partitions*cache_sets;
01366      
01367     ecx_in++;
01368   }
01369   return PAPI_OK;
01370 }
01371 
01372 static int
01373 init_intel_leaf2( PAPI_mh_info_t * mh_info , int *num_levels)
01374 {
01375     /* cpuid() returns memory copies of 4 32-bit registers
01376      * this union allows them to be accessed as either registers
01377      * or individual bytes. Remember that Intel is little-endian.
01378      */
01379     union
01380     {
01381         struct
01382         {
01383             unsigned int ax, bx, cx, dx;
01384         } e;
01385         unsigned char descrip[16];
01386     } reg;
01387 
01388     int r;                             /* register boundary index */
01389     int b;                             /* byte index into a register */
01390     int i;                             /* byte index into the descrip array */
01391     int t;                             /* table index into the static descriptor table */
01392     int count;                         /* how many times to call cpuid; from eax:lsb */
01393     int size;                          /* size of the descriptor table */
01394     int last_level = 0;                /* how many levels in the cache hierarchy */
01395 
01396     int need_leaf4=0;
01397 
01398     /* All of Intel's cache info is in 1 call to cpuid
01399      * however it is a table lookup :(
01400      */
01401     MEMDBG( "Initializing Intel Cache and TLB descriptors\n" );
01402 
01403 #ifdef DEBUG
01404     if ( ISLEVEL( DEBUG_MEMORY ) )
01405         print_intel_cache_table(  );
01406 #endif
01407 
01408     reg.e.ax = 0x2;          /* function code 2: cache descriptors */
01409     cpuid( &reg.e.ax, &reg.e.bx, &reg.e.cx, &reg.e.dx );
01410 
01411     MEMDBG( "e.ax=0x%8.8x e.bx=0x%8.8x e.cx=0x%8.8x e.dx=0x%8.8x\n",
01412             reg.e.ax, reg.e.bx, reg.e.cx, reg.e.dx );
01413     MEMDBG
01414         ( ":\nd0: %x %x %x %x\nd1: %x %x %x %x\nd2: %x %x %x %x\nd3: %x %x %x %x\n",
01415           reg.descrip[0], reg.descrip[1], reg.descrip[2], reg.descrip[3],
01416           reg.descrip[4], reg.descrip[5], reg.descrip[6], reg.descrip[7],
01417           reg.descrip[8], reg.descrip[9], reg.descrip[10], reg.descrip[11],
01418           reg.descrip[12], reg.descrip[13], reg.descrip[14], reg.descrip[15] );
01419 
01420     count = reg.descrip[0];  /* # times to repeat CPUID call. Not implemented. */
01421     size = ( sizeof ( intel_cache ) / sizeof ( struct _intel_cache_info ) );    /* # descriptors */
01422     MEMDBG( "Repeat cpuid(2,...) %d times. If not 1, code is broken.\n",
01423             count );
01424     if (count!=1) {
01425        fprintf(stderr,"Warning: Unhandled cpuid count of %d\n",count);
01426     }
01427 
01428     for ( r = 0; r < 4; r++ ) { /* walk the registers */
01429         if ( ( reg.descrip[r * 4 + 3] & 0x80 ) == 0 ) { /* only process if high order bit is 0 */
01430             for ( b = 3; b >= 0; b-- ) {    /* walk the descriptor bytes from high to low */
01431                 i = r * 4 + b;  /* calculate an index into the array of descriptors */
01432                 if ( i ) {   /* skip the low order byte in eax [0]; it's the count (see above) */
01433                    if ( reg.descrip[i] == 0xff ) {
01434                       MEMDBG("Warning! PAPI x86_cache: must implement cpuid leaf 4\n");
01435                       need_leaf4=1;
01436                       return PAPI_ENOSUPP;
01437                       /* we might continue instead */
01438                       /* in order to get TLB info  */
01439                       /* continue;                 */
01440                    }
01441                     for ( t = 0; t < size; t++ ) {  /* walk the descriptor table */                    
01442                         if ( reg.descrip[i] == intel_cache[t].descriptor ) {    /* find match */
01443                             if ( intel_cache[t].level > last_level )
01444                                 last_level = intel_cache[t].level;
01445                             intel_decode_descriptor( &intel_cache[t],
01446                                                      mh_info->level );
01447                         }
01448                     }
01449                 }
01450             }
01451         }
01452     }
01453     MEMDBG( "# of Levels: %d\n", last_level );
01454     *num_levels=last_level;
01455     if (need_leaf4) {
01456        return PAPI_ENOSUPP;
01457     }
01458     return PAPI_OK;
01459 }
01460 
01461 
01462 static int
01463 init_intel( PAPI_mh_info_t * mh_info, int *levels )
01464 {
01465 
01466   int result;
01467   int num_levels;
01468 
01469   /* try using the oldest leaf2 method first */
01470   result=init_intel_leaf2(mh_info, &num_levels);
01471   
01472   if (result!=PAPI_OK) {
01473      /* All Core2 and newer also support leaf4 detection */
01474      /* Starting with Westmere *only* leaf4 is supported */
01475      result=init_intel_leaf4(mh_info, &num_levels);
01476   }
01477 
01478   *levels=num_levels;
01479   return PAPI_OK;
01480 }
01481 
01482 
01483 /* Returns 1 if hypervisor detected */
01484 /* Returns 0 if none found.         */
01485 int 
01486 _x86_detect_hypervisor(char *vendor_name)
01487 {
01488   unsigned int eax, ebx, ecx, edx;
01489   char hyper_vendor_id[13];
01490 
01491   cpuid2(&eax, &ebx, &ecx, &edx,0x1,0);
01492   /* This is the hypervisor bit, ecx bit 31 */
01493   if  (ecx&0x80000000) {
01494     /* There are various values in the 0x4000000X range */
01495     /* It is questionable how standard they are         */
01496     /* For now we just return the name.                 */
01497     cpuid2(&eax, &ebx, &ecx, &edx, 0x40000000,0);
01498     memcpy(hyper_vendor_id + 0, &ebx, 4);
01499     memcpy(hyper_vendor_id + 4, &ecx, 4);
01500     memcpy(hyper_vendor_id + 8, &edx, 4);
01501     hyper_vendor_id[12] = '\0';
01502     strncpy(vendor_name,hyper_vendor_id,PAPI_MAX_STR_LEN);
01503     return 1;
01504   }
01505   else {
01506     strncpy(vendor_name,"none",PAPI_MAX_STR_LEN);
01507   }
01508   return 0;
01509 }
01510 
01511 
01512 
01513 
01514 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines