PAPI  5.0.1.0
vector.c File Reference
Include dependency graph for vector.c:

Go to the source code of this file.

Defines

#define NUMBER   100

Functions

void inline_packed_sse_add (float *aa, float *bb, float *cc)
void inline_packed_sse_mul (float *aa, float *bb, float *cc)
void inline_packed_sse2_add (double *aa, double *bb, double *cc)
void inline_packed_sse2_mul (double *aa, double *bb, double *cc)
void inline_unpacked_sse_add (float *aa, float *bb, float *cc)
void inline_unpacked_sse_mul (float *aa, float *bb, float *cc)
void inline_unpacked_sse2_add (double *aa, double *bb, double *cc)
void inline_unpacked_sse2_mul (double *aa, double *bb, double *cc)
int main (int argc, char **argv)

Define Documentation

#define NUMBER   100

Definition at line 4 of file vector.c.


Function Documentation

void inline_packed_sse2_add ( double *  aa,
double *  bb,
double *  cc 
) [inline]

Definition at line 27 of file vector.c.

{
    __asm__ __volatile__( "movapd (%0), %%xmm0;"
                          "movapd (%1), %%xmm1;"
                          "addpd %%xmm0, %%xmm1;"
                          "movapd %%xmm1, (%2);"::"r"( aa ),
                          "r"( bb ), "r"( cc )
                          :"%xmm0", "%xmm1" );
}

Here is the caller graph for this function:

void inline_packed_sse2_mul ( double *  aa,
double *  bb,
double *  cc 
) [inline]

Definition at line 37 of file vector.c.

{
    __asm__ __volatile__( "movapd (%0), %%xmm0;"
                          "movapd (%1), %%xmm1;"
                          "mulpd %%xmm0, %%xmm1;"
                          "movapd %%xmm1, (%2);"::"r"( aa ),
                          "r"( bb ), "r"( cc )
                          :"%xmm0", "%xmm1" );
}

Here is the caller graph for this function:

void inline_packed_sse_add ( float *  aa,
float *  bb,
float *  cc 
) [inline]

Definition at line 7 of file vector.c.

{
    __asm__ __volatile__( "movaps (%0), %%xmm0;"
                          "movaps (%1), %%xmm1;"
                          "addps %%xmm0, %%xmm1;"
                          "movaps %%xmm1, (%2);"::"r"( aa ),
                          "r"( bb ), "r"( cc )
                          :"%xmm0", "%xmm1" );
}

Here is the caller graph for this function:

void inline_packed_sse_mul ( float *  aa,
float *  bb,
float *  cc 
) [inline]

Definition at line 17 of file vector.c.

{
    __asm__ __volatile__( "movaps (%0), %%xmm0;"
                          "movaps (%1), %%xmm1;"
                          "mulps %%xmm0, %%xmm1;"
                          "movaps %%xmm1, (%2);"::"r"( aa ),
                          "r"( bb ), "r"( cc )
                          :"%xmm0", "%xmm1" );
}

Here is the caller graph for this function:

void inline_unpacked_sse2_add ( double *  aa,
double *  bb,
double *  cc 
) [inline]

Definition at line 65 of file vector.c.

{
    __asm__ __volatile__( "movsd (%0), %%xmm0;"
                          "movsd (%1), %%xmm1;"
                          "addsd %%xmm0, %%xmm1;"
                          "movsd %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )
                          :"%xmm0", "%xmm1" );
}

Here is the caller graph for this function:

void inline_unpacked_sse2_mul ( double *  aa,
double *  bb,
double *  cc 
) [inline]

Definition at line 74 of file vector.c.

{
    __asm__ __volatile__( "movsd (%0), %%xmm0;"
                          "movsd (%1), %%xmm1;"
                          "mulsd %%xmm0, %%xmm1;"
                          "movsd %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )
                          :"%xmm0", "%xmm1" );
}

Here is the caller graph for this function:

void inline_unpacked_sse_add ( float *  aa,
float *  bb,
float *  cc 
) [inline]

Definition at line 47 of file vector.c.

{
    __asm__ __volatile__( "movss (%0), %%xmm0;"
                          "movss (%1), %%xmm1;"
                          "addss %%xmm0, %%xmm1;"
                          "movss %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )
                          :"%xmm0", "%xmm1" );
}

Here is the caller graph for this function:

void inline_unpacked_sse_mul ( float *  aa,
float *  bb,
float *  cc 
) [inline]

Definition at line 56 of file vector.c.

{
    __asm__ __volatile__( "movss (%0), %%xmm0;"
                          "movss (%1), %%xmm1;"
                          "mulss %%xmm0, %%xmm1;"
                          "movss %%xmm1, (%2);"::"r"( aa ), "r"( bb ), "r"( cc )
                          :"%xmm0", "%xmm1" );
}

Here is the caller graph for this function:

int main ( int  argc,
char **  argv 
)

Definition at line 84 of file vector.c.

{
    int i, packed = 0, sse = 0;
    float a[4] = { 1.0, 2.0, 3.0, 4.0 };
    float b[4] = { 2.0, 3.0, 4.0, 5.0 };
    float c[4] = { 0.0, 0.0, 0.0, 0.0 };
    double d[4] = { 1.0, 2.0, 3.0, 4.0 };
    double e[4] = { 2.0, 3.0, 4.0, 5.0 };
    double f[4] = { 0.0, 0.0, 0.0, 0.0 };

    if ( argc != 3 ) {
      bail:
        printf( "Usage %s: <packed|unpacked> <sse|sse2>\n", argv[0] );
        exit( 1 );
    }
    if ( strcasecmp( argv[1], "packed" ) == 0 )
        packed = 1;
    else if ( strcasecmp( argv[1], "unpacked" ) == 0 )
        packed = 0;
    else
        goto bail;
    if ( strcasecmp( argv[2], "sse" ) == 0 )
        sse = 1;
    else if ( strcasecmp( argv[2], "sse2" ) == 0 )
        sse = 0;
    else
        goto bail;

#if 0
    if ( ( sse ) &&
         ( system( "cat /proc/cpuinfo | grep sse > /dev/null" ) != 0 ) ) {
        printf( "This processor does not have SSE.\n" );
        exit( 1 );
    }
    if ( ( sse == 0 ) &&
         ( system( "cat /proc/cpuinfo | grep sse2 > /dev/null" ) != 0 ) ) {
        printf( "This processor does not have SSE2.\n" );
        exit( 1 );
    }
#endif

    printf( "Vector 1: %f %f %f %f\n", a[0], a[1], a[2], a[3] );
    printf( "Vector 2: %f %f %f %f\n\n", b[0], b[1], b[2], b[3] );

    if ( ( packed == 0 ) && ( sse == 1 ) ) {
        for ( i = 0; i < NUMBER; i++ ) {
            inline_unpacked_sse_add( &a[0], &b[0], &c[0] );
        }
        printf( "%d SSE Unpacked Adds: Result %f\n", NUMBER, c[0] );

        for ( i = 0; i < NUMBER; i++ ) {
            inline_unpacked_sse_mul( &a[0], &b[0], &c[0] );
        }
        printf( "%d SSE Unpacked Muls: Result %f\n", NUMBER, c[0] );
    }
    if ( ( packed == 1 ) && ( sse == 1 ) ) {
        for ( i = 0; i < NUMBER; i++ ) {
            inline_packed_sse_add( a, b, c );
        }
        printf( "%d SSE Packed Adds: Result %f %f %f %f\n", NUMBER, c[0], c[1],
                c[2], c[3] );
        for ( i = 0; i < NUMBER; i++ ) {
            inline_packed_sse_mul( a, b, c );
        }
        printf( "%d SSE Packed Muls: Result %f %f %f %f\n", NUMBER, c[0], c[1],
                c[2], c[3] );
    }

    if ( ( packed == 0 ) && ( sse == 0 ) ) {
        for ( i = 0; i < NUMBER; i++ ) {
            inline_unpacked_sse2_add( &d[0], &e[0], &f[0] );
        }
        printf( "%d SSE2 Unpacked Adds: Result %f\n", NUMBER, c[0] );

        for ( i = 0; i < NUMBER; i++ ) {
            inline_unpacked_sse2_mul( &d[0], &e[0], &f[0] );
        }
        printf( "%d SSE2 Unpacked Muls: Result %f\n", NUMBER, c[0] );
    }
    if ( ( packed == 1 ) && ( sse == 0 ) ) {
        for ( i = 0; i < NUMBER; i++ ) {
            inline_packed_sse2_add( &d[0], &e[0], &f[0] );
        }
        printf( "%d SSE2 Packed Adds: Result %f\n", NUMBER, c[0] );

        for ( i = 0; i < NUMBER; i++ ) {
            inline_packed_sse2_mul( &d[0], &e[0], &f[0] );
        }
        printf( "%d SSE2 Packed Muls: Result %f\n", NUMBER, c[0] );
    }


    exit( 0 );
}

Here is the call graph for this function:

 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines