//----------------------------------------------------------------------------------------------

#include <spu_mfcio.h>

//----------------------------------------------------------------------------------------------

/*
 * 64-bit value that can also be viewed as two 32-bit words.
 *
 * main() receives its argp/envp parameters in this form and reads ui[1]
 * of argp to obtain a 32-bit address. Which half of `ull` each ui[] slot
 * aliases depends on the target's byte order.
 */
typedef union {
    unsigned long long ull;   /* the full 64-bit value */
    unsigned int ui[2];       /* the same storage as two unsigned ints */
} addr64;

/*
 * Argument block exchanged with the PPU side.
 *
 * main() DMAs this structure in from the address passed in argp, uses the
 * three pointers as the main-memory addresses of the A, B and C buffers, and
 * DMAs the structure back after filling in decrementer_ticks.
 *
 * NOTE(review): the pointers are declared `double *`, but main() transfers
 * 64*64*sizeof(float) bytes per buffer into float arrays and only ever uses
 * the pointers as raw addresses -- confirm the intended element type against
 * the PPU-side definition. The layout must match the PPU side exactly.
 */
struct args_t {
    double *ppuA;            /* main-memory address of input buffer A */
    double *ppuB;            /* main-memory address of input buffer B */
    double *ppuC;            /* main-memory address of buffer C (read, then written back) */
    int decrementer_ticks;   /* out: decrementer ticks elapsed during the timing loop */
};

//----------------------------------------------------------------------------------------------

/*
 * SPU-side working buffers, 64*64 floats each (presumably 64x64 tiles --
 * confirm against spu_mm_tile). main() fills all three by DMA from the
 * addresses in struct args_t, passes them to spu_mm_tile(), and writes spuC
 * back. Each buffer is aligned to a 16 KiB boundary.
 */
float spuA[64*64] __attribute__ ((aligned (16*1024)));
float spuB[64*64] __attribute__ ((aligned (16*1024)));
float spuC[64*64] __attribute__ ((aligned (16*1024)));

//----------------------------------------------------------------------------------------------

/*
 * Tile kernel, defined outside this file. main() calls it with the three
 * local buffers (spuA, spuB, spuC) and ignores the returned vector.
 *
 * Presumably computes a matrix-multiply tile of A and B into C -- confirm
 * against the implementation.
 * NOTE(review): a leftover note here read "vector float", which suggests the
 * return type was once (or was meant to be) `vector float`; verify that this
 * declaration matches the actual definition.
 */
vector unsigned int 
  spu_mm_tile(float *A, float *B, float *C);

//----------------------------------------------------------------------------------------------

/*
 * Programs the MFC tag mask to select `tag` and waits until every DMA
 * transfer issued under that tag has completed.
 */
static void wait_for_dma(unsigned int tag)
{
    mfc_write_tag_mask(1u << tag);
    (void)mfc_read_tag_status_all();
}

/*
 * SPU entry point.
 *
 *   1. Fetches the struct args_t argument block from the address held in argp.
 *   2. Fetches the A, B and C buffers named by that block into spuA/spuB/spuC.
 *   3. Runs spu_mm_tile() once and writes spuC back to its source address.
 *   4. Times 100000 further spu_mm_tile() calls with the SPU decrementer and
 *      writes the argument block (now carrying decrementer_ticks) back.
 *
 * speid and envp are not used. Always returns 0.
 */
int main(int speid, addr64 argp, addr64 envp)
{
    enum { DMA_TAG = 0 };   /* every transfer in this program uses tag 0 */

    struct args_t spu_args __attribute__ ((aligned (16)));

    /*
     * Only the second 32-bit word of argp is used as the address of the
     * argument block (the low-order half on a big-endian target).
     */
    const unsigned int ppu_args_p = argp.ui[1];

    unsigned int start;
    int i;

    (void)speid;
    (void)envp;

    /* Pull in the argument block and wait for it before reading its fields. */
    mfc_get(&spu_args, ppu_args_p, sizeof(struct args_t), DMA_TAG, 0, 0);
    wait_for_dma(DMA_TAG);

    /*
     * Fetch the three buffers. The transfer size is taken from the local
     * arrays themselves so it cannot drift from their declared size.
     */
    mfc_get(spuA, (unsigned int)spu_args.ppuA, sizeof(spuA), DMA_TAG, 0, 0);
    mfc_get(spuB, (unsigned int)spu_args.ppuB, sizeof(spuB), DMA_TAG, 0, 0);
    mfc_get(spuC, (unsigned int)spu_args.ppuC, sizeof(spuC), DMA_TAG, 0, 0);
    wait_for_dma(DMA_TAG);

    /* Single run whose result is the one handed back to the PPU. */
    spu_mm_tile(spuA, spuB, spuC);

    mfc_put(spuC, (unsigned int)spu_args.ppuC, sizeof(spuC), DMA_TAG, 0, 0);
    wait_for_dma(DMA_TAG);

    /*
     * Timing loop. The decrementer is loaded with its maximum value and
     * counts down, so elapsed ticks = start - current. Any changes these
     * calls make to spuC are not written back.
     */
    spu_write_decrementer(-1);
    start = spu_read_decrementer();
    for (i = 0; i < 100000; i++) {
        spu_mm_tile(spuA, spuB, spuC);
    }
    spu_args.decrementer_ticks = start - spu_read_decrementer();

    /* Return the argument block, including the tick count, to the PPU. */
    mfc_put(&spu_args, ppu_args_p, sizeof(struct args_t), DMA_TAG, 0, 0);
    wait_for_dma(DMA_TAG);

    return (0);
}

//----------------------------------------------------------------------------------------------


