/* Average copy throughput in MiB/s for test #0, #1 and #2 respectively,
 * written by memtests() (the test number is the 'type' handed to worker()). */
double memavg0;
double memavg1;
double memavg2;
/* Combined score written by memtests(): (memavg0+memavg1+memavg2)/750. */
double lsmem;

/* default block size in bytes for the block-wise memcpy test */
#define DEFAULT_BLOCK_SIZE 4096

/* size of each test array in MiB, i.e. the amount of data copied per run */
#define DEFAULT_CHUNKS 32

/* how many runs to average by default */
#define DEFAULT_NR_LOOPS 300

/* number of copy tests (test numbers 0..MAX_TESTS-1); 3 at the moment */
#define MAX_TESTS 3

/* Allocate a test array of 'asize' longs and write a pattern into every
 * element, so as to force Linux to _really_ allocate the memory.
 *
 * asize: number of type 'long' elements to allocate
 *
 * return value: pointer to the new array; the caller releases it with free().
 *               On allocation failure an error is printed and the process
 *               exits with status 1, so this never returns NULL.
 */
long *make_array(unsigned long long asize) {
        long *buf=calloc(asize, sizeof(long));
        long *cur;
        unsigned long long remaining;

        if(buf==NULL) {
                perror("Error allocating memory");
                exit(1);
        }

        /* touch every element by storing the fill pattern */
        cur=buf;
        for(remaining=asize; remaining>0; remaining--) {
                *cur++=0xaa;
        }
        return buf;
}

/* actual benchmark */
/* Copy array 'a' into array 'b' once with the selected method and time it.
 *
 * asize: number of type 'long' elements in test arrays
 * a: source array (at least asize elements)
 * b: destination array (at least asize elements)
 * type: 0=use memcpy, 2=use memcpy on consecutive blocks of block_size bytes,
 *       any other value (normally 1)=use dumb copy loop (whatever the
 *       compiler thinks best)
 * block_size: block size in bytes for type 2, ignored otherwise; 0 means
 *             "one block covering the whole array"
 *
 * return value: elapsed time in seconds
 */
double worker(unsigned long long asize, long *a, long *b, int type, unsigned long long block_size) {
        unsigned long long t;
        struct timeval starttime,endtime;
        /* array size in bytes */
        unsigned long long array_bytes=asize*sizeof(long);

        if(type==0) { /* memcpy test */
                /* timer starts */
                gettimeofday(&starttime, NULL);
                memcpy(b,a,array_bytes);
                /* timer stops */
                gettimeofday(&endtime, NULL);
        } else if(type==2) { /* memcpy block test */
                const char *src=(const char *)a;
                char *dst=(char *)b;

                /* a zero block size would never make progress */
                if(block_size==0) {
                        block_size=array_bytes;
                }

                gettimeofday(&starttime, NULL);
                /* full blocks, walking through both arrays */
                for(t=array_bytes; t>block_size; t-=block_size) {
                        memcpy(dst,src,block_size);
                        src+=block_size;
                        dst+=block_size;
                }
                /* last block: whatever is left (at most block_size bytes),
                 * so the copy never runs past the end of either array */
                memcpy(dst,src,t);
                gettimeofday(&endtime, NULL);
        } else { /* dumb test */
                gettimeofday(&starttime, NULL);
                for(t=0; t<asize; t++) {
                        b[t]=a[t];
                }
                gettimeofday(&endtime, NULL);
        }

        /* subtract before scaling: tv_sec*1000000 overflows a 32-bit time_t */
        return (double)(endtime.tv_sec-starttime.tv_sec)
                +(double)(endtime.tv_usec-starttime.tv_usec)/1000000;
}

/* ------------------------------------------------------ */

/* Print one benchmark result as a single line on stdout: method label,
 * elapsed time, amount of data and the resulting copy rate. */
/* te: elapsed time in seconds
 * mt: amount of transferred data in MiB
 * type: test number selecting the label (0=MEMCPY, 1=DUMB, 2=MCBLOCK);
 *       any other value prints no label
 *
 * return value: -
 */

void printout(double te, double mt, int type) {
        static const char *const method_label[] = {
                "Method: MEMCPY\t",
                "Method: DUMB\t",
                "Method: MCBLOCK\t"
        };
        double rate=mt/te; /* MiB per second */

        if(type>=0 && type<=2) {
                fputs(method_label[type], stdout);
        }
        printf("Elapsed: %.5f\t",te);
        printf("MiB: %.5f\t",mt);
        printf("Copy: %.3f MiB/s\n", rate);
}

/* ------------------------------------------------------ */

/* Run every copy test and derive the combined memory score.
 *
 * Allocates two arrays of DEFAULT_CHUNKS MiB each, then for each test number
 * 0..MAX_TESTS-1 calls worker() DEFAULT_NR_LOOPS times and prints the
 * averaged result through printout(). The average throughput of each test
 * (MiB/s) is stored in the globals memavg0, memavg1 and memavg2, and their
 * sum divided by 750 is stored in lsmem.
 *
 * return value: lsmem truncated to int
 */
int memtests() {
        /* options (fixed here, nothing is parsed from a command line) */
        const int nr_loops=DEFAULT_NR_LOOPS; /* how many runs to average? */
        const unsigned long long block_size=DEFAULT_BLOCK_SIZE; /* block size for the block copy test */
        const int showavg=1; /* show average */
        const int quiet=0; /* suppress extra messages */
        const double mt=DEFAULT_CHUNKS; /* MiBytes transferred == array size in MiB */

        const unsigned int long_size=sizeof(long); /* the size of long on this platform */
        const unsigned long long asize=1024*1024/long_size*mt; /* how many longs then in one array? */

        int tests[MAX_TESTS]; /* what tests to run */
        double te_sum; /* time elapsed, summed over all runs of one test */
        double avg; /* throughput of one test in MiB/s */
        long *a, *b; /* the two arrays to be copied from/to */
        int testno;
        int i;

        /* no specific tests can be requested, so run all of them */
        for(testno=0; testno<MAX_TESTS; testno++) {
                tests[testno]=1;
        }

        if(!quiet) {
                printf("\tLong uses %u bytes. ",long_size);
                printf("\tAllocating 2*%llu elements = %llu bytes of memory.\n",asize,2*asize*long_size);
                if(tests[2]) {
                        printf("\tUsing %llu bytes as blocks for memcpy block copy test.\n",block_size);
                }
        }

        a=make_array(asize);
        b=make_array(asize);

        /* ------------------------------------------------------ */
        if(!quiet) {
                printf("\tDoing %d runs per test.\n",nr_loops);
                printf("\tTest running...\n\n");
        }

        /* run all tests requested, the proper number of times */
        for(testno=0; testno<MAX_TESTS; testno++) {
                if(!tests[testno]) {
                        continue;
                }

                te_sum=0;
                for(i=0; i<nr_loops; i++) {
                        te_sum+=worker(asize,a,b,testno,block_size);
                }

                if(showavg) {
                        printf("\tTest #%d: AVG of %d Runs\t", testno+1, nr_loops);
                        /* total MiB copied divided by total seconds */
                        avg=mt*nr_loops/te_sum;
                        switch(testno) {
                                case 0:
                                        memavg0=avg;
                                        break;
                                case 1:
                                        memavg1=avg;
                                        break;
                                default:
                                        memavg2=avg;
                                        break;
                        }
                        printout(te_sum/nr_loops,mt,testno);
                }
        }

        free(a);
        free(b);

        /* NOTE(review): 750 is an unexplained scaling constant - confirm its
         * meaning with whoever consumes lsmem */
        lsmem=(memavg0+memavg1+memavg2)/750;
        /* the function returns int, so the fractional part is dropped */
        return (int)lsmem;
}
