#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
//#include <sys/types.h>
#define __STDC_FORMAT_MACROS 1
#include <inttypes.h>
#include <string.h>
//#include <math.h>

/*
 * Step the linear congruential generator held in `s` and yield a value.
 *
 * `s` is updated in place to s * 41943011 - 2147483647; the macro's value
 * is that new state shifted right by 32 bits.  `s` must be a modifiable
 * lvalue wider than 32 bits (the file uses uint64_t) and is evaluated more
 * than once, so it must not carry side effects.
 */
#define LRAND(s) \
((s) = (s) * 41943011 - 2147483647, (s) >> 32)

#if defined(__i386__)

/*
 * Read the processor time-stamp counter (32-bit x86 build).
 *
 * The "=A" output constraint asks for the 64-bit result in the EDX:EAX
 * register pair, which is where RDTSC (opcode 0x0f 0x31) leaves it.
 */
static __inline__ uint64_t rdtsc(void)
{
  uint64_t ticks;

  __asm__ __volatile__ ("rdtsc" : "=A" (ticks));

  return ticks;
}
#elif defined(__x86_64__)


/*
 * Read the processor time-stamp counter (x86-64 build).
 *
 * The two 32-bit halves are captured from EAX ("=a", low) and EDX ("=d",
 * high) and joined into a single 64-bit tick count.
 */
static __inline__ uint64_t rdtsc(void)
{
  uint32_t low_half;
  uint32_t high_half;
  uint64_t ticks;

  __asm__ __volatile__ ("rdtsc" : "=a"(low_half), "=d"(high_half));

  ticks = high_half;
  ticks <<= 32;
  ticks |= low_half;
  return ticks;
}

#elif defined(__powerpc__)


/*
 * Read the 64-bit PowerPC time base as a tick count.
 *
 * The upper word is read before and after the lower word; if the two
 * upper reads differ, the lower word rolled over in between and the whole
 * sequence is retried, so the returned halves always belong together.
 */
static __inline__ uint64_t rdtsc(void)
{
  uint32_t upper, lower, recheck;

  __asm__ volatile(
                "0:                  \n"
                "\tmftbu   %0           \n"
                "\tmftb    %1           \n"
                "\tmftbu   %2           \n"
                "\tcmpw    %2,%0        \n"
                "\tbne     0b         \n"
                : "=r"(upper),"=r"(lower),"=r"(recheck)
                : /* no inputs */
                : "cr0" /* cmpw writes condition register field 0 */
                );

  return ((uint64_t)upper << 32) | lower;
}

#endif

/*
 * Benchmark driver.
 *
 * For every length from 1 to MAX_SIZE bytes, times __builtin_memcpy (or
 * __builtin_memset when compiled with MEMSET defined) with rdtsc() and
 * writes the mean tick count over RERUNS runs to the file named by
 * argv[1], one decimal value per line.
 *
 * Returns 0 on success.  Returns 1 when the argument count is wrong, the
 * output file cannot be opened, or writing/closing the file fails.
 */
int main(int argc, char **argv){
	enum { MAX_SIZE = 32768, RERUNS = 8 };
	char dest[MAX_SIZE];
#ifndef MEMSET
	char src[MAX_SIZE];
#else
	char src = 0;
#endif
	FILE *out;
	uint64_t rseed;
	uint32_t size, run, i;
	int write_failed;

	if(argc != 2){
		fprintf(stderr, "usage: %s <output-file>\n",
			argc > 0 ? argv[0] : "bench");
		return 1;
	}

	out = fopen(argv[1], "w");
	if(out == NULL){
		perror(argv[1]);
		return 1;
	}

	/* Seed the pseudo-random fill from the current tick count. */
	rseed = rdtsc();

	for(size=1; size<=MAX_SIZE; size++){
		uint64_t total = 0;

		/*
		 * Refresh the source data for this length.  The generator is
		 * stepped once per byte in both build variants; in the MEMSET
		 * build only the last value drawn is kept as the fill byte.
		 */
		for(i=0; i<size; i++){
#ifndef MEMSET
			src[i] = (char)(LRAND(rseed) % 256);
#else
			src = (char)(LRAND(rseed) % 256);
#endif
		}

		for(run=0; run<RERUNS; run++){
			uint64_t start = rdtsc();
#ifndef MEMSET
			(void)__builtin_memcpy(dest, src, size);
#else
			(void)__builtin_memset(dest, src, size);
#endif
			total += rdtsc()-start;

			/*
			 * Fold the destination back into the source outside the
			 * timed region.  NOTE(review): presumably this exists so
			 * dest is actually consumed and each rerun sees different
			 * data -- confirm the intent.
			 */
			for(i=0; i<size; i++){
#ifndef MEMSET
				src[i] += dest[i];
#else
				src += dest[i];
#endif
			}
		}

		fprintf(out, "%"PRIu64"\n", total/RERUNS);
	}

	/* A buffered write can fail late, so check both the stream and fclose. */
	write_failed = ferror(out);
	if(fclose(out) != 0){
		write_failed = 1;
	}
	if(write_failed){
		fprintf(stderr, "%s: error writing results\n", argv[1]);
		return 1;
	}

	return 0;
}
