#include <cuda/cuda.h>
#include <stdio.h>

/* Convert a byte count to whole kilobytes (1 KB = 1024 bytes); any
 * remainder is discarded by the integer division. */
static unsigned long inKB(unsigned long bytes)
{
  const unsigned long bytesPerKB = 1024UL;

  return bytes / bytesPerKB;
}

/* Convert a byte count to whole megabytes (1 MB = 1024 * 1024 bytes); any
 * remainder is discarded by the integer division. */
static unsigned long inMB(unsigned long bytes)
{
  const unsigned long bytesPerMB = 1024UL * 1024UL;

  return bytes / bytesPerMB;
}

/*
 * Print a memory usage report to stdout: free, used and total amounts in
 * bytes, KB and MB, followed by the free/used percentages.
 *
 *   free  - number of free bytes
 *   total - total number of bytes
 *   used  - ignored on input; the value is always recomputed from total and
 *           free. The parameter is kept so existing callers still compile.
 */
static void printStats(unsigned long free, unsigned long total, unsigned long used)
{
  double freePct = 0.0;
  double usedPct = 0.0;

  /* Recompute "used" ourselves; clamp to 0 so an inconsistent free > total
   * pair cannot wrap around to a huge unsigned value. */
  used = (free <= total) ? (total - free) : 0UL;

  /* Avoid dividing by zero (which would print nan/inf) when total is 0. */
  if (total != 0UL)
  {
    freePct = 100.0 * free / (double)total;
    usedPct = 100.0 * used / (double)total;
  }

  printf("^^^^ Free : %lu bytes (%lu KB) (%lu MB)\n", free, inKB(free), inMB(free));
  printf("^^^^ Used : %lu bytes (%lu KB) (%lu MB)\n", used, inKB(used), inMB(used));
  printf("^^^^ Total: %lu bytes (%lu KB) (%lu MB)\n", total, inKB(total), inMB(total));
  printf("^^^^ %f%% free, %f%% used\n", freePct, usedPct);
}

/*
 * Entry point: enumerate the CUDA devices through the driver API and print
 * a memory usage report for each one.
 *
 * For every device a context is created, cuMemGetInfo is queried inside it,
 * the result is printed with printStats, and the context is destroyed again.
 *
 * Returns 0 when every step succeeded, 1 if initialization failed or any
 * device could not be queried. Command-line arguments are not used.
 */
int main(int argc, char **argv)
{
  /* cuMemGetInfo writes size_t values; narrower variables would be
   * overrun on 64-bit builds. */
  size_t freeBytes = 0, totalBytes = 0;
  int gpuCount = 0, i;
  int status = 0;
  CUresult res;
  CUdevice dev;
  CUcontext ctx;

  (void)argc;
  (void)argv;

  res = cuInit(0);
  if (res != CUDA_SUCCESS)
  {
    printf("!!!! cuInit failed! (status = %x)\n", (unsigned int)res);
    return 1;
  }

  res = cuDeviceGetCount(&gpuCount);
  if (res != CUDA_SUCCESS)
  {
    printf("!!!! cuDeviceGetCount failed! (status = %x)\n", (unsigned int)res);
    return 1;
  }
  printf("Detected %d GPU\n", gpuCount);

  for (i = 0; i < gpuCount; i++)
  {
    res = cuDeviceGet(&dev, i);
    if (res != CUDA_SUCCESS)
    {
      printf("!!!! cuDeviceGet failed for device %d! (status = %x)\n", i, (unsigned int)res);
      status = 1;
      continue;
    }

    /* The memory query below needs a current context on the device. */
    res = cuCtxCreate(&ctx, 0, dev);
    if (res != CUDA_SUCCESS)
    {
      printf("!!!! cuCtxCreate failed for device %d! (status = %x)\n", i, (unsigned int)res);
      status = 1;
      continue;
    }

    printf("^^^^ Device: %d\n", i);
    res = cuMemGetInfo(&freeBytes, &totalBytes);
    if (res != CUDA_SUCCESS)
    {
      /* Do not print statistics: the output values are not valid. */
      printf("!!!! cuMemGetInfo failed! (status = %x)\n", (unsigned int)res);
      status = 1;
    }
    else
    {
      /* printStats takes unsigned long; on platforms where that is narrower
       * than size_t the values are truncated by these casts. */
      printStats((unsigned long)freeBytes,
                 (unsigned long)totalBytes,
                 (unsigned long)(totalBytes - freeBytes));
    }

    /* Release the context created above before moving to the next device. */
    cuCtxDestroy(ctx);
  }

  return status;
}
