/*
 * wr-nexgen : write data from RAM to target file(s) or device, 
 *             used for e.g. file system write throughput tests
 *
 * $ gcc -O3 -Wall -D_LARGEFILE64_SOURCE=1 wr-nexgen.c -o wr-nexgen
 * $ gcc -msse3 -Wall -O4 -D_LARGEFILE64_SOURCE=1 wr-nexgen.c -o wr-nexgen
 */

#define __USE_GNU
#define _GNU_SOURCE 1
#define _LARGEFILE64_SOURCE 1  /* Large File Support (LFS) '*64()' functions. */
#define _FILE_OFFSET_BITS 64   /* Automatic '*()' --> '*64()' replacement. */
#define _XOPEN_SOURCE 600
#include <fcntl.h>
//#define HAVE_XFS 1

#ifdef HAVE_XFS
#include <xfs/xfs.h> // needs the xfslibs-dev package
#endif

#include <sys/time.h>  /* gettimeofday() */
#include <sys/mman.h>  /* mmap() */
#include <sys/types.h>
#include <sys/stat.h>

#include <limits.h>

#ifndef __USE_GNU
#define __USE_GNU
#endif
#include <sched.h>
#include <pthread.h>

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

#define _XOPEN_SOURCE 600
#include <fcntl.h>

#include <unistd.h>    /* gettimeofday(), usleep() */
#include <errno.h>
#include <string.h>    /* strstr() */

#define USE_MMAP 0

/*
 * CPU-affinity shim used by realtime_init().
 *
 * After this block the names cpu_set_t, CPU_ZERO(), CPU_SET(),
 * SET_AFFINITY() and GET_AFFINITY() are usable with the calling convention
 * of the Linux <sched.h> interface: the mask is always passed by pointer and
 * SET_AFFINITY()/GET_AFFINITY() evaluate to 0 on success.
 *
 * On macOS the names are mapped onto the Mach thread-affinity policy. The
 * 'pid' and 'size' arguments are ignored there; the policy is always applied
 * to the calling thread. CPU_SET() stores cpu_id + 1 as the affinity tag so
 * that CPU 0 does not collide with THREAD_AFFINITY_TAG_NULL.
 *
 * Everywhere else the definitions supplied by <sched.h> (included above,
 * with _GNU_SOURCE defined) are used unchanged.
 */
#if defined(__APPLE__)
#   include <mach/mach_init.h>
#   include <mach/thread_act.h>    /* thread_policy_set(), thread_policy_get() */
#   include <mach/thread_policy.h>
#   define cpu_set_t thread_affinity_policy_data_t
#   define CPU_SET(cpu_id, new_mask) \
        ((new_mask)->affinity_tag = (cpu_id) + 1)
#   define CPU_ZERO(new_mask) \
        ((new_mask)->affinity_tag = THREAD_AFFINITY_TAG_NULL)
#   define SET_AFFINITY(pid, size, mask) \
        thread_policy_set(mach_thread_self(), THREAD_AFFINITY_POLICY, \
                          (thread_policy_t)(mask), THREAD_AFFINITY_POLICY_COUNT)
    /* thread_policy_get() takes the count and the get_default flag by */
    /* pointer; compound literals provide the required storage.        */
#   define GET_AFFINITY(pid, size, mask) \
        thread_policy_get(mach_thread_self(), THREAD_AFFINITY_POLICY, \
                          (thread_policy_t)(mask), \
                          &(mach_msg_type_number_t){ THREAD_AFFINITY_POLICY_COUNT }, \
                          &(boolean_t){ 0 })
#else
#   define SET_AFFINITY(pid, size, mask) \
        sched_setaffinity((pid), (size), (mask))
#   define GET_AFFINITY(pid, size, mask) \
        sched_getaffinity((pid), (size), (mask))
#endif

/*
 * Bookkeeping for one output (or input) target given on the command line.
 * main() keeps an array of these, plus one extra trailing element that only
 * uses nameTemplate/prevOperTime/maxOperTime for the "call-to-call" timing.
 */
struct sDir {
  char   *nameTemplate;   /* path template, used as an snprintf() format with one int */
  int     nameNumber;     /* set to 0 by main(); not read anywhere else in this file */
  int     blocksPerFile;  /* blocks transferred before the file is closed and a new one opened */
  int     fid;            /* descriptor of the currently open file */
  int     nowBlocks;      /* blocks transferred to/from the current file so far */
  double  prevOperTime;   /* tim() value at the start of the interval being timed */
  double  maxOperTime;    /* longest interval observed so far, in seconds */
};

typedef struct sDir  tDir;
typedef struct sDir *pDir;


/*
 * Simulated input-buffer accounting for rate-limited runs.
 * tim() adds byteRate * elapsed-seconds to bytesInBuffer on every call;
 * main() subtracts one block size after each block it transfers.
 */
long byteRate         = 0L;  /* simulated inflow in bytes per second; <= 0 disables the accounting */
long bytesInBuffer    = 0L;  /* bytes that have "arrived" but have not been transferred yet */
long maxBytesInBuffer = 0L;  /* largest bytesInBuffer value seen by tim() */

/*
 * Return the current wall-clock time in seconds (gettimeofday(), with
 * microsecond resolution) and, as a side effect, advance the simulated
 * buffer fill level.
 *
 * From the second call onwards, and only while byteRate > 0, the bytes that
 * would have arrived since the previous call (byteRate * elapsed seconds,
 * truncated to a whole number) are added to bytesInBuffer, and
 * maxBytesInBuffer is raised if a new maximum was reached.
 *
 * Aborts the program if gettimeofday() fails.
 */
double tim(void) {
  struct timeval tv;
  double t;
  static double prevT = 0.0;  /* time of the previous call; 0.0 means "never called" */

  /* The call must not live inside assert(): with NDEBUG it would vanish */
  /* and 'tv' would be read uninitialised.                               */
  if (gettimeofday(&tv, NULL) != 0) {
    perror("gettimeofday");
    abort();
  }
  t = (double)tv.tv_sec + (double)tv.tv_usec/1000000.0;
  if ((prevT > 0.0) && (byteRate > 0)) {  /* has been called once; call #2.. */
    /* Update the amount of data "flown" in & its max.  The increment is  */
    /* converted to long, the type of the counters, so that a high rate   */
    /* or a long gap between calls cannot overflow an int.                */
    bytesInBuffer += (long)(byteRate*(t - prevT));
    if (bytesInBuffer > maxBytesInBuffer) {
      maxBytesInBuffer = bytesInBuffer;
    }
  }
  prevT = t;
  return t;
}

/*
 * Return the current wall-clock time in seconds, with microsecond
 * resolution, as reported by gettimeofday().
 *
 * Unlike tim(), this function has no side effects on the buffer counters.
 * Aborts the program if gettimeofday() fails.
 */
double systime_sec(void) {
  struct timeval tv;

  /* Keep the call outside assert() so it still happens under NDEBUG. */
  if (gettimeofday(&tv, NULL) != 0) {
    perror("gettimeofday");
    abort();
  }
  return (double)tv.tv_sec + (double)tv.tv_usec/1e6;
}

/*
 * Fill the first 'nbytes' bytes of 'buf' with pseudo-random data.
 *
 * Each byte is the value of one rand() call converted to char, so the
 * content is determined by the current srand() seed.  Nothing is written
 * when nbytes <= 0.
 */
void randbuf(char *buf, ssize_t nbytes) {
  ssize_t i;  /* same type as nbytes: an int index overflows for buffers >= 2 GiB */

  for (i = 0; i < nbytes; i++) {
    buf[i] = (char)rand();
  }
}

/*
 * Request that the program runs on CPU 0 only, using this file's
 * CPU_ZERO()/CPU_SET()/SET_AFFINITY() macros, and report the outcome.
 *
 * On success "Program tied to CPU#0" is printed on stdout; if the affinity
 * request is rejected a warning is printed on stderr instead and the
 * program simply continues without pinning.  stdout is flushed before
 * returning.
 *
 * Switching to the round-robin real-time scheduler is currently disabled.
 * The code that used to do it is kept here for reference:
 *
 *   struct sched_param sp;
 *   sp.sched_priority = 50;   (sched_get_priority_max(SCHED_RR) would
 *                              give 99, which is not recommended)
 *   if (sched_setscheduler(0, SCHED_RR, &sp) < 0) {
 *       printf("Warning: could not change to Round-Robin scheduler\n");
 *   } else {
 *       printf("Changed to SCHED_RR with priority %d\n", sp.sched_priority);
 *   }
 */
void realtime_init(void) {
  cpu_set_t cpuset;
  int cpu = 0;

  CPU_ZERO(&cpuset);
  CPU_SET(cpu, &cpuset);
  /* SET_AFFINITY() evaluates to 0 on success; do not claim the program */
  /* is pinned when the request was refused.                            */
  if (SET_AFFINITY(0, sizeof(cpu_set_t), &cpuset) != 0) {
    fprintf(stderr, "Warning: could not tie program to CPU#%d\n", cpu);
  } else {
    printf("Program tied to CPU#%d\n", cpu);
  }

  fflush(stdout);
}

int main(int argc, char *argv[]) {

  #if USE_MMAP
  int fd_mmap;
  #endif

  /* memory and file */
  int   readMode;
  int   blksize;
  off_t totalblks;
  off_t memblks;
  char *memblk;

  double flen_MByte;
  double mlen_MByte;

  char *p;
  int   ndirs;
  pDir  dirs;

  /* 1% and 1s progress output. */
  int onePercentBlocks;
  double startTime;
  double iter_prevTime, iter_currTime;
  int    iter_counter;
  off_t  iter_blocks;

  /* rate statistic values */
  double mbits;
  double recordStart;
  double totalDur;
  int    usleeps;

  /* loop counters */
  int    i;
  off_t  blk;

  #if _LARGEFILE64_SOURCE
  fprintf(stderr, "wr-nexgen compiled for LFS 64-bit file support\n");
  #endif
  if (argc < 6) {
    fprintf(stderr, "%s: needs at least byterate blocksize totalblocks memblocks ndirs { path%%d blks }\n\n", argv[0]);
    fprintf(stderr,"     byterate : 0 for unlimited\n"
                   "     ndirs    : how many { path%%d blks } output files follow\n\n");
    exit(EXIT_FAILURE);
  }

#define TIMESTART(i) \
      { dirs[i].prevOperTime = tim(); }
#define TIMESTOP(i) \
      { double dur = tim() - dirs[i].prevOperTime; \
        if (dur > dirs[i].maxOperTime) dirs[i].maxOperTime = dur; }

  /* Init variables according to command line, allocate buffers. */
  readMode = (strstr(argv[0], "rd") != NULL);
  /* later, after rand() time measurement: byteRate = atol(argv[1]); */
  blksize = atoi(argv[2]);
  totalblks = atoll(argv[3]);
  memblks = atoll(argv[4]);
  mlen_MByte = (memblks*blksize)/(1024.0*1024.0);
  flen_MByte = (totalblks*blksize)/(1024.0*1024.0);
  fprintf(stderr, "RAM buffer size=%5.2f MB\n"
                  "Final file size=%6.2f MB\n",
                   mlen_MByte, flen_MByte 
         );

  /* Perform real-time inits (memory lock etc), then allocate memory */
  realtime_init();
  #if USE_MMAP
  fd_mmap = open("/tmp/wr-mmap", O_RDWR | O_CREAT);
  if (fd_mmap == -1) {
     fprintf(stderr, "/tmp/wr-mmap open failed, errno=%d\n", errno);
     exit(EXIT_FAILURE);
  }
  memblk  = mmap((void*)0, memblks*blksize, PROT_WRITE | PROT_READ, MAP_SHARED | MAP_LOCKED, fd_mmap, 0 );
  if (memblk == MAP_FAILED) {
     fprintf(stderr, "mmap failed, errno=%d\n", errno);
     exit(EXIT_FAILURE);
  }
  #else
  assert( (memblk = valloc( (size_t)(memblks*blksize))) != NULL );
  #endif

  /* Init memblk with random values. */
  byteRate = 0L;
  {
    double st = tim();

    printf("Initializing memory buffer with rand()...");
    fflush(stdout);
    randbuf(memblk, (memblks*blksize));
    printf("took %f seconds.\n", tim()-st);
  }
//  madvise(memblk, memblks*blksize, MADV_SEQUENTIAL|MADV_DONTNEED);
  byteRate = atol(argv[1]);
  ndirs = atoi(argv[5]);
  assert( (argc >= (6 + ndirs*2)) );
  /* Allocate one extra struct for call-to-call time statistics. */
  assert( (dirs = malloc((ndirs+1)*sizeof(tDir))) != NULL );
  for (i=0; i<ndirs; i++) {
    dirs[i].nameTemplate = argv[6+i*2];
    dirs[i].nameNumber = 0;
    dirs[i].blocksPerFile = atoi(argv[7+i*2]);
    dirs[i].nowBlocks = dirs[i].blocksPerFile;  /* init to end */
    dirs[i].maxOperTime = 0.0;  /* init to smallest value */
  }  /* for each directory */
  dirs[ndirs].nameTemplate =   "call-to-call  ";
  dirs[ndirs].maxOperTime = 0.0;

  /* Start looping for total number of blocks. */
  bytesInBuffer    = maxBytesInBuffer = usleeps = 0;
  recordStart      = tim();
  startTime        = recordStart;
  iter_prevTime    = recordStart;
  TIMESTART(ndirs);  /* call-to-call (not per file) */
  iter_counter     = 0;
  iter_blocks      = 0;
  onePercentBlocks = totalblks / 100;
  for (blk=0; blk< totalblks; blk++) {
    i = blk % ndirs;
    if (dirs[i].nowBlocks >= dirs[i].blocksPerFile) {
      /* Open a new file in this directory / template. */
      char pn[255];
      int n = blk / dirs[i].blocksPerFile;

      snprintf(pn, sizeof(pn), dirs[i].nameTemplate, n);
      if (n==0) TIMESTART(i);  /* only for first file */
      if (readMode) {
        assert( (dirs[i].fid = open(pn, O_RDONLY /*| O_DIRECT*/)) != -1 );
      } else {
        int flgs = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH; // |O_DIRECT;
        dirs[i].fid = creat(pn, flgs);
        // posix_fadvise(dirs[i].fid, 0, 0, POSIX_FADV_SEQUENTIAL|POSIX_FADV_DONTNEED);
        //if (flgs & O_DIRECT)
        //   fprintf(stderr, "creat(%s) with O_DIRECT\n", pn);
        #ifdef HAVE_XFS
        {
            struct fsxattr xa;
            xfsctl(pn, dirs[i].fid, XFS_IOC_FSGETXATTR, &xa);
            xa.fsx_xflags |= XFS_XFLAG_REALTIME;
            xfsctl(pn, dirs[i].fid, XFS_IOC_FSSETXATTR, &xa);
            fprintf(stderr, "Attempted to set XFS Realtime flag on the file\n");
        }
        #endif
        if (dirs[i].fid == -1) {
           fprintf(stderr, "creat() error: %d '%s'\n", errno, strerror(errno));
           exit(EXIT_FAILURE);
        }
//        assert( (dirs[i].fid = open(pn, O_WRONLY | O_CREAT /*| O_DIRECT*/)) != -1 );
      }
      dirs[i].nowBlocks = 0;
    }

    /* If we are doing rate-limited testing, check for available new data. */
    if (byteRate > 0L) {
      while (bytesInBuffer < blksize) {
        /* Not one full block in buffer, wait. */
        usleep(1000);  /* a small amount, probably ends up to be 10--20msec */
        usleeps++;
        (void)tim();  /* update 'bytesInBuffer' */
      }  /* while not at least one full block in buffer */
    }

    /* Write (or read) one block. */
    p = &(memblk[(blk % memblks)*blksize]);
    *(p+2) = 127; // write to make page 'dirty'
    if (readMode) {
      assert( (read(dirs[i].fid,
                     p,
                     blksize)) == blksize );
    } else {
      assert( (write(dirs[i].fid,
                     p,
                     blksize)) == blksize );
    }
    TIMESTOP(i);  /* accumulates close--open-write in worst case */
    TIMESTOP(ndirs);  /* call-to-call */
    /* Once the previous have updated 'bytesInBuffer' increase, */
    /* only then decrement for the amount written. */
    bytesInBuffer -= blksize;
    TIMESTART(i);
    TIMESTART(ndirs);
    dirs[i].nowBlocks++;
	
    ++iter_blocks;
    ++iter_counter; iter_counter %= 32;
    iter_currTime = systime_sec();

    /* 1% or 1s progress indication. */
    if( (--onePercentBlocks <= 0) || 
        ((iter_counter==31) && (iter_currTime-iter_prevTime)>0.5) ) {

      double t = tim();
      double mbits_curr;

      mbits      = 8.0e-6 * (blk*blksize) / (iter_currTime - startTime);
      mbits_curr = 8.0e-6 * (iter_blocks*blksize) / (iter_currTime-iter_prevTime);

      printf("%3.2f%% \t %8.3f \t %f \t %f\n",
              100.0*(double)blk/totalblks, 
              (iter_currTime - startTime),
              mbits, mbits_curr
            );

      onePercentBlocks = totalblks / 100;
      iter_prevTime    = iter_currTime;
      iter_blocks      = 0;
      iter_counter     = 0;

    }  /* if 1% block counter or 1s expired */

    /* Time to switch to another file? */
    if (dirs[i].nowBlocks >= dirs[i].blocksPerFile) {
      /* Close the ready file before opening a new. */
      assert( (close(dirs[i].fid) != -1) );
    }
  }  /* for each block */
  TIMESTOP(ndirs);
  totalDur = tim() - recordStart;

  /* Final report. */
  mbits = 8e-6 * (totalblks*blksize) / totalDur;
  printf("Took %.2f seconds for %.2f MBytes: %.3f Mbits(dec)/s (%.2f%% of PCI33).\n",
         totalDur,
         flen_MByte,
         mbits,
         mbits/(33.0*4.0*8.0)*100.0
         );
  if (byteRate > 0) {
    printf("Max. unwritten bytes in buffer %Lu  (%d usleeps()).\n",
           (unsigned long long)maxBytesInBuffer,
           usleeps
           );
  }
  for (i=0; i<=ndirs; i++) {
    printf("%s  max. %f seconds.\n",
           dirs[i].nameTemplate,
           dirs[i].maxOperTime
           );
  }  /* for each directory */

  #if USE_MMAP
  munmap(memblk, memblks*blksize);
  #endif

  return(EXIT_SUCCESS);
}  /* main */

