/*
 * vsib.c -- Linux character driver for VSIbrute PCI board.
 *
 * [indent settings: -i3 -ts3 -br -bad -bap -c2 -lps -fca -bbb -nut -l100 -npcs -ppi3]
 *
 * Copyright (C) 2001--2002 Ari Mujunen, Ari.Mujunen@hut.fi
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

#include <linux/module.h>
#include <linux/version.h>
// #include <linux/modversions.h>

#include <linux/autoconf.h>
#include <linux/init.h>

#include <linux/types.h>
#include <linux/fs.h>
#include <linux/mm.h>            /* for 'verify_area()' */
#include <linux/errno.h>         /* for '-EBUSY' and other error codes */
#include <asm/uaccess.h>         /* for 'copy_to_user()' et al */
#include <linux/pci.h>
#include <asm/io.h>              /* for 'inp/out' and 'virt_to_bus()' */
#include <linux/ioport.h>        /* for 'check/request_region()' */
#include <linux/bigphysarea.h>   /* for getting large DMA buffer */
#include <asm/div64.h>           /* for getting 64-bit '%' op */

/* Yields the smaller of X and Y; either argument may be evaluated twice. */
#define MIN_M(X, Y)  (((Y) > (X)) ? (X) : (Y))

#include "vsib_ioctl.h"

// vsib module configuration and debug code flags
#define DEBUG_ALL_WRITES      0
#define DEBUG_ALL_READS       0
#define DEBUG_DESCRINIT       0
#define NORMAL_LINEAR_DESCRS  1
#define USE_ABORT             1
#define USE_ABORT_OR_STOP     1
#define USE_DEVFS_OR_SYSFS    1
#define VSIB_KLOG             0
#define VSIB_VERIFYAREA       0
#define SINGLEBLOCK           0 /* Non-scatter-gather test mode.  Test/debug only. */


//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------
// "KOCOMPAT.H"
//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------

/*
 * Select which sysfs class API family applies to the running kernel headers.
 * Only evaluated when USE_DEVFS_OR_SYSFS is non-zero; at most one of
 * USE_SYSFS_26_pre12 / USE_SYSFS_26_post12 ends up defined.
 */
#if USE_DEVFS_OR_SYSFS
#   if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
       // 'udev' package is required for automagic /dev/vsib entry
#      if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,12))
#         define USE_SYSFS_26_pre12    1    // use class_simple_*
#      else
#         define USE_SYSFS_26_post12   1    // use class_device_*
#      endif
#   else
       // kernels older than 2.5.0: neither USE_SYSFS_26_* flag gets defined
#   endif
#endif

/* Module reference counting helpers: take / drop a reference on this module. */
#define VSIBMOD_INC_USE_COUNT   try_module_get(THIS_MODULE)
#define VSIBMOD_DEC_USE_COUNT   module_put(THIS_MODULE)

/*
 * Declare a static variable 'n' with default value 'v' and register it as a
 * module parameter (permission argument 0).  Each macro expands to TWO
 * statements; the invocation supplies the final semicolon, so these can only
 * be used at file scope.
 */
#define VSIB_MODULE_INT_PARAM(n, v)      static int n = v; module_param(n, int, 0)
#define VSIB_MODULE_UINT_PARAM(n, v)     static unsigned int n = v; module_param(n, uint, 0)

/*
 * Kernel-version compatibility wrappers around the sysfs class API.
 *
 *   CLASS_CREATE(owner, name)                    create the device class
 *   CLASS_DESTROY(class)                         destroy the device class
 *   CLASS_DEV_CREATE(class, devt, device, name)  add a device node to the class
 *   CLASS_DEV_DESTROY(class, devt)               remove that device node
 *   _CLASS_T                                     struct tag of the class object
 *
 * Each branch must pair a create call with the destroy call of the SAME API
 * family (device_*, class_device_* or class_simple_*); mixing families means
 * the destroy call cannot find the object made by the create call.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
#   define CLASS_CREATE(owner, name)                   class_create(owner, name)
#   define CLASS_DESTROY(class)                        class_destroy(class)
#   define CLASS_DEV_CREATE(class, devt, device, name) device_create(class, NULL, devt, device, name)
// /usr/src/linux-headers-2.6.31-15/include/linux/device.h
// extern struct device *device_create(struct class *cls, struct device *parent,
//                                    dev_t devt, void *drvdata,
//                                    const char *fmt, ...)
//                                    __attribute__((format(printf, 5, 6)));
#   define CLASS_DEV_DESTROY(class, devt)              device_destroy(class, devt)
#   define _CLASS_T                                    class
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
#   define CLASS_CREATE(owner, name)                   class_create(owner, name)
#   define CLASS_DESTROY(class)                        class_destroy(class)
#   define CLASS_DEV_CREATE(class, devt, device, name) device_create(class, device, devt, name)
#   define CLASS_DEV_DESTROY(class, devt)              device_destroy(class, devt)
#   define _CLASS_T                                    class
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
#   define CLASS_CREATE(owner, name)                   class_create(owner, name)
#   define CLASS_DESTROY(class)                        class_destroy(class)
#   define CLASS_DEV_CREATE(class, devt, device, name) class_device_create(class, NULL, devt, device, name)
    /* Node was made by class_device_create(), so it must be removed by class_device_destroy(). */
#   define CLASS_DEV_DESTROY(class, devt)              class_device_destroy(class, devt)
#   define _CLASS_T                                    class
#elif (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,12))
#   define CLASS_CREATE(owner, name)                   class_simple_create(owner, name)
#   define CLASS_DESTROY(class)                        class_simple_destroy(class)
#   define CLASS_DEV_CREATE(class, devt, device, name) class_simple_device_add(class, devt, NULL, name)
#   define CLASS_DEV_DESTROY(class, devt)              class_simple_device_remove(devt)
#   define _CLASS_T                                    class_simple
#else
    /* Kernels between 2.6.12 and 2.6.15: class_device_create() without a parent argument. */
#   define CLASS_CREATE(owner, name)                   class_create(owner, name)
#   define CLASS_DESTROY(class)                        class_destroy(class)
#   define CLASS_DEV_CREATE(class, devt, device, name) class_device_create(class, devt, device, name)
#   define CLASS_DEV_DESTROY(class, devt)              class_device_destroy(class, devt)
#   define _CLASS_T                                    class
#endif

/*
 * Handle of the sysfs device class; its type is 'struct class' or
 * 'struct class_simple' depending on which _CLASS_T the version checks selected.
 */
struct _CLASS_T* pClassVSIB;

//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------
// MODULE/CHARDEV INTERFACE
//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------

/*
 * Exported / local declarations
 */
/* Life cycle of an open file. */
static int      open_vsib(struct inode *node, struct file *filep);
static int      release_vsib(struct inode *node, struct file *filep);

/* Data path and positioning within the big ring buffer. */
static ssize_t  read_vsib(struct file *filep, char *buf, size_t count, loff_t *f_pos);
static ssize_t  write_vsib(struct file *filep, const char *buf, size_t count, loff_t *f_pos);
static loff_t   lseek_vsib(struct file *filep, loff_t offset, int orig);

/* Board control (mode/status registers, DMA reset). */
static int      ioctl_vsib(struct inode *node, struct file *filep,
                           unsigned int cmd, unsigned long int arg);

/*
 * File operation table handed to the VFS when the character device is
 * registered.  Every entry points at one of the handlers declared above.
 */
struct file_operations vsib_fops = {
  .owner   = THIS_MODULE,
  .open    = open_vsib,
  .release = release_vsib,
  .read    = read_vsib,
  .write   = write_vsib,
  .llseek  = lseek_vsib,
  .ioctl   = ioctl_vsib,
};

/*
 * Module information.
 */
#define MYNAME "vsib"               /* prefix used in every printk() message of this driver */
#define MYCLASS "misc"              /* NOTE(review): presumably the sysfs class name; not used in this section */
#define NUMOFIOADDR     (256)       /* NOTE(review): presumably size of the PLX register window; confirm at the mapping code */
#define NUMOFCMDADDR    (4)         /* NOTE(review): presumably size of the command register window; confirm at the mapping code */
#define VSIB_DEVICE_ID  (0x5406)    /* NOTE(review): presumably the PCI device ID searched for; confirm at the probe code */

/* Module metadata is emitted only when the kernel headers provide MODULE_LICENSE. */
#ifdef MODULE_LICENSE
MODULE_AUTHOR("Ari.Mujunen@hut.fi, Jan.Wagner@hut.fi");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("A char driver for Metsahovi VSIB I/O PCI board");
MODULE_SUPPORTED_DEVICE("Metsahovi VSIB I/O PCI board");
#endif


//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------
//  PARAMETERS AVAILABLE FOR INSMOD/MODPROBE
//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------

/*
 * 'fix': VSIB command word value to be loaded after init, i.e. during
 * module loading; can be used to quickly test command words.
 */
VSIB_MODULE_UINT_PARAM(fix, VSIB_MODE_STOP);
MODULE_PARM_DESC(fix, "VSIB command word value to be loaded after init");

int          io = 0;                   /* zero meaning "auto"detect/default   */
char         *plxaddr = NULL;          /* memory-mapped PLX conf area         */
unsigned int *cmdaddr = NULL;          /* memory-mapped command register      */
unsigned int localaddr = 0x60000000;   /* VSIbrute on-board DMA local address */
                                       /* for all DMA transfers (kept constant, */
                                       /* all data from the same address)       */

/* 'descrs': number of DMA descriptors, which is also the number of data memory blocks. */
VSIB_MODULE_INT_PARAM(descrs, 1000);
MODULE_PARM_DESC(descrs, "Number of scatter/gather DMA descriptors");

int          descrbufsize = 0;            /* size of DMA descr buffer, aligned to 16 bytes */
int          allocated_descrbufsize = 0;  /* size of the larger area needed to provide the aligned buffer */

/*
 * 'bigbufsize': size in bytes of the big ring buffer; read()/write()/lseek()
 * reduce the file position modulo this value.
 */
VSIB_MODULE_INT_PARAM(bigbufsize, 0);
MODULE_PARM_DESC(bigbufsize, "Size of the secondary large ring buffer");

/* My own major (xxx: dynamically allocated) device number. */
static int vsib_major;

/*
 * The whole 'allocated_descrbufsize' buffer; a normal virtual address,
 * not specially aligned.
 */
static char *vsib_descrbuf;

/* 32-bit address type used for everything handed to the DMA chip. */
typedef unsigned int tAddress32;
static tAddress32 vsib_descrbuf_bus;   /* bus address of start of bigphysbuf */

/*
 * Aligned descriptor table inside the descriptor buffer, as bus addresses for
 * the DMA chip.  getDMAPoint() subtracts 'vsib_descr' from the chip's
 * descriptor pointer to obtain a descriptor index, so it is the bus address
 * of the first descriptor.
 * (Historical note kept from the original: an earlier 'static char *vsib_descr'
 * was "the 128kB-aligned part inside whole dmabuf", kept so that alignment
 * calculations stay within the allocated buffer.)
 * NOTE(review): 'vsib_descrend' is presumably the bus address just past the
 * table; it is not used in this section -- confirm at the init code.
 */
static tAddress32 vsib_descr;          /* bus addresses for DMA chip */
static tAddress32 vsib_descrend;       /* bus addresses for DMA chip */

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
// VSIB PCI device data required for enumeration/finding
// Note: static, load only one instance of vsib module!
static struct pci_dev *p_vsib_dev = NULL;
#endif

/*
 * PLX DMA descriptors, as accepted by hardware.
 * The hardware defines a descriptor as four 4-byte words, so the field order
 * and sizes here must not change.
 */
typedef struct sPLXDMADescr
{
   tAddress32 baddr;            /* bus addr ptr to data area of this descriptor */
   tAddress32 laddr;            /* PLX local bus address value for this transfer block */
   unsigned int size;           /* number of bytes to transfer */
   tAddress32 next;             /* bus addr ptr to next descr; typically next in table. */
                                /* The 4 LSBs carry flag bits, not address bits.        */
} tPLXDMADescr;

/*
 * Virtual address pointer to the DMA descriptor table, indexed by descriptor
 * number in getDMAPoint().
 * According to the original note this is initialized to the start of the
 * aligned area 'vsib_descr' (xxx: the initialization is outside this section).
 */
static tPLXDMADescr *vsib_dd;   /* virt addr ptr to DMA descr table */
/*
 * The big ring buffer.  'vsib_bigbuf' is what read()/write() index with a byte
 * offset; 'vsib_big' is subtracted from a descriptor's 'baddr' in
 * getDMAPoint() to turn a bus address into such a byte offset.
 */
static char *vsib_bigbuf;       /* the large buffer; normal virt. address */
static tAddress32 vsib_big;     /* bus addresses for DMA chip */

/* Keeping max values of fill degrees of both DMA and big buffers. */
static int vsib_max_big_used = 0;        /* for read() ring buffer full detect, same DMA transfer */
static int vsib_global_max_big_used = 0; /* for read() ring buffer full detect, different DMA transfers */
static int vsib_max_big_emptied = 0;     /* for write() ring buffer empty detect */

/*
 * PLX chip definitions.
 * All register offsets below are relative to 'plxaddr', the memory-mapped
 * PLX configuration area.
 */

/* Hardware defines the descriptors as four 4-byte words. */
#define PLX_DMA_DESCR_SIZE 16

/* PLX DMA cmd/status registers are only one byte each. */
#define PLX_DMA_CH0_COMMAND_STATUS 0xa8
#define PLX_DMA_CH1_COMMAND_STATUS 0xa9
/* Bits of a command/status byte. */
#define PLX_DMA_CS_ENABLE 0x01
#define PLX_DMA_CS_START 0x02
#define PLX_DMA_CS_ABORT 0x04
#define PLX_DMA_CS_CLEAR_INT 0x08
#define PLX_DMA_CS_DONE 0x10
/* Channel 0 accessors: read the status byte, test its DONE bit, write a command byte. */
#define PLX_DMA_CH0_getStatus (readb(plxaddr + PLX_DMA_CH0_COMMAND_STATUS))
#define PLX_DMA_CH0_isDone (PLX_DMA_CH0_getStatus & PLX_DMA_CS_DONE)
#define PLX_DMA_CH0_putCommand(x) writeb((x), plxaddr + PLX_DMA_CH0_COMMAND_STATUS)

/* Following PLX chip DMA progress by reading DMA address registers. */
#define PLX_DMA_CH0_PCI_ADDR 0x84

/*
 * PLX_DMA_CH0_getPciAddr is not very useful, since PLX chip doesn't
 * update the readback value of this register during one block of DMA.
 * Instead, the descriptor pointer register gets updated every time PLX
 * loads in a new descriptor.  (4 LSB are flags, remove them in reading.)
 */
#define PLX_DMA_CH0_DESCR_ADDR 0x90
#define PLX_DMA_CH0_getDescrAddr ((readl(plxaddr + PLX_DMA_CH0_DESCR_ADDR)) & 0xfffffff0)

/* For debugging PCI status register. */
unsigned char bus, devfn;

/*
 * Opened VSIB settings.
 * There is a single instance, 'vsib_dev'; read()/write()/lseek()/ioctl() take
 * 'sem' before touching the position fields.
 */
struct vsib_dev_t {
   unsigned long    size;           /* amount of data stored here */
   struct semaphore sem;            /* mutual exclusion semaphore */
   int              is_open;        /* set to 1 on vsib_open(); read()/write() refuse to run while it is 0 */
   int              is_first_write; /* set to 1 on vsib_open(); cleared by the first write() call          */
   tAddress32       readpos;        /* current byte offset in bigbuf, advanced by read()/write() code      */
   tAddress32       vacantpos;      /* PLX DMA hardware point in bigbuf, refreshed from getDMAPoint()      */
   #define vsib_big_first_unwritten vsib_dev.readpos    /* Nicer name, used by 'write()' routine; same storage as 'readpos' */
};
static struct vsib_dev_t vsib_dev;


/*
 * getDMAPoint(): get the "DMA progress point" in bigbuf.
 *
 * Reads the PLX channel 0 descriptor pointer, converts it to an index into
 * the descriptor table 'vsib_dd', steps back one descriptor (with wrap-around)
 * and returns the byte index into the bigbuf array at which that descriptor's
 * data block starts.
 *
 * Returns 0 when the descriptor pointer is zero or does not point into the
 * descriptor table (i.e. DMA has not been started yet).  The return type is
 * unsigned, so there is no separate error value.
 */
static tAddress32 getDMAPoint(void)
{
   tAddress32 descrAddr;
   int        descrNum;

   /*
    * The idea is to safely take a copy of a "pessimistic" value of
    * DMA transfer pointer, i.e. the address of first byte in big buffer
    * which may be still in DMA progress.
    *
    * With PLX we can just take the DMA descriptor pointer
    * from PLX register and assume PLX is transferring
    * this particular DMA block---the previous descr has been
    * transferred.
    * Thus the first "vacant" (i.e. "still pending") address is
    * the starting address of currently running DMA block.
    */
   descrAddr = PLX_DMA_CH0_getDescrAddr;
   descrNum = (descrAddr - vsib_descr) / sizeof(tPLXDMADescr);

   /*
    * Validate the RAW index before backing up.  Checking after the
    * adjustment (and with '>' instead of '>=') let an index of 'descrs + 1'
    * through, which then read vsib_dd[descrs], one element past the table.
    */
   if ((descrAddr == 0) || (descrAddr < vsib_descr) || (descrNum < 0) || (descrNum >= descrs)) {
      #if SINGLEBLOCK
      /* In singleblock test mode never uses descriptors; this fails always. */
      #else
      printk(KERN_INFO MYNAME ": not DMAing yet, descr addr = %08X, dnum = %d\n", descrAddr,
             descrNum);
      #endif
      /* xxx: return value is unsigned, cannot return -1 or similar */
      return (0);
   }

   /* Back up one descriptor, wrapping from the first to the last one. */
   if (descrNum == 0) {
      descrNum = descrs - 1;
   } else {
      descrNum--;
   }

   /* The byte number of first still pending DMA transfer byte. */
   return (vsib_dd[descrNum].baddr - vsib_big);
}



//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------
//  READ VSIB
//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------

/*
 * read_vsib(): transfer data accumulated by DMA in the big ring buffer to
 * user space.
 *
 *   filep  open file; must not have been opened write-only
 *   buf    user-space destination
 *   count  maximum number of bytes to return
 *   f_pos  file position; its value modulo 'bigbufsize' selects the ring
 *          buffer offset to read from, and it is advanced by the number of
 *          bytes returned
 *
 * Returns the number of bytes copied (possibly 0 when nothing new is in the
 * buffer), -EINVAL if the device is not open / is write-only / has no ring
 * buffer, -ERESTARTSYS if interrupted while waiting for the semaphore, or
 * -EFAULT if the user buffer cannot be written.
 *
 * The unread region runs from 'readpos' up to the DMA point 'vacantpos'.  When
 * it crosses the end of the ring buffer it is copied in two pieces; the first
 * piece is limited to the bytes up to the end of the buffer so that the copy
 * never reads beyond 'vsib_bigbuf'.
 */
static ssize_t read_vsib(struct file *filep, char *buf, size_t count, loff_t * f_pos)
{
   unsigned long long ull;
   size_t    avail;         /* all unread bytes, on both sides of the wrap point */
   size_t    to_end;        /* unread bytes contiguous from 'readpos'            */
   size_t    after_wrap;    /* unread bytes at the start of the ring buffer      */
   size_t    remaining;
   size_t    transfer_now;
   ssize_t   written = 0;

   #if VSIB_VERIFYAREA
   /*
    * Apparently new copy_to_user() checks this.
    * Check that we can write to user buffer area.
    */
   if (verify_area(VERIFY_WRITE, buf, count) == -EFAULT)
      return (-EFAULT);
   #endif

   /* If the board was not open, or opened for writing, cannot read from it. */
   if ((!vsib_dev.is_open) || ((filep->f_flags & O_ACCMODE) == O_WRONLY))
      return (-EINVAL);

   /* Without a ring buffer the modulo below would divide by zero. */
   if (bigbufsize <= 0)
      return (-EINVAL);

   /* Exclusive access */
   if (down_interruptible(&vsib_dev.sem))
      return -ERESTARTSYS;

   /* Use the proper offset */
   ull = (unsigned long long)(*f_pos);
   vsib_dev.readpos = do_div(ull, bigbufsize);

   /* The byte number of first still pending DMA transfer byte. */
   vsib_dev.vacantpos = getDMAPoint();

   if (DEBUG_ALL_READS)
      printk(KERN_INFO MYNAME ": reading %zu bytes, readpos=%d, vacantpos=%d\n", count,
             vsib_dev.readpos, vsib_dev.vacantpos);

   if (vsib_dev.readpos <= vsib_dev.vacantpos) {
      /*
       * From first unread to current counter.
       * (We are assuming the "big vacant" counter points
       * to the next intended ring buffer address.)
       * If first_unread and first_vacant are the same,
       * we return zero words, i.e. nothing new in buffer.
       */
      to_end = vsib_dev.vacantpos - vsib_dev.readpos;
      after_wrap = 0;
   } else {
      /*
       * From first unread to end of buffer, and then in the second
       * copy, from start of buffer to first_vacant.
       */
      to_end = (size_t) bigbufsize - vsib_dev.readpos;
      after_wrap = vsib_dev.vacantpos;
   }

   /* Whatever the DMA point claims, never copy past the end of the buffer. */
   if (to_end > (size_t) bigbufsize - vsib_dev.readpos)
      to_end = (size_t) bigbufsize - vsib_dev.readpos;

   avail = to_end + after_wrap;

   /* Monitoring the maximum usage of big buffer. */
   if (avail > (size_t) vsib_max_big_used) {
      vsib_max_big_used = (int) avail;
      if (vsib_max_big_used > vsib_global_max_big_used) {
         vsib_global_max_big_used = vsib_max_big_used;
         printk(KERN_INFO MYNAME ": big secondary ring buffer filled to %u bytes peak over all runs\n", vsib_global_max_big_used);
      } else {
         printk(KERN_INFO MYNAME ": big secondary ring buffer filled to %zu bytes during current run\n", avail);
      }
   }

   /*
    * Transfer the first (or only) half, if fits in 'count'.
    */
   remaining = count;
   transfer_now = MIN_M(remaining, to_end);
   if (transfer_now != 0) {
      if (copy_to_user(buf, &(vsib_bigbuf[vsib_dev.readpos]), transfer_now)) {
         written = -EFAULT;
         goto readExit;
      }
      if (DEBUG_ALL_READS)
         printk(KERN_INFO MYNAME ": transferred %zu bytes from first_unread=%d\n", transfer_now, vsib_dev.readpos);

      buf        += transfer_now;
      remaining  -= transfer_now;
      written    += transfer_now;
      *f_pos     += transfer_now;

      vsib_dev.readpos += transfer_now;

      /*
       * The first piece is bounded by the end of the buffer, so 'readpos'
       * can reach 'bigbufsize' but never exceed it.
       */
      if (vsib_dev.readpos >= (tAddress32) bigbufsize) { /* wrapped */
         /*
          * Transfer the second half, if fits in the residue of
          * 'remaining'. Wrap back to beginning of ring buffer.
          */
         vsib_dev.readpos = 0;

         transfer_now = MIN_M(remaining, after_wrap);
         if (transfer_now != 0) {
            if (copy_to_user(buf, &(vsib_bigbuf[vsib_dev.readpos]), transfer_now)) {
               written = -EFAULT;
               goto readExit;
            }
            #if DEBUG_ALL_READS
            printk(KERN_INFO MYNAME ": transferred residue %zu bytes from first_unread=%d\n",
                   transfer_now, vsib_dev.readpos);
            #endif
            vsib_dev.readpos += transfer_now;
            written += transfer_now;
            *f_pos  += transfer_now;
         }
      }  /* if wrapped */
   }  /* if initial transfer_now is > 0, i.e. there is something in buffer */

readExit:
   up(&vsib_dev.sem);
   return written;
}  /* read_vsib */



//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------
//  WRITE VSIB - puts user process data into the large ring buffer
//               The PLX chip then takes care of DMAing out to the VSIB output port.
//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------

/*
 * write_vsib(): copy user data into the big ring buffer.
 *
 *   filep  open file; must have been opened write-only
 *   buf    user-space source
 *   count  number of bytes offered
 *   f_pos  file position; its value modulo 'bigbufsize' selects the ring
 *          buffer offset to write to, and it is advanced by the number of
 *          bytes accepted so that consecutive write() calls continue where
 *          the previous one stopped
 *
 * Returns the number of bytes accepted (possibly fewer than 'count', or 0 when
 * there is no room), -EINVAL if the device is not open / not write-only / has
 * no ring buffer, -ERESTARTSYS if interrupted while waiting for the
 * semaphore, or -EFAULT if the user buffer cannot be read.
 *
 * Free space runs from 'vsib_big_first_unwritten' up to the DMA point
 * 'vacantpos'.  On the first call after open() the whole buffer counts as free.
 * Space crossing the end of the ring buffer is filled in two pieces; the first
 * piece is always limited to the bytes up to the end of the buffer so that the
 * copy never writes beyond 'vsib_bigbuf'.
 */
static ssize_t write_vsib(struct file *filep, const char *buf, size_t count, loff_t * f_pos)
{
   unsigned long long ull;
   ssize_t    readThisFar = 0;
   size_t     to_end;        /* free bytes contiguous from first_unwritten   */
   size_t     after_wrap;    /* free bytes at the start of the ring buffer   */
   size_t     transfer_now;

   /* If the board was not opened, or not opened for writing only, cannot write to it. */
   if ((!vsib_dev.is_open) || ((filep->f_flags & O_ACCMODE) != O_WRONLY))
      return (-EINVAL);

   /* Without a ring buffer the modulo below would divide by zero. */
   if (bigbufsize <= 0)
      return (-EINVAL);

   /* Exclusive access */
   if (down_interruptible(&vsib_dev.sem))
      return -ERESTARTSYS;

   /* Use the proper offset */
   ull = (unsigned long long)(*f_pos);
   vsib_big_first_unwritten = do_div(ull, bigbufsize);

   /* The byte number of first still pending DMA transfer byte. */
   vsib_dev.vacantpos = getDMAPoint();

   /* Monitoring the usage of big buffer dropping to zero. */
   {
      int emptied = (int) vsib_dev.vacantpos - (int) vsib_big_first_unwritten;
      static int prevEmptied;

      if (emptied < 0) {
         emptied = bigbufsize + emptied;
      }
      if (vsib_dev.is_first_write) {
         emptied = bigbufsize;
      }
      else {
         /* Ignore when empty space is decreasing. */
         if ((emptied > prevEmptied)
             && (emptied > vsib_max_big_emptied)) {
            vsib_max_big_emptied = emptied;
            printk(KERN_INFO MYNAME
                   ": big secondary ring buffer empty space increased to %d bytes\n",
                   vsib_max_big_emptied);
         }
      }
      prevEmptied = emptied;
   }

   #if DEBUG_ALL_WRITES
   printk(KERN_INFO MYNAME ": writing %zu bytes, first_unwritten=%d, vacantpos=%d\n", count,
          vsib_big_first_unwritten, vsib_dev.vacantpos);
   #endif

   if (vsib_dev.is_first_write) {
      printk(KERN_INFO MYNAME
             ": first call after open(), writing %zu bytes, first_unwritten=%d, vacantpos=%d\n",
             count, vsib_big_first_unwritten, vsib_dev.vacantpos);
      /*
       * For the first call only, where first_unwritten and first_vacant
       * may be the same, allow the buffer to fill: everything from the
       * start offset to the end of the buffer, then everything before
       * the start offset.
       */
      vsib_dev.is_first_write = 0;
      to_end = (size_t) bigbufsize - vsib_big_first_unwritten;
      after_wrap = vsib_big_first_unwritten;
   }
   else if (vsib_big_first_unwritten <= vsib_dev.vacantpos) {
      /*
       * Vacant space from first unwritten byte to the DMA point.
       * If first_unwritten and first_vacant are the same,
       * the whole buffer is full of data to be transferred to VSIB.
       */
      to_end = vsib_dev.vacantpos - vsib_big_first_unwritten;
      after_wrap = 0;
   }
   else {
      /*
       * From first unwritten to end of buffer, and then in the second
       * copy, from start of buffer to current DMA point.
       */
      to_end = (size_t) bigbufsize - vsib_big_first_unwritten;
      after_wrap = vsib_dev.vacantpos;
   }  /* if */

   /* Whatever the DMA point claims, never copy past the end of the buffer. */
   if (to_end > (size_t) bigbufsize - vsib_big_first_unwritten)
      to_end = (size_t) bigbufsize - vsib_big_first_unwritten;

   /* Transfer the first (or only) half, if fits in 'count'. */
   transfer_now = MIN_M(count, to_end);
   if (transfer_now != 0) {
      if (copy_from_user(&(vsib_bigbuf[vsib_big_first_unwritten]), buf, transfer_now)) {
         readThisFar = -EFAULT;
         goto writeExit;
      }
      #if DEBUG_ALL_WRITES
      printk(KERN_INFO MYNAME ": transferred %zu bytes to first_unwritten=%d\n", transfer_now,
             vsib_big_first_unwritten);
      #endif

      vsib_big_first_unwritten += transfer_now;
      buf += transfer_now;
      count -= transfer_now;
      readThisFar = transfer_now;
      /* Advance the file position; the next write() derives its offset from it. */
      *f_pos += transfer_now;

      /*
       * The first piece is bounded by the end of the buffer, so
       * 'vsib_big_first_unwritten' can reach 'bigbufsize' but never exceed it.
       */
      if (vsib_big_first_unwritten >= (tAddress32) bigbufsize) { /* wrapped */
         /*
          * Transfer the second half, if fits in the residue of
          * 'count'. Wrap back to beginning of ring buffer.
          */
         vsib_big_first_unwritten = 0;

         transfer_now = MIN_M(count, after_wrap);
         if (transfer_now != 0) {
            if (copy_from_user(&(vsib_bigbuf[vsib_big_first_unwritten]), buf, transfer_now)) {
               readThisFar = -EFAULT;
               goto writeExit;
            }
            #if DEBUG_ALL_WRITES
            printk(KERN_INFO MYNAME ": transferred residue %zu bytes from first_unread=%d\n",
                   transfer_now, vsib_big_first_unwritten);
            #endif
            vsib_big_first_unwritten += transfer_now;
            readThisFar += transfer_now;
            *f_pos += transfer_now;
         }
      }  /* if wrapped */
   }
   /* if initial transfer_now is > 0, i.e. there is room in buffer */

writeExit:
   if (VSIB_KLOG)
      printk(KERN_INFO MYNAME ": bigbuf[7]=%08X\n", *((int *) &(vsib_bigbuf[7])));
   up(&vsib_dev.sem);
   return readThisFar;
}


//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------
//  SEEK VSIB
//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------

/*
 * lseek_vsib(): reposition the file offset and the ring buffer read/write point.
 *
 *   filep   open file whose 'f_pos' is updated
 *   offset  requested offset
 *   orig    0 (SEEK_SET) or 1 (SEEK_CUR); 2 (SEEK_END) and anything else
 *           are rejected
 *
 * On success stores the new position in 'filep->f_pos', sets
 * 'vsib_dev.readpos' to that position modulo 'bigbufsize', and returns the new
 * position.  Returns -EINVAL for an unsupported 'orig', a negative resulting
 * position or a missing ring buffer, and -ERESTARTSYS if interrupted while
 * waiting for the semaphore.  The semaphore is released on every path.
 */
static loff_t lseek_vsib(struct file *filep, loff_t offset, int orig)
{
   unsigned long long ull;
   loff_t newpos;

   /* Without a ring buffer the modulo below would divide by zero. */
   if (bigbufsize <= 0)
      return -EINVAL;

   if (down_interruptible(&vsib_dev.sem))
      return -ERESTARTSYS;

   switch (orig) {
   case /*SEEK_SET*/ 0:
      newpos = offset;
      break;
   case /*SEEK_CUR*/ 1:
      newpos = filep->f_pos + offset;
      break;
   case /*SEEK_END*/ 2:
      if (VSIB_KLOG) printk(KERN_DEBUG MYNAME "lseek(): tried to seek with SEEK_END\n");
      /* fall through */
   default:
      newpos = -EINVAL;
      goto seekExit;
   }  /* switch */

   /* A negative position has no meaning for the ring buffer. */
   if (newpos < 0) {
      newpos = -EINVAL;
      goto seekExit;
   }

   filep->f_pos = newpos;

   /* Update the read()/write() point to "wrapped" size of bigbuf. */
   ull = newpos;
   vsib_dev.readpos = do_div(ull, bigbufsize);

   if (VSIB_KLOG) printk(KERN_DEBUG MYNAME "lseek(): vsib_dev.readpos=0x%08X\n", vsib_dev.readpos);

seekExit:
   /* Single exit so the semaphore is never left held on an error return. */
   up(&vsib_dev.sem);
   return (newpos);
}



//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------
//  IOCTL VSIB - gives access to mode reg (w) and status reg (r)
//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------

static int ioctl_vsib(struct inode *node, struct file *filep, unsigned int cmd, unsigned long int arg)
{
   int result = 0;

   if (down_interruptible(&vsib_dev.sem))
      return -ERESTARTSYS;

   switch (cmd) {
   case VSIB_SET_MODE:
      {
         writel(arg, cmdaddr);   /* memory-mapped "put" into * command register */
         break;
      }
   case VSIB_GET_STATUS:
      {
         char statusbyte = 0;   /* inb(STATUSREG); */
         unsigned char *p = (unsigned char *) arg;

         /* Check that we can write to user buffer area. */
         #if VSIB_VERIFYAREA
         if (verify_area(VERIFY_WRITE, p, sizeof(unsigned char)) == -EFAULT) {
            result = -EFAULT;
            goto ioctlExit;
         }
         #endif
         put_user(statusbyte, p);   /* 'p' is a pointer to * 'char'/byte */
         break;
      }
   case VSIB_GET_DMA_RETRIES:
      {
         unsigned long int *p = (unsigned long int *) arg;

         /* Check that we can write to user buffer area. */
         #if VSIB_VERIFYAREA
         if (verify_area(VERIFY_WRITE, p, sizeof(unsigned long int)) == -EFAULT) {
            result = -EFAULT;
            goto ioctlExit;
         }
         #endif
         put_user(0, p);
         break;
      }
   case VSIB_GET_BIGBUF_SIZE:
      {
         unsigned long int *p = (unsigned long int *)arg;
         put_user(bigbufsize, p);
         break;
      }
   case VSIB_GET_BYTES_IN_BIGBUF:
      {
         unsigned long int *p = (unsigned long int *)arg;
         int filled;

#if VSIB_KLOG
         printk(KERN_INFO MYNAME ": DMA status = %02X\n", (int)(PLX_DMA_CH0_getStatus));
#endif

         /* xxx: doesn't stop when DMA reaches end of real data, */
         /* but instead wraps around... */
         vsib_dev.vacantpos = getDMAPoint();
         filled = (int)vsib_big_first_unwritten - (int)vsib_dev.vacantpos;

         if (filled < 0) {
         filled = bigbufsize + filled;
         }

         put_user(filled, p);
         break;
      }
   case VSIB_RESET_DMA:
      {
         int clearMemoryBuffersAndRWPointers = 1;
         unsigned int u;
         int i;
         int debugStatus;
         unsigned int dmaDescrFlagsRW;

         /*
          * When write()ing from mem to VSIB, we need to reset DMA
          * when memory buffers have already been filled.
          * (Default "clear" happens when arg==0 (for compatibility).)
          */
         if (arg) {
            clearMemoryBuffersAndRWPointers = 0;
         }

         /*
          * Descriptor address 4 LSBs are flags as follows:
          * Bit 0: descriptor in PCI address space
          */
#define DESCR_IN_PCI 0x00000001
         /* Bit 1: no end of chain */
#define DESCR_END_OF_CHAIN 0x00000002
         /*
          * Bit 2: no interrupt after this transfer block
          * Bit 3: local-->PCI==1 ('wr' to disk), ==0 PCI-->local ('rd'
          * from d)
          *
          * If the device special file has been opened for writing
          * only, then init DMA in direction of PCI-->local bus on VSIB
          * board.
          */
         if ((filep->f_flags & O_ACCMODE) == O_WRONLY) {
            dmaDescrFlagsRW = 0x00000000; /* Bit 3: * PCI-->local==0 */
         }
         else {
            dmaDescrFlagsRW = 0x00000008; /* Bit 3: * local-->PCI==1 */
         }

         /*
          * xxx: Any need to stop the VSIbrute board, apparently no,
          * since this disables the DMA first before reiniting.
          * Initialize DMA, junking everything which may already be in
          * buffer.
          * (Usually this is called when multiple read attempts return
          * no data.)
          */

#if USE_ABORT_OR_STOP

         /*
          * Re-init DMA channel 0.
          * Disable DMA and abort and clear interrupts and all for DMA ch0.
          * From manual: "Aborting when no DMA in progress causes the next DMA to abort."
          */
         if (!(debugStatus = PLX_DMA_CH0_isDone)) {
            #if USE_ABORT
                printk(KERN_INFO MYNAME ": trying to abort DMA, status = %02X\n", (int) debugStatus);
               /*
                * (_START bit is really required; otherwise doesn't
                * really abort and returns with done bit ==1 immediately.
                */
                PLX_DMA_CH0_putCommand(PLX_DMA_CS_ABORT | PLX_DMA_CS_CLEAR_INT | PLX_DMA_CS_START);
            #else
               /*
                * Chip rev. AB occasionally hangs when abort bit is set
                * from PCI. So, as a workaround we clear all '.next' pointers in
                * DMA chain. Then our DMA should stop (as long as VSIclk is
                * running...). This typically takes about 675000/4/32000000==5.3msec
                * though...
                */
               for (i = 0; i < descrs; i++) {
                  vsib_dd[i].next = 0 /* no next */  | DESCR_IN_PCI | dmaDescrFlagsRW | DESCR_END_OF_CHAIN;
               }
            #endif

            /* Wait for abort/last transfer to complete, "done==1". */
            i = 0;
            while (!(debugStatus = PLX_DMA_CH0_isDone)) {
               i++;
               /*
                * Asking for the status takes a minimum of
                * 2cycles*30ns. Completing the current block takes 2.7--12msec,
                * even more. It is not polite to busy wait so long, so with rev.
                * AC chips we should revert back to using the real abort bit.
                * 1000000 queries * 60ns --> about 60msec.
                */
               #if USE_ABORT
               if (i > 100) {
               #else
               if (i > 1000000) {
               #endif
                  /*
                   * xxx: This really happens quite easily e.g. when
                   * non-existent DMA PCI target addresses are used by PLX chip.
                   * xxx: Must find a stronger "master reset" for PLX.
                   */
                  printk(KERN_ERR MYNAME ": aborting DMA failed, status = %02X\n",
                         (int) debugStatus);
                  break;   /* prevent looping * forever */
               }
            }  /* while not done */
         }  /* if DMA was in progress */

         /* Clear interrupt, cancel START bit (possibly still on). */
         PLX_DMA_CH0_putCommand(PLX_DMA_CS_CLEAR_INT);

#endif /* if use abort/stop */

         if (clearMemoryBuffersAndRWPointers) {
            /* Clear memory buffers (to aid in debugging). */
            char *d;

            d = vsib_descrbuf;   /* virt. addr */
            for (i = 0; i < descrbufsize; i++) {
               d[i] = 0;
            }
            d = vsib_bigbuf;  /* virt. addr */
            for (i = 0; i < bigbufsize; i++) {
               d[i] = 0;
            }
         }

         /*
          * Define PCI command codes to be used on PCI bus when doing
          * DMA. The default PCI memory read code is MRL, cache line only.
          * We always burst long regions, thus MRM, memory read
          * multiple is more appropriate and newer MBs can do it more
          * efficiently.
          */
         {
            unsigned int old = readl(plxaddr + 0x6c);

            printk(KERN_INFO MYNAME ": CNTRL was = %08X\n", old);
            /*
             * Change PCI read command code from power-up default
             * MRL (memory read (cache) line) --> MRM (memory read
             * multiple).
             */
            writel((old & 0xfffffff0) | 0x0000000c, plxaddr + 0x6c);
            /*
             * xxx: could perhaps change PCI write command code
             * from MW --> MWI (0xf) (memory write and invalidate)
             * writel((old & 0xffffff00) | 0x000000fc , plxaddr+0x6c);
             */
            old = readl(plxaddr + 0x6c);
            printk(KERN_INFO MYNAME ": CNTRL is now = %08X\n", old);
         }

         /*
          * 32-bit, 0WS, enable bursting, hold local addr constant,
          * Demand mode, DMA fast/slow stop mode =0, slow==BLAST used.
          * BTERM# input is enabled, so PLX doesn't do Burst-4 but
          * does Burst-forever; Xilinx doesn't ask for extra ADS cycles
          * but instead keeps BTERM#==1 always.
          */
#if SINGLEBLOCK
         writel(0x00000003 /* 0-1: 32-bit local bus */
                | 0x00000100  /* 8: local bus bursting enabled */
                /* 9: NO scatter/gather mode enabled */
                | 0x00000800  /* 11: keep local address * constant */
                | 0x00001000  /* 12: demand mode (DREQ/DACK * signals used) */
                , plxaddr + 0x80);
#else
         writel(0x00000003 /* 0-1: 32-bit local bus */
                /*
                 * 2-5: 0 wait states
                 * 6: TA#/READY# input not enabled
                 */
                | 0x00000080  /* 7: BTERM# input enabled (but * not used by Xilinx) */
                | 0x00000100  /* 8: local bus bursting enabled */
                | 0x00000200  /* 9: scatter/gather mode * enabled */
                /* 10: done interrupt not enabled */
                | 0x00000800  /* 11: keep local address * constant */
                | 0x00001000  /* 12: demand mode (DREQ/DACK * signals used) */
                /*
                 * 13: no special PCI write and invalidate
                 * 14: EOT# pin not used
                 * 15: slow mode termination (2 before /w BLAST)
                 * 16: auto-zero count after transfer in descr
                 * 17: interrupt to ==0 local, ==1 PCI int
                 * 18: PCI DAC dual-address cycle...? not enabled, >4GB address space
                 * 19--31: reserved
                 */
                , plxaddr + 0x80);
#endif
         /* PCI address; physical bus address of DMA buffer. */
         writel(vsib_big, plxaddr + PLX_DMA_CH0_PCI_ADDR);
         /* Local bus address; CSxxx matches to a single/same LA 'localaddr'. */
         writel(localaddr, plxaddr + 0x88);
         /*
          * xxx: These PCI+local addr are probably not needed at all
          * in scatter/gather mode, they are overwritten by descr values.
          */

#if SINGLEBLOCK
         /*
          * Descriptor pointer; initially no descriptors/chaining/scatter,
          * but direction Local<->PCI is determined by flags in this reg.
          */
         writel(dmaDescrFlagsRW, plxaddr + PLX_DMA_CH0_DESCR_ADDR);
         /* Transfer byte count; use the max of bigbuf. */
         writel(bigbufsize, plxaddr + 0x8c);
#else
         /*
          * Chain of multiple descriptors, started by the first descr
          * ptr. Descriptor pointer; init to DMA descr table.
          * xxx: Must keep start of descr table 16 byte (4lword)
          * aligned!
          * The PLX chip needs the descriptor pointer as PCI bus
          * physical address and our setup code needs the kernel virtual addresses.
          */
         writel(u = ((tAddress32) virt_to_bus(vsib_dd) | DESCR_IN_PCI
                     /* Bit 0: descriptor in PCI address space Bit 1: no end of chain Bit 2: no
                        interrupt after this transfer block */
                     | dmaDescrFlagsRW /* Bit 3: * local-->PCI==1 */
                ), plxaddr + PLX_DMA_CH0_DESCR_ADDR);
         printk(KERN_INFO MYNAME ": virt ptr to 1st descr = %08X\n", ((unsigned int) vsib_dd));
         printk(KERN_INFO MYNAME ": bus  ptr to 1st descr = %08X\n", u);

         /* Init chain of descriptors, dividing bigbuf equally to all descrs. */
         {
            unsigned int blocksize = bigbufsize / descrs;   /* xxx: any limits to transfer size? */

            printk(KERN_INFO MYNAME ": calculated transfer size of each descr = %u\n", blocksize);
            /*
             * VSIbrute apparently doesn't handle sub-32-bit-word
             * transfers. In theory PLX allows it, but requires the use of byte
             * lane enables.
             */
            blocksize &= ~(0x00000003);   /* align to 4 * bytes */

            /*
             * xxx: should definitely check for:
             * assert( (blocksize * descrs) == bigbufsize );
             */

            printk(KERN_INFO MYNAME ": transfer size of each descr = %u\n", blocksize);

#   if NORMAL_LINEAR_DESCRS

            /* Normal "linear" descriptors. */
            for (i = 0; i < descrs; i++) {

               /* PCI hardware bus address for DMA data transfer. */
               u = vsib_dd[i].baddr = vsib_big + i * blocksize;
               if (DEBUG_DESCRINIT) printk(KERN_INFO MYNAME ": %d. descr PCI addr = %08X\n", i, u);

               /*
                * PLX on-board local bus address, always the same
                * 'localaddr'.
                */
               u = vsib_dd[i].laddr = localaddr;
               if (DEBUG_DESCRINIT) printk(KERN_INFO MYNAME ": %d. descr loc addr = %08X\n", i, u);

               u = vsib_dd[i].size = blocksize; /* transfer size */
               if(DEBUG_DESCRINIT) printk(KERN_INFO MYNAME ": %d. descr tr. size = %08X\n", i, u);

               u = vsib_dd[i].next = ((tAddress32)
                                      virt_to_bus(&(vsib_dd[i + 1])))
                  | DESCR_IN_PCI | dmaDescrFlagsRW;
               if (DEBUG_DESCRINIT) printk(KERN_INFO MYNAME ": %d. descr next des = %08X\n", i, u);

            }  /* for each descriptor */

            /*
             * Fix the pointer of last descriptor to point back to
             * start of chain.
             */
            i--;
            u = vsib_dd[i].next = ((tAddress32)
                                   virt_to_bus(&(vsib_dd[0]))) | DESCR_IN_PCI | dmaDescrFlagsRW;
            printk(KERN_INFO MYNAME ": last (%d) descr next des = %08X\n", i, u);
            printk(KERN_INFO MYNAME ": last (%d) descr PCI bus  = %08X\n", i, vsib_dd[i].baddr);
#   else
            /*
             * Special DMA chaining test descriptors.
             * Testing with small 5MB bigbuf; same 1.3MB is "reused"
             * and only small 1kB "one-second-marker-blocks" are created
             * in the beginning of bigbuf.
             */
            for (i = 0; i < descrs; i++) {
               /* PCI hardware bus address for DMA data transfer. */
               u = vsib_dd[i].baddr = vsib_big + 10 * 1024;
               /* PLX on-board local bus address, always the same 'localaddr'. */
               u = vsib_dd[i].laddr = localaddr;
               u = vsib_dd[i].size = 4 * 32000 * 10;  /* 1.3MB, * 1280000 * bytes */
               u = vsib_dd[i].next = ((tAddress32)
                                      virt_to_bus(&(vsib_dd[i + 1])))
                  | DESCR_IN_PCI | dmaDescrFlagsRW;
            }  /* for each descriptor */
            /*
             * Fix the pointer of last descriptor.
             */
            i--;
            /* Automatic (hw) ring buffer. */
            u = vsib_dd[i].next = ((tAddress32)
                                   virt_to_bus(&(vsib_dd[0]))) | DESCR_IN_PCI | dmaDescrFlagsRW;
            /* Fix 1-second descriptors (0, 100, 200...). */
            for (i = 0; i < 10; i++) {
               u = vsib_dd[i * 100].baddr = vsib_big + i * 1024;
               printk(KERN_INFO MYNAME ": fix descr[%d] PCI addr = %08X\n", i * 100, u);
               u = vsib_dd[i * 100].size = 1024;
               u = (vsib_dd[i * 100 + 1].size *= 2);
               u = (vsib_dd[i * 100 + 1].size -= 1024);
            }
            #if 0
            /* Move 100,200... ffff ffff marker in middle of 1024-byte buffer. */
            u = (vsib_dd[1].size -= 512);
            #endif

#   endif
            /* else not NORMAL_LINEAR_DESCRS */
         }
#endif /* else not SINGLEBLOCK */

         /* Enable and start DMA ch0. */
         PLX_DMA_CH0_putCommand(PLX_DMA_CS_ENABLE | PLX_DMA_CS_START);
         printk(KERN_INFO MYNAME ": PLX DMA ch0 started\n");

         if (clearMemoryBuffersAndRWPointers) {
            vsib_dev.readpos = 0;
            vsib_dev.vacantpos = 0;
         }
         break;
      }
   case VSIB_DELAYED_STOP_DMA:
      {
         unsigned int dmaDescrFlagsRW;
         int i;

         /*
          * If the device special file has been opened for writing
          * only, then init DMA in direction of PCI-->local bus on VSIB
          * board.
          */
         if ((filep->f_flags & O_ACCMODE) == O_WRONLY) {
            dmaDescrFlagsRW = 0x00000000; /* Bit 3: * PCI-->local==0 */
         }
         else {
            dmaDescrFlagsRW = 0x00000008; /* Bit 3: * local-->PCI==1 */
         }

         /*
          * Chip rev. AB occasionally hangs when abort bit is set from
          * PCI. So, as a workaround we clear all '.next' pointers in DMA
          * chain.
          * Then our DMA should stop (as long as VSIclk is running...).
          * This typically takes about 675000/4/32000000==5.3msec
          * though...
          */
         for (i = 0; i < descrs; i++) {
            vsib_dd[i].next = 0 /* no next */  | DESCR_IN_PCI |
               dmaDescrFlagsRW | DESCR_END_OF_CHAIN;
         }

         /*
          * Reset the per-DMA ring buffer peak fill counter
          */
         vsib_max_big_used = 0;
         break;
      }
   case VSIB_IS_DMA_DONE:
      {
         unsigned long int *p = (unsigned long int *) arg;

         /* Get the DMA ch0 status value, extract bit "done" and return it. */
         put_user(PLX_DMA_CH0_isDone, p);
         break;
      }
   default:
      {
         result = -EINVAL;
      }
   }
   result = 0;
   goto ioctlExit;

ioctlExit:
   up(&vsib_dev.sem);
   return result;
}



/*
 * Open/allocate routine.
 */
static int open_vsib(struct inode *node, struct file *filep)
{
   int result = 0;

   if (down_interruptible(&vsib_dev.sem))
      return -ERESTARTSYS;

   /* if opened, don't allow re-open; only one user process at a time */
   if (0 != vsib_dev.is_open) {
      up(&vsib_dev.sem);
      return -EMFILE;
   }
   vsib_dev.is_open = 1;

   VSIBMOD_INC_USE_COUNT;

   /* Init the board into stopped state. */
   up(&vsib_dev.sem);
   (void) ioctl_vsib(node, filep, VSIB_SET_MODE, VSIB_MODE_STOP);

   /*
    * xxx: wait until stopped/last cycle DACKed?
    * Flush all accumulated data.
    */
   if (down_interruptible(&vsib_dev.sem)) {
      // return -ERESTARTSYS;
   }
   vsib_dev.readpos = 0;
   vsib_dev.vacantpos = 0;

   /*
    * Enable first write call to put stuff in big buffer,
    * although both head and tail of ring buffer == 0.
    */
   vsib_dev.is_first_write = 1;

   /* Reset DMA according to R/W mode in 'filep->f_flags'. */
   up(&vsib_dev.sem);
   result = ioctl_vsib(node, filep, VSIB_RESET_DMA, 0);
   return result;
}



/*
 * Close/release/deallocate routine.
 *
 * Marks the device as closed, stops the board, resets the ring buffer
 * read/vacant positions and drops the module use count taken in open().
 * Always returns 0.
 *
 * The semaphore is taken with the uninterruptible down(): a release
 * cannot be retried, so bailing out with -ERESTARTSYS would leave
 * 'is_open' set and the device unusable until the module is reloaded.
 */
static int release_vsib(struct inode *node, struct file *filep)
{
   down(&vsib_dev.sem);

   if (!vsib_dev.is_open) {
      /* Nothing to release. */
      up(&vsib_dev.sem);
      return 0;
   }
   vsib_dev.is_open = 0;

   /*
    * Stop the board.  ioctl_vsib() up()s the semaphore on its exit path
    * (and presumably takes it on entry), so release it around the call.
    */
   up(&vsib_dev.sem);
   (void) ioctl_vsib(node, filep, VSIB_SET_MODE, VSIB_MODE_STOP);

   /*
    * xxx: wait until stopped/last cycle DACKed?
    * Flush all accumulated data (will be done again in 'open()').
    */
   down(&vsib_dev.sem);
   vsib_dev.readpos = 0;
   vsib_dev.vacantpos = 0;

   VSIBMOD_DEC_USE_COUNT;

   up(&vsib_dev.sem);
   return 0;
}


// ---------------------------------------------------------------------------
/*
 * Driver initialization.
 */

int init_module(void)
{
   unsigned int tmp_uint = 0;
   u32 u = 0;

   /* Find VSIbrute PLX-based PCI card. */
   #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))

   if (!pcibios_present()) {
      printk(KERN_ERR MYNAME ": PCI BIOS not present or not accessible\n");
      return -ENODEV;
   }
   if (pcibios_find_device(PCI_VENDOR_ID_PLX, VSIB_DEVICE_ID, /* pci_index=> */ 0, &bus, &devfn)) {
      printk(KERN_ERR MYNAME ": PLX vendor=0x%04X, device id=0x%04X not found\n", PCI_VENDOR_ID_PLX,
             VSIB_DEVICE_ID);
      return -ENODEV;
   }
   pcibios_read_config_dword(bus, devfn, PCI_BASE_ADDRESS_0, &u);

   #else

   if (NULL != p_vsib_dev) {
      printk(KERN_INFO MYNAME ": current p_vsib_dev wasn't null, second module instance?");
   }
   // pci_present() : obsolete since 2.5; pci not present if search funcs return null
   // while(pci_find_device(PCI_VENDOR_ID_PLX, VSIB_DEVICE_ID, p_vsib_dev)) {
   // configure_device(p_vsib_dev);
   // }
   p_vsib_dev = pci_get_device(PCI_VENDOR_ID_PLX, VSIB_DEVICE_ID, NULL);
   if (NULL == p_vsib_dev) {
      printk(KERN_ERR MYNAME ": PLX vendor=0x%04X, device id=0x%04X not found\n", PCI_VENDOR_ID_PLX,
             VSIB_DEVICE_ID);
      return -ENODEV;
   }
   if (0 != pci_read_config_dword(p_vsib_dev, PCI_BASE_ADDRESS_0, &tmp_uint)) {
      printk(KERN_INFO " error in init_module(), could not read PCI_BASE_ADDRESS_0 \n");
   }
   u = tmp_uint;
   #endif
   if (VSIB_KLOG) printk(KERN_DEBUG MYNAME ": base0=0x%08X\n", u);

   u &= PCI_BASE_ADDRESS_MEM_MASK;
   plxaddr = ioremap(u, NUMOFIOADDR);  /* PLX configuration area */
   if (VSIB_KLOG) printk(KERN_DEBUG MYNAME ": base0 ioremap()ped to 0x%08X (plxaddr)\n", (unsigned int) plxaddr);

   #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
   pcibios_read_config_dword(bus, devfn, PCI_BASE_ADDRESS_2, &u);
   #else
   pci_read_config_dword(p_vsib_dev, PCI_BASE_ADDRESS_2, &tmp_uint);
   u = tmp_uint;
   #endif

   if (VSIB_KLOG) printk(KERN_DEBUG MYNAME ": base2=0x%08X\n", u);

#   if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
   pcibios_read_config_dword(bus, devfn, PCI_BASE_ADDRESS_3, &u);
#   else
   pci_read_config_dword(p_vsib_dev, PCI_BASE_ADDRESS_3, &tmp_uint);
   u = tmp_uint;
#   endif

   if (VSIB_KLOG) printk(KERN_DEBUG MYNAME ": base3=0x%08X\n", u);

   u &= PCI_BASE_ADDRESS_MEM_MASK;
   cmdaddr = ioremap(u, NUMOFCMDADDR); /* Local bus address range 1 of * PLX */

   if (VSIB_KLOG) printk(KERN_DEBUG MYNAME ": base3 ioremap()ped to 0x%08X (cmdaddr)\n", (unsigned int) cmdaddr);

   /* Check PCI cache line size.  (BIOS should set a reasonable value.) */
   {
      unsigned char ub;

      #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
      pcibios_read_config_byte(bus, devfn, PCI_CACHE_LINE_SIZE, &ub);
      #else
      pci_read_config_byte(p_vsib_dev, PCI_CACHE_LINE_SIZE, &ub);
      #endif

      printk(KERN_DEBUG MYNAME ": cache line size was = %i longwords\n", ub);
      #if 0
         #ifndef SMP_CACHE_BYTES
         #define SMP_CACHE_BYTES L1_CACHE_BYTES
         #endif
         if ((ub << 2) != SMP_CACHE_BYTES) {
            printk(KERN_INFO "  PCI cache line size set incorrectly "
                   "(%i bytes) by BIOS/FW, correcting to %i\n", (ub << 2), SMP_CACHE_BYTES);
            pcibios_write_config_byte(bus, devfn, PCI_CACHE_LINE_SIZE, SMP_CACHE_BYTES >> 2);
         }
      #endif
      #if 0
         #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
         pcibios_write_config_byte(bus, devfn, PCI_CACHE_LINE_SIZE, 16);
         pcibios_read_config_byte(bus, devfn, PCI_CACHE_LINE_SIZE, &ub);
         #else
         pci_write_config_byte(p_vsib_dev, PCI_CACHE_LINE_SIZE, 16);
         pci_read_config_byte(p_vsib_dev, PCI_CACHE_LINE_SIZE, &ub);
         #endif
         printk(KERN_DEBUG MYNAME ": cache line size now = %i longwords\n", ub);
      #endif
   }

   /*
    * Change the local starting address of local address range 1.
    * This will be VSIbrute's command register.
    */
   writel(0x50000001, (plxaddr + 0xf4));
   u = readl(plxaddr + 0xf4);
   printk(KERN_DEBUG MYNAME ": local address range 1 starting address changed to 0x%08X\n", u);
   /* Disable Ready input for this area. */
   writel(0x00000103, (plxaddr + 0xf8));
   u = readl(plxaddr + 0xf8);
   printk(KERN_DEBUG MYNAME ": local address range 1 region descr changed to 0x%08X\n", u);

   /* Check I/O addresses. */
   if (io <= 0) {
      /* Not set explicitly, "probe default", i.e. read PCI config. */
      #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
      pcibios_read_config_dword(bus, devfn, PCI_BASE_ADDRESS_1, &u);
      #else
      pci_read_config_dword(p_vsib_dev, PCI_BASE_ADDRESS_1, &tmp_uint);
      u = tmp_uint;
      #endif

      printk(KERN_DEBUG MYNAME ": base1=0x%08X\n", u);
      u &= PCI_BASE_ADDRESS_IO_MASK;
      io = u & 0x0000ffff;
   }

   if ((bigbufsize % descrs) != 0) {
      printk(KERN_ERR MYNAME ": bigbufsize (%d) not evenly divisible with descrs (%d)\n",
             bigbufsize, descrs);
      iounmap(plxaddr);
      iounmap(cmdaddr);
      return -ENODEV;
   }

#   if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
   if (check_region(io, NUMOFIOADDR)) {
      printk(KERN_ERR MYNAME ": unable to get I/O addresses 0x%04x--0x%04x\n", io,
             io + NUMOFIOADDR - 1);
      iounmap(plxaddr);
      iounmap(cmdaddr);
      return -ENODEV;
   }

   /* Reserve I/O addresses after successful probing. */
   request_region(io, NUMOFIOADDR, MYNAME);
#   else
   if (request_region(io, NUMOFIOADDR, MYNAME)) {
      printk(KERN_ERR MYNAME ": request_region() unable to get I/O addresses 0x%04x--0x%04x\n", io,
             io + NUMOFIOADDR - 1);
      if (request_region(io, NUMOFIOADDR, MYNAME)) {
         printk(KERN_ERR MYNAME ": second request_region() attempt failed, too. quitting.\n");
         iounmap(plxaddr);
         iounmap(cmdaddr);
         return -ENODEV;
      } else {
         printk(KERN_INFO MYNAME ": second request_region() successful!\n");
      }
   }
#   endif

   /*
    * Reserve a suitable DMA descriptor buffer.
    * Assert size of the descriptor struct to be 16 bytes as PLX wants.
    */
   if (sizeof(tPLXDMADescr) != PLX_DMA_DESCR_SIZE) {
      printk(KERN_ERR MYNAME ": gcc made sizeof(tPLXDMADescr) != %d bytes (gcc %d bytes)\n",
             PLX_DMA_DESCR_SIZE, sizeof(tPLXDMADescr));
      release_region(io, NUMOFIOADDR);
      iounmap(plxaddr);
      iounmap(cmdaddr);
      return -ENOMEM;
   }

   /* Force multiple of 16 bytes (descriptors are that long). */
   descrbufsize &= ~(PLX_DMA_DESCR_SIZE - 1);

   /* xxx: May want to lift the arbitrary limit of no more than 128kB of descrs. */
   if ((descrbufsize <= 0) || (descrbufsize > (128 * 1024))) {
      /*
       * Not set explicitly, "probe default".
       *
       * By default we'll use 128kB.  (It used to be the max ISA hw DMA
       * buffer size, and this code was originally made to provide 128k-aligned
       * 128k-buffer below 16MB, as required by ISA DMA system.)
       */
      descrbufsize = 128 * 1024;
   }
   if (allocated_descrbufsize <= 0) {
      /*
       * Not set explicitly, "probe default".
       * The original reasoning was:
       * 256kB is bound to contain 128kB which doesn't cross a 128kB
       * boundary.
       *
       * Now we allocate "one alignment block more".
       */
      allocated_descrbufsize = (descrbufsize + sizeof(tPLXDMADescr));
   }
   if (!(vsib_descrbuf = bigphysarea_alloc(allocated_descrbufsize))) {
      printk(KERN_ERR MYNAME ": DMA descr buffer (%d bytes) allocation failed\n",
             allocated_descrbufsize);
      release_region(io, NUMOFIOADDR);
      iounmap(plxaddr);
      iounmap(cmdaddr);
      return -ENOMEM;
   }

   /*
    * Orig: calculate the safe 128kB-non-boundary-crossing starting
    * address.
    * Here we use the same ISA-DMA strategy to provide 16-byte aligned
    * buffer / DMA descriptor table as required by PLX chip.
    * (Four LSB of descriptor addresses are used as flag bits.)
    */
   vsib_descrbuf_bus = vsib_descr = virt_to_bus(vsib_descrbuf);
   /* Round up to next start of 128kB block with 17 LSB address bits == 0. */
   vsib_descr = (vsib_descr & ~(PLX_DMA_DESCR_SIZE)) + PLX_DMA_DESCR_SIZE;

   /*
    * If 'vsib_descr' is perfectly aligned, we'll end up using the latter
    * half of the buffer, both +1 and -1 will then start at
    * vsib_descrbuf+1,
    * and all other values will start at vsib_descrbuf+1..end.
    */
   vsib_descrend = vsib_descr + (descrbufsize - 1);
   /* Put the table of DMA descriptors in the aligned middle of this buff. */
   vsib_dd = (tPLXDMADescr *) (bus_to_virt(vsib_descr));

   if (  /**((vsib_descrend & 0xff000000) != 0)**//* >16MB; xxx: was a ISA DMA lim. */

                        /**||**/ (vsib_descr < vsib_descrbuf_bus)
         // before buffer start
         || (vsib_descrend > (vsib_descrbuf_bus + allocated_descrbufsize - 1))
         // after buffer end
         || ((descrbufsize / sizeof(tPLXDMADescr)) < descrs)
         // too small buf for reqd # of descrptors
      ) {
      printk(KERN_ERR MYNAME ": DMA buffer at >16MB (0x%08x) or outside allocated buffer\n",
             vsib_descr);
      bigphysarea_free(vsib_descrbuf, allocated_descrbufsize);
      release_region(io, NUMOFIOADDR);
      iounmap(plxaddr);
      iounmap(cmdaddr);
      return -ENOMEM;
   }
   printk(KERN_INFO MYNAME
          ": descrbuf=0x%08x, allocated_descrbufsize=%d, bus=0x%08x--0x%08x\n",
          (tAddress32) vsib_descrbuf, allocated_descrbufsize, vsib_descr, vsib_descrend);

   /* Reserve a large, large secondary "big" ring buffer. */
   if (bigbufsize <= 0) {
      /* Not set explicitly, "probe default". By default we'll use 5MB.  */
      bigbufsize = 5 * 1024 * 1024;
   }
   if (!(vsib_bigbuf = bigphysarea_alloc(bigbufsize))) {
      printk(KERN_ERR MYNAME ": Secondary big ring buffer (%d bytes) allocation failed\n",
             bigbufsize);
      bigphysarea_free(vsib_descrbuf, allocated_descrbufsize);
      release_region(io, NUMOFIOADDR);
      iounmap(plxaddr);
      iounmap(cmdaddr);
      return -ENOMEM;
   }
   vsib_big = virt_to_bus(vsib_bigbuf);   /* xxx: assumed auto 4-byte * align */
   vsib_dev.readpos = vsib_dev.vacantpos = 0;

   /* Register this character device driver into Linux driver table. */
   if ((vsib_major = register_chrdev(0, MYNAME, &vsib_fops)) == -EBUSY) {
      printk(KERN_ERR MYNAME ": unable to get a dynamic major device number\n");
      bigphysarea_free(vsib_bigbuf, bigbufsize);
      bigphysarea_free(vsib_descrbuf, allocated_descrbufsize);
      release_region(io, NUMOFIOADDR);
      iounmap(plxaddr);
      iounmap(cmdaddr);
      return (-EIO);
   }

   /* Create a /dev/vsib entry
    * Kernel 2.6 sysfs: create /sys/classes/vsib/vsib/dev, the 'udev' daemon does the rest
    * Kernel 2.x devfs: create /dev/vsib directly
    */

#if USE_DEVFS_OR_SYSFS
   pClassVSIB = CLASS_CREATE(THIS_MODULE, MYNAME);
   if (IS_ERR(pClassVSIB)) {
      printk(KERN_ERR MYNAME ": could not create a sysfs class (/dev/vsib), continuing anyway...\n");
   } else {
      CLASS_DEV_CREATE(pClassVSIB, MKDEV(vsib_major, 0), NULL, MYNAME);
   }
#endif

   /* Stop the board. */
   writel(VSIB_MODE_STOP, cmdaddr);

   /* We got the major number (and everything else). */
   printk(KERN_INFO MYNAME
          ": loaded with major=%d, I/O=0x%04x, descrbuf=0x%08x (%d bytes), "
          "bus=0x%08x--0x%08x, bigbuf=0x%08x (%d bytes), $Revision: 1.31 $\n",
          vsib_major, io, (tAddress32) vsib_descrbuf, allocated_descrbufsize, vsib_descr,
          vsib_descrend, (tAddress32) vsib_bigbuf, bigbufsize);

   /* Test-fix-write to VSIbrute command register, by default STOP. */
   writel(fix, cmdaddr);

   /* Initialize semaphore and struct */
   init_MUTEX(&vsib_dev.sem);
   vsib_dev.size           = sizeof(vsib_dev);
   vsib_dev.is_open        = 0;
   vsib_dev.is_first_write = 0;

   return 0;
}

/*
 * Driver unload: undo everything init_module() set up, in reverse order.
 *
 * Removes the /dev entry and class (when they were created), unregisters
 * the character device, frees both bigphysarea buffers, releases the I/O
 * region, unmaps the PLX and command register windows and drops the PCI
 * device reference (2.6 kernels).
 */
void cleanup_module(void)
{

#if USE_DEVFS_OR_SYSFS
   /*
    * init_module() carries on when class creation fails, so pClassVSIB
    * may hold an error pointer; it must not be handed to the destroy
    * calls in that case.
    */
   if (!IS_ERR(pClassVSIB)) {
      CLASS_DEV_DESTROY(pClassVSIB, MKDEV(vsib_major, 0));
      CLASS_DESTROY(pClassVSIB);
   }
#endif

   unregister_chrdev(vsib_major, MYNAME);
   bigphysarea_free(vsib_bigbuf, bigbufsize);

   bigphysarea_free(vsib_descrbuf, allocated_descrbufsize);
   release_region(io, NUMOFIOADDR);
   iounmap(plxaddr);
   iounmap(cmdaddr);

#   if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
   if (NULL != p_vsib_dev) {
      /* Release the device; decreases the reference counter. */
      pci_dev_put(p_vsib_dev);
      p_vsib_dev = NULL;
   }
#   endif

   printk(KERN_INFO MYNAME ": unloaded, max_big=%u, max_global_big=%u.\n", vsib_max_big_used, vsib_global_max_big_used);
}  /* cleanup_module */


/*
 * $Log: vsib.c,v $
 * Revision 1.31  2008/09/15 09:18:11  jwagner
 * probably fixed the CLASS_CREATE versioning hell
 *
 * Revision 1.28  2008/01/14 08:09:35  jwagner
 * comment out debug printk
 *
 * Revision 1.27  2008/01/14 08:07:41  jwagner
 * comment out debug printk
 *
 * Revision 1.26  2007/11/30 12:35:28  jwagner
 * created struct vsib_dev, using mutexes like in scull example, added is_open flag, cleaned away some unused code
 *
 * Revision 1.25  2006/07/14 09:15:30  jwagner
 * typo corrected
 *
 * Revision 1.24  2006/07/12 07:14:25  jwagner
 * added Chris' VSIB_GET_BIGBUF_SIZE ioctl
 *
 * Revision 1.23  2006/07/12 07:01:46  jwagner
 * Chris' file_operations and some yucky USE_DEVFS_OR_SYSFS cleanup
 *
 * Revision 1.22  2006/07/12 06:13:29  jwagner
 * took into account deprecated and replaced MODULE_PARM
 *
 * Revision 1.21  2006/04/21 10:51:28  jwagner
 * fixed devfs_handle_t different declaration in 2.4 vs 2.6
 *
 * Revision 1.20  2006/04/21 09:44:05  jwagner
 * do request_region a second time if fails
 *
 * Revision 1.19  2006/04/21 05:58:56  jwagner
 * autoselection between devfs, old sysfs, new sysfs
 *
 * Revision 1.18  2006/04/18 09:10:51  jwagner
 * modified for post-2.6.12 sysfs
 *
 * Revision 1.17  2006/04/04 11:15:32  jwagner
 * dumped deprecated devfs for k2.6, sysfs with udev works fine
 *
 * Revision 1.16  2006/04/04 09:56:27  jwagner
 * corrected wrong warning for pci_read_config_dword()
 *
 * Revision 1.15  2006/04/04 08:41:50  jwagner
 * added devfs for 2.4 as well
 *
 * Revision 1.14  2006/04/04 07:01:50  jwagner
 * added compile option to automatically insert itself into /dev/vsib (USE_DEVFS_OR_SYSFS)
 *
 * Revision 1.13  2006/03/30 12:38:03  jwagner
 * corrected missing indentation of two #define's that for some odd reason caused compile error
 *
 * Revision 1.12  2006/03/29 11:27:14  jwagner
 * added module usage counter keeping for kernel 2.6
 *
 * Revision 1.11  2006/03/22 11:47:10  jwagner
 * Amazing, now it compiles for kernel 2.6 too! Test still pending. Removed unused system interrupt disable/enable macros and calls. Reformatted code and comments. Removed deprecated check_region. Converted pcibios_xxx funcs to new pci funcs.
 *
 * Revision 1.9  2005/01/19 07:19:02  amn
 * Dec-2004 JIVE software formatter changes; vsib.c seek support.
 *
 * Revision 1.8  2002/09/02 16:39:47  amn
 * Test version suitable for 50MHz/2 test pattern recording, for pb/JB.
 *
 * Revision 1.7  2002/08/09 11:26:56  amn
 * Jul-2002 first fringes Dwingeloo test version.
 *
 * Revision 1.5  2002/06/14 13:00:26  amn
 * Dwingeloo test trip version.
 *
 * Revision 1.4  2002/03/25 15:18:51  amn
 * First chained DMA ring buffer.
 *
 * Revision 1.3  2002/03/21 11:24:50  amn
 * Chain of DMA descriptors, 1000 descrs, 10 1-sec/1k descrs, rest reused memory.
 *
 * Revision 1.2  2002/02/27 14:33:28  amn
 * Changed Log line to be on next line than the comment start characters.
 *
 * Revision 1.1  2002/02/27 14:24:38  amn
 * Initial version.
 */

