/* snapapi26.c
   Copyright (C) Acronis, 2004
   Written by Vladimir Simonov
   $Id: snapapi26.c 1190905 2016-05-27 08:51:04Z marina $
*/
#ifdef HAVE_LINUX_CONFIG
#include <linux/config.h>
#elif defined(HAVE_LINUX_AUTOCONF)
#include <linux/autoconf.h>
#elif defined(HAVE_GENERATED_AUTOCONF)
#include <generated/autoconf.h>
#else
#warning "neither linux/config.h nor linux/autoconf.h or generated/autoconf.h found"
#endif
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/version.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/kthread.h>
#include <asm/div64.h>

#include <linux/fs.h>

#include <linux/init.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
#include <asm/system.h>
#endif
#include <asm/uaccess.h>
#include <asm/bitops.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/smp.h>

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/interrupt.h> /* for in_interrupt */
#include <linux/poll.h>
#include <linux/timer.h>
#ifdef HAVE_IOCTL32_CONVERSIONS
#include <linux/ioctl32.h>
#endif
#ifdef HAVE_FREEZER_H
#include <linux/freezer.h>
#endif
#if defined(CONFIG_VZ_VZSNAP) || defined(CONFIG_VZ_VZSNAP_MODULE)
#define USE_VZ_VZSNAP
#include <linux/vzsnap.h>
#endif
#include "snapapi.h"

#define DEBUG		0

#define DEBUG_API	(1 << 1)
#define DEBUG_ALLOC	(1 << 2)
#define DEBUG_BIO	(1 << 3)
#define DEBUG_BIOQUE	(1 << 4)
#define DEBUG_CACHE	(1 << 5)
#define DEBUG_BREAD	(1 << 6)
#define DEBUG_INTERNALS	(1 << 7)
#define DEBUG_DUMP	(1 << 8)
#define DEBUG_LOCK	(1 << 9)
#define DEBUG_IOCTL	(1 << 10)
#define DEBUG_MESS	(1 << 11)
#define DEBUG_BMAP	(1 << 12)

#define DEBUG_LEVEL 	(DEBUG_API|DEBUG_BMAP)

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
#define sn_request_queue request_queue_t
#define sn_kmem_cache kmem_cache_t
#else
#define sn_request_queue struct request_queue
#define sn_kmem_cache struct kmem_cache
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
#define sn_bio_endio(x, y, z) bio_endio(x, y, z)
#define sn_bio_io_error(x, y) bio_io_error(x, y)
#define BIO_EIO_RET_VAL_ERR 1
#define BIO_EIO_RET_VAL_OK 0
#else
#ifdef HAVE_BIO_ENDIO_2ARGS
#define sn_bio_endio(x, y, z) bio_endio(x, z)
#else
#define sn_bio_endio(x, y, z) bio_endio(x)
#endif /* HAVE_BIO_ENDIO_2ARGS */
#define sn_bio_io_error(x, y) bio_io_error(x)
#define BIO_EIO_RET_VAL_ERR
#define BIO_EIO_RET_VAL_OK
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7)
#define USE_KERNEL_THREAD
#endif

#ifdef HAVE_KMAP_ATOMIC_2ARGS
#define sn_kmap_atomic(a) kmap_atomic(a, KM_USER0)
#define sn_kunmap_atomic(a) kunmap_atomic(a, KM_USER0)
#else
#define sn_kmap_atomic(a) kmap_atomic(a)
#define sn_kunmap_atomic(a) kunmap_atomic(a)
#endif

#ifdef HAVE_ASM_HAVE_SET_MB
#define sn_set_mb set_mb
#else
#define sn_set_mb smp_store_mb
#endif

#ifndef HAVE_REQ_WRITE
#define REQ_WRITE	(1 << BIO_RW)
#endif

#ifndef HAVE_FMODE_T
typedef unsigned int fmode_t;
#endif 

#if DEBUG
#define inline
#define sa_debug(level, fmt, arg...)					\
	do {								\
		static const char *func = __FUNCTION__;			\
		if ((level) & DEBUG_LEVEL)				\
			printk(KERN_DEBUG "%s(%s,%d): " fmt, func,	\
				current->comm, current->pid, ##arg);	\
	} while (0)
#else
#define sa_debug(level, fmt, arg...) do { } while (0)
#endif

#define sa_kdebug(fmt, arg...)					\
	do {							\
		static const char *func = __FUNCTION__;		\
		printk(KERN_DEBUG "%s(%s,%d): " fmt, func,	\
			current->comm, current->pid, ##arg);	\
	} while (0)
#define sa_info(fmt, arg...)					\
	do {							\
		static const char *func = __FUNCTION__;		\
		printk(KERN_INFO "%s(%s,%d): " fmt, func,	\
			current->comm, current->pid, ##arg);	\
	} while (0)
#define sa_warn(fmt, arg...)					\
	do {							\
		static const char *func = __FUNCTION__;		\
		printk(KERN_WARNING "%s(%s,%d): " fmt, func,	\
			current->comm, current->pid, ##arg);	\
	} while (0)
#define sa_error(fmt, arg...)					\
	do {							\
		static const char *func = __FUNCTION__;		\
		printk(KERN_ERR "%s(%s,%d): " fmt, func,	\
			current->comm, current->pid, ##arg);	\
	} while (0)

#define sa_BUG(fmt, arg...)					\
	do {							\
		static const char *func = __FUNCTION__;		\
		printk(KERN_CRIT "%s(%s,%d): " fmt, func,	\
			current->comm, current->pid, ##arg);	\
		BUG();						\
	} while (0)

#if defined(__x86_64) && defined(CONFIG_COMPAT) && !defined(HAVE_COMPAT_IOCTL)
#define HAVE_IOCTL32_CONVERSION
#endif

#ifdef HAVE_MAKE_REQUEST_INT
#define MAKE_REQUEST_EXIT_STATUS 0
#define MAKE_REQUEST_RETURN_VALUE int
#else
#define MAKE_REQUEST_EXIT_STATUS
#define MAKE_REQUEST_RETURN_VALUE void
#endif

#ifdef HAVE_BLKDEV_PUT_INT
#define MAKE_BLKDEV_RETURN_VALUE int
#else
#define MAKE_BLKDEV_RETURN_VALUE void
#endif

static int snap_init_ok;
static int snap_emergency_size;
static struct vm_operations_struct snapctl_vm_ops;

static wait_queue_head_t select_wait;
static int messages_pos;
#define MESSAGE_SIZE (sizeof(struct snap_message))
#define MAX_MESSAGES (PAGE_SIZE / MESSAGE_SIZE)
struct snap_message *messages_buf;
static struct semaphore messages_sem = __SEMAPHORE_INITIALIZER(messages_sem, 1);

#ifndef USE_KERNEL_THREAD
static struct task_struct *resolver_thread;
#else
#include <linux/smp_lock.h>
static pid_t resolver_thread_pid; 
static wait_queue_head_t resolver_thread_signal; 
#endif

static int resolver_thread_continue = 1;
static DECLARE_COMPLETION(resolver_thread_exited);

static LIST_HEAD(sessions_list);
static LIST_HEAD(notinited_list);

#define sn_round(a,b) (((a) + (b) - 1) / (b))

/* sessions_list & notinited_list protection */
#ifdef HAVE_SPIN_LOCK_UNLOCKED
static spinlock_t sessions_lock = SPIN_LOCK_UNLOCKED;
#else
static DEFINE_SPINLOCK(sessions_lock);
#endif
static int slab_uid;

#define REFS_PER_PAGE	(PAGE_SIZE / (sizeof(void *)))
#define REFS_PER_PAGE_MASK (~(REFS_PER_PAGE - 1))
#if BITS_PER_LONG == 32
#define REFS_PER_PAGE_SHIFT (PAGE_SHIFT - 2)
#elif BITS_PER_LONG == 64
#define REFS_PER_PAGE_SHIFT (PAGE_SHIFT - 3)
#else
#error Unsupported architecture detected
#endif

#define MAX_BHPAGES	REFS_PER_PAGE
#define MAX_BH_DELAYED	(REFS_PER_PAGE * MAX_BHPAGES)

struct block_map {
	unsigned long long	size; /* size in bits of allocated memory */
	unsigned long long	rsize; /* size in bits of real data */
	struct page **		blkmap;
};
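
/*
 * The block map is a bitmap spread over a two-level page table:
 * blkmap[] is an array of "mpages", each mpage holding REFS_PER_PAGE
 * pointers to data pages, and each data page holding PAGE_SIZE * 8 bits.
 * Illustrative arithmetic (assuming 4K pages on a 64-bit kernel):
 * REFS_PER_PAGE = 512, a data page covers 32768 blocks, so a single
 * mpage covers 512 * 32768 = 16M blocks.
 */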

#ifdef __GFP_HIGHIO
#define GFP_SNAPHIGH	(__GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM)
#else
#define GFP_SNAPHIGH	(__GFP_IO | __GFP_FS | __GFP_HIGHMEM)
#endif
struct sa_page {
	struct sa_page *	next;
	struct page *		page;
	unsigned long long	bno;
};
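
/*
 * Note: for s_bppage > 1 the slab object is allocated with extra trailing
 * space (see the sn_blkcache_ctor_* constructors below), so &sab->bno can
 * be treated as an array of s_bppage block numbers, one per cached block
 * on the page; ~0ULL marks a free slot (see init_sa_page()).
 */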

struct sa_chain {
	struct sa_page *	busy;
	struct sa_page *	free;
	spinlock_t		lock;
#define FAKE_READ	1
#define READ_KERNEL1	2
#define READ_KERNEL2	3
};

#define BLK_CHAINS 512 /* must be power of 2 to avoid bno % BLK_CHAINS ops */
#define MAX_MMPAGES 64
#define TIMER_INTERVAL (5*HZ)

struct bio_req {
	struct bio *bio;
	struct completion event;
};

struct pending_request;
struct pending_queue {
	spinlock_t		pq_lock;
	struct pending_request	*pq_req;	/* pending request list head */
	struct pending_request	*pq_reqtail;	/* pending request list tail */
	int			pq_state;
	struct completion	pq_done;
	struct completion	pq_bio_done;	/* end_io signal */
	atomic_t		pq_ready_req;	/* number of ready requests */
	atomic_t		pq_notready_req;
};

enum pending_queue_states {
	PQ_STOPPED,	/* There are no unhandled pending requests */
	PQ_RUNNING,	/* New requests may be pushed to the queue */
	PQ_CLOSED,	/* New requests can't be pushed to the queue, but old
			 * requests may still be unfinished */
};

struct pending_request {
	struct pending_request 	*pr_next;	/* pending requests list */
	struct bio		*pr_rbio;	/* bio submitted to read */
	struct bio		*pr_wbio;	/* original delayed bio */
	struct pending_queue	*pr_queue;	/* session delayed queue */
	/*
	 * While the rbio is being handled it may be remapped, which results
	 * in losing information about the initial request, so we have to
	 * explicitly save the rbio block number.
	 */
	unsigned long long	pr_rblkno;	/* first rbio block */
};

#define alloc_pending_request() kmalloc(sizeof(struct pending_request), GFP_ATOMIC & ~__GFP_HIGH)

struct level_entry {
	unsigned long long max_key;
	struct page* page;
};

struct stack_entry {
	struct page* page;
	struct level_entry* entry;
	unsigned long long max_key;
};

struct group_map {
	unsigned level;
	struct page* root;
	unsigned long long max_key;
	struct stack_entry stack[8];
};

struct session_struct {
	struct list_head	s_list;
	dev_t			s_kdev;
	struct block_device *	s_bdev;
	volatile unsigned int	s_state;
	unsigned int		s_bppage;	/* blocks per page */
	unsigned int		s_bsize;	/* block size */
	unsigned int		s_spb;		/* secs per block */
	unsigned int		s_spbshift;	/* secs per block shift */
	unsigned long long	s_plen;
	unsigned long long	s_pstart;
	struct super_block *	s_sb;

	unsigned long long	s_fblock;	/* EXTxx: first data block */
	unsigned long		s_gcount;	/* group count */
	unsigned int		s_bpgroup;	/* blocks per group */

	atomic_t		s_users;
	struct block_map	s_blkmap;
#ifdef CATCH_ILLEGAL_ACCESS
	struct block_map	s_blkmap_backup;
#endif
	struct group_map	s_groupmap;
	int			s_usemap;
	unsigned long long 	s_bmsize;

#ifdef USE_VZ_VZSNAP
	struct vzsnap_struct *	s_vzs;
#endif

	int 			s_mess_pos;	/* last read message */
	spinlock_t		s_misc_lock;	/* protects from here to */
						/* s_make_request_fn */
	unsigned long long	s_ioctlcnt;	/* state data */
	unsigned long long	s_ioctlcnt_prev;
	int			s_heartbeat_active;
	struct timer_list 	s_timer;	/* heartbeat in frozen*/

	make_request_fn *	s_make_request_fn; /* original fn from queue */
	sn_request_queue *	s_request_queue;

	spinlock_t		s_biolist_lock;
	struct bio ***		s_bioarr;
	int			s_biopages;
	int			s_biocount;

	struct vm_area_struct *	s_vma;
	atomic_t		s_vma_users;
	int			s_msize;	/* vm area pages */
	int			s_maxmsize;	/* max vm area pages */
	struct page *		s_mpages[MAX_MMPAGES];	/* mmapped pages */
	struct bio_req *	s_local_bios;	/* space exchange */
	unsigned long long	s_ahead_bno;	/* start ahead buffer */
	unsigned int		s_asize;

	struct semaphore        s_sem;		/* user space requests
						   serialization */
	struct pending_queue	s_pending_queue;/* pending request queue used
						   by async handler */

	sn_kmem_cache *		s_blkcachep;
	char			s_blkcachename[32];
	int 			s_blkcache_pages;
	spinlock_t		s_blkcache_emlock;
	int	 		s_blkcache_empages;
	int	 		s_blkcache_emmin;
	struct sa_page *	s_blk_emlist;
	int			s_veid;
	int			s_simulate_freeze;	/* disable freeze */

	unsigned long long	s_gpages;	/* got pages */
	unsigned long long	s_ppages;	/* put pages */
	unsigned long long	s_abios;	/* allocated bios */
	unsigned long long	s_fbios;	/* freed bios */
	unsigned long long	s_dbios;	/* delayed bios */
	unsigned long long	s_rblocks;	/* read blocks */
	unsigned long long	s_cblocks;	/* cached blocks */
	unsigned long long	s_rcblocks;	/* read from cache */
	unsigned long long	s_fcblocks;	/* freed cache  blocks */
	unsigned long long	s_mcblocks;	/* max blocks in cache */
	unsigned long long	s_rwcolls;	/* read/write collisions */
	unsigned long long	s_rc2blocks;	/* read to cache2 blocks */
	unsigned long long 	s_sync_req;	/* sync bios */
	unsigned long long 	s_async_req;	/* async bios */
	unsigned long long 	s_async_retr;	/* async retries */
	struct sa_chain		s_blkchains[BLK_CHAINS];
};

static void destroy_cached_bio(struct session_struct* s, struct bio *bio);
static void destroy_pending_request(struct session_struct *s,
			struct pending_request *preq)
{
	if (preq) {
		if (preq->pr_rbio)
			destroy_cached_bio(s, preq->pr_rbio);
		kfree(preq);
	}
}

#define snapapi_get_dev_queue(s) bdev_get_queue(s->s_bdev)
#define snapapi_lock_dev_queue(q) do { \
		if (q->queue_lock) \
			spin_lock_irq(q->queue_lock); \
	} while (0)
#define snapapi_unlock_dev_queue(q) do { \
		if (q->queue_lock) \
			spin_unlock_irq(q->queue_lock); \
		} while (0)


struct locked_dev {
	struct block_device *bdev;
	unsigned lock_type;
	struct session_struct *sess;
};

#define MAX_LOCKEDDEVS (PAGE_SIZE / sizeof(struct locked_dev))
static int lockedcnt; /* global lock/unlock devs */
static struct locked_dev * devlocked;
/* devlocked & lockedcnt protection */
static struct semaphore devlocked_sem = __SEMAPHORE_INITIALIZER(devlocked_sem, 1);

static void unregister_make_request(struct session_struct * s);
static void mpages_destroy(struct session_struct *s);
static void close_session(struct session_struct *s, int do_free);
#if 0
static void dump_sessions(void);
#endif

#ifdef HAVE_TRY_TO_FREEZE_NO_ARGS
#define snapapi_try_to_freeze() try_to_freeze()
#elif defined(HAVE_TRY_TO_FREEZE_ONE_ARG)
#define snapapi_try_to_freeze() try_to_freeze(PF_FREEZE)
#else
#define snapapi_try_to_freeze()
#endif

#ifndef HAVE_FREEZE_BDEV
static struct super_block *freeze_bdev(struct block_device *bdev)
{
	struct super_block *sb;

	sb = get_super(bdev);
	if (sb) {
		if (sb->s_op->write_super_lockfs)
			sb->s_op->write_super_lockfs(sb);
	}
	sync_blockdev(bdev);
	return sb;
}

static void thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
	if (sb) {
		if (sb->s_op->unlockfs)
			sb->s_op->unlockfs(sb);
		drop_super(sb);
	}
}
#endif

static void sn_freeze_bdev(struct session_struct *s)
{
	if (!s->s_simulate_freeze) {
		s->s_sb = freeze_bdev(s->s_bdev);
	} else {
		fsync_bdev(s->s_bdev);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
		s->s_sb = get_super(s->s_bdev);
#else
		s->s_sb = user_get_super(s->s_kdev);
#endif
	}
}

static void sn_thaw_bdev(struct session_struct *s)
{
	if (!s->s_simulate_freeze) {
		thaw_bdev(s->s_bdev, s->s_sb);
	} else {
		drop_super(s->s_sb);
	}
	s->s_sb = NULL;
}

static int sn_blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
#ifdef HAVE_BLKDEV_GET_3ARG_FLAG
	return blkdev_get(bdev, mode, (unsigned)holder); /* up to 2.6.28 kernels */
#elif defined(HAVE_BLKDEV_GET_3ARGS)
	return blkdev_get(bdev, mode, holder); /* since 2.6.38 kernels */
#else
	return blkdev_get(bdev, mode); /* ~2.6.28 to 2.6.37 kernels */
#endif
}

static MAKE_BLKDEV_RETURN_VALUE sn_blkdev_put(struct block_device *bdev, fmode_t mode)
{
#ifdef HAVE_BLKDEV_PUT_2ARGS
	return blkdev_put(bdev, mode);
#else
	return blkdev_put(bdev);
#endif
}

#ifdef HAVE_BD_CLAIM
#define sn_bd_claim bd_claim
#define sn_bd_release bd_release
#else
int sn_bd_claim(struct block_device *bdev, void *holder)
{
	return sn_blkdev_get(bdev, FMODE_READ | FMODE_EXCL, holder);
}
void sn_bd_release(struct block_device *bdev)
{
	sn_blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
}
#endif /* HAVE_BD_CLAIM */

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
typedef void* (*sn_blkcache_ctor_t)(void*);

static void sn_blkcache_ctor_1bppage(void *mem)
{
	memset(mem, 0, sizeof(struct sa_page));
}

static void sn_blkcache_ctor_2bppage(void *mem)
{
	memset(mem, 0, sizeof(struct sa_page) + sizeof(unsigned long long) * 1);
}

static void sn_blkcache_ctor_4bppage(void *mem)
{
	memset(mem, 0, sizeof(struct sa_page) + sizeof(unsigned long long) * 3);
}

static sn_blkcache_ctor_t sn_get_blkcache_ctor(unsigned int bppage)
{
	switch(bppage) {
		case 1:
			return sn_blkcache_ctor_1bppage;
		case 2:
			return sn_blkcache_ctor_2bppage;
		case 4:
			return sn_blkcache_ctor_4bppage;
	};
	return NULL;
}
#endif

static int sn_is_error_bio(struct bio *bio)
{
#ifdef HAVE_BIO_UPTODATE
	return !test_bit(BIO_UPTODATE, &bio->bi_flags);
#else
	return bio->bi_error;
#endif
}

/*
 * Add request to back of pending list
 */
static void pqueue_add_request(struct pending_queue *pq,
			struct pending_request *preq)
{
	if (pq->pq_reqtail) {
		pq->pq_reqtail->pr_next = preq;
		pq->pq_reqtail = preq;
	} else
		pq->pq_req = pq->pq_reqtail = preq;
}

/*
 * Grab first pending request
 */
static struct pending_request *pqueue_get_request(struct pending_queue *pq)
{
	struct pending_request *preq = pq->pq_req;
	if (preq) {
		if (preq == pq->pq_reqtail)
			pq->pq_reqtail = NULL;
		pq->pq_req = preq->pr_next;
		preq->pr_next = NULL;
	}
	return preq;
}

static inline int blkmap_release_pages(struct page **page_ptr)
{
	unsigned int count;
	unsigned int i;

	for (i = 0, count = 0; i < REFS_PER_PAGE; i++, page_ptr++) {
		struct page *page;

		page = *page_ptr;
		if (unlikely(!page))
			continue;
		page_cache_release(page);
		count++;
	}
	return count;
}

static void do_block_map_destroy(struct session_struct *s,
				struct block_map *bmap)
{
	unsigned long long bsize;
	unsigned int pages, mpages;
	struct page *page;
	unsigned int i;

	if (!bmap->blkmap)
		return;
	bsize = sn_round(bmap->size, 8);
	pages = sn_round(bsize, PAGE_SIZE);
	/* pages with pointers to pages */
	mpages = sn_round(pages, REFS_PER_PAGE);

	for (i = 0; i < mpages; i++) {
		page = bmap->blkmap[i];
		if (unlikely(!page))
			break;
		sa_debug(DEBUG_BMAP, "s=%p, mpage(%u,%p,%p)\n",
					s, i, page, page_address(page));
		s->s_ppages += blkmap_release_pages(page_address(page));
		page_cache_release(page);
		s->s_ppages++;
	}
	kfree(bmap->blkmap);
	bmap->blkmap = NULL;
	bmap->size = 0;
	bmap->rsize = 0;
	return;
}

static void noinline block_map_destroy(struct session_struct *s)
{
	do_block_map_destroy(s, &s->s_blkmap);
#ifdef CATCH_ILLEGAL_ACCESS
	do_block_map_destroy(s, &s->s_blkmap_backup);
#endif
	return;
}

static inline unsigned int blkmap_high_pages(struct page **page_ptr, unsigned n)
{
	struct page *p;
	unsigned int count;

	for (count = 0; count < n; page_ptr++, count++) {
		p = alloc_page(GFP_HIGHUSER);
		if (unlikely(!p))
			return count;

		*page_ptr = p;
	}
	return count;
}

static int blkmap_alloc_pages(struct session_struct * s, struct page **blkmap,
					unsigned pages)
{
	struct page *p;
	unsigned int i, count, hpages;

	for (i = 0; i < pages; i += REFS_PER_PAGE, blkmap++) {
		p = alloc_page(GFP_KERNEL);
		if (unlikely(!p))
			goto out_free;
		memset(page_address(p), 0, PAGE_SIZE);
		*blkmap = p;
		sa_debug(DEBUG_BMAP, "s=%p, mpage(%u,%p,%p)\n",
			s, i, p, page_address(p));
		s->s_gpages++;
		hpages = (i + REFS_PER_PAGE < pages) ? REFS_PER_PAGE :
							pages - i;
		count = blkmap_high_pages(page_address(p), hpages);
		s->s_gpages += count;
		if (count != hpages)
			goto out_free;
	}
	return 0;

out_free:
	block_map_destroy(s);
	return -ENOMEM;
}

static inline struct page * blkmap_page(struct page **blkmap,
				unsigned int pageno)
{
	struct page **mpage;

	mpage = page_address(blkmap[pageno >> REFS_PER_PAGE_SHIFT]);
	return mpage[pageno & (~REFS_PER_PAGE_MASK)];
}

static void blkmap_page_release(struct page **blkmap, unsigned int pageno)
{
	struct page **mpage;
	struct page *page;
	unsigned int idx;

	mpage = page_address(blkmap[pageno >> REFS_PER_PAGE_SHIFT]);
	idx = pageno & (~REFS_PER_PAGE_MASK);
	page = mpage[idx];
	mpage[idx] = 0;
	page_cache_release(page);
}
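
/*
 * Index arithmetic used above, worked through for a 64-bit kernel with
 * 4K pages (REFS_PER_PAGE_SHIFT = 9, REFS_PER_PAGE = 512): pageno 1000
 * lives in mpage blkmap[1000 >> 9] = blkmap[1], slot 1000 & 511 = 488.
 */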

static int noinline block_map_init(struct session_struct *s,
			unsigned long long size, char *data, int optimize)
{
	struct block_map *bmap;
	unsigned long long bsize;
	unsigned int count, pages, mpages, i;
	int ret, bexists;
	struct page * tpage;
	void * tpageaddr;

	sa_debug(DEBUG_API, "s=%p, size=%llu, data=%p mode=%d\n", s, size,
						data, optimize);
	bsize = sn_round(size, 8);
	if (!bsize)
		return -EINVAL;

	tpage = NULL;
	tpageaddr = NULL;
	ret = -ENOMEM;
	bexists = 0;
	pages = sn_round(bsize, PAGE_SIZE);
	mpages = sn_round(pages, REFS_PER_PAGE);

	bmap = &s->s_blkmap;
	if (bmap->size) {
		if (unlikely(bmap->size < size))
			return -EINVAL;
		bexists = 1;
		/* we may be loading data into a larger bitmap;
		   rsize keeps the real data size */
		bmap->rsize = size;
	}
	if (!bmap->blkmap) {
		size_t memsize;

		memsize = mpages * sizeof(struct page *);
		bmap->blkmap = kmalloc(memsize, GFP_KERNEL);
		if (unlikely(!bmap->blkmap))
			return ret;
		memset(bmap->blkmap, 0, memsize);
		bmap->size = size;
		bmap->rsize = size;
	}
	if (data) {
		tpage = alloc_page(GFP_KERNEL);
		if (unlikely(!tpage)) {
			kfree(bmap->blkmap);
			bmap->blkmap = NULL;
			bmap->size = 0;
			bmap->rsize = 0;
			return ret;
		}
		tpageaddr = page_address(tpage);
		s->s_gpages++;
	}
	sa_debug(DEBUG_BMAP, "size=%llu, blkmap=%p, pages=%u, mpages=%u\n",
			size, bmap->blkmap, pages, mpages);
	if (!bexists) {
		if (unlikely(blkmap_alloc_pages(s, bmap->blkmap, pages)))
			goto out_free;
	}
	count = PAGE_SIZE;
	for (i = 0; i < pages; i++, data += PAGE_SIZE) {
		char *kaddr;
		struct page *p;

		if (unlikely((i == pages - 1) && (bsize & (PAGE_SIZE - 1))))
			/* Don't touch count if bsize%PAGE_SIZE == 0 */
			count = bsize & (PAGE_SIZE - 1);

		if (tpageaddr) {
			ret = copy_from_user(tpageaddr, data, count);
			if (unlikely(ret)) {
				sa_warn("copy_from_user failed. data=%p, "
					"count=%d, bsize=%llu.\n", data, count,
					bsize);
				ret = -EACCES;
				goto out_free;
			}
			if (optimize) {
				int fbit;
				fbit = find_first_bit(tpageaddr, PAGE_SIZE << 3);
				if (unlikely(fbit == PAGE_SIZE << 3)) {
					blkmap_page_release(bmap->blkmap, i);
					s->s_ppages++;
					sa_debug(DEBUG_BMAP, "empty %u\n", i);
				}
			}
		}
		p = blkmap_page(bmap->blkmap, i);
		if (p) {
			kaddr = sn_kmap_atomic(p);
			if (!tpageaddr)
				memset(kaddr, 0xff, count);
			else
				memcpy(kaddr, tpageaddr, count);
			sn_kunmap_atomic(kaddr);
		}
	}

	if (tpage) {
		page_cache_release(tpage);
		s->s_ppages++;
	}
	return 0;

out_free:
	block_map_destroy(s);
	if (tpage) {
		page_cache_release(tpage);
		s->s_ppages++;
	}
	return ret;
}

#ifdef USE_VZ_VZSNAP
static int noinline block_map_init_vzsnap(struct session_struct *s,
					  struct vzsnap_struct *vzs)
{
	struct block_map *bmap;
	unsigned long long size = vzs->block_max;
	unsigned long long bsize;
	unsigned int pages;
	int i, ret;

	bsize = (size + 7) / 8;

	bmap = &s->s_blkmap;
	ret = -ENOMEM;
	memset(bmap, 0, sizeof(*bmap));
	pages = (bsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
	bmap->blkmap = kmalloc(pages * sizeof(struct page *), GFP_KERNEL);
	if (!bmap->blkmap)
		return ret;
	memset(bmap->blkmap, 0, pages * sizeof(struct page *));
	bmap->size = size;
	bmap->rsize = size;
#ifdef CATCH_ILLEGAL_ACCESS
	s->s_blkmap_backup.blkmap = kzalloc(pages * sizeof(struct page *), GFP_KERNEL);
	if (!s->s_blkmap_backup.blkmap)
		return ret;
	s->s_blkmap_backup.size = size;
	s->s_blkmap_backup.rsize = size;
#endif
	for (i = 0; i < pages; i++) {
		if (vzs->block_map[i]) {
			struct page **mpage;
			struct page **pg = bmap->blkmap + (i >> REFS_PER_PAGE_SHIFT);
			if (!*pg) {
				*pg = alloc_page(GFP_KERNEL|__GFP_ZERO);
				if (!*pg)
					return -ENOMEM;

			}
			get_page(vzs->block_map[i]);
			mpage = page_address(*pg);
			mpage[i & (~REFS_PER_PAGE_MASK)] = vzs->block_map[i];
#ifdef CATCH_ILLEGAL_ACCESS
			pg = s->s_blkmap_backup.blkmap + (i >> REFS_PER_PAGE_SHIFT);
			if (!*pg) {
				*pg = alloc_page(GFP_KERNEL|__GFP_ZERO);
				if (!*pg)
					return -ENOMEM;
			}
			mpage = page_address(*pg);
			mpage[i & (~REFS_PER_PAGE_MASK)] = alloc_page(GFP_KERNEL);
			memcpy(page_address(mpage[i & (~REFS_PER_PAGE_MASK)]),
			       page_address(vzs->block_map[i]), PAGE_SIZE);
#endif
		}
	}
	return 0;
}
#endif

static inline int is_block_in_map(struct block_map *bmap,
					unsigned long long bno)
{
	unsigned int pageno;
	struct page *page;
	long *kaddr;
	int ret;

	if (bno >= bmap->rsize)
		return 0;

	pageno = bno >> (PAGE_SHIFT + 3);
	page = blkmap_page(bmap->blkmap, pageno);
	if (!page)
		return 0;
	kaddr = sn_kmap_atomic(page);
	ret = test_bit(bno % (PAGE_SIZE * 8), kaddr);
	sn_kunmap_atomic(kaddr);

	return ret;
}

static inline void clear_block_in_map(struct block_map *bmap,
					unsigned long long bno)
{
	unsigned int pageno;
	struct page *page;
	long *kaddr;

	if (bno >= bmap->rsize)
		return;

	pageno = bno >> (PAGE_SHIFT + 3);
	page = blkmap_page(bmap->blkmap, pageno);
	if (!page)
		return;
	kaddr = sn_kmap_atomic(page);
	clear_bit(bno % (PAGE_SIZE * 8), kaddr);
	sn_kunmap_atomic(kaddr);
}

static inline void set_block_in_map(struct block_map *bmap,
					unsigned long long bno)
{
	unsigned int pageno;
	struct page *page;
	long *kaddr;

	if (bno >= bmap->rsize)
		return;

	pageno = bno >> (PAGE_SHIFT + 3);
	page = blkmap_page(bmap->blkmap, pageno);
	if (!page)
		return;
	kaddr = sn_kmap_atomic(page);
	set_bit(bno % (PAGE_SIZE * 8), kaddr);
	sn_kunmap_atomic(kaddr);
}

#define BITS_ON_PAGE (1 << (PAGE_SHIFT+3))

static unsigned long long find_next_block(struct block_map *bmap, unsigned long long bno)
{
	unsigned int lpage; /* last pageno */
	unsigned int pageno;
	unsigned int psize; /* processing page size */

	if (bno >= bmap->rsize)
		return ~0ULL; /* goto out_end; */
	psize = BITS_ON_PAGE;
	lpage = (bmap->size - 1) >> (PAGE_SHIFT + 3);
	pageno = bno >> (PAGE_SHIFT + 3);
	bno &= BITS_ON_PAGE - 1;

	for (; pageno <= lpage; pageno++) {
		void* kaddr;
		struct page* page;

		if (pageno == lpage) {
			psize = bmap->size & ((PAGE_SIZE << 3) - 1);
			if (!psize)
				psize = BITS_ON_PAGE;
		}
		page = blkmap_page(bmap->blkmap, pageno);
		if (!page)
			continue;
		kaddr = sn_kmap_atomic(page);
		bno = find_next_bit(kaddr, psize, bno);
		sn_kunmap_atomic(kaddr);
		if (bno < psize) {
			bno += (unsigned long long)pageno << (PAGE_SHIFT + 3);
			goto out;
		}
		bno = 0;
	}

	bno = ~0ULL;
out:
	return bno;
}
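
/*
 * A minimal usage sketch (illustrative, kept non-compiled like the other
 * reference code in this file): walk every block recorded in a block_map
 * with find_next_block().
 */
#if 0
static void example_walk_blkmap(struct block_map *bmap)
{
	unsigned long long bno;

	for (bno = find_next_block(bmap, 0); bno != ~0ULL;
			bno = find_next_block(bmap, bno + 1)) {
		/* bit bno is set in the map here */
	}
}
#endif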

static inline sector_t sn_bio_bi_sector(struct bio *bio)
{
#ifdef HAVE_BVEC_ITER
	return bio->bi_iter.bi_sector;
#else
	return bio->bi_sector;
#endif
}

static inline unsigned int sn_bio_bi_size(struct bio *bio)
{
#ifdef HAVE_BVEC_ITER
	return bio->bi_iter.bi_size;
#else
	return bio->bi_size;
#endif
}

#define snapapi_is_not_our_bio(s, bio) \
		(sn_bio_bi_sector(bio) + (sn_bio_bi_size(bio) >> 9) < s->s_pstart || \
		sn_bio_bi_sector(bio) >= s->s_pstart + s->s_plen)
#if 0
static struct session_struct *find_by_part(struct bio *bio)
{
	struct session_struct *s;
	list_for_each_entry(s, &sessions_list, s_list) {
		if (s->s_state == SNAP_NOTINITED)
			continue;
		if ((s->s_bdev->bd_contains == bio->bi_bdev ||
				s->s_bdev == bio->bi_bdev)
				&& !snapapi_is_not_our_bio(s, bio))
			return s;
	}
	return NULL;
}

static inline struct session_struct *find_by_dev(struct block_device *bd)
{
	struct session_struct *s;
	list_for_each_entry(s, &sessions_list, s_list)
		if (s->s_bdev && (s->s_bdev->bd_contains == bd
						|| s->s_bdev == bd))
			return s;
	return NULL;
}
#endif

static inline struct session_struct *find_by_queue(struct bio *bio, void *q)
{
	struct session_struct *s;
	list_for_each_entry(s, &sessions_list, s_list)
		if (s->s_request_queue == q)
			return s;
	return NULL;
}

static inline struct session_struct *find_by_queue_next(struct bio *bio,
			void *q, struct session_struct *s)
{
	list_for_each_entry_continue(s, &sessions_list, s_list)
		if (s->s_request_queue == q)
			return s;
	return NULL;
}

static inline struct session_struct *find_deadlocked(void)
{
	struct list_head *tmp;
	list_for_each(tmp, &sessions_list) {
		struct session_struct *s;
		s = list_entry(tmp, struct session_struct, s_list);
		sa_debug(DEBUG_API, "s=%p state=%d\n", s, s->s_state);
		if (s->s_state == SNAP_DEADLOCK_ERR)
			return s;
	}
	return NULL;
}

#if 0
static int make_original_request(struct bio *bio)
{
	sn_request_queue *q;
	do {
		q = bdev_get_queue(bio->bi_bdev);
		if (!q) {
			/*
			 * This is a very sad situation. The bio can't be
			 * handled properly, but we have to call end_io
			 * because nobody will do it for us.
			 */
			sa_error("Device %x does not have a queue.\n",
				bio->bi_bdev->bd_dev);
			sn_bio_io_error(bio, sn_bio_bi_size(bio));
			return 1;
		}
	} while (q->make_request_fn(q, bio));
	return 0;
}
#endif

static void noinline cleanup_biolist(struct session_struct * s)
{
	int pno, offset;
	struct bio *bio;

	if (!s->s_bioarr)
		return;

	spin_lock(&s->s_biolist_lock);
	while (s->s_biocount) {
		s->s_biocount--;
		pno = s->s_biocount / REFS_PER_PAGE;
		offset = s->s_biocount % REFS_PER_PAGE;
		bio = *(s->s_bioarr[pno] + offset);
		spin_unlock(&s->s_biolist_lock);
		generic_make_request(bio);
		spin_lock(&s->s_biolist_lock);
		sa_debug(DEBUG_BIO, "request sent, bh=%p\n", bio);
	}
	while (s->s_biopages) {
		pno = s->s_biopages - 1;
		free_page((unsigned long)s->s_bioarr[pno]);
		s->s_ppages++;
		s->s_bioarr[pno] = NULL;
		s->s_biopages--;
	}
	sa_debug(DEBUG_BIOQUE, "Free page=%p\n", s->s_bioarr);
	free_page((unsigned long)s->s_bioarr);
	s->s_ppages++;
	s->s_bioarr = NULL;
	spin_unlock(&s->s_biolist_lock);
}

static int noinline delay_bio(struct session_struct *s, struct bio *bio)
{
	int pno, idx;
	struct bio **bioptr;

	sa_debug(DEBUG_BIO, "delayed bio=%p\n", bio);

	spin_lock(&s->s_biolist_lock);
	if (s->s_biocount > MAX_BH_DELAYED - 1) {
		spin_unlock(&s->s_biolist_lock);
		sa_warn("No space for bio, count=%d.\n", s->s_biocount);
		return 1;
	}
	pno = s->s_biocount / REFS_PER_PAGE;
	idx = s->s_biocount % REFS_PER_PAGE;
	if (!s->s_bioarr[pno]) {
		s->s_bioarr[pno] = (struct bio **) get_zeroed_page(GFP_ATOMIC);
		if (!s->s_bioarr[pno]) {
			spin_unlock(&s->s_biolist_lock);
			sa_warn("No memory for bio queue, count=%d.\n",
							s->s_biocount);
			return 1;
		}
		s->s_gpages++;
		s->s_biopages++;
	}
	bioptr = s->s_bioarr[pno];
	*(bioptr + idx) = bio;
	s->s_biocount++;
	s->s_dbios++;
	spin_unlock(&s->s_biolist_lock);

	return 0;
}
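
/*
 * Capacity note (illustrative, assuming 4K pages on a 64-bit kernel):
 * s_bioarr is one page of pointers to pages of bio pointers, so up to
 * MAX_BH_DELAYED = REFS_PER_PAGE * MAX_BHPAGES = 512 * 512 = 262144
 * bios can be delayed per session.
 */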
static void cleanup_chain(struct session_struct *s,
		struct sa_chain *chain, struct sa_page *sab)
{
	struct sa_page *next;

	while (sab) {
		next = sab->next;
		s->s_blkcache_pages--;
		page_cache_release(sab->page);
		s->s_ppages++;
		kmem_cache_free(s->s_blkcachep, sab);
		sab = next;
	}
}

static void noinline cleanup_snapshot(struct session_struct *s)
{
	struct sa_chain *chain;
	int i;

	if (!s->s_blkcachep)
		return;

	for (i = 0; i < BLK_CHAINS; i++) {
		chain = &s->s_blkchains[i];
		cleanup_chain(s, chain, chain->busy);
		cleanup_chain(s, chain, chain->free);
	}

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19)
	kmem_cache_destroy(s->s_blkcachep);
#else
	if (kmem_cache_destroy(s->s_blkcachep))
		sa_warn("Unable to destroy cache.%s", "\n");
#endif
	s->s_blkcachep = NULL;
	return;
}

static inline void insert_into_free_list(struct sa_chain *chain,
			struct sa_page *sab)
{
	sab->next = chain->free;
	chain->free = sab;
}

static inline void insert_into_busy_list(struct sa_chain *chain,
			struct sa_page *sab)
{
	sab->next = chain->busy;
	chain->busy = sab;
}

static inline void remove_from_free_list(struct sa_chain *chain,
			struct sa_page *sab)
{
	chain->free = sab->next;
}

static inline void remove_from_busy_list(struct sa_chain *chain,
			struct sa_page *sab)
{
	chain->busy = sab->next;
}

static inline int find_free_on_page(struct sa_page *sab, int bppage,
							unsigned long long bno)
{
	int i;
	unsigned long long * bno_p;

	bno_p = &sab->bno;
	for (i = 0; i < bppage; i++, bno_p++)
		if (*bno_p == ~0ULL) {
			/* mark as busy */
			*bno_p = bno;
			return i;
		}
	sa_BUG("Busy page in free list(%p).\n", sab);
	return 0;
}

static inline int blocks_on_page(struct sa_page *sab, int bppage)
{
	int i, count;
	unsigned long long * bno_p;

	bno_p = &sab->bno;
	for (i = 0, count = 0; i < bppage; i++, bno_p++)
		if (*bno_p != ~0ULL)
			count++;
	return count;
}

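/* Returns the on-page index of bno, or bppage when bno is not present. */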
static inline int find_block_on_page(struct sa_page *sab, int bppage,
							unsigned long long bno)
{
	int i;
	unsigned long long * bno_p;

	bno_p = &sab->bno;
	for (i = 0; i < bppage; i++, bno_p++)
		if (*bno_p == bno)
			return i;
	return i;
}

static inline void free_block_on_page(struct sa_page *sab, int idx)
{
	unsigned long long * bno_p;

	bno_p = &sab->bno;
	/* mark as free */
	*(bno_p + idx) = ~0ULL;
}

static struct sa_page * find_block_in_chain(struct sa_page *sab, int bppage,
			unsigned long long bno, int *idx, struct sa_page **prev)
{
	struct sa_page *p;

	p = NULL;
	while (sab) {
		*idx = find_block_on_page(sab, bppage, bno);
		if (*idx != bppage) {
			if (p)
				*prev = p;
			break;
		}
		p = sab;
		sab = sab->next;
	}
	return sab;
}

static inline void init_sa_page(struct sa_page *sab, int bppage)
{
	int i;
	unsigned long long * bno_p;

	bno_p = &sab->bno;
	bno_p++;
	for (i = 1; i < bppage; i++, bno_p++)
		*bno_p = ~0ULL;
}

static unsigned long long any_block_on_page(struct sa_page *sab, int bppage)
{
	int i;
	unsigned long long * bno_p;

	bno_p = &sab->bno;
	for (i = 0; i < bppage; i++, bno_p++)
		if (*bno_p != ~0ULL)
			return *bno_p;
	return ~0ULL;
}

static unsigned long long any_block_in_cache(struct session_struct *s)
{
	struct sa_chain *chain;
	int i;
	unsigned long long ret;

	ret = ~0ULL;
	if (s->s_blkcache_pages == 0)
		return ret;
	for (i = 0; i < BLK_CHAINS; i++) {
		chain = &s->s_blkchains[i];
		spin_lock(&chain->lock);
		if (chain->busy) {
			ret = chain->busy->bno;
			spin_unlock(&chain->lock);
			break;
		}
		if (chain->free) {
			ret = any_block_on_page(chain->free, s->s_bppage);
			spin_unlock(&chain->lock);
			break;
		}
		spin_unlock(&chain->lock);
	}
	return ret;
}

static int sa_cache_emlist_init(struct session_struct *s, int prealloc)
{
	struct sa_page *sab;
	int ret, i, delta;

	delta = snap_emergency_size >> 4;
	s->s_blkcache_emmin = snap_emergency_size - delta;
	if (!prealloc)
		return 0;

	ret = -ENOMEM;
	for (i = 0; i < snap_emergency_size; i++) {
		sab = (struct sa_page *)kmem_cache_alloc(s->s_blkcachep,
						GFP_KERNEL);
		if (!sab)
			goto out;
		s->s_blkcache_empages++;
		sab->page = alloc_page(GFP_HIGHUSER);
		if (!sab->page) {
			kmem_cache_free(s->s_blkcachep, sab);
			goto out;
		}
		s->s_gpages++;
		sab->next = s->s_blk_emlist;
		s->s_blk_emlist = sab;
	}
	ret = 0;

out:
	return ret;
}

static struct sa_page * sa_cache_emget(struct session_struct *s)
{
	struct sa_page *sab;

	sab = NULL;
	spin_lock(&s->s_blkcache_emlock);
	if (s->s_blkcache_empages > s->s_blkcache_emmin) {
		sab = s->s_blk_emlist;
		s->s_blk_emlist = sab->next;
		s->s_blkcache_empages--;
		s->s_blkcache_pages++;
		goto out_unlock;
	}
	spin_unlock(&s->s_blkcache_emlock);

	sab = (struct sa_page *)kmem_cache_alloc(s->s_blkcachep, GFP_ATOMIC);
	if (!sab)
		goto get_from_list;
	sab->page = alloc_page(GFP_SNAPHIGH);
	if (!sab->page) {
		kmem_cache_free(s->s_blkcachep, sab);
		goto get_from_list;
	}
	s->s_gpages++;
	s->s_blkcache_pages++;
	goto out;

get_from_list:
	spin_lock(&s->s_blkcache_emlock);
	sab = s->s_blk_emlist;
	if (sab) {
		s->s_blk_emlist = sab->next;
		s->s_blkcache_empages--;
		s->s_blkcache_pages++;
	}

out_unlock:
	spin_unlock(&s->s_blkcache_emlock);
out:
	return sab;
}

static void sa_cache_emput(struct session_struct *s, struct sa_page *sab)
{
	spin_lock(&s->s_blkcache_emlock);
	s->s_blkcache_pages--;
	if (s->s_blkcache_empages < snap_emergency_size) {
		sab->next = s->s_blk_emlist;
		s->s_blk_emlist = sab;
		s->s_blkcache_empages++;
		spin_unlock(&s->s_blkcache_emlock);
		return;
	}
	spin_unlock(&s->s_blkcache_emlock);
	page_cache_release(sab->page);
	s->s_ppages++;
	kmem_cache_free(s->s_blkcachep, sab);
}

static void sa_cache_emlist_destroy(struct session_struct *s)
{
	struct sa_page *sab;

	spin_lock(&s->s_blkcache_emlock);
	while (s->s_blk_emlist) {
		sab = s->s_blk_emlist;
		s->s_blk_emlist = sab->next;
		s->s_blkcache_empages--;
		spin_unlock(&s->s_blkcache_emlock);

		page_cache_release(sab->page);
		s->s_ppages++;
		kmem_cache_free(s->s_blkcachep, sab);
		spin_lock(&s->s_blkcache_emlock);
	}
	spin_unlock(&s->s_blkcache_emlock);
}

static int sa_cache_chain_read(struct session_struct *s, struct sa_chain* chain,
		void *data, unsigned long long bno, int mode, unsigned int flags)
{
	struct sa_page *prev;
	struct sa_page **head;
	struct sa_page *sab;
	int idx, bppage, ret;
	char *kaddr;

	ret = 0;
	bppage = s->s_bppage;
	idx = 0;
	prev = NULL;
	head = &chain->busy;
	sab = find_block_in_chain(chain->busy, bppage, bno, &idx, &prev);
	if (sab)
		goto copy_data;

	prev = NULL;
	head = &chain->free;
	sab = find_block_in_chain(chain->free, bppage, bno, &idx, &prev);
	if (sab)
		goto copy_data;
	/* not found */
	goto out;

copy_data:
	if (mode == FAKE_READ)
		goto arrange_lists;
	kaddr = sn_kmap_atomic(sab->page);
	memcpy(data, kaddr + idx * s->s_bsize, s->s_bsize);
	sn_kunmap_atomic(kaddr);
	s->s_rcblocks++;

arrange_lists:
	sa_debug(DEBUG_CACHE, "mode=%d flags=%u bno=%llu\n", mode, flags, bno);
	ret = s->s_bsize;
	if (!(flags & SNAP_READ_ONCE))
		goto out;
	s->s_fcblocks++;
	free_block_on_page(sab, idx);
	/* remove from list */
	if (prev)
		prev->next = sab->next;
	else
		*head = sab->next;
	if (bppage == 1 || !blocks_on_page(sab, bppage)) {
		sa_cache_emput(s, sab);
		goto out;
	}
	insert_into_free_list(chain, sab);
out:
	return ret;
}

struct level0entry {
	unsigned long long key;
	unsigned long long value;
};

#define STOP_ENTRY(SP) 	((void*)SP->entry - page_address(SP->page) > \
			PAGE_SIZE - sizeof(struct level_entry))

void map_free(struct session_struct* s)
{
	struct stack_entry *sp, *end;
	struct group_map* map;

	map = &s->s_groupmap;
	end = sp = map->stack + map->level;
	sp->page = map->root;
	if (sp > map->stack)
		sp->entry = page_address(sp->page);
	do {
		while (sp > map->stack) {
			sp--;
			sp->page = (sp + 1)->entry->page;
			if (sp - map->stack)
				sp->entry = page_address(sp->page);
		}
		do  {
			page_cache_release(sp->page);
			s->s_ppages++;
			if (++sp > end)
				break;
			sp->entry++;
		} while (STOP_ENTRY(sp) || !sp->entry->page);
	} while(sp <= end);
}

static int map_init(struct session_struct* s, unsigned long uaddr, unsigned n)
{
	int ret;
	struct page* destpage, *bubble;
	struct stack_entry* sp, *max_sp;
	struct group_map* map;

	ret = 0;
	map = &s->s_groupmap;
	memset(map->stack, 0, sizeof(map->stack));
	max_sp = map->stack;
	bubble = 0;
	destpage = 0;
	while (n) {
		unsigned copy_count;
		unsigned copy_size;
		unsigned long long max_key;
		struct level0entry* dest;

		ret = -ENOMEM;
		destpage = alloc_page(GFP_HIGHUSER);
		if (!destpage)
			break;
		s->s_gpages++;
		dest = (struct level0entry*)kmap(destpage);
		if (!dest)
			break;
		ret = 0;
		copy_count = PAGE_SIZE / sizeof(struct level0entry);
		while (copy_count > n)
			dest[--copy_count].key = ~0;
		copy_size = copy_count * sizeof(struct level0entry);
		if (copy_from_user(dest, (void*)uaddr, copy_size)) {
			ret = -EACCES;
			kunmap(destpage);
			break;
		}
		uaddr += copy_size;
		n -= copy_count;
		bubble = map->stack[0].page;
		max_key = map->stack[0].max_key;
		map->stack[0].page = destpage;
		map->stack[0].max_key = dest[copy_count - 1].key;
		kunmap(destpage);
		destpage = 0;
		for (sp = &map->stack[1]; bubble; sp++) {
			if (!sp->page) {
				sp->page = alloc_page(GFP_KERNEL);
				if (!sp->page) {
					ret = -ENOMEM;
					break;
				}
				s->s_gpages++;
				sp->entry = page_address(sp->page);
			}
			sp->entry->page = bubble;
			sp->entry->max_key = sp->max_key = max_key;
			sp->entry++;
			if (STOP_ENTRY(sp)) {
				bubble = sp->page;
				sp->page = 0;
			} else {
				/*sp->entry->page = 0; ???*/
				bubble = 0;
			}
		}
		if (--sp > max_sp)
			max_sp = sp;
	}
	for (sp = &map->stack[1]; sp <= max_sp; sp++) {
		if (!sp->page) {
			sp->page = alloc_page(GFP_KERNEL);
			if (!sp->page) {
				ret = -ENOMEM;
				break;
			}
			s->s_gpages++;
			sp->entry = page_address(sp->page);
		}
		sp->entry->page = (sp - 1)->page;
		sp->entry->max_key = map->stack[0].max_key;
		sp->entry++;
		(sp - 1)->page = 0;
		for (; !STOP_ENTRY(sp); sp->entry++) {
			sp->entry->max_key = ~0;
			sp->entry->page = 0;
		}
	}
	map->max_key = map->stack[0].max_key;
	map->level = --sp - map->stack;
	map->root = sp->page;
	sp->page = 0;
	if (destpage) {
		page_cache_release(destpage);
		s->s_ppages++;
	}
	if (bubble) {
		page_cache_release(bubble);
		s->s_ppages++;
	}
	for (sp = map->stack; sp <= max_sp; sp++)
		if (sp->page) {
			page_cache_release(sp->page);
			s->s_ppages++;
		}
	if (ret)
		map_free(s);
	return ret;
}

void map_init_iterator(struct group_map* map)
{
	struct stack_entry* sp;

	map->stack[map->level].page = map->root;
	for (sp = map->stack + map->level; sp > map->stack; ) {
		sp->entry = page_address(sp->page);
		sp--;
		sp->page = (sp+1)->entry->page;
	}
	map->stack[0].entry = kmap(map->stack[0].page);
}

struct level0entry* map_iterator_get_value(struct group_map* map)
{
	return (struct level0entry*)map->stack[0].entry;
}

int map_iterator_next(struct group_map* map)
{
	struct stack_entry* sp;

	struct stack0entry {
		struct page* page;
		struct level0entry* entry;
	}* sp0;

	sp0 = (struct stack0entry*)map->stack;
	sp0->entry++;

	if ((void*)(sp0->entry + 1) > page_address(sp0->page) + PAGE_SIZE ||
					sp0->entry->key > map->max_key) {
		kunmap(sp0->page);
		for (sp = map->stack + 1; sp <= map->stack + map->level; sp++) {
			sp->entry++;
			if (!STOP_ENTRY(sp) && sp->entry->page)
				break;
		}
		if (sp > map->stack + map->level)
			return 0;

		while (sp > map->stack) {
			sp--;
			sp->page = (sp+1)->entry->page;
			sp->entry = sp - map->stack ? page_address(sp->page)
							: kmap(sp->page);
		}
	}
	return 1;
}
void map_iterator_stop(struct group_map* map)
{
	kunmap(map->stack[0].page);
}

struct level0entry* map_search(struct group_map* map, unsigned long long key,
						struct page** entry_page)
{
	int level;
	struct page* page;
	int i, l, r;
	struct level0entry* array0;

	if (key > map->max_key)
		return 0;

	page = map->root;

	for (level = map->level; level; level--) {
		struct level_entry* array;

		array = page_address(page);
		l = 0;
		r = PAGE_SIZE / sizeof(struct level_entry) - 1;
		do {
			i = (l + r)/2;
			if (array[i].max_key >= key)
				r = i;
			else
				l = i + 1;
		} while (r != l);
		page = array[r].page;
	}

	array0 = kmap(page);
	l = 0;
	r = PAGE_SIZE / sizeof(struct level0entry) - 1;
	do {
		i = (l + r)/2;
		if (array0[i].key > key)
			r = i - 1;
		else if (array0[i].key < key)
			l = i + 1;
		else {
			*entry_page = page;
			return &array0[i];
		}
	} while (r >= l);
	*entry_page = NULL;
	kunmap(page);
	return 0;
}
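
/*
 * A minimal iterator usage sketch (illustrative, kept non-compiled like
 * the other reference code in this file): visit level0entry items in key
 * order. The predicate is hypothetical; note map_iterator_next() releases
 * the kmap itself when the iterator runs off the end, so
 * map_iterator_stop() is only needed when stopping early.
 */
#if 0
static void example_find_entry(struct group_map *map)
{
	struct level0entry *e;

	map_init_iterator(map);
	do {
		e = map_iterator_get_value(map);
		if (e->value == 0) {	/* hypothetical predicate */
			map_iterator_stop(map);	/* early stop releases kmap */
			return;
		}
	} while (map_iterator_next(map));
	/* iterator exhausted; map_iterator_next() released the kmap */
}
#endif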

#define sa_cache_chain_remove(s, chain, bno) \
	sa_cache_chain_read(s, chain, 0, bno, FAKE_READ, SNAP_READ_ONCE)

static int sa_cache_save(struct session_struct *s, void *data,
					unsigned long long bno)
{
	struct sa_page *sab;
	struct sa_chain *chain;
	int idx, bppage, ret, new_page;
	char *kaddr;
	struct group_entry* entry;
	struct page* entry_page;

	ret = 1;
	idx = 0;
	new_page = 0;
	bppage = s->s_bppage;
	entry_page = 0;

	sa_debug(DEBUG_API, "bno=%llu\n", bno);
	chain = &s->s_blkchains[bno & (BLK_CHAINS - 1)];
	spin_lock(&chain->lock);

	/* The block may already have been read while we were waiting on the bio */
	if (!(s->s_state == SNAP_READINGMAP && s->s_usemap ?
			(entry = map_search(&s->s_groupmap, bno, &entry_page)) :
			is_block_in_map(&s->s_blkmap, bno))) {
		s->s_rwcolls++;
		ret = 0;
		goto out_unlock;
	}

	if (s->s_state == SNAP_READINGMAP)
		sa_cache_chain_remove(s, chain, bno);

	if (bppage > 1 && chain->free) {
		sab = chain->free;
		idx = find_free_on_page(sab, bppage, bno);
		goto copy_data;
	}
	sab = sa_cache_emget(s);
	if (!sab)
		goto out_unlock;
	sab->bno = bno;
	new_page = 1;
	if (bppage > 1)
		init_sa_page(sab, bppage);

copy_data:
	kaddr = sn_kmap_atomic(sab->page);
	memcpy(kaddr + idx * s->s_bsize, data, s->s_bsize);
	sn_kunmap_atomic(kaddr);

	if (s->s_state == SNAP_READINGMAP && s->s_usemap) {
		if (!entry->init)
			sa_debug(DEBUG_API, "INITING group %u bno = %llu\n",
						entry->group, entry->bno);
		entry->init = entry->cached = 1;
	} else if (s->s_state != SNAP_READINGMAP)
		clear_block_in_map(&s->s_blkmap, bno);

	s->s_cblocks++;
	if (s->s_cblocks - s->s_fcblocks > s->s_mcblocks)
		s->s_mcblocks = s->s_cblocks - s->s_fcblocks;

	ret = 0;
	if (bppage == 1) {
		insert_into_busy_list(chain, sab);
		goto out_unlock;
	}
	if (blocks_on_page(sab, bppage) == bppage) {
		remove_from_free_list(chain, sab);
		insert_into_busy_list(chain, sab);
		goto out_unlock;
	}
	if (new_page)
		insert_into_free_list(chain, sab);

out_unlock:
	if (entry_page)
		kunmap(entry_page);
	spin_unlock(&chain->lock);
	return ret;
}

/* return number of read bytes or error */
static int sa_cache_read(struct session_struct *s, void *data,
		unsigned long long bno, int mode, unsigned int flags)
{
	struct sa_chain *chain;
	int ret;

	chain = &s->s_blkchains[bno & (BLK_CHAINS - 1)];
	spin_lock(&chain->lock);

	ret = sa_cache_chain_read(s, chain, data, bno, mode, flags);

	spin_unlock(&chain->lock);
	return ret;
}

#if (DEBUG != 0) && (DEBUG_LEVEL & DEBUG_BIO)
static void print_bio(struct bio *bio, char *pref)
{
	sa_warn("%s bio=%p, dev=%x, sector=%llu, bi_flags=%lx"
		" bi_rw=%lx bi_size=%d bi_vcnt=%d bi_io_vec=%p"
		" bi_max_vecs=%d\n", pref, bio,
		bio->bi_bdev ? bio->bi_bdev->bd_dev : -1, 
		(unsigned long long)sn_bio_bi_sector(bio), bio->bi_flags,
		bio->bi_rw, sn_bio_bi_size(bio), bio->bi_vcnt, bio->bi_io_vec,
		bio->bi_max_vecs);
}
#define dump_bio(x, y) print_bio(x, y)
#else
#define dump_bio(x, y)
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
static int sa_cache_bio_end_io(struct bio *bio, unsigned int bytes_done,
                               int err)
{
	if (sn_bio_bi_size(bio)) {
		sa_warn("bio->bi_size is invalid\n");
		dump_bio(bio, "sa_cache_bio_end_io");
		return BIO_EIO_RET_VAL_ERR;
	}

	complete((struct completion *)bio->bi_private);
	return BIO_EIO_RET_VAL_OK;
}
#else
static void sa_cache_bio_end_io(struct bio *bio, int err)
{
	complete((struct completion *)bio->bi_private);
	return BIO_EIO_RET_VAL_OK;
}
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
static int sa_pending_bio_end_io(struct bio *bio, unsigned int bytes_done,
                               int err)
#else
static void sa_pending_bio_end_io(struct bio *bio, int err)
#endif
{
	unsigned long flags;

	struct pending_request *preq = (struct pending_request*)
							bio->bi_private;
	struct pending_queue *pq = preq->pr_queue;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
	if (bio->bi_size) {
		sa_warn("bio->bi_size is invalid\n");
		dump_bio(bio, "sa_pending_bio_end_io");
	}
#endif
	/*
	 * ->pq_bio was completed, so add request to ready requests
	 * list for later handling in process context.
	 */

	spin_lock_irqsave(&pq->pq_lock, flags);
	if (pq->pq_state != PQ_RUNNING && pq->pq_state != PQ_CLOSED)
		BUG();
	atomic_dec(&pq->pq_notready_req);
	atomic_inc(&pq->pq_ready_req);
	pqueue_add_request(pq, preq);
	spin_unlock_irqrestore(&pq->pq_lock,flags);
	complete(&pq->pq_bio_done);

	return BIO_EIO_RET_VAL_OK;
}

static int submit_pending_request(struct session_struct *s, struct bio *rbio,
			struct bio *wbio, unsigned long long bno)
{
	struct pending_queue *pq = &s->s_pending_queue;
	struct pending_request *preq = NULL;
	int ready, not_ready, qstate;
	int ret;
	spin_lock_irq(&pq->pq_lock);
	ready = atomic_read(&pq->pq_ready_req);
	not_ready = atomic_read(&pq->pq_notready_req);
	qstate = pq->pq_state;
	BUG_ON(ready < 0);
	BUG_ON(not_ready < 0);
	if (ready + not_ready > s->s_blkcache_emmin) {
		s->s_async_retr++;
		ret = -EAGAIN;
		goto out_err;
	}
	if (qstate != PQ_RUNNING) {
		/* Seems pending queue was closed */
		ret = -EBADFD;
		goto out_err;
	}
	preq = alloc_pending_request();
	if (preq == NULL) {
		ret = -ENOMEM;
		goto out_err;
	}
	memset(preq, 0, sizeof(struct pending_request));
	atomic_inc(&pq->pq_notready_req);
	spin_unlock_irq(&pq->pq_lock);
	preq->pr_next = NULL;
	preq->pr_wbio = wbio;
	preq->pr_queue = pq;

	rbio->bi_private = preq;
	rbio->bi_end_io = sa_pending_bio_end_io;
	preq->pr_rbio = rbio;
	preq->pr_rblkno = bno;

	if (rbio)
		dump_bio(rbio, "sa_cache_block async case read");
	if (wbio)
		dump_bio(wbio, "sa_cache_block async case write");
	submit_bio(READ, preq->pr_rbio);
	return 0;
out_err:
	spin_unlock_irq(&pq->pq_lock);
	return ret;
}

static int sa_cache_save_bio(struct session_struct *s, struct bio *bio,
		unsigned long long bno)
{
	unsigned idx;
	unsigned bppage_shift = PAGE_SHIFT - (s->s_spbshift + 9);
	unsigned nr_blocks = bio->bi_vcnt << bppage_shift;
	unsigned bppage_mask = (s->s_bppage - 1);
	struct bio_vec *bv = bio->bi_io_vec;

	if (sn_is_error_bio(bio)) {
		sn_set_mb(s->s_state, SNAP_READING_ERR);
		unregister_make_request(s);
		return 1;
	}
	for (idx = 0; idx < nr_blocks; idx++) {
		unsigned pg_idx = idx >> bppage_shift;
		unsigned pg_off = (idx & bppage_mask) * s->s_bsize;
		if (sa_cache_save(s, page_address(bv[pg_idx].bv_page)
				 + pg_off, bno + idx))
			return 1;
	}
	return 0;
}

static void destroy_cached_bio(struct session_struct *s, struct bio *bio)
{
	if (bio) {
		int idx;
		unsigned nr_pages = bio->bi_vcnt;
		for (idx = 0; idx < nr_pages; idx++) {
			if (bio->bi_io_vec[idx].bv_page) {
				page_cache_release(bio->bi_io_vec[idx].bv_page);
				s->s_ppages++;
			}
		}
		bio_put(bio);
	}
}

static int sa_cache_block(struct session_struct *s, struct bio *orig_bio,
			unsigned long long bno, unsigned nr_blocks, int *pended)
{
	struct page *page;
	struct bio *bio;
	int ret;
	int idx;
	unsigned bppage_shift, nr_pages;

	bppage_shift = PAGE_SHIFT - (s->s_spbshift + 9);
	nr_pages = ((nr_blocks - 1) >> bppage_shift) + 1;
	ret = -ENOMEM;
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio)
		return ret;
#ifdef HAVE_BVEC_ITER
	bio->bi_iter.bi_sector = ((sector_t) bno) * s->s_spb;
#else
	bio->bi_sector = ((sector_t) bno) * s->s_spb;
#endif
	bio->bi_bdev = s->s_bdev;
	for (idx = 0; idx < nr_pages; idx++) {
		page = alloc_page(GFP_NOIO);
		if (!page)
			goto out;
		s->s_gpages++;
		if (unlikely(!bio_add_page(bio, page, PAGE_SIZE, 0))) {
			sa_warn("Not all pages will be saved: nr_pages=%d, idx=%d",
					nr_pages, idx);
			page_cache_release(page);
			s->s_gpages++;
		}
	}
resubmit:
	ret = submit_pending_request(s, bio, orig_bio, bno);
	if (unlikely(ret == -EAGAIN)) {
		schedule();
		goto resubmit;
	}
	if (unlikely(ret))
		goto out;
	*pended = 1;
	s->s_async_req++;
	return 0;

out:
	destroy_cached_bio(s, bio);
	return ret;
}

static int sa_cache_bio(struct session_struct *s, struct bio *bio, int *pended)
{
	unsigned long long sbno, ebno, i;
	unsigned long long sbno_cow, ebno_cow;
	sector_t start, end; /* relative to part start */

	dump_bio(bio, "sa_cache_bio");
	start = sn_bio_bi_sector(bio) - s->s_pstart;
	if (sn_bio_bi_sector(bio) < s->s_pstart)
		start = 0;
	end = sn_bio_bi_sector(bio) + (sn_bio_bi_size(bio) >> 9) - s->s_pstart;
	if (end >= s->s_plen)
		end = s->s_plen;
	sbno = start >> s->s_spbshift;
	ebno = (end + s->s_spb - 1) >> s->s_spbshift;
	sbno_cow = ebno + 1;
	ebno_cow = sbno;
	for (i = sbno; i < ebno; i++) {
		if (is_block_in_map(&s->s_blkmap, i)) {
			sbno_cow = i;
			ebno_cow = i + 1;
			break;
		}
	}
	/* No block from this bio's range is present in the map */
	if (sbno_cow > ebno) {
		*pended = 0;
		return 0;
	}
	for (i = ebno - 1; i > sbno_cow; i--) {
		if (is_block_in_map(&s->s_blkmap, i)){
			ebno_cow = i + 1;
			break;
		}
	}
	if (sa_cache_block(s, bio, sbno_cow, ebno_cow - sbno_cow, pended))
		return 1;
	return 0;
}
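
/*
 * Worked example of the sector-to-block arithmetic above (illustrative,
 * assuming s_bsize = 4096, s_spb = 8, s_spbshift = 3): a 16 KiB write at
 * relative sector 128 gives start = 128, end = 160, so sbno = 16 and
 * ebno = (160 + 7) >> 3 = 20, i.e. blocks 16..19 are candidates for
 * copy-on-write.
 */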

static int sa_save_bio_to_cache(struct session_struct *s, struct bio *bio)
{
	unsigned int pg_idx;
	unsigned long long bno;
	sector_t start; /* relative to part start */

	start = sn_bio_bi_sector(bio) - s->s_pstart;
	if (sn_bio_bi_sector(bio) < s->s_pstart)
		start = 0;
	bno = start >> s->s_spbshift;

	for (pg_idx = 0; pg_idx < bio->bi_vcnt; pg_idx++) {
		struct bio_vec* bvec;	/* bio_vec of this page */
		unsigned int pg_off;	/* block offset withing page */

		bvec = bio->bi_io_vec + pg_idx;
		for (pg_off = 0; pg_off < bvec->bv_len; pg_off +=s->s_bsize) {
			if (sa_cache_save(s, page_address(bvec->bv_page) +
					pg_off + bvec->bv_offset, bno++))
				return 1;
		}
	}
	return 0;
}

static void wait_for_users(struct session_struct *s)
{
	spin_lock(&sessions_lock);
	while (!atomic_dec_and_test(&s->s_users)) {
		atomic_inc(&s->s_users);
		spin_unlock(&sessions_lock);
		schedule();
		spin_lock(&sessions_lock);
	}
	atomic_inc(&s->s_users);
}

static int session_handle_bio(struct session_struct *s, struct bio *bio,
		int *pended)
{
	int state;

	state = s->s_state;
	dump_bio(bio, "session_make_request write");
	if (state == SNAP_FREEZING || state == SNAP_FROZEN ||
					state == SNAP_INITINGMAP) {
		if (!delay_bio(s, bio)) {
			*pended = 1;
			return 0;
		}
		sn_set_mb(s->s_state, SNAP_FREEZE_ERR);
		unregister_make_request(s);
		sn_thaw_bdev(s);
		/* pass bh to original handler */
	} else if (state == SNAP_MAPPED) {
		if (!sa_cache_bio(s, bio, pended))
			return 0;
		sn_set_mb(s->s_state, SNAP_READING_ERR);
		unregister_make_request(s);
	} else if (state == SNAP_READINGMAP) {
		*pended = 0;
		if (!sa_save_bio_to_cache(s, bio))
			return 0;
		sn_set_mb(s->s_state, SNAP_READING_ERR);
		unregister_make_request(s);
	}
	return 1;
}

static void handle_pending_request(struct session_struct *s)
{
	struct pending_queue *pq = &s->s_pending_queue;
	struct pending_request *preq = NULL;
	struct bio *rbio = NULL;
	struct bio *wbio = NULL;
	spin_lock_irq(&pq->pq_lock);
	preq = pqueue_get_request(pq);
	atomic_dec(&pq->pq_ready_req);
	spin_unlock_irq(&pq->pq_lock);
	BUG_ON(!preq);
	rbio = preq->pr_rbio;
	wbio = preq->pr_wbio;
	BUG_ON(!rbio);
	if (sa_cache_save_bio(s, rbio, preq->pr_rblkno)) {
		sn_set_mb(s->s_state, SNAP_READING_ERR);
		unregister_make_request(s);
	}

	if (wbio)
		generic_make_request(wbio);

	destroy_pending_request(s, preq);
}

/*
 * Worker thread that handles pending bios, to avoid blocking in our
 * make_request_fn.
 */
static int pending_req_handler_thread(void *data)
{
	struct session_struct *s = data;
	struct pending_queue *pq =  &s->s_pending_queue;
	atomic_inc(&s->s_users);
#ifdef USE_KERNEL_THREAD
	daemonize("pending_bio%x", s->s_kdev);
#endif
	/*current->flags |= PF_NOFREEZE;*/
	set_user_nice(current, -20);
	spin_lock_irq(&pq->pq_lock);
	BUG_ON(pq->pq_state != PQ_STOPPED);
	pq->pq_state = PQ_RUNNING;
	spin_unlock_irq(&pq->pq_lock);
	/*
	 * complete it, we are running
	 */
	complete(&pq->pq_done);

	while (1) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
		wait_for_completion(&pq->pq_bio_done);
#else
		if (wait_for_completion_interruptible(&pq->pq_bio_done)) {
			if (!s->s_simulate_freeze)
				snapapi_try_to_freeze();
			continue;
		}
#endif
		if (!atomic_read(&pq->pq_ready_req)) {
			/*
			 * ->pq_bio_done was completed but the queue is empty.
			 * This fake event was generated by the session
			 * unregister routine. We have to wait until all
			 * not-ready pending requests become ready. After all
			 * pending requests have been handled we may safely
			 * exit.
			 */
			spin_lock_irq(&pq->pq_lock);
			if (pq->pq_state != PQ_CLOSED) {
				sa_debug(DEBUG_API, "close queue notready=%d\n",
						atomic_read(&pq->pq_ready_req));
				pq->pq_state = PQ_CLOSED;
			}
			spin_unlock_irq(&pq->pq_lock);
			goto check_queue;
		}

		handle_pending_request(s);
check_queue:
		if (pq->pq_state == PQ_CLOSED) {
			spin_lock_irq(&pq->pq_lock);
			if (!atomic_read(&pq->pq_notready_req) &&
					!atomic_read(&pq->pq_ready_req)) {
				/* All pending requests have been handled */
				spin_unlock_irq(&pq->pq_lock);
				break;
			}
			spin_unlock_irq(&pq->pq_lock);
		}
	}
	pq->pq_state = PQ_STOPPED;
	complete(&pq->pq_done);
	atomic_dec(&s->s_users);
	return 0;
}

static int start_req_handler_thread(struct session_struct *s)
{
	int ret;
#ifndef USE_KERNEL_THREAD
	struct task_struct *th;
	th = kthread_create(pending_req_handler_thread, s, "snapapi_prht");
	if (IS_ERR(th)) {
		ret = PTR_ERR(th);
		sa_debug(DEBUG_API, "Can't create thread err=%d.\n", ret);
		return ret;
	}
	wake_up_process(th);
#else
	ret = kernel_thread(pending_req_handler_thread, s, CLONE_KERNEL);
	if (ret < 0) {
		sa_debug(DEBUG_API, "Can't create thread err=%d.\n", ret);
		return ret;
	}
#endif
	wait_for_completion(&s->s_pending_queue.pq_done);
	return 0;
}

static void stop_req_handler_thread(struct session_struct *s, int wait)
{
	int ready, not_ready, qstate;
	struct pending_queue *pq;

	pq = &s->s_pending_queue;
restart:
	spin_lock_irq(&pq->pq_lock);
	ready = atomic_read(&pq->pq_ready_req);
	not_ready = atomic_read(&pq->pq_notready_req);
	spin_unlock_irq(&pq->pq_lock);
	qstate = pq->pq_state;
	BUG_ON(ready < 0);
	BUG_ON(not_ready < 0);
	if (wait && (ready + not_ready)) {
		schedule();
		goto restart;
	}
	if (qstate != PQ_STOPPED) {
		/* Send a close event to the pending queue and
		 * wait until it has stopped */
		complete(&pq->pq_bio_done);
		wait_for_completion(&pq->pq_done);
		BUG_ON(pq->pq_state != PQ_STOPPED);
	}
}

static MAKE_REQUEST_RETURN_VALUE snapapi_make_request(sn_request_queue *q, struct bio *bio)
{
	struct session_struct *s;
	make_request_fn *fn;
	int state;
	int pended = 0;

	s = NULL;
	fn = NULL;
	while (1) {
		spin_lock(&sessions_lock);
		if (!s)
			s = find_by_queue(bio, q);
		else
			s = find_by_queue_next(bio, q, s);
		if (!s) {
			spin_unlock(&sessions_lock);
			break;
		}
		atomic_inc(&s->s_users);
		spin_unlock(&sessions_lock);
		if (!fn)
			fn = s->s_make_request_fn;
		if (!(bio->bi_rw & REQ_WRITE) || !sn_bio_bi_size(bio)) {
			dump_bio(bio, "sesson_make_request read");
			atomic_dec(&s->s_users);
			break;
		}
		state = s->s_state;
		if (state == SNAP_FREEZING) /* freeze whole device */
			goto next_session;
		/*
		 * We assume the bio has already been remapped to the disk by
		 * generic_make_request(), so the device can't be a partition
		 * here.
		 */
		if (bio->bi_bdev->bd_contains != bio->bi_bdev) {
			dev_t ddev;
			ddev = bio->bi_bdev->bd_contains ? bio->bi_bdev->bd_contains->bd_dev : 0;
			sa_warn("bi_dev(%x) != bd_contains(%x)\n", bio->bi_bdev->bd_dev, ddev);
		}
		if (snapapi_is_not_our_bio(s, bio))
			goto next_session;
		session_handle_bio(s, bio, &pended);
		if (pended) {
			/* bio was pended and will be handled asynchronously */
			atomic_dec(&s->s_users);
			return MAKE_REQUEST_EXIT_STATUS;
		}
next_session:
		atomic_dec(&s->s_users);
	}
	if (unlikely(!fn)) {
		fn = q->make_request_fn;
		if (!fn || fn == snapapi_make_request)
			goto out_err;
	}
	return fn(q, bio);

out_err:
	sn_bio_endio(bio, sn_bio_bi_size(bio), -EIO);
	return MAKE_REQUEST_EXIT_STATUS;
}

static int register_make_request(struct session_struct * s)
{
	sn_request_queue *q;
	struct list_head *tmp;

	sa_debug(DEBUG_API, "\n");
	q = snapapi_get_dev_queue(s);
	if (!q)
		return 1;
	snapapi_lock_dev_queue(q);
	spin_lock(&sessions_lock);
	list_for_each(tmp, &sessions_list) {
		struct session_struct *tmp_s;
		tmp_s = list_entry(tmp, struct session_struct, s_list);
		if (tmp_s->s_request_queue == q) {
			s->s_request_queue = q;
			s->s_make_request_fn = tmp_s->s_make_request_fn;
			spin_unlock(&sessions_lock);
			snapapi_unlock_dev_queue(q);
			sa_debug(DEBUG_API, "Keep queue as is.\n");
			return 0;
		}
	}
	s->s_request_queue = q;
	s->s_make_request_fn = q->make_request_fn;
	q->make_request_fn = snapapi_make_request;
	spin_unlock(&sessions_lock);
	snapapi_unlock_dev_queue(q);
	return 0;
}

static void unregister_make_request(struct session_struct * s)
{
	sn_request_queue *q;
	struct list_head *tmp;
	sa_debug(DEBUG_API, "s=%p\n", s);

	if (!s->s_make_request_fn)
		return;
	q = s->s_request_queue;
	if (!q)
		return;
	snapapi_lock_dev_queue(q);
	spin_lock(&sessions_lock);
	list_for_each(tmp, &sessions_list) {
		struct session_struct *tmp_s;
		tmp_s = list_entry(tmp, struct session_struct, s_list);
		if (tmp_s->s_request_queue == q && tmp_s != s) {
			s->s_make_request_fn = NULL;
			s->s_request_queue = NULL;
			spin_unlock(&sessions_lock);
			snapapi_unlock_dev_queue(q);
			sa_debug(DEBUG_API, "Keep queue as is. s=%p\n", s);
			return;
		}
	}
	q->make_request_fn = s->s_make_request_fn;
	s->s_make_request_fn = NULL;
	s->s_request_queue = NULL;
	spin_unlock(&sessions_lock);
	snapapi_unlock_dev_queue(q);
	sa_debug(DEBUG_API, "make_request deinstalled OK. s=%p\n", s);
	return;
}

static void do_resolver(void)
{
	struct session_struct *s;
	sa_debug(DEBUG_API, "\n");

	spin_lock(&sessions_lock);
	s = find_deadlocked();
	if (!s) {
		spin_unlock(&sessions_lock);
		return;
	}
	atomic_inc(&s->s_users);
	spin_unlock(&sessions_lock);

	sn_set_mb(s->s_state, SNAP_FREEZE_ERR);
	unregister_make_request(s);
	sa_info("Real cleanup started... s=%p", s);
	sn_thaw_bdev(s);
	cleanup_biolist(s);
	atomic_dec(&s->s_users);
}
#ifndef USE_KERNEL_THREAD 
static int resolver_loop(void *flag)
{
	sa_debug(DEBUG_API, "\n");

	while (1) {
		snapapi_try_to_freeze();
		set_current_state(TASK_INTERRUPTIBLE);
		if (!resolver_thread_continue)
			break;

		schedule();
		if (resolver_thread_continue)
			do_resolver();
		else
			break;
		if (signal_pending(current))
			flush_signals(current);
	}
	sa_debug(DEBUG_API, "exiting\n");
	complete_and_exit(&resolver_thread_exited, 0);
}
#else
static int resolver_loop(void *flag)
{
	sa_debug(DEBUG_API, "\n");
	lock_kernel();
	init_waitqueue_head(&resolver_thread_signal);
	daemonize("snapapid");
	allow_signal(SIGKILL);
	while (resolver_thread_continue) {
		interruptible_sleep_on_timeout(&resolver_thread_signal,
							MAX_SCHEDULE_TIMEOUT);
		if (resolver_thread_continue)
			do_resolver();
		else
			break;
		if (signal_pending(current))
			flush_signals(current);
	}
	unlock_kernel();
	sa_debug(DEBUG_API, "exiting\n");
	complete_and_exit(&resolver_thread_exited, 0);
}
#endif
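
/*
 * Deadlock detection (summary): while a device is frozen, every ioctl bumps
 * s_ioctlcnt and the timer below fires each TIMER_INTERVAL. If the counter
 * moved since the previous tick, userspace is making progress and the timer
 * is re-armed. If it did not move, the session is assumed deadlocked (e.g.
 * the backup process itself blocked writing to the frozen device) and the
 * resolver thread is woken to thaw it.
 */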
static void heartbeat_timer_func(unsigned long __data)
{
	struct session_struct *s;

	s = (struct session_struct *) __data;
	if (!s->s_heartbeat_active || s->s_ioctlcnt != s->s_ioctlcnt_prev) {
		sa_debug(DEBUG_API, "s=%p\n", s);
		if (s->s_heartbeat_active)
			mod_timer(&s->s_timer, jiffies + TIMER_INTERVAL);
		s->s_ioctlcnt_prev = s->s_ioctlcnt;
		return;
	}
	sn_set_mb(s->s_state, SNAP_DEADLOCK_ERR);
	sa_info("Deadlock detected. Unfreezing...%s", "\n");
#ifndef USE_KERNEL_THREAD
	wake_up_process(resolver_thread);
#else
	wake_up_interruptible(&resolver_thread_signal); 
#endif
}

static void sa_heartbeat_stop(struct session_struct *s)
{
	spin_lock_bh(&s->s_misc_lock);
	s->s_heartbeat_active = 0;
	spin_unlock_bh(&s->s_misc_lock);
	if (s->s_timer.function) {
		del_timer_sync(&s->s_timer);
		s->s_timer.function = NULL;
	}
}

static void sa_heartbeat_start(struct session_struct *s)
{
	spin_lock_bh(&s->s_misc_lock);
	s->s_heartbeat_active = 1;
	s->s_ioctlcnt_prev = s->s_ioctlcnt;
	init_timer(&s->s_timer);
	s->s_timer.function = &heartbeat_timer_func;
	s->s_timer.data = (unsigned long) s;
	s->s_timer.expires = jiffies + TIMER_INTERVAL;
	add_timer(&s->s_timer);
	spin_unlock_bh(&s->s_misc_lock);
}

static int session_freeze(struct session_struct *s)
{
	int ret;
	sn_request_queue *q;

	sa_debug(DEBUG_API, "s=%p\n", s);
	q = NULL;
	ret = -EINVAL;
	down(&s->s_sem);

	if (s->s_make_request_fn || s->s_state != SNAP_INITED)
		goto out_up;
/* sync !!! */
	sn_freeze_bdev(s);
	if (!s->s_sb) {
		sa_warn("Can't find super, device %x, freeze.\n", s->s_kdev);
		sn_set_mb(s->s_state, SNAP_FREEZE_ERR);
		ret = -ESRCH;
		goto out_up;
	}
	sn_set_mb(s->s_state, SNAP_FREEZING);
	if (register_make_request(s)) {
		sa_warn("Device %x does not have a queue.\n", s->s_kdev);
		sn_set_mb(s->s_state, SNAP_FREEZE_ERR);
		sn_thaw_bdev(s);
		goto out_up;
	}
/* The queue exists. It has been checked in register_make_request */
	q = snapapi_get_dev_queue(s);
	do {
#ifdef HAVE_REQUEST_QUEUE_RQS
		const int rq_cnt = q->nr_rqs[WRITE];
#else
		const int rq_cnt = q->rq.count[WRITE];
#endif
		if (rq_cnt == 0)
			break;
		/*
		 * Must be (re)armed before every sleep: schedule_timeout()
		 * returns with the task back in TASK_RUNNING state.
		 */
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(HZ / 20);
		sa_debug(DEBUG_INTERNALS, "count=%d, nr_requests=%lu\n",
			rq_cnt, q->nr_requests);
	} while (1);
	current->state = TASK_RUNNING;
	sn_set_mb(s->s_state, SNAP_FROZEN);

	sa_heartbeat_start(s);
	ret = 0;

out_up:
	up(&s->s_sem);
	return ret;
}

static int session_unfreeze(struct session_struct *s)
{
	int ret;

	sa_debug(DEBUG_API, "s=%p\n", s);
	ret = -EINVAL;
	down(&s->s_sem);
	if (s->s_state != SNAP_FROZEN && s->s_state != SNAP_FREEZE_ERR)
		goto out_up;
	up(&s->s_sem);
	ret = 0;
	close_session(s, 0);
	return ret;

out_up:
	up(&s->s_sem);
	return ret;
}

static void session_stat(struct sn_state *sn)
{
	sa_warn("dev=%x:%x state=%d blksize=%d mmapsize=%d\n",
		sn->major, sn->minor, sn->state, sn->blksize, sn->mmapsize);
	sa_warn("psize=%llu pstrt=%llu mshft=%d ioctls=%llu\n",
		sn->partsize, sn->partstrt, sn->minorshft, sn->ioctlcnt);
	sa_warn("bhpgs=%d bhcnt=%d abhs=%llu fbhs=%llu dbhs=%llu\n",
		sn->bhpages, sn->bhcount, sn->abhs, sn->fbhs, sn->dbhs);

	sa_warn("gpgs=%llu ppgs=%llu emmax=%d emmin=%d emcur=%d cached=%d\n",
		sn->gpages, sn->ppages, sn->emmax, sn->emmin, sn->emcur,
		sn->cachepages);

	sa_warn("rblk=%llu cblk=%llu rcblk=%llu rc2blk=%llu mcblk=%llu"
		" rwcolls=%llu\n", sn->rblocks, sn->cblocks,
		sn->rcblocks, sn->rc2blocks, sn->mcblocks, sn->rwcolls);

	sa_warn("sync=%llu async=%llu aretr=%llu\n",
		sn->sync_req, sn->async_req, sn->async_retr);
}

static void fill_state(struct session_struct *s, struct sn_state *out)
{
	out->state = s->s_state;
	out->major = MAJOR(s->s_kdev);
	out->minor = MINOR(s->s_kdev);
	out->blksize = s->s_bsize;
	out->mmapsize = s->s_maxmsize * PAGE_SIZE;

	out->partstrt = s->s_pstart;
	out->minorshft = 0;
	out->partsize = s->s_plen;

	out->bhpages = s->s_biopages;
	out->bhcount = s->s_biocount;
	out->emmax = snap_emergency_size;
	out->emmin = s->s_blkcache_emmin;
	out->emcur = s->s_blkcache_empages;
	out->cachepages = s->s_blkcache_pages;

	out->gpages = s->s_gpages;
	out->ppages = s->s_ppages;
	out->abhs = s->s_abios;
	out->fbhs = s->s_fbios;
	out->dbhs = s->s_dbios;
	out->rblocks = s->s_rblocks;
	out->cblocks = s->s_cblocks;
	out->rcblocks = s->s_rcblocks;
	out->fcblocks = s->s_fcblocks;
	out->mcblocks = s->s_mcblocks;
	out->rwcolls = s->s_rwcolls;
	out->rc2blocks = s->s_rc2blocks;
	out->sync_req = s->s_sync_req;
	out->async_req = s->s_async_req;
	out->async_retr = s->s_async_retr;
	out->ioctlcnt = s->s_ioctlcnt;
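	/*
	 * The version word packs major/minor/subminor as
	 * (major << 16) | (minor << 8) | subminor; e.g. with this
	 * package's 0.7.90 numbering it is (0 << 16) + (7 << 8) + 90
	 * = 0x75a.
	 */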
	out->version =  (SNAPAPI_VMAJOR << 16) + (SNAPAPI_VMINOR << 8) +
							SNAPAPI_VSUBMINOR;
}

static int session_state(struct session_struct *s, struct sn_state *state,
						unsigned int size)
{
	int ret;
	struct sn_state out;

	sa_debug(DEBUG_API, "s=%p, state=%p\n", s, state);
	fill_state(s, &out);
	if (size > sizeof(out))
		size = sizeof(out);
	ret = copy_to_user(state, &out, size);
	if (ret)
		return -EACCES;
	return 0;
}

#if 0
static void dump_sessions(void)
{
	struct session_struct *s;
	sa_warn("Start sessions dump\n");
	list_for_each_entry(s, &sessions_list, s_list) {
		sa_warn("dev=%x:%x state=%u blksize=%u mmapsize=%d queue=%p\n",
			MAJOR(s->s_kdev), MINOR(s->s_kdev), s->s_state,
			s->s_bsize,  (int)(s->s_maxmsize * PAGE_SIZE),
			s->s_request_queue);
		sa_warn("psize=%llu pstrt=%llu mshft=%d ioctls=%llu\n",
			s->s_plen, s->s_pstart, 0, s->s_ioctlcnt);
		sa_warn("bhpgs=%d bhcnt=%d abhs=%llu fbhs=%llu dbhs=%llu\n",
			s->s_biopages, s->s_biocount, s->s_abios, s->s_fbios,
			s->s_dbios);
		sa_warn("gpgs=%llu ppgs=%llu emmax=%d emmin=%d emcur=%d"
			" cached=%d\n", s->s_gpages, s->s_ppages,
			snap_emergency_size, s->s_blkcache_emmin,
			s->s_blkcache_empages, s->s_blkcache_pages);
		sa_warn("rblk=%llu cblk=%llu rcblk=%llu rc2blk=%llu mcblk=%llu"
			" rwcolls=%llu\n", s->s_rblocks, s->s_cblocks,
			s->s_rcblocks, s->s_rc2blocks, s->s_mcblocks,
			s->s_rwcolls);
	}
	sa_warn("End of sessions dump\n");
}
#endif

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
#define _READS ios[0]
#define _WRITES ios[1]
#define _READ_SECTORS sectors[0]
#define _WRITE_SECTORS sectors[1]
#else
#define _READS reads
#define _WRITES writes
#define _READ_SECTORS read_sectors
#define _WRITE_SECTORS write_sectors
#endif

static int session_devinfo(struct session_struct *s, dev_t kdev,
				struct sn_devinfo *info, unsigned int size)
{
	int ret;
	struct sn_devinfo out;
	struct super_block * sb;
	struct block_device *bdev;

	sa_debug(DEBUG_API, "s=%p, devinfo=%p\n", s, info);
	memset(&out, 0, sizeof(out));
	out.major = MAJOR(kdev);
	out.minor = MINOR(kdev);
	bdev = bdget(kdev);
	if (!bdev)
		return -ENODEV;
	if ((ret = sn_blkdev_get(bdev, FMODE_READ, 0)) < 0)
		return ret;
	sa_debug(DEBUG_API, "bd_part=%p bd_contains=%p\n", bdev->bd_part,
			bdev->bd_contains);
	out.partstrt = get_start_sect(bdev);
	if (bdev->bd_part) {
		out.partsize = bdev->bd_part->nr_sects;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25)
		out.reads = part_stat_read(bdev->bd_part, _READS);
		out.read_sectors = part_stat_read(bdev->bd_part, _READ_SECTORS);
		out.writes = part_stat_read(bdev->bd_part, _WRITES);
		out.write_sectors = part_stat_read(bdev->bd_part, _WRITE_SECTORS);
#else
		out.reads = bdev->bd_part->_READS;
		out.read_sectors = bdev->bd_part->_READ_SECTORS;
		out.writes = bdev->bd_part->_WRITES;
		out.write_sectors = bdev->bd_part->_WRITE_SECTORS;
#endif
	} else if (bdev->bd_disk) {
		out.partsize = get_capacity(bdev->bd_disk);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
		out.reads = disk_stat_read(bdev->bd_disk, _READS);
		out.read_sectors = disk_stat_read(bdev->bd_disk, _READ_SECTORS);
		out.writes = disk_stat_read(bdev->bd_disk, _WRITES);
		out.write_sectors = disk_stat_read(bdev->bd_disk, _WRITE_SECTORS);
#else
		out.reads = part_stat_read(&bdev->bd_disk->part0, _READS);
		out.read_sectors = part_stat_read(&bdev->bd_disk->part0, _READ_SECTORS);
		out.writes = part_stat_read(&bdev->bd_disk->part0, _WRITES);
		out.write_sectors = part_stat_read(&bdev->bd_disk->part0, _WRITE_SECTORS);
#endif
	}
	else
		sa_warn("Can't detect device %x size.\n", kdev);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
	sb = get_super(bdev);
#else
	sb = user_get_super(kdev);
#endif
	sn_blkdev_put(bdev, FMODE_READ);
	if (sb) {
		out.blksize = sb->s_blocksize;
		drop_super(sb);
	}
	if (size > sizeof(out))
		size = sizeof(out);
	ret = copy_to_user(info, &out, size);
	if (ret)
		return -EACCES;
	return 0;
}

static int session_getbno(struct session_struct *s, unsigned long long *data)
{
	unsigned long long bno;

	if (!s->s_blkmap.blkmap || s->s_state != SNAP_MAPPED)
		return -EINVAL;
	down(&s->s_sem);
	bno = any_block_in_cache(s);
	up(&s->s_sem);
	sa_debug(DEBUG_BREAD, "s=%p, bno=%llu\n", s, bno);
	return put_user(bno, data);
}

static int session_rdcache(struct session_struct *s, struct sn_rdcache *req,
						unsigned int size)
{
	int ret;
	struct sn_rdcache rdc;
	struct page * page;
	unsigned int max_blocks;
	char *data;
	unsigned long long bno;
	unsigned int i;

	sa_debug(DEBUG_API, "s=%p, req=%p\n", s, req);
	if (!s->s_blkmap.blkmap || s->s_state != SNAP_MAPPED)
		return -EINVAL;
	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;
	memset(&rdc, 0, sizeof(rdc));
	if (size > sizeof(rdc))
		size = sizeof(rdc);
	ret = copy_from_user(&rdc, req, size);
	if (ret || rdc.buf == 0 || rdc.size == 0) {
		page_cache_release(page);
		return -EACCES;
	}
	down(&s->s_sem);
	rdc.bno = any_block_in_cache(s);
	if (rdc.bno == ~0ULL)
		goto out_up;
	max_blocks = rdc.size / s->s_bsize;
	data = rdc.buf;
	bno = rdc.bno;
	for (i = 0; i < max_blocks; i++, bno++, data += s->s_bsize) {
		ret = sa_cache_read(s, page_address(page), bno,
				READ_KERNEL1, SNAP_READ_ONCE);
		if (!ret)
			break;
		ret = copy_to_user(data, page_address(page), s->s_bsize);
		if (ret) {
			ret = -EACCES;
			break;
		}
		s->s_rc2blocks++;
	}
	rdc.count = bno - rdc.bno;
out_up:
	up(&s->s_sem);
	page_cache_release(page);
	if (ret)
		return ret;
	ret = copy_to_user(req, &rdc, size);
	if (ret)
		return -EACCES;
	sa_debug(DEBUG_BREAD, "s=%p, bno=%llu count=%u\n", s, rdc.bno, rdc.count);
	return 0;
}

static int session_bfree(struct session_struct *s, unsigned long long bno,
				unsigned long long count)
{
	int ret;
	unsigned long long end;

	ret = -EINVAL;
	sa_debug(DEBUG_BREAD, "s=%p, bno=%llu, count=%llu\n", s, bno, count);
	down(&s->s_sem);
	if (!s->s_blkmap.blkmap)
		goto out;
	end = bno + count;
	if (end < bno || end > s->s_blkmap.size)
		goto out;
	if (s->s_state != SNAP_MAPPED)
		goto out;

	for (; count; bno++, count--) {
		sa_cache_read(s, NULL, bno, FAKE_READ, SNAP_READ_ONCE);
		clear_block_in_map(&s->s_blkmap, bno);
	}
	ret = 0;

out:
	up(&s->s_sem);
	return ret;
}

static inline char * bread_data_addr(struct session_struct *s, int i)
{
	if (s->s_bppage == 1)
		return page_address(s->s_mpages[i]);
	return ((char *)page_address(s->s_mpages[i / s->s_bppage]) +
			(i % s->s_bppage) * s->s_bsize);
}
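
/*
 * Addressing example for the helper above (values assumed): with 4096-byte
 * pages and s_bsize == 1024, s_bppage == 4, so block index i == 5 lives in
 * s_mpages[5 / 4] == s_mpages[1] at byte offset (5 % 4) * 1024 == 1024.
 */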

static void bread_submit_bios(struct session_struct *s,
		unsigned long long bno, int count)
{
	struct bio *bio;
	struct page *page;
	int i, k;
	int vecs, page_idx, last_len;

	i = 0;
	page_idx = 0;
	last_len = count % s->s_bppage;
	s->s_rblocks += count;

	while (count > 0) {
repeat:
		vecs = sn_round(count, s->s_bppage);
		bio = bio_alloc(GFP_NOIO, vecs);
		if (!bio) {
			schedule();
			goto repeat;
		}
		s->s_abios++;
		bio->bi_bdev = s->s_bdev;

#ifdef HAVE_BVEC_ITER
		bio->bi_iter.bi_sector = ((sector_t) bno) * s->s_spb;
#else
		bio->bi_sector = ((sector_t) bno) * s->s_spb;
#endif
		init_completion(&s->s_local_bios[i].event);
		bio->bi_private = &s->s_local_bios[i].event;
		bio->bi_end_io = sa_cache_bio_end_io;

		for (k = 0; k < vecs; k++) {
			int vec_len = PAGE_SIZE;
			page = s->s_mpages[page_idx];
			if (count == last_len)
				vec_len = last_len * s->s_bsize;
			if (bio_add_page(bio, page, vec_len, 0) < vec_len)
				break;
			count -= vec_len / s->s_bsize;
			page_idx++;
		}
		s->s_local_bios[i].bio = bio;
		bno += sn_bio_bi_size(bio) / s->s_bsize;
		submit_bio(READ, bio);
		i++;
	}
}
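
/*
 * Sector arithmetic used above (illustrative): s_spb is sectors per block,
 * i.e. s_bsize >> 9. With a 4096-byte block size s_spb == 8, so logical
 * block 100 starts at 512-byte sector 100 * 8 == 800, which is the value
 * bi_sector is set to before submit_bio().
 */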

static int bread_from_cache(struct session_struct *s, unsigned long long bno,
		unsigned int count, unsigned int flags)
{
	int i, numread, ret;

	sa_debug(DEBUG_BREAD, "s=%p, bno=%llu, count=%u\n",
					s, bno, count);
	numread = 0;
	for (i = 0; i < count; i++, bno++) {
		char * data;

		data = bread_data_addr(s, i);
		ret = sa_cache_read(s, data, bno, READ_KERNEL1, flags);
		if (ret)
			numread++;
	}
	return numread;
}

static int bread_wait_submitted(struct session_struct *s,
				unsigned long long bno, unsigned int count)
{
	int i, ret;
	int reqs;

	ret = 0;
	reqs = s->s_msize;
	for (i = 0; i < reqs; i++) {
		if (!s->s_local_bios[i].bio)
			continue;
		wait_for_completion(&s->s_local_bios[i].event);
		if (sn_is_error_bio(s->s_local_bios[i].bio))
			ret = -EIO;
	}
	return ret;
}

#if 0
static inline void dump_data(void *data, int offset, char *pref)
{
	unsigned char *p = (unsigned char *)data + offset;
	sa_debug(DEBUG_BREAD, "%s %x:%x %x %x %x %x %x %x %x %x %x %x %x %x"
		" %x %x %x\n",
		pref, offset,
		*p, *(p+1), *(p+2), *(p+3), *(p+4), *(p+5), *(p+6), *(p+7),
		*(p+8), *(p+9), *(p+10), *(p+11), *(p+12), *(p+13), *(p+14),
		*(p+15));
}
#endif
static int session_bread_fast(struct session_struct *s, unsigned long long bno,
			unsigned int count, unsigned int flags,
			unsigned long long *bincache)
{
	int ret, ccnt, i;
	unsigned long long cachecnt;
	unsigned int rcount;	/* saved count */
	unsigned int reqs;

	ret = ccnt = 0;
	rcount = count;

	sa_debug(DEBUG_BREAD, "s=%p, bno=%llu, count=%u\n",
					s, bno, count);
	if (bno + count > (s->s_plen >> s->s_spbshift))
		count = (s->s_plen >> s->s_spbshift) - bno;
	bread_submit_bios(s, bno, count);
	ret = bread_wait_submitted(s, bno, count);
	if (!ret) {
		/* read only the requested blocks from the cache */
		if (flags & SNAP_READ_ONCE)
			for (i = 0; i < rcount; i++)
				clear_block_in_map(&s->s_blkmap, bno + i);
		ccnt = bread_from_cache(s, bno, rcount, flags);
	}
	reqs = s->s_msize;
	for (i = 0; i < reqs; i++) {
		if (s->s_local_bios[i].bio) {
			bio_put(s->s_local_bios[i].bio);
			s->s_local_bios[i].bio = NULL;
			s->s_fbios++;
		}
	}
	cachecnt = 0;
	if (!(flags & SNAP_READ_ONCE))
		/* wake up the user-level cache in non-SNAP_READ_ONCE mode only */
		cachecnt = s->s_cblocks - s->s_fcblocks;
	sa_debug(DEBUG_BREAD, "s=%p, bno=%llu, L=%u, R=%d, C=%d\n",
					s, bno, count, rcount, ccnt);
	sa_debug(DEBUG_CACHE, "cached=%llu, read=%llu, incache=%llu\n",
					s->s_cblocks, s->s_rcblocks, cachecnt);
/*	dump_data(page_address(s->s_mpages[0]), 0, "session_bread_fast"); */
	up(&s->s_sem);
	if (!ret && copy_to_user(bincache, &cachecnt, sizeof(cachecnt)))
		return -EACCES;
	return ret;
}

static inline int sn_page_mapcount(struct page *page)
{
#ifdef HAVE_PAGE_UMAPCOUNT
	return atomic_read(&page->_mapcount) + 1;
#elif defined(HAVE_PAGE_MAPCOUNT)
	return page_mapcount(page);
#else
	return (page->mapcount);
#endif
}

static int session_copy_to_cow(struct session_struct *s, char *data, unsigned int count)
{
	struct page **page_ref;
	int size;

	size = PAGE_SIZE;
	for (page_ref = s->s_mpages; count; data += PAGE_SIZE) {
		struct page *page;

		if (count < s->s_bppage) {
			size = count * s->s_bsize;
			count = 0;
		} else
			count -= s->s_bppage;

		page = *page_ref++;
		if (page && !sn_page_mapcount(page))
			if (copy_to_user(data, page_address(page), size))
				return -EACCES;
	}
	return 0;
}
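
/*
 * Why the mapcount test above: the bread buffer is mmap()ed into userspace
 * from s_mpages. If the reading process forked, a write fault may have
 * COW'ed a page, leaving the user with a private copy while the kernel
 * keeps filling the original, whose mapcount drops to zero. For such pages
 * the fresh data has to be pushed out explicitly with copy_to_user().
 */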

static int session_bread(struct session_struct *s, unsigned long long bno,
			unsigned int count, char *data, unsigned int flags,
			unsigned long long *bincache)
{
	int ret;
	unsigned long long end;

	ret = -EINVAL;

	sa_debug(DEBUG_BREAD, "s=%p, bno=%llu, count=%u\n", s, bno, count);
	down(&s->s_sem);
	if (!s->s_blkmap.blkmap)
		goto out;
	end = bno + count;
	if (end < bno || end > s->s_blkmap.size)
		goto out;
	if (s->s_state != SNAP_MAPPED)
		goto out;

	if (s->s_vma && data == (char *)s->s_vma->vm_start &&
				count * s->s_bsize <= PAGE_SIZE * s->s_msize) {
		ret = session_bread_fast(s, bno, count, flags, bincache);
		/* copy data up to user COW'ed pages, if any */
		if (!ret && s->s_vma->anon_vma)
			ret = session_copy_to_cow(s, data, count);
		return ret;
	}
	ret = -EINVAL;
	sa_warn("Interface error.%s","\n");
out:
	up(&s->s_sem);
	return ret;
}

static int session_ldmap(struct session_struct *s, unsigned long long size,
								void *map)
{
	int ret;

	ret = -EINVAL;
	sa_debug(DEBUG_API, "size=%llu\n", size);
	down(&s->s_sem);
	if (s->s_state != SNAP_FROZEN)
		goto out_up;
	sn_set_mb(s->s_state, SNAP_INITINGMAP);
#ifdef USE_VZ_VZSNAP
	if (s->s_veid) /* block_map already filled by block_map_init_vzsnap */
		ret = 0;
	else
#endif
	ret = block_map_init(s, size, map, 1);
	sa_heartbeat_stop(s);
	if (ret) {
		sn_set_mb(s->s_state, SNAP_MAP_ERR);
		goto out_unlock;
	}

	wait_for_users(s);
	sn_set_mb(s->s_state, SNAP_MAPPED);
	spin_unlock(&sessions_lock);

	ret = start_req_handler_thread(s);
	if (ret < 0)
		goto out_unlock;
	/* push delayed bios */
	cleanup_biolist(s);
	ret = 0;

out_unlock:
	sn_thaw_bdev(s);
out_up:
	up(&s->s_sem);
	return ret;
}

static void copy_page_bits_slow(void *dst, unsigned int dstbit, void *src,
				unsigned int srcbit, unsigned int len)
{
	while (len--) {
		if (test_bit(srcbit++, src))
			set_bit(dstbit++, dst);
		else
			clear_bit(dstbit++, dst);
	}
}

/* !NOTE!: we assume dst and src both point to the start of a page */

static void copy_page_bits(unsigned int *dst, unsigned int dstbit,
				unsigned int *src, unsigned int srcbit,
				unsigned int len)
{
	unsigned int* srcend;
	unsigned int headlen;

	/* normalize destination ptr and bitno by 4-byte boundary */
	dst += dstbit >> 5;
	dstbit &= 31;
	headlen = 32 - dstbit;
	if (len < headlen)
		headlen = len;
	copy_page_bits_slow(dst++, dstbit, src, srcbit, headlen);
	len -= headlen;
	if (!len)
		return;
	srcbit += headlen;
	/* normalize source ptr and bitno by 4-byte boundary */
	src += srcbit >> 5;
	srcbit &= 31;
	/* process the full DWORDs; the DWORD count is len / 32 */
	srcend = src + (len >> 5);
	for (; src != srcend; src++)
		*dst++ = *(unsigned long long*)src >> srcbit;
	/* processing the tail, tail length is low 5 bits of len */
	copy_page_bits_slow(dst, 0, src, srcbit, len & 31);
}
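
/*
 * Sketch of the fast path above (assumes a little-endian CPU, which the
 * unaligned 64-bit load relies on): head bits are copied one by one until
 * dst is 32-bit aligned, then each destination word is built by a single
 * 64-bit read spanning two source words, shifted right by srcbit. E.g.
 * with srcbit == 5, one store delivers source bits 5..36. The remaining
 * len & 31 tail bits go through the slow path again.
 */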

void copy_block_to_bitmap(struct session_struct* s, unsigned long long dest_bit,
						unsigned int len, void* array)
{
	unsigned int src_bit;

	src_bit = 0;
	while (len) {
		struct page* page;
		void* kaddr;
		unsigned int count;
		unsigned int bitno;	/* start bit on destination page */

		page = blkmap_page(s->s_blkmap.blkmap,
					dest_bit >> (PAGE_SHIFT + 3));
		bitno = dest_bit & (BITS_ON_PAGE - 1);
		count = BITS_ON_PAGE - bitno;
		if (count > len)
			count = len;
		kaddr = sn_kmap_atomic(page);
		copy_page_bits(kaddr, bitno, array, src_bit, count);
		sn_kunmap_atomic(kaddr);
		dest_bit += count;
		src_bit += count;
		len -= count;
	}
}

static int compute_bitmap_ext2(struct session_struct *s)
{
	unsigned long long fblock;	/* first data block */
	unsigned int bpgroup;		/* blocks per group */
	unsigned int lgroup; 		/* last group */
	struct page* block_page;
	void* block;
	unsigned int count;

	count = 0;
	fblock = s->s_fblock;
	lgroup = s->s_gcount - 1;
	bpgroup = s->s_bpgroup;

	block_page = alloc_page(GFP_KERNEL);
	if (!block_page)
		return -1;
	block = page_address(block_page);
	while (1) {
		unsigned long long group;
		unsigned long long cblock;	/* current block */
		unsigned long long gstart_bit;
		int copy_count;

		cblock = any_block_in_cache(s);
		if (cblock == ~0ULL)
			break;
		group = cblock;
		gstart_bit = cblock - do_div(group, bpgroup) + fblock;
		if (sa_cache_read(s, block, cblock, 0, SNAP_READ_ONCE)
							!= s->s_bsize)
			break;
		count++;
		copy_count = bpgroup;
		if (group == lgroup)
			copy_count = s->s_blkmap.size - gstart_bit;
		copy_block_to_bitmap(s, gstart_bit, copy_count, block);
	}

	page_cache_release(block_page);
	return count;
}
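
/*
 * Group math used above (illustrative, values assumed): each ext2/3/4 group
 * covers bpgroup bits of the block bitmap and the first data block is
 * fblock. For a cached bitmap block cblock, do_div() leaves the quotient
 * group == cblock / bpgroup, and the group's first bit in the full map is
 * gstart_bit == group * bpgroup + fblock. With bpgroup == 8192 and
 * fblock == 1, cblock == 16390 falls into group 2, whose bits start at
 * 2 * 8192 + 1 == 16385.
 */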

static int copy_bitmap_to_user(struct session_struct *s, char* bitmap)
{
	void* taddr;
	struct page* tpage;
	int ret;
	unsigned int pageno;
	unsigned long long bytes;

	ret = -ENOMEM;
	bytes = (s->s_blkmap.size + 7) >> 3;
	tpage = alloc_page(GFP_KERNEL);
	if (!tpage)
		goto out;
	taddr = page_address(tpage);
	ret = 0;
	for (pageno = 0; bytes; bitmap += PAGE_SIZE, pageno++) {
		unsigned int copy_count;
		struct page* page;
		char *kaddr;

		page = blkmap_page(s->s_blkmap.blkmap, pageno);
		/* the last page may be only partially used */
		copy_count = bytes > PAGE_SIZE ? PAGE_SIZE : bytes;
		if (page) {
			kaddr = sn_kmap_atomic(page);
			memcpy(taddr, kaddr, copy_count);
			sn_kunmap_atomic(kaddr);
		} else
			memset(taddr, 0, PAGE_SIZE);
		ret = copy_to_user(bitmap, taddr, copy_count);
		if (ret) {
			ret = -EACCES;
			break;
		}
		bytes -= copy_count;
	}

out:
	if (tpage)
		page_cache_release(tpage);
	return ret;
}

static int check_session_params(struct session_struct *s)
{
	if (s->s_state != SNAP_FROZEN) {
		sa_warn("Session must be frozen (state=%d)\n", s->s_state);
		return -EINVAL;
	}

	if (!s->s_sb) {
		sa_warn("No superblock info for s=%p\n", s);
		return -EINVAL;
	}

	if (strncmp(s->s_sb->s_type->name, "ext", 3)) {
		sa_warn("Invalid partition type (%s)\n", s->s_sb->s_type->name);
		return -EINVAL;
	}
	return 0;
}

#ifdef USE_VZ_VZSNAP
static int vzsnap_getmap(struct session_struct* s)
{
	int ret;

	ret = -EINVAL;
	sn_set_mb(s->s_state, SNAP_MAP_ERR);
	if (s->s_vzs)
		return ret;
	s->s_vzs = vzsnap_get_map(s->s_veid, s->s_bdev);
	if (s->s_vzs == NULL)
		return ret;
	ret = block_map_init_vzsnap(s, s->s_vzs);
	vzsnap_release_map(s->s_vzs);
	s->s_vzs = NULL;
	if (ret)
		return ret;
	sn_set_mb(s->s_state, SNAP_FROZEN);
	return 0;
}
#endif /* USE_VZ_VZSNAP */

static int session_getmap(struct session_struct *s, unsigned long long size,
		void* bitmap, unsigned long bsize, unsigned long fblock,
		unsigned long bpgroup, unsigned long gcount)
{
	int ret;
	int pended;
	int bcount;
	unsigned long long bno;

	sa_debug(DEBUG_API, "s=%p size=%llu, bmap=%p, bsize=%lu, fblock=%lu,"
			" bpgroup=%lu, gcount=%lu\n", s, size, bitmap, bsize,
			fblock, bpgroup, gcount);
	bcount = 0;
	ret = -EINVAL;
	if (!bitmap || !size)
		return ret;

	down(&s->s_sem);
	ret = check_session_params(s);
	if (ret)
		goto out_up;
 
	s->s_fblock = fblock;
	s->s_gcount = gcount;
	s->s_bpgroup = bpgroup;
	s->s_bmsize = size;

	sn_set_mb(s->s_state, SNAP_INITINGMAP);
	sa_heartbeat_stop(s);
#ifdef USE_VZ_VZSNAP
	if (s->s_veid) {
		ret = vzsnap_getmap(s);
		if (ret)
			goto out_thaw;
		goto out_copy;
	}
#endif
	ret = block_map_init(s, size, bitmap, 0);
	if (ret) {
		sa_warn("block_map_init failed\n");
		goto out_thaw;
	}
	sn_set_mb(s->s_state, SNAP_READINGMAP);
	sn_set_mb(s->s_usemap, 0);
	ret = -EIO;
	sn_thaw_bdev(s);

	ret = start_req_handler_thread(s);
	if (ret < 0)
		goto out_destroy;
	/* Reading bitmap from device */
	bno = 0;
	while (1) {
		bno = find_next_block(&s->s_blkmap, bno);
		if (bno == ~0ULL)
			break;
		if (sa_cache_block(s, NULL, bno, 1, &pended)) {
			sa_warn("reading bitmap: sa_cache_block(%llu)\n", bno);
			goto out_destroy;
		}
		bno++;
		bcount++;
	}
	stop_req_handler_thread(s, 1);
	sn_freeze_bdev(s);
	sn_set_mb(s->s_state, SNAP_FROZEN);
	ret = compute_bitmap_ext2(s);
	if (bcount != ret) {
		sa_warn("computing bitmap: %d!=%d\n", bcount, ret);
		ret = -EPROTO;
		goto out_thaw;
	}
/*	Setting bits at start of bitmap till FirstDataBlock	*/
/*	Moved to userspace 					*/
/*	for (bno = 0; bno < fblock; bno++)
		set_block_in_map(&s->s_blkmap, bno);
*/
out_copy:
	ret = copy_bitmap_to_user(s, bitmap);
	if (ret)
		goto out_thaw;

	sa_heartbeat_start(s);
	up(&s->s_sem);
	return 0;

out_thaw:
	sn_thaw_bdev(s);

out_destroy:
	block_map_destroy(s);
	sn_set_mb(s->s_state, SNAP_MAP_ERR);

out_up:
	up(&s->s_sem);

	return ret;
}

static int copy_bits_to_user(unsigned int* map, unsigned long long bitno, 
		unsigned char* src, unsigned int count)
{
	unsigned int rel;
	unsigned int uval;
	unsigned int offset = 0;
	int ret = 0;

	sa_debug(DEBUG_API, "map=%p bitno=%llu count=%u\n", map, bitno, count);
	if (bitno & 7) {
		map += bitno >> 5;

		ret = get_user(uval, map);
		if (ret)
			goto out;
		for (rel = bitno & 31; rel < 32 && offset < count;
							++rel, ++offset) {
			if (test_bit(offset, src))
				set_bit(rel, &uval);
			else
				clear_bit(rel, &uval);
		}
		ret = put_user(uval, map++);
		if (ret)
			goto out;

		while (count - offset >= 32) {
			uval = 0;
			for (rel = 0; rel < 32; ++rel, ++offset) {
				if (test_bit(offset, src))
					set_bit(rel, &uval);
			}
			ret = put_user(uval, map++);
			if (ret)
				goto out;
		}
	}
	else {
		uval = (0 - bitno - count) & 31;
		if ((uval & 7) == 0)
			uval = 0;

		if (uval < count) {
			ret = copy_to_user((unsigned char*)map + (bitno >> 3),
						src, (count - uval) >> 3);
			if (ret)
				goto out;
			offset = count - uval;
		}

		map += (bitno + offset) >> 5;
	}

	if (offset < count) {
		ret = get_user(uval, map);
		if (ret)
			goto out;
		for (rel = (bitno + offset) & 31; offset < count; 
							++rel, ++offset) {
			if (test_bit(offset, src))
				set_bit(rel, &uval);
			else
				clear_bit(rel, &uval);
		}
		ret = put_user(uval, map);
		if (ret)
			goto out;
	}

out:
	return ret;
}
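
/*
 * The routine above handles two layouts: when bitno is not byte-aligned,
 * bits are merged into the user map one 32-bit word at a time through
 * get_user()/put_user(), preserving neighbouring bits owned by other
 * groups; when bitno is byte-aligned, whole bytes go out with a single
 * copy_to_user() and only the tail (at most 31 bits) is merged word-wise.
 */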

static int collect_bitmap_to_user(struct session_struct* s, uint8_t* map)
{
	int ret;
	struct page* block_page;
	unsigned char* block;

	sa_debug(DEBUG_API, "s=%p map=%p\n", s, map);
	ret = 0;
	block_page = alloc_page(GFP_KERNEL);
	if (!block_page)
		return -ENOMEM;
	block = page_address(block_page);

	map_init_iterator(&s->s_groupmap);
	ret = -EINVAL;
	do {
		unsigned long long bitno;
		unsigned long copy_count;
		struct group_entry* entry;

		entry = (void*)map_iterator_get_value(&s->s_groupmap);
		BUG_ON(!entry);

		bitno = entry->group * s->s_bpgroup + s->s_fblock;

		copy_count = s->s_bpgroup;
		if (entry->group == s->s_gcount - 1)
			copy_count = s->s_bmsize - bitno;

		if (!entry->cached)
			memset(block, 0, (copy_count + 7) >> 3);
		else if (sa_cache_read(s, block, entry->bno, 0,
						SNAP_READ_ONCE)	!= s->s_bsize) {
			sa_warn("cache block %llu can't be read\n", entry->bno);
			map_iterator_stop(&s->s_groupmap);
			break;
		}

		ret = copy_bits_to_user(map, bitno, block, copy_count);
		if (ret) {
			sa_warn("copy_bits_to_user failed (%d)\n", ret);
			break;
		}
	} while (map_iterator_next(&s->s_groupmap));

	page_cache_release(block_page);
	return ret;
}

static int session_getsparsedmap(struct session_struct *s,
		unsigned long long size, void *bitmap, unsigned long bsize,
		unsigned long fblock, unsigned long bpgroup,
		unsigned long gcount, struct group_entry *groups)
{
	int ret;
	int pended;
	struct group_entry* entry;

	sa_debug(DEBUG_API, "s=%p size=%llu bsize=%lu fblock=%lu bpgroup=%lu "
			"gcount=%lu groups=%p\n", s, size, bsize, fblock,
			bpgroup, gcount, groups);

	ret = -EINVAL;
	if (!bitmap || !size || !gcount)
		return ret;

	down(&s->s_sem);
	if (s->s_state != SNAP_FROZEN) {
		sa_warn("Session must be frozen (state=%d)\n", s->s_state);
		goto out_up;
	}

	if (strcmp(s->s_sb->s_type->name, "ext2") &&
	    strcmp(s->s_sb->s_type->name, "ext3") &&
	    strcmp(s->s_sb->s_type->name, "ext4")) {
		sa_warn("Invalid partition type (%s)\n", s->s_sb->s_type->name);
		goto out_up;
	}
	sn_set_mb(s->s_state, SNAP_INITINGMAP);

	s->s_fblock = fblock;
	s->s_gcount = gcount;
	s->s_bpgroup = bpgroup;
	s->s_bmsize = size;

	ret = map_init(s, groups, gcount);
	if (ret)
		goto out_thaw;

	map_init_iterator(&s->s_groupmap);
	sn_set_mb(s->s_state, SNAP_READINGMAP);
	sn_set_mb(s->s_usemap, 1);

	sn_thaw_bdev(s);
	sa_heartbeat_stop(s);

	ret = start_req_handler_thread(s);
	if (ret < 0)
		goto out_destroy;

	do {
		entry = map_iterator_get_value(&s->s_groupmap);
		BUG_ON(!entry);
		if (entry->init && !entry->cached && sa_cache_block(s, NULL,
						entry->bno, 1, &pended)) {
			sa_warn("caching block of %llu failed\n" , entry->bno);
			map_iterator_stop(&s->s_groupmap);
			goto out_destroy;
		}

	} while (map_iterator_next(&s->s_groupmap));

	stop_req_handler_thread(s, 1);
	sn_freeze_bdev(s);
	sn_set_mb(s->s_state, SNAP_FROZEN);
	ret = collect_bitmap_to_user(s, bitmap);
	if (ret)
		goto out_thaw;

	map_free(s);
	sa_heartbeat_start(s);
	up(&s->s_sem);
	return 0;

out_thaw:
	sn_set_mb(s->s_state, SNAP_MAP_ERR);
	sn_thaw_bdev(s);

out_destroy:
	map_free(s);
	sn_set_mb(s->s_state, SNAP_MAP_ERR);

out_up:
	up(&s->s_sem);
	return ret;
}

static int do_init_session(struct session_struct *s, dev_t kdev, int prealloc)
{
	int ret;
	int sa_page_size;
	int max_req;
	sn_request_queue *q;

	ret = -ENODEV;
	s->s_bdev = bdget(kdev);
	sa_debug(DEBUG_API, "kdev=%x s->s_bdev=%p\n", kdev, s->s_bdev);
	if (!s->s_bdev)
		goto out;
	sa_debug(DEBUG_INTERNALS, "bd_part=%p bd_contains=%p\n",
			s->s_bdev->bd_part, s->s_bdev->bd_contains);
	if ((ret = sn_blkdev_get(s->s_bdev, FMODE_READ, 0)) < 0)
		goto out;
	ret = -ENODEV;
	if (!s->s_bdev->bd_contains)
		goto out_blk_put;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
	s->s_sb = get_super(s->s_bdev);
#else
	s->s_sb = user_get_super(kdev);
#endif
	sa_debug(DEBUG_INTERNALS, "s->s_sb=%p\n", s->s_sb);
	if (!s->s_sb)
		goto out_blk_put;

	s->s_bsize = s->s_sb->s_blocksize;
	s->s_bppage = PAGE_SIZE / s->s_sb->s_blocksize;
	if (strcmp(s->s_sb->s_type->name, "vfat") == 0)
		s->s_simulate_freeze = 1;
	drop_super(s->s_sb);
	s->s_sb = NULL;
	s->s_spb = s->s_bsize >> 9;
	if (!s->s_spb) {
		sa_warn("Device %x has incorrect block size %d\n", kdev,
								s->s_bsize);
		goto out_blk_put;
	}
	s->s_spbshift = ffz(~s->s_spb);
	s->s_pstart = get_start_sect(s->s_bdev);
	if (s->s_bdev->bd_part)
		s->s_plen = s->s_bdev->bd_part->nr_sects;
	else if (s->s_bdev->bd_disk)
		s->s_plen = get_capacity(s->s_bdev->bd_disk);
	else
		sa_warn("Can't detect device %x size.\n", kdev);

	q = bdev_get_queue(s->s_bdev);
	if (!q) {
		sa_warn("Device %x does not have a queue.\n", kdev);
		goto out_blk_put;
	}
#ifdef HAVE_QUEUE_MAX_SECTORS
	max_req = (queue_max_sectors(q) << 9) / PAGE_SIZE;
#else
	max_req = (q->max_sectors << 9) / PAGE_SIZE;
#endif

	sa_debug(DEBUG_API, "s_bsize=%d s_bppage=%d s_spb=%d s_spbshift=%d"
		" s_plen=%llu s_pstart=%llu\n",
		s->s_bsize, s->s_bppage, s->s_spb, s->s_spbshift, s->s_plen,
		s->s_pstart);

	ret = -ENOMEM;

	s->s_bioarr = (struct bio***)get_zeroed_page(GFP_KERNEL);
	if (!s->s_bioarr)
		goto out_blk_put;
	s->s_gpages++;

	slab_uid++;
	sprintf(s->s_blkcachename, "snapapi_blk_%d", slab_uid);
	sa_page_size = sizeof(struct sa_page) +
				sizeof(unsigned long long) * (s->s_bppage - 1);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
	s->s_blkcachep = kmem_cache_create(s->s_blkcachename, sa_page_size,
								0, 0, 0, NULL);
#else
	s->s_blkcachep = kmem_cache_create(s->s_blkcachename, sa_page_size,
					0, 0, sn_get_blkcache_ctor(s->s_bppage));
#endif
	if (!s->s_blkcachep)
		goto out_free;

	ret = sa_cache_emlist_init(s, prealloc);
	if (ret)
		goto out_destroy;

	s->s_maxmsize = MAX_MMPAGES;
	s->s_ahead_bno = ~0ULL;
	/* pending queue init */
	s->s_pending_queue.pq_req = NULL;
	s->s_pending_queue.pq_reqtail = NULL;
	s->s_pending_queue.pq_state = 0;
	init_completion(&s->s_pending_queue.pq_done);
	init_completion(&s->s_pending_queue.pq_bio_done);
	atomic_set(&s->s_pending_queue.pq_ready_req, 0);
	atomic_set(&s->s_pending_queue.pq_notready_req, 0);
	s->s_pending_queue.pq_state = PQ_STOPPED;

	sn_set_mb(s->s_state, SNAP_INITED);
	return 0;

out_destroy:
	sa_cache_emlist_destroy(s);
out_free:
	free_page((unsigned long)s->s_bioarr);
	s->s_ppages++;
	s->s_bioarr = NULL;
out_blk_put:
	sn_blkdev_put(s->s_bdev, FMODE_READ);
out:
	sn_set_mb(s->s_state, SNAP_NOTINITED);
	s->s_bdev = NULL;
	return ret;
}

static void mpages_destroy(struct session_struct *s)
{
	int i;
	sa_debug(DEBUG_API, "s=%p\n", s);

	for (i = 0; i < s->s_msize; i++) {
		struct page * page;
		page = s->s_mpages[i];
		if (page) {
			sa_debug(DEBUG_INTERNALS, "s=%p, i=%d, page=%p(%d)\n",
						s, i, page, page_count(page));
			page_cache_release(page);
			s->s_mpages[i] = NULL;
			s->s_ppages++;
		}
	}
	if (s->s_local_bios) {
		sa_debug(DEBUG_INTERNALS, "s=%p, free local_bios(%p)\n",
					s, s->s_local_bios);
		kfree(s->s_local_bios);
	}

	s->s_local_bios = NULL;
	s->s_msize = 0;
}

#define	DL_READ 0
#define	DL_WRITE 1

static const char* devlock_name(unsigned lock_type)
{
	return lock_type == DL_WRITE ? "write" : "read";
}

static struct locked_dev* find_lockeddev(struct session_struct* s,
					struct block_device* bdev)
{
	struct locked_dev* idev, *end;

	end = devlocked + MAX_LOCKEDDEVS;
	for (idev = devlocked; idev != end; idev++)
		if (idev->bdev == bdev && idev->sess == s)
			return idev;
	return NULL;
}

static struct locked_dev* create_lockeddev(struct session_struct* s,
			struct block_device* bdev, unsigned lock_type)
{
	struct locked_dev* idev, *end;
	end = devlocked + MAX_LOCKEDDEVS;
	for (idev = devlocked; idev != end; idev++)
		if (!idev->bdev) {
			idev->bdev = bdev;
			idev->sess = s;
			idev->lock_type = lock_type;
			lockedcnt++;
			return idev;
		}
	return NULL;
}

static void remove_lockeddev(struct locked_dev* ldev)
{
	memset(ldev, 0, sizeof(struct locked_dev));
	lockedcnt--;
}

static int session_lockdev(struct session_struct *s, dev_t dev,
						unsigned lock_type)
{
	int ret;
	struct locked_dev* ldev;
	struct block_device *bdev;
	struct super_block *sb;
	void *holder;

	sa_debug(DEBUG_API, "s=%p, dev=%x, type=%s\n", s,
			dev, devlock_name(lock_type));
	ret = -ENOMEM;

	down(&devlocked_sem);
	if (lockedcnt >= MAX_LOCKEDDEVS || !devlocked)
		goto out_up;
	ret = -ENODEV;
	bdev = bdget(dev);
	if (!bdev)
		goto out_up;
	ldev = find_lockeddev(s, bdev);
	if (ldev) {
		ret = -EEXIST;
		sa_warn("Device %X already have %s-lock for session %p.\n",
			dev, devlock_name(ldev->lock_type), s);
		bdput(bdev);
		goto out_up;
	}
	sb = get_super(bdev);
	if (sb) {
		ret = -EBUSY;
		drop_super(sb);
		bdput(bdev);
		goto out_up;
	}
#ifdef HAVE_BD_CLAIM
	ret = sn_blkdev_get(bdev, FMODE_READ, 0);
	if (ret)
		goto out_up;
#endif

	holder = lock_type == DL_WRITE ? s : (void *)session_lockdev;
	ret = sn_bd_claim(bdev, holder);
	if (ret)
		goto out_blkput;
	ldev = create_lockeddev(s, bdev, lock_type);
	if (!ldev) {
		sa_warn("All devlocked slots are exhausted\n");
		ret = -ENOMEM;
		goto out_release;
	}
	up(&devlocked_sem);
	return 0;

out_release:
	sn_bd_release(bdev);
out_blkput:
#ifdef HAVE_BD_CLAIM
	sn_blkdev_put(bdev, FMODE_READ);
#endif
out_up:
	up(&devlocked_sem);
	return ret;
}

static int session_unlockdev(struct session_struct *s, dev_t dev,
						unsigned lock_type)
{
	int ret;
	struct locked_dev* ldev;
	struct block_device *bdev;

	sa_debug(DEBUG_API, "s=%p, dev=%x, type=%s\n", s,
			dev, devlock_name(lock_type));
	ret = -ENOMEM;
	down(&devlocked_sem);
	if (!devlocked)
		goto out_up;
	ret = -ENODEV;
	bdev = bdget(dev);
	if (!bdev)
		goto out_up;
	ret = -ESRCH;
	ldev = find_lockeddev(s, bdev);
	if (!ldev) {
		sa_warn("No lock for device (%X) in session (%p)\n", dev, s);
		bdput(bdev);
		goto out_up;
	}
	ret = -EINVAL;
	if (ldev->lock_type != lock_type) {
		sa_warn("Lock for device (%X) in session (%p) is of type %s\n",
			dev, s, devlock_name(lock_type));
		bdput(bdev);
		goto out_up;
	}

	sn_bd_release(bdev);
	remove_lockeddev(ldev);
	ret = 0;

#ifdef HAVE_BD_CLAIM
	sn_blkdev_put(bdev, FMODE_READ);
#endif
out_up:
	up(&devlocked_sem);
	return ret;
}

static void unlock_sessiondevs(struct session_struct *s)
{
	struct locked_dev* idev, *end;

	sa_debug(DEBUG_API, "\n");

	down(&devlocked_sem);
	if (!devlocked)
		goto out_up;
	end = devlocked + MAX_LOCKEDDEVS;

	for (idev = devlocked; idev != end; idev++) {
		if (!idev->bdev || idev->sess != s)
			continue;
		sn_bd_release(idev->bdev);
#ifdef HAVE_BD_CLAIM
		sn_blkdev_put(idev->bdev, FMODE_READ);
#endif
		remove_lockeddev(idev);
	}
out_up:
	up(&devlocked_sem);
}

static void close_session(struct session_struct *s, int do_free)
{
	sa_debug(DEBUG_API, "s=%p\n", s);
	down(&s->s_sem);
	sa_heartbeat_stop(s);
	unregister_make_request(s);
	stop_req_handler_thread(s, 0);
	sa_debug(DEBUG_API, "s=%p, users=%d, do_free=%d\n", s,
					atomic_read(&s->s_users), do_free);
	wait_for_users(s);
	spin_unlock(&sessions_lock);
	if (s->s_state == SNAP_FROZEN) {
		sn_thaw_bdev(s);
	}
	mpages_destroy(s);
	sa_cache_emlist_destroy(s);
	cleanup_biolist(s);
	cleanup_snapshot(s);
	if (s->s_bdev) {
		sn_blkdev_put(s->s_bdev, FMODE_READ);
		s->s_bdev = NULL;
	}
	block_map_destroy(s);
	unlock_sessiondevs(s);

	if (s->s_kdev != 0 && s->s_rblocks) {
		struct sn_state out;
		fill_state(s, &out);
		session_stat(&out);
	}
	spin_lock(&sessions_lock);
	list_del_init(&s->s_list);
	if (!do_free)
		list_add(&s->s_list, &notinited_list);
	sn_set_mb(s->s_state, SNAP_NOTINITED);
	s->s_kdev = 0;
	spin_unlock(&sessions_lock);
	up(&s->s_sem);
	if (do_free)
		kfree(s);
}
#if 0
static int chk_conflicts(dev_t kdev)
{
	struct list_head *tmp;

	list_for_each(tmp, &sessions_list) {
		struct session_struct *s;

		s = list_entry(tmp, struct session_struct, s_list);
		/* one queue per device */
		if (MAJOR(s->s_kdev) == MAJOR(kdev))
			return 1;
	}
	return 0;
}
#endif
static int session_init(struct session_struct * s, dev_t kdev, int prealloc)
{
	int ret;

	sa_debug(DEBUG_API, "s=%p, dev=%x, prealloc=%d\n", s, kdev, prealloc);
	ret = -EBUSY;
	down(&s->s_sem);
	if (s->s_state != SNAP_NOTINITED)
		goto out;

	spin_lock(&sessions_lock);
/*
	if (chk_conflicts(kdev)) {
		spin_unlock(&sessions_lock);
		goto out;
	}
*/
	list_del_init(&s->s_list);
	s->s_kdev = kdev;
	sn_set_mb(s->s_state, SNAP_ININIT);
	list_add_tail(&s->s_list, &sessions_list);
	spin_unlock(&sessions_lock);
	ret = do_init_session(s, kdev, prealloc);
	if (ret) {
		spin_lock(&sessions_lock);
		list_del_init(&s->s_list);
		s->s_kdev = 0;
		sn_set_mb(s->s_state, SNAP_NOTINITED);
		list_add(&s->s_list, &notinited_list);
		spin_unlock(&sessions_lock);
		goto out;
	}
	sa_kdebug("OK. kdev=%x:%x, bs=%d.\n", MAJOR(s->s_kdev), MINOR(s->s_kdev),
								s->s_bsize);
out:
	up(&s->s_sem);
	return ret;
}

static int session_messqstate(struct session_struct *s, unsigned int *state)
{
	int ret;
	unsigned int out;
	struct list_head *tmp;

	sa_debug(DEBUG_API,"s=%p\n", s);

	ret = -EFAULT;
	out = 0;
	down(&messages_sem);
	spin_lock(&sessions_lock);
	list_for_each(tmp, &sessions_list) {
		struct session_struct *sp;

		sp = list_entry(tmp, struct session_struct, s_list);
		/* one queue per device */
		sa_debug(DEBUG_API,"sp=%p, sp->mess_pos=%d, mess_pos=%d\n", sp,
					sp->s_mess_pos, messages_pos);
		if (sp->s_mess_pos != messages_pos) {
			out = 1;
			goto out_up;
		}
	}
	list_for_each(tmp, &notinited_list) {
		struct session_struct *sp;

		sp = list_entry(tmp, struct session_struct, s_list);
		/* one queue per device */
		sa_debug(DEBUG_API,"sp=%p, sp->mess_pos=%d, mess_pos=%d\n", sp,
					sp->s_mess_pos, messages_pos);
		if (sp->s_mess_pos != messages_pos) {
			out = 1;
			break;
		}
	}
out_up:
	spin_unlock(&sessions_lock);
	up(&messages_sem);
	if (copy_to_user(state, &out, sizeof(*state)))
		goto out;

	ret = 0;
out:
	return ret;
}

static struct inode* sn_get_inode(struct file *filep)
{
#ifdef HAVE_FILE_F_DENTRY
	return filep->f_dentry->d_inode;
#else
	return file_inode(filep);
#endif
}

static struct dentry* sn_get_dentry(struct file *filep)
{
#ifdef HAVE_FILE_F_DENTRY
	return filep->f_dentry;
#else
	return filep->f_path.dentry;
#endif
}

static int session_resetatime(struct session_struct *s, unsigned int fd)
{
	int ret;
	struct file *file;
	struct inode *inode;

	sa_debug(DEBUG_API,"s=%p\n", s);
	down(&s->s_sem);
	ret = -ESRCH;
	file = fget(fd);
	if (!file)
		goto out_up;
	if (!sn_get_dentry(file) || !sn_get_inode(file))
		goto out_put;
	inode = sn_get_inode(file);
	inode->i_flags |= S_NOATIME;
	ret = 0;
out_put:
	fput(file);
out_up:
	up(&s->s_sem);
	return ret;
}

static int snapapi3_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	int err;
	struct session_struct * ss;

	sa_debug(DEBUG_IOCTL, "cmd=%x\n", cmd);
	if (!snap_init_ok)
		return -EPERM;
	ss = file->private_data;
	if (!ss)
		return -EINVAL;
	err = -EFAULT;

	spin_lock_bh(&ss->s_misc_lock);
	ss->s_ioctlcnt++;
	spin_unlock_bh(&ss->s_misc_lock);

	switch (cmd) {
	    case SNAPCTL_INIT: {
			struct snapctl_init s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_init(ss, MKDEV(s.major, s.minor),
								s.prealloc);
		}
		break;
	    case SNAPCTL_FREEZE:
			err = session_freeze(ss);
		break;
	    case SNAPCTL_UNFREEZE:
			err = session_unfreeze(ss);
		break;
	    case SNAPCTL_GETMAP: {
			struct snapctl_getmap s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_getmap(ss, s.map_size, s.map,
					(unsigned long)s.bsize,
					(unsigned long)s.fblock,
					(unsigned long)s.bpgroup,
					(unsigned long)s.gcount);
		}
		break;
	    case SNAPCTL_GETSPARSEDMAP: {
			struct snapctl_getsparsedmap s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_getsparsedmap(ss, s.map_size, s.map,
					(unsigned long)s.bsize,
					(unsigned long)s.fblock,
					(unsigned long)s.bpgroup,
					(unsigned long)s.gcount,
					s.groups);
		}
		break;
	    case SNAPCTL_LDMAP: {
			struct snapctl_ldmap s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_ldmap(ss, s.map_size, s.map);
		}
		break;
	    case SNAPCTL_GETBNO: {
			struct snapctl_getbno s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_getbno(ss, s.bno);
		}
		break;
	    case SNAPCTL_BFREE: {
			struct snapctl_bfree s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_bfree(ss, s.bstart, s.count);
		}
		break;
	    case SNAPCTL_BREAD: {
			struct snapctl_bread s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_bread(ss, s.bstart, s.count, s.data,
				s.flags,
				&(((struct snapctl_bread*)arg)->bincache));
		}
		break;
	    case SNAPCTL_STATE: {
			struct snapctl_state s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_state(ss, s.state, s.size);
		}
		break;
	    case SNAPCTL_DEVINFO: {
			struct snapctl_devinfo s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_devinfo(ss, MKDEV(s.major, s.minor),
								s.info, s.size);
		}
		break;
	    case SNAPCTL_DEVLOCK: {
			struct snapctl_devlock s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_lockdev(ss, MKDEV(s.major, s.minor), DL_WRITE);
		}
		break;
	    case SNAPCTL_DEVUNLOCK: {
			struct snapctl_devunlock s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_unlockdev(ss, MKDEV(s.major, s.minor), DL_WRITE);
		}
		break;
	    case SNAPCTL_DEVLOCKREAD: {
			struct snapctl_devlockread s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_lockdev(ss, MKDEV(s.major, s.minor), DL_READ);
		}
		break;
	    case SNAPCTL_DEVUNLOCKREAD: {
			struct snapctl_devunlockread s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_unlockdev(ss, MKDEV(s.major, s.minor), DL_READ);
		}
		break;
	    case SNAPCTL_MESSQSTATE: {
			struct snapctl_messqstate s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_messqstate(ss, s.state);
		}
		break;
	    case SNAPCTL_RESETATIME: {
			struct snapctl_resetatime s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_resetatime(ss, s.fd);
		}
		break;
	    case SNAPCTL_RDCACHE: {
			struct snapctl_rdcache s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			err = session_rdcache(ss, s.data, s.size);
		}
		break;
#ifdef USE_VZ_VZSNAP
	    case SNAPCTL_SET_VEID: {
			unsigned int s;
			if (copy_from_user(&s, (void *)arg, sizeof(s)))
				break;
			ss->s_veid = s;
			err = 0;
		}
		break;
#endif
	    default:
		err = -ENOTTY;
		break;
	}
	if (err)
		sa_debug(DEBUG_API, "cmd=%x err=%d\n", cmd, -err);
	return err;
}

static int snapapi4_ioctl(struct inode *ino, struct file *file, unsigned int cmd,
		unsigned long arg)
{
	return snapapi3_ioctl(file, cmd, arg);
}
#ifdef HAVE_IOCTL32_CONVERSION
static int
snapapi_compat_ioctl(unsigned int fd, unsigned int cmd,
			unsigned long arg, struct file *filep)
{
	sa_debug(DEBUG_IOCTL, "cmd=%x\n", cmd);
	return snapapi3_ioctl(filep, cmd, arg);
}
#endif

#ifdef HAVE_COMPAT_IOCTL
static long
snapapi_compat_ioctl(struct file *filep, unsigned int cmd,
			unsigned long arg)
{
	sa_debug(DEBUG_IOCTL, "cmd=%x\n", cmd);
	return snapapi3_ioctl(filep, cmd, arg);
}
#endif

static int snapapi_open(struct inode *ino, struct file *file)
{
	struct session_struct * s;
	int i;

	sa_debug(DEBUG_API,"%s\n","enter");
	if (!snap_init_ok) {
		sa_warn("snapapi is not inited.%s", "\n");
		return -EPERM;
	}
	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;
	if (!try_module_get(THIS_MODULE)) {
		kfree(s);
		return -ENODEV;
	}
	memset(s, 0, sizeof(*s));
	INIT_LIST_HEAD(&s->s_list);
	sema_init(&s->s_sem, 1); /* unlocked state */
	s->s_heartbeat_active = 0;
	s->s_usemap = 0;
	spin_lock_init(&s->s_misc_lock);
	spin_lock_init(&s->s_biolist_lock);
	spin_lock_init(&s->s_blkcache_emlock);
	spin_lock_init(&s->s_pending_queue.pq_lock);
	atomic_set(&s->s_users, 1);
	for (i = 0; i < BLK_CHAINS; i++)
		spin_lock_init(&s->s_blkchains[i].lock);

	down(&messages_sem);
	s->s_mess_pos = messages_pos;
	up(&messages_sem);
	spin_lock(&sessions_lock);
	list_add(&s->s_list, &notinited_list);
	spin_unlock(&sessions_lock);

	file->private_data = s;
	sa_debug(DEBUG_API, "OK s=%p tgid=%d\n", s, current->tgid);
	return 0;
}

static int snapapi_release(struct inode *ino, struct file *file)
{
	struct session_struct * s;

	sa_debug(DEBUG_API,"%s\n","enter");
	s = file->private_data;
	if (!s)
		return -EINVAL;
	file->private_data = NULL;

	close_session(s, 1);
	module_put(THIS_MODULE);
	sa_debug(DEBUG_API, "OK s=%p tgid=%d\n", s, current->tgid);
	return 0;
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
#define SN_NOPAGE_SIGBUS NOPAGE_SIGBUS
#else
#define SN_NOPAGE_SIGBUS VM_FAULT_ERROR
#endif

static struct page * snapapi_vm_nopage(struct vm_area_struct * vma,
					unsigned long address, int *unused)
{
	unsigned int i;
	struct session_struct *s;

	if (!vma->vm_file) {
		sa_warn("vma does not have a file attached.%s", "\n");
		return (struct page *)SN_NOPAGE_SIGBUS;
	}
	s = vma->vm_file->private_data;
	sa_debug(DEBUG_API,"s=%p, vma=%p, address=%lx, pgoff=%lu\n", s, vma,
			address, vma->vm_pgoff);

	i = (address - vma->vm_start) >> PAGE_SHIFT;

	if (i >= s->s_msize) {
		sa_warn("Incorrect address.%s", "\n");
		return (struct page *)SN_NOPAGE_SIGBUS;
	}
	get_page(s->s_mpages[i]);
	sa_debug(DEBUG_ALLOC, "s=%p, nopage=%p(%d)\n", s, s->s_mpages[i],
					page_count(s->s_mpages[i]));
	s->s_gpages++;

	return s->s_mpages[i];
}

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
static int snapapi_vm_fault(struct vm_area_struct * vma, struct vm_fault *vmf)
{
	unsigned long address = (unsigned long) vmf->virtual_address;

	vmf->page = snapapi_vm_nopage(vma, address, 0);
	if (vmf->page == (struct page *)SN_NOPAGE_SIGBUS)
		return VM_FAULT_ERROR;
	return 0;
}
#endif

static void snapapi_vm_open(struct vm_area_struct * vma)
{
	struct session_struct *s;

	if (!vma->vm_file) {
		sa_warn("vma does not have a file attached.%s", "\n");
		return;
	}
	s = vma->vm_file->private_data;
	sa_debug(DEBUG_API,"s=%p, vma=%p, users=%d\n", s, vma,
				atomic_read(&s->s_vma_users));
	atomic_inc(&s->s_vma_users);
}

static void snapapi_vm_close(struct vm_area_struct * vma)
{
	unsigned int i;
	struct session_struct *s;

	if (!vma->vm_file) {
		sa_warn("vma does not have a file attached.%s", "\n");
		return;
	}
	s = vma->vm_file->private_data;
	sa_debug(DEBUG_API,"s=%p, vma=%p, users=%d\n", s, vma,
				atomic_read(&s->s_vma_users));
	if (!atomic_dec_and_test(&s->s_vma_users))
		return;

	for (i = 0; i < s->s_msize; i++) {
		if (s->s_mpages[i]) {
			sa_debug(DEBUG_ALLOC, "s=%p, put page=%p(%d)\n", s,
				s->s_mpages[i], page_count(s->s_mpages[i]));
			/* page was put by upper level */
			s->s_ppages++;
		}
	}
	s->s_vma = NULL;
	mpages_destroy(s);
}

static int snapapi_mmap(struct file * file, struct vm_area_struct * vma)
{
	struct session_struct *s;
	int ret, size;
	struct page * page;
	int i;

	s = file->private_data;
	sa_debug(DEBUG_API,"s=%p, vma=%p,%lx-%lx %lx %lx\n", s, vma,
						vma->vm_start, vma->vm_end,
						vma->vm_flags, vma->vm_pgoff);
	if (!s)
		return -EBADF;
	if (!(vma->vm_flags & VM_READ)
			|| (vma->vm_flags & VM_SHARED))
		return -EINVAL;

	ret = -EINVAL;
	down(&s->s_sem);
	if (s->s_vma || s->s_state < SNAP_INITED || vma->vm_pgoff != 0)
		goto out_up;

	ret = -ENOMEM;
	size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
	if (size > s->s_maxmsize || size < 1)
		goto out_up;

	s->s_msize = size; /* mmap size in pages */
	s->s_local_bios = kmalloc(sizeof(struct bio_req) * size, GFP_KERNEL);
	if (!s->s_local_bios)
		goto out_up;
	sa_debug(DEBUG_INTERNALS, "s=%p, mmap pages=%d, local_bios==%p\n", s,
						size, s->s_local_bios);
	memset(s->s_local_bios, 0, sizeof(struct bio_req) * size);

	for (i = 0; i < size; i++) {
		page = alloc_page(GFP_KERNEL);
		if (!page) {
			goto out_destroy;
		}
		s->s_gpages++;
		s->s_mpages[i] = page;
		sa_debug(DEBUG_ALLOC, "s=%p, alloc page=%p(%d)\n", s,
				page, page_count(page));
	}
	ret = 0;
	s->s_vma = vma;
	vma->vm_ops = &snapctl_vm_ops;
	atomic_set(&s->s_vma_users, 1);
	goto out_up;

out_destroy:
	s->s_vma = NULL;
	mpages_destroy(s);
out_up:
	up(&s->s_sem);
	return ret;
}

static ssize_t snapapi_read(struct file * filp, char * buf, size_t count,
								loff_t *ppos)
{
	struct session_struct *s;
	ssize_t size, read, ret;
	int idx;

	s = filp->private_data;
	sa_debug(DEBUG_MESS, "s=%p, buf=%p, count=%lu, ppos=%lld\n", s,
				buf, (unsigned long)count, (long long)*ppos);
	if (!s)
		return -EBADF;
	if (count % MESSAGE_SIZE)
		return -EINVAL;
	if (*ppos != filp->f_pos)
		return -ESPIPE;
	/* A null read succeeds. */
	if (count == 0)
		return 0;
	ret = -ERESTARTSYS;
	down(&s->s_sem);
	if (down_interruptible(&messages_sem))
		goto out_nolock;
	if (signal_pending(current))
		goto out;
	ret = 0;
	/* Always work in NONBLOCK mode */
	if (s->s_mess_pos == messages_pos)
		goto out;
	size = (messages_pos > s->s_mess_pos) ? messages_pos - s->s_mess_pos :
		MAX_MESSAGES - s->s_mess_pos + messages_pos;
	size *= MESSAGE_SIZE;
	if (size > count)
		size = count;
	idx = s->s_mess_pos + 1;
	read = 0;
	ret = -EFAULT;
	while (size > 0) {
		idx %= MAX_MESSAGES;
		if (copy_to_user(buf, &messages_buf[idx++], MESSAGE_SIZE))
			goto out;
		read += MESSAGE_SIZE;
		size -= MESSAGE_SIZE;
	}
	s->s_mess_pos = (idx - 1) % MAX_MESSAGES;
	ret = read;

out:
	up(&messages_sem);
out_nolock:
	up(&s->s_sem);
	return ret;
}
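
/*
 * Message ring arithmetic (summary): messages_buf holds MAX_MESSAGES
 * fixed-size slots and messages_pos is the slot written most recently.
 * A reader positioned at s_mess_pos has either
 *	messages_pos - s_mess_pos			(no wrap) or
 *	MAX_MESSAGES - s_mess_pos + messages_pos	(wrapped)
 * unread messages, which is what snapapi_read() computes above before
 * copying whole MESSAGE_SIZE records out.
 */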

static ssize_t snapapi_write(struct file *filp, const char *buf, size_t count,
								loff_t *ppos)
{
	struct session_struct *s;
	int idx;
	ssize_t ret;

	s = filp->private_data;
	sa_debug(DEBUG_MESS,"s=%p, buf=%p, count=%lu, ppos=%lld, f_pos=%lld\n",
			s, buf, (unsigned long)count, *ppos, filp->f_pos);
	if (!s)
		return -EBADF;
	if (count != MESSAGE_SIZE)
		return -EINVAL;
	if (*ppos != filp->f_pos)
		return -ESPIPE;
	/* Null write succeeds.  */
	if (count == 0)
		return 0;
	ret = -ERESTARTSYS;
	down(&s->s_sem);
	if (down_interruptible(&messages_sem))
		goto out_nolock;
	if (signal_pending(current))
		goto out;
	ret = -EFAULT;
	idx = (messages_pos + 1) % MAX_MESSAGES;
	if (copy_from_user(&messages_buf[idx], buf, MESSAGE_SIZE))
		goto out;
	messages_pos = idx;
	ret = MESSAGE_SIZE;
	/* Signal readers asynchronously that there is more data.  */
	sa_debug(DEBUG_MESS, "s=%p, wake_up_interruptible\n", s);
	wake_up_interruptible(&select_wait);

out:
	up(&messages_sem);
out_nolock:
	up(&s->s_sem);
	return ret;
}

static unsigned int snapapi_poll(struct file *filp, poll_table *wait)
{
	struct session_struct *s;
	unsigned int mask;

	s = filp->private_data;
	sa_debug(DEBUG_MESS, "s=%p\n", s);
	if (!s)
		return POLLERR;
	poll_wait(filp, &select_wait, wait);
	down(&s->s_sem);
	down(&messages_sem);
	mask = 0;
	if (s->s_mess_pos != messages_pos) {
		sa_debug(DEBUG_MESS,"s=%p, message ready\n", s);
		mask = POLLIN | POLLRDNORM;
	}
	up(&messages_sem);
	up(&s->s_sem);
	return mask;
}

static struct vm_operations_struct snapctl_vm_ops = {
	open:	snapapi_vm_open,
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
	nopage:	snapapi_vm_nopage,
#else
	fault:	snapapi_vm_fault,
#endif
	close:	snapapi_vm_close,
};

static struct file_operations snapctl_fops = {
#ifdef HAVE_UNLOCKED_IOCTL
	.unlocked_ioctl	= snapapi3_ioctl,
#else
	.ioctl		= snapapi4_ioctl,
#endif
	.open		= snapapi_open,
	.read		= snapapi_read,
	.write		= snapapi_write,
	.poll		= snapapi_poll,
	.mmap		= snapapi_mmap,
	.release	= snapapi_release,
#ifdef HAVE_COMPAT_IOCTL
	.compat_ioctl	= snapapi_compat_ioctl,
#endif
};

static int snapctl_unload(void)
{
	unregister_chrdev(SNAPCTL_MAJOR, SNAPCTL_NAME);
#ifdef HAVE_IOCTL32_CONVERSION
	unregister_ioctl32_conversion(SNAPCTL_INIT);
	unregister_ioctl32_conversion(SNAPCTL_FREEZE);
	unregister_ioctl32_conversion(SNAPCTL_LDMAP);
	unregister_ioctl32_conversion(SNAPCTL_GETMAP);
	unregister_ioctl32_conversion(SNAPCTL_GETBNO);
	unregister_ioctl32_conversion(SNAPCTL_BREAD);
	unregister_ioctl32_conversion(SNAPCTL_BFREE);
	unregister_ioctl32_conversion(SNAPCTL_STATE);
	unregister_ioctl32_conversion(SNAPCTL_DEVINFO);
	unregister_ioctl32_conversion(SNAPCTL_DEVLOCK);
	unregister_ioctl32_conversion(SNAPCTL_DEVUNLOCK);
	unregister_ioctl32_conversion(SNAPCTL_UNFREEZE);
	unregister_ioctl32_conversion(SNAPCTL_MESSQSTATE);
	unregister_ioctl32_conversion(SNAPCTL_RESETATIME);
	unregister_ioctl32_conversion(SNAPCTL_RDCACHE);
	unregister_ioctl32_conversion(SNAPCTL_SET_VEID);
	unregister_ioctl32_conversion(SNAPCTL_START_SWAP_THREAD);
	unregister_ioctl32_conversion(SNAPCTL_STOP_SWAP_THREAD);
	unregister_ioctl32_conversion(SNAPCTL_DEVLOCKREAD);
	unregister_ioctl32_conversion(SNAPCTL_DEVUNLOCKREAD);
#endif
	down(&devlocked_sem);
	if (devlocked) {
		free_page((unsigned long)devlocked);
		devlocked = NULL;
	}
	up(&devlocked_sem);
	down(&messages_sem);
	if (messages_buf) {
		free_page((unsigned long)messages_buf);
		messages_buf = NULL;
	}
	up(&messages_sem);
	return 0;
}
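
/*
 * Shutdown handshake with the resolver thread: clear the continue
 * flag, publish the store with wmb() so the thread cannot miss it,
 * kick the thread out of its sleep, then block until it signals
 * resolver_thread_exited on its way out.
 */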

static void stop_resolver_thread(void)
{
	resolver_thread_continue = 0;
	wmb();
#ifndef USE_KERNEL_THREAD
	wake_up_process(resolver_thread);
#else
	wake_up_interruptible(&resolver_thread_signal);
#endif
	wait_for_completion(&resolver_thread_exited);
}
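
/*
 * Module bring-up: start the resolver thread, allocate the two shared
 * pages (message ring and device-lock table), register the character
 * device, and, on old kernels, register 32-bit ioctl conversions.
 * Failures unwind in reverse through the err_* labels below.
 */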

static int __init snapapi_init(void)
{
	struct sysinfo i;
	int ret = -ENOMEM;

	init_waitqueue_head(&select_wait);
	si_meminfo(&i);
	snap_emergency_size = i.totalram >> 5;

#ifndef USE_KERNEL_THREAD
	resolver_thread = kthread_create(resolver_loop, NULL, "snapapid");
	if (IS_ERR(resolver_thread)) {
		/* PTR_ERR() extracts the errno; IS_ERR() only tests it. */
		ret = PTR_ERR(resolver_thread);
		goto out_info;
	}
	wake_up_process(resolver_thread);
#else
	resolver_thread_pid = kernel_thread(resolver_loop, NULL, 0);
	if (resolver_thread_pid < 0) {
		ret = resolver_thread_pid;
		goto out_info;
	} 
#endif

	messages_buf = (struct snap_message *) get_zeroed_page(GFP_KERNEL);
	if (!messages_buf)
		goto err_mbuf;

	devlocked = (struct locked_dev *) get_zeroed_page(GFP_KERNEL);
	if (!devlocked)
		goto err_devl;

	snapctl_fops.owner = THIS_MODULE;
	ret = register_chrdev(SNAPCTL_MAJOR, SNAPCTL_NAME, &snapctl_fops);
	if (ret)
		goto err_chrdev;

#ifdef HAVE_IOCTL32_CONVERSION
	register_ioctl32_conversion(SNAPCTL_INIT, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_FREEZE, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_LDMAP, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_GETMAP, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_GETBNO, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_BREAD, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_BFREE, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_STATE, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_DEVINFO, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_DEVLOCK, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_DEVUNLOCK, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_UNFREEZE, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_MESSQSTATE, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_RESETATIME, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_RDCACHE, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_SET_VEID, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_START_SWAP_THREAD, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_STOP_SWAP_THREAD, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_DEVLOCKREAD, snapapi_compat_ioctl);
	register_ioctl32_conversion(SNAPCTL_DEVUNLOCKREAD, snapapi_compat_ioctl);
#endif
	snap_init_ok = 1;
	ret = 0;

out_info:
	sa_info("Snapapi(v.%d.%d.%d) init %s. Session size %d.\n",
				SNAPAPI_VMAJOR, SNAPAPI_VMINOR,
				SNAPAPI_VSUBMINOR,
				snap_init_ok ? "OK" : "failed",
				(int)sizeof(struct session_struct));
	return ret;

err_chrdev:
	free_page((unsigned long)devlocked);
err_devl:
	free_page((unsigned long)messages_buf);
err_mbuf:
	stop_resolver_thread();
	goto out_info;
}
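
/*
 * Once snapapi_init() has registered the device, a node can be created
 * from userspace.  A sketch, assuming minor 0 and a made-up path (the
 * module only dictates SNAPCTL_MAJOR/SNAPCTL_NAME):
 *
 *	#include <sys/stat.h>
 *	#include <sys/sysmacros.h>
 *
 *	mknod("/dev/snapctl", S_IFCHR | 0600, makedev(SNAPCTL_MAJOR, 0));
 */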

static void __exit snapapi_exit(void)
{
	snapctl_unload();
	stop_resolver_thread();
	sa_info("Snapapi unloading...%s", "\n");
}

module_init(snapapi_init);
module_exit(snapapi_exit);
MODULE_AUTHOR("Acronis");
MODULE_DESCRIPTION("Acronis Snapshot kernel API module");
MODULE_LICENSE("Proprietary");
MODULE_VERSION(SNAPAPI_COMMON_MOD_VERSION);
MODULE_INFO(supported, "external");
