/*
 * Hash one finished chunk with md5(), convert the digest with
 * md5_2_feature() and append the resulting feature to `features`.
 */
static void cdc_push_feature(vector *features, unsigned char *chunk, unsigned int len)
{
	unsigned char md5_checksum[32 + 1] = {0};
	feature_t f;

	md5(chunk, len, md5_checksum);
	f = md5_2_feature(md5_checksum);
	VEC_PUSH_BACK(features, &f);
}

/*
 * Content-defined chunking of the data readable from `fd`.
 *
 * The data is read in pieces of up to BUF_MAX_SIZE bytes.  A window of
 * BLOCK_WIN_SIZE bytes slides over it; a chunk boundary is declared when
 * (hash(window) % g_block_size) == CHUNK_CDC_R and the chunk has reached
 * BLOCK_MIN_SIZE bytes, or unconditionally once the chunk reaches
 * BLOCK_MAX_SIZE bytes.  Every finished chunk is hashed and its feature is
 * appended to `features`; whatever is left at end of input is emitted as
 * one final chunk.
 *
 * The window hash is the Adler-32 checksum (rolling where possible) when
 * g_rolling_hash is set, otherwise g_cdc_chunk_hashfunc().
 *
 * fd       - descriptor to read from; reading starts at its current offset
 *            and the offset is not restored.
 * features - vector that receives one feature_t per chunk.
 *
 * Returns 0 on success, -1 if g_block_size is 0 or read() fails (features
 * pushed before the failure stay in the vector).
 */
int file_chunk_cdc(int fd, vector* features) {
	unsigned char buf[BUF_MAX_SIZE] = {0};
	unsigned char block_buf[BLOCK_MAX_SIZE * 2] = {0};
	unsigned char last_block_buf[BLOCK_MAX_SIZE * 2] = {0};
	char win_buf[BLOCK_WIN_SIZE + 1] = {0};
	unsigned char adler_pre_char = 0;   /* byte that just left the window */
	unsigned int bpos = 0;              /* unconsumed bytes carried over in buf */
	unsigned int rwsize = 0;            /* bytes delivered by the latest read() */
	unsigned int exp_rwsize = BUF_MAX_SIZE;
	unsigned int head, tail;
	unsigned int block_sz = 0, old_block_sz = 0;
	unsigned int hkey = 0;
	unsigned int last_block_len;

	/* the boundary test below computes hkey % g_block_size */
	if (g_block_size == 0)
		return -1;

	for (;;)
	{
		/*
		 * Keep the result signed: storing -1 in an unsigned counter would
		 * turn a read error into a gigantic byte count.
		 */
		ssize_t nread = read(fd, buf + bpos, exp_rwsize);
		if (nread < 0)
			return -1;

		rwsize = (unsigned int)nread;
		if (rwsize == 0)
			break;

		/* too little data left for a regular chunk: handle it as the last chunk */
		if ((rwsize + bpos + block_sz) < BLOCK_MIN_SIZE)
			break;

		head = 0;
		tail = bpos + rwsize;

		/*
		 * No boundary can be accepted before the chunk holds
		 * BLOCK_MIN_SIZE bytes, so copy the first
		 * BLOCK_MIN_SIZE - BLOCK_WIN_SIZE bytes without hashing them.
		 */
		if (block_sz < (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE))
		{
			old_block_sz = block_sz;
			block_sz = ((block_sz + tail - head) > (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ?
					BLOCK_MIN_SIZE - BLOCK_WIN_SIZE : block_sz + tail - head;
			memcpy(block_buf + old_block_sz, buf + head, block_sz - old_block_sz);
			head += (block_sz - old_block_sz);
		}

		while ((head + BLOCK_WIN_SIZE) <= tail)
		{
			memcpy(win_buf, buf + head, BLOCK_WIN_SIZE);

			/*
			 * Author's note: rabin hash was tried first but performed
			 * badly; ELF hash did better; Adler is the current default.
			 *
			 * With the rolling hash, the first window after the skipped
			 * prefix is hashed in full and later windows are updated
			 * incrementally from the byte that left (adler_pre_char) and
			 * the byte that entered.
			 */
			if (g_rolling_hash)
			{
				hkey = (block_sz == (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ?
					adler32_checksum(win_buf, BLOCK_WIN_SIZE) :
					adler32_rolling_checksum(hkey, BLOCK_WIN_SIZE, adler_pre_char,
							buf[head + BLOCK_WIN_SIZE - 1]);
			}
			else
			{
				hkey = g_cdc_chunk_hashfunc(win_buf);
			}

			if ((hkey % g_block_size) == CHUNK_CDC_R)
			{
				/* boundary hit: the whole window closes the chunk */
				memcpy(block_buf + block_sz, buf + head, BLOCK_WIN_SIZE);
				head += BLOCK_WIN_SIZE;
				block_sz += BLOCK_WIN_SIZE;
				if (block_sz >= BLOCK_MIN_SIZE)
				{
					cdc_push_feature(features, block_buf, block_sz);
					block_sz = 0;
				}
			}
			else
			{
				/* no boundary: take one byte and slide the window */
				block_buf[block_sz++] = buf[head++];
				/* no boundary found in time: force a cut at the maximum size */
				if (block_sz >= BLOCK_MAX_SIZE)
				{
					cdc_push_feature(features, block_buf, block_sz);
					block_sz = 0;
				}
			}

			/* a new chunk starts: skip its unhashed prefix again */
			if (block_sz == 0)
			{
				block_sz = ((tail - head) > (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ?
					BLOCK_MIN_SIZE - BLOCK_WIN_SIZE : tail - head;
				memcpy(block_buf, buf + head, block_sz);
				head = ((tail - head) > (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ?
					head + (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE) : tail;
			}

			/* head advanced by at least one byte above, so head - 1 is valid */
			adler_pre_char = buf[head - 1];
		}

		/* move the unconsumed tail to the front and top the buffer up */
		bpos = tail - head;
		exp_rwsize = BUF_MAX_SIZE - bpos;
		/*
		 * If nothing was consumed in this round (head == 0) the previously
		 * saved byte is still the right one; indexing buf[head - 1] would
		 * read far outside the buffer.
		 */
		if (head > 0)
			adler_pre_char = buf[head - 1];
		memmove(buf, buf + head, bpos);
	}

	/*
	 * Last chunk: the partially built chunk in block_buf followed by the
	 * rwsize + bpos bytes still sitting at the start of buf.
	 */
	last_block_len = rwsize + bpos + block_sz;
	if (last_block_len > 0)
	{
		memcpy(last_block_buf, block_buf, block_sz);
		memcpy(last_block_buf + block_sz, buf, rwsize + bpos);
		cdc_push_feature(features, last_block_buf, last_block_len);
	}

	return 0;
}


/*
 * Sliding-block chunking of the data readable from `fd`.
 *
 * A window of g_block_size bytes slides over the data one byte at a time.
 * Bytes the window has passed are collected in block_buf; each time
 * g_block_size of them have accumulated they form a fixed-size block whose
 * Adler-32 checksum is recorded in a local table and whose feature is
 * appended to `features`.  If the window's checksum and MD5 are both found
 * in the local tables, the pending fragment (if any) and the window are
 * emitted as features and the window jumps forward by a whole block.
 *
 * NOTE(review): nothing visible in this function inserts into sb_htable
 * (the MD5 table), so the "duplicate" branch looks unreachable as written;
 * confirm whether an insert was lost when this code was adapted.
 *
 * NOTE(review): bytes still pending at end of input (slide_sz bytes in
 * block_buf plus bpos bytes in buf) are not emitted as a feature; the
 * original handling of the trailing chunk was disabled.
 *
 * fd       - descriptor to read from; it is rewound to offset 0 on every
 *            path that reaches the cleanup code (not on the early -1
 *            returns for bad configuration or table creation failure).
 * features - vector that receives one feature_t per emitted block/fragment.
 *
 * Returns 0 on success, -1 if g_block_size is 0 or larger than the block
 * buffers, if a hash table cannot be created, or if read() fails.
 */
int file_chunk_sb(int fd, vector* features) {
	char buf[BUF_MAX_SIZE] = {0};
	char win_buf[BLOCK_MAX_SIZE * 2] = {0};
	char block_buf[BLOCK_MAX_SIZE * 2] = {0};
	char adler_pre_char = 0;            /* byte that just left the window */
	unsigned char md5_checksum[32 + 1] = {0};
	unsigned char md5_checksum1[32 + 1] = {0};
	char crc_checksum[16] = {0};
	unsigned int bpos = 0;              /* unconsumed bytes carried over in buf */
	unsigned int slide_sz = 0;          /* bytes collected in block_buf */
	unsigned int rwsize = 0, bzsize = 0;
	unsigned int exp_rwsize = BUF_MAX_SIZE;
	unsigned int head, tail;
	unsigned int hkey = 0;
	unsigned int bflag = 0;
	int ret = 0;
	hashtable *sb_htable;
	hashtable *sb_htable_crc;
	feature_t f, f1;

	/*
	 * A zero block size would never let the inner loop terminate, and one
	 * larger than win_buf/block_buf would overflow them.
	 */
	if (g_block_size == 0 || g_block_size > sizeof(block_buf))
		return -1;

	sb_htable = create_hashtable(g_htab_bucket_nr);
	sb_htable_crc = create_hashtable(g_htab_bucket_nr);
	if (sb_htable == NULL || sb_htable_crc == NULL)
	{
		/* release whichever table was created so it does not leak */
		if (sb_htable != NULL)
			hash_free(sb_htable);
		if (sb_htable_crc != NULL)
			hash_free(sb_htable_crc);
		return -1;
	}

	for (;;)
	{
		/*
		 * Keep the result signed: storing -1 in an unsigned counter would
		 * turn a read error into a gigantic byte count.
		 */
		ssize_t nread = read(fd, buf + bpos, exp_rwsize);
		if (nread < 0)
		{
			ret = -1;
			goto out;
		}

		rwsize = (unsigned int)nread;
		if (rwsize == 0)
			break;

		head = 0;
		tail = bpos + rwsize;
		while ((head + g_block_size) <= tail)
		{
			memcpy(win_buf, buf + head, g_block_size);
			/* full checksum right after a block boundary, rolling update otherwise */
			hkey = (slide_sz == 0) ? adler32_checksum(win_buf, g_block_size) :
				adler32_rolling_checksum(hkey, g_block_size, adler_pre_char,
						buf[head + g_block_size - 1]);

			uint_2_str(hkey, crc_checksum);

			/* bflag: 0, both CRC and MD5 are not identical
			          1, both CRC and MD5 are identical
			          2, CRC is identical and MD5 is not
			 */
			bflag = 0;

			/* weak checksum seen before: confirm with MD5 */
			bzsize = g_block_size;
			if (hash_exist(sb_htable_crc, crc_checksum))
			{
				bflag = 2;
				md5((unsigned char*)win_buf, bzsize, md5_checksum);
				/* take the feature before the digest is rewritten by md5_2_str() */
				f = md5_2_feature(md5_checksum);
				md5_2_str(md5_checksum);
				if (hash_exist(sb_htable, (char*)md5_checksum))
				{
					/* emit the fragment collected in front of the duplicate */
					if (slide_sz != 0)
					{
						md5((unsigned char*)block_buf, slide_sz, md5_checksum1);
						f1 = md5_2_feature(md5_checksum1);
						VEC_PUSH_BACK(features, &f1);
					}

					/* emit the duplicate fixed-size block and jump over it */
					VEC_PUSH_BACK(features, &f);

					head += g_block_size;
					slide_sz = 0;
					bflag = 1;
				}
			}

			/* not a duplicate: take one byte and slide the window */
			if (bflag != 1)
			{
				block_buf[slide_sz++] = buf[head++];
				if (slide_sz == g_block_size)
				{
					bzsize = g_block_size;

					/* record the block's weak checksum */
					hkey = adler32_checksum(block_buf, bzsize);
					uint_2_str(hkey, crc_checksum);
					hash_checkin(sb_htable_crc, crc_checksum);

					md5((unsigned char*)block_buf, bzsize, md5_checksum);
					f = md5_2_feature(md5_checksum);
					VEC_PUSH_BACK(features, &f);

					slide_sz = 0;
				}
			}

			/* head advanced by at least one byte above, so head - 1 is valid */
			adler_pre_char = buf[head - 1];
		}

		/* move the unconsumed tail to the front and top the buffer up */
		bpos = tail - head;
		exp_rwsize = BUF_MAX_SIZE - bpos;
		/*
		 * If fewer than g_block_size bytes were available nothing was
		 * consumed (head == 0); keep the previously saved byte instead of
		 * indexing buf[head - 1], which would read far outside the buffer.
		 */
		if (head > 0)
			adler_pre_char = buf[head - 1];
		memmove(buf, buf + head, bpos);
	}

out:
	lseek(fd, 0, SEEK_SET);
	hash_free(sb_htable);
	hash_free(sb_htable_crc);
	return ret;
}