#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <pthread.h>
#include <time.h>
#include <sys/time.h>

struct sort_structure{
	unsigned int highest;
	unsigned int sub_highest[256][256];
};

struct translation_structure{
#ifdef BIG_FILE
	unsigned long series;
#else
	unsigned int series;
#endif
        unsigned int times;
};

struct empty{
	char times;
};

struct pat_length{
	unsigned int highest_times;
	unsigned int len;
	unsigned int search_num;
	struct translation_structure ***member;
}*table;

struct buffer_structure {
	char *string;
	unsigned int times;
#ifdef BIG_FILE
        unsigned long series;
#else
        unsigned int series;
#endif
};

struct common_structure {
	char *string;
	unsigned int times;
	unsigned int sub;
};

//struct unique {
//	unsigned int id;
	//unsigned int filler[3];
//};

int file_bytes;
int frequency;
char *mem_file;

static void progress_func(int max_searchs, int pat_len);
void *match_search(void *set);

static void progress_func(int max_searchs, int pat_len){
	static float a = 0;
	static float b = 0;
	
	if(max_searchs == 0){
		b++;
		printf("progress: %.02lf%% %d length patterns done.\n",(double)((b/a)*100),pat_len );
	}else{
		a = max_searchs;
		printf("progress: %.02lf%%\n",(double)((b/a)*100));
	}
}

void *match_search(void *set){
        //returns 0 for match found and written
        //returns 1 for no matches found

	int num7;
        int num4;
        int num2=0;
        register int num3;
	int saved=0;
	int num5=0;
	int num6=0;

	const int num1 = table[*(unsigned int *)set].search_num;
	const unsigned int stop_byte = file_bytes-table[num1].len-128;
	const unsigned short member_length = table[num1].len-1;
	unsigned short search_offset = 0;	

        int prefetch_a[128];
        int prefetch_b[8];
        unsigned int pre_int=0;
        unsigned int pre_count=0;
        int pre_ret=0;
	unsigned int memfile_bufa[4];
	unsigned int memfile_bufb[4];
	unsigned int memfile_bufc[4];
	unsigned int memfile_bufd[4];
	int mem_counter;	

	unsigned char first_char=0;
	unsigned char last_char=0;	
	unsigned char sec_char=0;
	unsigned char a_char=0;
	unsigned char b_char=0;
	unsigned char c_char=0;

	register unsigned int id = 0;
	unsigned int table_miss = 0;
	unsigned int table_found = 0;
	unsigned int cache_found = 0;
	unsigned int buffer_found = 0;
	unsigned int patterns_total = 0;
	unsigned int patterns_freed = 0;
	unsigned int empties = 0;

        char *index;
        
        if(member_length>5){
                index = malloc(sizeof(char)*65536*2);
        }else{
                index = malloc(sizeof(char)*65536);
        }

	unsigned int sizes[256][256];

        struct buffer_structure *buffer[256]; 
        struct common_structure *common[256]; 
        unsigned int buf_members[256]; 
        unsigned int buf_count[256]; 
        unsigned int com_count[256]; 

	//unsigned int *unique_id[256];
	unsigned int *common_id[256];
	unsigned int *buffer_id[256];
	//struct unique *unique_id[256];
	unsigned int *unique_id[256];
//posix_memalign(&unique_id, 64, sizeof(struct unique *)*256);

	int *buffer_open[256];
	int *common_open[256];

	struct translation_structure *temp_struct_array;

	double cache_sizes[2];

	num3 = 256*(sizeof(struct buffer_structure)+member_length+1);
	//num4=floor(917504/num3);
	//num4=floor(524288/num3);
	//num4=floor(393216/num3);
	//num4=floor(1572864/num3);
	//num4=floor(262144/num3);
	num4=floor(131072/num3);
	//num4=floor(65536/num3);

	num3 = 256*(sizeof(struct common_structure)+member_length+1);
	//num5=floor(1572864/num3);
	num5=floor(524288/num3);
	//num5=floor(393216/num3);
	//num5=floor(262144/num3);
	//num5=floor(131072/num3);
	//num5=floor(65536/num3);


	if(num4 < 8){
		num4=8;
	}
	while(num4 % 8 != 0){
		num4++;
	}
	while(num5 % 4 != 0){
		num5++;
	}

	const unsigned short b_members = num4;
	const unsigned short c_members = num5;

	for(num3=0; num3<256; num3++){
		for(num4=0; num4<256; num4++){
			sizes[num3][num4]=1;
		}
		//buffer[num3] = malloc(b_members*sizeof(struct buffer_structure));
		//common[num3] = malloc(c_members*sizeof(struct common_structure));

		buffer_id[num3] = malloc(b_members*sizeof(unsigned int));
		common_id[num3] = malloc(c_members*sizeof(unsigned int));
		buffer_open[num3] = malloc(b_members*sizeof(int));
		common_open[num3] = malloc(c_members*sizeof(int));

		buffer[num3] = malloc(b_members*sizeof(struct buffer_structure));
		for(num4=0; num4<b_members; num4++){			
			buffer[num3][num4].string = malloc(table[num1].len*sizeof(char));
		}
		common[num3] = malloc(c_members*sizeof(struct common_structure));
		for(num4=0; num4<c_members; num4++){
			common[num3][num4].string = malloc(table[num1].len*sizeof(char));
		}

	}
	cache_sizes[0] = 256*b_members*( sizeof(struct buffer_structure)+sizeof(unsigned int)+sizeof(int)+(table[num1].len*sizeof(char)) );
	cache_sizes[1] = 256*c_members*( sizeof(struct common_structure)+sizeof(unsigned int)+sizeof(int)+(table[num1].len*sizeof(char)) );
	
	for(num3=0; num3<256; num3++){
		//posix_memalign(&unique_id[num3], 16, sizeof(unsigned int)*16);

		unique_id[num3]=malloc(sizeof(unsigned int)*16);
		unique_id[num3][0]=15;
	}

	for(num7=0; num7<256; num7++){
		num2=0;
		for(num3=0; num3<256; num3++){
			buf_members[num3] = 0;
			buf_count[num3] = 0;
			com_count[num3]=0;

			if(member_length>5){
				for(num4=0; num4<256; num4++){
					index[num3*256+num4]=0;
					index[65536+num3*256+num4]=0;
				}
			}else{
				for(num4=0; num4<256; num4++){
					index[num3*256+num4]=0;
				}
			}

			for(num4=0; num4<b_members; num4++){
				buffer_open[num3][num4]=1;
				buffer[num3][num4].times=0;
			}
			for(num4=0; num4<c_members; num4++){
				common[num3][num4].times=0;
				common[num3][num4].sub=0;
				common_open[num3][num4]=1;
			}
		}
		
		for(num3=0; num3<256; num3++){
			unique_id[num3][0]=15;
		}
		for(num3=0; num3<8; num3++){
			prefetch_b[num3] = num7;
		}
		first_char = num7;

		while(num2<stop_byte){
			//fill a page 16bytes at a time
			for(num3=0; num3<121; num3+=8){
			//for(num3=0; num3<249; num3+=8){
				prefetch_a[num3] = (unsigned char)mem_file[num2+num3];
				prefetch_a[num3+1] = (unsigned char)mem_file[num2+num3+1];
				prefetch_a[num3+2] = (unsigned char)mem_file[num2+num3+2];
				prefetch_a[num3+3] = (unsigned char)mem_file[num2+num3+3];
				prefetch_a[num3+4] = (unsigned char)mem_file[num2+num3+4];
				prefetch_a[num3+5] = (unsigned char)mem_file[num2+num3+5];
				prefetch_a[num3+6] = (unsigned char)mem_file[num2+num3+6];
				prefetch_a[num3+7] = (unsigned char)mem_file[num2+num3+7];
			}
			//parse page for num7 matches
			for(num3=0; num3<121; num3+=8){
			//for(num3=0; num3<241; num3+=16){
				if(prefetch_a[num3] == prefetch_b[0]){
					prefetch_a[num3]=-1;
				}
				if(prefetch_a[num3+1] == prefetch_b[1]){
					prefetch_a[num3+1]=-1;
				}
				if(prefetch_a[num3+2] == prefetch_b[2]){
					prefetch_a[num3+2]=-1;
				}
				if(prefetch_a[num3+3] == prefetch_b[3]){
					prefetch_a[num3+3]=-1;
				}
				if(prefetch_a[num3+4] == prefetch_b[4]){
					prefetch_a[num3+4]=-1;
				}
				if(prefetch_a[num3+5] == prefetch_b[5]){
					prefetch_a[num3+5]=-1;
				}
				if(prefetch_a[num3+6] == prefetch_b[6]){
					prefetch_a[num3+6]=-1;
				}
				if(prefetch_a[num3+7] == prefetch_b[7]){
					prefetch_a[num3+7]=-1;
				}
/*
                                if(prefetch_a[num3+8] == prefetch_b[8]){
                                        prefetch_a[num3+8]=-1;
                                }
                                if(prefetch_a[num3+9] == prefetch_b[9]){
                                        prefetch_a[num3+9]=-1;
                                }
                                if(prefetch_a[num3+10] == prefetch_b[10]){
                                        prefetch_a[num3+10]=-1;
                                }
                                if(prefetch_a[num3+11] == prefetch_b[11]){
                                        prefetch_a[num3+11]=-1;
                                }
                                if(prefetch_a[num3+12] == prefetch_b[12]){
                                        prefetch_a[num3+12]=-1;
                                }
                                if(prefetch_a[num3+13] == prefetch_b[13]){
                                        prefetch_a[num3+13]=-1;
                                }
                                if(prefetch_a[num3+14] == prefetch_b[14]){
                                        prefetch_a[num3+14]=-1;
                                }
                                if(prefetch_a[num3+15] == prefetch_b[15]){
                                        prefetch_a[num3+15]=-1;
                                }
*/

			}
			//run the matches
			pre_int=num2;
			pre_count=0;
			//first_char = num7;
			while(pre_count<125){
			//while(pre_count<253){
				if(prefetch_a[pre_count] == -1){
					num2=pre_int+pre_count;
					pre_ret=1;
					goto proceed;
				}
				pre_a:
				pre_count++;
				if(prefetch_a[pre_count] == -1){
					num2=pre_int+pre_count;
					pre_ret=2;
					goto proceed;
				}
				pre_b:
				pre_count++;
				if(prefetch_a[pre_count] == -1){
					num2=pre_int+pre_count;
					pre_ret=3;
					goto proceed;
				}
				pre_c:
				pre_count++;
				if(prefetch_a[pre_count] == -1){
					num2=pre_int+pre_count;
					pre_ret=4;
					goto proceed;
				}
				pre_d:
				pre_count++;
			}
			goto found;
			proceed:
			
			/*
			first_char = (unsigned char)mem_file[num2];
			if(first_char != num7){
				goto found;
			}
			*/

			sec_char = (unsigned char)mem_file[num2+1];
			last_char = (unsigned char)mem_file[num2+member_length];

			if(member_length>5){
				a_char = (unsigned char)mem_file[num2+member_length-1];
				b_char = (unsigned char)mem_file[num2+member_length-2];
				search_offset = member_length-3;
				c_char = (unsigned char)mem_file[num2+search_offset];
				id = c_char*16777216 + b_char*65536 + a_char*256 + last_char;
				if(!index[sec_char*256+last_char] || !index[65536+a_char*256+b_char]){
					goto no_search;
				}

			}else{
				search_offset = member_length-1;
				a_char = (unsigned char)mem_file[num2+search_offset];
				id = first_char*16777216 + sec_char*65536 + a_char*256 + last_char;
				if(!index[sec_char*256+last_char]){
					goto no_search;
				}
			}

                        num3=0;
                        //search cached xMB worth of most recent found patterns
                        while(num3<b_members){
                                if(!buffer_open[last_char][num3]){
					if(id != buffer_id[last_char][num3]){
						goto r_next;
					}         
					for( num4=1; num4 < search_offset; num4++){
						if( mem_file[num2+num4] != buffer[last_char][num3].string[num4] ){
							goto r_next;
						}
					}
					buffer_found++;
					buffer[last_char][num3].times++;
					//goto found;
                                        if(pre_ret == 1){
                                                goto pre_a;
                                        }
                                        if(pre_ret == 2){
                                                goto pre_b;
                                        }
                                        if(pre_ret == 3){
                                                goto pre_c;
                                        }
                                        if(pre_ret == 4){   
                                                goto pre_d;
                                        }

				}
				r_next:
				num3++;
			}

			//search cached xMB worth of most often found patterns
			//search:
			num3=0; 
			
			while(num3<c_members){
				if(!common_open[last_char][num3]){
					if(id != common_id[last_char][num3]){
						goto o_next;
					}
					for( num4=1; num4 < search_offset; num4++){
						if( mem_file[num2+num4] != common[last_char][num3].string[num4] ){
							goto o_next;						
						}
					}
					cache_found++;
					common[last_char][num3].times++;
					//goto found;
                                        if(pre_ret == 1){
                                                goto pre_a;
                                        }
                                        if(pre_ret == 2){
                                                goto pre_b;
                                        }
                                        if(pre_ret == 3){
                                                goto pre_c;
                                        }
                                        if(pre_ret == 4){   
                                                goto pre_d;
                                        }

				}
				o_next:
				num3++;
			}

			num5 = table[num1].member[first_char][last_char][0].times;

			num3 = num5;

			//if( member_length > 5 && (member_length-4)%4 == 0

//memfile_bufa[0]=id;	


				//while(num3>3){
				while(num3>3){
					saved=num3;
					memfile_bufa[0]=unique_id[last_char][saved];
					memfile_bufa[1]=unique_id[last_char][saved-1];
					memfile_bufa[2]=unique_id[last_char][saved-2];
					memfile_bufa[3]=unique_id[last_char][saved-3];

					if(saved>7){
						memfile_bufb[0]=unique_id[last_char][saved-4];
						memfile_bufb[1]=unique_id[last_char][saved-5];
						memfile_bufb[2]=unique_id[last_char][saved-6];
						memfile_bufb[3]=unique_id[last_char][saved-7];
					}					

					//saved=num3;
					if(memfile_bufa[0] == id){
						num6=0;					
						goto id_match;
					}
					next1:
					;				
					if(memfile_bufa[1] == id){
						num6=1;
						num3=saved-1;
						goto id_match;
					}
					next2:
					;
					if(memfile_bufa[2] == id){
						num6=2;
						num3=saved-2;
						goto id_match;
					}
					next3:
					;
					if(memfile_bufa[3] == id){
						num6=3;
						num3=saved-3;
						goto id_match;
					}
					next4:
					;

					if(saved>7){
						if(memfile_bufb[0] == id){
							num6=4;
							num3=saved-4;
							goto id_match;
						}
						next5:
						;
					
						if(memfile_bufb[1] == id){
							num6=5;
							num3=saved-5;
							goto id_match;
						}
						next6:
						;
						if(memfile_bufb[2] == id){
							num6=6;
							num3=saved-6;
							goto id_match;
						}
						next7:
						;
						if(memfile_bufb[3] == id){
							num6=7;
							num3=saved-7;
							goto id_match;
						}
						next8:
						num3=saved-8;
					}else{
						num3=saved-4;
					}

					//next8:
					//;
					
				}

			


			num6=8;
			while(num3>0){
				//if( member_length > 12){
					if(unique_id[last_char][num3] == id){
						//num6=4;
						goto id_match;
					}
				//}
				//goto id_match;
				//}
				next9:
				num3--;
			}


			//while(num3>0){
			//	goto id_match;
			//	next1:
			//	num3--;
			//}


			table_miss++;
			goto no_search;


			no_match:
			;
                                        if(num6 == 8){
                                                goto next9;
                                        }
                                        if(num6 == 0){
                                                goto next1;
                                        }
                                        if(num6 == 1){
                                                goto next2;
                                        }
                                        if(num6 == 2){
                                                goto next3;
                                        }
                                        if(num6 == 3){
                                                goto next4;
                                        }
                                        if(num6 == 4){
                                                goto next5;
                                        }
                                        if(num6 == 5){
                                                goto next6;
                                        }
                                        if(num6 == 6){
                                                goto next7;
                                        }
                                        if(num6 == 7){
                                                goto next8;
                                        }
                                        //if(num6 == 3){
                                          //      goto next4;
                                        //}

					



			id_match:
			num4=1;
			//while(num4<search_offset){

			//if( member_length > 5 && (member_length-4)%4 == 0){
			//if( member_length > 5){
			if(member_length > 9){
				while(num4<search_offset-15){
					//for(
					//memfile_bufe[0]=mem_file[mem_counter];

					mem_counter=num2+num4;
					memfile_bufc[0]=mem_file[mem_counter]*16777216 + mem_file[mem_counter+1]*65536 + mem_file[mem_counter+2]*256 + mem_file[mem_counter+3];
					memfile_bufc[1]=mem_file[mem_counter+4]*16777216 + mem_file[mem_counter+5]*65536 + mem_file[mem_counter+6]*256 + mem_file[mem_counter+7];
					memfile_bufc[2]=mem_file[mem_counter+8]*16777216 + mem_file[mem_counter+9]*65536 + mem_file[mem_counter+10]*256 + mem_file[mem_counter+11];
					memfile_bufc[3]=mem_file[mem_counter+12]*16777216 + mem_file[mem_counter+13]*65536 + mem_file[mem_counter+14]*256 + mem_file[mem_counter+15];	
					mem_counter=table[num1].member[first_char][last_char][num3].series+num4;
					memfile_bufd[0]=mem_file[mem_counter]*16777216 + mem_file[mem_counter+1]*65536 + mem_file[mem_counter+2]*256 + mem_file[mem_counter+3];
					memfile_bufd[1]=mem_file[mem_counter+4]*16777216 + mem_file[mem_counter+5]*65536 + mem_file[mem_counter+6]*256 + mem_file[mem_counter+7];
					memfile_bufd[2]=mem_file[mem_counter+8]*16777216 + mem_file[mem_counter+9]*65536 + mem_file[mem_counter+10]*256 + mem_file[mem_counter+11];
					memfile_bufd[3]=mem_file[mem_counter+12]*16777216 + mem_file[mem_counter+13]*65536 + mem_file[mem_counter+14]*256 + mem_file[mem_counter+15];
					if(memfile_bufc[0] != memfile_bufd[0])
						goto no_match;
					if(memfile_bufc[1] != memfile_bufd[1])
						goto no_match;
					if(memfile_bufc[2] != memfile_bufd[2])
						goto no_match;
					if(memfile_bufc[3] != memfile_bufd[3])
						goto no_match;
					num4+=16;
				}

				while(num4<search_offset-7){
					mem_counter=num2+num4;
					memfile_bufc[0]=mem_file[mem_counter]*16777216 + mem_file[mem_counter+1]*65536 + mem_file[mem_counter+2]*256 + mem_file[mem_counter+3];
					memfile_bufc[1]=mem_file[mem_counter+4]*16777216 + mem_file[mem_counter+5]*65536 + mem_file[mem_counter+6]*256 + mem_file[mem_counter+7];
					mem_counter=table[num1].member[first_char][last_char][num3].series+num4;
					memfile_bufd[0]=mem_file[mem_counter]*16777216 + mem_file[mem_counter+1]*65536 + mem_file[mem_counter+2]*256 + mem_file[mem_counter+3];
					memfile_bufd[1]=mem_file[mem_counter+4]*16777216 + mem_file[mem_counter+5]*65536 + mem_file[mem_counter+6]*256 + mem_file[mem_counter+7];
					if(memfile_bufc[0] != memfile_bufd[0])
						goto no_match;
					if(memfile_bufc[1] != memfile_bufd[1])
						goto no_match;
					num4+=8;
				}
				while(num4<search_offset-3){
					mem_counter=num2+num4;
					memfile_bufc[0]=mem_file[mem_counter]*16777216 + mem_file[mem_counter+1]*65536 + mem_file[mem_counter+2]*256 + mem_file[mem_counter+3];
					mem_counter=table[num1].member[first_char][last_char][num3].series+num4;
					memfile_bufd[0]=mem_file[mem_counter]*16777216 + mem_file[mem_counter+1]*65536 + mem_file[mem_counter+2]*256 + mem_file[mem_counter+3];

					if(memfile_bufc[0] != memfile_bufd[0])
						goto no_match;
					num4+=4;
				}
				}
				while(num4<search_offset){
					if( mem_file[num2+num4] != mem_file[table[num1].member[first_char][last_char][num3].series+num4]){
						goto no_match;
					}
					num4++;
				}

			//}else{
			//	while(num4<search_offset){
			//		if( mem_file[num2+num4] != mem_file[table[num1].member[first_char][last_char][num3].series+num4]){
			//			goto no_match;					
			//		}
			//		num4++;
			//	}
			//}

			table[num1].member[first_char][last_char][num3].times++;
					
			if(table[num1].member[first_char][last_char][num3].times > table[num1].highest_times){
				table[num1].highest_times=table[num1].member[first_char][last_char][num3].times;
			}

			for(num4=com_count[last_char]; num4<c_members; num4++){
				if(table[num1].member[first_char][last_char][num3].times>common[last_char][num4].times){
					//contribute results to table before replacing
					if(common_open[last_char][num4] != 1){
						num6 = common[last_char][num4].sub;
						table[num1].member[first_char][last_char][num6].times=common[last_char][num4].times;
						if(table[num1].member[first_char][last_char][num6].times > table[num1].highest_times){
							table[num1].highest_times=table[num1].member[first_char][last_char][num6].times;
						}
					}
					//write new results to common index
					common_open[last_char][num4]=0;
					common[last_char][num4].times=table[num1].member[first_char][last_char][num3].times;
					common_id[last_char][num4]=unique_id[last_char][num3];
					common[last_char][num4].sub=num3;
					for(num6=0; num6<member_length+1; num6++){
						common[last_char][num4].string[num6]=mem_file[table[num1].member[first_char][last_char][num3].series+num6];
					}
					if(com_count[last_char] == c_members-1){
						com_count[last_char] = 0;
					}else{
						com_count[last_char]++;
					}
					table_found++;
					//goto found;
                                        if(pre_ret == 1){
                                                goto pre_a;
                                        }
                                        if(pre_ret == 2){
                                                goto pre_b;
                                        }
                                        if(pre_ret == 3){
                                                goto pre_c;
                                        }
                                        if(pre_ret == 4){   
                                                goto pre_d;
                                        }

				}
			}						
			table_found++;
			//goto found;
                                        if(pre_ret == 1){
                                                goto pre_a;
                                        }
                                        if(pre_ret == 2){
                                                goto pre_b;
                                        }
                                        if(pre_ret == 3){
                                                goto pre_c;
                                        }
                                        if(pre_ret == 4){   
                                                goto pre_d;
                                        }
		

			no_search:
			;
			if(buf_members[last_char]<b_members){				
				for(num3=0; num3<member_length+1; num3++){
					buffer[last_char][ buf_count[last_char] ].string[num3]=mem_file[num2+num3];
				}
				buffer[last_char][ buf_count[last_char] ].series=num2;
				buffer_id[last_char][ buf_count[last_char] ] = id;
				buffer[last_char][ buf_count[last_char] ].times = 1;
				buffer_open[last_char][buf_count[last_char]]=0;

				if(member_length>5){
					index[sec_char*256+last_char]=1;
					index[65536+a_char*256+b_char]=1;
				}else{
					index[sec_char*256+last_char]=1;
				}
				buf_count[last_char]++;
				buf_members[last_char]++;
			}else{
				//take 4 oldest out of buffer and place into table
				//dealing with sets of 4
				if(buf_count[last_char] == b_members){
					buf_count[last_char] = 0;
				}

				for(num3=buf_count[last_char]; num3<buf_count[last_char]+4; num3++){

					num5 = table[num1].member[first_char][last_char][0].times;
					if(sizes[first_char][last_char] == num5){
						table[num1].member[first_char][last_char]=realloc(table[num1].member[first_char][last_char],sizeof(struct translation_structure) * (sizes[first_char][last_char]+1+3) );
						sizes[first_char][last_char]+=3;
					}
					
					table[num1].member[first_char][last_char][num5+1].series = buffer[last_char][num3].series;
					table[num1].member[first_char][last_char][num5+1].times = buffer[last_char][num3].times;
					if( num5 == unique_id[last_char][0]){
//posix_memalign(&unique_id[last_char], 16, sizeof(unsigned int)*(unique_id[last_char][0]+6));
						unique_id[last_char]=realloc(unique_id[last_char], sizeof(unsigned int)*(unique_id[last_char][0]+6) );
						unique_id[last_char][0]+=5;
					}
					unique_id[last_char][ num5+1 ] = buffer_id[last_char][num3];
					table[num1].member[first_char][last_char][0].times++;
					buffer_open[last_char][num3]=1;
				}

				//put current pattern into buffer
				for(num3=0; num3<member_length+1; num3++){
					buffer[last_char][buf_count[last_char]].string[num3]=mem_file[num2+num3];
				}
				buffer[last_char][buf_count[last_char]].series=num2;
				buffer_id[last_char][buf_count[last_char]] = id;
				buffer[last_char][buf_count[last_char]].times = 1;
				buffer_open[last_char][buf_count[last_char]]=0;

				if(member_length>5){
					index[sec_char*256+last_char]=1;
					index[65536+a_char*256+b_char]=1;
				}else{
					index[sec_char*256+last_char]=1;
				}
				buf_count[last_char]++;
				buf_members[last_char]-=3;
			}
			found:
			num2=pre_int+pre_count;
			//num2++;
		}
		first_char = num7;
		//save all needed index info to tables before reset
		for(num3=0; num3<256; num3++){
			for(num6=0; num6<b_members; num6++){
				if(!buffer_open[num3][num6]){
					num5 = table[num1].member[first_char][num3][0].times;
					if(sizes[first_char][num3] == num5){					
						table[num1].member[first_char][num3]=realloc(table[num1].member[first_char][num3], sizeof(struct translation_structure) * (sizes[first_char][num3]+1+3) );
						sizes[first_char][num3]+=3;
					}
					table[num1].member[first_char][num3][0].times++;
					
					table[num1].member[first_char][num3][num5+1].series=buffer[num3][num6].series;
					table[num1].member[first_char][num3][num5+1].times=buffer[num3][num6].times;
					if(table[num1].member[first_char][num3][num5+1].times > table[num1].highest_times){
						table[num1].highest_times=table[num1].member[first_char][num3][num5+1].times;
					}
				}
			}

			for(num4=0; num4<c_members; num4++){
				if(!common_open[num3][num4]){
					num6 = common[num3][num4].sub;
					table[num1].member[first_char][num3][num6].times=common[num3][num4].times;
					if(table[num1].member[first_char][num3][num6].times > table[num1].highest_times){
						table[num1].highest_times=table[num1].member[first_char][num3][num6].times;
					}
				}
			}
		}

	}
	//free all strings that don't meet min frequency to save memory later
	for(num2=0; num2<256; num2++){
		for(num3=0; num3<256; num3++){
			num5=table[num1].member[num2][num3][0].times+1;
			num6=0;
			for(num4=0; num4<num5; num4++){
				if(table[num1].member[num2][num3][num4].times>frequency-1){
					num6=1;
				}
			}
			if(num6 == 0){
				free(table[num1].member[num2][num3]);				
				table[num1].member[num2][num3]=malloc(sizeof(struct empty));
				table[num1].member[num2][num3][0].times=0;
				empties++;
				patterns_freed+=(sizes[num2][num3]+1);
			}else{
				num7=1;
				temp_struct_array = malloc(sizeof(struct translation_structure)*num5 );
				temp_struct_array[0].times = table[num1].member[num2][num3][0].times;
				for(num4=1; num4<num5; num4++){
					if(table[num1].member[num2][num3][num4].times>frequency-1){
						memcpy(&temp_struct_array[num7], &table[num1].member[num2][num3][num4], sizeof(struct translation_structure));
						num7++;
					}else{
						temp_struct_array[0].times--;
						patterns_freed++;
					}
				}
				free(table[num1].member[num2][num3]);
				table[num1].member[num2][num3]=malloc(sizeof(struct translation_structure)*num7 );
				memcpy(table[num1].member[num2][num3], temp_struct_array, sizeof(struct translation_structure)*num7 );	
			}
		}
	}

	progress_func(0, member_length+1);
	printf("\tbuffer/cache sizes\n");
	printf("\t\trecent: %u(%.02lfKB) common: %u(%.02lfKB) index: %.02lfKB\n",b_members, cache_sizes[0]/1024, c_members, cache_sizes[1]/1024, (double)sizeof(char)*256*256*2/1024); 

	patterns_total = table_found + buffer_found + cache_found + table_miss;
	printf("\ttotal patterns found: %u\n", patterns_total-table_miss);
	printf("\ttable_miss: %u table_found: %u recent_found: %u common_found: %u\n", table_miss, table_found, buffer_found, cache_found);
	printf("\ttable_miss: %.02lf%% table_found: %.02lf%% recent_found: %.02lf%% common_found: %.02lf%%\n", (double)table_miss/patterns_total*100, (double)table_found/patterns_total*100, (double)buffer_found/patterns_total*100, (double)cache_found/patterns_total*100); 

	printf("\tfreed: %u (%.04lfMB) low freq pattern structures\n", patterns_freed-empties, (double)(patterns_freed*sizeof(struct translation_structure)-empties)/1024/1024);
	pthread_exit(0);
}

void usage(void){
        printf("\tUsage: patterns <output> <input> <min pattern size (bytes)> <max pattern size (bytes)> <threads>\n");
        exit(1);
}


int main(int argc, char **argv){
        FILE *output;
	int input;
        struct stat buffer;
        int status;
        int pat_size;
        int num1, num2, num3, num4, num5;
        int progress=0;
	double progress2=0;
        int max_searchs=0;
        int max_pat;
        int half_file;
        frequency=4;
	int thread_count;
	struct timeval starttime,endtime;
	double time1;

        if(argc != 6){
                usage();
        }

	thread_count = atoi(argv[5]);
        pat_size = atoi(argv[3]);
        max_pat = atoi(argv[4]);
        input = open(argv[2], O_RDONLY);
        if(input == -1){
                fprintf( stderr, "Error opening %s\n", argv[2] );
                exit( 1 );
        }
	
        status = fstat(input, &buffer);
        file_bytes=buffer.st_size;
	if(file_bytes < max_pat+128){    
		printf("Error: file is too small!\n");
		exit(1);
	}
        
#ifndef BIG_FILE
	if(file_bytes > 4294967296-max_pat){
		printf("Error: this file exceeds 4GB, recompile with -D BIG_FILE\n");
		exit(1);
	}
#endif
        mem_file=(char *)mmap(0, file_bytes, PROT_READ, MAP_SHARED, input, 0);
        if (mem_file == MAP_FAILED) {
                close(input);
                perror("Error mmapping the file");
                exit(EXIT_FAILURE);
        }


        if( ( output = fopen( argv[1], "w" ) ) == NULL ) {
                fprintf( stderr, "Error opening %s\n", argv[1] );
                exit( 1 );
        }

        printf("input: %s\n", argv[2]);
        printf("output: %s\n", argv[1]);
        printf("pat_size: %d\n", pat_size);
        printf("bytes: %d\n", file_bytes);
        if(max_pat > file_bytes/2){
                max_pat = floor(file_bytes/2);
                printf("max_pat: %d (reduced to limit!)\n", max_pat);
        }else{
                printf("max_pat: %d\n", max_pat);
        }



        half_file = sizeof(unsigned char)*ceil(file_bytes/2);

        for(num1=pat_size; num1<max_pat; num1++){
                max_searchs++;
        }

	max_searchs++;
        printf("max_searchs: %d\n", max_searchs);
        printf("frequency: %d\n", frequency);
	printf("threads: %d\n", thread_count);

	pthread_t *threads = (pthread_t *)malloc(sizeof(pthread_t) * max_searchs);

	table=(struct pat_length *)malloc( sizeof(struct pat_length) * max_searchs);

	for(num1=0; num1<max_searchs; num1++){
		table[num1].member=malloc( 256*sizeof(void *) );
		for(num2=0; num2<256 ; num2++){
                        table[num1].member[num2]=malloc( 256*sizeof(void *) );
				for(num3=0; num3<256; num3++){
					table[num1].member[num2][num3]=malloc( 2*sizeof(struct translation_structure) );
					table[num1].member[num2][num3][0].times=0;
				}			
                }
        }


//////////////////////////////////


	progress=0;
	progress2=0;

	progress_func(max_searchs, 0);

	gettimeofday(&starttime, NULL);
	num2=1;
	for(num1=0; num1<max_searchs; num1++){
		table[num1].len=pat_size+num1;		
		table[num1].search_num=num1;
		table[num1].highest_times=0;

               	if (pthread_create(&threads[num1], NULL, match_search, &table[num1].search_num) != 0)
                       	perror("pthread_create"), exit(1);
				
		if(num2 % thread_count == 0 || num2 == max_searchs){
			for(num3=progress; num3<num2; num3++){
				if (pthread_join(threads[num3], NULL) != 0)
					perror("pthread_join"),exit(1);
			}
			progress=num2;
		}
		num2++;
	}
	gettimeofday(&endtime, NULL);
	time1=((double)(endtime.tv_sec*1000000-starttime.tv_sec*1000000+endtime.tv_usec-starttime.tv_usec))/1000000;
	printf("time: %.02lfs\n", time1);
	printf("generating output file...\n");

	gettimeofday(&starttime, NULL);

        //parse structures and write to output

	int index1=0;
	int index2=0;
	int max_print=0;
	int num6;
        struct sort_structure *sort_index;
	sort_index = malloc(max_searchs*sizeof(struct sort_structure));

	//initialize sort_index to all 0's
	for(num5=0; num5<max_searchs; num5++){
		sort_index[num5].highest=0;
		for(num6=0; num6<256; num6++){
			for(index1=0; index1<256; index1++){
				sort_index[num5].sub_highest[num6][index1]=0;
			}
		}
	}
	
	//search tables for the one containing the highest count
	num2=0;
	for(num1=0; num1<max_searchs; num1++){
		//find highest member count
		sort_index[num1].highest=table[num1].highest_times;
		if(sort_index[num1].highest > num2){
			num2=sort_index[num1].highest;
		}
	}

	//search tables for highest in all sets and save to sort_index
	for(num1=max_searchs-1; num1>-1; num1--){
		for(index1=0; index1<256; index1++){
			for(index2=0; index2<256; index2++){
				num5=table[num1].member[index1][index2][0].times+1;
				for(num4=1; num4<num5; num4++){
					if(table[num1].member[index1][index2][num4].times > sort_index[num1].sub_highest[index1][index2]){
						sort_index[num1].sub_highest[index1][index2]=table[num1].member[index1][index2][num4].times;
					}
				}
			}
		}
	}

	while(num2>=frequency){
		for(num1=max_searchs-1; num1>-1; num1--){
			if(sort_index[num1].highest == num2){
				sort_index[num1].highest = 0;
				for(index1=0; index1<256; index1++){
					for(index2=0; index2<256; index2++){
						if(sort_index[num1].sub_highest[index1][index2] == num2){
							sort_index[num1].sub_highest[index1][index2]=0;
							num5=table[num1].member[index1][index2][0].times+1;
							for(num4=1; num4<num5; num4++){
								if(table[num1].member[index1][index2][num4].times == num2){			
									fprintf(output, "%d %d", table[num1].len, table[num1].member[index1][index2][num4].times);
                        						for(num3=0; num3<table[num1].len; num3++){
										fprintf(output, " %d", mem_file[table[num1].member[index1][index2][num4].series+num3]);
                        						}
									fprintf(output, "\n");
									max_print++;
									if(max_print == 40000){
										goto complete;
									}
                        					}else if(table[num1].member[index1][index2][num4].times < num2){
									if(table[num1].member[index1][index2][num4].times > sort_index[num1].sub_highest[index1][index2]){
										sort_index[num1].sub_highest[index1][index2] = table[num1].member[index1][index2][num4].times;
									}
								}
							}
						}
						if(sort_index[num1].sub_highest[index1][index2] > sort_index[num1].highest){
							sort_index[num1].highest=sort_index[num1].sub_highest[index1][index2];
						}
					}
				}
			}
		}
				
		num2=0;
		for(num6=0; num6<max_searchs; num6++){
			//find highest member count
			if(sort_index[num6].highest > num2){
				num2=sort_index[num6].highest;
			}
		}
        }

	complete:
	;

	gettimeofday(&endtime, NULL);
	time1=((double)(endtime.tv_sec*1000000-starttime.tv_sec*1000000+endtime.tv_usec-starttime.tv_usec))/1000000;
	printf("time: %.02lfs\n", time1);

        if (munmap(mem_file, file_bytes) == -1) {
                perror("Error un-mmapping the file");
        }

	//free(mem_file);
        close(input);

        fclose(output);
	printf("done.\n");
        return 0;
}

