#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <pthread.h>
#include <time.h>
#include <sys/time.h>
//#include <bitset>
//#include <xmmintrin.h>

struct sort_structure{
	//unsigned char indice[2];
	//unsigned int highest;
	//unsigned int next_highest;
	//unsigned int times[256][256];
	unsigned int highest;
	unsigned int sub_highest[256][256];
	//unsinged int index1[256];
	//unsigned int index2[256];
};


struct translation_structure{
        char *series;
        unsigned int times;
	unsigned int unique_id;
};

struct pat_length{
	unsigned int highest_times;
	unsigned int len;
	unsigned int search_num;
	unsigned int sizes[256][256];
	//char index[256][256];
	//	short index2[256][256];
	//unsigned int sub_highest[256][256];
	struct translation_structure ***member;
}*table;

struct indices{
	char index[256][256];
	//unsigned int last[128000];
	char *series[256][320];
	unsigned int times[256][320];
	unsigned int sub[256][320];
};

int file_bytes;
char *mem_file;

static void progress_func(int max_searchs, int pat_len);
void *match_search(void *set);

static void progress_func(int max_searchs, int pat_len){
	static float a = 0;
	static float b = 0;
	
	if(max_searchs == 0){
		b++;
		printf("progress: %.02lf%% %d length patterns done.\n",(double)((b/a)*100),pat_len );
	}else{
		a = max_searchs;
		printf("progress: %.02lf%%\n",(double)((b/a)*100));
	}
}

void *match_search(void *set){
        //returns 0 for match found and written
        //returns 1 for no matches found
	//int num8;
	int num7;
        int num4;
        int num2=0;
        int num3;
	int num5=0;
	int num6;
	const int num1 = table[*(unsigned int *)set].search_num;
	const int stop_byte = file_bytes-table[num1].len;
	const int member_length = table[num1].len-1;
	unsigned char first_char;
	unsigned char last_char;
	unsigned char sec_char;
	unsigned char b_char;
	//int max_index=floor(table[num1].len/2)-1;
	//const int max_index=floor((member_length-1)/2);
	//const int max_index =
	register unsigned int id;

	unsigned int match_miss = 0;
	unsigned int match_found = 0;
	unsigned int cache_found = 0;

	struct indices index;
	
	//calculate how many patterns + times we can fit in 2MB cache
	//const unsigned int i_members = floor(3145728 / (table[num1].len*sizeof(char)+sizeof(unsigned int)*2) / 256);
	const unsigned int i_members = 320;
printf("i_members: %u\n", i_members);
	//index.times = malloc(sizeof(int *)*256);
	//index.sub = malloc(sizeof(int *)*256);
	//index.series = malloc(sizeof(char **)*256);
	
	for(num3=0; num3<256; num3++){	
		//index.times[num3] = malloc(sizeof(int)*i_members);
		//index.sub[num3] = malloc(sizeof(int)*i_members);
		//index.series[num3] = malloc(i_members*sizeof(char *));
		for(num4=0; num4<i_members; num4++){
			index.series[num3][num4] = malloc(table[num1].len*sizeof(char));
		}
	}

	//for(num3=0; num3<i_members; num3++){
	//	index.series[num3]=malloc(table[num1].len*sizeof(char));
	//}

	
	for(num7=0; num7<256; num7++){
		num2=0;
		for(num3=0; num3<256; num3++){
			for(num4=0; num4<256; num4++){
				index.index[num3][num4]=0;
			}
		
			for(num4=0; num4<i_members; num4++){
				index.times[num3][num4]=0;
			}
		}

		while(num2<stop_byte){
			first_char = (unsigned char)mem_file[num2];
			if(first_char != num7){
				goto found;
			}

			sec_char = (unsigned char)mem_file[num2+1];
			last_char = (unsigned char)mem_file[num2+member_length];

			if(!index.index[sec_char][last_char]){
				num6 = num2+member_length-1;
				b_char = (unsigned char)mem_file[num6];
				num5 = table[num1].member[first_char][sec_char][0].times;
				goto no_search;
			}

			//search cached xMB worth of most often found patterns
			//for(num3=0; num3<256; num3++){
			for(num3=0; num3<i_members; num3++){
				if(index.times[sec_char][num3] == 0){
					//num3=i_members;
					goto search;
				}
					//if( (unsigned char)index.series[num3][member_length] == last_char){
					if( (unsigned char)index.series[sec_char][num3][member_length] == last_char){
							for( num4=2; num4 < member_length; num4++){
								if( mem_file[num2+num4] != index.series[sec_char][num3][num4] ){
									goto next1;
								}
							}
							index.times[sec_char][num3]++;

							table[num1].member[first_char][sec_char][index.sub[sec_char][num3]].times++;

							if(index.times[sec_char][num3] > table[num1].highest_times){
								table[num1].highest_times=index.times[sec_char][num3];
							}
							cache_found++;
							goto found;
						//}
					}
				
				next1:
				;
			}

			//last_char = (unsigned char)mem_file[num2+member_length];
			//sec_char = (unsigned char)mem_file[num2+1];
			//a_char = (unsigned char)mem_file[num2+];
			//b_char = (unsigned char)mem_file[num2+member_length-1];
			//c_char = (unsigned char)mem_file[num2+member_length-1];
			search:
			num5 = table[num1].member[first_char][sec_char][0].times;
			//num4 = num2+member_length; 
			num6 = num2+member_length-1;
			//last_char = (unsigned char)mem_file[num4];

			b_char = (unsigned char)mem_file[num6];	
			id = first_char*4294967296 + sec_char*65536 + b_char*256 + last_char;


				num3 = num5;
				//b_char = (unsigned char)mem_file[num2+member_length-1];
				//last_char = (unsigned char)mem_file[num2+member_length];
				//id = first_char*4294967296 + sec_char*65536 + b_char*256 + last_char;			
				while(num3>0){
					if(table[num1].member[first_char][sec_char][num3].unique_id == id){
						for( num4=2; num4 < member_length-1; num4++){
							if( mem_file[num2+num4] != table[num1].member[first_char][sec_char][num3].series[num4]){
								goto next2;
							}
						}
						table[num1].member[first_char][sec_char][num3].times++;
						if(table[num1].member[first_char][sec_char][num3].times > table[num1].highest_times){
							table[num1].highest_times=table[num1].member[first_char][sec_char][num3].times;
						}
						for(num4=0; num4<i_members; num4++){
							if(table[num1].member[first_char][sec_char][num3].times>index.times[sec_char][num4]){
								index.times[sec_char][num4]=table[num1].member[first_char][sec_char][num3].times;
								index.sub[sec_char][num4]=num3;
								for(num6=0; num6<member_length+1; num6++){
									index.series[sec_char][num4][num6]=table[num1].member[first_char][sec_char][num3].series[num6];
								}
								//num4=-1;
								match_found++;
								goto found;
							}
						}						

						match_found++;
						goto found;
					}

					next2:
					num3--;
				}
				match_miss++;
		
			no_search:
			;
			if(table[num1].sizes[first_char][sec_char] != num5){
				table[num1].member[first_char][sec_char][num5+1].series = &mem_file[num2];
				table[num1].member[first_char][sec_char][num5+1].times=1;
				table[num1].member[first_char][sec_char][0].times++;
			}else{
				table[num1].member[first_char][sec_char]=realloc(table[num1].member[first_char][sec_char], sizeof(struct translation_structure) * (table[num1].sizes[first_char][sec_char]+1+3) );
				table[num1].member[first_char][sec_char][num5+1].series = &mem_file[num2];
				table[num1].member[first_char][sec_char][num5+1].times=1;
				table[num1].member[first_char][sec_char][0].times++;
				table[num1].sizes[first_char][sec_char]+=3;
			}
			index.index[sec_char][last_char]=1;
			table[num1].member[first_char][sec_char][num5+1].unique_id=first_char*4294967296 + sec_char*65536 + b_char*256 + last_char;
			found:
			num2++;

                	//num4 = num2+member_length;
                	//num6 = num2+member_length-1;

                	//for(num3=0; num3<max_index; num3++){
                        //	index[(unsigned char)mem_file[num4]][(unsigned char)mem_file[num6]][num3]=1;
                        //	num4-=2;
                        //	num6-=2;
                	//}
			//skip:
			//num2++;
		}
	}
	printf("match_miss: %d match_found: %d cache_found: %d\n", match_miss, match_found, cache_found);
	progress_func(0, member_length+1);
	pthread_exit(0);
}

void usage(void){
        printf("\tUsage: patterns <output> <input> <min pattern size (bytes)> <max pattern size (bytes)> <threads>\n");
        exit(1);
}


int main(int argc, char **argv){
        FILE *output;
	int input;
        //FILE *input;
        struct stat buffer;
        int status;
        int pat_size;
        int num1, num2, num3, num4, num5;
        int progress=0;
	double progress2=0;
        int max_searchs=0;
        int max_pat;
        int half_file;
        const int frequency=4;
	int thread_count;
	struct timeval starttime,endtime;
	double time1;

        if(argc != 6){
                usage();
        }

	//frequency = atoi(argv[5]);
	//frequency = 4;
	thread_count = atoi(argv[5]);
        pat_size = atoi(argv[3]);
        max_pat = atoi(argv[4]);
	//input = fopen(argv[2], "r");
        input = open(argv[2], O_RDONLY);
        if(input == -1){
	//if(input == NULL){
                fprintf( stderr, "Error opening %s\n", argv[2] );
                exit( 1 );
        }
	
        status = fstat(input, &buffer);
	//status = stat(argv[2], &buffer);
        file_bytes=buffer.st_size;

/*
	mem_file=malloc(file_bytes);
	for(num1=0; num1<file_bytes; num1++){
		mem_file[num1]=getc(input);
	}
        //character=getc(input);
        //while(character != EOF){  
*/


        mem_file=(char *)mmap(0, file_bytes, PROT_READ, MAP_SHARED, input, 0);
        if (mem_file == MAP_FAILED) {
                close(input);
                perror("Error mmapping the file");
                exit(EXIT_FAILURE);
        }


        if( ( output = fopen( argv[1], "w" ) ) == NULL ) {
                fprintf( stderr, "Error opening %s\n", argv[1] );
                exit( 1 );
        }

        printf("input: %s\n", argv[2]);
        printf("output: %s\n", argv[1]);
        printf("pat_size: %d\n", pat_size);
        printf("bytes: %d\n", file_bytes);
        if(max_pat > file_bytes/2){
                max_pat = floor(file_bytes/2);
                printf("max_pat: %d (reduced to limit!)\n", max_pat);
        }else{
                printf("max_pat: %d\n", max_pat);
        }



        half_file = sizeof(unsigned char)*ceil(file_bytes/2);

        for(num1=pat_size; num1<max_pat; num1++){
                max_searchs++;
        }

	max_searchs++;
        printf("max_searchs: %d\n", max_searchs);
        printf("frequency: %d\n", frequency);
	printf("threads: %d\n", thread_count);

	//pat_index=(struct one_index *)malloc( sizeof(struct one_index) * max_searchs);
	//table=(struct pat_length *)malloc( sizeof(struct pat_length) * max_searchs);
	pthread_t *threads = (pthread_t *)malloc(sizeof(pthread_t) * max_searchs);


	table=(struct pat_length *)malloc( sizeof(struct pat_length) * max_searchs);


	for(num1=0; num1<max_searchs; num1++){
		table[num1].member=malloc( 256*sizeof(void *) );
		for(num2=0; num2<256 ; num2++){
                        table[num1].member[num2]=malloc( 256*sizeof(void *) );
			//for(num5=0; num5<256; num5++){
				//table[num1].member[num2][num5]=malloc( 256*sizeof(void *) );
				for(num3=0; num3<256; num3++){
					//table[num1].index[num2][num3]=0;
					//table[num1].index2[num2][num3]=0;
					table[num1].sizes[num2][num3]=1;
					table[num1].member[num2][num3]=malloc( 2*sizeof(struct translation_structure) );
					table[num1].member[num2][num3][0].times=0;
					table[num1].member[num2][num3][1].unique_id=1;
					//table[num1].member[num2][num3][1].unique_id=-1;
				}
			
                }
        }

/*
	for(num1=0; num1<max_searchs; num1++){
		for(num5=0; num5<256; num5++){
			for(num2=0; num2<256; num2++){
				//table[num1].index2[num5][num2]=1;
				for(num3=0; num3<256; num3++){
					table[num1].index[num5][num2][num3]=0;
				}
			}
		}
	}
*/

//////////////////////////////////


	progress=0;
	progress2=0;

	progress_func(max_searchs, 0);

	gettimeofday(&starttime, NULL);
	num2=1;
	for(num1=0; num1<max_searchs; num1++){
		table[num1].len=pat_size+num1;		
		table[num1].search_num=num1;
		table[num1].highest_times=0;

               	if (pthread_create(&threads[num1], NULL, match_search, &table[num1].search_num) != 0)
                       	perror("pthread_create"), exit(1);
		
		
		if(num2 % thread_count == 0 || num2 == max_searchs){
			for(num3=progress; num3<num2; num3++){
				if (pthread_join(threads[num3], NULL) != 0)
					perror("pthread_join"),exit(1);
			}
			progress=num2;
		}
		num2++;
	}
	gettimeofday(&endtime, NULL);
	time1=((double)(endtime.tv_sec*1000000-starttime.tv_sec*1000000+endtime.tv_usec-starttime.tv_usec))/1000000;
	printf("time: %.02lfs\n", time1);
	printf("generating output file...\n");

	gettimeofday(&starttime, NULL);

        //parse structures and write to output

	int next_highest=0;
	int index1=0;
	int index2=0;
	int max_print=0;
	int num6;
	//int hi_tbl=0;
	//int hi_in1=0;
	//int hi_in2=0;
        struct sort_structure *sort_index;
	sort_index = malloc(max_searchs*sizeof(struct sort_structure));
printf("here 1\n");
	//initialize sort_index to all 0's
	for(num5=0; num5<max_searchs; num5++){
		//sort_index[num5].indice[0]=0;
		//sort_index[num5].indice[1]=0;
		sort_index[num5].highest=0;
		for(num6=0; num6<256; num6++){
			for(index1=0; index1<256; index1++){
				sort_index[num5].sub_highest[num6][index1]=0;
			}
		}
	}
	
printf("here 2\n");
	//search tables for the one containing the highest count
	num2=0;
	for(num1=0; num1<max_searchs; num1++){
		//find highest member count
		sort_index[num1].highest=table[num1].highest_times;
		if(sort_index[num1].highest > num2){
			num2=sort_index[num1].highest;
			//hi_tbl = num1;

		}
	}
printf("here 3\n");

	//search tables for highest in all sets and save to sort_index
	for(num1=max_searchs-1; num1>-1; num1--){
		for(index1=0; index1<256; index1++){
			for(index2=0; index2<256; index2++){
				num5=table[num1].member[index1][index2][0].times+1;
				for(num4=1; num4<num5; num4++){
					if(table[num1].member[index1][index2][num4].times > sort_index[num1].sub_highest[index1][index2]){
						//sort_index[num1].times[index1][index2]=table[num1].member[index1][index2][num4].times;
						sort_index[num1].sub_highest[index1][index2]=table[num1].member[index1][index2][num4].times;
						//if(num1 == hi_tbl){
						//	hi_in1=index1;
						//	hi_in2=index2;
						//}
						//if(sort_index[num1].sub_highest[index1][index2]>sort_index[num1].highest){
						//	sort_index[num1].highest=sort_index[num1].sub_highest[index1][index2];
							//sort_index[num1].indice[0]=index1;
							//sort_index[num1].indice[1]=index2;
						//}

					}
				}
			}
		}
	}

/*
printf("here 4\n");
	//prime search
	next_highest=0;  
	for(num1=max_searchs-1; num1>-1; num1--){
		if(sort_index[num1].highest>next_highest){
			next_highest=sort_index[num1].highest;
			//hi_tbl=num1;
			//hi_in1=sort_index[num1].indice[0];
			//hi_in2=sort_index[num1].indice[1];
		}
	}
        num2=next_highest;


printf("%d\n", num2);
printf("here 5\n");
	//main search
	while(num2>=frequency){
		for(num1=max_searchs-1; num1>-1; num1--){
			if(sort_index[num1].highest_times==num2){
				//sort_index[num1].highest_times=0;
				for(index1=0; index1<256; index1++){
					if(!sort_index[num1].index1[index1]){
						for(index2=0; index2<256; index2++){
							if(!sort_index[num1].index2[index2]){
								//
								
								num5=table[num1].member[index1][index2][0].times+1;
								for(num4=1; num4<num5; num4++){
								if(table[num1].member[index1][index2][num4].times == num2){
									fprintf(output, "%d %d", table[num1].len, table[num1].member[index1][index2][num4].times);
									for(num3=0; num3<table[hi_tbl].len; num3++){
										fprintf(output, " %d", table[num1].member[index1][index2][num4].series[num3]);
									}
									fprintf(output, "\n");
									max_print++;
									if(max_print == 40000){
										goto complete;
									}
								}else if(table[num1].member[index1][index2][num4].times < num2){
									if(table[num1].member[index1][index2][num4].times > next_highest){
										next_highest=table[num1].member[index1][index2][num4].times;
										sort_index[num1].index2[index2]=1;
									}
								}
							}else if(
									
							}
					}else if(

					}
				}
			}else if(
			
			}
		}
	}
				hi_tbl=num1;
				//hi_in1=sort_index[num1].indice[0];
				//hi_in2=sort_index[num1].indice[1];
				hi_in1=index1;
				hi_in2=index2;

				sort_index[hi_tbl].times[hi_in1][hi_in2]=0;
				num5=table[hi_tbl].member[hi_in1][hi_in2][0].times+1;

				sort_index[hi_tbl].next_highest=0;		
				//next_highest=0;

				for(num4=1; num4<num5; num4++){
					
					if(table[hi_tbl].member[hi_in1][hi_in2][num4].times == num2){
						fprintf(output, "%d %d", table[hi_tbl].len, table[hi_tbl].member[hi_in1][hi_in2][num4].times);
						for(num3=0; num3<table[hi_tbl].len; num3++){
							fprintf(output, " %d", table[hi_tbl].member[hi_in1][hi_in2][num4].series[num3]);
						}
						fprintf(output, "\n");
						max_print++;
						if(max_print == 40000){
							goto complete;
						}
					}else if(table[hi_tbl].member[hi_in1][hi_in2][num4].times < num2){
						if(table[hi_tbl].member[hi_in1][hi_in2][num4].times > sort_index[hi_tbl].times[hi_in1][hi_in2]){
							sort_index[hi_tbl].times[hi_in1][hi_in2] = table[hi_tbl].member[hi_in1][hi_in2][num4].times;
							if(sort_index[hi_tbl].times[hi_in1][hi_in2]>sort_index[hi_tbl].next_highest){
								sort_index[hi_tbl].highest=sort_index[hi_tbl].times[hi_in1][hi_in2];
								sort_index[hi_tbl].indice[0]=hi_in1;
								sort_index[hi_tbl].indice[1]=hi_in2;
							}
						}
					}
				}
			}
		}

		next_highest=0;
		for(num1=max_searchs-1; num1>-1; num1--){
			if(sort_index[num1].highest>next_highest){
				next_highest=sort_index[num1].highest;
				//hi_tbl=num1;
				//hi_in1=sort_index[num1].indice[0];
				//hi_in2=sort_index[num1].indice[1];
			}
		}
		num2=next_highest;
		//num2=sort_index[hi_tbl].times[hi_in1][hi_in2];
	}
*/
printf("here 6\n");


	while(num2>=frequency){
		for(num1=max_searchs-1; num1>-1; num1--){
			if(sort_index[num1].highest == num2){
				sort_index[num1].highest = 0;
				//table[num1].highest_times = 0;
				for(index1=0; index1<256; index1++){
					for(index2=0; index2<256; index2++){
						
						
						if(sort_index[num1].sub_highest[index1][index2] == num2){
							sort_index[num1].sub_highest[index1][index2]=0;

							num5=table[num1].member[index1][index2][0].times+1;
							//num2=table[num1].highest_times[index1][index2] == num2){
		
							for(num4=1; num4<num5; num4++){
								if(table[num1].member[index1][index2][num4].times == num2){			
									fprintf(output, "%d %d", table[num1].len, table[num1].member[index1][index2][num4].times);
                        						for(num3=0; num3<table[num1].len; num3++){
                        							fprintf(output, " %d", table[num1].member[index1][index2][num4].series[num3]);
                        						}
									fprintf(output, "\n");
									max_print++;
									if(max_print == 40000){
										goto complete;
									}
                        					}else if(table[num1].member[index1][index2][num4].times < num2){
									if(table[num1].member[index1][index2][num4].times > sort_index[num1].sub_highest[index1][index2]){
										sort_index[num1].sub_highest[index1][index2] = table[num1].member[index1][index2][num4].times;
									}
								}
							}
							//
						}
						if(sort_index[num1].sub_highest[index1][index2] > sort_index[num1].highest){
							sort_index[num1].highest=sort_index[num1].sub_highest[index1][index2];
						}
						//set sort_index[num1].highest to greatest sort_index[num1].sub_highest[index1][index2]

					}
				}
			// 
			}
		}
		
		


		num2=0;
		for(num6=0; num6<max_searchs; num6++){
			//find highest member count
			if(sort_index[num6].highest > num2){
				num2=sort_index[num6].highest;
			}
		}
		//num2 = next_highest;
		//next_highest = 0;
        }


	complete:
	;

	gettimeofday(&endtime, NULL);
	time1=((double)(endtime.tv_sec*1000000-starttime.tv_sec*1000000+endtime.tv_usec-starttime.tv_usec))/1000000;
	printf("time: %.02lfs\n", time1);

        if (munmap(mem_file, file_bytes) == -1) {
                perror("Error un-mmapping the file");
        }

	//free(mem_file);
        close(input);

        fclose(output);
	printf("done.\n");
        return 0;
}

