#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <pthread.h>
#include <time.h>
#include <sys/time.h>
#include <bitset>
#include <xmmintrin.h>

struct translation_structure{
        char *series;
        int times;
	char first;
};

struct pat_length{
	int highest_times;
	//short ordered_list[39679];
	//int starts[512];
	//int stops[512];
	int len;
	unsigned short table_members[256];
	unsigned short empty[256];
	unsigned short search_num;
	std::bitset<65536> index;
	struct translation_structure member[256][39679];
}*table;

/*
struct one_index{
	int len;
	int table_members;
	int empty;
	int search_num;
	std::bitset<65536> c;
};

struct one_index *pat_index;
*/

int file_bytes=0;
char *mem_file;

//static void sort_list(int tbl_number);
static void progress_func(int max_searchs, int pat_len);
void *match_search(void *set);
static int empty_search(int num1, char *linestring);
static int full_search(int num1, char *linestring);

static void progress_func(int max_searchs, int pat_len){
	static float a = 0;
	static float b = 0;
	
	if(max_searchs == 0){
		b++;
		printf("progress: %.02lf%% %d length patterns done.\n",(double)((b/a)*100),pat_len );
	}else{
		a = max_searchs;
		printf("progress: %.02lf%%\n",(double)((b/a)*100));
	}
}

void *match_search(void *set){
        //returns 0 for match found and written
        //returns 1 for no matches found
        register int num4;
        int num2;
        int num3;
	int return_val = 1;
	int num1 = table[*(int *)set].search_num;
	int stop_byte = file_bytes-table[num1].len;
	int member_length = table[num1].len;
	int tbl_members;
	register int index;
	register int second_char;

	for(num2=0; num2< stop_byte; num2++){
		if( table[num1].index[ ((unsigned char)mem_file[num2]*256)+(unsigned char)mem_file[num2+1] ] ) {
		//if( pat_index[num1].c[ ((unsigned char)mem_file[num2]*256)+(unsigned char)mem_file[num2+1] ] ) { 						

			index=(unsigned char)mem_file[num2];
			second_char = mem_file[num2+1];
			tbl_members=table[num1].table_members[ index ];

			//index=(unsigned char)mem_file[num2];			
			//tbl_members=table[num1].stops[(unsigned char)mem_file[num2]];
        		//table[tbl_number].ordered_list[39679]
        		//num4 = table[num1].starts[(unsigned char)mem_file[num2]]; 
			//table[tbl_number].ordered_list[added]
			//printf("start: %d stop: %d \n", num4, tbl_members);

			for(num4=0; num4<tbl_members; num4++){								
				//if( table[num1].member[index][num4].first != mem_file[num2] ){
				if(table[num1].member[ index ][num4].series[1] != second_char){
					goto next;
				}
 
				for( num3=2; num3 < member_length; num3++){
					if(table[num1].member[ index ][num4].series[num3] != mem_file[num2+num3]){
						//if(table[num1].member[num4].series[num3] != mem_file[num2+num3]){	
						goto next;
					}
				}
				table[num1].member[ index ][num4].times++;
				if(table[num1].member[ index ][num4].times > table[num1].highest_times){
					table[num1].highest_times=table[num1].member[ index ][num4].times;
				}
				goto found;
				next:
				;
			}
		}else{
			return_val = 1;
		}

		if(return_val==1){
			return_val = empty_search(num1, &mem_file[num2]);
		}
		if(return_val==1){
			return_val = full_search(num1, &mem_file[num2]);
		}
		if(return_val==1){
			printf("error!\n");
			exit(1);
		}

		found:
		;

	}
       
progress_func(0, member_length);
pthread_exit(0);
}


static int empty_search(int num1, char *linestring){
        //return 0 if empty spot found and wrote to
        //return 1 if no empty spots found

        int num4;
	int index = (unsigned char)linestring[0];
	

        if(table[num1].table_members[ index ] == 39679){
                return (1);
        }

        for(num4=table[num1].empty[ index ]; num4 <= table[num1].table_members[ index ]; num4++){
                if(table[num1].member[ index ][num4].times==0){

                        //array is not full
			table[num1].member[ index ][num4].first=linestring[0];
                        table[num1].member[ index ][num4].series=linestring;
                        table[num1].member[ index ][num4].times=1;
                        table[num1].empty[ index ]=num4;
                        if(table[num1].table_members[ index ] < 39679){
                                table[num1].table_members[ index ]++;
                        }
                        table[num1].index[ index*256+(unsigned char)linestring[1] ]=1;
			//sort_list(num1);
                        return(0);
                }
        }
        return(1);
}

static int full_search(int num1, char *linestring){
        //return 0 for found and replaced array string of 1x
        //return 1 for error (no 1x strings found, for now)
        int num4;
	int index = (unsigned char)linestring[0];

        for(num4=0; num4<39679; num4++){
                if(table[num1].member[index][num4].times==1){
			//replace it
			table[num1].member[index][num4].first=linestring[0];
                        table[num1].member[index][num4].series=linestring;
                        table[num1].member[index][num4].times=1;
                        table[num1].index[ index*256+(unsigned char)linestring[1] ]=1;
			//sort_list(num1);
                        return(0);
                }
        }
        return(1);
}

/*
static void sort_list(int tbl_number){
	int num1 = 0;
	int num2 = 0;
	int added = 0;
	int tbl_members = 0;

	added = 0;
	tbl_members = pat_index[tbl_number].table_members;
	for(num1=0; num1<256; num1++){
		table[tbl_number].starts[num1]=-1;
		for(num2=0; num2<tbl_members; num2++){
			if(num1 == (unsigned char)table[tbl_number].member[num2].first){
				table[tbl_number].ordered_list[added]=num2;
				if(table[tbl_number].starts[num1] == -1){
					table[tbl_number].starts[num1]=added;
					//table[tbl_number].stops[num1]=added;
				}
				added++;
				table[tbl_number].stops[num1]=added;
			}
		}

	}

	
	table[tbl_number].member[num2].first

	table[tbl_number].ordered_list[39679]
	table[tbl_number].starts[256]
	table[tbl_number].member[num4].first
	}
	

}
*/



void usage(void){
        printf("\tUsage: patterns <output> <input> <min pattern size (bytes) 2 or higher> <max pattern size (bytes)> <frequency> <threads>\n");
        exit(1);
}


int main(int argc, char **argv){
        FILE *output;
        int input;
        struct stat buffer;
        int status;
        int pat_size;
        int num1, num2, num3, num4;
        int progress=0;
	double progress2=0;
        int max_searchs=0;
        int max_pat;
        int half_file;
        int frequency=4;
	int thread_count;
	struct timeval starttime,endtime;
	double time1;

        if(argc != 7){
                usage();
        }

	frequency = atoi(argv[5]);
	thread_count = atoi(argv[6]);
        pat_size = atoi(argv[3]);
        max_pat = atoi(argv[4]);

        input = open(argv[2], O_RDONLY);
        if(input == -1){
                fprintf( stderr, "Error opening %s\n", argv[2] );
                exit( 1 );
        }

        status = fstat(input, &buffer);
        file_bytes=buffer.st_size;
        mem_file=(char *)mmap(0, file_bytes, PROT_READ, MAP_SHARED, input, 0);
        if (mem_file == MAP_FAILED) {
                close(input);
                perror("Error mmapping the file");
                exit(EXIT_FAILURE);
        }

        if( ( output = fopen( argv[1], "w" ) ) == NULL ) {
                fprintf( stderr, "Error opening %s\n", argv[1] );
                exit( 1 );
        }

        printf("input: %s\n", argv[2]);
        printf("output: %s\n", argv[1]);
        printf("pat_size: %d\n", pat_size);
        printf("bytes: %d\n", file_bytes);
        if(max_pat > file_bytes/2){
                max_pat = floor(file_bytes/2);
                printf("max_pat: %d (reduced to limit!)\n", max_pat);
        }else{
                printf("max_pat: %d\n", max_pat);
        }



        half_file = sizeof(unsigned char)*ceil(file_bytes/2);

        for(num1=pat_size; num1<max_pat; num1++){
                max_searchs++;
        }

	max_searchs++;
        printf("max_searchs: %d\n", max_searchs);
        printf("frequency: %d\n", frequency);
	printf("threads: %d\n", thread_count);

	//pat_index=(struct one_index *)malloc( sizeof(struct one_index) * max_searchs);
	table=(struct pat_length *)malloc( sizeof(struct pat_length) * max_searchs);
	pthread_t *threads = (pthread_t *)malloc(sizeof(pthread_t) * max_searchs);

	printf("memory allocated: indices=%.02lfKB total(-virtual)=%.02lfKB\n", (double)(((sizeof(table[0].index)* max_searchs) ) / 1024),  
(double)(((sizeof(struct pat_length)* max_searchs) ) / 1024) );

	for(num1=0; num1<max_searchs; num1++){
        	for(num2=0; num2<39679; num2++){
			for(num3=0; num3<256; num3++){
                		table[num1].member[num3][num2].times = 0;
			}
        	}
	}


//////////////////////////////////


	progress=0;
	progress2=0;

	progress_func(max_searchs, 0);

	gettimeofday(&starttime, NULL);
	num2=0;
	for(num1=max_searchs-1; num1>-1; num1--){
        //for(num1=0; num1<max_searchs; num1++){
		table[num1].len=pat_size+num1;		
		for(num3=0; num3<256; num3++){
			table[num1].table_members[num3]=0;
			table[num1].empty[num3]=0;
		}
		table[num1].search_num=num1;
		table[num1].highest_times=0;
               	for(num3=0; num3<256; num3++){
                       	for(num4=0; num4<256; num4++){
				table[num1].index[(num3*256)+num4]=0;
                       	}
               	}

               	if (pthread_create(&threads[num2], NULL, match_search, &table[num1].search_num) != 0)
                       	perror("pthread_create"), exit(1);
		
		if(num2 % thread_count-1 == 0){
			for(num3=progress; num3<num2+1; num3++){
				if (pthread_join(threads[num3], NULL) != 0)
					perror("pthread_join"),exit(1);
			}
			progress=num2+1;
		}else if (num2 == max_searchs-1){
			for(num3=progress; num3<num2+1; num3++){
				if (pthread_join(threads[num3], NULL) != 0)
					perror("pthread_join"),exit(1);
			}
			progress=num2+1;
		}
		num2++;
	}
	gettimeofday(&endtime, NULL);
	time1=((double)(endtime.tv_sec*1000000-starttime.tv_sec*1000000+endtime.tv_usec-starttime.tv_usec))/1000000;
	printf("time: %.02lfs\n", time1);
	printf("generating output file...\n");

	gettimeofday(&starttime, NULL);

        //parse structures and write to output

	num2=0;
	int highest_tbl_count=0;
	for(num1=0; num1<max_searchs; num1++){
			//find highest member count
                	if(table[num1].highest_times > num2){
                        	num2=table[num1].highest_times;
                	}
			//find highest table count
			for(num3=0; num3<256; num3++){
				if(table[num1].table_members[num3] > highest_tbl_count){
					highest_tbl_count=table[num1].table_members[num3];
				}
			}
	}


	int print_max=0;
	int next_highest=0;
	int char_index=0;
        for(num2=num2; num2>=frequency; num2--){

                for(num4=0; num4<highest_tbl_count; num4++){

			for(num1=max_searchs-1; num1>-1; num1--){
				for(char_index=0; char_index<256; char_index++){			
                        		if(table[num1].member[char_index][num4].times == num2){
						fprintf(output, "%d %d", table[num1].len, table[num1].member[char_index][num4].times);
                                		for(num3=0; num3<table[num1].len; num3++){
                                        		fprintf(output, " %d", table[num1].member[char_index][num4].series[num3]);
                                		}
						print_max++;	
						//search for next highest while parsing
						if(print_max==39679){
							goto max_reached;
						}
						fprintf(output, "\n");
                        		}else if(table[num1].member[char_index][num4].times < num2){
						if(table[num1].member[char_index][num4].times > next_highest){
							next_highest = table[num1].member[char_index][num4].times;
						}
					}
				}



			}
                }
		num2 = next_highest+1;
		next_highest = 0;
        }
	max_reached:
	;

	gettimeofday(&endtime, NULL);
	time1=((double)(endtime.tv_sec*1000000-starttime.tv_sec*1000000+endtime.tv_usec-starttime.tv_usec))/1000000;
	printf("time: %.02lfs\n", time1);

        if (munmap(mem_file, file_bytes) == -1) {
                perror("Error un-mmapping the file");
        }
        close(input);

        fclose(output);
	printf("done.\n");
        return 0;
}

