#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <pthread.h>
#include <time.h>
#include <sys/time.h>
#include <bitset>
#include <xmmintrin.h>

struct translation_structure{
        char *series;
        int times;
	char first;
};

struct pat_length{ 
        int highest_times;
        //short ordered_list[39679];
        //int starts[512];
        //int stops[512];
        int len;
        int table_members;
        int empty;   
        int search_num;
        //std::bitset<65536> index;
        //int once_finds[39679];
        int fs_index_ones[39679];
        int fs_index_start;
        int fs_index_stop;
        int index[256][256];
        struct translation_structure ***member;
}*table;

int file_bytes;
char *mem_file;

static void progress_func(int max_searchs, int pat_len);
void *match_search(void *set);
static int empty_search(int num1, char *linestring);
static int full_search(int num1, char *linestring);

static void progress_func(int max_searchs, int pat_len){
	static float a = 0;
	static float b = 0;
	
	if(max_searchs == 0){
		b++;
		printf("progress: %.02lf%% %d length patterns done.\n",(double)((b/a)*100),pat_len );
	}else{
		a = max_searchs;
		printf("progress: %.02lf%%\n",(double)((b/a)*100));
	}
}

void *match_search(void *set){
        //returns 0 for match found and written
        //returns 1 for no matches found
        int num4;
        int num2;
        int num3;
	int return_val = 1;
	int num1 = table[*(int *)set].search_num;
	int stop_byte = file_bytes-table[num1].len;
	int member_length = table[num1].len;
	int tbl_members;
	unsigned char first_char;
	char last_char;
	unsigned short last_index;
	unsigned int array_full = 0;

	unsigned int indx_found=0;
	unsigned int searched=0;
	unsigned int search_found=0;
	unsigned int e_search = 0;
	unsigned int f_search = 0;


	for(num2=0; num2< stop_byte; num2++){
		first_char=(unsigned char)mem_file[num2];
		last_char = mem_file[num2+(member_length-1)];
		num3 = table[num1].index[ first_char ][ (unsigned char)last_char ][0];
		if( num3 > 0 ) {

			while(num3>0){
			
				last_index = table[num1].index[ first_char ][ (unsigned char)last_char ][num3];
				for( num4=2; num4 < member_length; num4++){
					if(table[num1].member[ last_index ].series[num4] != mem_file[num2+num4]){
						num4=member_length;
					}
				}
				if(num4==member_length){
					table[num1].member[ last_index ].times++;
					//remove from once_finds list

					if(table[num1].member[ last_index ].times > table[num1].highest_times){
						table[num1].highest_times=table[num1].member[ last_index ].times;
					}
					indx_found++;
					goto found;
				}
				num3--;
			}
			if(table[num1].index[ first_char ][ (unsigned char)last_char ][0] == 48){

				tbl_members=table[num1].table_members;
				searched++;
				for(num4=0; num4<tbl_members; num4++){
					if((unsigned char)table[num1].member[num4].first != first_char){
						goto next;
					}
					for( num3=1; num3 < member_length; num3++){
						if(table[num1].member[num4].series[num3] != mem_file[num2+num3]){
							goto next;
						}
					}
					table[num1].member[num4].times++;
					//remove from once_finds list

					if(table[num1].member[num4].times > table[num1].highest_times){
						table[num1].highest_times=table[num1].member[num4].times;
					}
					search_found++;
					goto found;
					next:
					;
				}
			}
		}
			e_search++;
			return_val = empty_search(num1, &mem_file[num2]);

		if(return_val==1 && array_full==0){
			f_search++;
			return_val = full_search(num1, &mem_file[num2]);
		}
		if(return_val==1){
			array_full++;
		}

		found:
		;

	}

	if(array_full > 0){
		printf("\tMax possible patterns reached for %d length search! %u possible lost patterns\n", member_length, array_full);
	}

	printf("tbl_mems: %d empty: %d ", table[num1].table_members, table[num1].empty);
	printf("indx_found: %u searched: %u search_found: %u e_search: %u f_search: %u\n", indx_found, searched, search_found, e_search, f_search);       
	progress_func(0, member_length);
	pthread_exit(0);
}


static int empty_search(int num1, char *linestring){
        //return 0 if empty spot found and wrote to
        //return 1 if no empty spots found

        int num4, num3;
	int member_length = table[num1].len - 1;
        unsigned char first_char = (unsigned char)linestring[0];
        unsigned char last_char = (unsigned char)linestring[member_length];

        if(table[num1].table_members == 39679){
                return (1);
        }
	num4=table[num1].empty;
	table[num1].member[num4].first=linestring[0];
	table[num1].member[num4].series=linestring;
	table[num1].member[num4].times=1;
	table[num1].empty=num4+1;
	table[num1].table_members++;

        num3 = table[num1].index[ first_char ][ last_char ][0];
	if(num3 != 48){
		table[num1].index[ first_char ][ last_char ][ num3+1 ] = num4;
		table[num1].index[ first_char ][ last_char ][0]++;
 	}

	return(0);
}

static int full_search(int num1, char *linestring){
        //return 0 for found and replaced array string of 1x
        //return 1 for error (no 1x strings found, for now)
        int num4; 
	int num3=0;
	int num2;

	int member_length = table[num1].len - 1;	
        unsigned char first_char; 
        unsigned char last_char;

	//first time thru scan for all 1x strings and add to fs_index_ones array
	if(table[num1].fs_index_start == -1){
		for(num2=0; num2<39679; num2++){
			if(table[num1].member[num2].times==1){
				table[num1].fs_index_ones[num3]=num2;
				num3++;
			}
		}
		table[num1].fs_index_start=0;
		table[num1].fs_index_stop=num3;
	}

	//each scan after, scan strings in fs_index_ones array till 1x is found(to be replaced) remove != 1x strings from array along the way
	for(num2=table[num1].fs_index_start; num2<table[num1].fs_index_stop; num2++){
		num3 = table[num1].fs_index_ones[num2];
		if(table[num1].member[num3].times==1){
			//remove from index if exists
			first_char = (unsigned char)table[num1].member[num3].first;
			last_char = (unsigned char)table[num1].member[num3].series[member_length];
			for(num4=1; num4< 49; num4++){
				if(table[num1].index[ first_char ][ last_char ][ num4 ] == num3){
					//replace with next highest (bit shift?)
					while(num4+1 < 49){
						table[num1].index[ first_char ][ last_char ][num4] = table[num1].index[ first_char ][ last_char ][num4+1];
						num4++;
					}
					table[num1].index[ first_char ][ last_char ][num4] = -1;
					table[num1].index[ first_char ][ last_char ][0]--;
					num4=49;
				}
			}

			//replace
			first_char = (unsigned char)linestring[0];
			last_char = (unsigned char)linestring[member_length];
			table[num1].member[num3].first=linestring[0];
			table[num1].member[num3].times=1;
			//add to index if room
			num4 = table[num1].index[ first_char ][ last_char ][0];
			if(num4 != 48){
				table[num1].index[ first_char ][ last_char ][ num4+1 ] = num3;
				table[num1].index[ first_char ][ last_char ][0]++;
			}

			//increment start counter, it's been used
			table[num1].fs_index_start=num2+1;
			//add to end of fs_index_one if room
			if(table[num1].fs_index_stop < 39679){
				table[num1].fs_index_ones[table[num1].fs_index_stop]=num3;
				table[num1].fs_index_stop++;
			}
			return(0); 
		}
	}

	//if no 1x strings left or all 39679 spots exhausted then return 1
	return(1);
}

void usage(void){
        printf("\tUsage: patterns <output> <input> <min pattern size (bytes) 2 or higher> <max pattern size (bytes)> <frequency> <threads>\n");
        exit(1);
}


int main(int argc, char **argv){
        FILE *output;
        int input;
        struct stat buffer;
        int status;
        int pat_size;
        int num1, num2, num3, num4, num5;
        int progress=0;
	double progress2=0;
        int max_searchs=0;
        int max_pat;
        int half_file;
        int frequency=4;
	int thread_count;
	struct timeval starttime,endtime;
	double time1;

        if(argc != 7){
                usage();
        }

	frequency = atoi(argv[5]);
	thread_count = atoi(argv[6]);
        pat_size = atoi(argv[3]);
        max_pat = atoi(argv[4]);

        input = open(argv[2], O_RDONLY);
        if(input == -1){
                fprintf( stderr, "Error opening %s\n", argv[2] );
                exit( 1 );
        }

        status = fstat(input, &buffer);
        file_bytes=buffer.st_size;
        mem_file=(char *)mmap(0, file_bytes, PROT_READ, MAP_SHARED, input, 0);
        if (mem_file == MAP_FAILED) {
                close(input);
                perror("Error mmapping the file");
                exit(EXIT_FAILURE);
        }

        if( ( output = fopen( argv[1], "w" ) ) == NULL ) {
                fprintf( stderr, "Error opening %s\n", argv[1] );
                exit( 1 );
        }

        printf("input: %s\n", argv[2]);
        printf("output: %s\n", argv[1]);
        printf("pat_size: %d\n", pat_size);
        printf("bytes: %d\n", file_bytes);
        if(max_pat > file_bytes/2){
                max_pat = floor(file_bytes/2);
                printf("max_pat: %d (reduced to limit!)\n", max_pat);
        }else{
                printf("max_pat: %d\n", max_pat);
        }



        half_file = sizeof(unsigned char)*ceil(file_bytes/2);

        for(num1=pat_size; num1<max_pat; num1++){
                max_searchs++;
        }

	max_searchs++;
        printf("max_searchs: %d\n", max_searchs);
        printf("frequency: %d\n", frequency);
	printf("threads: %d\n", thread_count);

	//pat_index=(struct one_index *)malloc( sizeof(struct one_index) * max_searchs);
	//table=(struct pat_length *)malloc( sizeof(struct pat_length) * max_searchs);
	pthread_t *threads = (pthread_t *)malloc(sizeof(pthread_t) * max_searchs);


	table=(struct pat_length *)malloc( sizeof(struct pat_length) * max_searchs);
	int pre_scan_nums[256][256];


	for(num1=0; num1<256; num1++){
		for(num2=0; num2<256; num2++){
			pre_scan_nums[num1][num2]=0;
			for(num3=0; num3<max_searchs; num3++){
				table[num3].index[num1][num2]=0;
			}
		}
	}

	for(num1=0; num1<file_bytes-1; num1++){
		pre_scan_nums[ (unsigned char)mem_file[num1] ][ (unsigned char)mem_file[num1+1] ]++;
		for(num2=0; num2<max_searchs; num2++){
			table[num3].index[ (unsigned char)mem_file[num1] ][ (unsigned char)mem_file[num1+1] ]++;
		}
	}
printf("here 1\n");

	for(num1=0; num1<max_searchs; num1++){
		table[num1].member=(struct translation_structure ***)malloc( 256*sizeof(struct translation_structure*) );
		for(num2=0; num2<256 ; num2++){
			table[num1].member[num2]=(struct translation_structure **)malloc( 256*sizeof(struct translation_structure**) );
			for(num3=0; num3<256 ; num3++){
				table[num1].member[num2][num3]=(struct translation_structure *)malloc( pre_scan_nums[num2][num3]*sizeof(struct translation_structure ***) );

				//for(num4=0; num4< pre_scan_nums[num2][num3]; num4++){
				//	table[num1].member[num2][num3][num4]=(struct translation_structure *)malloc( 
                                //      pre_scan_nums[num2][num3] * sizeof(struct translation_structure) );
				//}
			}
		}
	}


	for(num1=0; num1<max_searchs; num1++){
		for(num2=0; num2<256; num2++){
			for(num3=0; num3<256; num3++){
				for(num4=0; num4<2; num4++){
					table[num1].member[num2][num3][num4].times=2;
				}
			}
		}
	}

printf("success\n");

//printf("dyn table: %lu\n", sizeof( sizeof(struct pat_length2) * max_searchs)  );
printf("should be: %lu\n", sizeof(struct translation_structure)*256*256*2 );
exit(0);

//	for(num1=0; num1<max_searchs; num1++){
//		table[num1].member[num1]
//	}



	double indx_size = (sizeof(table[0].index)* max_searchs) / 1024;
	double tbl_size = ((sizeof(struct pat_length)* max_searchs) / 1024) - indx_size;

	printf("memory allocated: indices=%.02lfKB tables=%.02lfKB virtual=%.02lfKB\n", indx_size, tbl_size, (double)file_bytes/1024 );

	for(num1=0; num1<max_searchs; num1++){
        	for(num2=0; num2<39679; num2++){
                	table[num1].member[num2].times = 0;
        	}
	}


//////////////////////////////////


	progress=0;
	progress2=0;

	progress_func(max_searchs, 0);

	gettimeofday(&starttime, NULL);
	num2=0;
	for(num1=max_searchs-1; num1>-1; num1--){
        //for(num1=0; num1<max_searchs; num1++){
		table[num1].len=pat_size+num1;		
		
		table[num1].table_members=0;
		table[num1].empty=0;
		table[num1].fs_index_start = -1;

		table[num1].search_num=num1;
		table[num1].highest_times=0;
               	for(num3=0; num3<256; num3++){
                       	for(num4=0; num4<256; num4++){

				table[num1].index[num3][num4][0]=0;
				for(num5=1; num5<49; num5++){
					table[num1].index[num3][num4][num5]=-1;
				}
                       	}
               	}

               	if (pthread_create(&threads[num2], NULL, match_search, &table[num1].search_num) != 0)
                       	perror("pthread_create"), exit(1);
		
		
		if(num2 % thread_count-1 == 0){
			for(num3=progress; num3<num2+1; num3++){
				if (pthread_join(threads[num3], NULL) != 0)
					perror("pthread_join"),exit(1);
			}
			progress=num2+1;
		}else if (num2 == max_searchs-1){
			for(num3=progress; num3<num2+1; num3++){
				if (pthread_join(threads[num3], NULL) != 0)
					perror("pthread_join"),exit(1);
			}
			progress=num2+1;
		}
		num2++;
	}
	gettimeofday(&endtime, NULL);
	time1=((double)(endtime.tv_sec*1000000-starttime.tv_sec*1000000+endtime.tv_usec-starttime.tv_usec))/1000000;
	printf("time: %.02lfs\n", time1);
	printf("generating output file...\n");

	gettimeofday(&starttime, NULL);

        //parse structures and write to output

	num2=0;
	int highest_tbl_count=0;
	for(num1=0; num1<max_searchs; num1++){
			//find highest member count
                	if(table[num1].highest_times > num2){
                        	num2=table[num1].highest_times;
                	}
			//find highest table count
			
			if(table[num1].table_members > highest_tbl_count){
				highest_tbl_count=table[num1].table_members;
			}
			
	}


	int print_max=0;
	int next_highest=0;
	//int char_index=0;
        for(num2=num2; num2>=frequency; num2--){

                for(num4=0; num4<highest_tbl_count; num4++){

			for(num1=max_searchs-1; num1>-1; num1--){
				//for(char_index=0; char_index<256; char_index++){			
                        		if(table[num1].member[num4].times == num2){
						fprintf(output, "%d %d", table[num1].len, table[num1].member[num4].times);
                                		for(num3=0; num3<table[num1].len; num3++){
                                        		fprintf(output, " %d", table[num1].member[num4].series[num3]);
                                		}
						print_max++;	
						//search for next highest while parsing
						if(print_max==39679){
							goto max_reached;
						}
						fprintf(output, "\n");
                        		}else if(table[num1].member[num4].times < num2){
						if(table[num1].member[num4].times > next_highest){
							next_highest = table[num1].member[num4].times;
						}
					}
				//}



			}
                }
		num2 = next_highest+1;
		next_highest = 0;
        }
	max_reached:
	;

	gettimeofday(&endtime, NULL);
	time1=((double)(endtime.tv_sec*1000000-starttime.tv_sec*1000000+endtime.tv_usec-starttime.tv_usec))/1000000;
	printf("time: %.02lfs\n", time1);

        if (munmap(mem_file, file_bytes) == -1) {
                perror("Error un-mmapping the file");
        }
        close(input);

        fclose(output);
	printf("done.\n");
        return 0;
}

