#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <pthread.h>
#include <time.h>
#include <sys/time.h>
#include <bitset>
#include <xmmintrin.h>

struct translation_structure{
        char *series;
        int times;
        //int len;
};

//struct translation_structure table[39679];

struct pat_length{
	int highest_times;
	struct translation_structure member[39679];

}*table;


//pat_table=malloc( sizeof(struct pat_length) );

//pat_table[0].member[].

//[39679];

/*
struct one_bit{
	//unsigned c : 1;
	char c;
};
*/

struct one_index{
	int len;
	int table_members;
	int empty;
	int search_num;
	std::bitset<65536> c;
	//std::bitset<65536> d;
	//struct one_bit c[256][256];
};


struct one_index *pat_index;

int file_bytes=0;
char *mem_file;

//int table_members=1;

static void progress_func(int max_searchs, int pat_len);
void *match_search(void *set);
static int empty_search(int num1, char *linestring);
static int full_search(int num1, char *linestring);


static void progress_func(int max_searchs, int pat_len){
	static float a = 0;
	static float b = 0;
	
	if(max_searchs == 0){
		b++;
		printf("progress: %.02lf%% %d length patterns done.\n",(double)((b/a)*100),pat_len );
	}else{
		a = max_searchs;
		printf("progress: %.02lf%%\n",(double)((b/a)*100));
	}
	//printf("progress: %.02lf%% %d length patterns done.\n",(double)((b/a)*100),pat_len );

}

void *match_search(void *set){
        //returns 0 for match found and written
        //returns 1 for no matches found
        int num4;
        int num2;
        int num3;
	int return_val = 1;
	int num1 = pat_index[*(int *)set].search_num;
	int stop_byte = file_bytes-pat_index[num1].len;
	int member_length = pat_index[num1].len;
	//int buff1;
	//int buff2;
	//__m128* pSrc1 = (__m128*) pArray1;
	//__m128 a;
	//__m128 b;
	//__m128 c;
	//register int f[4];

	//char mem_buffer1;
	//char mem_buffer2;

	for(num2=0; num2< stop_byte; num2++){
		
		if( pat_index[num1].c[ (unsigned char)mem_file[num2]*256+(unsigned char)mem_file[num2+1] ] ){
			
			//if(member_length>3){
			//	if( !pat_index[num1].d[ (unsigned char)mem_file[num2+(member_length-2)]*256+(unsigned char)mem_file[num2+(member_length-1)] ] )			
			//		goto second;
			//}
			for(num4=0; num4<pat_index[num1].table_members; num4++){
			//for(num4=pat_index[num1].table_members-1; num4>-1; num4--){
				/*
				a = _mm_setr_ps(table[num1].member[num4].series[0], table[num1].member[num4].series[1], table[num1].member[num4].series[2], table[num1].member[num4].series[3]);
				b = _mm_setr_ps(mem_file[num2+0], mem_file[num2+1], mem_file[num2+2], mem_file[num2+3]);

				b = _mm_cmpeq_ps( b, a );				
				_mm_comieq_ss(
				_mm_storeu_ps((float*) f, b);


				//for( num3=0; num3 < 4; num3++){				
					if(f[0] == 0 || f[1] == 0 || f[2] == 0 || f[3] == 0)
						goto next;
				//}
				*/
				

				for( num3=0; num3 < member_length; num3++){
					
					//for( num3=member_length-1; num3 > -1; num3--){
					//mem_buffer1 = table[num1].member[num4].series[num3];
					//mem_buffer2 = mem_file[num2+num3];
					//num3++;
					//}while(mem_buffer1 == mem_buffer2);
					 //s = x != 0.0 ? Math.Sin(x)/x : 1.0;
					if(table[num1].member[num4].series[num3] != mem_file[num2+num3]){	
						//if(mem_buffer1 != mem_buffer2){
						goto next;
					}
				}
				


				//if(num3==pat_index[num1].len-1){
						//match found
                                               	table[num1].member[num4].times++;
						if(table[num1].member[num4].times>table[num1].highest_times){
							table[num1].highest_times=table[num1].member[num4].times;
						}
                                                	//return_val = 0;
							//num4=-1;
						goto found;
							//num3 = pat_index[num1].len;
						
				//}
                                	//}else{
					//goto next;
					//num3 = pat_index[num1].len;
					//while(mem_buffer1 == mem_buffer2);	
				next:
				;
			}
		
		
	
		}else{
			//second:
			return_val = 1;
		}

		if(return_val==1){
			return_val = empty_search(num1, &mem_file[num2]);
		}
		if(return_val==1){
			return_val = full_search(num1, &mem_file[num2]);
		}
		if(return_val==1){
			printf("error!\n");
			exit(1);
		}

		found:
		;

	}
       
/*
        for(num4=pat_index[num1].table_members-2; num4>-1; num4--){
                        //if(table[num4].len == num1){
                                num3=0;
                                while(table[num1].member[num4].series[num3] == linestring[num3]){
                                        num3++;
                                        if(num3==pat_index[num1].len){
                                                table[num1].member[num4].times++;
                                                return(0);
						printf("here 3\n");
                                        }
                                }
                        //}
                //}
        }
        return (1);
*/

progress_func(0, member_length);
//printf("\tpatterns %d done.\n", member_length);
pthread_exit(0);
//return 0;

//void * pthread_exit(NULL);
}


static int empty_search(int num1, char *linestring){
        //return 0 if empty spot found and wrote to
        //return 1 if no empty spots found

        //static int empty=0;
        int num4;
        //int num5=0;

        if(pat_index[num1].table_members == 39680){
                return (1);
        }
	//int member_length = pat_index[num1].len;


        for(num4=pat_index[num1].empty; num4<=pat_index[num1].table_members; num4++){
                if(table[num1].member[num4].times==0){

                        //array is not full
                        table[num1].member[num4].series=linestring;
                        table[num1].member[num4].times=1;
                        //table[num4].len = num1;
                        pat_index[num1].empty=num4;
                        if(pat_index[num1].table_members < 39680){
                                pat_index[num1].table_members++;
//printf("tbl mems %d\n", pat_index[num1].table_members);
                        }
                        pat_index[num1].c[ (unsigned char)linestring[0]*256+(unsigned char)linestring[1] ]=1;
			//if(member_length > 3){
			//	pat_index[num1].d[ (unsigned char)linestring[member_length-2]*256+(unsigned char)linestring[member_length-1] ]=1;
			//}
			//table[num1].highest_times++;
                        return(0);

                        //num5=1;
                        //num4=39679;
                }
        }
        return(1);
}

static int full_search(int num1, char *linestring){
        //return 0 for found and replaced array string of 1x
        //return 1 for error (no 1x strings found, for now)
        int num4;
	//int member_length = pat_index[num1].len;

        for(num4=0; num4<39679; num4++){
                if(table[num1].member[num4].times==1){
                        //replace it
                        //table[num4].series=realloc (table[num4].series, sizeof(unsigned char)*num1);
                        //table[num4].series=(unsigned char *)malloc(sizeof(unsigned char)*num1);
                        table[num1].member[num4].series=linestring;
                        //for(num3=0; num3<num1; num3++){
                        //      table[num4].series[num3]=linestring[num3];
                        //}
                        table[num1].member[num4].times=1;
                        //table[num4].len = num1;
                        pat_index[num1].c[ (unsigned char)linestring[0]*256+(unsigned char)linestring[1] ]=1;
			//if(member_length > 3){
			//	pat_index[num1].d[ (unsigned char)linestring[member_length-2]*256+(unsigned char)linestring[member_length-1] ]=1;
			//}
                        return(0);
                }
        }
        return(1);
}

void usage(void){
        printf("\tUsage: patterns <output> <input> <min pattern size (bytes) 2 or higher> <max pattern size (bytes)> <frequency> <threads>\n");
        exit(1);
}


int main(int argc, char **argv){
        FILE *output;
        int input;
        struct stat buffer;
        int status;
        //char *mem_file;
        int pat_size;
        //unsigned long file_bytes=0;
        //unsigned char *linestring;
        //int character;
        int num1, num2, num3, num4;
        //, num4, num5;
        int progress=0;
	double progress2=0;
        int max_searchs=0;
        int max_pat;
        int half_file;
        //int return_val;
        int frequency=4;
	int thread_count;
	//void *pth_return;	
	//int pth_rval;
	//void *pth_return = &pth_rval;
	//void **pth_return2 = &pth_return;

	//struct timespec ts;
	struct timeval starttime,endtime;
	double time1;


        //if(argc == 6){
                //frequency = atoi(argv[5]);
		//thread_count = atoi(argv[6]);
        if(argc != 7){
                usage();
        }

//        for(num1=0; num1<39679; num1++){
//                //table[num1].series = NULL;
//                table[num1].times = 0;
//                table[num1].len = 0;
//        }

	frequency = atoi(argv[5]);
	thread_count = atoi(argv[6]);
        pat_size = atoi(argv[3]);
        max_pat = atoi(argv[4]);

        input = open(argv[2], O_RDONLY);
        if(input == -1){
                fprintf( stderr, "Error opening %s\n", argv[2] );
                exit( 1 );
        }

        status = fstat(input, &buffer);
        file_bytes=buffer.st_size;
        mem_file=(char *)mmap(0, file_bytes, PROT_READ, MAP_SHARED, input, 0);
        if (mem_file == MAP_FAILED) {
                close(input);
                perror("Error mmapping the file");
                exit(EXIT_FAILURE);
        }

        if( ( output = fopen( argv[1], "w" ) ) == NULL ) {
                fprintf( stderr, "Error opening %s\n", argv[1] );
                exit( 1 );
        }

        printf("input: %s\n", argv[2]);
        printf("output: %s\n", argv[1]);
        printf("pat_size: %d\n", pat_size);
        printf("bytes: %d\n", file_bytes);
        if(max_pat > file_bytes/2){
                max_pat = floor(file_bytes/2);
                printf("max_pat: %d (reduced to limit!)\n", max_pat);
        }else{
                printf("max_pat: %d\n", max_pat);
        }



        half_file = sizeof(unsigned char)*ceil(file_bytes/2);

        for(num1=pat_size; num1<max_pat; num1++){
                max_searchs++;
        }

	max_searchs++;
        printf("max_searchs: %d\n", max_searchs);
        printf("frequency: %d\n", frequency);
	printf("threads: %d\n", thread_count);

	pat_index=(struct one_index *)malloc( sizeof(struct one_index) * max_searchs);
	table=(struct pat_length *)malloc( sizeof(struct pat_length) * max_searchs);
	pthread_t *threads = (pthread_t *)malloc(sizeof(pthread_t) * max_searchs);
/*
printf("sizes:\n");
printf("\tstruct one_bit: %lu\n", sizeof(struct one_bit));
printf("\tstruct one_index: %lu\n", sizeof(struct one_index));
printf("\tstruct int: %lu\n", sizeof(int));
printf("\tstruct char *: %lu\n", sizeof(char *));
*/

	printf("memory allocated: indices=%.02lfKB tables=%.02lfKB\n", (double)(((sizeof(struct one_index)* max_searchs) ) / 1024), 
		(double)(((sizeof(struct pat_length)* max_searchs) ) / 1024) );

	for(num1=0; num1<max_searchs; num1++){
        	for(num2=0; num2<39679; num2++){
                	//table[num1].series = NULL;
                	table[num1].member[num2].times = 0;
                	//table[num1].len = 0;  
        	}
	}


//////////////////////////////////


	progress=0;
	progress2=0;

	progress_func(max_searchs, 0);

	//printf("progress: %.02lf%%\n", (double)0 );

	//clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &starttime);
	gettimeofday(&starttime, NULL);
	//clock_gettime(CLOCK_REALTIME, &starttime);
        for(num1=0; num1<max_searchs; num1++){
                //system("clear");
                //printf("progress: %.02lf%%\n", (double)((progress/max_searchs)*100) );
                //progress++;
		//for(num3=0; num3<thread_count && num3<max_searchs; num3++){
		//for(num1=0; num1<max_searchs; num1++){
			pat_index[num1].len=pat_size+num1;		
			pat_index[num1].table_members=0;
			pat_index[num1].empty=0;
			pat_index[num1].search_num=num1;
			table[num1].highest_times=0;
                	for(num2=0; num2<256; num2++){
                        	for(num4=0; num4<256; num4++){
                                	pat_index[num1].c[(num2*256)+num4]=0;
					//pat_index[num1].d[(num2*256)+num4]=0;
                        	}
                	}

			//printf("here 1\n");
			// void* (*)(void*)
                	if (pthread_create(&threads[num1], NULL, match_search, &pat_index[num1].search_num) != 0)
                        	perror("pthread_create"), exit(1);
			//num1++;
			//printf("here 2\n");
			if(num1 == thread_count-1){
				for(num3=progress; num3<num1+1; num3++){
					//pth_return=&pat_index[num1].search_num;
					//pth_return2=&pth_return;
					if (pthread_join(threads[num3], NULL) != 0)
						perror("pthread_join"),exit(1);
					//printf("progress: %.02lf%% %d length patterns done.\n",(double)((progress2/max_searchs)*100),pat_size+num3 );
					//progress2++;
				}
				progress=num1+1;
			}else if (num1 == max_searchs-1){
				for(num3=progress; num3<num1+1; num3++){
					//pth_return=&pat_index[num1].search_num;
					//pth_return2=&pth_return;
					if (pthread_join(threads[num3], NULL) != 0)
						perror("pthread_join"),exit(1);
					//printf("progress: %.02lf%% %d length patterns done.\n",(double)((progress2/max_searchs)*100),pat_size+num3 );
					//progress2++;
				}
				progress=num1+1;
				//progress2++;
			}
			//progress++;


	}
	//clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &endtime);
	gettimeofday(&endtime, NULL);
	//clock_gettime(CLOCK_REALTIME, &endtime);
	time1=((double)(endtime.tv_sec*1000000-starttime.tv_sec*1000000+endtime.tv_usec-starttime.tv_usec))/1000000;
	//time1=((double)(endtime.tv_sec*1000000-starttime.tv_sec*1000000));
	//printf("progress: %.02lf%%\n", (double)100 );
	printf("time: %.02lfs\n", time1);
	printf("generating output file...\n");



/*
		for(num3=0; num3<thread_count && num3<max_searchs; num3++){
                	if (pthread_join(threads[num1], NULL) != 0)
                        	perror("pthread_join"),exit(1);
		}
*/

	
/*
                //start position in mem array
                for(num2=0; num2< (file_bytes-num1); num2++){
                        if(pat_index[num1].c[(unsigned char)mem_file[num2]][(unsigned char)mem_file[num2+1]].c==1){
                                return_val = match_search(num1, &mem_file[num2]);
                        }else{
                                return_val = 1;
                        }
                        if(return_val==1){
                                return_val=empty_search(num1, &mem_file[num2]);
                        }
                        if(return_val==1){
                                return_val=full_search(num1, &mem_file[num2]);
                        }
                        if(return_val==1){
                                printf("error!\n");
                                exit(1);
                        }
                }
        }
*/



	gettimeofday(&starttime, NULL);

        //parse structures and write to output

	num2=0;
	int highest_tbl_count=0;
	for(num1=0; num1<max_searchs; num1++){
        	//num2=0;
        	//for(num3=0; num3<39679; num3++){
			//find highest member count
//printf("ht: %d\n", table[num1].highest_times);
                	if(table[num1].highest_times > num2){
                        	num2=table[num1].highest_times;
                	}
			//find highest table count
//printf("tc: %d\n", pat_index[num1].table_members);
			if(pat_index[num1].table_members > highest_tbl_count){
				highest_tbl_count=pat_index[num1].table_members;
			}

        	//}
	}

//printf("num2: %d frequency: %d tbl_count: %d max_s: %d\n", num2, frequency, highest_tbl_count, max_searchs);

	int print_max=0;
	int next_highest=0;
	//printf("here 1\n");
	//6  1
        for(num2=num2; num2>=frequency; num2--){

                for(num4=0; num4<highest_tbl_count; num4++){

			for(num1=0; num1<max_searchs; num1++){
			
                        	if(table[num1].member[num4].times == num2){
					fprintf(output, "%d %d", pat_index[num1].len, table[num1].member[num4].times);
                                	for(num3=0; num3<pat_index[num1].len; num3++){
                                        	fprintf(output, " %d", table[num1].member[num4].series[num3]);
                                	}
                                	//fprintf(output, "\n");
					print_max++;	
					//search for next highest while parsing
					if(print_max==39679){
						goto max_reached;
					}
					fprintf(output, "\n");
                        	}else if(table[num1].member[num4].times < num2){
					if(table[num1].member[num4].times > next_highest){
						next_highest = table[num1].member[num4].times;
					}
				}



			}
			//num2 must be set to next highest
			//num2 = next_highest;
			//next_highest = frequency;
                }
		num2 = next_highest+1;
		next_highest = 0;
		//printf("num2: %d frequency: %d\n", num2, frequency);
		//if(print_max == 1){
		//	num2 = frequency-1;
		//}

        }
	max_reached:
	;

	gettimeofday(&endtime, NULL);
	time1=((double)(endtime.tv_sec*1000000-starttime.tv_sec*1000000+endtime.tv_usec-starttime.tv_usec))/1000000;
	printf("time: %.02lfs\n", time1);



        if (munmap(mem_file, file_bytes) == -1) {
                perror("Error un-mmapping the file");
        }
        close(input);

        fclose(output);
	printf("done.\n");
        return 0;
}

