#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>


// Number of table slots that may hold a pattern.
enum { TABLE_SLOTS = 39679 };

// One recorded pattern:
//   series - first byte of the pattern (empty_search()/full_search() store
//            the pointer they are given; nothing is copied)
//   times  - how often the pattern has been counted; 0 marks an unused slot
//   len    - pattern length in bytes
//
// The array has one element more than TABLE_SLOTS on purpose: empty_search()
// lets table_members grow to 39680 and pthread_worker() starts its scan at
// table[table_members - 1], so the extra, never-filled element keeps that
// first probe inside the array.
struct translation_structure{
	char *series;
	int times;
	int len;
}table[TABLE_SLOTS + 1];

// Reset to 0 by match_search(); the live code does not use it otherwise.
int found;

// Length in bytes of the patterns currently being scanned (main()'s loop
// variable).
int cur_pat_size;

// One flag per leading byte pair: cleared by main() for every pattern size and
// set to 1 when a pattern starting with that pair is stored in the table.
short pat_index[256][256];

// Exclusive upper bound of the table region in use. Starts at 1 and is
// advanced by empty_search() up to 39680; that value does not fit in a short,
// so the counter is an int.
int table_members=1;

// Per-thread record filled in by match_search() before it starts a thread.
struct pth_data{
	// index of this record (match_search() stores the loop counter here)
	int thread;
	// match_search() points this at &linestring[index]
	char *line;
	// table index range computed by match_search(); only the commented-out
	// worker variant reads start/stop
	int start;
	int stop;
	// set to 1 before the thread starts; match_search() treats 0 as "match found"
	int ret_val;
};

// Array of per-thread records; main() allocates thread_count elements and
// match_search() indexes it by thread number.
struct pth_data *pthread_returns;

// Thread body: accounts for the cur_pat_size-byte pattern starting at `address`.
void pthread_worker(void *address);
// Multi-threaded table lookup; returns 0 when a record reports a match, 1 otherwise.
static int match_search(int thread_count, char *linestring);
// Stores the pattern in an unused table slot; returns 0 on success, 1 if none was found.
static int empty_search(char *linestring);
// Overwrites a table entry whose count is 1; returns 0 on success, 1 if there is none.
static int full_search(char *linestring);

// Thread body: account for the cur_pat_size-byte pattern that starts at
// `address`.
//
// The table is searched from the newest entry (table_members - 1) down to 0
// for an entry of the same length and the same bytes. On a hit its counter is
// incremented. Otherwise the pattern is stored with empty_search(), or, if
// that fails, with full_search(). If neither can store it the process prints
// "error!" and exits with status 1.
//
// address: pointer to at least cur_pat_size readable bytes.
void pthread_worker(void *address ){
	// main() runs several workers at once and all of them read and write
	// table, table_members and empty_search()'s static hint, so the whole
	// lookup-or-insert step is done under one lock.
	static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
	const int slots = (int)(sizeof table / sizeof table[0]);
	char *linestring = (char *)address;
	int ret_val=1;
	int idx;

	pthread_mutex_lock(&table_lock);

	// empty_search() allows table_members to reach 39680; never start the
	// scan outside the array.
	idx = table_members-1;
	if(idx >= slots){
		idx = slots-1;
	}

	if(cur_pat_size > 0){
		for(; idx >= 0; idx--){
			if(table[idx].len != cur_pat_size){
				continue;
			}
			// Compare exactly cur_pat_size bytes: reading on after a
			// full match could run past the pattern and past the end
			// of the mapped input.
			if(memcmp(table[idx].series, linestring, (size_t)cur_pat_size) == 0){
				table[idx].times++;
				ret_val=0;
				break;
			}
		}
	}

	if(ret_val==1){
		ret_val=empty_search(linestring);
	}

	if(ret_val==1){
		ret_val=full_search(linestring);
	}

	if(ret_val==1){
		printf("error!\n");
		exit(1);
	}

	pthread_mutex_unlock(&table_lock);
}






	//int index = (int )mydata;

//printf("here: %d %d %d %d", index, pthread_returns[index].start, pthread_returns[index].stop, pthread_returns[index].line[0]);	

//for(num4=0; num4 < 2; num4++){
//	printf("%d ", pthread_returns[index].line[num4]);
//}
//printf("here\n");

/*
        for(num4=pthread_returns[index].start; num4>pthread_returns[index].stop && found !=1; num4--){
			//while(found == 0){
			//if(found == 1){
			//	pthread_returns[index].ret_val=1;
			//	return;
			//}
				
                        	if(table[num4].len == cur_pat_size){
//printf("here");

//printf("test: %d\n", pthread_returns[(int )mydata].line[0] );

                                	num3=0;
                                	while( table[num4].series[num3] == pthread_returns[index].line[num3] ){
                                        	num3++;
//printf("here1\n");
                                        	if(num3 == cur_pat_size){
//printf("here2\n");
							found=1;
                                                	table[num4].times++;
                                                	pthread_returns[index].ret_val=0;
							//void pthread_exit(void *);
                                        	}
                                	}

                
                                //matched
         
                        	}
			//}
        }
        pthread_returns[index].ret_val=1;
	//void pthread_exit(void *);
*/





// Start routine in the form pthread_create() requires. pthread_worker()
// returns void, so it is called through this shim rather than through a
// function-pointer cast.
static void *match_search_entry(void *arg){
	pthread_worker(arg);
	return NULL;
}

// Runs thread_count workers and reports whether any per-thread record ended
// up with ret_val == 0.
//
// thread_count: number of threads to start; values below 1 start nothing and
//               yield 1.
// linestring:   record i is given &linestring[i] as its line.
//
// returns 0 if some record has ret_val == 0 after all threads finished,
// returns 1 otherwise.
//
// Assumes pthread_returns holds at least thread_count records (main()
// allocates it that way).
// NOTE(review): pthread_worker() as written in this file takes the line
// pointer and never stores into pthread_returns, so with that worker every
// ret_val stays 1 and this function returns 1 - confirm the intended
// reporting path before calling it.
static int match_search(int thread_count, char *linestring){
	pthread_t *threads;
	int step;
	int i;
	int rc;

	if(thread_count < 1){
		return(1);
	}

	threads = malloc((size_t)thread_count * sizeof *threads);
	if(threads == NULL){
		perror("malloc");
		exit(1);
	}

	found = 0;
	step = table_members/thread_count;

	for(i=0; i<thread_count; i++){
		pthread_returns[i].thread = i;
		pthread_returns[i].line = &linestring[i];

		// Each record covers `step` table indexes counting down from the
		// newest entry; the last record runs to the start of the table.
		pthread_returns[i].start = table_members - 1 - (step*i);
		if(i != thread_count-1){
			pthread_returns[i].stop = table_members - 1 - (step*i) - step;
		}else{
			pthread_returns[i].stop = -1;
		}

		pthread_returns[i].ret_val = 1;

		// The worker dereferences its argument as a byte pointer, so it
		// gets the record's line rather than the integer thread index.
		rc = pthread_create(&threads[i], NULL, match_search_entry, pthread_returns[i].line);
		if(rc != 0){
			// pthread functions return the error number; errno is not set
			fprintf(stderr, "pthread_create: %s\n", strerror(rc));
			exit(1);
		}
	}

	for(i=0; i<thread_count; i++){
		rc = pthread_join(threads[i], NULL);
		if(rc != 0){
			fprintf(stderr, "pthread_join: %s\n", strerror(rc));
			exit(1);
		}
	}

	free(threads);

	for(i=0; i<thread_count; i++){
		if(pthread_returns[i].ret_val == 0){
			return(0);
		}
	}
	return(1);
}


// Stores `linestring` in the first unused table slot (times == 0).
//
// The search starts at the slot used by the previous successful call and stops
// before table_members. On success the slot gets the pointer (no copy is
// made), a count of 1 and the current pattern length; table_members is
// advanced and the pattern's leading byte pair is flagged in pat_index.
//
// returns 0 if a slot was found and written,
// returns 1 if table_members is 39680 or no unused slot was found.
static int empty_search(char *linestring){
	// slot written by the last successful call; earlier slots are skipped
	static int hint=0;
	int slot;

	if(table_members == 39680){
		return(1);
	}

	slot = hint;
	while(slot < table_members){
		if(table[slot].times != 0){
			slot++;
			continue;
		}

		table[slot].series = linestring;
		table[slot].times = 1;
		table[slot].len = cur_pat_size;
		hint = slot;

		if(table_members < 39680){
			table_members++;
		}

		pat_index[(unsigned char)linestring[0]][(unsigned char)linestring[1]] = 1;
		return(0);
	}

	return(1);
}




// Replaces the first table entry whose count is exactly 1 with `linestring`.
//
// The chosen slot gets the pointer (no copy is made), a count of 1 and the
// current pattern length, and the pattern's leading byte pair is flagged in
// pat_index.
//
// returns 0 if an entry was replaced,
// returns 1 if none of the 39679 slots has a count of 1.
static int full_search(char *linestring){
	int slot = 0;

	// find the first entry that has been seen only once
	while(slot < 39679 && table[slot].times != 1){
		slot++;
	}

	if(slot == 39679){
		return(1);
	}

	table[slot].series = linestring;
	table[slot].times = 1;
	table[slot].len = cur_pat_size;
	pat_index[(unsigned char)linestring[0]][(unsigned char)linestring[1]] = 1;
	return(0);
}

// Prints the command-line synopsis on stdout and ends the process with exit
// status 1; it does not return.
void usage(void){
	static const char synopsis[] =
		"\tUsage: patterns <output> <input> <min pattern size (bytes) 2 or higher> <max pattern size (bytes)> <thread count> [frequency (4 default)]\n";

	fputs(synopsis, stdout);
	exit(1);
}


// Start routine in the form pthread_create() requires. pthread_worker()
// returns void, so it is called through this shim rather than through a
// function-pointer cast.
static void *scan_thread_entry(void *arg){
	pthread_worker(arg);
	return NULL;
}

// patterns <output> <input> <min size> <max size> <thread count> [frequency]
//
// Maps <input> read-only and, for every pattern size from <min size> to
// <max size> (capped at half the file size), hands start offsets of the file
// to pthread_worker() in batches of <thread count> threads. Afterwards every
// table entry counted at least [frequency] times (default 4) is written to
// <output>, highest count first, as
//     <len> <times> <byte> <byte> ...
// with the bytes printed as decimal values of type char.
//
// Returns 0 on success; argument and I/O errors end the process with a
// non-zero status.
int main(int argc, char **argv){
	enum { SLOT_COUNT = 39679 };	// table slots initialised and reported here
	FILE *output;
	int input;
	struct stat buffer;
	char *mem_file;
	pthread_t *threads;
	unsigned long file_bytes;
	unsigned long step;
	unsigned long offset;
	int pat_size;
	int max_pat;
	int thread_count;
	int frequency=4;
	int max_searchs;
	int progress=0;
	int highest=0;
	int slot;
	int count;
	int byte;
	int worker;
	int rc;

	if(argc == 7){
		frequency = atoi(argv[6]);
	}else if(argc != 6){
		usage();
	}

	pat_size = atoi(argv[3]);
	max_pat = atoi(argv[4]);
	thread_count = atoi(argv[5]);

	// thread_count is used as a divisor and as an allocation size below, and
	// the usage text requires a minimum pattern size of 2.
	if(thread_count < 1 || pat_size < 2){
		usage();
	}

	for(slot=0; slot<SLOT_COUNT; slot++){
		table[slot].times = 0;
		table[slot].len = 0;
	}

	input = open(argv[2], O_RDONLY);
	if(input == -1){
		fprintf( stderr, "Error opening %s\n", argv[2] );
		exit( 1 );
	}

	if(fstat(input, &buffer) == -1){
		perror("Error reading the size of the input file");
		close(input);
		exit(EXIT_FAILURE);
	}
	file_bytes = buffer.st_size;

	mem_file = mmap(0, file_bytes, PROT_READ, MAP_SHARED, input, 0);
	if(mem_file == MAP_FAILED){
		perror("Error mmapping the file");
		close(input);
		exit(EXIT_FAILURE);
	}

	if( ( output = fopen( argv[1], "w" ) ) == NULL ) {
		fprintf( stderr, "Error opening %s\n", argv[1] );
		exit( 1 );
	}

	printf("input: %s\n", argv[2]);
	printf("output: %s\n", argv[1]);
	printf("pat_size: %d\n", pat_size);
	printf("bytes: %lu\n", file_bytes);
	printf("threads: %d\n", thread_count);

	// A negative max_pat is treated like one that is too large, which is what
	// the implicit signed-to-unsigned comparison used to do.
	if(max_pat < 0 || (unsigned long)max_pat > file_bytes/2){
		max_pat = (int)(file_bytes/2);
		printf("max_pat: %d (reduced to limit!)\n", max_pat);
	}else{
		printf("max_pat: %d\n", max_pat);
	}

	pthread_returns = malloc((size_t)thread_count * sizeof *pthread_returns);
	threads = malloc((size_t)thread_count * sizeof *threads);
	if(pthread_returns == NULL || threads == NULL){
		perror("malloc");
		exit(EXIT_FAILURE);
	}

	max_searchs = (max_pat > pat_size) ? max_pat - pat_size : 0;

	printf("max_searchs: %d\n", max_searchs);
	printf("frequency: %d\n", frequency);

	for(cur_pat_size=pat_size; cur_pat_size<max_pat+1; cur_pat_size++){
		// With a single pattern size max_searchs is 0; report 0% instead of
		// dividing by zero.
		printf("progress: %.02lf%%\n",
		       max_searchs > 0 ? ((double)progress/max_searchs)*100 : 0.0);
		progress++;

		memset(pat_index, 0, sizeof pat_index);

		// cur_pat_size never exceeds file_bytes/2 here, so the subtraction
		// cannot wrap. Offsets are kept in unsigned long so large inputs do
		// not overflow an int.
		step = (file_bytes - (unsigned long)cur_pat_size) / (unsigned long)thread_count;

		// Thread number `worker` walks the offsets worker*step + offset, so
		// one batch handles thread_count positions that are `step` apart.
		for(offset=0; offset<step; offset++){
			for(worker=0; worker<thread_count; worker++){
				rc = pthread_create(&threads[worker], NULL, scan_thread_entry,
				                    &mem_file[(unsigned long)worker*step + offset]);
				if(rc != 0){
					// pthread functions return the error number; errno is not set
					fprintf(stderr, "pthread_create: %s\n", strerror(rc));
					exit(1);
				}
			}

			for(worker=0; worker<thread_count; worker++){
				rc = pthread_join(threads[worker], NULL);
				if(rc != 0){
					fprintf(stderr, "pthread_join: %s\n", strerror(rc));
					exit(1);
				}
			}
		}

		// Known limitation: start offsets from step*thread_count up to
		// file_bytes - cur_pat_size are not visited, so patterns beginning in
		// the last few bytes of the file are not counted.
	}

	// find the highest count in the table
	for(slot=0; slot<SLOT_COUNT; slot++){
		if(table[slot].times > highest){
			highest = table[slot].times;
		}
	}

	// write the entries in descending order of count, down to `frequency`
	for(count=highest; count>=frequency; count--){
		for(slot=0; slot<SLOT_COUNT; slot++){
			if(table[slot].times != count){
				continue;
			}
			fprintf(output, "%d %d", table[slot].len, table[slot].times);
			for(byte=0; byte<table[slot].len; byte++){
				fprintf(output, " %d", table[slot].series[byte]);
			}
			fprintf(output, "\n");
		}
	}

	free(threads);
	free(pthread_returns);
	pthread_returns = NULL;

	// The table entries point into the mapping, so it is released only after
	// the report has been written.
	if (munmap(mem_file, file_bytes) == -1) {
		perror("Error un-mmapping the file");
	}
	close(input);

	if(fclose(output) != 0){
		perror("Error closing the output file");
		return 1;
	}
	return 0;
}

/*
output:
	x unique strings of at least pat_size length
	10 most common:
	1). bla z
	2). foo z
	3). foobla z
	4). blafoo z
	.
	.
save to output.txt up to 39680 most common strings ordered by z

*/
