#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/*
 * One slot of the pattern table.
 *
 * series - first byte of the stored pattern; only the pointer is kept, the
 *          bytes themselves are never copied into the table
 * times  - number of occurrences counted so far; 0 marks an unused slot
 * len    - pattern length in bytes
 */
struct translation_structure{
	char *series;
	int times;
	int len;
}table[39679];


/*
 * Two-byte prefilter: pat_index[a][b] is set to 1 when a pattern starting
 * with bytes a, b is stored in the table.  main() clears it at the start of
 * every pattern-length pass and skips match_search for unmarked byte pairs.
 */
short pat_index[256][256];

/*
 * Starts at 1 and is incremented each time empty_search fills a slot, so it
 * is one more than the number of slots filled that way.  It has to be an int:
 * empty_search compares it against 39680, which a short cannot represent.
 */
int table_members=1;

static int match_search(int num1, char *linestring);
static int empty_search(int num1, char *linestring);
static int full_search(int num1, char *linestring);


/*
 * Look for a stored pattern identical to the num1 bytes at linestring and,
 * when one is found, increment its occurrence count.
 *
 * num1       - pattern length in bytes
 * linestring - first byte of the candidate pattern (need not be terminated)
 *
 * Returns 0 when a match was found and counted, 1 when there is no match.
 */
static int match_search(int num1, char *linestring){
	int capacity = (int)(sizeof table / sizeof table[0]);
	int num4;

	/* A non-positive length can never match a stored pattern. */
	if(num1 <= 0){
		return (1);
	}

	/*
	 * Scan newest-first.  table_members reaches capacity+1 once the table
	 * is full, so clamp the start index to the last real slot.
	 */
	num4 = table_members - 1;
	if(num4 >= capacity){
		num4 = capacity - 1;
	}

	/* Slot 0 is a regular slot and must be included in the scan. */
	for(; num4>=0; num4--){
		/* Unused slots have len 0 and are skipped by the length test. */
		if(table[num4].len == num1 &&
		   memcmp(table[num4].series, linestring, (size_t)num1) == 0){
			table[num4].times++;
			return (0);
		}
	}
	return (1);
}


/*
 * Store a new pattern in the first unused slot of the table.
 *
 * num1       - pattern length in bytes
 * linestring - first byte of the pattern; the pointer itself is stored, the
 *              bytes are not copied
 *
 * Returns 0 when the pattern was stored, 1 when no unused slot was found.
 */
static int empty_search(int num1, char *linestring){
	/* Slot written by the previous successful call; the scan resumes there. */
	static int resume_at=0;
	int slot;

	if(table_members == 39680){
		/* the member counter has hit its cap */
		return (1);
	}

	slot = resume_at;
	while(slot < table_members){
		if(table[slot].times != 0){
			/* occupied, keep looking */
			slot++;
			continue;
		}

		/* Unused slot: record the pattern with an initial count of one. */
		table[slot].series = linestring;
		table[slot].times = 1;
		table[slot].len = num1;
		resume_at = slot;

		if(table_members < 39680){
			table_members++;
		}

		/* Mark the pattern's first two bytes in the prefilter. */
		pat_index[(unsigned char)linestring[0]][(unsigned char)linestring[1]] = 1;
		return (0);
	}

	return (1);
}




static int full_search(int num1, char *linestring){
	//return 0 for found and replaced array string of 1x
	//return 1 for error (no 1x strings found, for now)
	int num4; 

	for(num4=0; num4<39679; num4++){
		if(table[num4].times==1){
			//replace it
			//table[num4].series=realloc (table[num4].series, sizeof(unsigned char)*num1);
			//table[num4].series=(unsigned char *)malloc(sizeof(unsigned char)*num1);
			table[num4].series=linestring;  
			//for(num3=0; num3<num1; num3++){                                                                
			//	table[num4].series[num3]=linestring[num3];                        
			//}              
			table[num4].times=1;                                         
			table[num4].len = num1;
			pat_index[(unsigned char)linestring[0]][(unsigned char)linestring[1]]=1;
         		return(0);
		}
	}
	return(1);
}





/*

table[0].series=(char *)malloc(sizeof(char)*whatever);
={
    {(char[]){0, 1, 2}, 5, 100, 3},
    {(char[]){4, 5, 6, 7}, 1, 2, 4},
    {(char[]){0, 1, 2}, 5, 100, 3},   
    {(char[]){4, 5, 6, 7}, 1, 2, 4},
    {(char[]){0, 1, 2}, 5, 100, 3},
    {(char[]){4, 5, 6, 7}, 1, 2, 4}
};
*/


/* Print the command-line synopsis to standard output and exit with status 1. */
void usage(void){
	static const char synopsis[] =
		"\tUsage: patterns <output> <input> <min pattern size (bytes) 2 or higher> <max pattern size (bytes)> [frequency (4 default)]\n";

	fputs(synopsis, stdout);
	exit(1);
}


/*
 * Entry point: count repeated byte patterns in a file.
 *
 * Command line:
 *   patterns <output> <input> <min pattern size> <max pattern size> [frequency]
 *
 * The input file is mapped read-only.  For every pattern length from the
 * minimum to the maximum (inclusive), each window of that length is looked up
 * with match_search; a window that is not found is stored with empty_search
 * or, failing that, with full_search.  Afterwards every table slot with a
 * non-zero count is written to the output file as one line:
 *   <len> <times> <byte> <byte> ...
 *
 * Returns 0 on success and 1 if the output file could not be closed cleanly.
 * Exits with a non-zero status on bad arguments, on I/O errors, or when a
 * window can be neither matched nor stored.
 */
int main(int argc, char **argv){
	FILE *output;
	int input;
	struct stat buffer;
	char *mem_file;
	int pat_size;
	unsigned long file_bytes=0;
	unsigned long pos;
	int num1, num3, num4;
	double progress=0;
	int max_searchs=0;
	int max_pat;
	int return_val;
	int frequency=4;
	int exit_code=0;

	if(argc == 6){
		frequency = atoi(argv[5]);
	}else if(argc != 5){
		usage();
	}

	for(num1=0; num1<39679; num1++){
		table[num1].times = 0;
		table[num1].len = 0;
	}

	pat_size = atoi(argv[3]);
	max_pat = atoi(argv[4]);

	/*
	 * The two-byte prefilter reads window[0] and window[1], so patterns
	 * shorter than two bytes cannot be handled (this also rejects
	 * non-numeric input, for which atoi yields 0).
	 */
	if(pat_size < 2){
		usage();
	}

	input = open(argv[2], O_RDONLY);
	if(input == -1){
		fprintf( stderr, "Error opening %s\n", argv[2] );
		exit( 1 );
	}

	if(fstat(input, &buffer) == -1){
		perror("Error reading the file size");
		close(input);
		exit(EXIT_FAILURE);
	}
	file_bytes=buffer.st_size;

	mem_file=mmap(0, file_bytes, PROT_READ, MAP_SHARED, input, 0);
	if (mem_file == MAP_FAILED) {
		close(input);
		perror("Error mmapping the file");
		exit(EXIT_FAILURE);
	}

	if( ( output = fopen( argv[1], "w" ) ) == NULL ) {
		fprintf( stderr, "Error opening %s\n", argv[1] );
		munmap(mem_file, file_bytes);
		close(input);
		exit( 1 );
	}

	printf("input: %s\n", argv[2]);
	printf("output: %s\n", argv[1]);
	printf("pat_size: %d\n", pat_size);
	printf("bytes: %lu\n", file_bytes);

	/*
	 * Patterns longer than half the file are not searched.  A negative
	 * maximum is treated as "too large", which is what the implicit
	 * signed-to-unsigned comparison did before.
	 */
	if(max_pat < 0 || (unsigned long)max_pat > file_bytes/2){
		max_pat = (int)(file_bytes/2);
		printf("max_pat: %d (reduced to limit!)\n", max_pat);
	}else{
		printf("max_pat: %d\n", max_pat);
	}

	if(max_pat > pat_size){
		max_searchs = max_pat - pat_size;
	}

	printf("max_searchs: %d\n", max_searchs);
	/*
	 * NOTE(review): frequency is parsed and printed but is not applied to
	 * the output below; confirm whether slots with times < frequency
	 * should be filtered out.
	 */
	printf("frequency: %d\n", frequency);

	for(num1=pat_size; num1<=max_pat; num1++){
		/* Guard the division: max_searchs is 0 when min == max. */
		printf("progress: %.02lf%%\n",
		       max_searchs > 0 ? (progress/max_searchs)*100 : 0.0);
		progress++;

		/* The prefilter only describes patterns stored during this pass. */
		memset(pat_index, 0, sizeof pat_index);

		/*
		 * Visit every window of num1 bytes, including the one that ends
		 * on the last byte of the file.  Written as pos+num1 <= size so
		 * the unsigned arithmetic cannot wrap.
		 */
		for(pos=0; pos+(unsigned long)num1 <= file_bytes; pos++){
			char *window = &mem_file[pos];

			if(pat_index[(unsigned char)window[0]][(unsigned char)window[1]]==1){
				return_val = match_search(num1, window);
			}else{
				/* No stored pattern starts with these two bytes. */
				return_val = 1;
			}

			if(return_val==1){
				return_val=empty_search(num1, window);
			}

			if(return_val==1){
				return_val=full_search(num1, window);
			}

			if(return_val==1){
				printf("error!\n");
				exit(1);
			}
		}
	}

	/* Dump every used slot; series still points into the live mapping. */
	for(num4=0; num4<39679; num4++){
		if(table[num4].times > 0){
			fprintf(output, "%d %d", table[num4].len, table[num4].times);
			for(num3=0; num3<table[num4].len; num3++){
				fprintf(output, " %d", table[num4].series[num3]);
			}
			fprintf(output, "\n");
		}
	}

	if (munmap(mem_file, file_bytes) == -1) {
		perror("Error un-mmapping the file");
	}
	close(input);

	/* fclose flushes buffered output, so a failure here means lost data. */
	if(fclose(output) != 0){
		perror("Error closing the output file");
		exit_code = 1;
	}
	return exit_code;
}

/*
output:
	x unique strings of at least pat_size length
	10 most common:
	1). bla z
	2). foo z
	3). foobla z
	4). blafoo z
	.
	.
save to output.txt up to 39680 most common strings ordered by z

*/
