/* dict.c */ #include #include #include #include "dict.h" #include "lz.h" #define MAX_DICT_LENGTH 20000 /* type of an entry in the dictionary */ typedef struct { char string[MAX_MATCH_LENGTH]; /* the string */ int code; /* code for this string */ } dict_element; /* now define the dictionary */ struct DICT { dict_element entry[MAX_DICT_LENGTH]; /* array of entries */ /* each entry holds a string and the /* associated code */ int dict_size; /* size of dictionary */ int next_code; /* next code to be used */ int BPC; /* bits per code*/ int MAXCODE; /* maximum number of codes possible*/ } ; /* define the dictionary to be an array of the type dict_element */ char buffer_char; /* buffer character used while reading the input */ /* initialize the dictionary by inserting characters from 0 to */ /* NO_CHAR into it. */ /* also initialize buffer character */ DICT_TYPE init_dict(int BPC) { char string[2]; int i; DICT_TYPE dict; /* malloc*/ dict = (DICT_TYPE) malloc (sizeof(struct DICT)); /* initialize some fields */ dict->dict_size = 0; dict->next_code = 0; dict->BPC=BPC; dict->MAXCODE=((0x1 << BPC) - 1); /* insert the first NO_CHARS in the standard alphabet */ for (i = 0; i <= NO_CHARS; i++) { /* create a sring which just contains the i'th character */ string[0] = i; string[1] = '\0'; insert_dict(string, dict); } /* set the buffered character to NULL */ buffer_char = '\0'; return dict; } /* This procedure inserts an element in the array having code value */ /* next_code and string value str */ /* It returns 0 if the dictionary is full and so the insert cannot */ /* made. Otherwise, it returns 1 */ int insert_dict(char *str, DICT_TYPE dict) { /* first check that the dictionary is not full */ /* also check that we have not run out of codes */ /* We can have at most 2^BPC - 1 different codes */ if (dict->dict_size == MAX_DICT_LENGTH || dict->next_code > dict->MAXCODE) { return 0; /* the dictionary is full */ } dict->entry[dict->dict_size].code = dict->next_code; strcpy(dict->entry[dict->dict_size].string,str); dict->dict_size++; dict->next_code++; return 1; } /* This procedure searches the dictionary for a string */ /* If the string is present, it returns its code value */ /* or else it returns -1 */ int dict_lookup(char *str,DICT_TYPE dict) { int i; for (i=0; i < dict->dict_size; i++) /* search in every entry in the dictionary */ if (strcmp(str, dict->entry[i].string) == 0) /* if a match found */ return dict->entry[i].code; return -1; } /* This procedure searches the dictionary for an item */ /* whose code value is equal to the given code. If it */ /* is found, then the procedure returns the associated */ /* string , else it returns NULL. Actually the search */ /* procedure is very simple. The code of a string and */ /* its index into the array are the same ! */ void dict_string_lookup(int code, char curr_string[],DICT_TYPE dict) { /* The code and index into the array (dictionary) are */ /* the same. So, just check that code < dictionary size */ /* first and if so, copy the string in that location */ /* into curr_string */ if (code > dict->dict_size-1) /* code starts from 0 */ curr_string[0] = '\0'; /* set it to NULL */ else strcpy(curr_string,dict->entry[code].string); } /* This procedure takes as input the file pointer and */ /* finds the largest string in the file which is */ /* present in the dictionary followed by one more */ /* character of the file. It buffers this character in */ /* a variable buffer_char because it will need this */ /* character next time when this procedure is called */ /* it returns the number of bytes read. Also, there is */ /* a parameter finished which is set to 1 if the */ /* procedure encounters EOF while scanning the input. */ /* Else, this is set to 0. Caution : This is a highly */ /* inefficient algorithm */ /* The longest match is contained in the actual parameter */ /* string[] */ int get_longest_match(FILE *in, char string[], int *finished, DICT_TYPE dict) { int input; int string_length; int bytes_read = 0; /* initialize string to contain just the buffer character */ string[0] = buffer_char; string[1] = '\0'; string_length = strlen(string); /* it can be 0 or 1 */ while((input = getc(in)) != EOF) { bytes_read ++; string[string_length++] = input; string[string_length] = '\0'; if (dict_lookup(string,dict) < 0) /* we have found the string */ { /* set the buffer_char */ buffer_char = input; *finished = 0; return bytes_read; } } *finished = 1; /* we encountered the EOF */ return bytes_read; }