/* dict.c */ #include #include #include #include "dict.h" #include "lz.h" #define MAX_STRING_LENGTH 100 /* type of an entry in the dictionary */ typedef struct trie_type{ char letter; /* the letter corresponding to this node */ int code; /* code for the string from the root to this letter */ struct trie_type *sibling; struct trie_type *child; struct trie_type *parent; /* parent pointer */ /* code for this string */ } trie_node_type; /* now define the dictionary */ struct DICT { trie_node_type *index[NO_CHARS]; /* array of pointers to the trie nodes. The i'th entry in the array corresponds to the i'th ASCII character */ int next_code; /* next code to be used */ } ; /* define an array of pointers indexed by the code number which */ /* will help in getting the string, given its code number */ trie_node_type *index_arr[1<next_code = 0; /* insert the first NO_CHARS in the standard alphabet */ for (i = 0; i <= NO_CHARS; i++) { dict->index[i] = new_node((char)i,dict); dict->index[i]->parent = NULL; } /* set the buffered character to NULL */ buffer_char = '\0'; /* initialize index array */ for (i = 0; i <= NO_CHARS; i++) index_arr[i] = dict->index[i]; printf("init_dict exiting\n"); return dict; } /* This procedure creates a new node of type trie_node_type */ /* and returns it after properly initializing it. The letter */ /* field of this node contains c */ trie_node_type *new_node(char c, DICT_TYPE dict) { trie_node_type *temp; temp = (trie_node_type *) malloc(sizeof(trie_node_type)); temp->letter = c; index_arr[dict->next_code] = temp; temp->code = dict->next_code++; temp->sibling = temp->child = NULL; temp->parent = NULL; return temp; } /* This procedure searches the dictionary for a string */ /* If the string is present, it returns its code value */ /* else it returns -1 */ int dict_lookup(char *str, DICT_TYPE dict) { trie_node_type *p, *previous; int index = 0; /* index into str, assuming that str is at */ /* least one character long */ p = dict->index[str[0]]; while(str[index] != '\0') { if (p == NULL) /* the string is not present */ return -1; /* now serach the siblings of p */ while (p!=NULL && p->letter != str[index]) p = p->sibling; if (p == NULL) return -1; /* we have found the sibling */ previous = p; p = p->child; index++; } return previous->code; } /* This procedure inserts an element in the dictionary having code value */ /* next_code and string value str */ /* It returns 0 if the dictionary is full and so the insert cannot */ /* made. Otherwise, it returns 1 */ int insert_dict(char *str, DICT_TYPE dict) { trie_node_type *p; int done = 0; /* indicates when we are done */ int i = 0; /* index into str */ /* check that we have not run out of codes */ /* We can have at most 2^BPC - 1 different codes */ if (dict->next_code > MAXCODE) return 0; /* no more codes available */ p = dict->index[str[0]]; /* search in this tree as long as we don't get to */ /* the last node */ while(!(done)) { /* find the appropriate sibling of p if it exists*/ while (p->sibling != NULL && p->letter != str[i]) p = p->sibling; if (p->letter != str[i]) /* i.e., the appropriate sibling does not exist */ { /* create a new sibling */ trie_node_type *temp; temp = new_node(str[i], dict); /* now we are done because the nature of inserts into this datastructure is such that if we are inserting a string in this, then we are guaranteed that the prefix of this string which consists of all but last letter in this string is already in the trie */ p->sibling = temp; temp->parent = p->parent; done = 1; } /* if ... */ else /* the sibling is there */ { /* now one should explore the child */ if (p->child != NULL) p = p->child; else /* create a new child node */ { trie_node_type *temp = new_node(str[i], dict); p ->child = temp; temp->parent = p; done = 1; /* we are done */ } i++; } /* else ... */ }/* while ... */ return 1; } /* This procedure seraches the dictionary for an item */ /* whose code value is equal to the given code. If it */ /* is found, then the procedure returns the associated*/ /* string, else it returns NULL */ void dict_string_lookup(int code, char curr_string[], DICT_TYPE dict) { curr_string[0] = '\0'; if (code <= NO_CHARS) { /* the string is the single character whose ASCII */ /* code is equal to this code */ curr_string[0] = code; curr_string[1] = '\0'; } else if (index_arr[code] != NULL) { /* follow the parent pointers to reconstruct the */ /* string */ char temp_str[MAX_STRING_LENGTH]; int length = 0, i; trie_node_type *ptr = index_arr[code]; temp_str[0] = ptr->letter; length = 1; while (ptr->parent != NULL) { ptr = ptr->parent; temp_str[length++] = ptr->letter; } /* copy this into curr_string in the * /* reverse order */ for (i = 0; i < length; i++) curr_string[i] = temp_str[length-i-1]; curr_string[length] = '\0'; } } /* This procedure takes as input the file pointer and */ /* finds the largest string in the file which is */ /* present in the dictionary followed by one more */ /* character of the file. It buffers this character in */ /* a variable buffer_char because it will need this */ /* character next time when this procedure is called */ /* it returns the number of bytes read. Also, there is */ /* a parameter finished which is set to 1 if the */ /* procedure encounters EOF while scanning the input. */ /* Else, this is set to 0. */ /* The longest match is contained in the actual parameter */ /* string[] */ int get_longest_match(FILE *in, char string[], int *finished, DICT_TYPE dict) { int input; int string_length; int bytes_read = 0; trie_node_type *ptr; if (buffer_char == '\0') /* if the buffer character is NULL, read one form the input */ { buffer_char = getc(in); bytes_read++; } /* initialize string to contain just the buffer character */ string[0] = buffer_char; string[1] = '\0'; string_length = 1; /* ptr now points to the node corresponding to this letter */ ptr = dict->index[buffer_char]; while((input = getc(in)) != EOF) { bytes_read ++; string[string_length++] = input; string[string_length] = '\0'; /* ptr should now accordingly move to the corresponding */ /* child */ ptr = ptr->child; /* now search the siblings for "input" */ while (ptr!=NULL && ptr->letter != input) ptr = ptr->sibling; if (ptr == NULL) /* we have found the string */ { /* set the buffer_char */ buffer_char = input; *finished = 0; return bytes_read; } } *finished = 1; /* we encountered the EOF */ return bytes_read; }