#include #include #include #define HASH_TABLE_SIZE 9973 #define ALLOCATION_SIZE (1024*1024*8) static unsigned long memsize = 0; char* allocation_buffer = NULL; int allocation_index = 0; #define PADDING 8 typedef struct _Tree { char* word; struct _Tree* left; struct _Tree* right; } Tree; Tree* hash_table[HASH_TABLE_SIZE]; static unsigned long hash(const char* string, unsigned long len) { unsigned long addr; unsigned long i; addr = 0; for(i = 0; i < len; i++) addr += (addr << 1) + string[i]; return addr; } void init_buffer(void) { if((allocation_buffer = malloc(ALLOCATION_SIZE)) == NULL) { perror("malloc"); exit(1); } allocation_index = 0; } static void* new_memory(long sz) { void* ptr; ptr = (void *)(allocation_buffer + allocation_index); allocation_index += sz; allocation_index += (PADDING-1) - (allocation_index+PADDING-1)%PADDING; memsize += sz; return ptr; } static Tree* new_tree(const char* word, long len) { Tree* tree; if(allocation_index + len + 1 + sizeof(Tree) + 2 * PADDING > ALLOCATION_SIZE) init_buffer(); tree = new_memory(sizeof(Tree)); tree->word = new_memory(len + 1); memcpy(tree->word, word, len + 1); tree->left = tree->right = NULL; return tree; } static void add_tree(Tree* cur, const char* word, long len) { for(;;) { int cmp = strcmp(cur->word, word); if(cmp < 0) { if(cur->left != NULL) { cur = cur->left; continue; } cur->left = new_tree(word, len); return; } else if(cmp > 0) { if(cur->right != NULL) { cur = cur->right; continue; } cur->right = new_tree(word, len); return; } else { return; } } } void add_word(const char* word, int len) { unsigned long addr; addr = hash(word, len) % HASH_TABLE_SIZE; if(hash_table[addr] == NULL) hash_table[addr] = new_tree(word, len); else add_tree(hash_table[addr], word, len); } char* read_word(FILE* fp, int* len) { static char buff[BUFSIZ]; int n; if(fgets(buff, BUFSIZ, fp) == NULL) return NULL; n = strlen(buff); if(n > 0 && buff[n - 1] == '\n') { n--; if(n > 0 && buff[n - 1] == '\r') n--; buff[n] = '\0'; } *len = n; return buff; } void flush_tree(Tree* tree) { printf("%s\n", tree->word); if(tree->right) flush_tree(tree->right); while(tree->left) { tree = tree->left; printf("%s\n", tree->word); if(tree->right) flush_tree(tree->right); } } int main(int argc, char** argv) { int i; int verbose; for(i = 0; i < HASH_TABLE_SIZE; i++) hash_table[i] = NULL; init_buffer(); if(argc >= 2 && !strcmp(argv[1], "-v")) { verbose = 1; i = 2; } else { verbose = 0; i = 1; } for(; i < argc; i++) { FILE* fp; char* word; int len; if((fp = fopen(argv[i], "r")) == NULL) { perror(argv[i]); continue; } verbose && fprintf(stderr, "reading %s...", argv[i]); while((word = read_word(fp, &len)) != NULL) add_word(word, len); verbose && fprintf(stderr, "Done. (Allocated memory is %lu)\n", memsize); } verbose && fprintf(stderr, "Writting..."); for(i = 0; i < HASH_TABLE_SIZE; i++) if(hash_table[i]) { verbose && fprintf(stderr, " %d", i); flush_tree(hash_table[i]); } verbose && fprintf(stderr, "Done.\n"); return 0; }