Please help with this Word Frequency Binary Tree C data fi

Please help with this..

Word Frequency - Binary Tree - C++

data file: http://pastebin.com/q4ZK46RQ

Given an arbitrarily long list of unordered words with an arbitrary number of different values (words) appearing in it, determine and print the marginal distribution for this list of words. That is, count how many times each different value appears in the list and then print out each value along with its count and its marginal distribution. The output should be arranged from the smallest to the largest value (in alphabetical order). (Print the first 100 words only for output.)

You are to use binary tree that will handle single words. The binary tree is to be maintained as an ordered tree.

Note:   ‘the’ and ‘The’ are the same words so there should be only one entry into the tree.

Now we need to delete the most common words used. Delete 15% of the most common words found in this text. Now compute their NEW relative values for the new set of words and print the list of words left in the data structure. (Again print out only the first 100 words.) (15% of the words?: If I had 100 words in my tree, then 15 words would be deleted.)

Example

Word               Absolute           Relative

Smith               677                  1.832 %

Jones                88                    3.548%

Solution

#include #include #include #include using namespace std; int no_input(); int no_output(); ifstream infile; ofstream outfile; int no_input() { if(!infile) { cout << \"Error opening input file.\ \"; return 1; } else return 0; } int no_output() { if(!outfile) { cout << \"Error opening output file.\ \"; return 1; } else return 0; } struct treeNode { public: string word; int count; treeNode *leftChild; treeNode *rightChild; }; class Tree { public: Tree() { root = NULL; totalWords = 0; totalUnique = 0; } treeNode* myNewNode() { treeNode *tmp; tmp = new treeNode; tmp->word.clear(); tmp->count = 1; tmp->leftChild = NULL; tmp->rightChild = NULL; return tmp; } void insert(treeNode *current, char *wrdPTR) { treeNode *previous; treeNode *tmp; totalWords++; *wrdPTR = toupper(*wrdPTR); //ensure all words start uppercase if(current == NULL) { //start tree root = myNewNode(); totalUnique++; root->word = wrdPTR; return; } while(current != NULL) { //search routine if(wrdPTR < current->word) { previous = current; current = current->leftChild; } else if(wrdPTR > current->word) { previous = current; current = current->rightChild; } else //if duplicate word, stop search break; } if(current != NULL && wrdPTR == current->word) { current->count++; //if duplicate, increment count & return return; } tmp = myNewNode(); totalUnique++; tmp->word = wrdPTR; if(wrdPTR < previous->word) previous->leftChild = tmp; else if(wrdPTR > previous->word) previous->rightChild = tmp; return; } treeNode* getroot() { return root; } int gettotal() { return totalWords; } int getunique() { return totalUnique; } void printPreorder(treeNode *current) { if(current == NULL) return; else { outfile << current->word; if(current->word.length() < 8) outfile << \"\\t\"; if(current->word.length() < 16) outfile << \"\\t\"; outfile << \"\\t\" << current->count << \"\\t\\t\"; float percentage = current->count; percentage = (percentage/totalWords); outfile << (percentage*100) << \"%\" << endl; printPreorder(current->leftChild); printPreorder(current->rightChild); } } void printInorder(treeNode *current) { if(current == NULL) return; else { printInorder(current->leftChild); outfile << current->word; if(current->word.length() < 8) outfile << \"\\t\"; if(current->word.length() < 16) outfile << \"\\t\"; outfile << \"\\t\" << current->count << \"\\t\\t\"; float percentage = current->count; percentage = (percentage/totalWords); outfile << (percentage*100) << \"%\" << endl; printInorder(current->rightChild); } } void printPostorder(treeNode *current) { if(current == NULL) return; else { printPostorder(current->leftChild); printPostorder(current->rightChild); outfile << current->word; if(current->word.length() < 8) outfile << \"\\t\"; if(current->word.length() < 16) outfile << \"\\t\"; outfile << \"\\t\" << current->count << \"\\t\\t\"; float percentage = current->count; percentage = (percentage/totalWords); outfile << (percentage*100) << \"%\" << endl; } } private: int totalWords; int totalUnique; treeNode *root; }; class heap { public: heap() { //no default } heap(int x) { size = x + 1; //for array starting at element[1] countARY = new int[size]; countARY[0] = -1; index = 0; } void buildheap(treeNode *node) { if(node == NULL) return; else { insert(node->count); buildheap(node->leftChild); buildheap((node->rightChild)); } } void insert(int cnt) { index++; countARY[index] = cnt; reheapUP(index); } void reheapUP(int position) { //push larger values to top after an insert if(position == 1) return; int parent = position / 2; if(countARY[position] > countARY[parent]) { int tmp; tmp = countARY[parent]; countARY[parent] = countARY[position]; countARY[position] = tmp; reheapUP(parent); //recursive call to check new position } } void reheapDWN(int position) { //push smaller values down after a delete int childL = position * 2; int childR = (position * 2) + 1; int tmp; int max; if(childR >= index) { if(childL >= index) //if at bottom of the heap return; else max = childL; } else { //find largest child if(countARY[childL] >= countARY[childR]) max = childL; else max = childR; } if(countARY[position] < countARY[max]) { tmp = countARY[max]; //swap parent for child countARY[max] = countARY[position]; countARY[position] = tmp; reheapDWN(max); //recursive call on new position } } void printlargest() { outfile << countARY[1] << \" -> \"; } void printheap() { printlargest(); countARY[1] = countARY[index]; //put last value in heap into heap root if(index > 1) //unless last value in heap reheapDWN(1); index--; if(index != 0) //print entire heap printheap(); } private: int size; int index; int *countARY; }; int main() { infile.open(\"freqInwords.txt\"); no_input(); outfile.open(\"wordfreqout.txt\"); no_output(); outfile << setprecision(1) << fixed; Tree wordTree; char *putChar; char delim[] = \"1234567890\\\" \\t\ ,?!;-[:]/(.)\"; char wordsIn[120]; while(infile) { infile.getline(wordsIn, sizeof(wordsIn), \'\ \'); if(!infile) break; putChar = strtok(wordsIn, delim); while(putChar!=NULL) { wordTree.insert(wordTree.getroot(), putChar); putChar = strtok(NULL, delim); } } outfile << \"Word\\t\\t\\tAbsolute\\tRelative\ \ \"; //print out header wordTree.printInorder(wordTree.getroot()); heap countHeap(wordTree.getunique()); //declare heap array implementation countHeap.buildheap(wordTree.getroot()); outfile << \"\ Heap of counts print:\ \"; countHeap.printheap(); outfile << \"END of heap.\"; infile.close(); outfile.close(); return 0; }
Please help with this.. Word Frequency - Binary Tree - C++ data file: http://pastebin.com/q4ZK46RQ Given an arbitrarily long list of unordered words with an arb
Please help with this.. Word Frequency - Binary Tree - C++ data file: http://pastebin.com/q4ZK46RQ Given an arbitrarily long list of unordered words with an arb

Get Help Now

Submit a Take Down Notice

Tutor
Tutor: Dr Jack
Most rated tutor on our site