You are to write an efficient program that will read a dicti
You are to write an efficient program that will read a dictionary of 100,000 words, and then check a document of 2,000,000 words. The program should insert into an array the position of each misspelled word. I have provided the driver program SpellRunner.cpp, and the necessary class stubs in speller.h, and speller.cpp. You may add any classes and/or code you wish to speller.cpp, and speller.h. You may also use, and alter any Weiss files. CreateDocs.out creates files used for testing. Doc-3-4-1 and Dict-3-4-1 are two testing files for small file.
______________________________________________________________________________________________
SpellRunner.cpp
#include <fstream>
#include <cstdlib>
#include <iostream>
#include <cstring>
#include <cstdio>
#include \"CPUTimer.h\"
#include \"mynew.h\"
#include \"speller.h\"
// uncomment line below for DOS/Windows OS.
// #define DOS
#ifdef DOS
#include <sys\\stat.h>
#else
#include <sys/stat.h>
#endif
extern int maxRAM;
extern int currentRAM;
using namespace std;
char* readDictionary(char *filename, char **dictionary, int dictSize)
{
int fileSize;
char *s;
struct stat statbuf;
stat(filename, &statbuf);
fileSize = statbuf.st_size;
s = new char[fileSize + 1];
ifstream inf(filename);
inf.read(s, fileSize);
inf.close();
s[fileSize] = \'\\0\';
dictionary[0] = strtok(s, \"\ \");
for(int i = 1; i < dictSize; i++)
dictionary[i] = strtok(NULL, \"\ \ \");
return s;
} // readDictionary
char* readDocument(char **document, int dictSize, int docSize, int seed)
{
char *s, filename[80];
int fileSize;
struct stat statbuf;
sprintf(filename, \"Doc-%d-%d-%d.txt\", dictSize, docSize, seed);
stat(filename, &statbuf);
fileSize = statbuf.st_size;
s = new char[fileSize + 100];
ifstream inf(filename);
inf.read(s, fileSize);
inf.close();
s[fileSize] = \'\\0\';
document[0] = strtok(s, \"\ \");
for(int i = 1; i < docSize; i++)
document[i] = strtok(NULL, \"\ \ \");
return s;
} // readDocument()
void readWrongs(int *wrongs, int dictSize, int docSize, int seed,
int &wrongCount)
{
char filename[80];
wrongCount = 0;
sprintf(filename, \"Wrongs-%d-%d-%d.txt\", dictSize, docSize, seed);
ifstream inf(filename);
while(inf >> wrongs[wrongCount++]);
wrongCount--;
} // readWrongs()
void checkAnswers(int wrongs[], int wrongCount, int misspelled[],
int misspelledCount)
{
for(int i = 0; i < wrongCount && i < misspelledCount; i++)
if(wrongs[i] < misspelled[i])
{
cout << \"Speller missed misspelled word # \" << wrongs[i] << endl;
return;
}
else
if(wrongs[i] > misspelled[i])
{
cout << \"Speller thought correctly spelled word # \" << misspelled[i]
<< \" was wrong\ \";
return;
}
if(wrongCount != misspelledCount)
cout << \"Speller found \" << misspelledCount << \" misspelled words when \"
<< \" there were really \" << wrongCount << \" misspelled words.\ \";
} // checkAnswers
void cleanup(char **dictionary, char *docFilePtr, char **document, int *wrongs, int *misspelled)
{
delete [] dictionary;
delete [] docFilePtr;
delete [] document;
delete [] wrongs;
delete [] misspelled;
} // cleanup()
int main(int argc, char* argv[])
{
char line[80], **dictionary, **document, *dictFilePtr, *docFilePtr;
int *wrongs, *misspelled, misspelledCount = 0, seed, dictSize, docSize,
wrongCount, tempMaxRAM, tempCurrentRAM;
strcpy(line, argv[1]);
strtok(line, \"-\");
dictSize = atoi(strtok(NULL, \"-\"));
docSize = atoi(strtok(NULL, \"-\"));
seed = atoi(strtok(NULL, \".\"));
dictionary = new char*[dictSize + 3];
dictFilePtr = readDictionary(argv[1], dictionary, dictSize);
document = new char*[docSize + 3];
docFilePtr = readDocument(document, dictSize, docSize, seed);
wrongs = new int[docSize];
readWrongs(wrongs, dictSize, docSize, seed, wrongCount);
misspelled = new int[docSize];
CPUTimer ct;
maxRAM = currentRAM = 0;
Speller *speller = new Speller(dictionary, dictSize);
tempMaxRAM = maxRAM;
tempCurrentRAM = currentRAM;
delete [] dictFilePtr;
maxRAM = tempMaxRAM;
currentRAM = tempCurrentRAM;
speller->check(document, docSize, misspelled, &misspelledCount);
cout << \"CPU Time: \" << ct.cur_CPUTime() << \" Real RAM: \" << maxRAM << endl;
checkAnswers(wrongs, wrongCount, misspelled, misspelledCount);
cleanup(dictionary, docFilePtr, document, wrongs, misspelled);
delete speller;
return 0;
} // main()
______________________________________________________________________________________________
Speller.h
#ifndef SPELLER_H
#define SPELLER_H
#include \"mynew.h\"
#include \"QuadraticProbing.h\"
#include <string.h>
class Speller
{
public:
QuadraticHashTable<char*> *hashDict;
Speller(char *dictionary[], int dictSize);
~Speller();
void check(char *document[], int docSize, int misspelled[], int *misspelledCount);
}; // class Speller
#endif
______________________________________________________________________________________________
Speller.cpp
#include \"speller.h\"
Speller::Speller(char *dictionary[], int dictSize)
{
hashDict = new QuadraticHashTable<char*> (-1, dictSize);
for(int i = 0; i < dictSize; i++)
hashDict->insert(dictionary[i]);
} // Speller()
void Speller::check(char *document[], int docSize, int misspelled[],
int *misspelledCount)
{
*misspelledCount = 0;
//Check document array using dictionary that you stored in constructor
for(int i = 0; i < docSize; i++)
{
if(!(hashDict->find(document[i])))
{
++(*misspelledCount);
misspelled[i] = 1;
}
else
{
misspelled[i] = 0;
}
}
//Store the array index of the misspelled word in the corresponding array and increments the count of misspelled words
} // check()
Speller::~Speller()
{
} // ~Speller()
_________________________________________________________________________________________
QuadraticProbing.cpp
#include \"QuadraticProbing.h\"
/**
* Internal method to test if a positive number is prime.
* Not an efficient algorithm.
*/
template <class HashedObj>
bool QuadraticHashTable<HashedObj>::isPrime( int n ) const
{
if( n == 2 || n == 3 )
return true;
if( n == 1 || n % 2 == 0 )
return false;
for( int i = 3; i * i <= n; i += 2 )
if( n % i == 0 )
return false;
return true;
}
/**
* Internal method to return a prime number at least as large as n.
* Assumes n > 0.
*/
template <class HashedObj>
int QuadraticHashTable<HashedObj>::nextPrime( int n ) const
{
if( n % 2 == 0 )
n++;
for( ; !isPrime( n ); n += 2 )
;
return n;
}
/**
* Construct the hash table.
*/
template <class HashedObj>
QuadraticHashTable<HashedObj>::QuadraticHashTable( const HashedObj & notFound, int size )
: array( nextPrime( size ) ), ITEM_NOT_FOUND( notFound )
{
makeEmpty( );
}
/**
* Insert item x into the hash table. If the item is
* already present, then do nothing.
*/
template <class HashedObj>
void QuadraticHashTable<HashedObj>::insert( const HashedObj & x )
{
// Insert x as active
int currentPos = findPos( x );
if( isActive( currentPos ) )
return;
array[ currentPos ] = HashEntry( x, ACTIVE );
// Rehash; see Section 5.5
if( ++currentSize > array.size( ) / 2 )
rehash( );
}
/**
* Expand the hash table.
*/
template <class HashedObj>
void QuadraticHashTable<HashedObj>::rehash( )
{
vector<HashEntry> oldArray = array;
// Create new double-sized, empty table
array.resize( nextPrime( 2 * oldArray.size( ) ) );
for( int j = 0; j < array.size( ); j++ )
array[ j ].info = EMPTY;
// Copy table over
currentSize = 0;
for( int i = 0; i < oldArray.size( ); i++ )
if( oldArray[ i ].info == ACTIVE )
insert( oldArray[ i ].element );
}
/**
* Method that performs quadratic probing resolution.
* Return the position where the search for x terminates.
*/
template <class HashedObj>
int QuadraticHashTable<HashedObj>::findPos( const HashedObj & x ) const
{
/* 1*/ int collisionNum = 0;
/* 2*/ int currentPos = hash( x, array.size( ) );
/* 3*/ while( array[ currentPos ].info != EMPTY &&
array[ currentPos ].element != x )
{
/* 4*/ currentPos += 2 * ++collisionNum - 1; // Compute ith probe
/* 5*/ if( currentPos >= array.size( ) )
/* 6*/ currentPos -= array.size( );
}
/* 7*/ return currentPos;
}
/**
* Remove item x from the hash table.
*/
template <class HashedObj>
void QuadraticHashTable<HashedObj>::remove( const HashedObj & x )
{
int currentPos = findPos( x );
if( isActive( currentPos ) )
array[ currentPos ].info = DELETED;
}
/**
* Find item x in the hash table.
* Return the matching item, or ITEM_NOT_FOUND, if not found.
*/
template <class HashedObj>
const HashedObj & QuadraticHashTable<HashedObj>::find( const HashedObj & x ) const
{
int currentPos = findPos( x );
return isActive( currentPos ) ? array[ currentPos ].element : ITEM_NOT_FOUND;
// Above mentions only if active, if it is return array element, else return not found
}
/**
* Make the hash table logically empty.
*/
template <class HashedObj>
void QuadraticHashTable<HashedObj>::makeEmpty( )
{
currentSize = 0;
for( int i = 0; i < array.size( ); i++ )
array[ i ].info = EMPTY;
}
/**
* Deep copy.
*/
template <class HashedObj>
const QuadraticHashTable<HashedObj> & QuadraticHashTable<HashedObj>::
operator=( const QuadraticHashTable<HashedObj> & rhs )
{
if( this != &rhs )
{
array = rhs.array;
currentSize = rhs.currentSize;
}
return *this;
}
/**
* Return true if currentPos exists and is active.
*/
template <class HashedObj>
bool QuadraticHashTable<HashedObj>::isActive( int currentPos ) const
{
return array[ currentPos ].info == ACTIVE;
}
/**
* A hash routine for string objects.
*/
template <class HashedObj>
int QuadraticHashTable<HashedObj>::hash( const string & key, int tableSize ) const
{
int hashVal = 0;
for( int i = 0; i < key.length( ); i++ )
hashVal = 37 * hashVal + key[ i ];
hashVal %= tableSize;
if( hashVal < 0 )
hashVal += tableSize;
return hashVal;
}
/**
* A hash routine for ints.
*/
template <class HashedObj>
int QuadraticHashTable<HashedObj>::hash( int key, int tableSize ) const
{
if( key < 0 ) key = -key;
return key % tableSize;
}
_________________________________________________________________________________________
QuadraticProbing.h
#ifndef _QUADRATIC_PROBING_H_
#define _QUADRATIC_PROBING_H_
#include \"vector.h\"
#include \"mystring.h\"
// QuadraticProbing Hash table class
//
// CONSTRUCTION: an initialization for ITEM_NOT_FOUND
// and an approximate initial size or default of 101
//
// ******************PUBLIC OPERATIONS*********************
// void insert( x ) --> Insert x
// void remove( x ) --> Remove x
// Hashable find( x ) --> Return item that matches x
// void makeEmpty( ) --> Remove all items
// int hash( String str, int tableSize )
// --> Static method to hash strings
class QuadraticHashTable
{
public:
explicit QuadraticHashTable( const HashedObj & notFound, int size = 101 );
QuadraticHashTable( const QuadraticHashTable & rhs )
: ITEM_NOT_FOUND( rhs.ITEM_NOT_FOUND ),
array( rhs.array ), currentSize( rhs.currentSize ) { }
const HashedObj & find( const HashedObj & x ) const;
void makeEmpty( );
void insert( const HashedObj & x );
void remove( const HashedObj & x );
const QuadraticHashTable & operator=( const QuadraticHashTable & rhs );
enum EntryType { ACTIVE, EMPTY, DELETED };
private:
struct HashEntry
{
HashedObj element;
EntryType info;
HashEntry( const HashedObj & e = HashedObj( ), EntryType i = EMPTY )
: element( e ), info( i ) { }
};
vector<HashEntry> array;
int currentSize;
const HashedObj ITEM_NOT_FOUND;
bool isPrime( int n ) const;
int nextPrime( int n ) const;
bool isActive( int currentPos ) const;
char* findPos( const HashedObj & x ) const;
char* hash( const string & key, int tableSize ) const;
int hash( int key, int tableSize ) const;
void rehash( );
};
#include \"QuadraticProbing.cpp\"
#endif
_________________________________________________________________________________________
Dict-3-4-1.txt (Dictionary)
luov
vwucoxynjm
lsrwwvov
_________________________________________________________________________________________
Doc-3-4-1.txt (Checking for misspell words)
lsrwwvov
vwuboxynjm
luov
vwucoxynj
Solution
int main(int argc, char* argv[])
{
char line[80], **dictionary, **document, *dictFilePtr, *docFilePtr;
int *wrongs, *misspelled, misspelledCount = 0, seed, dictSize, docSize,
wrongCount, tempMaxRAM, tempCurrentRAM;
strcpy(line, argv[1]);
strtok(line, \"-\");
dictSize = atoi(strtok(NULL, \"-\"));
docSize = atoi(strtok(NULL, \"-\"));
seed = atoi(strtok(NULL, \".\"));
dictionary = new char*[dictSize + 3];
dictFilePtr = readDictionary(argv[1], dictionary, dictSize);
document = new char*[docSize + 3];
docFilePtr = readDocument(document, dictSize, docSize, seed);
wrongs = new int[docSize];
readWrongs(wrongs, dictSize, docSize, seed, wrongCount);
misspelled = new int[docSize];
CPUTimer ct;
maxRAM = currentRAM = 0;
Speller *speller = new Speller(dictionary, dictSize);
tempMaxRAM = maxRAM;
tempCurrentRAM = currentRAM;
delete [] dictFilePtr;
maxRAM = tempMaxRAM;
currentRAM = tempCurrentRAM;
speller->check(document, docSize, misspelled, &misspelledCount);
cout << \"CPU Time: \" << ct.cur_CPUTime() << \" Real RAM: \" << maxRAM << endl;
checkAnswers(wrongs, wrongCount, misspelled, misspelledCount);
cleanup(dictionary, docFilePtr, document, wrongs, misspelled);
delete speller;
return 0;
} // main()ZZ
[ssdavis@lect1 p4]$ CreateDocs.out
Dictionary size: 3
Document size: 4
Seed: 1
[ssdavis@lect1 p4]$ cat Dict-3-4-1.txt
luov
vwucoxynjm
lsrwwvov
[ssdavis@lect1 p4]
[ssdavis@lect1 p4]$ cat Doc-3-4-1.txt
lsrwwvov
vwuboxynjm
luov
vwucoxynjm
[ssdavis@lect1 p4]
[ssdavis@lect1 p4]$ cat Wrongs-3-4-1.txt
1
[ssdavis@lect1 p4]$
[ssdavis@lect1 p4]$ speller.out Dict-100000-2000000-7.txt
CPU Time: 0.170697 Real RAM: 1230872
[ssdavis@lect1 p4]$









