Project for Python Create an application containing the foll
Solution
Here is one example program that covers all the topics you mentioned.
import sys
import nltk
from nltk.tokenize import PunktSentenceTokenizer
from nltk.corpus import product_reviews_2
from nltk.tokenize import RegexpTokenizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from replacers import RegexpReplacer
from nltk import pos_tag
from nltk.tree import Tree
from nltk import ne_chunk
# Module-level resources shared by the DataPreprocess methods below.

# Raw contents of 'ipod.txt' from NLTK's product_reviews_2 corpus.
text = product_reviews_2.raw('ipod.txt')

# Splits raw text into sentences.
sent_tokenizer = PunktSentenceTokenizer()

lemmatizer = WordNetLemmatizer()

# A set gives O(1) membership tests when filtering words.
stop_words = set(stopwords.words('english'))

# Tokens are runs of word characters and apostrophes; other punctuation is dropped.
reg_tokenizer = RegexpTokenizer(r"[\w']+")

# Matches single '#' characters; not referenced by the code visible here.
custom_reg_tokenizer = RegexpTokenizer("[##]")

# NOTE(review): RegexpReplacer comes from the local `replacers` module;
# presumably it expands contractions before tokenizing -- confirm there.
replacer = RegexpReplacer()

# Not referenced by the code visible here.
stemmer = PorterStemmer()
class DataPreprocess:
    """Two-stage preprocessing of the review corpus.

    Stage one (``stopWordRemoval``) writes a stop-word-free, lemmatized copy
    of the corpus to ``stopwords_deleted.txt``. Stage two (``pos_Tagging``)
    reads that file back, POS-tags it and writes tagged sentences and nouns
    to further files.

    Both methods use module-level objects (``text``, ``sent_tokenizer``,
    ``reg_tokenizer``, ``replacer``, ``lemmatizer``, ``stop_words``) and work
    on files in the current working directory. Every output file is opened
    in append mode, so repeated runs add to existing content.
    """

    def __init__(self):
        pass

    def stopWordRemoval(self):
        """Append the corpus without stop words to ``stopwords_deleted.txt``.

        The module-level ``text`` is passed through ``replacer.replace`` and
        split into sentences. Each word is lower-cased; words not found in
        ``stop_words`` are lemmatized and written followed by a space. Every
        sentence is terminated with ``".\\n"``.

        Any exception is printed rather than raised.
        """
        try:
            # `with` closes the file even on error and avoids touching an
            # unbound name when open() itself fails.
            with open("stopwords_deleted.txt", "a") as stopWriteFile:
                for sent in sent_tokenizer.tokenize(replacer.replace(text)):
                    for word in reg_tokenizer.tokenize(sent):
                        lowered = word.lower()
                        if lowered not in stop_words:
                            stopWriteFile.write(
                                lemmatizer.lemmatize(lowered) + " ")
                    # Sentence terminator so the next stage can re-split
                    # the file into sentences.
                    stopWriteFile.write(".\n")
        except Exception as ex:
            print(str(ex))

    def pos_Tagging(self):
        """POS-tag ``stopwords_deleted.txt`` and write tags and nouns to files.

        For each sentence read from ``stopwords_deleted.txt``:

        * ``nounSents.txt`` receives every word tagged ``NN`` (each preceded
          by a space), then ``".\\n"``.
        * ``pos_tagged.txt`` receives the string form of the ``ne_chunk``
          tree, then a newline.
        * ``cSet_1.txt`` receives every word inside an ``NN`` chunk found by
          the regexp chunk parser, each followed by a space.

        Any exception is printed rather than raised.
        """
        try:
            # The grammar never changes, so build the parser once instead of
            # once per sentence.
            chunkParser_NN = nltk.RegexpParser(r"""NN: {<NN>} """)

            # The input file is opened first: if it is missing, no output
            # files are created.
            with open("stopwords_deleted.txt", "r") as stopReadFile, \
                    open("pos_tagged.txt", "a") as posWriteFile, \
                    open("cSet_1.txt", "a") as nounWriteFile, \
                    open("nounSents.txt", "a") as nounSentWriteFile:
                for review_sent in sent_tokenizer.tokenize(stopReadFile.read()):
                    review_word = word_tokenize(review_sent)
                    tagged = ne_chunk(nltk.pos_tag(review_word), binary=False)

                    # ne_chunk returns a Tree whose children can be nested
                    # named-entity subtrees; leaves() flattens them so each
                    # item is a plain (word, tag) pair.
                    for key, tag in tagged.leaves():
                        if tag == 'NN':
                            nounSentWriteFile.write(" " + key)
                    nounSentWriteFile.write(".\n")

                    posWriteFile.write(str(tagged) + "\n")

                    tree_NN = chunkParser_NN.parse(tagged)
                    for subtree in tree_NN.subtrees():
                        if subtree.label() == 'NN':
                            for word, _tag in subtree.leaves():
                                nounWriteFile.write(word + " ")  # candidate sets
        except Exception as ex:
            print(str(ex))

