I am trying several learning methods (Decision Tree, Naive Bayes, MaxEnt) and comparing their performance to find out which one works best. How do I implement the Decision Tree classifier and measure its accuracy?
import string
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
import nltk, nltk.classify.util, nltk.metrics
from nltk.classify import MaxentClassifier
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.probability import FreqDist, ConditionalFreqDist
from sklearn import cross_validation
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import movie_reviews
from nltk.classify import MaxentClassifier
from nltk.corpus import movie_reviews
from nltk.corpus import movie_reviews as mr
# English stop words, held in a set so each membership test below is O(1).
# The original referenced `stopwords` without importing it (NameError); it is
# reached here through the already-imported `nltk` package.
stop = set(nltk.corpus.stopwords.words('english'))

# One (tokens, label) pair per review file. Tokens that are stop words or that
# appear as a substring of string.punctuation are dropped (case-insensitive).
# The label is the first path component of the file id -- presumably the
# category directory ('neg' / 'pos'); verify against the corpus layout.
words = [
    (
        [
            w
            for w in mr.words(i)
            if w.lower() not in stop and w.lower() not in string.punctuation
        ],
        i.split('/')[0],
    )
    for i in mr.fileids()
]
def word_feats(words):
    """Return a bag-of-words feature dict for an iterable of tokens.

    Each distinct token in *words* becomes a key mapped to ``True``;
    repeated tokens collapse into a single entry. An empty iterable
    yields an empty dict.
    """
    return {word: True for word in words}
# File ids of the negative and positive reviews in the movie_reviews corpus.
negids = movie_reviews.fileids('neg')
posids = movie_reviews.fileids('pos')

# One (featureset, label) pair per review.
negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]

# First 3/4 of each class is used for training, the remainder for testing.
# Floor division keeps the cutoffs integers so they are valid slice indices
# on Python 3 (plain `/` would produce a float there).
negcutoff = len(negfeats) * 3 // 4
poscutoff = len(posfeats) * 3 // 4
trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
# The original never defined the held-out set that accuracy() is given.
testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

# Use NLTK's decision tree: the bare name `DecisionTreeClassifier` in this file
# is the scikit-learn class, which has no `.train` classmethod and does not
# accept (featureset, label) pairs. The keyword arguments below are those of
# nltk.classify.DecisionTreeClassifier.train.
# NOTE(review): training on the full vocabulary may be slow -- consider
# restricting the features to the most informative words.
DecisionTree_classifier = nltk.classify.DecisionTreeClassifier.train(
    trainfeats,
    binary=True,
    depth_cutoff=20,
    support_cutoff=20,
    entropy_cutoff=0.01,
)

# `accuracy` was never imported; call it through the imported nltk.classify.util.
print(nltk.classify.util.accuracy(DecisionTree_classifier, testfeats))
You will have to look at the source code (or the docstrings) of NLTK 3. There is also a chance that the examples given in the NLTK book will work without any changes; see http://www.nltk.org/book/ch06.html#DecisionTrees
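Alternatively, you could run the classifier on a held-out test sample and count the false positives and false negatives yourself.
The accuracy is then the fraction of test samples that were classified correctly, i.e. 1 - (false positives + false negatives) / (number of test samples).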