#!/usr/bin/python


"""
    Starter code for the validation mini-project.
    The first step toward building your POI identifier!

    Start by loading/formatting the data

    After that, it's not our code anymore--it's yours!
"""

import pickle
import sys
sys.path.append("../tools/")
from feature_format import featureFormat, targetFeatureSplit

### Load the pickled project data set.
### The file is opened in binary mode (what pickle expects; text mode fails
### on Python 3) and inside a `with` block so the handle is always closed.
### NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open("../final_project/final_project_dataset.pkl", "rb") as dataset_file:
    data_dict = pickle.load(dataset_file)

### first element is our labels, any added elements are predictor
### features. Keep this the same for the mini-project, but you'll
### have a different feature list when you do the final project.
features_list = ["poi", "salary"]

### Convert the dictionary into an array restricted to features_list, then
### separate the label column from the predictor columns.
data = featureFormat(data_dict, features_list)
labels, features = targetFeatureSplit(data)


### it's all yours from here forward!  

from time import time

### train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
### the old sklearn.cross_validation module was removed in 0.20. Try the
### current location first and fall back so older installs keep working.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

### Hold out 30% of the data for testing; the fixed random_state makes the
### split reproducible from run to run.
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.3, random_state=42)

### Alternative without a hold-out set: train and test on the same data.
### Uncomment to compare against the split above (expect an optimistic,
### overfit accuracy figure).
# features_train = features
# features_test = features
# labels_train = labels
# labels_test = labels


from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

### NOTE: every print below is a call with ONE pre-formatted string argument.
### That form behaves the same on Python 2 and Python 3; a bare print
### statement is a syntax error on Python 3, and a parenthesised
### multi-argument print shows up as a tuple on Python 2.
print("Features: %d" % len(features[0]))


clf = DecisionTreeClassifier()

### Time the training step on its own.
t0 = time()
clf.fit(features_train, labels_train)
print("training time: %s s" % round(time() - t0, 3))

### Time only the scoring call; the elapsed time is captured before printing
### so console I/O is not counted as scoring time.
t1 = time()
score = clf.score(features_test, labels_test)
scoring_time = round(time() - t1, 3)
print(score)
print("scoring time: %s s" % scoring_time)

### Same metric computed explicitly from the predictions on the test set.
pred = clf.predict(features_test)
acc = accuracy_score(labels_test, pred)

print("Accuracy: %s" % acc)