# -*- coding: utf-8 -*-
"""
Created on Tue Sep 30 17:30:39 2014

@author: vlad
"""

%load_ext autoreload
%autoreload 2

import os
os.chdir('/home/vlad/ownCloud/Work/Teach/Pattern recognition/code')

import numpy as np
from sklearn import datasets
from matplotlib import pyplot as plt

# check the help for the 'datasets.make_classification' function
X, y = datasets.make_classification(1000, n_features=2,
                                    n_informative=2, n_redundant=0,
                                    n_clusters_per_class=1)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)

# In the following, the fitted linear models will contain .coef_ and
# .intercept_ attributes that you can inspect and use to plot the decision
# surface (a hyperplane); see the plotting sketch at the end of this file.

##########################
# 1. LDA:
# -train and classify
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
# (in old scikit-learn versions this class lived in sklearn.lda)
# check the help of the LDA class
model = LDA()
model.fit(X[0:500, :], y[0:500])

# check the performance on the new (held-out) data:
yp = model.predict(X[500:, :])
np.mean(yp != y[500:])            # error rate on the last 500 samples

# transform the data into the maximally separating subspace
Xt = model.transform(X)
plt.hist([Xt[y == 0, 0], Xt[y == 1, 0]], bins=30, color=['blue', 'red'])

# check the fitted posterior probabilities:
prb = model.predict_proba(X)
plt.hist([prb[y == 0, 0], prb[y == 1, 0]], bins=30, color=['blue', 'red'])

# see how the class label is chosen:
df = model.decision_function(X)   # returns h(x) = w'x + w_0

## TODO:
## -Plot the transformed data separately for the training data and the testing data.
## -Check how the sign (use numpy.sign()) of df is mapped onto class labels
##  (a sketch is given at the end of this file).
## -Generate a higher-dimensional dataset (n_features=5) - call it differently -
##  and study:
##  --the optimal subspace (plot different pairs of coordinates)
##  --select to have more redundant or linearly dependent features and see
##    how the LD subspace changes

##########################
# 2. Logistic regression:
# -train and classify
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(C=1000)   # large values imply less penalty (weaker regularization)

#X, y = datasets.make_classification(1000, n_features=2,
#                                    n_informative=2, n_redundant=0,
#                                    n_clusters_per_class=1)
#plt.scatter(X[:,0], X[:,1], c=y, cmap=plt.cm.Paired)

model.fit(X[0:500, :], y[0:500])
yp = model.predict(X[500:, :])
np.mean(yp != y[500:])            # compare with LDA

# check the fitted posterior probabilities:
prb = model.predict_proba(X)
plt.hist([prb[y == 0, 0], prb[y == 1, 0]], bins=30, color=['blue', 'red'])

## TODO:
## -Check what the .transform() method does (recent scikit-learn versions have
##  removed it from LogisticRegression; see the note at the end of this file).
## -Generate a higher-dimensional space, with some correlation structure
##  between variables, and see how the subspace generated by .transform() changes.

############################
# 3. Linear SVM
# -train and classify
from sklearn.svm import LinearSVC
# check the documentation!
model = LinearSVC()
model.fit(X[0:500, :], y[0:500])
yp = model.predict(X[500:, :])
np.mean(yp != y[500:])            # compare with LDA and logistic regression

## TODO:
## -Use model.decision_function(X) * (2*y - 1) to get the margins
##  (the labels must be mapped from {0, 1} to {-1, +1} first).
## -Plot the margin density (histogram); change C (e.g. 0.1, 10, 1000)
##  and see how the margin distribution changes (sketch at the end of this file).
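
##########################
# Sketch: plotting the decision line from .coef_ and .intercept_
# A minimal sketch, not part of the exercises above: it assumes the 2-D
# dataset X, y generated at the top and any fitted binary linear model
# (LDA, LogisticRegression or LinearSVC). The helper name
# 'plot_decision_line' is illustrative, not a scikit-learn function.
def plot_decision_line(fitted_model, X, y):
    w = fitted_model.coef_.ravel()        # weight vector (w_1, w_2)
    w0 = fitted_model.intercept_[0]       # bias term
    xs = np.linspace(X[:, 0].min(), X[:, 0].max(), 100)
    # points on the boundary satisfy w_1*x_1 + w_2*x_2 + w_0 = 0
    ys = -(w[0] * xs + w0) / w[1]
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)
    plt.plot(xs, ys, 'k-')
    plt.show()

# example: plot_decision_line(model, X, y)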
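
##########################
# Sketch: how the sign of the decision function maps onto class labels
# Assumes a fitted binary linear model ('model') and the data X from above;
# with classes coded 0/1, predict() should return 1 exactly where the
# decision function is positive.
df = model.decision_function(X)
yp = model.predict(X)
print(np.all((np.sign(df) > 0) == (yp == 1)))   # expected: True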
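
##########################
# Sketch: LDA subspace for a higher-dimensional dataset
# Illustrative only: the names X5, y5, lda5 are new. With 3 classes the LDA
# subspace has at most n_classes - 1 = 2 dimensions, so the transformed
# coordinates can be plotted directly; n_redundant controls how many features
# are linear combinations of the informative ones.
X5, y5 = datasets.make_classification(1000, n_features=5, n_informative=3,
                                      n_redundant=2, n_classes=3,
                                      n_clusters_per_class=1)
lda5 = LDA()
lda5.fit(X5[:500, :], y5[:500])
X5t = lda5.transform(X5)                  # at most 2 discriminant coordinates
plt.scatter(X5t[:, 0], X5t[:, 1], c=y5, cmap=plt.cm.Paired)
plt.show()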
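
##########################
# Note/sketch: coefficient-based feature selection with logistic regression
# In recent scikit-learn versions LogisticRegression no longer has a
# .transform() method; SelectFromModel provides a comparable, coefficient-based
# feature selection and is used here only as a stand-in (an assumption about
# what the original TODO was after). X5, y5 come from the sketch above.
from sklearn.feature_selection import SelectFromModel
selector = SelectFromModel(LogisticRegression(C=1000)).fit(X5, y5)
X5sel = selector.transform(X5)            # keeps features with large |coef_|
print(X5sel.shape)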
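
##########################
# Sketch: margin distribution of the linear SVM for different C values
# A sketch assuming the margin of sample i is s_i * f(x_i), where
# s_i = 2*y_i - 1 maps the labels {0, 1} to {-1, +1} and f is the decision
# function of the fitted LinearSVC.
for C in [0.1, 10, 1000]:
    svc = LinearSVC(C=C)
    svc.fit(X[:500, :], y[:500])
    margins = svc.decision_function(X[500:, :]) * (2 * y[500:] - 1)
    plt.hist(margins, bins=30, alpha=0.5, label='C=%g' % C)
plt.legend()
plt.show()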