I'm a night owl. I naturally enjoy staying up. That being said, over the years I've had to reduce how late I stay up in order to get enough sleep. I find that eight hours is my sweet sleep spot, and getting less than that really does affect my work and my life.
I was interested in whether, in a sample of 10,000 adults, a set of surveyed variables could predict the sleep-quality class each adult reported (Poor, Fair, Good, Excellent).
I want to be strict about my inference because, unlike a traditional statistical test, I am not starting with a hypothesis ("Does caffeine affect sleep?"). Instead I'm throwing in a bunch of variables, using LassoCV to select the best ones, and using that model on my test set. That workflow runs against the assumptions of classical statistics.
"The classical inferential theory of mathematical statistics is based on the philosophy that all the models to fit, all the hypotheses to test, and all the parameters to do inference for are fixed prior to seeing the data. This is not how statistics is practiced. The analyst often explores the data to find the right model to fit to the data, the right hypothesis to test, and so on. As Ronald Coase once said, 'if you torture the data long enough, it will confess.'"
(Post-Selection Inference, by Arun K. Kuchibhotla, John E. Kolassa, and Todd A. Kuffner)
To deal with this issue, I'm adding a method used for post-selection inference called sample splitting: I split the data in half, use the first half for selection, and use the second half for inference.
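In sketch form, the idea looks like this (a minimal sketch on synthetic placeholder data, not the sleep dataset; the real split happens later in the notebook):
import numpy as np
from sklearn.model_selection import train_test_split
# Sample splitting in miniature: one half selects, the other half infers
rng = np.random.default_rng(0)
X_all = rng.normal(size=(100, 5))      # toy data: 100 rows, 5 candidate features
y_all = rng.integers(0, 4, size=100)   # toy 4-class target
X_select, X_infer, y_select, y_infer = train_test_split(
    X_all, y_all, test_size=0.5, random_state=0)
# Selection (e.g., LassoCV) uses only (X_select, y_select); inference and
# evaluation use only (X_infer, y_infer), so the selection step cannot
# leak into the inferential results.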
First Steps
First, read in the libraries.
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LassoCV
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
Read in the data set. This comes from Kaggle. The data are synthetic but reflect real-world patterns.
sleep = pd.read_csv("synthetic_coffee_health_10000.csv")
sleep
 | ID | Age | Gender | Country | Coffee_Intake | Caffeine_mg | Sleep_Hours | Sleep_Quality | BMI | Heart_Rate | Stress_Level | Physical_Activity_Hours | Health_Issues | Occupation | Smoking | Alcohol_Consumption
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | 1 | 40 | Male | Germany | 3.5 | 328.1 | 7.5 | Good | 24.9 | 78 | Low | 14.5 | NaN | Other | 0 | 0 |
1 | 2 | 33 | Male | Germany | 1.0 | 94.1 | 6.2 | Good | 20.0 | 67 | Low | 11.0 | NaN | Service | 0 | 0 |
2 | 3 | 42 | Male | Brazil | 5.3 | 503.7 | 5.9 | Fair | 22.7 | 59 | Medium | 11.2 | Mild | Office | 0 | 0 |
3 | 4 | 53 | Male | Germany | 2.6 | 249.2 | 7.3 | Good | 24.7 | 71 | Low | 6.6 | Mild | Other | 0 | 0 |
4 | 5 | 32 | Female | Spain | 3.1 | 298.0 | 5.3 | Fair | 24.1 | 76 | Medium | 8.5 | Mild | Student | 0 | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9995 | 9996 | 50 | Female | Japan | 2.1 | 199.8 | 6.0 | Fair | 30.5 | 50 | Medium | 10.1 | Moderate | Healthcare | 0 | 1 |
9996 | 9997 | 18 | Female | UK | 3.4 | 319.2 | 5.8 | Fair | 19.1 | 71 | Medium | 11.6 | Mild | Service | 0 | 0 |
9997 | 9998 | 26 | Male | China | 1.6 | 153.4 | 7.1 | Good | 25.1 | 66 | Low | 13.7 | NaN | Student | 1 | 1 |
9998 | 9999 | 40 | Female | Finland | 3.4 | 327.1 | 7.0 | Good | 19.3 | 80 | Low | 0.1 | NaN | Student | 0 | 0 |
9999 | 10000 | 42 | Female | Brazil | 2.9 | 277.5 | 6.4 | Good | 28.1 | 72 | Low | 9.8 | NaN | Student | 1 | 0 |
10000 rows × 16 columns
Capture the variables that I want to potentially use in the model.
sleep = sleep[['Coffee_Intake', 'Age', 'BMI', 'Heart_Rate', 'Stress_Level','Alcohol_Consumption', 'Physical_Activity_Hours', 'Sleep_Quality']]
Confirm that two of the variables are categorical and not continuous.
sleep['Stress_Level'].unique()
array(['Low', 'Medium', 'High'], dtype=object)
sleep['Sleep_Quality'].unique()
array(['Good', 'Fair', 'Excellent', 'Poor'], dtype=object)
Pre-Processing
Declare the following variables:
- X as the Feature Matrix (features of sleep without the response)
- y as the response vector (target)
X_table = sleep[['Stress_Level','Coffee_Intake', 'Age', 'BMI', 'Heart_Rate', 'Alcohol_Consumption', 'Physical_Activity_Hours']]
X = sleep[['Stress_Level','Coffee_Intake', 'Age', 'BMI', 'Heart_Rate','Alcohol_Consumption', 'Physical_Activity_Hours']].values
X
array([['Low', 3.5, 40, ..., 78, 0, 14.5],
       ['Low', 1.0, 33, ..., 67, 0, 11.0],
       ['Medium', 5.3, 42, ..., 59, 0, 11.2],
       ...,
       ['Low', 1.6, 26, ..., 66, 1, 13.7],
       ['Low', 3.4, 40, ..., 80, 0, 0.1],
       ['Low', 2.9, 42, ..., 72, 0, 9.8]], dtype=object)
Convert the Stress_Level categories to numeric codes using scikit-learn's LabelEncoder, overriding classes_ so the codes follow the natural Low < Medium < High order rather than alphabetical order.
from sklearn import preprocessing
# Encode Stress_Level ordinally; overriding classes_ forces the
# Low=0, Medium=1, High=2 order instead of LabelEncoder's alphabetical order
le_SL = preprocessing.LabelEncoder()
le_SL.fit(['Low', 'Medium', 'High'])
le_SL.classes_ = np.array(['Low', 'Medium', 'High'])
X[:,0] = le_SL.transform(X[:,0])
X
array([[0, 3.5, 40, ..., 78, 0, 14.5],
       [0, 1.0, 33, ..., 67, 0, 11.0],
       [1, 5.3, 42, ..., 59, 0, 11.2],
       ...,
       [0, 1.6, 26, ..., 66, 1, 13.7],
       [0, 3.4, 40, ..., 80, 0, 0.1],
       [0, 2.9, 42, ..., 72, 0, 9.8]], dtype=object)
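For reference, the same ordinal mapping could be done directly in pandas with an ordered Categorical. This is just an alternative sketch, not what the rest of the notebook uses:
# Alternative ordinal encoding: Low -> 0, Medium -> 1, High -> 2
stress_codes = pd.Categorical(
    sleep['Stress_Level'], categories=['Low', 'Medium', 'High'], ordered=True
).codes
stress_codes[:5]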
Now we can define the target variable.
y = sleep['Sleep_Quality']
y
 | Sleep_Quality
---|---
0 | Good
1 | Good
2 | Fair
3 | Good
4 | Fair
... | ...
9995 | Fair
9996 | Fair
9997 | Good
9998 | Good
9999 | Good
10000 rows × 1 columns
# Same approach for the target: force the order Poor=0, Fair=1, Good=2, Excellent=3
le_SQ = preprocessing.LabelEncoder()
le_SQ.fit(['Poor', 'Fair', 'Good', 'Excellent'])
le_SQ.classes_ = np.array(['Poor', 'Fair', 'Good', 'Excellent'])
y = le_SQ.transform(y)
y
array([2, 2, 1, ..., 2, 2, 2])
np.unique(y)
array([0, 1, 2, 3])
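Before modeling, it's worth checking how balanced the four classes are; the decision tree later suggests very few Excellent records. A quick check using the encoder we just fit:
# Counts per decoded Sleep_Quality label
pd.Series(le_SQ.inverse_transform(y)).value_counts()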
Training
train_test_split (imported above) returns four arrays, which we name:
X_trainset, X_testset, y_trainset, y_testset
We call it with the parameters X, y, test_size=0.5, and random_state=3: X and y are the arrays to split, test_size=0.5 holds out half of the data (the sample-splitting step), and random_state makes the split reproducible.
X_trainset, X_testset, y_trainset, y_testset = train_test_split(X, y, test_size=0.5, random_state=3)
X_trainset
array([[0, 1.7, 41, ..., 74, 0, 3.5],
       [1, 3.4, 60, ..., 74, 1, 0.8],
       [0, 2.7, 22, ..., 69, 0, 14.4],
       ...,
       [1, 3.4, 59, ..., 75, 0, 0.3],
       [0, 0.0, 38, ..., 69, 1, 5.3],
       [0, 2.4, 45, ..., 71, 0, 2.6]], dtype=object)
Practice
Print the shape of X_trainset and y_trainset, and ensure that the dimensions match.
print('Shape of X training set {}'.format(X_trainset.shape), '&', 'Shape of y training set {}'.format(y_trainset.shape))
Shape of X training set (5000, 7) & Shape of y training set (5000,)
Print the shape of X_testset and y_testset, and ensure that the dimensions match.
print('Shape of X test set {}'.format(X_testset.shape), '&', 'Shape of y test set {}'.format(y_testset.shape))
Shape of X test set (5000, 7) & Shape of y test set (5000,)
# Additional pre-processing: LassoCV with 10-fold CV picks a regularization
# strength; the ordinally encoded Sleep_Quality is treated as a numeric
# target here purely for feature selection
lasso_cv = LassoCV(cv=10)
lasso_cv.fit(X_trainset, y_trainset)
LassoCV(cv=10)
# Feature selection: keep the columns whose Lasso coefficients are
# (effectively) non-zero
sfm = SelectFromModel(lasso_cv, prefit=True)
X_train_selected = sfm.transform(X_trainset)
X_test_selected = sfm.transform(X_testset)
print(X_train_selected)
[[0 1.7 41 ... 74 0 3.5]
 [1 3.4 60 ... 74 1 0.8]
 [0 2.7 22 ... 69 0 14.4]
 ...
 [1 3.4 59 ... 75 0 0.3]
 [0 0.0 38 ... 69 1 5.3]
 [0 2.4 45 ... 71 0 2.6]]
Six of the seven features were selected: Stress Level, Coffee Intake, Age, Heart Rate, Alcohol Consumption, and Physical Activity Hours. BMI was dropped.
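Rather than counting columns by eye, we can ask the selector directly which columns survived; this check uses the X_table column order defined earlier:
# Map SelectFromModel's boolean support mask back to column names
selected_features = np.array(X_table.columns)[sfm.get_support()]
print(selected_features)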
Modeling
We will first create an instance of the DecisionTreeClassifier called sleepTree. Inside the classifier, specify criterion="entropy" so we can see the information gain of each node, and max_depth=5 to keep the tree readable.
sleepTree = DecisionTreeClassifier(criterion="entropy", max_depth = 5)
sleepTree # show the parameters we set
DecisionTreeClassifier(criterion='entropy', max_depth=5)
Next, we will fit the model using the selected training feature matrix X_train_selected and the training response vector y_trainset.
sleepTree.fit(X_train_selected,y_trainset)
DecisionTreeClassifier(criterion='entropy', max_depth=5)
Prediction
predTree = sleepTree.predict(X_test_selected)
We can print out predTree and y_testset if we want to visually compare the predictions to the actual values.
print(predTree[0:5])
print(y_testset[0:5])
[2 1 1 2 2]
[2 1 1 2 2]
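Because the labels are ordinally encoded, we can also decode a few predictions back to the original category names:
# Decode the first few predictions and the corresponding true labels
print(le_SQ.inverse_transform(predTree[0:5]))
print(le_SQ.inverse_transform(y_testset[0:5]))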
Evaluation
Next, let's import metrics from sklearn and check the accuracy of our model.
from sklearn import metrics
import matplotlib.pyplot as plt
print("DecisionTrees's Accuracy: ", metrics.accuracy_score(y_testset, predTree))
DecisionTrees's Accuracy: 0.8672
X_test_selected
array([[0, 1.3, 18, ..., 53, 0, 6.3],
       [1, 3.7, 18, ..., 87, 0, 3.1],
       [1, 4.5, 49, ..., 69, 1, 1.0],
       ...,
       [2, 4.8, 32, ..., 78, 0, 7.8],
       [0, 3.6, 54, ..., 51, 0, 13.5],
       [0, 0.5, 43, ..., 62, 0, 4.6]], dtype=object)
X_testset
array([[0, 1.3, 18, ..., 53, 0, 6.3],
       [1, 3.7, 18, ..., 87, 0, 3.1],
       [1, 4.5, 49, ..., 69, 1, 1.0],
       ...,
       [2, 4.8, 32, ..., 78, 0, 7.8],
       [0, 3.6, 54, ..., 51, 0, 13.5],
       [0, 0.5, 43, ..., 62, 0, 4.6]], dtype=object)
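The two arrays above look similar because numpy elides the middle columns. A quicker check than eyeballing the reprs is to compare shapes directly; if BMI really was dropped, the selected matrix should have one fewer column:
# Column counts before and after selection
print('Full test set:    ', X_testset.shape)
print('Selected test set:', X_test_selected.shape)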
The accuracy score computed here is subset accuracy: the label predicted for each sample must exactly match the corresponding label in y_testset. In multilabel classification, the entire set of predicted labels for a sample must match the true set for that sample to score 1.0; otherwise it scores 0.0.
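Accuracy alone hides per-class performance, which matters here because the classes are imbalanced. classification_report (imported at the top but not yet used) breaks out per-class precision and recall; the labels and target_names below follow the Poor/Fair/Good/Excellent encoding defined earlier:
# Per-class precision, recall, and F1 alongside overall accuracy
print(classification_report(y_testset, predTree,
                            labels=[0, 1, 2, 3],
                            target_names=['Poor', 'Fair', 'Good', 'Excellent'],
                            zero_division=0))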
Visualization
Let's visualize the tree.
from sklearn import tree
%matplotlib inline
class_names= np.unique(y_trainset)
class_names
array([0, 1, 2, 3])
plt.figure(figsize=(35,30))
featureNames = ['Stress Level', 'Coffee Intake', 'Age', 'Heart Rate', 'Alcohol Consumption', 'Physical Activity Hours']  # the six selected features; BMI was dropped
classes = ['Poor', 'Fair', 'Good', 'Excellent']
tree.plot_tree(sleepTree, feature_names=featureNames, class_names=classes, filled=True, rounded=True, fontsize=10)
plt.show()
This decision tree surfaces a few interesting results.
Excellent sleep, of which there are only 15 records, seems to correspond to a very leisurely life -- low heart rate, low activity, low coffee intake. That said, it is too small a sample to draw any conclusions from.
The big result here is that, compared to stress level, none of the other variables seem to matter much for sleep quality. High stress is correlated with poor sleep, and medium stress with "fair" sleep. I think that is interesting.
This of course feels like a self-perpetuating cycle -- bad sleep increases stress, and more stress leads to more bad sleep.
This all makes me think about how, every so often, there's an interview with the oldest woman in the world. She tends to have some habits that might be characterized as "bad", like eating saturated fats and smoking, but she is also unmarried, has not worked a day in her life, and has not made a housing payment in 50 years. Low stress right there. Probably good sleep.
Obviously, that's one individual, but it does remind me of how important trying to reduce stress is to every facet of one's life, including sleep.