A Simple Template for Machine Learning in Python
The following steps outline a simple machine learning workflow in Python:
- Load dataset
- Split the dataset into train and test subsets
- Create a classifier for the classification task
- Fit the classifier on the training subset
- Predict the labels of the test subset with the fitted classifier
- Evaluate the predictions (F1 score and accuracy)
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
def train():
    """Train a decision tree on the sklearn digits dataset and report its scores.

    Loads the digits dataset, holds out 30% of the samples as a test subset,
    fits a ``DecisionTreeClassifier`` on the remaining samples, predicts the
    test labels, and prints the macro-averaged F1 score and the accuracy.

    Both the split and the classifier use fixed ``random_state`` values.
    Nothing is returned; the scores are only printed.
    """
    # Load your data set, e.g. the sklearn digits dataset
    digits = datasets.load_digits()

    # Split the data set into random train and test subsets
    features_train, features_test, labels_train, labels_test = train_test_split(
        digits.data, digits.target, test_size=0.3, random_state=42
    )

    # Create a classifier, e.g. a DecisionTree classifier
    classifier = DecisionTreeClassifier(random_state=11)

    # Fit the train dataset in the classifier
    classifier.fit(features_train, labels_train)

    # Use the trained model to make predictions against the test dataset
    predictions = classifier.predict(features_test)

    # Calculate the prediction scores
    f1_score = metrics.f1_score(labels_test, predictions, average="macro")
    accuracy = metrics.accuracy_score(labels_test, predictions)

    # print() as a function: the Python 2 print statement is a SyntaxError
    # on Python 3. The arguments are unchanged, so the output text is the same.
    print("F1 score = ", f1_score)
    print("Accuracy = ", accuracy)