Machine Learning Algorithms: Practical Implementations

Find-S Algorithm Implementation in Python

This section demonstrates the Find-S algorithm using the Pandas library in Python.

First Find-S Example


import pandas as pd

# Names of the attribute columns; only their count is used below.
attributes = ['AirTemp', 'Temp', 'Humidity', 'Wind', 'Water', 'Forecast']
num_attributes = len(attributes)

filename = pd.read_csv('Weather.csv')
print(filename)

# Class label of each training example, listed in row order.
target = ['Yes', 'Yes', 'No', 'Yes']
print(target)

# '0' marks an attribute that has not been constrained by any positive example yet.
hypothesis = ['0'] * num_attributes
for i, label in enumerate(target):
    if label == 'Yes':
        for j in range(num_attributes):
            value = filename.iloc[i, j]
            current = hypothesis[j]
            # First positive example seen for this attribute: adopt its value.
            if current == '0':
                current = value
            # A disagreeing positive example generalises the attribute to '?'.
            if current != value:
                current = '?'
            hypothesis[j] = current
    print(i + 1, "Hypothesis :", hypothesis)
print("Final Hypothesis", hypothesis)

Second Find-S Example


import pandas as pd

def find_s(dataset, positive_label='mammals'):
    """Run Find-S over *dataset* and return the most specific hypothesis.

    Args:
        dataset: list of rows; the last element of every row is the class
            label and the preceding elements are the attribute values.
        positive_label: label value that marks a positive example.

    Returns:
        A list with one entry per attribute: a concrete value, '?' for an
        attribute generalised by disagreeing positive examples, or 'phi'
        when no positive example was seen. An empty dataset yields [].

    The hypothesis is printed after every row, positive or not.
    """
    if not dataset:
        return []
    col = len(dataset[0]) - 1
    hypothesis = ['phi'] * col
    for i, example in enumerate(dataset):
        if example[col] == positive_label:
            if 'phi' in hypothesis:
                # Seed from the CURRENT positive example (the first one seen),
                # not from a fixed row index.
                hypothesis = list(example[:col])
            for j in range(col):
                if example[j] != hypothesis[j]:
                    hypothesis[j] = '?'
        print('Hypothesis after {0}th iteration:'.format(i), hypothesis)
    return hypothesis


if __name__ == '__main__':
    df1 = pd.read_csv('dataset/mammal.csv')
    dataset = df1.values.tolist()
    hypothesis = find_s(dataset)
    print('Hypothesis after {0}th iteration:'.format(len(dataset) - 1), hypothesis)

Candidate Elimination Algorithm

Implementation of the Candidate Elimination algorithm using Pandas and NumPy.


import pandas as pd
import numpy as np

# Load the training examples from disk.
filename = pd.read_csv('Weather.csv')

# Every column except the last one is treated as an attribute.
concepts = np.array(filename.iloc[:, :-1])
print(concepts)

# The last column is treated as the class label of each example.
target = np.array(filename.iloc[:, -1])
print(target)

def learn(concepts, target):
    """Run this file's simplified Candidate Elimination procedure.

    Args:
        concepts: 2-D array of attribute values, one row per example.
        target: sequence of labels ('Yes' / 'No'), one per row of concepts.

    Returns:
        (specific_h, general_h): specific_h is a copy of the first row with
        attributes generalised to '?', and general_h is the list of general
        hypotheses with all-'?' rows removed.

    Note: the specific hypothesis is seeded from concepts[0] regardless of
    that row's label. Progress is printed after every example.
    """
    print("Initialization:")
    specific_h = concepts[0].copy()
    print(specific_h)
    num_attributes = len(specific_h)
    general_h = [['?'] * num_attributes for _ in range(num_attributes)]
    print(general_h)
    for i, h in enumerate(concepts):
        if target[i] == 'Yes':
            # Positive example: generalise every attribute that disagrees.
            for x in range(num_attributes):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == 'No':
            # Negative example: specialise the general boundary on the
            # attributes where it differs from the specific hypothesis.
            for x in range(num_attributes):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("Hypothesis ", i + 1)
        print(general_h)
        print(specific_h)
    # Drop hypotheses that constrain nothing; works for any attribute count
    # rather than only for six-attribute data.
    general_h = [row for row in general_h if any(value != '?' for value in row)]
    return specific_h, general_h


# Learn both boundaries from the loaded examples, then report them.
final_specific, final_general = learn(concepts, target)
for caption, boundary in (("Specific:", final_specific), ("General:", final_general)):
    print(caption, boundary)

Artificial Neural Networks (ANN)

Building an ANN using Scikit-learn for Iris dataset classification.


from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
import warnings

# Each fit() call below runs a single iteration, so silence the warnings
# that would otherwise be emitted on every call.
warnings.filterwarnings("ignore")
iris = load_iris()
values = iris.data
target = iris.target

# Split BEFORE scaling so that test-set statistics never influence the
# scaler (fitting it on the full data leaks information into training).
values_train, values_test, target_train, target_test = train_test_split(values, target, test_size=0.3)
standard_scaler = StandardScaler()
values_train = standard_scaler.fit_transform(values_train)
values_test = standard_scaler.transform(values_test)

n = 1000  # upper bound on the number of single-iteration fit() calls
loss_current = 999  # sentinel so the first loss difference is large
classifier = MLPClassifier(
    hidden_layer_sizes=(4, 3),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.5,
    warm_start=True,  # continue from the previous weights on every fit()
    max_iter=1,
    random_state=1,
    verbose=True
)
for _ in range(n):
    classifier.fit(values_train, target_train)
    loss_previous = loss_current
    loss_current = classifier.loss_
    # Stop as soon as the training loss changes by less than the tolerance.
    if abs(loss_current - loss_previous) < 0.0001:
        break
# Predictions are kept for inspection; only the score is printed.
target_prediction = classifier.predict(values_test)
print(classifier.score(values_test, target_test))
# Print the learned weight matrices, one per layer.
for i in classifier.coefs_:
    print(i, end='\n\n')

Naive Bayes Classifier

Implementing a Gaussian Naive Bayes classifier on a custom dataset.


from sklearn import preprocessing
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics

label_encoder = preprocessing.LabelEncoder()
dataset = pd.read_csv('naive_dataset.csv')
data_df = pd.DataFrame(dataset)

# Encode each column as integers; the same encoder object is refit per column.
data_df_encoded = data_df.apply(label_encoder.fit_transform)

# 'Play_tennis' is the label column; all remaining columns are features.
target = data_df_encoded['Play_tennis']
data = data_df_encoded.drop(columns=['Play_tennis'])

x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.3)

model = GaussianNB()
naive_bayes_train = model.fit(x_train, y_train)
y_prediction = naive_bayes_train.predict(x_test)

# Show the predicted labels followed by the true labels.
for labels in (y_prediction, y_test):
    print(list(labels))
print("Accuracy: ", metrics.accuracy_score(y_prediction, y_test))

K-Nearest Neighbors (KNN)

Applying KNN on the Iris dataset for classification.


from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

iris = load_iris()
print("Features:", iris.feature_names, "\n data:", iris.data, "\n target name:", iris.target_names, "\n target:", iris.target)

# Hold out a quarter of the samples for evaluation.
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.25)

classifier = KNeighborsClassifier()
classifier.fit(x_train, y_train)
print("Accuracy:", classifier.score(x_test, y_test))

prediction = classifier.predict(x_test)
print("Predicted data:", prediction)
print("y_test data :", y_test)

# A non-zero difference means the predicted class differs from the true one.
difference = prediction - y_test
print("Result is:", difference)
count = sum(1 for delta in difference if delta != 0)
print("Total points misclassified:", count)

Bayesian Network for Heart Disease Prediction

Creating and using a Bayesian Network to predict heart disease.


import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
import networkx as nx
import warnings

warnings.filterwarnings("ignore")

# Load the data and turn '?' placeholders into NaN.
heart_disease = pd.read_csv('heart.csv').replace('?', np.nan)

# Directed edges (parent, child) that define the network structure.
edges = [
    ('Age', 'Trestbps'),
    ('Age', 'Fbs'),
    ('Sex', 'Trestbps'),
    ('Exang', 'Trestbps'),
    ('Trestbps', 'Target'),
    ('Fbs', 'Target'),
    ('Target', 'Restecg'),
    ('Target', 'Thalach'),
    ('Target', 'Chol'),
]
model = BayesianNetwork(edges)

# Draw the graph with its nodes placed on a circle.
pos = nx.circular_layout(model)
nx.draw(model, pos=pos, with_labels=True)

# Learn the CPDs with the maximum likelihood estimator.
print('\n Learning CPD using Maximum likelihood estimators')
model.fit(heart_disease, estimator=MaximumLikelihoodEstimator)

# Query the distribution of 'Target' given the observed evidence.
print('\n Inferencing with Bayesian Network:')
heart_disease_infer = VariableElimination(model)
evidence = {
    'Age': 70,
    'Sex': 1,
    'Trestbps': 145,
    'Chol': 174,
    'Fbs': 0,
    'Restecg': 1,
    'Thalach': 125,
    'Exang': 1,
}
query = heart_disease_infer.query(variables=['Target'], evidence=evidence)
print(query)

Expectation-Maximization (EM) and K-Means Clustering

Comparing EM and K-Means clustering on a customer dataset.


import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import silhouette_score
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans

df = pd.read_csv("Mall_Customers.csv")
# Keep every column except the first and the last, as a NumPy array.
x = df.iloc[:, 1:-1].values
# Replace the first retained column with integer codes.
x[:, 0] = LabelEncoder().fit_transform(x[:, 0])

# Fit both models on the same data (mixture model first, then K-Means).
em_cluster = GaussianMixture(n_components=4)
km_cluster = KMeans(n_clusters=4, n_init=5)
em_cluster.fit(x)
km_cluster.fit(x)

em_predictions = em_cluster.predict(x)
km_predictions = km_cluster.predict(x)
em_silhouette_score = silhouette_score(x, em_predictions)
km_silhouette_score = silhouette_score(x, km_predictions)

# Report the cluster assignments and silhouette score of each model.
reports = (
    ("\nEM predictions", em_predictions,
     "Silhouette Score - Gaussian Mixture Model:", em_silhouette_score),
    ("\nKM predictions", km_predictions,
     "Silhouette Score - K-Means:", km_silhouette_score),
)
for heading, labels, score_caption, score in reports:
    print(heading)
    print(labels)
    print(score_caption, score)

# One scatter plot per model, coloured by the assigned cluster.
for labels in (em_predictions, km_predictions):
    plt.scatter(x[:, 1], x[:, 2], c=labels)
    plt.show()

Locally Weighted Linear Regression

Implementing locally weighted linear regression on a tips dataset.


import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def kernel(point, xmat, k):
    """Return the diagonal Gaussian weight matrix for one query point.

    Args:
        point: the query row (a 1 x n matrix row or a length-n vector).
        xmat: the m x n design matrix whose rows are the training points.
        k: bandwidth; smaller values weight nearby rows more heavily.
            Must be non-zero, otherwise the exponent divides by zero.

    Returns:
        An m x m np.matrix whose j-th diagonal entry is
        exp(-||point - xmat[j]||^2 / (2 * k**2)); off-diagonals are zero.
    """
    # Use the xmat argument (not a module-level global) so the function
    # works for any design matrix passed by the caller.
    rows = np.asarray(xmat)
    offsets = rows - np.asarray(point).ravel()
    squared_distances = np.sum(offsets * offsets, axis=1)
    # np.asmatrix instead of np.mat: the latter was removed in NumPy 2.0.
    return np.asmatrix(np.diag(np.exp(squared_distances / (-2.0 * k**2))))

def local_weight(point, xmat, ymat, k):
    """Solve the weighted least-squares problem around one query point.

    Args:
        point: the query row passed on to kernel().
        xmat: m x n design matrix (np.matrix, so * is matrix product).
        ymat: 1 x m row matrix of target values.
        k: kernel bandwidth passed on to kernel().

    Returns:
        The n x 1 coefficient matrix (X^T W X)^-1 (X^T W y^T).
    """
    weight = kernel(point, xmat, k)
    # Use the xmat argument rather than a module-level global so the
    # result always matches the data the caller passed in.
    W = (xmat.T * (weight * xmat)).I * (xmat.T * (weight * ymat.T))
    return W

def local_weight_regression(xmat, ymat, k):
    """Predict a value for every row of xmat with locally weighted regression.

    Args:
        xmat: m x n design matrix (np.matrix).
        ymat: 1 x m row matrix of target values.
        k: kernel bandwidth forwarded to local_weight().

    Returns:
        A 1-D float array of length m holding the prediction for each row.
    """
    m = np.shape(xmat)[0]
    ypred = np.zeros(m)
    for i in range(m):
        # The product is a 1 x 1 matrix; extract the scalar explicitly
        # because implicit size-1 array-to-scalar conversion is deprecated.
        ypred[i] = (xmat[i] * local_weight(xmat[i], xmat, ymat, k)).item()
    return ypred

data = pd.read_csv('tips.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)
# np.asmatrix instead of np.mat: the latter was removed in NumPy 2.0.
mbill = np.asmatrix(bill)  # 1 x m row matrix
mtip = np.asmatrix(tip)
m = np.shape(mbill)[1]
one = np.asmatrix(np.ones(m))
# Design matrix: a column of ones (intercept) next to the bill column.
X = np.hstack((one.T, mbill.T))
# Set k here
ypred = local_weight_regression(X, mtip, 0.5)
# Order the rows by total bill so the fitted curve is drawn left to right.
# NOTE(review): the next two lines rely on np.matrix fancy-indexing
# behaviour and are intentionally left unchanged.
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green')
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show()