Machine Learning Algorithms: Practical Implementations
Find-S Algorithm Implementation in Python
This section demonstrates the Find-S algorithm using the Pandas library in Python.
First Find-S Example
import pandas as pd
# Find-S on Weather.csv: start from the most specific hypothesis and relax it
# just enough to cover every training row labelled 'Yes'.
attributes = ['AirTemp', 'Temp', 'Humidity', 'Wind', 'Water', 'Forecast']
num_attributes = len(attributes)

# NOTE: despite its name, `filename` holds the loaded DataFrame.
filename = pd.read_csv('Weather.csv')
print(filename)

# Class labels are listed by hand, one per training row, not read from the file.
target = ['Yes', 'Yes', 'No', 'Yes']
print(target)

# '0' marks an attribute for which no positive example has been seen yet.
hypothesis = ['0' for _ in range(num_attributes)]

for row_number, label in enumerate(target):
    if label == 'Yes':
        for column in range(num_attributes):
            observed = filename.iloc[row_number, column]
            # First positive example seen for this attribute: adopt its value.
            if hypothesis[column] == '0':
                hypothesis[column] = observed
            # A conflicting value means the attribute must be generalised.
            if hypothesis[column] != observed:
                hypothesis[column] = '?'
    print(row_number + 1, "Hypothesis :", hypothesis)

print("Final Hypothesis", hypothesis)
Second Find-S Example
import pandas as pd
# Find-S on the mammal data set: the last column is compared against the
# label 'mammals'; all preceding columns are treated as attributes.
df1 = pd.read_csv('dataset/mammal.csv')
dataset = df1.values.tolist()
row = len(dataset)
# Take the attribute count from the frame so an empty file does not raise
# IndexError on dataset[0].
col = df1.shape[1] - 1

# 'phi' marks the most specific hypothesis (nothing accepted yet).
hypothesis = ['phi'] * col

for i, example in enumerate(dataset):
    if example[col] == 'mammals':
        if 'phi' in hypothesis:
            # Seed from the positive example being processed. The previous
            # code always copied row 1, which is wrong whenever the first
            # positive example is any other row.
            hypothesis = list(example[:col])
        for j in range(col):
            # Any disagreement with a positive example generalises the attribute.
            if example[j] != hypothesis[j]:
                hypothesis[j] = '?'
    print('Hypothesis after {0}th iteration:'.format(i), hypothesis)

print('Hypothesis after {0}th iteration:'.format(row - 1), hypothesis)
Candidate Elimination Algorithm
Implementation of the Candidate Elimination algorithm using Pandas and NumPy.
import pandas as pd
import numpy as np
# Load the training table and split it: every column except the last becomes
# the attribute matrix, the last column becomes the label vector.
# NOTE: despite its name, `filename` holds the loaded DataFrame.
filename = pd.read_csv('Weather.csv')
attribute_columns = filename.iloc[:, :-1]
label_column = filename.iloc[:, -1]

concepts = np.array(attribute_columns)
print(concepts)

target = np.array(label_column)
print(target)
def learn(concepts, target):
    """Run a simplified Candidate Elimination pass over the training data.

    Args:
        concepts: Indexable sequence of training rows (e.g. a 2-D NumPy
            array). Row 0 is copied with ``.copy()`` to seed the specific
            hypothesis, so each row must support that method.
        target: Sequence of labels aligned with ``concepts``. Rows labelled
            'Yes' are treated as positive and rows labelled 'No' as negative;
            any other label leaves both hypotheses untouched.

    Returns:
        A ``(specific_h, general_h)`` tuple. ``specific_h`` is the seeded row
        with every conflicting attribute replaced by '?'. ``general_h`` is a
        list of rows, each constraining at most one attribute, with rows made
        up entirely of '?' removed.

    Progress is printed after initialisation and after every training row.
    """
    print("Initialization:")
    specific_h = concepts[0].copy()
    print(specific_h)

    n_attributes = len(specific_h)
    # Row x of general_h only ever carries a constraint in position x.
    general_h = [['?'] * n_attributes for _ in range(n_attributes)]
    print(general_h)

    for i, h in enumerate(concepts):
        if target[i] == 'Yes':
            # Positive example: drop every attribute it disagrees with.
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == 'No':
            # Negative example: keep a constraint only where the example
            # differs from the current specific hypothesis.
            for x in range(n_attributes):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("Hypothesis ", i + 1)
        print(general_h)
        print(specific_h)

    # Discard rows that constrain nothing. The all-'?' row is sized from the
    # data so this works for any number of attributes, not just six.
    unconstrained = ['?'] * n_attributes
    general_h = [row for row in general_h if row != unconstrained]
    return specific_h, general_h
# Learn both hypotheses from the loaded data, then report each one.
final_specific, final_general = learn(concepts, target)
for caption, learned in (("Specific:", final_specific), ("General:", final_general)):
    print(caption, learned)
Artificial Neural Networks (ANN)
Building an ANN using Scikit-learn for Iris dataset classification.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
import warnings
# Silence all warnings; each fit() call below stops after one iteration and
# would otherwise emit a warning every time.
warnings.filterwarnings("ignore")

iris = load_iris()
values = iris.data
target = iris.target

# Split BEFORE scaling. Fitting the scaler on the full data set would let the
# test rows influence the mean/variance used for training (data leakage).
values_train, values_test, target_train, target_test = train_test_split(
    values, target, test_size=0.3
)
standard_scaler = StandardScaler()
values_train = standard_scaler.fit_transform(values_train)
values_test = standard_scaler.transform(values_test)

n = 1000            # upper bound on training iterations
loss_current = 999  # sentinel so the first loss difference is large

# warm_start=True with max_iter=1 makes every fit() call continue from the
# previous weights for exactly one more iteration.
classifier = MLPClassifier(
    hidden_layer_sizes=(4, 3),
    activation='logistic',
    solver='sgd',
    learning_rate_init=0.5,
    warm_start=True,
    max_iter=1,
    random_state=1,
    verbose=True,
)

for _ in range(n):
    classifier.fit(values_train, target_train)
    loss_previous = loss_current
    loss_current = classifier.loss_
    # Stop once the loss changes by less than 1e-4 between iterations.
    if abs(loss_current - loss_previous) < 0.0001:
        break

target_prediction = classifier.predict(values_test)
print(classifier.score(values_test, target_test))

# Print the learned weight matrices, one per layer transition.
for i in classifier.coefs_:
    print(i, end='\n\n')
Naive Bayes Classifier
Implementing a Gaussian Naive Bayes classifier on a custom dataset.
from sklearn import preprocessing
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
label_encoder = preprocessing.LabelEncoder()
dataset = pd.read_csv('naive_dataset.csv')
data_df = pd.DataFrame(dataset)

# apply() refits the single encoder on each column in turn, so afterwards
# label_encoder only remembers the classes of the last column processed.
data_df_encoded = data_df.apply(label_encoder.fit_transform)

# 'Play_tennis' is the class column; everything else is a feature.
data = data_df_encoded.drop(columns=['Play_tennis'])
target = data_df_encoded['Play_tennis']

x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.30)

model = GaussianNB()
naive_bayes_train = model.fit(x_train, y_train)
y_prediction = naive_bayes_train.predict(x_test)

print(list(y_prediction))
print(list(y_test))
# accuracy_score expects (y_true, y_pred); the arguments were previously
# passed in the opposite order.
print("Accuracy: ", metrics.accuracy_score(y_test, y_prediction))
K-Nearest Neighbors (KNN)
Applying KNN on the Iris dataset for classification.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Train a default k-nearest-neighbours classifier on a random 75/25 split of
# the Iris data and report how many held-out samples it gets wrong.
iris = load_iris()
print("Features:", iris.feature_names, "\n data:", iris.data, "\n target name:", iris.target_names, "\n target:", iris.target)

x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.25
)

classifier = KNeighborsClassifier()
classifier.fit(x_train, y_train)
print("Accuracy:", classifier.score(x_test, y_test))

prediction = classifier.predict(x_test)
print("Predicted data:", prediction)
print("y_test data :", y_test)

# A non-zero entry means the predicted class differs from the true class.
difference = prediction - y_test
print("Result is:", difference)

count = sum(1 for delta in difference if delta != 0)
print("Total points misclassified:", count)
Bayesian Network for Heart Disease Prediction
Creating and using a Bayesian Network to predict heart disease.
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
import networkx as nx
import warnings
warnings.filterwarnings("ignore")

# Load the data and turn the '?' placeholder into NaN.
heart_disease = pd.read_csv('heart.csv').replace('?', np.nan)

# Model Bayesian Network: each pair is a directed edge (parent, child).
network_edges = [
    ('Age', 'Trestbps'),
    ('Age', 'Fbs'),
    ('Sex', 'Trestbps'),
    ('Exang', 'Trestbps'),
    ('Trestbps', 'Target'),
    ('Fbs', 'Target'),
    ('Target', 'Restecg'),
    ('Target', 'Thalach'),
    ('Target', 'Chol'),
]
model = BayesianNetwork(network_edges)

# Draw the network structure with its nodes arranged on a circle.
pos = nx.circular_layout(model)
nx.draw(model, pos=pos, with_labels=True)

# Learning CPDs using Maximum Likelihood Estimators
print('\n Learning CPD using Maximum likelihood estimators')
model.fit(heart_disease, estimator=MaximumLikelihoodEstimator)

# Inferencing with Bayesian Network
print('\n Inferencing with Bayesian Network:')
heart_disease_infer = VariableElimination(model)

# Observed values used as evidence when querying 'Target'.
observed_evidence = {
    'Age': 70,
    'Sex': 1,
    'Trestbps': 145,
    'Chol': 174,
    'Fbs': 0,
    'Restecg': 1,
    'Thalach': 125,
    'Exang': 1,
}
query = heart_disease_infer.query(variables=['Target'], evidence=observed_evidence)
print(query)
Expectation-Maximization (EM) and K-Means Clustering
Comparing EM and K-Means clustering on a customer dataset.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import silhouette_score
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans
df = pd.read_csv("Mall_Customers.csv")

# Keep every column except the first and the last, then label-encode the
# first remaining column in place.
x = df.iloc[:, 1:-1].values
x[:, 0] = LabelEncoder().fit_transform(x[:, 0])

# Fit both models with four clusters/components on the same feature matrix.
em_cluster = GaussianMixture(n_components=4)
km_cluster = KMeans(n_clusters=4, n_init=5)
for clusterer in (em_cluster, km_cluster):
    clusterer.fit(x)

em_predictions = em_cluster.predict(x)
em_silhouette_score = silhouette_score(x, em_predictions)

km_predictions = km_cluster.predict(x)
km_silhouette_score = silhouette_score(x, km_predictions)

# Report the assignments and silhouette score of each model.
reports = (
    ("\nEM predictions", em_predictions,
     "Silhouette Score - Gaussian Mixture Model:", em_silhouette_score),
    ("\nKM predictions", km_predictions,
     "Silhouette Score - K-Means:", km_silhouette_score),
)
for heading, assignments, score_caption, score in reports:
    print(heading)
    print(assignments)
    print(score_caption, score)

# One scatter plot per model: columns 1 and 2 of x, coloured by cluster.
for assignments in (em_predictions, km_predictions):
    plt.scatter(x[:, 1], x[:, 2], c=assignments)
    plt.show()
Locally Weighted Linear Regression
Implementing locally weighted linear regression on a tips dataset.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
def kernel(point, xmat, k):
    """Return the diagonal Gaussian weight matrix for one query point.

    Args:
        point: The query row (a 1 x n matrix or a length-n array).
        xmat: The m x n design matrix whose rows are the training points.
        k: Bandwidth; larger values give distant rows more weight.

    Returns:
        An m x m ``np.matrix`` whose j-th diagonal entry is
        ``exp(-||point - xmat[j]||**2 / (2 * k**2))`` and whose off-diagonal
        entries are zero.
    """
    # Use the xmat argument, not a module-level X, so the function works on
    # whatever matrix the caller passes in.
    rows = np.asarray(xmat)
    diffs = rows - np.asarray(point).reshape(1, -1)
    squared_distances = np.sum(diffs * diffs, axis=1)
    # np.asmatrix is used in place of np.mat, which NumPy 2.0 removed.
    return np.asmatrix(np.diag(np.exp(squared_distances / (-2.0 * k**2))))
def local_weight(point, xmat, ymat, k):
    """Solve the weighted least-squares problem centred on one query point.

    Args:
        point: The query row passed on to ``kernel``.
        xmat: The m x n design matrix.
        ymat: The targets as a 1 x m row (it is transposed before use).
        k: Kernel bandwidth passed on to ``kernel``.

    Returns:
        The n x 1 coefficient matrix ``(X^T W X)^-1 X^T W y``.
    """
    # Work on the arguments rather than a module-level X so the result always
    # corresponds to the data the caller supplied.
    xmat = np.asmatrix(xmat)
    ymat = np.asmatrix(ymat)
    weight = kernel(point, xmat, k)
    W = (xmat.T * (weight * xmat)).I * (xmat.T * (weight * ymat.T))
    return W
def local_weight_regression(xmat, ymat, k):
    """Predict every training row with its own locally weighted linear fit.

    Args:
        xmat: The m x n design matrix.
        ymat: The targets, passed through to ``local_weight``.
        k: Kernel bandwidth, passed through to ``local_weight``.

    Returns:
        A 1-D float array of length m holding one prediction per row of
        ``xmat``.
    """
    xmat = np.asmatrix(xmat)
    m = xmat.shape[0]
    ypred = np.zeros(m)
    for i in range(m):
        coefficients = local_weight(xmat[i], xmat, ymat, k)
        # The product is a 1 x 1 matrix; take the scalar out explicitly
        # instead of relying on NumPy's deprecated size-1 conversion.
        ypred[i] = (xmat[i] * coefficients)[0, 0]
    return ypred
data = pd.read_csv('tips.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)

# np.asmatrix replaces np.mat, which NumPy 2.0 removed.
mbill = np.asmatrix(bill)
mtip = np.asmatrix(tip)

# Design matrix: a column of ones (intercept) next to the bill column.
m = np.shape(mbill)[1]
one = np.asmatrix(np.ones(m))
X = np.hstack((one.T, mbill.T))

# Set k here
ypred = local_weight_regression(X, mtip, 0.5)

# Order the points by bill so the fitted curve is drawn left to right.
# A plain 1-D argsort on `bill` gives the same ordering as sorting the bill
# column of X, without fancy-indexing a matrix by a matrix.
SortIndex = np.argsort(bill)
xsort = bill[SortIndex]

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green')
ax.plot(xsort, ypred[SortIndex], color='red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show()