# Code
from tensorflow.keras.datasets import mnist
import numpy as np
import pandas as pd
from matplotlib.pyplot import plot as plt
import sys
def train(X, y, learning_rate, epochs):
    """Fit a weight matrix and a scalar bias with full-batch gradient descent.

    Args:
        X: 2-D feature array; rows are samples, columns are features.
        y: 2-D target array (its second dimension sets the number of
            weight columns).
        learning_rate: Step size applied to every gradient update.
        epochs: Number of full passes over ``X``.

    Returns:
        A tuple ``(W, B, costs)`` where ``W`` has shape
        ``(X.shape[1], y.shape[1])``, ``B`` is a scalar bias and ``costs``
        is a 1-D array holding the cost recorded every 100th epoch.
    """
    # NOTE(review): `predict` and `cost` are defined elsewhere in the file;
    # presumably predict(X, W, B) is the model's forward pass - confirm.
    sample_count = len(X)
    W = np.random.rand(X.shape[1], y.shape[1])
    B = np.random.random()
    costs = np.array([])

    for epoch in range(epochs):
        y_hat = predict(X, W, B)
        residual = y_hat - y

        # Gradient has the form X^T (y_hat - y) / n; the bias is shared by
        # all outputs, so its gradient is the mean over every residual entry.
        W -= learning_rate * (np.dot(X.T, residual) / sample_count)
        B -= learning_rate * np.mean(residual)

        if epoch % 100:
            continue

        # Logged cost uses the prediction made before this epoch's update.
        current_cost = cost(y, y_hat)
        costs = np.append(costs, current_cost)
        print(f"Cost at epoch {epoch}: {current_cost}")

    return W, B, costs
def train_L1(X, y, learning_rate, epochs, Lambda):
    """Full-batch gradient descent with an L1 (sign-based) weight penalty.

    Args:
        X: 2-D feature array; rows are samples, columns are features.
        y: Target array compatible with a 1-D weight vector of length
            ``X.shape[1]``.
        learning_rate: Step size applied to every update.
        epochs: Number of full passes over ``X``.
        Lambda: Strength of the L1 penalty added to the weight gradient.

    Returns:
        A tuple ``(W, B, costs)``: the weight vector, the scalar bias and a
        1-D array of the cost recorded every 100th epoch.
    """
    # NOTE(review): `predict` and `cost` are defined elsewhere in the file.
    sample_count = len(X)
    W = np.random.random(X.shape[1])
    B = np.random.random()
    costs = np.array([])

    for epoch in range(epochs):
        y_hat = predict(X, W, B)
        residual = y_hat - y

        data_grad = np.dot(X.T, residual) / sample_count
        # np.sign(W) is the subgradient of the L1 norm of the weights.
        W -= learning_rate * (data_grad + Lambda * np.sign(W))
        B -= learning_rate * np.mean(residual)

        if epoch % 100:
            continue

        # The logged value is the plain data cost; the L1 term is not
        # included because `cost` only receives y and y_hat.
        current_cost = cost(y, y_hat)
        costs = np.append(costs, current_cost)
        print(f"Cost at epoch {epoch}: {current_cost}")

    return W, B, costs
def train_mini_batch(X, y, learning_rate, epochs, batch_size):
    """Train a weight vector and scalar bias with mini-batch gradient descent.

    Each epoch walks over ``X`` in consecutive slices of ``batch_size`` rows
    (the last slice may be shorter) and applies one update per slice.

    Args:
        X: 2-D feature array; rows are samples, columns are features.
        y: Target array aligned with the rows of ``X``.
        learning_rate: Step size applied to every update.
        epochs: Number of full passes over ``X``.
        batch_size: Number of rows per mini-batch.

    Returns:
        A tuple ``(W, B, costs)``: the weight vector, the scalar bias and a
        1-D array of the full-dataset cost recorded every 100th epoch.
    """
    # NOTE(review): `predict` and `cost` are defined elsewhere in the file.
    W = np.random.random(X.shape[1])
    B = np.random.random()
    costs = np.array([])
    sample_count = len(X)

    for i in range(epochs):
        for start in range(0, sample_count, batch_size):
            X_batch = X[start : start + batch_size]
            y_batch = y[start : start + batch_size]

            batch_error = predict(X_batch, W, B) - y_batch
            dW = np.dot(X_batch.T, batch_error) / len(X_batch)
            db = np.mean(batch_error)

            W -= learning_rate * dW
            B -= learning_rate * db

        if i % 100 == 0:
            # Evaluate on the whole dataset. The previous code passed the
            # last mini-batch's predictions together with the full `y`,
            # which pairs arrays of different lengths.
            costValue = cost(y, predict(X, W, B))
            costs = np.append(costs, costValue)
            print(f"Cost at epoch {i}: {costValue}")

    return W, B, costs
def train_rms_prop(X, y, learning_rate, epochs, beta, epsilon=sys.float_info.min):
    """Full-batch training with RMSProp-style per-parameter step scaling.

    A running average of squared gradients is kept for the weights and the
    bias; each step divides the gradient by the square root of that average.

    Args:
        X: 2-D feature array; rows are samples, columns are features.
        y: Target array compatible with a 1-D weight vector of length
            ``X.shape[1]``.
        learning_rate: Base step size.
        epochs: Number of full passes over ``X``.
        beta: Decay factor of the squared-gradient running average.
        epsilon: Constant added to the denominator of each step.

    Returns:
        A tuple ``(W, B, costs)``: the weight vector, the scalar bias and a
        1-D array of the cost recorded every 100th epoch.
    """
    # NOTE(review): the default epsilon is the smallest positive normalized
    # float, far below the 1e-8 commonly used - confirm this is intended.
    # NOTE(review): `predict` and `cost` are defined elsewhere in the file.
    sample_count = len(X)
    W = np.random.random(X.shape[1])
    B = np.random.random()
    costs = np.array([])

    # Running averages of the squared gradients.
    vW = np.zeros(X.shape[1])
    vB = 0

    for epoch in range(epochs):
        y_hat = predict(X, W, B)
        residual = y_hat - y

        grad_W = np.dot(X.T, residual) / sample_count
        grad_B = np.mean(residual)

        vW = beta * vW + (1 - beta) * grad_W**2
        vB = beta * vB + (1 - beta) * grad_B**2

        W -= learning_rate * grad_W / (np.sqrt(vW) + epsilon)
        B -= learning_rate * grad_B / (np.sqrt(vB) + epsilon)

        if epoch % 100:
            continue

        # Logged cost uses the prediction made before this epoch's update.
        current_cost = cost(y, y_hat)
        costs = np.append(costs, current_cost)
        print(f"Cost at epoch {epoch}: {current_cost}")

    return W, B, costs
def train_adam(X, y, learning_rate, epochs, beta1, beta2, epsilon=sys.float_info.min):
    """Full-batch training with Adam-style moment estimates.

    Keeps running averages of the gradient (first moment) and of the squared
    gradient (second moment), bias-corrects both, and steps along the ratio
    of the corrected first moment to the root of the corrected second moment.

    Args:
        X: 2-D feature array; rows are samples, columns are features.
        y: Target array compatible with a 1-D weight vector of length
            ``X.shape[1]``.
        learning_rate: Base step size.
        epochs: Number of full passes over ``X``.
        beta1: Decay factor of the first-moment average.
        beta2: Decay factor of the second-moment average.
        epsilon: Constant added to the denominator of each step.

    Returns:
        A tuple ``(W, B, costs)``: the weight vector, the scalar bias and a
        1-D array of the cost recorded every 100th epoch.
    """
    # NOTE(review): the default epsilon is the smallest positive normalized
    # float, far below the 1e-8 commonly used - confirm this is intended.
    # NOTE(review): `predict` and `cost` are defined elsewhere in the file.
    sample_count = len(X)
    W = np.random.random(X.shape[1])
    B = np.random.random()
    costs = np.array([])

    # First-moment (v*) and second-moment (s*) running averages.
    vW = np.zeros(X.shape[1])
    vB = 0
    sW = np.zeros(X.shape[1])
    sB = 0

    for epoch in range(epochs):
        y_hat = predict(X, W, B)
        residual = y_hat - y

        grad_W = np.dot(X.T, residual) / sample_count
        grad_B = np.mean(residual)

        vW = beta1 * vW + (1 - beta1) * grad_W
        vB = beta1 * vB + (1 - beta1) * grad_B
        sW = beta2 * sW + (1 - beta2) * grad_W**2
        sB = beta2 * sB + (1 - beta2) * grad_B**2

        # Bias correction uses the 1-based step count.
        step = epoch + 1
        vW_hat = vW / (1 - beta1**step)
        vB_hat = vB / (1 - beta1**step)
        sW_hat = sW / (1 - beta2**step)
        sB_hat = sB / (1 - beta2**step)

        W -= learning_rate * vW_hat / (np.sqrt(sW_hat) + epsilon)
        B -= learning_rate * vB_hat / (np.sqrt(sB_hat) + epsilon)

        if epoch % 100:
            continue

        # Logged cost uses the prediction made before this epoch's update.
        current_cost = cost(y, y_hat)
        costs = np.append(costs, current_cost)
        print(f"Cost at epoch {epoch}: {current_cost}")

    return W, B, costs
# Data preparation: cast, flatten, standardize and shuffle both splits.
# NOTE(review): `trainSet`, `testSet` and `standardize` are defined elsewhere;
# presumably each set is an (inputs, labels) pair - confirm.
xTrain, yTrain = trainSet[0].astype("float32"), trainSet[1].astype("int32")
xTest, yTest = testSet[0].astype("float32"), testSet[1].astype("int32")

# Collapse every sample to a single feature row, then standardize each split.
xTrain = standardize(xTrain.reshape(len(xTrain), -1))
xTest = standardize(xTest.reshape(len(xTest), -1))

# Shuffle inputs and labels with the same permutation so pairs stay aligned.
p1 = np.random.permutation(len(xTrain))
p2 = np.random.permutation(len(xTest))
xTrain, yTrain = xTrain[p1], yTrain[p1]
xTest, yTest = xTest[p2], yTest[p2]
def k_fold_cross_validation(X, y, K, lr, epochs):
    """Run K-fold cross-validation using `train` and `test`.

    The data is cut into ``K`` consecutive folds of ``len(X) // K`` rows.
    Each fold is held out once for validation while the model is trained on
    all remaining rows. Any ``len(X) % K`` trailing rows are never used for
    validation; they are part of every training split.

    Args:
        X: Feature array; rows are samples.
        y: Target array aligned with the rows of ``X``.
        K: Number of folds.
        lr: Learning rate forwarded to ``train``.
        epochs: Epoch count forwarded to ``train``.

    Returns:
        A tuple ``(mean_accuracy, costs)`` where ``mean_accuracy`` is the
        mean of the per-fold accuracies and ``costs`` is the cost history
        returned by ``train`` for the last fold only.
    """
    # NOTE(review): `test` is defined elsewhere in the file; presumably it
    # returns an accuracy score - confirm.
    fold_size = len(X) // K
    accuracies = []

    for fold in range(K):
        fold_no = fold + 1
        lo = fold * fold_size
        hi = lo + fold_size

        # Held-out slice for validation; everything else is used to train.
        x_valid, y_valid = X[lo:hi], y[lo:hi]
        X_train = np.concatenate([X[:lo], X[hi:]])
        Y_train = np.concatenate([y[:lo], y[hi:]])

        print(f"\nTraining at Iteration {fold_no} of {K}")
        w, b, last_costs = train(X_train, Y_train, lr, epochs)

        print(f"\nTesting at Iteration {fold_no} of {K}")
        acc = test(x_valid, y_valid, w, b)
        print(f"Accuracy at Iteration {fold_no} of {K}: {acc}")
        accuracies.append(acc)

    return np.mean(accuracies), last_costs