Code

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
    ConfusionMatrixDisplay,
    f1_score,
)
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
import numpy as np

Code

# Synthetic binary classification problem: 2000 samples with 4 features, all of
# them informative. random_state pins the generated data so every metric
# computed from it is reproducible from run to run.
x, y = make_classification(
    n_features=4,
    n_classes=2,
    n_samples=2000,
    n_informative=4,
    n_redundant=0,
    random_state=42,
)

Code

# Scatter the first two feature columns against each other, coloured by label.
# Only columns 0 and 1 are drawn; the remaining features are not shown.
plt.scatter(x[:, 0], x[:, 1], c=y, marker="*")

Code

# Display the dimensions of the feature matrix and of the label vector.
x.shape, y.shape

Code

# Hold out 30% of the samples for evaluation. random_state fixes the shuffle so
# both models below are always trained and scored on the same split.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

Code

# Sanity check: print the shapes of the train/test features and labels.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

Naive Bayes using scikit-learn's built-in GaussianNB

Code

# scikit-learn's Gaussian Naive Bayes estimator with its default settings.
classifier = GaussianNB()

Code

# Fit the scikit-learn estimator on the training portion of the data.
classifier.fit(x_train, y_train)

Code

# Predict labels for the held-out test features with the scikit-learn model.
y_pred = classifier.predict(x_test)

Code

# Fraction of test samples whose predicted label matches the true label.
accuracy_score(y_test, y_pred)

Code

# f1_score expects (y_true, y_pred). With average="weighted" the per-class
# weights are the supports of the *true* labels, so the argument order matters.
f1_score(y_test, y_pred, average="weighted")

Code

# Plot the 2x2 confusion matrix of the test predictions (true labels first,
# predictions second); labels=[0, 1] fixes the row/column order.
ConfusionMatrixDisplay(
    confusion_matrix=confusion_matrix(y_test, y_pred, labels=[0, 1]),
    display_labels=[0, 1],
).plot()

Code

# Per-class precision / recall / F1 summary, with readable class names.
print(classification_report(y_test, y_pred, target_names=["class 0", "class 1"]))

Naive Bayes from Scratch

Code

class naive_bayes:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Each feature is modelled, per class, as an independent normal
    distribution described by its mean and standard deviation. Prediction
    picks the class with the highest posterior score.

    Scores are accumulated in log space: multiplying many small densities
    underflows to 0.0 for every class, which would make ``argmax`` always
    return the first class. A small variance floor keeps constant features
    from producing a zero standard deviation (division by zero / NaN).

    Args:
        var_smoothing: Portion of the largest feature variance that is added
            to every per-class variance for numerical stability.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, x, y):
        """Estimate per-class, per-feature mean and standard deviation.

        Args:
            x: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of class labels, one per row of ``x``.

        Raises:
            ValueError: If ``x`` is not 2-D, is empty, or its row count does
                not match the length of ``y``.
        """
        x = np.asarray(x, dtype=float)
        y = np.asarray(y)
        if x.ndim != 2:
            raise ValueError("x must be a 2-D array of shape (n_samples, n_features)")
        if x.shape[0] == 0 or x.shape[1] == 0:
            raise ValueError("x must contain at least one sample and one feature")
        if y.shape[0] != x.shape[0]:
            raise ValueError("x and y must contain the same number of samples")

        self.x, self.y = x, y
        # np.unique returns the labels sorted, so the class order (and hence
        # tie-breaking in predict) is deterministic, unlike iterating a set.
        self.classes = list(np.unique(y))

        # Variance floor relative to the widest feature; fall back to the raw
        # smoothing value when every feature is constant across the data set.
        epsilon = self.var_smoothing * x.var(axis=0).max()
        if epsilon == 0:
            epsilon = self.var_smoothing

        self.parameters = []
        for c in self.classes:
            x_where_c = x[y == c]
            self.parameters.append(
                [
                    {"mean": col.mean(), "std": np.sqrt(col.var() + epsilon)}
                    for col in x_where_c.T
                ]
            )

    def predict(self, x):
        """Return the most probable class label for every row of ``x``.

        Args:
            x: 2-D array-like of shape (n_samples, n_features) with the same
                number of features that was passed to ``fit``.

        Returns:
            A list with one predicted label per row. The list is also stored
            on ``self.prediction``.

        Raises:
            ValueError: If ``x`` is not 2-D or its feature count differs from
                the training data.
        """
        x = np.asarray(x, dtype=float)
        if x.shape[:1] == (0,):
            self.prediction = []
            return self.prediction

        n_features = len(self.parameters[0])
        if x.ndim != 2 or x.shape[1] != n_features:
            raise ValueError(
                f"x must be a 2-D array with {n_features} feature columns"
            )

        # Priors depend only on the training labels, so compute them once
        # instead of once per row and class.
        priors = [np.mean(self.y == c) for c in self.classes]

        log_scores = np.empty((x.shape[0], len(self.classes)))
        for i, (prior, params) in enumerate(zip(priors, self.parameters)):
            means = np.array([p["mean"] for p in params])
            stds = np.array([p["std"] for p in params])
            # log P(c) + sum_j log N(x_j | mean_cj, std_cj)
            log_scores[:, i] = np.log(prior) + self._log_gaussian(
                x, means, stds
            ).sum(axis=1)

        self.prediction = [self.classes[k] for k in log_scores.argmax(axis=1)]
        return self.prediction

    def gaussian_prob(self, x, mean, std):
        """Return the normal probability density of ``x`` for ``mean``/``std``."""
        return (1 / (std * np.sqrt(2 * np.pi))) * np.exp(
            -((x - mean) ** 2) / (2 * std**2)
        )

    @staticmethod
    def _log_gaussian(x, mean, std):
        """Return the log of the normal density, computed without exponentiating."""
        return -np.log(std * np.sqrt(2 * np.pi)) - (x - mean) ** 2 / (2 * std**2)

Code

# Instantiate the hand-written Gaussian Naive Bayes classifier.
model = naive_bayes()

Code

# Fit the hand-written classifier on the same training split.
model.fit(x_train, y_train)

Code

# Predict the test labels with the hand-written model; this rebinds y_pred.
y_pred = model.predict(x_test)

Code

# Fraction of test samples whose predicted label matches the true label.
accuracy_score(y_test, y_pred)

Code

# f1_score expects (y_true, y_pred). With average="weighted" the per-class
# weights are the supports of the *true* labels, so the argument order matters.
f1_score(y_test, y_pred, average="weighted")

Code

# Plot the 2x2 confusion matrix of the test predictions (true labels first,
# predictions second); labels=[0, 1] fixes the row/column order.
ConfusionMatrixDisplay(
    confusion_matrix=confusion_matrix(y_test, y_pred, labels=[0, 1]),
    display_labels=[0, 1],
).plot()

Code

# Per-class precision / recall / F1 summary. Uses the same target_names as the
# report printed for the scikit-learn model so the two reports line up.
print(classification_report(y_test, y_pred, target_names=["class 0", "class 1"]))