Loading Data

Code
from collections import Counter
from pathlib import Path
from sys import float_info
from typing import Literal, Callable
import matplotlib.pyplot as plt
import numpy as np
from cv2 import flip, getRotationMatrix2D, resize, warpAffine
from kagglehub import dataset_download
from numpy import float64, uint, array
from numpy.typing import NDArray
from sklearn.metrics import (
    classification_report,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split

Loading Data

Code
path: str = dataset_download(handle="rashikrahmanpritom/covid-wwo-pneumonia-chest-xray")

covid_without_PNEUMONIA_train_path = Path(f"{path}/Data/train/covid_without_PNEUMONIA")
covid_with_PNEUMONIA_train_path = Path(f"{path}/Data/train/covid_with_PNEUMONIA")

covid_without_PNEUMONIA_test_path = Path(f"{path}/Data/test/covid_without_PNEUMONIA")
covid_with_PNEUMONIA_test_path = Path(f"{path}/Data/test/covid_with_PNEUMONIA")
Code
covid_without_PNEUMONIA_path: list[Path] = [
    covid_without_PNEUMONIA_train_path,
    covid_without_PNEUMONIA_test_path,
]

covid_with_PNEUMONIA_path: list[Path] = [
    covid_with_PNEUMONIA_train_path,
    covid_with_PNEUMONIA_test_path,
]
Code
np.random.seed(seed=42)
Code
label2category: dict[str, int] = {
    "covid_without_PNEUMONIA": 0,
    "covid_with_PNEUMONIA": 1,
}
category2label: dict[int, str] = {
    0: "covid_without_PNEUMONIA",
    1: "covid_with_PNEUMONIA",
}
Code
data: list[dict[str, str]] = []

for path in covid_without_PNEUMONIA_path:
    for img in path.glob(pattern="*.jpeg"):
        data.append({"img_path": str(object=img), "label": "covid_without_PNEUMONIA"})

for path in covid_with_PNEUMONIA_path:
    for img in path.glob(pattern="*.jpeg"):
        data.append({"img_path": str(object=img), "label": "covid_with_PNEUMONIA"})
Code
def counter_label(data: list[dict[str, str]], key: str) -> Counter[str]:
    return Counter([record[key] for record in data])
Code
img_l_counter: Counter[str] = counter_label(data=data, key="label")

keys = list(img_l_counter.keys())
values = list(img_l_counter.values())

plt.bar(x=keys, height=values)
plt.show()
Code
plt.figure(figsize=(20, 7))

for i, data_ in enumerate(iterable=data[:10]):
    path: str = data_["img_path"]
    label: str = data_["label"]
    plt.subplot(2, 5, i + 1)
    img: NDArray[uint] = plt.imread(fname=path)
    plt.imshow(X=img)
    plt.xlabel(xlabel=label)

plt.show()
Code
def rotating(
    image: NDArray[float64],
    angle: int = 90,
    scale: float = 1.0,
) -> NDArray[float64]:
    # Get the height and width of the image
    h, w = image.shape[:2]
    # Compute the center of the image
    center: tuple[float, float] = (w / 2.0, h / 2.0)
    # Perform the rotation
    M: NDArray[float64] = array(
        object=getRotationMatrix2D(center=center, angle=angle, scale=scale),
        dtype=float64,
    )
    rotated: NDArray[float64] = array(
        object=warpAffine(src=image, M=M, dsize=(w, h)), dtype=float64
    )
    return rotated
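
For reference (standard OpenCV behavior, not stated in the original notebook): `getRotationMatrix2D` returns the 2x3 affine matrix

$$M = \begin{bmatrix} \alpha & \beta & (1-\alpha)c_x - \beta c_y \\ -\beta & \alpha & \beta c_x + (1-\alpha)c_y \end{bmatrix}, \qquad \alpha = s\cos\theta,\; \beta = s\sin\theta,$$

where $(c_x, c_y)$ is the rotation center, $\theta$ the angle, and $s$ the scale, so `warpAffine` rotates the image about its center.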
Code
def fliping(image: NDArray[float64], flip_code: int) -> NDArray[float64]:
    # Flip the image horizontally, vertically, or both
    # flip_code = 0: flip vertically
    # flip_code > 0: flip horizontally
    # flip_code < 0: flip vertically and horizontally
    return array(object=flip(src=image, flipCode=flip_code), dtype=float64)
Code
def resizing(image: NDArray[float64], size: tuple[int, int]) -> NDArray[float64]:
    return array(object=resize(src=image, dsize=size), dtype=float64)
Code
images_array: list[NDArray[float64]] = []
images_label: list[str] = []
IMG_SIZE: tuple[Literal[100], Literal[100]] = (100, 100)
Code
%%time

for data_ in data:
    category: int = label2category[data_["label"]]
    img: NDArray[float64] = plt.imread(fname=data_["img_path"])
    if category == 0:
        # Augment the minority class with two transformed copies per image,
        # drawing a fresh random rotation and flip for each copy so the copies differ
        for _ in range(2):
            random_angle: int = np.random.choice(a=[90, 180, 270])
            random_flip: int = np.random.choice(a=[-1, 0, 1])
            img_rotate: NDArray[float64] = rotating(image=img, angle=random_angle)
            img_flip: NDArray[float64] = fliping(
                image=img_rotate, flip_code=random_flip
            )
            img_resize: NDArray[float64] = resizing(image=img_flip, size=IMG_SIZE)
            images_array.append(img_resize)
            images_label.append(data_["label"])
    else:
        img_resize: NDArray[float64] = resizing(image=img, size=IMG_SIZE)
        images_array.append(img_resize)
        images_label.append(data_["label"])
Code
# Count the number of samples in each class after augmentation

unique_elements = np.unique(ar=images_label, return_counts=True)
classes = unique_elements[0]
counts = unique_elements[1]

print(f"Classes: {classes}")
print(f"Counts: {counts}")
Code
# Plotting the bar graph
plt.bar(x=classes, height=counts)
plt.xlabel(xlabel="Class")
plt.ylabel(ylabel="Number of samples")
plt.title(label="Number of samples in each class")
plt.show()
Code
images_array: NDArray[float64] = np.array(object=images_array)
images_label: NDArray = np.array(object=images_label)

print(f"shape of images array: {images_array.shape}, dtype: {images_array.dtype}")
print(f"shape of images label: {images_label.shape}, dtype: {images_label.dtype}")
Code
plt.figure(figsize=(20, 7))

for i, data_ in enumerate(iterable=images_array[:10]):
    plt.subplot(2, 5, i + 1)
    plt.imshow(X=data_.astype("uint8"))  # cast to uint8 so imshow renders the RGB data correctly
    plt.xlabel(xlabel=images_label[i])

plt.show()
Code
images_label: NDArray[uint] = np.where(images_label == "covid_without_PNEUMONIA", 0, 1)

Feature Extraction

Code
def calculate_centroid(block: NDArray[float64]) -> tuple[float64, float64, float64]:
    """Calculate the centroid of a 3D matrix"""
    rows: int = block.shape[0]
    cols: int = block.shape[1]
    dep: int = block.shape[2]
    x_centroid: float = 0.0
    y_centroid: float = 0.0
    z_centroid: float = 0.0
    for i in range(rows):
        for j in range(cols):
            for k in range(dep):
                x_centroid += i * block[i, j, k]
                y_centroid += j * block[i, j, k]
                z_centroid += k * block[i, j, k]
    total: float64 = np.sum(a=block)
    return x_centroid / total, y_centroid / total, z_centroid / total
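
The triple Python loop above is slow for large blocks. A vectorized equivalent (a minimal sketch, not part of the original notebook) computes the same intensity-weighted centroid with `np.indices`:

Code
def calculate_centroid_vectorized(
    block: NDArray[float64],
) -> tuple[float64, float64, float64]:
    # Index grids i, j, k, each with the same shape as `block`
    i, j, k = np.indices(dimensions=block.shape)
    total: float64 = np.sum(a=block)
    # Intensity-weighted mean position along each axis
    return (
        np.sum(a=i * block) / total,
        np.sum(a=j * block) / total,
        np.sum(a=k * block) / total,
    )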
Code
def extract_features_with_centroid(
    image: NDArray[float64],
    block_size: int = 50,
) -> NDArray[float64]:
    features: NDArray[float64] = np.zeros(
        shape=(100 // block_size, 100 // block_size, 3)
    )
    for i in range(0, 100, block_size):
        for j in range(0, 100, block_size):
            block: NDArray[float64] = image[i : i + block_size, j : j + block_size]
            centroid: tuple[float64, float64, float64] = calculate_centroid(block=block)
            x_centroid: float64 = centroid[0]
            y_centroid: float64 = centroid[1]
            z_centroid: float64 = centroid[2]
            features[int(i / block_size), int(j / block_size), 0] = x_centroid
            features[int(i / block_size), int(j / block_size), 1] = y_centroid
            features[int(i / block_size), int(j / block_size), 2] = z_centroid
    return features
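
As a quick sanity check (illustrative, not in the original notebook): with `block_size=50` on a 100x100x3 image, the feature grid is 2x2x3, i.e. 12 values per image once flattened, which is where the upper bound of 12 in the PSO search space below comes from.

Code
demo_features: NDArray[float64] = extract_features_with_centroid(
    image=np.ones(shape=(100, 100, 3))
)
print(demo_features.shape)  # (2, 2, 3) -> 12 features when flattened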
Code
split: list[NDArray] = train_test_split(
    images_array,
    images_label,
    test_size=0.3,
    random_state=42,
)
x_train: NDArray[float64] = split[0]
x_test: NDArray[float64] = split[1]
y_train: NDArray[uint] = split[2]
y_test: NDArray[uint] = split[3]
Code
print(f"train input shape: {x_train.shape}, with type: {x_train.dtype}")
print(f"train output shape: {y_train.shape}, with type: {y_train.dtype}")
print(f"test input shape: {x_test.shape}, with type: {x_test.dtype}")
print(f"test output shape: {y_test.shape}, with type: {y_test.dtype}")
Code
%%time

train_features_with_centroid: NDArray[float64] = np.nan_to_num(
    x=np.array(
        object=[extract_features_with_centroid(image=image) for image in x_train]
    )
)

test_features_with_centroid: NDArray[float64] = np.nan_to_num(
    x=np.array(object=[extract_features_with_centroid(image=image) for image in x_test])
)
Code
train_features_with_centroid.shape, test_features_with_centroid.shape
Code
train_features_with_centroid: NDArray[float64] = train_features_with_centroid.reshape(
    train_features_with_centroid.shape[0], -1
)

test_features_with_centroid: NDArray[float64] = test_features_with_centroid.reshape(
    test_features_with_centroid.shape[0], -1
)
Code
train_features_with_centroid.shape, test_features_with_centroid.shape

Model

Code
class LogisticRegression:
    def __init__(
        self,
        lr: float = 0.01,
        num_iter: int = 1000,
        verbose: bool = False,
    ) -> None:
        self.lr: float = lr
        self.num_iter: int = num_iter
        self.verbose: bool = verbose
        self.weights: NDArray[float64] = np.array(object=[])
        self.bias: float64 = float64(0.0)
        self.x: NDArray[float64] = np.array(object=[])
        self.y: NDArray[uint] = np.array(object=[])
        self.classes_ = None
        self.m: int = 0
        self.n: int = 0
        self.losses: list[float64] = []
        self.accuracies: list[float64] = []

    def gradient_descent(self) -> None:
        y_hat: NDArray[float64] = self.predict(x=self.x)
        dw: NDArray[float64] = (1.0 / self.m) * np.dot(a=self.x.T, b=(y_hat - self.y))
        db: float64 = (1.0 / self.m) * np.sum(a=y_hat - self.y)
        self.weights -= self.lr * dw
        self.bias -= self.lr * db

    def predict(self, x: NDArray[float64]) -> NDArray[float64]:
        z: NDArray[float64] = np.dot(a=x, b=self.weights) + self.bias
        return 1.0 / (1.0 + np.exp(-z))

    def plot(self) -> None:
        plt.plot(self.losses)
        plt.plot(self.accuracies)
        plt.xlabel(xlabel="Number of iterations")
        plt.ylabel(ylabel="Loss")
        plt.legend(["Loss", "Accuracy"])
        plt.show()

    def loss(self) -> None:
        y_hat: NDArray[float64] = self.predict(x=self.x)
        self.losses.append(
            -np.mean(
                a=self.y * np.log(y_hat + float_info.min)
                + (1 - self.y) * np.log(1 - y_hat + float_info.min)
            )
        )

    def accuracy(self) -> None:
        y_hat: NDArray[float64] = self.predict(x=self.x)
        acc: float64 = np.mean(y_hat.round() == self.y)
        self.accuracies.append(acc)

    def fit(self, x: NDArray[float64], y: NDArray[uint]) -> None:
        self.x = x
        self.y = y
        best_loss: float64 = float64(float_info.max)
        patience: int = 5
        self.m = x.shape[0]
        self.n = x.shape[1]
        self.weights = np.random.rand(self.n)
        self.bias = float64(np.random.rand())
        for _ in range(self.num_iter):
            self.gradient_descent()
            self.loss()
            self.accuracy()
            if self.losses[-1] < best_loss:
                best_loss = self.losses[-1]
                patience = 5
            else:
                patience -= 1
                if patience == 0:
                    break
        if self.verbose:
            print(f"\nFinal Loss: {self.losses[-1]}")
            print(f"Final Accuracy: {self.accuracies[-1]}")

    def eval(self, x: NDArray[float64], y: NDArray[uint]) -> None:
        self.x = x
        self.y = y
        self.loss()  # record the loss on the evaluated data, not the last training loss
        self.accuracy()
        if self.verbose:
            y_hat: NDArray[float64] = self.predict(x=self.x)
            print(f"Accuracy: {self.accuracies[-1]}")
            print(f"Loss: {self.losses[-1]}")
            print(f"F1 Score: {f1_score(y_true=self.y, y_pred=y_hat.round())}")
            print(f"Precision: {precision_score(y_true=self.y, y_pred=y_hat.round())}")
            print(f"Recall: {recall_score(y_true=self.y, y_pred=y_hat.round())}")
Code
model = LogisticRegression(verbose=True)
Code
np.unique(ar=y_train), np.unique(ar=y_test)
Code
# Standardize the features (note: each split is scaled with its own mean/std here)
train_features_with_centroid: NDArray[float64] = (
    train_features_with_centroid - train_features_with_centroid.mean()
) / train_features_with_centroid.std()

test_features_with_centroid: NDArray[float64] = (
    test_features_with_centroid - test_features_with_centroid.mean()
) / test_features_with_centroid.std()
Code
model.fit(x=train_features_with_centroid, y=y_train)
Code
model.plot()
Code
model.eval(x=train_features_with_centroid, y=y_train)
Code
model.eval(x=test_features_with_centroid, y=y_test)
Code
print(
    classification_report(
        y_true=y_test,
        y_pred=model.predict(x=test_features_with_centroid).round(),
    )
)
Code
x_train.shape
Code
y_train.shape

Particle Swarm Optimization

Code
tries: list[NDArray[float64]] = []
Code
class Particle:
    def __init__(self, bounds: list[tuple[int | float, int | float]]) -> None:
        self.position: NDArray[float64] = np.array(
            object=[np.random.uniform(low=b[0], high=b[1]) for b in bounds]
        )
        self.velocity: NDArray[float64] = np.array(
            object=[
                np.random.uniform(low=-abs(b[1] - b[0]), high=abs(b[1] - b[0]))
                for b in bounds
            ]
        )
        self.best_position: NDArray[float64] = self.position.copy()
        self.best_fitness: float = float_info.max
        self.fitness: float = float_info.max
Code
def update_velocity(
    particle: Particle,
    global_best_position: NDArray[float64],
    w: float = 0.5,
    c1: float = 1.0,
    c2: float = 2.0,
) -> None:
    inertia: NDArray[float64] = w * particle.velocity
    cognitive: NDArray[float64] = (
        c1 * np.random.random() * (particle.best_position - particle.position)
    )
    social: NDArray[float64] = (
        c2 * np.random.random() * (global_best_position - particle.position)
    )
    particle.velocity = inertia + cognitive + social
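
This is the canonical PSO velocity update, with inertia weight $w$ and cognitive/social coefficients $c_1, c_2$ ($r_1, r_2$ drawn uniformly from $[0, 1)$):

$$v_i \leftarrow w\,v_i + c_1 r_1 (p_i - x_i) + c_2 r_2 (g - x_i),$$

where $p_i$ is particle $i$'s best-known position and $g$ the swarm's global best.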
Code
def update_position(
    particle: Particle,
    bounds: list[tuple[int | float, int | float]],
) -> None:
    particle.position += particle.velocity
    # Ensure the particle's position is within the bounds
    for i in range(len(bounds)):
        if particle.position[i] < bounds[i][0]:
            particle.position[i] = bounds[i][0]
        elif particle.position[i] > bounds[i][1]:
            particle.position[i] = bounds[i][1]
Code
def objective_function(subset: NDArray[float64]) -> float:
    tries.append(subset)
    print(f"\tTrying Subset: {round(number=subset[0])}, LR: {subset[1]}")
    X_train_subset: NDArray[float64] = train_features_with_centroid[
        :, : round(number=subset[0])
    ]
    X_test_subset: NDArray[float64] = test_features_with_centroid[
        :, : round(number=subset[0])
    ]
    model = LogisticRegression(lr=subset[1])
    model.fit(x=X_train_subset, y=y_train)
    model.eval(x=X_test_subset, y=y_test)
    y_hat: NDArray[float64] = model.predict(x=X_test_subset)
    return -float(f1_score(y_true=y_test, y_pred=y_hat.round()))
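
The objective returns the negated F1 score so that PSO, written as a minimizer, effectively maximizes F1:

$$\text{fitness} = -F_1 = -\frac{2 \cdot \text{precision} \cdot \text{recall}}{\text{precision} + \text{recall}}$$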
Code
def pso(
    objective_function: Callable[[NDArray[float64]], float],
    bounds: list[tuple[int | float, int | float]],
    num_particles: int,
    max_iter: int,
) -> tuple[NDArray[float64], float]:
    swarm: list[Particle] = [Particle(bounds=bounds) for _ in range(num_particles)]
    global_best_position = np.zeros(shape=len(bounds))
    global_best_fitness: float = float_info.max
    for iteration in range(max_iter):
        for particle in swarm:
            particle.fitness = objective_function(particle.position)
            if particle.fitness < particle.best_fitness:
                particle.best_fitness = particle.fitness
                particle.best_position = particle.position.copy()
            if particle.fitness < global_best_fitness:
                global_best_fitness = particle.fitness
                global_best_position: NDArray[float64] = particle.position.copy()
        for particle in swarm:
            update_velocity(
                particle=particle, global_best_position=global_best_position
            )
            update_position(particle=particle, bounds=bounds)
        print(f"Iteration {iteration + 1}/{max_iter}")
        print(f"Global Best Fitness: {-global_best_fitness}")
        print(f"Global Best Position: {global_best_position}")
    return global_best_position, -global_best_fitness
Code
best: tuple[NDArray[float64], float] = pso(
    objective_function=objective_function,
    bounds=[(2, 12), (0.01, 0.99)],
    num_particles=100,
    max_iter=100,
)
best_position: NDArray[float64] = best[0]
best_fitness: float = best[1]

print("Best position:", best_position)
print("Best fitness:", best_fitness)
Code
tries: NDArray[float64] = np.array(object=tries)
Code
tries[:, 0] = np.round(a=tries[:, 0])
subsets: NDArray[float64] = np.unique(ar=tries[:, 0])
lrs: NDArray[float64] = np.unique(ar=tries[:, 1])
Code
subsets, lrs

After Particle Swarm Optimization

Code
model = LogisticRegression(verbose=True, lr=best_position[1])
Code
model.fit(
    x=train_features_with_centroid[:, : round(number=best_position[0])],
    y=y_train,
)
Code
model.plot()
Code
model.eval(
    x=train_features_with_centroid[:, : round(number=best_position[0])],
    y=y_train,
)
Code
model.eval(
    x=test_features_with_centroid[:, : round(number=best_position[0])],
    y=y_test,
)
Code
print(
    classification_report(
        y_true=y_test,
        y_pred=model.predict(
            x=test_features_with_centroid[:, : round(number=best_position[0])]
        ).round(),
    )
)

Feature Reduction

Code
class LDA:
    def __init__(self) -> None:
        self.w: NDArray[float64] = np.array(object=[])

    def fit(self, X: NDArray[float64], y: NDArray[uint]) -> None:
        # Per-class means
        m0 = np.mean(a=X[y == 0], axis=0)
        m1 = np.mean(a=X[y == 1], axis=0)
        # Within-class scatter matrices
        S0 = np.dot(a=(X[y == 0] - m0).T, b=(X[y == 0] - m0))
        S1 = np.dot(a=(X[y == 1] - m1).T, b=(X[y == 1] - m1))
        Sw = S0 + S1
        # Fisher discriminant direction: w = Sw^{-1} (m1 - m0)
        self.w = np.dot(a=np.linalg.inv(Sw), b=(m1 - m0))

    def transform(self, X: NDArray[float64]) -> NDArray[float64]:
        result: NDArray[float64] = np.dot(a=X, b=self.w)
        return (result - result.mean()) / result.std()
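
This is two-class Fisher LDA: with class means $m_0, m_1$ and within-class scatter $S_W = S_0 + S_1$, the projection direction is

$$w = S_W^{-1}(m_1 - m_0),$$

and `transform` projects $X$ onto $w$, then standardizes the resulting one-dimensional feature.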
Code
lda = LDA()
Code
lda.fit(
    X=train_features_with_centroid[:, : round(number=best_position[0])],
    y=y_train,
)
Code
train_features_with_centroid: NDArray[float64] = lda.transform(
    X=train_features_with_centroid[:, : round(number=best_position[0])]
)
test_features_with_centroid: NDArray[float64] = lda.transform(
    X=test_features_with_centroid[:, : round(number=best_position[0])]
)
Code
train_features_with_centroid: NDArray[float64] = train_features_with_centroid.reshape(
    -1, 1
)
test_features_with_centroid: NDArray[float64] = test_features_with_centroid.reshape(
    -1, 1
)
Code
train_features_with_centroid.shape, test_features_with_centroid.shape

After Feature Reduction

Code
model = LogisticRegression(verbose=True, lr=best_position[1])
Code
model.fit(x=train_features_with_centroid, y=y_train)
Code
model.plot()
Code
model.eval(x=train_features_with_centroid, y=y_train)
Code
model.eval(x=test_features_with_centroid, y=y_test)
Code
print(
    classification_report(
        y_true=y_test,
        y_pred=model.predict(x=test_features_with_centroid).round(),
    )
)
Mohamed Hisham Abdelzaher