
What is a neural network, and what are its core components (e.g., layers, weights, biases)?
A neural network is a computational model inspired by the structure and function of the human brain. It’s widely used in machine learning to recognize patterns, make decisions, or predict outcomes from data. Neural networks are the backbone of deep learning, a subfield of artificial intelligence (AI).
🔍 What is a Neural Network?
At its core, a neural network consists of layers of interconnected nodes (also called neurons), where each node processes information and passes it to the next layer. These networks learn by adjusting the strength of connections between nodes, called weights, and using biases to shift activation thresholds.
Neural networks can be trained to perform tasks like:
- Image classification
- Speech recognition
- Language translation
- Stock prediction
- Game playing
🧱 Core Components of a Neural Network
1. Layers
Neural networks are composed of three main types of layers:
a) Input Layer
- The first layer that receives raw input data.
- Each neuron represents a feature in the input data.
Example: In an image classifier, if the input image is 28×28 pixels (like MNIST digits), the input layer has 784 neurons (one per pixel); a code sketch follows this list.
b) Hidden Layers
- Intermediate layers between input and output.
- Extract features from the input through transformations.
- Can be one or many layers deep (hence “deep learning”).
c) Output Layer
- Final layer that produces the result.
- Number of neurons depends on the task:
  - Binary classification: 1 neuron
  - Multi-class classification: N neurons (one for each class)
  - Regression: 1 or more neurons
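
To make the three layer types concrete, here is a minimal PyTorch sketch of an MNIST-style classifier matching the example above: a 784-neuron input, one hidden layer (the width of 128 is an arbitrary, illustrative choice), and a 10-neuron output, one per digit class.

```python
import torch.nn as nn

# Input layer: 784 features (one per pixel of a 28x28 image)
# Hidden layer: 128 neurons (illustrative width, not fixed by the text above)
# Output layer: 10 neurons (one per digit class, 0-9)
mnist_classifier = nn.Sequential(
    nn.Linear(784, 128),  # input -> hidden
    nn.ReLU(),            # non-linearity (see "Activation Functions" below)
    nn.Linear(128, 10),   # hidden -> output: one score per class
)
```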

2. Weights
- Weights determine how strongly each input influences the output of a neuron.
- During training, weights are adjusted to minimize error.

3. Biases
- A bias allows shifting the activation function left or right, which helps the model fit the data better.
- Think of it as an intercept term in linear regression.
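
In PyTorch (used in the full example below), every `nn.Linear` layer stores its weights and biases as learnable parameters. A minimal sketch inspecting them, using the same 4-input, 16-neuron sizes as the Iris model later in this section:

```python
import torch.nn as nn

layer = nn.Linear(4, 16)   # 4 inputs -> 16 neurons, as in the Iris model below

print(layer.weight.shape)  # torch.Size([16, 4]): one weight per (neuron, input) pair
print(layer.bias.shape)    # torch.Size([16]): one bias per neuron
```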

4. Activation Functions
- Applied to the output of each neuron to introduce non-linearity.
- Allows the network to learn complex patterns.
Common activation functions:
- Sigmoid: maps values to (0, 1); useful for binary classification.
- ReLU (Rectified Linear Unit): outputs the input if positive, otherwise zero; commonly used in hidden layers.
- Softmax: converts outputs into probabilities for multi-class classification.
- Tanh: maps input values to the range (-1, 1).
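
A quick sketch applying each of these to the same tensor (the input values are arbitrary):

```python
import torch

z = torch.tensor([-2.0, 0.0, 3.0])  # raw neuron outputs (logits)

print(torch.sigmoid(z))         # tensor([0.1192, 0.5000, 0.9526]): squashed into (0, 1)
print(torch.relu(z))            # tensor([0., 0., 3.]): negatives clipped to zero
print(torch.softmax(z, dim=0))  # non-negative and sums to 1: a probability distribution
print(torch.tanh(z))            # tensor([-0.9640, 0.0000, 0.9951]): squashed into (-1, 1)
```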

🧠 How a Neuron Works – Step-by-Step Example
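A single neuron computes its output in three steps: (1) multiply each input by its weight, (2) sum the products and add the bias, giving z = w·x + b, and (3) pass z through the activation function. Here is a minimal numeric sketch of one sigmoid neuron, with all values chosen arbitrarily for illustration:

```python
import torch

x = torch.tensor([2.0, 4.0])     # Step 0: two input features
w = torch.tensor([0.5, -0.25])   # weights (illustrative values)
b = 0.5                          # bias (illustrative value)

z = torch.dot(w, x) + b          # Steps 1-2: weighted sum plus bias
                                 # 0.5*2.0 + (-0.25)*4.0 + 0.5 = 0.5
out = torch.sigmoid(z)           # Step 3: activation
print(out)                       # tensor(0.6225), since sigmoid(0.5) ≈ 0.6225
```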

Code Example: Applying OOP Concepts
```python
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader


class DataProcessor:
    """Encapsulates data loading and preprocessing."""

    def __init__(self, test_size=0.2, random_state=42, batch_size=16):
        self._test_size = test_size
        self._random_state = random_state
        self._batch_size = batch_size
        self._scaler = StandardScaler()
        self._train_loader = None
        self._X_test = None
        self._y_test = None

    def load_and_preprocess(self):
        """Loads and preprocesses the Iris dataset."""
        iris = load_iris()
        X, y = iris.data, iris.target  # 4 features, 3 classes
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=self._test_size, random_state=self._random_state
        )
        # Standardize features
        X_train = self._scaler.fit_transform(X_train)
        X_test = self._scaler.transform(X_test)
        # Convert to tensors
        X_train_tensor = torch.FloatTensor(X_train)
        y_train_tensor = torch.LongTensor(y_train)
        self._X_test = torch.FloatTensor(X_test)
        self._y_test = torch.LongTensor(y_test)
        # Create DataLoader
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        self._train_loader = DataLoader(train_dataset, batch_size=self._batch_size, shuffle=True)
        return self._train_loader, self._X_test, self._y_test

    def get_test_data(self):
        """Returns test data."""
        return self._X_test, self._y_test

    def get_sample(self, index=0):
        """Returns a single test sample."""
        return self._X_test[index:index + 1], self._y_test[index].item()


class BaseModel(nn.Module):
    """Base class for neural network models."""

    def __init__(self):
        super(BaseModel, self).__init__()
        self._criterion = None
        self._optimizer = None
        self._train_losses = []
        self._train_accuracies = []

    def configure(self, learning_rate=0.001):
        """Configures loss function and optimizer."""
        self._criterion = nn.CrossEntropyLoss()  # Softmax + cross-entropy
        self._optimizer = optim.Adam(self.parameters(), lr=learning_rate)

    def train_model(self, train_loader, epochs=50):
        """Trains the model with forward/backward propagation."""
        self.train()
        for epoch in range(epochs):
            running_loss = 0.0
            correct = 0
            total = 0
            for inputs, labels in train_loader:
                self._optimizer.zero_grad()
                outputs = self(inputs)  # Forward propagation
                loss = self._criterion(outputs, labels)
                loss.backward()  # Backward propagation
                self._optimizer.step()  # Update weights/biases
                running_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            self._train_losses.append(running_loss / len(train_loader))
            self._train_accuracies.append(correct / total)
        return self._train_losses, self._train_accuracies

    def evaluate(self, X_test, y_test):
        """Evaluates the model on test data."""
        self.eval()
        with torch.no_grad():
            outputs = self(X_test)
            loss = self._criterion(outputs, y_test).item()
            _, predicted = torch.max(outputs, 1)
            accuracy = (predicted == y_test).sum().item() / len(y_test)
        return loss, accuracy

    def predict(self, sample):
        """Predicts class and probabilities for a single sample."""
        self.eval()
        with torch.no_grad():
            output = self(sample)
            probabilities = torch.softmax(output, dim=1).numpy()[0]
            predicted_class = torch.argmax(output).item()
        return probabilities, predicted_class

    def summary(self):
        """Prints model architecture and parameter count."""
        total_params = sum(p.numel() for p in self.parameters())
        print(f"Model Architecture:\n{self}\nTotal Parameters: {total_params}")


class IrisNet(BaseModel):
    """Neural network for Iris classification."""

    def __init__(self):
        super(IrisNet, self).__init__()
        self.hidden = nn.Linear(4, 16)  # 4 inputs -> 16 neurons
        self.relu = nn.ReLU()
        self.output = nn.Linear(16, 3)  # 16 neurons -> 3 outputs

    def forward(self, x):
        """Defines forward propagation."""
        x = self.hidden(x)  # Weights * inputs + biases
        x = self.relu(x)
        x = self.output(x)
        return x


class Visualizer:
    """Handles visualization of training metrics."""

    @staticmethod
    def plot_metrics(losses, accuracies, filename='iris_training_progress.png'):
        """Plots training loss and accuracy."""
        plt.figure(figsize=(10, 4))
        plt.subplot(1, 2, 1)
        plt.plot(accuracies, label='Training Accuracy', color='#1f77b4')
        plt.title('Model Accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        plt.grid(True)
        plt.subplot(1, 2, 2)
        plt.plot(losses, label='Training Loss', color='#ff7f0e')
        plt.title('Model Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.savefig(filename)
        plt.close()


def main():
    # Initialize data processor and load data
    data_processor = DataProcessor()
    train_loader, X_test, y_test = data_processor.load_and_preprocess()
    # Initialize and configure model
    model = IrisNet()
    model.configure()
    model.summary()  # Display model details
    # Train model
    losses, accuracies = model.train_model(train_loader, epochs=50)
    # Evaluate model
    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print(f"Test accuracy: {test_accuracy:.4f}")
    # Predict on a single sample
    sample, actual = data_processor.get_sample()
    probabilities, predicted_class = model.predict(sample)
    print(f"Predicted probabilities: {probabilities}")
    print(f"Predicted class: {predicted_class} (Actual: {actual})")
    # Inspect weights (example for hidden layer)
    print(f"Hidden layer weights shape: {model.hidden.weight.shape}")
    print(f"Sample weights (first neuron): {model.hidden.weight[0].detach().numpy()}")
    # Visualize training progress
    Visualizer.plot_metrics(losses, accuracies)


if __name__ == "__main__":
    main()
```
Output: running the script prints the model summary (with total parameter count), the test accuracy, the predicted probabilities and class for a single test sample, and the hidden-layer weight shapes, then saves the training curves to iris_training_progress.png.