# load libs
import sys
sys.path.append("../")
import numpy as np
import scipy.io
import pandas as pd
import matplotlib.pyplot as plt
import math
from logistic_regression import *
from image_recognition import *
from utils import *
from optimize import find_coefficients
def load_admission_data():
    return pd.read_csv("../data/ex2data1.csv",
                       names=["Exam A", "Exam B", "Admission"])
admission_data = load_admission_data()
print(admission_data.head())
print(admission_data.shape)
# scatter plot with admitted and not admitted students
def plot_admission_data(show=True):
    admitted = admission_data.loc[admission_data["Admission"] == 1]
    not_admitted = admission_data.loc[admission_data["Admission"] == 0]
    plt.scatter(admitted["Exam A"], admitted["Exam B"], marker="+")
    plt.scatter(not_admitted["Exam A"], not_admitted["Exam B"], marker=".")
    plt.xlabel("Exam A")
    plt.ylabel("Exam B")
    plt.title("Admission Results Distribution")
    if show:
        plt.show()
plot_admission_data()
Separate the two classes with the logistic (sigmoid) hypothesis function:
$$h(X^{(i)}) = \frac{1}{1 + e^{-z}}$$
where:
$$z = \theta_0 x_0^{(i)} + \theta_1 x_1^{(i)} + ... + \theta_n x_n^{(i)} = \Theta^T X^{(i)}$$
$h(X^{(i)})$ ranges from 0 to 1 inclusive and represents the probability $P(y = 1)$ of a positive outcome.
Positive ($y = 1$) when $h(X^{(i)}) \ge 0.5$, i.e. $z \ge 0$
Negative ($y = 0$) when $h(X^{(i)}) \lt 0.5$, i.e. $z \lt 0$
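`sigmoid` is imported from the local `logistic_regression` module; a minimal sketch of what it presumably computes:
def sigmoid(z):
    """Logistic function; maps any real z (scalar or array) into the (0, 1) interval."""
    return 1 / (1 + np.exp(-z))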
def plot_logistic_hypothesis_function(show=True):
    z = np.linspace(-10, 10, 100)
    h = [sigmoid(val) for val in z]
    plt.plot(z, h, "-b")
    plt.axhline(0.5, color="r", ls="--", lw=1)
    plt.xlabel("Z value")
    plt.ylabel("Hypothesis value")
    plt.title("Logistic Hypothesis")
    if show:
        plt.show()
plot_logistic_hypothesis_function()
Find the vector of $\Theta$ parameters which minimizes the cost function $J(\Theta)$:
$$J(\Theta) = - \frac{1}{m} \sum_{i=1}^{m} [ y^{(i)} \log(h(X^{(i)})) + (1 - y^{(i)}) \log(1 - h(X^{(i)})) ]$$
The gradient descent update for the logistic cost function looks similar to the one in linear regression:
$$\theta_j = \theta_j - \alpha \frac{\partial J}{\partial \theta_j} = \theta_j - \alpha \frac{1}{m} \sum_{i=1}^{m} (h(X^{(i)}) - y^{(i)}) x^{(i)}_j$$
Instead of hand-rolled gradient descent, the BFGS algorithm is used here to minimize the cost function.
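`concat_with_x0` and `find_coefficients` are imported from the local helper modules. A minimal sketch of both, assuming `find_coefficients` wraps `scipy.optimize.minimize` with the BFGS method and that the cost callbacks take `(theta, x, y, regularization_rate)`; the actual signatures in those modules may differ:
from scipy.optimize import minimize

def concat_with_x0(x):
    """Prepend the intercept column x0 = 1 to the feature matrix."""
    return np.column_stack([np.ones(len(x)), x])

def find_coefficients(x, y, cost_function, cost_function_derivative, regularization_rate):
    """Minimize the cost function with BFGS, starting from all-zero coefficients."""
    initial_theta = np.zeros(x.shape[1])
    result = minimize(cost_function, initial_theta,
                      args=(x, y, regularization_rate),
                      jac=cost_function_derivative,
                      method="BFGS")
    return result.x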
# find coefficients
x1 = concat_with_x0(admission_data[["Exam A", "Exam B"]])
y1 = admission_data[["Admission"]].values
coefficients = find_coefficients(x1, y1,
                                 regularized_cost_function,
                                 regularized_cost_function_derivative, 0)
print(coefficients)
To identify the decision boundary for the fitted coefficients, solve the following equation:
$h(X^{(i)}) = 0.5$ when $z = 0$, thus $z = \Theta^T X = 0$
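With two features this boundary is a straight line; solving $\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0$ for $x_2$ gives the expression used in the plot below:
$$x_2 = - \frac{\theta_0 + \theta_1 x_1}{\theta_2}$$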
def plot_admission_results_decision_boundary():
    x1_1 = [np.min(admission_data["Exam A"]), np.max(admission_data["Exam A"])]
    x1_2 = [-(coefficients[0] + coefficients[1] * x) / coefficients[2] for x in x1_1]
    plt.plot(x1_1, x1_2, "-r")
    plt.show()
plot_admission_data(False)
plot_admission_results_decision_boundary()
# load data
def load_qa_data():
    return pd.read_csv("../data/ex2data2.csv", names=["Test 1", "Test 2", "QA"])
qa_data = load_qa_data()
print(qa_data.head())
print(qa_data.shape)
# scatter plot with passed and failed QA tests
def plot_qa_data(show=True):
    passed = qa_data.loc[qa_data["QA"] == 1]
    failed = qa_data.loc[qa_data["QA"] == 0]
    plt.scatter(passed["Test 1"], passed["Test 2"], marker="+")
    plt.scatter(failed["Test 1"], failed["Test 2"], marker=".")
    plt.xlabel("Test 1")
    plt.ylabel("Test 2")
    plt.title("QA Results")
    if show:
        plt.show()
plot_qa_data()
A linear decision boundary doesn't work in this case, so a feature mapping function is introduced to build polynomial features:
$$mapFeatures(x_1, x_2) = \{1, x_1, x_2, x_1^2, x_1 x_2, x_2^2, ..., x_1 x_2^5, x_2^6\}$$
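`map_features` is imported from the local helper modules; a minimal sketch of the mapping above, assuming it returns every monomial $x_1^{p-q} x_2^q$ up to the given degree (the leading 1 is added separately by `concat_with_x0`):
def map_features(x1, x2, degree):
    """Map two feature columns to all polynomial terms up to the given degree."""
    x1, x2 = np.ravel(x1), np.ravel(x2)
    features = [(x1 ** (p - q)) * (x2 ** q)
                for p in range(1, degree + 1)
                for q in range(p + 1)]
    return np.column_stack(features)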
These extra parameters are kept in check by adding a regularization term to the cost function $J(\Theta)$:
$$J_{reg}(\Theta) = J(\Theta) + \frac{\lambda}{2 m} \sum_{j=1}^n \theta_j^2$$
where $\lambda$ is the regularization rate and $\theta_0$ is excluded from the sum.
The regularized gradient descent update then becomes:
$$\theta_j = \theta_j - \alpha \frac{\partial J_{reg}}{\partial \theta_j} = \theta_j - \alpha \left( \frac{1}{m} \sum_{i=1}^{m} (h(X^{(i)}) - y^{(i)}) x^{(i)}_j + \frac{\lambda}{m} \theta_j \right) = \theta_j \left( 1 - \frac{\alpha \lambda}{m} \right) - \alpha \frac{1}{m} \sum_{i=1}^{m} (h(X^{(i)}) - y^{(i)}) x^{(i)}_j$$
So $\theta_j$ shrinks a little on every gradient step; as a result, insignificant features end up contributing almost nothing to the model.
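`regularized_cost_function` and its derivative are imported from the local `logistic_regression` module; a vectorized sketch of what they presumably compute, assuming the `(theta, x, y, regularization_rate)` argument order used in the `find_coefficients` sketch above:
def regularized_cost_function(theta, x, y, regularization_rate):
    """Regularized logistic cost J_reg; theta_0 is excluded from the penalty."""
    m = y.size
    h = sigmoid(x @ theta)
    cost = -(y.flatten() @ np.log(h) + (1 - y.flatten()) @ np.log(1 - h)) / m
    return cost + regularization_rate * (theta[1:] @ theta[1:]) / (2 * m)

def regularized_cost_function_derivative(theta, x, y, regularization_rate):
    """Gradient of J_reg with respect to theta."""
    m = y.size
    grad = x.T @ (sigmoid(x @ theta) - y.flatten()) / m
    grad[1:] += regularization_rate * theta[1:] / m
    return grad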
# find optimal coefficients
polynomial_degree = 6
x2_mapped = map_features(qa_data[["Test 1"]].values, qa_data[["Test 2"]].values, polynomial_degree)
x2_mapped = concat_with_x0(x2_mapped)
y2 = qa_data[["QA"]].values
regularization_rate = 1
coefficients = find_coefficients(x2_mapped, y2,
                                 regularized_cost_function,
                                 regularized_cost_function_derivative,
                                 regularization_rate)
print(coefficients)
# calculate model accuracy by comparing model predictions with the original labels
pred = (sigmoid(x2_mapped @ coefficients) >= 0.5).astype(int)
print(np.mean(pred == y2.flatten()) * 100)
# plot decision boundary
def plot_qa_data_decision_boundary():
    t1 = np.linspace(-1, 1.5, 50)
    t2 = np.linspace(-1, 1.5, 50)
    z = np.array([[coefficients @ np.concatenate([[1], np.ravel(map_features([t1[i]], [t2[j]], polynomial_degree))])
                   for j in range(t2.size)]
                  for i in range(t1.size)]).transpose()
    plt.contour(t1, t2, z, levels=[0])  # decision boundary is the z = 0 contour
    plt.title("QA Results, lambda = " + str(regularization_rate))
plot_qa_data(False)
plot_qa_data_decision_boundary()
plt.show()
Using the one-vs-all approach: train 10 separate logistic regression classifiers, one per digit, and predict the digit whose classifier reports the highest probability:
$$\frac{1}{1 + e^{-\Theta^T X}}$$
# load and prepare data
def load_image_data():
    data = scipy.io.loadmat('../data/ex3data1.mat')
    x = data['X']  # m x n^2 matrix, where m is the number of images and n the square image size
    y = data['y']  # m x 1 vector of image classes (digits 0-9, with 0 stored as class 10)
    return x, y
x, y = load_image_data()
def replace_0_with_10(y, digit):
    """
    Build binary labels for one digit; the dataset stores the digit 0 as class 10.
    """
    return (y == (10 if digit == 0 else digit)).astype(int)
# train a logistic regression classifier for each of the 10 digits
regularization_rate = 0.1
coefficients = [fit_logistic_regression(x, replace_0_with_10(y, digit),
                                        regularization_rate, 50) for digit in range(10)]
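`predict_digit` is imported from the local `image_recognition` module; a minimal sketch, assuming each fitted coefficient vector includes an intercept term and that the function returns the class with the highest probability (with 0 reported as class 10 to match `y`):
def predict_digit(coefficients, image):
    """Pick the digit whose one-vs-all classifier is most confident."""
    probabilities = [sigmoid(np.concatenate([[1], image]) @ theta)  # assumes theta[0] is the intercept
                     for theta in coefficients]
    digit = int(np.argmax(probabilities))
    return 10 if digit == 0 else digit  # dataset stores the digit 0 as class 10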
# predict images
predictions = [predict_digit(coefficients, image) for image in x] # 1 x m vector of predictions for each image
print_predictions_accuracy(predictions, y)
predict_random_images(predictions, x)