"""
Written by wano

PCA
Simple PCA example using NumPy
"""
import numpy as np
# samples
# https://www.kaggle.com/xvivancos/image-compression-using-pca
data = [
    [2.5, 2.4],
    [0.5, 0.7],
    [2.2, 2.9],
    [1.9, 2.2],
    [3.1, 3.0],
    [2.3, 2.7],
    [2.0, 1.6],
    [1.0, 1.1],
    [1.5, 1.6],
    [1.1, 0.9],
]
number_of_samples = len(data)

# D: data matrix (one row per sample, one column per feature)
D = np.array(data)

# centering: subtract each feature's own mean (axis=0). Subtracting the
# single global mean would leave the individual columns off-centre.
D = D - D.mean(axis=0)

# C: covariance matrix of the features (np.cov expects one variable per row,
# hence the transpose)
C = np.cov(D.T)
n = len(C)  # feature count

# eigenvalues & eigenvectors of C. C is symmetric, so eigh is used: it
# returns real eigenvalues and orthonormal eigenvectors stored as columns.
eigen_values, eigen_vectors = np.linalg.eigh(C)

# from increasing order to decreasing order
sorting_index = np.argsort(eigen_values)[::-1]
eigen_values = eigen_values[sorting_index]
# Eigenvectors are the COLUMNS of the matrix, so the columns (not the rows)
# must be permuted to keep every vector paired with its eigenvalue.
eigen_vectors = eigen_vectors[:, sorting_index]

# v: principal axes, v[i] is the i-th column (i-th largest eigenvalue)
v = [eigen_vectors[:, i] for i in range(n)]

# data restoration test: projecting a centred point onto every principal
# axis and summing the components must give the point back.
restored = []
for p in D:  # p: original (centred) point
    q = np.zeros_like(p)  # restored point
    for axis in v:
        projected_distance = np.dot(p, axis)
        q += projected_distance * axis
    q = [round(qi, 2) for qi in q.tolist()]
    restored.append(q)
    print(p, q)