# NOTE: this notebook requires Python 3.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# The magic above renders matplotlib figures directly in the notebook.
plt.rcParams['image.cmap'] = 'gray'
# By default, grayscale images are displayed with a colour colormap; use grayscale instead.
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools
import plotly.offline
plotly.offline.init_notebook_mode(connected=True)
from IPython.core.display import display, HTML, Markdown
# Poll every 250 ms until plotly.js has been loaded (window.Plotly is defined)
# before changing the MathJax configuration, in order to avoid a race condition.
# Once it is available: use the STIX-Web font for SVG output, centre displayed
# maths, switch MathJax to its SVG renderer, and stop polling.
display(HTML(
'<script>'
'var waitForPlotly = setInterval( function() {'
'if( typeof(window.Plotly) !== "undefined" ){'
'MathJax.Hub.Config({ SVG: { font: "STIX-Web" }, displayAlign: "center" });'
'MathJax.Hub.Queue(["setRenderer", MathJax.Hub, "SVG"]);'
'clearInterval(waitForPlotly);'
'}}, 250 );'
'</script>'
))
import sklearn
from sklearn import manifold, datasets, decomposition
from sklearn.utils import check_random_state
from scipy.spatial.distance import squareform, pdist
def from_matplotlib(colormap, nb_points):
    """Sample a matplotlib-style colormap into a Plotly colorscale.

    Parameters
    ----------
    colormap : callable
        Maps a float in [0, 1] to a sequence whose first three items are the
        red, green and blue components, each a float in [0, 1] (matplotlib
        colormaps return such RGBA tuples).
    nb_points : int
        Number of evenly spaced samples to take along the colormap.

    Returns
    -------
    list
        ``[[position, 'rgb(r, g, b)'], ...]`` where ``position`` runs from
        0.0 to 1.0 and each channel is an integer in 0..255. An empty list
        is returned when ``nb_points`` is not positive; a single sample is
        taken at position 0.0 when ``nb_points`` is 1.
    """
    # Guard against a zero denominator when only one sample is requested.
    last = max(nb_points - 1, 1)
    colorscale = []
    for k in range(nb_points):
        position = k / last
        # Scale to 0..255 *before* converting to int: converting first would
        # truncate every channel in [0, 1) down to 0.
        red, green, blue = (int(round(c * 255)) for c in colormap(position)[:3])
        colorscale.append([position, 'rgb({}, {}, {})'.format(red, green, blue)])
    return colorscale
# Four-stop Plotly colorscale sampled from matplotlib's jet colormap.
colormap = from_matplotlib(plt.cm.jet, 4)

# Number of data points. The value actually passed to the generator was 1500,
# so N now holds that value and is used below instead of a separate literal.
N = 1500

# Data points: a 3-D swiss roll; `colors` is the second array returned by the
# generator and is used to colour the 2-D embeddings below.
# `datasets.make_swiss_roll` is the public entry point; the
# `datasets.samples_generator` module path is not available in recent
# scikit-learn releases.
X, colors = datasets.make_swiss_roll(n_samples=N)
x, y, z = X[:, 0], X[:, 1], X[:, 2]

# 3-D scatter of the raw points, coloured by their x coordinate.
raw_data = go.Scatter3d(
    x=x,
    y=y,
    z=z,
    mode='markers',
    marker=dict(
        color=x,
        colorscale=colormap,
        line=dict(color='black', width=1),
        showscale=False,
    ),
)

# Margin-only layout; the embedding functions defined below also read this
# global when they draw their figures.
layout = dict(
    margin=dict(
        l=15,
        r=15,
        t=20,
        b=15,
    )
)

fig = go.Figure(data=[raw_data], layout=layout)
plotly.offline.iplot(fig)

# Accumulators shared (through default arguments) by the embedding functions.
data_traces = []
titles = []

# (n_samples, 3) array rebuilt from the three coordinate vectors.
data = np.array([x, y, z]).T
def MDS(data=data, data_traces=data_traces, colors=colors, colormap=colormap, ticktext=[]):
    """Embed `data` in 2-D with scikit-learn's MDS and plot the result.

    Parameters
    ----------
    data : array-like
        Samples to embed, one per row.
    data_traces : list or None
        When not None, the generated trace is appended to this list.
    colors : array-like
        Per-point values mapped through `colormap`.
    colormap : list
        Plotly colorscale.
    ticktext : sequence
        Colorbar tick labels; the colorbar is shown only when non-empty.
    """
    tick_labels = np.asarray(ticktext)

    # fit_transform yields one row per sample; transpose to split the two axes.
    embedding = manifold.MDS(2, max_iter=100, n_init=1).fit_transform(data)
    xs, ys = embedding.T

    colorbar = dict(
        tickmode='array',
        tickvals=list(range(len(tick_labels))),
        ticktext=tick_labels,
        ticks='outside',
    )
    marker = dict(
        color=colors,
        colorscale=colormap,
        size=10,
        showscale=bool(tick_labels.size),
        colorbar=colorbar,
        line=dict(color='black', width=2),
    )
    trace = go.Scatter(x=xs, y=ys, mode='markers', marker=marker)

    if data_traces is not None:
        data_traces.append(trace)
    plotly.offline.iplot(go.Figure(data=[trace], layout=layout))


MDS()
def PCA(data=data, data_traces=data_traces, colors=colors, colormap=colormap, ticktext=()):
    """Project `data` on its first two principal components and plot the result.

    Parameters
    ----------
    data : array-like
        Samples to project, one per row.
    data_traces : list or None
        When not None, the generated trace is appended to this list.
    colors : array-like
        Per-point values mapped through `colormap`.
    colormap : list
        Plotly colorscale.
    ticktext : sequence
        Colorbar tick labels; the colorbar is shown only when non-empty.
    """
    ticktext = np.asarray(ticktext)  # converts it to an array

    # Use the PCA estimator rather than TruncatedSVD: TruncatedSVD does not
    # centre the data, so on uncentred input its components are not the
    # principal axes.
    data_PCA = decomposition.PCA(n_components=2).fit_transform(data).T

    trace = go.Scatter(
        x=data_PCA[0],
        y=data_PCA[1],
        mode='markers',
        marker=dict(
            color=colors,
            colorscale=colormap,
            size=10,
            showscale=bool(ticktext.size),
            colorbar=dict(
                tickmode='array',
                tickvals=list(range(len(ticktext))),
                ticktext=ticktext,
                ticks='outside',
            ),
            line=dict(color='black', width=2),
        ),
    )
    if data_traces is not None:
        data_traces.append(trace)
    plotly.offline.iplot(go.Figure(data=[trace], layout=layout))


PCA()
def Isomap(data=data, data_traces=data_traces, colors=colors, colormap=colormap, ticktext=()):
    """Embed `data` in 2-D with Isomap (10 neighbours) and plot the result.

    Parameters
    ----------
    data : array-like
        Samples to embed, one per row.
    data_traces : list or None
        When not None, the generated trace is appended to this list.
    colors : array-like
        Per-point values mapped through `colormap`.
    colormap : list
        Plotly colorscale.
    ticktext : sequence
        Colorbar tick labels; the colorbar is shown only when non-empty.
    """
    ticktext = np.asarray(ticktext)  # converts it to an array
    nb_neighbors = 10

    # Pass n_neighbors by keyword: recent scikit-learn releases no longer
    # accept it positionally, while the keyword form works everywhere.
    model = manifold.Isomap(n_neighbors=nb_neighbors, n_components=2)
    data_Isomap = model.fit_transform(data).T

    trace = go.Scatter(
        x=data_Isomap[0],
        y=data_Isomap[1],
        mode='markers',
        marker=dict(
            color=colors,
            colorscale=colormap,
            size=10,
            showscale=bool(ticktext.size),
            colorbar=dict(
                tickmode='array',
                tickvals=list(range(len(ticktext))),
                ticktext=ticktext,
                ticks='outside',
            ),
            line=dict(color='black', width=2),
        ),
    )
    if data_traces is not None:
        data_traces.append(trace)
    plotly.offline.iplot(go.Figure(data=[trace], layout=layout))


Isomap()
def LLE(data=data, data_traces=data_traces, colors=colors, colormap=colormap, ticktext=()):
    """Embed `data` in 2-D with standard locally linear embedding and plot it.

    Parameters
    ----------
    data : array-like
        Samples to embed, one per row.
    data_traces : list or None
        When not None, the generated trace is appended to this list.
    colors : array-like
        Per-point values mapped through `colormap`.
    colormap : list
        Plotly colorscale.
    ticktext : sequence
        Colorbar tick labels; the colorbar is shown only when non-empty.
    """
    ticktext = np.asarray(ticktext)  # converts it to an array
    nb_neighbors = 10

    # Pass n_neighbors / n_components by keyword: recent scikit-learn releases
    # no longer accept them positionally, while keywords work everywhere.
    model = manifold.LocallyLinearEmbedding(
        n_neighbors=nb_neighbors, n_components=2, method='standard'
    )
    data_LLE = model.fit_transform(data).T

    trace = go.Scatter(
        x=data_LLE[0],
        y=data_LLE[1],
        mode='markers',
        marker=dict(
            color=colors,
            colorscale=colormap,
            size=10,
            line=dict(color='black', width=2),
            showscale=bool(ticktext.size),
            colorbar=dict(
                tickmode='array',
                tickvals=list(range(len(ticktext))),
                ticktext=ticktext,
                ticks='outside',
            ),
        ),
    )
    if data_traces is not None:
        data_traces.append(trace)
    plotly.offline.iplot(go.Figure(data=[trace], layout=layout))


LLE()
def tSNE(data=data, data_traces=data_traces, colors=colors, colormap=colormap, ticktext=[]):
    """Embed `data` in 2-D with t-SNE and display it as a Plotly scatter plot.

    Parameters
    ----------
    data : array-like
        Samples to embed, one per row.
    data_traces : list or None
        When not None, the generated trace is appended to this list.
    colors : array-like
        Per-point values mapped through `colormap`.
    colormap : list
        Plotly colorscale.
    ticktext : sequence
        Colorbar tick labels; the colorbar is shown only when non-empty.
    """
    tick_labels = np.asarray(ticktext)

    # One row per sample comes back; transpose to get the two plot axes.
    embedder = manifold.TSNE(n_components=2)
    xs, ys = embedder.fit_transform(data).T

    colorbar = dict(
        tickmode='array',
        tickvals=list(range(len(tick_labels))),
        ticktext=tick_labels,
        ticks='outside',
    )
    outline = dict(color='black', width=2)
    marker = dict(
        color=colors,
        colorscale=colormap,
        size=10,
        showscale=bool(tick_labels.size),
        colorbar=colorbar,
        line=outline,
    )
    trace = go.Scatter(x=xs, y=ys, mode='markers', marker=marker)

    if data_traces is not None:
        data_traces.append(trace)
    plotly.offline.iplot(go.Figure(data=[trace], layout=layout))


tSNE()
import random
import fashion_mnist.utils.mnist_reader as mnist_reader
# Class names; the position of a name in this list is used as its integer label.
labels = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]

# Ten-stop Plotly colorscale sampled from matplotlib's Set3 colormap.
colormap2 = from_matplotlib(plt.cm.Set3, 10)
def convert_labels(lab, labels=labels):
    """Return the entry of `labels` found at index `lab`."""
    name = labels[lab]
    return name
# Load the training split through the project's reader.
X_train, Y_train = mnist_reader.load_mnist('fashion_mnist/data/fashion', kind='train')

nbr_img = 1500  # number of images to randomly pick
# Indices of the picked images (randint draws with replacement).
ind_rand = np.random.randint(len(X_train), size=nbr_img)
X, Y = X_train[ind_rand, :], Y_train[ind_rand]  # images, labels
Y_labels = np.array(list(map(convert_labels, Y.tolist())))

# Histogram of labels: unit-width bins starting at -0.5.
trace = go.Histogram(
    x=Y.flatten(),
    xbins=dict(
        start=-0.5,
        end=9,
        size=1,
    ),
)

# Use a dedicated name for the histogram layout. The embedding functions
# (MDS, PCA, Isomap, LLE, tSNE) read the global `layout` when they are called,
# so rebinding `layout` here would put this histogram's title and axis labels
# on every embedding plot drawn afterwards.
layout_hist = go.Layout(
    title='Labels of picked images',
    xaxis=dict(
        title='Label',
    ),
    yaxis=dict(
        title='Number',
    ),
)

fig = go.Figure(data=[trace], layout=layout_hist)
plotly.offline.iplot(fig)
display(Markdown("### Display a few of them:"))

# Show the first rows*cols picked images in a grid, each titled with its label.
rows, cols = 2, 5
plt.figure(figsize=(15, 6))
for slot in range(1, rows * cols + 1):
    picked = slot - 1  # subplot slots are 1-based, image indices 0-based
    ax = plt.subplot(rows, cols, slot)
    ax.matshow(X[picked].reshape((28, 28)))
    plt.xticks([])
    plt.yticks([])
    plt.title(Y_labels[picked])
plt.show()
N, d = X.shape

# Scatter matrix of the column-centred data.
X_centered = X - X.mean(axis=0)
C = X_centered.T @ X_centered

# eigh returns eigenvalues in ascending order, so the two leading
# eigenpairs sit at the end of the arrays.
eig, v = np.linalg.eigh(C)
eigenvalues = (eig[-1], eig[-2])
vectors_PCA = (v[:, -1], v[:, -2])

# Show both eigenvectors side by side as 28x28 images.
plt.figure(figsize=(13, 13))
captions = (
    'First eigenvector (associated to spectral radius)',
    'Second eigenvector',
)
for position, (vector, caption) in enumerate(zip(vectors_PCA, captions), start=1):
    plt.subplot(1, 2, position)
    plt.imshow(vector.reshape(28, 28), cmap='gist_gray')
    plt.title(caption)
    plt.axis('off')
plt.show()

# Coordinates of every image along the two eigenvectors (one column each).
W = np.vstack(vectors_PCA).T
proj = X.dot(W)
# One scatter trace per class so that every class gets its own legend entry.
flat_Y = Y.flatten()
traces_PDA = [
    go.Scatter(
        x=proj[flat_Y == i, 0],
        y=proj[flat_Y == i, 1],
        mode='markers',
        marker=dict(
            size=7,
            showscale=False,
            line=dict(width=1, color='rgb(0, 0, 0)'),
        ),
        name='{}'.format(labels[i]),
        showlegend=True,
    )
    for i in range(10)
]

# Axis and legend settings, assembled piece by piece.
xaxis_PDA = dict(title='Premier eigenvector', ticklen=5, zeroline=False, gridwidth=2)
yaxis_PDA = dict(title='Second eigenvector', ticklen=5, gridwidth=2)
legend_PDA = dict(orientation='h', y=-0.2)  # horizontal legend at y = -0.2
layout_PDA = dict(
    title='Projection of images on the space spanned by the first two eigenvectors',
    xaxis=xaxis_PDA,
    yaxis=yaxis_PDA,
    legend=legend_PDA,
)

plotly.offline.iplot(go.Figure(data=traces_PDA, layout=layout_PDA))
# Same arguments for both embeddings: the picked images, coloured by label,
# with the class names as colorbar ticks. data_traces=None keeps these
# traces out of the shared accumulator list.
image_embedding_args = dict(
    data=X,
    data_traces=None,
    colors=Y,
    colormap=colormap2,
    ticktext=labels,
)
tSNE(**image_embedding_args)
MDS(**image_embedding_args)
# Import the helpers explicitly instead of reaching through attribute chains:
# `import sklearn` alone does not guarantee that `sklearn.metrics` is loaded,
# and the private t-SNE module was renamed from `t_sne` to `_t_sne`.
from sklearn.metrics import pairwise_distances
try:
    from sklearn.manifold._t_sne import _joint_probabilities
except ImportError:  # older scikit-learn releases
    from sklearn.manifold.t_sne import _joint_probabilities

# Pairwise squared distances between all data points.
Dist = pairwise_distances(X, squared=True)
# Only every 10th row/column is drawn to keep the image small.
plt.imshow(Dist[::10, ::10], interpolation='none')
plt.title("Pairwise distances matrix")
plt.show()

# Similarity: probabilities computed by scikit-learn's private t-SNE helper
# (perplexity 30, verbose off), expanded to a square matrix with squareform.
# NOTE(review): this is a private API and may change between releases.
Sim = squareform(_joint_probabilities(Dist, 30., False))
plt.imshow(Sim[::10, ::10], interpolation='none')
plt.title("Conditional probabilities")
plt.show()