update draft notebook
This commit is contained in:
parent
11cdb40b3f
commit
005e808d39
|
@ -0,0 +1,211 @@
|
||||||
|
#%% [markdown]
|
||||||
|
# # Clustering classique
|
||||||
|
|
||||||
|
#%% [markdown]
|
||||||
|
# ## import classique
|
||||||
|
import os
|
||||||
|
|
||||||
|
#%%
|
||||||
|
%load_ext autoreload
|
||||||
|
%autoreload 2
|
||||||
|
os.chdir('/home/jovyan/work')
|
||||||
|
|
||||||
|
#%% [markdown]
|
||||||
|
# ## Import iss
|
||||||
|
|
||||||
|
#%%
|
||||||
|
from iss.tools import Config
|
||||||
|
from iss.tools import Tools
|
||||||
|
from iss.models import SimpleConvAutoEncoder
|
||||||
|
from iss.clustering import ClassicalClustering
|
||||||
|
from iss.clustering import AdvancedClustering
|
||||||
|
from dotenv import find_dotenv, load_dotenv
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
#%% [markdown]
|
||||||
|
# ## Chargement de la config
|
||||||
|
|
||||||
|
#%%
|
||||||
|
load_dotenv(find_dotenv())
|
||||||
|
cfg = Config(project_dir = os.getenv("PROJECT_DIR"), mode = os.getenv("MODE"))
|
||||||
|
|
||||||
|
#%% [markdown]
|
||||||
|
# ## Chargement du modèle
|
||||||
|
|
||||||
|
#%%
|
||||||
|
## charger le modèle
|
||||||
|
|
||||||
|
model_type = 'simple_conv'
|
||||||
|
cfg.get('models')[model_type]['model_name'] = 'model_colab'
|
||||||
|
model = SimpleConvAutoEncoder(cfg.get('models')[model_type])
|
||||||
|
|
||||||
|
#%% [markdown]
|
||||||
|
# ## Chargement des images
|
||||||
|
|
||||||
|
#%%
|
||||||
|
filenames = Tools.list_directory_filenames('data/processed/models/autoencoder/train/k/')
|
||||||
|
generator_imgs = Tools.generator_np_picture_from_filenames(filenames, target_size = (27, 48), batch = 496, nb_batch = 10, scale = 1/255)
|
||||||
|
|
||||||
|
|
||||||
|
#%%
|
||||||
|
pictures_id, pictures_preds = Tools.encoded_pictures_from_generator(generator_imgs, model)
|
||||||
|
|
||||||
|
#%%
|
||||||
|
intermediate_output = pictures_preds.reshape((pictures_preds.shape[0], 3*6*16))
|
||||||
|
|
||||||
|
|
||||||
|
#%%
|
||||||
|
clustering = AdvancedClustering(cfg.get('clustering')['advanced'], pictures_id, intermediate_output)
|
||||||
|
|
||||||
|
|
||||||
|
#%%
|
||||||
|
clustering.compute_pca()
|
||||||
|
|
||||||
|
|
||||||
|
#%%
|
||||||
|
clustering.compute_kmeans()
|
||||||
|
|
||||||
|
#%%
|
||||||
|
clustering.compute_kmeans_centers()
|
||||||
|
|
||||||
|
#%%
|
||||||
|
len(clustering.kmeans_centers)
|
||||||
|
|
||||||
|
#%%
|
||||||
|
clustering.dbscan_args = {'eps': 50, 'min_samples':1}
|
||||||
|
clustering.compute_dbscan()
|
||||||
|
|
||||||
|
#%%
|
||||||
|
clustering.compute_dbscan_labels()
|
||||||
|
|
||||||
|
#%%
|
||||||
|
len(clustering.final_labels)
|
||||||
|
|
||||||
|
#%%
|
||||||
|
np.unique(clustering.final_labels, return_counts = True)
|
||||||
|
|
||||||
|
#%% [markdown]
|
||||||
|
# # Graphiques
|
||||||
|
|
||||||
|
#%%
|
||||||
|
def select_cluster(clustering, id_cluster,
                   directory='data/processed/models/autoencoder/train/k/',
                   extension='.jpg'):
    """Return the picture paths of every result assigned to ``id_cluster``.

    Args:
        clustering: object exposing ``get_zip_results()``, an iterable of
            indexable records. Element 0 is used as the file stem and
            element 2 is compared with ``id_cluster`` (presumably the picture
            id and its cluster label -- confirm against the clustering class).
        id_cluster: value a record's element 2 must equal to be selected.
        directory: folder the file names are joined to. Defaults to the
            training-picture folder used throughout this notebook.
        extension: suffix appended to each file stem.

    Returns:
        list of str: one path per matching record, in iteration order.
    """
    return [
        os.path.join(directory, res[0] + extension)
        for res in clustering.get_zip_results()
        if res[2] == id_cluster
    ]
|
||||||
|
|
||||||
|
|
||||||
|
#%%
|
||||||
|
# Show, for every k-means label, a mosaic of (at most) the first 100 pictures
# that select_cluster returns for that label.
# NOTE(review): `plt` is not imported anywhere in the visible part of this
# script -- `import matplotlib.pyplot as plt` is presumably needed; confirm.
for cl in np.unique(clustering.kmeans_labels):
    print("Cluster %s" % (cl))
    res_tmp = select_cluster(clustering, cl)
    # The original test was `len(res_tmp) >= 0`, which is always true; an
    # empty cluster has nothing to draw, so skip it instead.
    if not res_tmp:
        continue
    print(len(res_tmp))
    image_array = [Tools.read_np_picture(f, target_size=(54, 96)) for f in res_tmp[:100]]
    img = Tools.display_mosaic(image_array, nrow=10)
    plt.figure(1, figsize=(12, 7))
    plt.imshow(img, aspect='auto')
    plt.show()
|
||||||
|
|
||||||
|
#%% [markdown]
|
||||||
|
# ## faut essayer de faire des paquets
|
||||||
|
|
||||||
|
#%%
|
||||||
|
from sklearn.manifold import TSNE
|
||||||
|
|
||||||
|
output_tnse = TSNE(n_components=2).fit_transform(clustering.pca_reduction)
|
||||||
|
|
||||||
|
|
||||||
|
#%%
|
||||||
|
plt.scatter(
|
||||||
|
output_tnse[:,0],
|
||||||
|
output_tnse[:,1],
|
||||||
|
c = clustering.kmeans_labels
|
||||||
|
)
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
#%%
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
|
||||||
|
tmp_km = KMeans(n_clusters = 15)
|
||||||
|
tmp_res = tmp_km.fit(output_tnse)
|
||||||
|
|
||||||
|
#%%
|
||||||
|
tmp_res.labels_
|
||||||
|
|
||||||
|
#%%
|
||||||
|
plt.scatter(
|
||||||
|
output_tnse[:,0],
|
||||||
|
output_tnse[:,1],
|
||||||
|
c = tmp_res.labels_
|
||||||
|
)
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
#%%
|
||||||
|
clustering.final_labels = tmp_res.labels_
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#%%
|
||||||
|
from scipy.cluster.hierarchy import dendrogram
|
||||||
|
from sklearn.cluster import AgglomerativeClustering
|
||||||
|
|
||||||
|
#%%
|
||||||
|
def plot_dendrogram(model, **kwargs):
    """Draw the dendrogram of a fitted hierarchical clustering model.

    A SciPy-style linkage matrix is rebuilt from ``model.children_`` and
    passed to ``scipy.cluster.hierarchy.dendrogram``.

    Args:
        model: fitted estimator exposing ``children_``, one row of two child
            node indices per merge (e.g. ``AgglomerativeClustering``).
            Assumes the full merge tree is present, i.e. there is one more
            sample than there are rows in ``children_``.
        **kwargs: forwarded unchanged to ``dendrogram``.

    Returns:
        The dictionary returned by ``dendrogram``.
    """
    # Children of hierarchical clustering
    children = np.asarray(model.children_)
    n_merges = children.shape[0]
    n_samples = n_merges + 1

    # Distances between each pair of children.
    # Since we don't have this information, use a uniform one for plotting.
    distance = np.arange(n_merges)

    # Number of original observations contained in each merged cluster:
    # an index below n_samples is a leaf (counts for 1), any other index
    # refers to the cluster created by merge number (index - n_samples).
    counts = np.zeros(n_merges)
    for i, merge in enumerate(children):
        for child in merge:
            if child < n_samples:
                counts[i] += 1
            else:
                counts[i] += counts[child - n_samples]

    # Create linkage matrix and then plot the dendrogram
    linkage_matrix = np.column_stack([children, distance, counts]).astype(float)

    return dendrogram(linkage_matrix, **kwargs)
|
||||||
|
|
||||||
|
#%%
|
||||||
|
cah_fit = AgglomerativeClustering(n_clusters=10)
|
||||||
|
|
||||||
|
#%%
|
||||||
|
cah_fit = cah_fit.fit(clustering.kmeans_centers)
|
||||||
|
|
||||||
|
#%%
|
||||||
|
fig = plt.figure(1, figsize=(12, 7))
|
||||||
|
plot_dendrogram(cah_fit, labels = cah_fit.labels_)
|
||||||
|
|
||||||
|
#%%
|
||||||
|
cah_fit.labels_
|
||||||
|
|
||||||
|
#%%
|
||||||
|
tmp = Tools.read_np_picture('data/processed/models/autoencoder/train/k/20171109-192001.jpg',target_size = (27, 48), scale = 1/255)
|
||||||
|
tmp = tmp.reshape((1,27,48,3))
|
||||||
|
np.sum(model.get_encoded_prediction(tmp))
|
||||||
|
|
||||||
|
#%%
|
||||||
|
filenames = Tools.list_directory_filenames('data/processed/models/autoencoder/train/k/')
|
||||||
|
generator_imgs = Tools.generator_np_picture_from_filenames(filenames, target_size = (27, 48), batch = 10, nb_batch = 3, scale = 1/255)
|
||||||
|
|
||||||
|
predictions_list = []
|
||||||
|
predictions_id = []
|
||||||
|
for imgs in generator_imgs:
|
||||||
|
predictions_id.append(imgs[0])
|
||||||
|
predictions_list.append(model.get_encoded_prediction(imgs[1]))
|
||||||
|
|
||||||
|
#%%
|
||||||
|
np.concatenate(tuple(predictions_list), axis = 0)[0,:,:,:]
|
||||||
|
|
||||||
|
#%%
|
||||||
|
predictions_list[0][0,:,:,:]
|
||||||
|
|
||||||
|
#%%
|
||||||
|
print(pictures_preds[1,:,:,:])
|
||||||
|
|
||||||
|
|
||||||
|
#%%
|
||||||
|
pictures_preds.shape
|
||||||
|
|
||||||
|
#%%
|
|
@ -18,6 +18,7 @@ from iss.tools import Config
|
||||||
from iss.tools import Tools
|
from iss.tools import Tools
|
||||||
from iss.models import SimpleConvAutoEncoder
|
from iss.models import SimpleConvAutoEncoder
|
||||||
from iss.clustering import ClassicalClustering
|
from iss.clustering import ClassicalClustering
|
||||||
|
from iss.clustering import AdvancedClustering
|
||||||
from dotenv import find_dotenv, load_dotenv
|
from dotenv import find_dotenv, load_dotenv
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
@ -149,7 +150,3 @@ plt.scatter(clustering.pca_reduction[:, 0], clustering.pca_reduction[:, 1], c =
|
||||||
|
|
||||||
#%%
|
#%%
|
||||||
plt.scatter(clustering.pca_reduction[np.array(col) == 1, 0], clustering.pca_reduction[np.array(col) == 1, 1])
|
plt.scatter(clustering.pca_reduction[np.array(col) == 1, 0], clustering.pca_reduction[np.array(col) == 1, 1])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#%%
|
|
||||||
|
|
|
@ -147,47 +147,28 @@
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 9,
|
"execution_count": 9,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"i_debut:0\n",
|
|
||||||
"i_fin:496\n",
|
|
||||||
"i_debut:496\n",
|
|
||||||
"i_fin:992\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"pictures_preds = Tools.encoded_pictures_from_generator(generator_imgs, model)"
|
"pictures_preds = Tools.encoded_pictures_from_generator(generator_imgs, model)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"ename": "AttributeError",
|
||||||
"text/plain": [
|
"evalue": "'tuple' object has no attribute 'reshape'",
|
||||||
"(992, 3, 6, 16)"
|
"output_type": "error",
|
||||||
]
|
"traceback": [
|
||||||
},
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
"execution_count": 10,
|
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
||||||
"metadata": {},
|
"\u001b[0;32m<ipython-input-13-e3d22d0becf7>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mintermediate_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpictures_preds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m992\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;36m16\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
||||||
"output_type": "execute_result"
|
"\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'reshape'"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
|
||||||
"pictures_preds.shape"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 11,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
"source": [
|
||||||
"intermediate_output = pictures_preds.reshape((992, 3*6*16))"
|
"intermediate_output = pictures_preds.reshape((992, 3*6*16))"
|
||||||
]
|
]
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -31,17 +31,21 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 4,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Requirement already satisfied: MySQL-connector-python in /opt/conda/lib/python3.6/site-packages (8.0.15)\n",
|
"Collecting MySQL-connector-python\n",
|
||||||
"Requirement already satisfied: protobuf>=3.0.0 in /opt/conda/lib/python3.6/site-packages (from MySQL-connector-python) (3.6.1)\n",
|
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/f7/59/c2220c52d747da492f2aed108cdf99b640b88cf89dbbe2ea13a8c04201aa/mysql_connector_python-8.0.18-cp36-cp36m-manylinux1_x86_64.whl (16.1MB)\n",
|
||||||
|
"\u001b[K 100% |████████████████████████████████| 16.1MB 4.1MB/s \n",
|
||||||
|
"\u001b[?25hRequirement already satisfied: protobuf>=3.0.0 in /opt/conda/lib/python3.6/site-packages (from MySQL-connector-python) (3.6.1)\n",
|
||||||
"Requirement already satisfied: six>=1.9 in /opt/conda/lib/python3.6/site-packages (from protobuf>=3.0.0->MySQL-connector-python) (1.12.0)\n",
|
"Requirement already satisfied: six>=1.9 in /opt/conda/lib/python3.6/site-packages (from protobuf>=3.0.0->MySQL-connector-python) (1.12.0)\n",
|
||||||
"Requirement already satisfied: setuptools in /opt/conda/lib/python3.6/site-packages (from protobuf>=3.0.0->MySQL-connector-python) (40.8.0)\n"
|
"Requirement already satisfied: setuptools in /opt/conda/lib/python3.6/site-packages (from protobuf>=3.0.0->MySQL-connector-python) (40.8.0)\n",
|
||||||
|
"Installing collected packages: MySQL-connector-python\n",
|
||||||
|
"Successfully installed MySQL-connector-python-8.0.18\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -51,7 +55,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 5,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -66,9 +70,21 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 6,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "TypeError",
|
||||||
|
"evalue": "__init__() missing 2 required positional arguments: 'project_dir' and 'mode'",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||||
|
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
|
||||||
|
"\u001b[0;32m<ipython-input-6-e6f50bbb757a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcfg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mConfig\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
||||||
|
"\u001b[0;31mTypeError\u001b[0m: __init__() missing 2 required positional arguments: 'project_dir' and 'mode'"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"cfg = Config()"
|
"cfg = Config()"
|
||||||
]
|
]
|
||||||
|
|
Loading…
Reference in New Issue