update draft notebook

This commit is contained in:
Francois Vieille 2019-11-16 18:29:42 +01:00
parent 11cdb40b3f
commit 005e808d39
5 changed files with 504 additions and 156 deletions


@@ -0,0 +1,211 @@
#%% [markdown]
# # Classical clustering
#%% [markdown]
# ## Standard imports
#%%
import os
%load_ext autoreload
%autoreload 2
os.chdir('/home/jovyan/work')
#%% [markdown]
# ## Import iss
#%%
from iss.tools import Config
from iss.tools import Tools
from iss.models import SimpleConvAutoEncoder
from iss.clustering import ClassicalClustering
from iss.clustering import AdvancedClustering
from dotenv import find_dotenv, load_dotenv
import numpy as np
import matplotlib.pyplot as plt
#%% [markdown]
# ## Load the config
#%%
load_dotenv(find_dotenv())
cfg = Config(project_dir = os.getenv("PROJECT_DIR"), mode = os.getenv("MODE"))
#%% [markdown]
# ## Load the model
#%%
# load the model
model_type = 'simple_conv'
cfg.get('models')[model_type]['model_name'] = 'model_colab'
model = SimpleConvAutoEncoder(cfg.get('models')[model_type])
#%% [markdown]
# ## Load the images
#%%
filenames = Tools.list_directory_filenames('data/processed/models/autoencoder/train/k/')
generator_imgs = Tools.generator_np_picture_from_filenames(filenames, target_size = (27, 48), batch = 496, nb_batch = 10, scale = 1/255)
#%%
pictures_id, pictures_preds = Tools.encoded_pictures_from_generator(generator_imgs, model)
#%%
intermediate_output = pictures_preds.reshape((pictures_preds.shape[0], 3*6*16))
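#%% [markdown]
# Note (added): `Tools.encoded_pictures_from_generator` returns a pair here (ids and encoded arrays),
# hence the unpacking above. Each encoding is a 3x6x16 tensor, flattened to a 288-dimensional vector (3*6*16 = 288).
#%%
# Quick sanity check (sketch, assuming the shapes used in the reshape above).
print(len(pictures_id), intermediate_output.shape)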
#%%
clustering = AdvancedClustering(cfg.get('clustering')['advanced'], pictures_id, intermediate_output)
#%%
clustering.compute_pca()
#%%
clustering.compute_kmeans()
#%%
clustering.compute_kmeans_centers()
#%%
len(clustering.kmeans_centers)
#%%
clustering.dbscan_args = {'eps': 50, 'min_samples':1}
clustering.compute_dbscan()
#%%
clustering.compute_dbscan_labels()
#%%
len(clustering.final_labels)
#%%
np.unique(clustering.final_labels, return_counts = True)
#%% [markdown]
# # Plots
#%%
def select_cluster(clustering, id_cluster):
    """Return the image file paths assigned to the cluster `id_cluster`."""
    return [os.path.join('data/processed/models/autoencoder/train/k/', res[0] + '.jpg') for res in clustering.get_zip_results() if res[2] == id_cluster]
#%%
for cl in np.unique(clustering.kmeans_labels):
    print("Cluster %s" % (cl))
    res_tmp = select_cluster(clustering, cl)
    if len(res_tmp) > 0:
        print(len(res_tmp))
        image_array = [Tools.read_np_picture(f, target_size = (54, 96)) for f in res_tmp[:100]]
        img = Tools.display_mosaic(image_array, nrow = 10)
        fig = plt.figure(1, figsize=(12, 7))
        plt.imshow(img, aspect = 'auto')
        plt.show()
#%% [markdown]
# ## We should try to form groups
#%%
from sklearn.manifold import TSNE
output_tnse = TSNE(n_components=2).fit_transform(clustering.pca_reduction)
#%%
plt.scatter(
    output_tnse[:, 0],
    output_tnse[:, 1],
    c = clustering.kmeans_labels
)
plt.show()
#%%
from sklearn.cluster import KMeans
tmp_km = KMeans(n_clusters = 15)
tmp_res = tmp_km.fit(output_tnse)
#%%
tmp_res.labels_
#%%
plt.scatter(
    output_tnse[:, 0],
    output_tnse[:, 1],
    c = tmp_res.labels_
)
plt.show()
#%%
clustering.final_labels = tmp_res.labels_
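#%% [markdown]
# Optional check (added): cluster sizes after overriding `final_labels` with the KMeans-on-t-SNE labels.
#%%
np.unique(clustering.final_labels, return_counts = True)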
#%%
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering
#%%
def plot_dendrogram(model, **kwargs):
    # Children of hierarchical clustering
    children = model.children_
    # Distances between each pair of children
    # Since we don't have this information, we can use a uniform one for plotting
    distance = np.arange(children.shape[0])
    # The number of observations contained in each cluster level
    no_of_observations = np.arange(2, children.shape[0]+2)
    # Create linkage matrix and then plot the dendrogram
    linkage_matrix = np.column_stack([children, distance, no_of_observations]).astype(float)
    # Plot the corresponding dendrogram
    dendrogram(linkage_matrix, **kwargs)
#%%
cah_fit = AgglomerativeClustering(n_clusters=10)
#%%
cah_fit = cah_fit.fit(clustering.kmeans_centers)
#%%
fig = plt.figure(1, figsize=(12, 7))
plot_dendrogram(cah_fit, labels = cah_fit.labels_)
#%%
cah_fit.labels_
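#%% [markdown]
# Possible next step (sketch, not in the original): `cah_fit.labels_` gives one CAH group per KMeans center,
# so each image could be mapped to a CAH group through its KMeans label, assuming `clustering.kmeans_labels`
# holds one center index per image.
#%%
# Hypothetical mapping from per-center CAH groups to per-image labels.
cah_labels_per_image = np.asarray(cah_fit.labels_)[np.asarray(clustering.kmeans_labels)]
np.unique(cah_labels_per_image, return_counts = True)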
#%%
tmp = Tools.read_np_picture('data/processed/models/autoencoder/train/k/20171109-192001.jpg',target_size = (27, 48), scale = 1/255)
tmp = tmp.reshape((1,27,48,3))
np.sum(model.get_encoded_prediction(tmp))
#%%
filenames = Tools.list_directory_filenames('data/processed/models/autoencoder/train/k/')
generator_imgs = Tools.generator_np_picture_from_filenames(filenames, target_size = (27, 48), batch = 10, nb_batch = 3, scale = 1/255)
predictions_list = []
predictions_id = []
for imgs in generator_imgs:
    predictions_id.append(imgs[0])
    predictions_list.append(model.get_encoded_prediction(imgs[1]))
#%%
np.concatenate(tuple(predictions_list), axis = 0)[0,:,:,:]
#%%
predictions_list[0][0,:,:,:]
#%%
print(pictures_preds[1,:,:,:])
#%%
pictures_preds.shape
#%%


@@ -18,6 +18,7 @@ from iss.tools import Config
 from iss.tools import Tools
 from iss.models import SimpleConvAutoEncoder
 from iss.clustering import ClassicalClustering
+from iss.clustering import AdvancedClustering
 from dotenv import find_dotenv, load_dotenv
 import numpy as np
@@ -149,7 +150,3 @@ plt.scatter(clustering.pca_reduction[:, 0], clustering.pca_reduction[:, 1], c =
 #%%
 plt.scatter(clustering.pca_reduction[np.array(col) == 1, 0], clustering.pca_reduction[np.array(col) == 1, 1])
-#%%


@@ -147,47 +147,28 @@
    "cell_type": "code",
    "execution_count": 9,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "i_debut:0\n",
-      "i_fin:496\n",
-      "i_debut:496\n",
-      "i_fin:992\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "pictures_preds = Tools.encoded_pictures_from_generator(generator_imgs, model)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "(992, 3, 6, 16)"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
+     "ename": "AttributeError",
+     "evalue": "'tuple' object has no attribute 'reshape'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-13-e3d22d0becf7>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mintermediate_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpictures_preds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m992\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;36m16\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'reshape'"
+     ]
    }
   ],
-   "source": [
-    "pictures_preds.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
    "source": [
     "intermediate_output = pictures_preds.reshape((992, 3*6*16))"
    ]

File diff suppressed because one or more lines are too long


@@ -31,17 +31,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Requirement already satisfied: MySQL-connector-python in /opt/conda/lib/python3.6/site-packages (8.0.15)\n",
-      "Requirement already satisfied: protobuf>=3.0.0 in /opt/conda/lib/python3.6/site-packages (from MySQL-connector-python) (3.6.1)\n",
+      "Collecting MySQL-connector-python\n",
+      "\u001b[?25l Downloading https://files.pythonhosted.org/packages/f7/59/c2220c52d747da492f2aed108cdf99b640b88cf89dbbe2ea13a8c04201aa/mysql_connector_python-8.0.18-cp36-cp36m-manylinux1_x86_64.whl (16.1MB)\n",
+      "\u001b[K 100% |████████████████████████████████| 16.1MB 4.1MB/s \n",
+      "\u001b[?25hRequirement already satisfied: protobuf>=3.0.0 in /opt/conda/lib/python3.6/site-packages (from MySQL-connector-python) (3.6.1)\n",
       "Requirement already satisfied: six>=1.9 in /opt/conda/lib/python3.6/site-packages (from protobuf>=3.0.0->MySQL-connector-python) (1.12.0)\n",
-      "Requirement already satisfied: setuptools in /opt/conda/lib/python3.6/site-packages (from protobuf>=3.0.0->MySQL-connector-python) (40.8.0)\n"
+      "Requirement already satisfied: setuptools in /opt/conda/lib/python3.6/site-packages (from protobuf>=3.0.0->MySQL-connector-python) (40.8.0)\n",
+      "Installing collected packages: MySQL-connector-python\n",
+      "Successfully installed MySQL-connector-python-8.0.18\n"
      ]
     }
    ],
@@ -51,7 +55,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -66,9 +70,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "__init__() missing 2 required positional arguments: 'project_dir' and 'mode'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-6-e6f50bbb757a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcfg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mConfig\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m: __init__() missing 2 required positional arguments: 'project_dir' and 'mode'"
+     ]
+    }
+   ],
    "source": [
     "cfg = Config()"
    ]