mirror of
https://github.com/prise6/smart-iss-posts
synced 2024-06-08 00:32:12 +02:00
test du clustering simple
This commit is contained in:
parent
6a45ee0b04
commit
4c5a826ec7
|
@ -2,7 +2,7 @@
|
|||
|
||||
class AbstractClustering:
|
||||
|
||||
def __init__(config, pictures_id, pictures_np):
|
||||
def __init__(self, config, pictures_id, pictures_np):
|
||||
|
||||
self.config = config
|
||||
self.pictures_id = pictures_id
|
||||
|
|
|
@ -1,53 +1,63 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
from iss.clustering import AbstractClustering
|
||||
from sklearn.decomposition import PCA
|
||||
from sklearn.cluster import KMeans
|
||||
from sklearn.cluster import AgglomerativeClustering
|
||||
from iss.tools import Tools
|
||||
from sklearn.externals import joblib
|
||||
|
||||
class ClassicalClustering(AbstractClustering):
|
||||
|
||||
def __init__(config, pictures_id, pictures_np):
|
||||
def __init__(self, config, pictures_id = None, pictures_np = None):
|
||||
|
||||
super().__init__(config, pictures_id, pictures_np)
|
||||
|
||||
self.pca_fit = None
|
||||
self.pca_args = self.config['PCA']
|
||||
self.pca_reduction = None
|
||||
self.pca_save_name = "PCA_model_v%s.pkl" % (self.config['version'])
|
||||
|
||||
self.kmeans_fit = None
|
||||
self.kmeans_args = self.config['kmeans']
|
||||
self.kmeans_labels = None
|
||||
self.kmeans_centers = []
|
||||
self.kmeans_save_name = "kmeans_model_v%s.pkl" % (self.config['version'])
|
||||
|
||||
|
||||
self.cah_fit = None
|
||||
self.cah_args = self.config['CAH']
|
||||
self.cah_labels = None
|
||||
self.cah_save_name = "cah_model_v%s.pkl" % (self.config['version'])
|
||||
|
||||
self.final_labels = None
|
||||
|
||||
super().__init__(config, pictures_id, pictures_np)
|
||||
|
||||
def compute_pca(self):
|
||||
|
||||
def pca_fit(self):
|
||||
|
||||
self.pca_fit = PCA(**self.pca_args**)
|
||||
self.pca_fit = PCA(**self.pca_args)
|
||||
self.pca_fit.fit(self.pictures_np)
|
||||
self.pca_reduction = self.pca_fit.transform(self.pictures_np)
|
||||
|
||||
return self
|
||||
|
||||
def kmeans_fit(self):
|
||||
self.kmeans_fit = KMeans(self.kmeans_args**)
|
||||
def compute_kmeans(self):
|
||||
self.kmeans_fit = KMeans(**self.kmeans_args)
|
||||
self.kmeans_fit.fit(self.pca_reduction)
|
||||
self.kmeans_labels = self.kmeans_fit.labels_
|
||||
return self
|
||||
|
||||
def compute_kmeans_centers(self):
|
||||
for cl in range(self.kmeans_args['n_clusters']):
|
||||
tmp = self.[np.where(self.kmeans_labels == cl)]
|
||||
tmp = self.pca_reduction[np.where(self.kmeans_labels == cl)]
|
||||
self.kmeans_centers.append(np.mean(tmp, axis = 0))
|
||||
return self
|
||||
|
||||
def cah_fit(self):
|
||||
def compute_cah(self):
|
||||
|
||||
self.cah_fit = AgglomerativeClustering(self.cah_args**)
|
||||
self.cah_fit = AgglomerativeClustering(**self.cah_args)
|
||||
self.cah_fit.fit_predict(self.kmeans_centers)
|
||||
self.cah_labels = self.cah_fit.labels_
|
||||
return self
|
||||
|
@ -55,4 +65,19 @@ class ClassicalClustering(AbstractClustering):
|
|||
def compute_cah_labels(self):
    """Translate each k-means label into the label of its CAH cluster.

    ``self.cah_labels`` is indexed by k-means cluster id, so looking up every
    entry of ``self.kmeans_labels`` gives one final label per entry. The
    result is stored as a list in ``self.final_labels``.

    Returns:
        self, so the call can be chained like the other ``compute_*`` steps.
    """
    cah_labels = self.cah_labels
    self.final_labels = [cah_labels[kmeans_label] for kmeans_label in self.kmeans_labels]
    # Returning self keeps this step consistent with compute_pca,
    # compute_kmeans, compute_kmeans_centers and compute_cah.
    return self
|
||||
|
||||
def get_zip_results(self):
    """Return a lazy iterator of per-picture result tuples.

    Each tuple is ``(picture_id, final_label, kmeans_label, picture_array)``,
    obtained by zipping the four corresponding attributes in lockstep.
    """
    columns = (
        self.pictures_id,
        self.final_labels,
        self.kmeans_labels,
        self.pictures_np,
    )
    return zip(*columns)
|
||||
|
||||
def save(self):
    """Persist the PCA, k-means and CAH model objects with joblib.

    The target directory is ``self.config['save_directory']``; it is created
    first through ``Tools.create_dir_if_not_exists``. Each model is written
    under its matching ``*_save_name`` file name.
    """
    target_dir = self.config['save_directory']
    Tools.create_dir_if_not_exists(target_dir)

    artifacts = (
        (self.pca_fit, self.pca_save_name),
        (self.kmeans_fit, self.kmeans_save_name),
        (self.cah_fit, self.cah_save_name),
    )
    for model, file_name in artifacts:
        joblib.dump(model, os.path.join(target_dir, file_name))
|
||||
|
||||
def load(self):
    """Reload the PCA, k-means and CAH models from ``self.config['save_directory']``.

    Only the three ``*_fit`` attributes are replaced; no other attribute of
    the instance is modified here.
    """
    model_dir = self.config['save_directory']

    def _read(file_name):
        # Read one joblib file from the save directory.
        return joblib.load(os.path.join(model_dir, file_name))

    self.pca_fit = _read(self.pca_save_name)
    self.kmeans_fit = _read(self.kmeans_save_name)
    self.cah_fit = _read(self.cah_save_name)
|
||||
|
||||
|
||||
|
|
|
@ -1 +1,2 @@
|
|||
from .AbstractClustering import AbstractClustering
|
||||
from .ClassicalClustering import ClassicalClustering
|
155
notebooks/classical_clustering.py
Normal file
155
notebooks/classical_clustering.py
Normal file
|
@ -0,0 +1,155 @@
|
|||
#%% [markdown]
|
||||
# # Clustering classique
|
||||
|
||||
#%% [markdown]
|
||||
# ## import classique
|
||||
import os
|
||||
|
||||
#%%
|
||||
%load_ext autoreload
|
||||
%autoreload 2
|
||||
os.chdir('/home/jovyan/work')
|
||||
|
||||
#%% [markdown]
|
||||
# ## Import iss
|
||||
|
||||
#%%
|
||||
from iss.tools import Config
|
||||
from iss.tools import Tools
|
||||
from iss.models import SimpleConvAutoEncoder
|
||||
from iss.clustering import ClassicalClustering
|
||||
from dotenv import find_dotenv, load_dotenv
|
||||
import numpy as np
|
||||
|
||||
#%% [markdown]
|
||||
# ## Chargement de la config
|
||||
|
||||
#%%
|
||||
load_dotenv(find_dotenv())
|
||||
cfg = Config(project_dir = os.getenv("PROJECT_DIR"), mode = os.getenv("MODE"))
|
||||
|
||||
#%% [markdown]
|
||||
# ## Chargement du modèle
|
||||
|
||||
#%%
|
||||
## charger le modèle
|
||||
model_type = 'simple_conv'
|
||||
cfg.get('models')[model_type]['model_name'] = 'model_colab'
|
||||
model = SimpleConvAutoEncoder(cfg.get('models')[model_type])
|
||||
|
||||
#%% [markdown]
|
||||
# ## Chargement des images
|
||||
|
||||
#%%
|
||||
filenames = Tools.list_directory_filenames('data/processed/models/autoencoder/train/k/')
|
||||
generator_imgs = Tools.generator_np_picture_from_filenames(filenames, target_size = (27, 48), batch = 496, nb_batch = 10)
|
||||
|
||||
#%%
|
||||
pictures_id, pictures_preds = Tools.encoded_pictures_from_generator(generator_imgs, model)
|
||||
|
||||
#%%
|
||||
intermediate_output = pictures_preds.reshape((pictures_preds.shape[0], 3*6*16))
|
||||
|
||||
|
||||
#%% [markdown]
|
||||
# ## ACP
|
||||
# Réduction de la dimension
|
||||
|
||||
#%%
|
||||
clustering = ClassicalClustering(cfg.get('clustering')['classical'], pictures_id, intermediate_output)
|
||||
|
||||
#%%
|
||||
clustering.compute_pca()
|
||||
|
||||
|
||||
#%% [markdown]
|
||||
# ## Kmeans
|
||||
# Premiers clusters
|
||||
|
||||
#%%
|
||||
clustering.compute_kmeans()
|
||||
clustering.compute_kmeans_centers()
|
||||
|
||||
#%% [markdown]
|
||||
# ## CAH
|
||||
# Seconds clusters
|
||||
|
||||
#%%
|
||||
clustering.compute_cah()
|
||||
clustering.compute_cah_labels()
|
||||
|
||||
#%% [markdown]
|
||||
# ## Résultats
|
||||
|
||||
#%% [markdown]
|
||||
# ### Clusters intermédiaires
|
||||
#%%
|
||||
fig = plt.figure(1, figsize=(12, 7))
|
||||
plt.scatter(clustering.pca_reduction[:, 0], clustering.pca_reduction[:, 1], c = clustering.kmeans_labels)
|
||||
|
||||
|
||||
#%% [markdown]
|
||||
# ### Clusters finaux
|
||||
|
||||
#%%
|
||||
plt.scatter(clustering.pca_reduction[:, 0], clustering.pca_reduction[:, 1], c = clustering.final_labels)
|
||||
|
||||
|
||||
#%% [markdown]
|
||||
# ### Sauvegarde des modèles
|
||||
|
||||
#%%
|
||||
clustering.save()
|
||||
|
||||
|
||||
#%%
|
||||
# clustering = ClassicalClustering(cfg.get('clustering')['classical'])
|
||||
clustering.load()
|
||||
|
||||
#%% [markdown]
|
||||
# ## Visualisation des clusters
|
||||
|
||||
#%%
|
||||
def select_cluster(clustering, id_cluster,
                   directory='data/processed/models/autoencoder/train/k/',
                   extension='.jpg'):
    """Return the image paths of the pictures assigned to one k-means cluster.

    Args:
        clustering: object exposing ``get_zip_results()``; each yielded row
            must carry the picture id at index 0 and the k-means label at
            index 2 (the layout produced by ClassicalClustering).
        id_cluster: k-means label to keep.
        directory: folder containing the picture files. The default is the
            training folder used elsewhere in this notebook.
        extension: suffix appended to each picture id to form the file name.

    Returns:
        A list of file paths, in the order the rows are yielded.
    """
    return [
        os.path.join(directory, row[0] + extension)
        for row in clustering.get_zip_results()
        # Filter on the intermediate k-means label, not the final CAH label.
        if row[2] == id_cluster
    ]
|
||||
|
||||
#%%
|
||||
from IPython.display import Image
|
||||
|
||||
#%%
|
||||
for cl in range(0,19):
|
||||
print("Cluster %s" % (cl))
|
||||
res_tmp = select_cluster(clustering, cl)
|
||||
print(len(res_tmp))
|
||||
image_array = [Tools.read_np_picture(f, target_size = (54, 96)) for f in res_tmp[:100]]
|
||||
# img = Tools.display_mosaic(image_array, nrow = 10)
|
||||
# fig = plt.figure(1, figsize=(12, 7))
|
||||
# plt.imshow(img, aspect = 'auto')
|
||||
# plt.show()
|
||||
|
||||
|
||||
#%% [markdown]
|
||||
# ## Zoom sur le cluster 1
|
||||
|
||||
#%%
|
||||
res_tmp = select_cluster(clustering, 1)
|
||||
|
||||
#%%
|
||||
print(len(res_tmp))
|
||||
image_array = [Tools.read_np_picture(f, target_size = (54, 96)) for f in res_tmp]
|
||||
|
||||
|
||||
#%%
|
||||
Tools.display_mosaic(image_array, nrow = 18)
|
||||
|
||||
|
||||
#%%
|
||||
col = [1 if l == 1 else 0 for l in clustering.kmeans_labels]
|
||||
plt.scatter(clustering.pca_reduction[:, 0], clustering.pca_reduction[:, 1], c = col)
|
||||
|
||||
#%%
|
||||
plt.scatter(clustering.pca_reduction[np.array(col) == 1, 0], clustering.pca_reduction[np.array(col) == 1, 1])
|
||||
|
||||
|
||||
|
||||
#%%
|
789
notebooks/test_clustering.ipynb
Normal file
789
notebooks/test_clustering.ipynb
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue