diff --git a/iss/clustering/__init__.py b/iss/clustering/__init__.py index 3a6c88f..9e37cc3 100644 --- a/iss/clustering/__init__.py +++ b/iss/clustering/__init__.py @@ -1,2 +1,4 @@ from .AbstractClustering import AbstractClustering -from .ClassicalClustering import ClassicalClustering \ No newline at end of file +from .ClassicalClustering import ClassicalClustering +from .AdvancedClustering import AdvancedClustering +from .N2DClustering import N2DClustering \ No newline at end of file diff --git a/iss/exec/clustering.py b/iss/exec/clustering.py new file mode 100644 index 0000000..e1e2671 --- /dev/null +++ b/iss/exec/clustering.py @@ -0,0 +1,186 @@ +import os +import numpy as np +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt + +import pandas as pd +from bokeh.plotting import figure, output_file, show +from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper + +from iss.init_config import CONFIG +from iss.tools import Tools +from iss.models import SimpleConvAutoEncoder +from iss.clustering import ClassicalClustering, AdvancedClustering, N2DClustering + +## variable globales + +_MODEL_TYPE = 'simple_conv' +_MODEL_NAME = 'model_colab' +_BATCH_SIZE = 496 +_N_BATCH = 10 +_DEBUG = True +_CLUSTERING_TYPE = 'n2d' +_OUTPUT_IMAGE_WIDTH = 96 +_OUTPUT_IMAGE_HEIGHT = 54 +_MOSAIC_NROW = 10 +_MOSAIC_NCOL_MAX = 10 + + +## Charger le modèle +CONFIG.get('models')[_MODEL_TYPE]['model_name'] = _MODEL_NAME +model = SimpleConvAutoEncoder(CONFIG.get('models')[_MODEL_TYPE]) +model_config = CONFIG.get('models')[_MODEL_TYPE] + +## Charger les images +filenames = Tools.list_directory_filenames(os.path.join(CONFIG.get('directory')['autoencoder']['train'])) +generator_imgs = Tools.generator_np_picture_from_filenames(filenames, target_size = (model_config['input_height'], model_config['input_width']), batch = _BATCH_SIZE, nb_batch = _N_BATCH) + +pictures_id, pictures_preds = Tools.encoded_pictures_from_generator(generator_imgs, model) +intermediate_output = pictures_preds.reshape((pictures_preds.shape[0], model_config['latent_width']*model_config['latent_height']*model_config['latent_channel'])) + + +if _DEBUG: + for i, p_id in enumerate(pictures_id[:2]): + print("%s: %s" % (p_id, pictures_preds[i])) + print(len(pictures_id)) + print(len(intermediate_output)) + + +## Clustering +if _CLUSTERING_TYPE == 'classical': + if _DEBUG: + print("Classical Clustering") + clustering = ClassicalClustering(CONFIG.get('clustering')['classical'], pictures_id, intermediate_output) + clustering.compute_pca() + clustering.compute_kmeans() + clustering.compute_kmeans_centers() + clustering.compute_cah() + clustering.compute_final_labels() + clustering.compute_tsne() + clustering.compute_colors() +elif _CLUSTERING_TYPE == 'advanced': + if _DEBUG: + print("Advanced Clustering") + clustering = AdvancedClustering(CONFIG.get('clustering')['classical'], pictures_id, intermediate_output) +elif _CLUSTERING_TYPE == 'n2d': + if _DEBUG: + print("Not2Deep Clustering") + clustering = N2DClustering(CONFIG.get('clustering')['n2d'], pictures_id, intermediate_output) + clustering.compute_umap() + clustering.compute_kmeans() + clustering.compute_final_labels() + clustering.compute_colors() + +silhouettes = clustering.compute_silhouette_score() +clustering_res = clustering.get_results() + +if _DEBUG: + print(clustering_res[:2]) + print(silhouettes) + + +if _CLUSTERING_TYPE in ['classical']: + ## Graphs of PCA and final clusters + fig, ax = plt.subplots(figsize=(24, 14)) + scatter = ax.scatter(clustering.pca_reduction[:, 0], clustering.pca_reduction[:, 1], c = clustering.colors) + legend1 = ax.legend(*scatter.legend_elements(), loc="lower left", title="Classes") + ax.add_artist(legend1) + plt.savefig(os.path.join(CONFIG.get('clustering')[_CLUSTERING_TYPE]['save_directory'], 'pca_clusters.png')) + +if _CLUSTERING_TYPE in ['classical']: + ## Graphs of TSNE and final clusters + fig, ax = plt.subplots(figsize=(24, 14)) + classes = clustering.final_labels + scatter = ax.scatter(clustering.tsne_embedding[:, 0], clustering.tsne_embedding[:, 1], c = clustering.colors) + legend1 = ax.legend(*scatter.legend_elements(), loc="lower left", title="Classes") + ax.add_artist(legend1) + plt.savefig(os.path.join(CONFIG.get('clustering')[_CLUSTERING_TYPE]['save_directory'], 'tsne_clusters.png')) + +if _CLUSTERING_TYPE in ['n2d']: + ## Graphs of TSNE and final clusters + fig, ax = plt.subplots(figsize=(24, 14)) + classes = clustering.final_labels + scatter = ax.scatter(clustering.umap_embedding[:, 0], clustering.umap_embedding[:, 1], c = clustering.colors) + legend1 = ax.legend(*scatter.legend_elements(), loc="lower left", title="Classes") + ax.add_artist(legend1) + plt.savefig(os.path.join(CONFIG.get('clustering')[_CLUSTERING_TYPE]['save_directory'], 'umap_clusters.png')) + +if _CLUSTERING_TYPE in ['n2d']: + filenames = [os.path.join(CONFIG.get('directory')['collections'], "%s.jpg" % one_res[0]) for one_res in clustering_res] + images_array = [Tools.read_np_picture(img_filename, target_size = (54, 96)) for img_filename in filenames] + base64_images = [Tools.base64_image(img) for img in images_array] + + print(clustering.umap_embedding) + print(clustering.umap_embedding.shape) + + x = clustering.umap_embedding[:, 0] + y = clustering.umap_embedding[:, 1] + + df = pd.DataFrame({'x': x, 'y': y}) + df['image'] = base64_images + df['label'] = clustering.final_labels.astype(str) + df['color'] = df['label'].apply(Tools.get_color_from_label) + + datasource = ColumnDataSource(df) + + output_file(os.path.join(CONFIG.get('clustering')[_CLUSTERING_TYPE]['save_directory'], 'umap_bokeh.html')) + + plot_figure = figure( + title='UMAP projection of iss clusters', + # plot_width=1200, + # plot_height=1200, + tools=('pan, wheel_zoom, reset') + ) + + plot_figure.add_tools(HoverTool(tooltips=""" +