diff --git a/Makefile b/Makefile index f36a9d3..36062ea 100644 --- a/Makefile +++ b/Makefile @@ -52,13 +52,21 @@ config_template: # PROJECT RULES # ################################################################################# -## Make Dataset -data: requirements - $(PYTHON_INTERPRETER) src/data/make_dataset.py - ## Sync photos with my refs sync_collections: iss/data/sync_collections.sh - $(PYTHON_INTERPRETER) iss/data/sync_collections.sh + /bin/sh iss/data/sync_collections.sh + +populate_db: + $(PYTHON_INTERPRETER) -m iss.exec.bdd + +sampling: + $(PYTHON_INTERPRETER) -m iss.exec.sampling + +training: + $(PYTHON_INTERPRETER) -m iss.exec.training + +exec_clustering: + $(PYTHON_INTERPRETER) -m iss.exec.clustering ################################################################################# diff --git a/iss/data/CollectionManager.py b/iss/data/CollectionManager.py index aefae49..d64ff3a 100644 --- a/iss/data/CollectionManager.py +++ b/iss/data/CollectionManager.py @@ -8,20 +8,21 @@ import re class CollectionManagerFromDirectory: - def __init__(self, config): + def __init__(self, config, sampling_type = 'autoencoder'): self.config = config - self.dir = self.config.get('directory')['collections'] + config_sampling = self.config.get('sampling')[sampling_type] + self.dir = self.config.get('directory')[config_sampling['directory']['from']] jpg_regex = re.compile(".*jpg$") self.pictures_id = [pict for pict in os.listdir(self.dir) if jpg_regex.match(pict)] - self.dir_base = self.config.get('directory')['autoencoder']['base'] - self.dir_train = self.config.get('directory')['autoencoder']['train'] - self.dir_test = self.config.get('directory')['autoencoder']['test'] - self.dir_valid = self.config.get('directory')['autoencoder']['valid'] + self.dir_base = config_sampling['directory']['base'] + self.dir_train = config_sampling['directory']['train'] + self.dir_test = config_sampling['directory']['test'] + self.dir_valid = config_sampling['directory']['valid'] - self.seed = self.config.get('training')['seed'] - self.proportions = self.config.get('training')['proportions'] + self.seed = config_sampling['seed'] + self.proportions = config_sampling['proportions'] self.volumes = {} self.shuffle() diff --git a/iss/data/database_init.py b/iss/data/database_init.py deleted file mode 100644 index 633f866..0000000 --- a/iss/data/database_init.py +++ /dev/null @@ -1,2 +0,0 @@ -# -*- coding: utf-8 -*- - diff --git a/iss/data/make_dataset.py b/iss/data/make_dataset.py deleted file mode 100644 index 96b377a..0000000 --- a/iss/data/make_dataset.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- -import click -import logging -from pathlib import Path -from dotenv import find_dotenv, load_dotenv - - -@click.command() -@click.argument('input_filepath', type=click.Path(exists=True)) -@click.argument('output_filepath', type=click.Path()) -def main(input_filepath, output_filepath): - """ Runs data processing scripts to turn raw data from (../raw) into - cleaned data ready to be analyzed (saved in ../processed). - """ - logger = logging.getLogger(__name__) - logger.info('making final data set from raw data') - - -if __name__ == '__main__': - log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' - logging.basicConfig(level=logging.INFO, format=log_fmt) - - # not used in this stub but often useful for finding various files - project_dir = Path(__file__).resolve().parents[2] - - # find .env automagically by walking up directories until it's found, then - # load up the .env entries as environment variables - load_dotenv(find_dotenv()) - - main() diff --git a/iss/data/resize_collections.py b/iss/data/resize_collections.py deleted file mode 100644 index 64f6022..0000000 --- a/iss/data/resize_collections.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -import click -import logging -import os -import sys -from pathlib import Path -from dotenv import find_dotenv, load_dotenv -from PIL import Image - -load_dotenv(find_dotenv()) - -@click.command() -@click.argument('RESIZE_WIDTH', type=int, default=os.getenv('RESIZE_WIDTH')) -@click.argument('RESIZE_HEIGHT', type=int, default=os.getenv('RESIZE_HEIGHT')) -def main(resize_width, resize_height): - """ Resize image - """ - - logger.info('Resize collections to {}x{}'.format(resize_width, resize_height)) - - try: - imgs_path = os.path.join(str(project_dir), 'data', 'external', 'collections') - [resize_one_img(os.path.join(imgs_path, img_path), resize_width, resize_height) for img_path in os.listdir(imgs_path)] - except: - logger.error(sys.exc_info()[0]) - exit() - - -def resize_one_img(img_path, resize_width, resize_height): - - logger.info('Resize {}'.format(img_path)) - size = (resize_width, resize_height) - outfile = os.path.join(str(project_dir), 'data', 'interim', 'collections', os.path.basename(img_path)) - try: - im = Image.open(os.path.join(str(project_dir), img_path)) - im.thumbnail(size) - im.save(outfile, "JPEG") - except IOError: - logger.info('Cannot resize {}'.format(img_path)) - -if __name__ == '__main__': - log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' - logging.basicConfig(level=logging.INFO, format=log_fmt) - - project_dir = Path(__file__).resolve().parents[2] - logger = logging.getLogger(__name__) - - main() - \ No newline at end of file diff --git a/iss/data/sync_collections.py b/iss/data/sync_collections.py deleted file mode 100644 index f997fd0..0000000 --- a/iss/data/sync_collections.py +++ /dev/null @@ -1,68 +0,0 @@ -# -*- coding: utf-8 -*- -import click -import logging -import os -import sys -import pandas as pd -from shutil import copyfile -from pathlib import Path -from dotenv import find_dotenv, load_dotenv - -load_dotenv(find_dotenv()) - -@click.command() -@click.argument('R_COLLECTIONS_PROJECT', type=click.Path(exists=True), default=os.getenv('R_COLLECTIONS_PROJECT')) -def main(r_collections_project): - """ Synchronize my labeled image from another project with this one - """ - - logger.info('Synchronize labeled images') - - try: - imgs = get_unique_imgs(r_collections_project) - cp_imgs(r_collections_project, imgs) - except: - logger.error(sys.exc_info()[0]) - exit() - - -def get_unique_imgs(r_collections_project): - - logger.info('Copy reference file') - copyfile( - os.path.join(r_collections_project, 'datas', 'Export', 'references_labels.csv'), - os.path.join(str(project_dir), "data", "external", "refs", "references_labels.csv") - ) - refs = pd.read_csv(os.path.join(str(project_dir), 'data', 'external', 'refs', 'references_labels.csv')) - imgs = refs.image.unique() - - return(imgs) - - -def cp_imgs(r_collections_project, imgs): - - logger.info('Synchronize images') - - img_path = os.path.join(r_collections_project, 'datas', 'Collections') - - i = 0 - img_len = len(imgs) - - for img in imgs: - i += 1 - logger.info('Synchronize image {} {}/{}'.format(img, i, img_len)) - if(os.path.isfile(os.path.join(img_path, img))): - copyfile( - os.path.join(img_path, img), - os.path.join(str(project_dir), "data", "external", "collections", img) - ) - - -if __name__ == '__main__': - log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' - logging.basicConfig(level=logging.INFO, format=log_fmt) - - project_dir = Path(__file__).resolve().parents[2] - logger = logging.getLogger(__name__) - - main() \ No newline at end of file diff --git a/iss/exec/bdd.py b/iss/exec/bdd.py new file mode 100644 index 0000000..33d4721 --- /dev/null +++ b/iss/exec/bdd.py @@ -0,0 +1,30 @@ +import os +import time +import mysql.connector +import pandas as pd +import datetime as dt + +from iss.init_config import CONFIG +from iss.data.DataBaseManager import DataBaseManager + +CON_MYSQL = mysql.connector.connect( + host = CONFIG.get('mysql')['database']['server'], + user = CONFIG.get('mysql')['database']['user'], + passwd = CONFIG.get('mysql')['database']['password'], + database = CONFIG.get('mysql')['database']['name'], + port = CONFIG.get('mysql')['database']['port'] +) + +dbm = DataBaseManager(CON_MYSQL, CONFIG) + + +history = pd.read_csv(os.path.join(CONFIG.get("directory")['data_dir'], "raw", "history", "history.txt"), sep=";", names=['latitude', 'longitude', 'id', 'location']) +history['timestamp'] = pd.to_datetime(history.id, format="%Y%m%d-%H%M%S").dt.strftime("%Y-%m-%d %H:%M:%S") +history.fillna('NULL', inplace=True) +history = history[['latitude', 'longitude', 'id', 'timestamp', 'location']] +history_tuple = [tuple(x) for x in history.values] + +dbm.createPicturesTable(force=True) +count = dbm.insertRowPictures(history_tuple) + +print(count) \ No newline at end of file diff --git a/iss/exec/sampling.py b/iss/exec/sampling.py new file mode 100644 index 0000000..78d76fc --- /dev/null +++ b/iss/exec/sampling.py @@ -0,0 +1,18 @@ +import os + +from iss.init_config import CONFIG +from iss.data.CollectionManager import CollectionManagerFromDirectory + + +## Variables globales +_SAMPLING_TYPE = 'autoencoder' + +## Collection Manager +collection = CollectionManagerFromDirectory(config = CONFIG, sampling_type = _SAMPLING_TYPE) + +## Volumes des images +volumes = collection.count().volumes +print(volumes) + +## Creation des repertoires +collection.populateDirectories() \ No newline at end of file diff --git a/iss/exec/training.py b/iss/exec/training.py new file mode 100644 index 0000000..428c7e6 --- /dev/null +++ b/iss/exec/training.py @@ -0,0 +1,36 @@ +import os + +from iss.init_config import CONFIG +from iss.models.DataLoader import ImageDataGeneratorWrapper +from iss.models.ModelTrainer import ModelTrainer +from iss.models import SimpleAutoEncoder +from iss.models import SimpleConvAutoEncoder +from iss.models import VarAutoEncoder +from iss.models import VarConvAutoEncoder + +## Variables globales +_MODEL_TYPE = 'simple_conv' +_LOAD_NAME = None +_LOAD = False + +## Data loader +data_loader = ImageDataGeneratorWrapper(CONFIG, model = _MODEL_TYPE) + +## Model +if _MODEL_TYPE in ['simple_conv']: + model = SimpleConvAutoEncoder(CONFIG.get('models')[_MODEL_TYPE]) + if _LOAD: + model.load(which = _LOAD_NAME) + model.encoder_model.summary() + model.decoder_model.summary() + +model.model.summary() + +## Entraineur +trainer = ModelTrainer(model, data_loader, CONFIG.get('models')[_MODEL_TYPE], callbacks=[]) + +## Entrainement +try: + trainer.train() +except KeyboardInterrupt: + trainer.model.save() diff --git a/iss/init_config.py b/iss/init_config.py new file mode 100644 index 0000000..f092383 --- /dev/null +++ b/iss/init_config.py @@ -0,0 +1,8 @@ +import os +from dotenv import find_dotenv, load_dotenv + +from iss.tools import Config + +load_dotenv(find_dotenv()) +CONFIG = Config(project_dir = os.getenv("PROJECT_DIR"), mode = os.getenv("MODE")) + diff --git a/iss/models/AbstractModel.py b/iss/models/AbstractModel.py index b443854..fe4438c 100644 --- a/iss/models/AbstractModel.py +++ b/iss/models/AbstractModel.py @@ -5,53 +5,52 @@ import numpy as np import os class AbstractModel: - def __init__(self, save_directory, model_name): - self.save_directory = save_directory - self.model = None - self.model_name = model_name + def __init__(self, save_directory, model_name): + self.save_directory = save_directory + self.model = None + self.model_name = model_name - def save(self): - if not os.path.exists(self.save_directory): - os.makedirs(self.save_directory) + def save(self): + if not os.path.exists(self.save_directory): + os.makedirs(self.save_directory) + self.model.save_weights('{}/final_{}.hdf5'.format(self.save_directory, self.model_name)) - self.model.save('{}/final_{}.hdf5'.format(self.save_directory, self.model_name)) + def load(self, which = None): + which = 'final_{}'.format(self.model_name) if which is None else which + self.model.load_weights('{}/{}.hdf5'.format(self.save_directory, which)) + def predict(self, x, batch_size = None, verbose = 0, steps = None, callbacks = None): + return self.model.predict(x, batch_size, verbose, steps) - def load(self, which = 'final_model'): - self.model = load_model('{}/{}.hdf5'.format(self.save_directory, which)) - - def predict(self, x, batch_size = None, verbose = 0, steps = None, callbacks = None): - return self.model.predict(x, batch_size, verbose, steps) - - def predict_one(self, x, batch_size = 1, verbose = 0, steps = None): - x = np.expand_dims(x, axis = 0) - return self.predict(x, batch_size, verbose, steps) + def predict_one(self, x, batch_size = 1, verbose = 0, steps = None): + x = np.expand_dims(x, axis = 0) + return self.predict(x, batch_size, verbose, steps) class AbstractAutoEncoderModel(AbstractModel): - def __init__(self, save_directory, model_name): - super().__init__(save_directory, model_name) - self.encoder_model = None - self.decoder_model = None + def __init__(self, save_directory, model_name): + super().__init__(save_directory, model_name) + self.encoder_model = None + self.decoder_model = None - def get_encoded_prediction(self, pictures): - return self.encoder_model.predict(pictures) + def get_encoded_prediction(self, pictures): + return self.encoder_model.predict(pictures) - def get_full_encoded_prediction(self, generator, nb_batch = None): + def get_full_encoded_prediction(self, generator, nb_batch = None): - generator.reset() - div = np.divmod(generator.n, generator.batch_size) - - if nb_batch is None: - nb_batch = div[0] + 1 * (div[1] != 0) - 1 + generator.reset() + div = np.divmod(generator.n, generator.batch_size) + + if nb_batch is None: + nb_batch = div[0] + 1 * (div[1] != 0) - 1 - if nb_batch <= 0: - return + if nb_batch <= 0: + return - predictions = self.get_encoded_prediction(generator.next()[1]) - while generator.batch_index <= (nb_batch - 1): - predictions = np.concatenate((predictions, self.get_encoded_prediction(generator.next()[1]) ), axis = 0) - - return predictions + predictions = self.get_encoded_prediction(generator.next()[1]) + while generator.batch_index <= (nb_batch - 1): + predictions = np.concatenate((predictions, self.get_encoded_prediction(generator.next()[1]) ), axis = 0) + + return predictions diff --git a/iss/models/Callbacks.py b/iss/models/Callbacks.py index fd6c51b..653b384 100644 --- a/iss/models/Callbacks.py +++ b/iss/models/Callbacks.py @@ -18,8 +18,6 @@ class DisplayPictureCallback(Callback): def on_epoch_end(self, epoch, logs): if epoch % self.epoch_laps == 0: - print("ok") - input_pict = self.data_loader.next()[0][1] output_pict = self.model_class.predict_one(input_pict) diff --git a/iss/models/DataLoader.py b/iss/models/DataLoader.py index 6a60e25..f450be0 100644 --- a/iss/models/DataLoader.py +++ b/iss/models/DataLoader.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import os from keras.preprocessing.image import ImageDataGenerator @@ -14,8 +15,12 @@ class ImageDataGeneratorWrapper: self.image_data_generator(config) - self.set_train_generator() - self.set_test_generator() + sampling_type = self.config.get('models')[self.model]['sampling'] + train_dir = os.path.join(self.config.get('sampling')[sampling_type]['directory']['train'], '..') + test_dir = os.path.join(self.config.get('sampling')[sampling_type]['directory']['test'], '..') + + self.set_train_generator(train_dir) + self.set_test_generator(test_dir) def image_data_generator(self, config): self.datagen = ImageDataGenerator( @@ -34,16 +39,14 @@ class ImageDataGeneratorWrapper: batch_size = self.config.get('models')[self.model]['batch_size'], ) - def set_train_generator(self): - train_dir = self.config.get('directory')['autoencoder']['train'] + '/..' + def set_train_generator(self, train_dir): self.train_generator = self.build_generator(directory = train_dir) return self def get_train_generator(self): return self.train_generator - def set_test_generator(self): - test_dir = self.config.get('directory')['autoencoder']['test'] + '/..' + def set_test_generator(self, test_dir): self.test_generator = self.build_generator(directory = test_dir) return self diff --git a/iss/models/ModelTrainer.py b/iss/models/ModelTrainer.py index 92b2d91..fa40f91 100644 --- a/iss/models/ModelTrainer.py +++ b/iss/models/ModelTrainer.py @@ -59,12 +59,13 @@ class ModelTrainer: def init_callbacks(self, config): + if 'csv_logger' in config['callbacks']: log_dir = config['callbacks']['csv_logger']['directory'] Tools.create_dir_if_not_exists(log_dir) self.csv_logger = CSVLogger( - filename = '{}/{}training.log'.format(log_dir, self.model.model_name), + filename = '{}/{}_training.log'.format(log_dir, self.model.model_name), append = config['callbacks']['csv_logger']['append'] ) self.callbacks.extend([self.csv_logger]) @@ -86,5 +87,17 @@ class ModelTrainer: epoch_laps = config['callbacks']['display_picture']['epoch_laps'] ) self.callbacks.extend([self.picture_displayer]) + + if 'tensorboard' in config['callbacks']: + log_dir = config['callbacks']['tensorboard']['log_dir'] + Tools.create_dir_if_not_exists(log_dir) + self.callbacks.extend([keras.callbacks.TensorBoard( + log_dir = log_dir, + histogram_freq=0, + batch_size=32, + write_graph=False, + write_images = True + )]) + return self diff --git a/iss/models/SimpleConvAutoEncoder.py b/iss/models/SimpleConvAutoEncoder.py index dad9ccf..80b1335 100644 --- a/iss/models/SimpleConvAutoEncoder.py +++ b/iss/models/SimpleConvAutoEncoder.py @@ -16,14 +16,13 @@ class SimpleConvAutoEncoder(AbstractAutoEncoderModel): super().__init__(save_directory, model_name) + np.random.seed(42) self.activation = config['activation'] self.input_shape = (config['input_height'], config['input_width'], config['input_channel']) self.latent_shape = (config['latent_height'], config['latent_width'], config['latent_channel']) self.lr = config['learning_rate'] self.build_model() - def load(self, which = 'final_model'): - self.model = load_model('{}/{}.hdf5'.format(self.save_directory, which), custom_objects= {'my_loss':self.my_loss}) def build_model(self): input_shape = self.input_shape @@ -80,8 +79,6 @@ class SimpleConvAutoEncoder(AbstractAutoEncoderModel): optimizer = Adam(lr = self.lr, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False) - - # self.model.compile(optimizer = optimizer, loss = 'binary_crossentropy') self.model.compile(optimizer = optimizer, loss = self.my_loss) diff --git a/iss/untitled.txt b/iss/untitled.txt deleted file mode 100644 index e69de29..0000000 diff --git a/requirements.txt b/requirements.txt index 0df7daf..6c03bc5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,6 @@ ipython==7.3.0 Pillow==5.4.1 python-dotenv==0.10.1 PyYAML==3.13 +matplotlib>=3.1.0 +umap-learn==0.3.10 +bokeh==0.13.0 \ No newline at end of file