clean code + adaptation

This commit is contained in:
Francois Vieille 2019-11-11 04:16:43 +01:00
parent 4c5a826ec7
commit 0811e3d3a5
17 changed files with 176 additions and 211 deletions

View File

@ -52,13 +52,21 @@ config_template:
# PROJECT RULES #
#################################################################################
## Make Dataset
data: requirements
$(PYTHON_INTERPRETER) src/data/make_dataset.py
## Sync photos with my refs
sync_collections: iss/data/sync_collections.sh
$(PYTHON_INTERPRETER) iss/data/sync_collections.sh
/bin/sh iss/data/sync_collections.sh
populate_db:
$(PYTHON_INTERPRETER) -m iss.exec.bdd
sampling:
$(PYTHON_INTERPRETER) -m iss.exec.sampling
training:
$(PYTHON_INTERPRETER) -m iss.exec.training
exec_clustering:
$(PYTHON_INTERPRETER) -m iss.exec.clustering
#################################################################################

View File

@ -8,20 +8,21 @@ import re
class CollectionManagerFromDirectory:
def __init__(self, config):
def __init__(self, config, sampling_type = 'autoencoder'):
self.config = config
self.dir = self.config.get('directory')['collections']
config_sampling = self.config.get('sampling')[sampling_type]
self.dir = self.config.get('directory')[config_sampling['directory']['from']]
jpg_regex = re.compile(".*jpg$")
self.pictures_id = [pict for pict in os.listdir(self.dir) if jpg_regex.match(pict)]
self.dir_base = self.config.get('directory')['autoencoder']['base']
self.dir_train = self.config.get('directory')['autoencoder']['train']
self.dir_test = self.config.get('directory')['autoencoder']['test']
self.dir_valid = self.config.get('directory')['autoencoder']['valid']
self.dir_base = config_sampling['directory']['base']
self.dir_train = config_sampling['directory']['train']
self.dir_test = config_sampling['directory']['test']
self.dir_valid = config_sampling['directory']['valid']
self.seed = self.config.get('training')['seed']
self.proportions = self.config.get('training')['proportions']
self.seed = config_sampling['seed']
self.proportions = config_sampling['proportions']
self.volumes = {}
self.shuffle()

View File

@ -1,2 +0,0 @@
# -*- coding: utf-8 -*-

View File

@ -1,30 +0,0 @@
# -*- coding: utf-8 -*-
import click
import logging
from pathlib import Path
from dotenv import find_dotenv, load_dotenv
@click.command()
@click.argument('input_filepath', type=click.Path(exists=True))
@click.argument('output_filepath', type=click.Path())
def main(input_filepath, output_filepath):
""" Runs data processing scripts to turn raw data from (../raw) into
cleaned data ready to be analyzed (saved in ../processed).
"""
logger = logging.getLogger(__name__)
logger.info('making final data set from raw data')
if __name__ == '__main__':
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=log_fmt)
# not used in this stub but often useful for finding various files
project_dir = Path(__file__).resolve().parents[2]
# find .env automagically by walking up directories until it's found, then
# load up the .env entries as environment variables
load_dotenv(find_dotenv())
main()

View File

@ -1,49 +0,0 @@
# -*- coding: utf-8 -*-
import click
import logging
import os
import sys
from pathlib import Path
from dotenv import find_dotenv, load_dotenv
from PIL import Image
load_dotenv(find_dotenv())
@click.command()
@click.argument('RESIZE_WIDTH', type=int, default=os.getenv('RESIZE_WIDTH'))
@click.argument('RESIZE_HEIGHT', type=int, default=os.getenv('RESIZE_HEIGHT'))
def main(resize_width, resize_height):
""" Resize image
"""
logger.info('Resize collections to {}x{}'.format(resize_width, resize_height))
try:
imgs_path = os.path.join(str(project_dir), 'data', 'external', 'collections')
[resize_one_img(os.path.join(imgs_path, img_path), resize_width, resize_height) for img_path in os.listdir(imgs_path)]
except:
logger.error(sys.exc_info()[0])
exit()
def resize_one_img(img_path, resize_width, resize_height):
logger.info('Resize {}'.format(img_path))
size = (resize_width, resize_height)
outfile = os.path.join(str(project_dir), 'data', 'interim', 'collections', os.path.basename(img_path))
try:
im = Image.open(os.path.join(str(project_dir), img_path))
im.thumbnail(size)
im.save(outfile, "JPEG")
except IOError:
logger.info('Cannot resize {}'.format(img_path))
if __name__ == '__main__':
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=log_fmt)
project_dir = Path(__file__).resolve().parents[2]
logger = logging.getLogger(__name__)
main()

View File

@ -1,68 +0,0 @@
# -*- coding: utf-8 -*-
import click
import logging
import os
import sys
import pandas as pd
from shutil import copyfile
from pathlib import Path
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())
@click.command()
@click.argument('R_COLLECTIONS_PROJECT', type=click.Path(exists=True), default=os.getenv('R_COLLECTIONS_PROJECT'))
def main(r_collections_project):
""" Synchronize my labeled image from another project with this one
"""
logger.info('Synchronize labeled images')
try:
imgs = get_unique_imgs(r_collections_project)
cp_imgs(r_collections_project, imgs)
except:
logger.error(sys.exc_info()[0])
exit()
def get_unique_imgs(r_collections_project):
logger.info('Copy reference file')
copyfile(
os.path.join(r_collections_project, 'datas', 'Export', 'references_labels.csv'),
os.path.join(str(project_dir), "data", "external", "refs", "references_labels.csv")
)
refs = pd.read_csv(os.path.join(str(project_dir), 'data', 'external', 'refs', 'references_labels.csv'))
imgs = refs.image.unique()
return(imgs)
def cp_imgs(r_collections_project, imgs):
logger.info('Synchronize images')
img_path = os.path.join(r_collections_project, 'datas', 'Collections')
i = 0
img_len = len(imgs)
for img in imgs:
i += 1
logger.info('Synchronize image {} {}/{}'.format(img, i, img_len))
if(os.path.isfile(os.path.join(img_path, img))):
copyfile(
os.path.join(img_path, img),
os.path.join(str(project_dir), "data", "external", "collections", img)
)
if __name__ == '__main__':
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=log_fmt)
project_dir = Path(__file__).resolve().parents[2]
logger = logging.getLogger(__name__)
main()

30
iss/exec/bdd.py Normal file
View File

@ -0,0 +1,30 @@
import os
import time
import mysql.connector
import pandas as pd
import datetime as dt

from iss.init_config import CONFIG
from iss.data.DataBaseManager import DataBaseManager


def main():
    """Rebuild the pictures table from the raw history file.

    Reads `<data_dir>/raw/history/history.txt`, derives a timestamp from the
    picture id, then recreates and repopulates the pictures table.
    """
    # Hoist the nested config lookup instead of repeating it five times.
    db_conf = CONFIG.get('mysql')['database']
    con_mysql = mysql.connector.connect(
        host=db_conf['server'],
        user=db_conf['user'],
        passwd=db_conf['password'],
        database=db_conf['name'],
        port=db_conf['port'],
    )
    try:
        dbm = DataBaseManager(con_mysql, CONFIG)

        history_path = os.path.join(
            CONFIG.get("directory")['data_dir'], "raw", "history", "history.txt"
        )
        history = pd.read_csv(
            history_path, sep=";",
            names=['latitude', 'longitude', 'id', 'location'],
        )
        # Picture ids encode the capture time as YYYYmmdd-HHMMSS.
        history['timestamp'] = pd.to_datetime(
            history.id, format="%Y%m%d-%H%M%S"
        ).dt.strftime("%Y-%m-%d %H:%M:%S")
        history.fillna('NULL', inplace=True)
        history = history[['latitude', 'longitude', 'id', 'timestamp', 'location']]
        history_tuple = [tuple(x) for x in history.values]

        dbm.createPicturesTable(force=True)
        count = dbm.insertRowPictures(history_tuple)
        print(count)
    finally:
        # Always release the DB connection, even if the import fails.
        con_mysql.close()


if __name__ == '__main__':
    main()

18
iss/exec/sampling.py Normal file
View File

@ -0,0 +1,18 @@
import os

from iss.init_config import CONFIG
from iss.data.CollectionManager import CollectionManagerFromDirectory

## Global variables
# Which sampling configuration to use (key under the 'sampling' config section).
_SAMPLING_TYPE = 'autoencoder'


def main():
    """Split the picture collection into train/test/valid directories.

    Prints the per-split volumes before populating the directories.
    """
    ## Collection manager
    collection = CollectionManagerFromDirectory(
        config=CONFIG, sampling_type=_SAMPLING_TYPE
    )

    ## Image volumes per split
    volumes = collection.count().volumes
    print(volumes)

    ## Create and fill the split directories
    collection.populateDirectories()


# Guard so importing this module does not trigger the filesystem side effects.
if __name__ == '__main__':
    main()

36
iss/exec/training.py Normal file
View File

@ -0,0 +1,36 @@
import os

from iss.init_config import CONFIG
from iss.models.DataLoader import ImageDataGeneratorWrapper
from iss.models.ModelTrainer import ModelTrainer
from iss.models import SimpleAutoEncoder
from iss.models import SimpleConvAutoEncoder
from iss.models import VarAutoEncoder
from iss.models import VarConvAutoEncoder

## Global variables
_MODEL_TYPE = 'simple_conv'
_LOAD_NAME = None
_LOAD = False

# Dispatch table for model construction; extend here when wiring a new model.
_MODEL_CLASSES = {
    'simple_conv': SimpleConvAutoEncoder,
}


def main():
    """Train the configured auto-encoder; save weights on Ctrl-C."""
    ## Data loader
    data_loader = ImageDataGeneratorWrapper(CONFIG, model=_MODEL_TYPE)

    ## Model
    model_class = _MODEL_CLASSES.get(_MODEL_TYPE)
    if model_class is None:
        # Fail fast with a clear message instead of a NameError further down
        # (the original left `model` unbound for unknown types).
        raise ValueError("Unknown model type: {!r}".format(_MODEL_TYPE))
    model = model_class(CONFIG.get('models')[_MODEL_TYPE])
    if _LOAD:
        model.load(which=_LOAD_NAME)
    model.encoder_model.summary()
    model.decoder_model.summary()
    model.model.summary()

    ## Trainer
    trainer = ModelTrainer(
        model, data_loader, CONFIG.get('models')[_MODEL_TYPE], callbacks=[]
    )

    ## Training
    try:
        trainer.train()
    except KeyboardInterrupt:
        # Keep a checkpoint of the partially trained model on interrupt.
        trainer.model.save()


if __name__ == '__main__':
    main()

8
iss/init_config.py Normal file
View File

@ -0,0 +1,8 @@
import os
from dotenv import find_dotenv, load_dotenv
from iss.tools import Config
# Load .env entries into the environment first: the os.getenv calls below
# depend on PROJECT_DIR and MODE being set.
load_dotenv(find_dotenv())
# Single shared configuration object imported by the exec scripts.
CONFIG = Config(project_dir = os.getenv("PROJECT_DIR"), mode = os.getenv("MODE"))

View File

@ -5,53 +5,52 @@ import numpy as np
import os
class AbstractModel:
def __init__(self, save_directory, model_name):
self.save_directory = save_directory
self.model = None
self.model_name = model_name
def __init__(self, save_directory, model_name):
self.save_directory = save_directory
self.model = None
self.model_name = model_name
def save(self):
if not os.path.exists(self.save_directory):
os.makedirs(self.save_directory)
def save(self):
if not os.path.exists(self.save_directory):
os.makedirs(self.save_directory)
self.model.save_weights('{}/final_{}.hdf5'.format(self.save_directory, self.model_name))
self.model.save('{}/final_{}.hdf5'.format(self.save_directory, self.model_name))
def load(self, which = None):
which = 'final_{}'.format(self.model_name) if which is None else which
self.model.load_weights('{}/{}.hdf5'.format(self.save_directory, which))
def predict(self, x, batch_size = None, verbose = 0, steps = None, callbacks = None):
return self.model.predict(x, batch_size, verbose, steps)
def load(self, which = 'final_model'):
self.model = load_model('{}/{}.hdf5'.format(self.save_directory, which))
def predict(self, x, batch_size = None, verbose = 0, steps = None, callbacks = None):
return self.model.predict(x, batch_size, verbose, steps)
def predict_one(self, x, batch_size = 1, verbose = 0, steps = None):
x = np.expand_dims(x, axis = 0)
return self.predict(x, batch_size, verbose, steps)
def predict_one(self, x, batch_size = 1, verbose = 0, steps = None):
x = np.expand_dims(x, axis = 0)
return self.predict(x, batch_size, verbose, steps)
class AbstractAutoEncoderModel(AbstractModel):
def __init__(self, save_directory, model_name):
super().__init__(save_directory, model_name)
self.encoder_model = None
self.decoder_model = None
def __init__(self, save_directory, model_name):
super().__init__(save_directory, model_name)
self.encoder_model = None
self.decoder_model = None
def get_encoded_prediction(self, pictures):
return self.encoder_model.predict(pictures)
def get_encoded_prediction(self, pictures):
return self.encoder_model.predict(pictures)
def get_full_encoded_prediction(self, generator, nb_batch = None):
def get_full_encoded_prediction(self, generator, nb_batch = None):
generator.reset()
div = np.divmod(generator.n, generator.batch_size)
if nb_batch is None:
nb_batch = div[0] + 1 * (div[1] != 0) - 1
generator.reset()
div = np.divmod(generator.n, generator.batch_size)
if nb_batch is None:
nb_batch = div[0] + 1 * (div[1] != 0) - 1
if nb_batch <= 0:
return
if nb_batch <= 0:
return
predictions = self.get_encoded_prediction(generator.next()[1])
while generator.batch_index <= (nb_batch - 1):
predictions = np.concatenate((predictions, self.get_encoded_prediction(generator.next()[1]) ), axis = 0)
return predictions
predictions = self.get_encoded_prediction(generator.next()[1])
while generator.batch_index <= (nb_batch - 1):
predictions = np.concatenate((predictions, self.get_encoded_prediction(generator.next()[1]) ), axis = 0)
return predictions

View File

@ -18,8 +18,6 @@ class DisplayPictureCallback(Callback):
def on_epoch_end(self, epoch, logs):
if epoch % self.epoch_laps == 0:
print("ok")
input_pict = self.data_loader.next()[0][1]
output_pict = self.model_class.predict_one(input_pict)

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
import os
from keras.preprocessing.image import ImageDataGenerator
@ -14,8 +15,12 @@ class ImageDataGeneratorWrapper:
self.image_data_generator(config)
self.set_train_generator()
self.set_test_generator()
sampling_type = self.config.get('models')[self.model]['sampling']
train_dir = os.path.join(self.config.get('sampling')[sampling_type]['directory']['train'], '..')
test_dir = os.path.join(self.config.get('sampling')[sampling_type]['directory']['test'], '..')
self.set_train_generator(train_dir)
self.set_test_generator(test_dir)
def image_data_generator(self, config):
self.datagen = ImageDataGenerator(
@ -34,16 +39,14 @@ class ImageDataGeneratorWrapper:
batch_size = self.config.get('models')[self.model]['batch_size'],
)
def set_train_generator(self):
train_dir = self.config.get('directory')['autoencoder']['train'] + '/..'
def set_train_generator(self, train_dir):
self.train_generator = self.build_generator(directory = train_dir)
return self
def get_train_generator(self):
return self.train_generator
def set_test_generator(self):
test_dir = self.config.get('directory')['autoencoder']['test'] + '/..'
def set_test_generator(self, test_dir):
self.test_generator = self.build_generator(directory = test_dir)
return self

View File

@ -59,12 +59,13 @@ class ModelTrainer:
def init_callbacks(self, config):
if 'csv_logger' in config['callbacks']:
log_dir = config['callbacks']['csv_logger']['directory']
Tools.create_dir_if_not_exists(log_dir)
self.csv_logger = CSVLogger(
filename = '{}/{}training.log'.format(log_dir, self.model.model_name),
filename = '{}/{}_training.log'.format(log_dir, self.model.model_name),
append = config['callbacks']['csv_logger']['append']
)
self.callbacks.extend([self.csv_logger])
@ -86,5 +87,17 @@ class ModelTrainer:
epoch_laps = config['callbacks']['display_picture']['epoch_laps']
)
self.callbacks.extend([self.picture_displayer])
if 'tensorboard' in config['callbacks']:
log_dir = config['callbacks']['tensorboard']['log_dir']
Tools.create_dir_if_not_exists(log_dir)
self.callbacks.extend([keras.callbacks.TensorBoard(
log_dir = log_dir,
histogram_freq=0,
batch_size=32,
write_graph=False,
write_images = True
)])
return self

View File

@ -16,14 +16,13 @@ class SimpleConvAutoEncoder(AbstractAutoEncoderModel):
super().__init__(save_directory, model_name)
np.random.seed(42)
self.activation = config['activation']
self.input_shape = (config['input_height'], config['input_width'], config['input_channel'])
self.latent_shape = (config['latent_height'], config['latent_width'], config['latent_channel'])
self.lr = config['learning_rate']
self.build_model()
def load(self, which = 'final_model'):
self.model = load_model('{}/{}.hdf5'.format(self.save_directory, which), custom_objects= {'my_loss':self.my_loss})
def build_model(self):
input_shape = self.input_shape
@ -80,8 +79,6 @@ class SimpleConvAutoEncoder(AbstractAutoEncoderModel):
optimizer = Adam(lr = self.lr, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
# self.model.compile(optimizer = optimizer, loss = 'binary_crossentropy')
self.model.compile(optimizer = optimizer, loss = self.my_loss)

View File

View File

@ -8,3 +8,6 @@ ipython==7.3.0
Pillow==5.4.1
python-dotenv==0.10.1
PyYAML==3.13
matplotlib>=3.1.0
umap-learn==0.3.10
bokeh==0.13.0