mirror of
https://github.com/prise6/smart-iss-posts
synced 2024-05-02 21:53:10 +02:00
clean code + adaptation
This commit is contained in:
parent
4c5a826ec7
commit
0811e3d3a5
18
Makefile
18
Makefile
|
@ -52,13 +52,21 @@ config_template:
|
|||
# PROJECT RULES #
|
||||
#################################################################################
|
||||
|
||||
## Make Dataset
|
||||
data: requirements
|
||||
$(PYTHON_INTERPRETER) src/data/make_dataset.py
|
||||
|
||||
## Sync photos with my refs
|
||||
sync_collections: iss/data/sync_collections.sh
|
||||
$(PYTHON_INTERPRETER) iss/data/sync_collections.sh
|
||||
/bin/sh iss/data/sync_collections.sh
|
||||
|
||||
populate_db:
|
||||
$(PYTHON_INTERPRETER) -m iss.exec.bdd
|
||||
|
||||
sampling:
|
||||
$(PYTHON_INTERPRETER) -m iss.exec.sampling
|
||||
|
||||
training:
|
||||
$(PYTHON_INTERPRETER) -m iss.exec.training
|
||||
|
||||
exec_clustering:
|
||||
$(PYTHON_INTERPRETER) -m iss.exec.clustering
|
||||
|
||||
|
||||
#################################################################################
|
||||
|
|
|
@ -8,20 +8,21 @@ import re
|
|||
|
||||
class CollectionManagerFromDirectory:
|
||||
|
||||
def __init__(self, config):
|
||||
def __init__(self, config, sampling_type = 'autoencoder'):
|
||||
self.config = config
|
||||
self.dir = self.config.get('directory')['collections']
|
||||
config_sampling = self.config.get('sampling')[sampling_type]
|
||||
self.dir = self.config.get('directory')[config_sampling['directory']['from']]
|
||||
|
||||
jpg_regex = re.compile(".*jpg$")
|
||||
self.pictures_id = [pict for pict in os.listdir(self.dir) if jpg_regex.match(pict)]
|
||||
|
||||
self.dir_base = self.config.get('directory')['autoencoder']['base']
|
||||
self.dir_train = self.config.get('directory')['autoencoder']['train']
|
||||
self.dir_test = self.config.get('directory')['autoencoder']['test']
|
||||
self.dir_valid = self.config.get('directory')['autoencoder']['valid']
|
||||
self.dir_base = config_sampling['directory']['base']
|
||||
self.dir_train = config_sampling['directory']['train']
|
||||
self.dir_test = config_sampling['directory']['test']
|
||||
self.dir_valid = config_sampling['directory']['valid']
|
||||
|
||||
self.seed = self.config.get('training')['seed']
|
||||
self.proportions = self.config.get('training')['proportions']
|
||||
self.seed = config_sampling['seed']
|
||||
self.proportions = config_sampling['proportions']
|
||||
self.volumes = {}
|
||||
|
||||
self.shuffle()
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import click
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from dotenv import find_dotenv, load_dotenv
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.argument('input_filepath', type=click.Path(exists=True))
|
||||
@click.argument('output_filepath', type=click.Path())
|
||||
def main(input_filepath, output_filepath):
|
||||
""" Runs data processing scripts to turn raw data from (../raw) into
|
||||
cleaned data ready to be analyzed (saved in ../processed).
|
||||
"""
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info('making final data set from raw data')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
logging.basicConfig(level=logging.INFO, format=log_fmt)
|
||||
|
||||
# not used in this stub but often useful for finding various files
|
||||
project_dir = Path(__file__).resolve().parents[2]
|
||||
|
||||
# find .env automagically by walking up directories until it's found, then
|
||||
# load up the .env entries as environment variables
|
||||
load_dotenv(find_dotenv())
|
||||
|
||||
main()
|
|
@ -1,49 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import click
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from dotenv import find_dotenv, load_dotenv
|
||||
from PIL import Image
|
||||
|
||||
load_dotenv(find_dotenv())
|
||||
|
||||
@click.command()
|
||||
@click.argument('RESIZE_WIDTH', type=int, default=os.getenv('RESIZE_WIDTH'))
|
||||
@click.argument('RESIZE_HEIGHT', type=int, default=os.getenv('RESIZE_HEIGHT'))
|
||||
def main(resize_width, resize_height):
|
||||
""" Resize image
|
||||
"""
|
||||
|
||||
logger.info('Resize collections to {}x{}'.format(resize_width, resize_height))
|
||||
|
||||
try:
|
||||
imgs_path = os.path.join(str(project_dir), 'data', 'external', 'collections')
|
||||
[resize_one_img(os.path.join(imgs_path, img_path), resize_width, resize_height) for img_path in os.listdir(imgs_path)]
|
||||
except:
|
||||
logger.error(sys.exc_info()[0])
|
||||
exit()
|
||||
|
||||
|
||||
def resize_one_img(img_path, resize_width, resize_height):
|
||||
|
||||
logger.info('Resize {}'.format(img_path))
|
||||
size = (resize_width, resize_height)
|
||||
outfile = os.path.join(str(project_dir), 'data', 'interim', 'collections', os.path.basename(img_path))
|
||||
try:
|
||||
im = Image.open(os.path.join(str(project_dir), img_path))
|
||||
im.thumbnail(size)
|
||||
im.save(outfile, "JPEG")
|
||||
except IOError:
|
||||
logger.info('Cannot resize {}'.format(img_path))
|
||||
|
||||
if __name__ == '__main__':
|
||||
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
logging.basicConfig(level=logging.INFO, format=log_fmt)
|
||||
|
||||
project_dir = Path(__file__).resolve().parents[2]
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
main()
|
||||
|
|
@ -1,68 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import click
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import pandas as pd
|
||||
from shutil import copyfile
|
||||
from pathlib import Path
|
||||
from dotenv import find_dotenv, load_dotenv
|
||||
|
||||
load_dotenv(find_dotenv())
|
||||
|
||||
@click.command()
|
||||
@click.argument('R_COLLECTIONS_PROJECT', type=click.Path(exists=True), default=os.getenv('R_COLLECTIONS_PROJECT'))
|
||||
def main(r_collections_project):
|
||||
""" Synchronize my labeled image from another project with this one
|
||||
"""
|
||||
|
||||
logger.info('Synchronize labeled images')
|
||||
|
||||
try:
|
||||
imgs = get_unique_imgs(r_collections_project)
|
||||
cp_imgs(r_collections_project, imgs)
|
||||
except:
|
||||
logger.error(sys.exc_info()[0])
|
||||
exit()
|
||||
|
||||
|
||||
def get_unique_imgs(r_collections_project):
|
||||
|
||||
logger.info('Copy reference file')
|
||||
copyfile(
|
||||
os.path.join(r_collections_project, 'datas', 'Export', 'references_labels.csv'),
|
||||
os.path.join(str(project_dir), "data", "external", "refs", "references_labels.csv")
|
||||
)
|
||||
refs = pd.read_csv(os.path.join(str(project_dir), 'data', 'external', 'refs', 'references_labels.csv'))
|
||||
imgs = refs.image.unique()
|
||||
|
||||
return(imgs)
|
||||
|
||||
|
||||
def cp_imgs(r_collections_project, imgs):
|
||||
|
||||
logger.info('Synchronize images')
|
||||
|
||||
img_path = os.path.join(r_collections_project, 'datas', 'Collections')
|
||||
|
||||
i = 0
|
||||
img_len = len(imgs)
|
||||
|
||||
for img in imgs:
|
||||
i += 1
|
||||
logger.info('Synchronize image {} {}/{}'.format(img, i, img_len))
|
||||
if(os.path.isfile(os.path.join(img_path, img))):
|
||||
copyfile(
|
||||
os.path.join(img_path, img),
|
||||
os.path.join(str(project_dir), "data", "external", "collections", img)
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
logging.basicConfig(level=logging.INFO, format=log_fmt)
|
||||
|
||||
project_dir = Path(__file__).resolve().parents[2]
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
main()
|
30
iss/exec/bdd.py
Normal file
30
iss/exec/bdd.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
import os
|
||||
import time
|
||||
import mysql.connector
|
||||
import pandas as pd
|
||||
import datetime as dt
|
||||
|
||||
from iss.init_config import CONFIG
|
||||
from iss.data.DataBaseManager import DataBaseManager
|
||||
|
||||
CON_MYSQL = mysql.connector.connect(
|
||||
host = CONFIG.get('mysql')['database']['server'],
|
||||
user = CONFIG.get('mysql')['database']['user'],
|
||||
passwd = CONFIG.get('mysql')['database']['password'],
|
||||
database = CONFIG.get('mysql')['database']['name'],
|
||||
port = CONFIG.get('mysql')['database']['port']
|
||||
)
|
||||
|
||||
dbm = DataBaseManager(CON_MYSQL, CONFIG)
|
||||
|
||||
|
||||
history = pd.read_csv(os.path.join(CONFIG.get("directory")['data_dir'], "raw", "history", "history.txt"), sep=";", names=['latitude', 'longitude', 'id', 'location'])
|
||||
history['timestamp'] = pd.to_datetime(history.id, format="%Y%m%d-%H%M%S").dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
history.fillna('NULL', inplace=True)
|
||||
history = history[['latitude', 'longitude', 'id', 'timestamp', 'location']]
|
||||
history_tuple = [tuple(x) for x in history.values]
|
||||
|
||||
dbm.createPicturesTable(force=True)
|
||||
count = dbm.insertRowPictures(history_tuple)
|
||||
|
||||
print(count)
|
18
iss/exec/sampling.py
Normal file
18
iss/exec/sampling.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
import os
|
||||
|
||||
from iss.init_config import CONFIG
|
||||
from iss.data.CollectionManager import CollectionManagerFromDirectory
|
||||
|
||||
|
||||
## Variables globales
|
||||
_SAMPLING_TYPE = 'autoencoder'
|
||||
|
||||
## Collection Manager
|
||||
collection = CollectionManagerFromDirectory(config = CONFIG, sampling_type = _SAMPLING_TYPE)
|
||||
|
||||
## Volumes des images
|
||||
volumes = collection.count().volumes
|
||||
print(volumes)
|
||||
|
||||
## Creation des repertoires
|
||||
collection.populateDirectories()
|
36
iss/exec/training.py
Normal file
36
iss/exec/training.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
import os
|
||||
|
||||
from iss.init_config import CONFIG
|
||||
from iss.models.DataLoader import ImageDataGeneratorWrapper
|
||||
from iss.models.ModelTrainer import ModelTrainer
|
||||
from iss.models import SimpleAutoEncoder
|
||||
from iss.models import SimpleConvAutoEncoder
|
||||
from iss.models import VarAutoEncoder
|
||||
from iss.models import VarConvAutoEncoder
|
||||
|
||||
## Variables globales
|
||||
_MODEL_TYPE = 'simple_conv'
|
||||
_LOAD_NAME = None
|
||||
_LOAD = False
|
||||
|
||||
## Data loader
|
||||
data_loader = ImageDataGeneratorWrapper(CONFIG, model = _MODEL_TYPE)
|
||||
|
||||
## Model
|
||||
if _MODEL_TYPE in ['simple_conv']:
|
||||
model = SimpleConvAutoEncoder(CONFIG.get('models')[_MODEL_TYPE])
|
||||
if _LOAD:
|
||||
model.load(which = _LOAD_NAME)
|
||||
model.encoder_model.summary()
|
||||
model.decoder_model.summary()
|
||||
|
||||
model.model.summary()
|
||||
|
||||
## Entraineur
|
||||
trainer = ModelTrainer(model, data_loader, CONFIG.get('models')[_MODEL_TYPE], callbacks=[])
|
||||
|
||||
## Entrainement
|
||||
try:
|
||||
trainer.train()
|
||||
except KeyboardInterrupt:
|
||||
trainer.model.save()
|
8
iss/init_config.py
Normal file
8
iss/init_config.py
Normal file
|
@ -0,0 +1,8 @@
|
|||
import os
|
||||
from dotenv import find_dotenv, load_dotenv
|
||||
|
||||
from iss.tools import Config
|
||||
|
||||
load_dotenv(find_dotenv())
|
||||
CONFIG = Config(project_dir = os.getenv("PROJECT_DIR"), mode = os.getenv("MODE"))
|
||||
|
|
@ -5,53 +5,52 @@ import numpy as np
|
|||
import os
|
||||
|
||||
class AbstractModel:
|
||||
def __init__(self, save_directory, model_name):
|
||||
self.save_directory = save_directory
|
||||
self.model = None
|
||||
self.model_name = model_name
|
||||
def __init__(self, save_directory, model_name):
|
||||
self.save_directory = save_directory
|
||||
self.model = None
|
||||
self.model_name = model_name
|
||||
|
||||
def save(self):
|
||||
if not os.path.exists(self.save_directory):
|
||||
os.makedirs(self.save_directory)
|
||||
def save(self):
|
||||
if not os.path.exists(self.save_directory):
|
||||
os.makedirs(self.save_directory)
|
||||
self.model.save_weights('{}/final_{}.hdf5'.format(self.save_directory, self.model_name))
|
||||
|
||||
self.model.save('{}/final_{}.hdf5'.format(self.save_directory, self.model_name))
|
||||
def load(self, which = None):
|
||||
which = 'final_{}'.format(self.model_name) if which is None else which
|
||||
self.model.load_weights('{}/{}.hdf5'.format(self.save_directory, which))
|
||||
|
||||
def predict(self, x, batch_size = None, verbose = 0, steps = None, callbacks = None):
|
||||
return self.model.predict(x, batch_size, verbose, steps)
|
||||
|
||||
def load(self, which = 'final_model'):
|
||||
self.model = load_model('{}/{}.hdf5'.format(self.save_directory, which))
|
||||
|
||||
def predict(self, x, batch_size = None, verbose = 0, steps = None, callbacks = None):
|
||||
return self.model.predict(x, batch_size, verbose, steps)
|
||||
|
||||
def predict_one(self, x, batch_size = 1, verbose = 0, steps = None):
|
||||
x = np.expand_dims(x, axis = 0)
|
||||
return self.predict(x, batch_size, verbose, steps)
|
||||
def predict_one(self, x, batch_size = 1, verbose = 0, steps = None):
|
||||
x = np.expand_dims(x, axis = 0)
|
||||
return self.predict(x, batch_size, verbose, steps)
|
||||
|
||||
|
||||
class AbstractAutoEncoderModel(AbstractModel):
|
||||
|
||||
def __init__(self, save_directory, model_name):
|
||||
super().__init__(save_directory, model_name)
|
||||
self.encoder_model = None
|
||||
self.decoder_model = None
|
||||
def __init__(self, save_directory, model_name):
|
||||
super().__init__(save_directory, model_name)
|
||||
self.encoder_model = None
|
||||
self.decoder_model = None
|
||||
|
||||
def get_encoded_prediction(self, pictures):
|
||||
return self.encoder_model.predict(pictures)
|
||||
def get_encoded_prediction(self, pictures):
|
||||
return self.encoder_model.predict(pictures)
|
||||
|
||||
def get_full_encoded_prediction(self, generator, nb_batch = None):
|
||||
def get_full_encoded_prediction(self, generator, nb_batch = None):
|
||||
|
||||
generator.reset()
|
||||
div = np.divmod(generator.n, generator.batch_size)
|
||||
|
||||
if nb_batch is None:
|
||||
nb_batch = div[0] + 1 * (div[1] != 0) - 1
|
||||
generator.reset()
|
||||
div = np.divmod(generator.n, generator.batch_size)
|
||||
|
||||
if nb_batch is None:
|
||||
nb_batch = div[0] + 1 * (div[1] != 0) - 1
|
||||
|
||||
if nb_batch <= 0:
|
||||
return
|
||||
if nb_batch <= 0:
|
||||
return
|
||||
|
||||
predictions = self.get_encoded_prediction(generator.next()[1])
|
||||
while generator.batch_index <= (nb_batch - 1):
|
||||
predictions = np.concatenate((predictions, self.get_encoded_prediction(generator.next()[1]) ), axis = 0)
|
||||
|
||||
return predictions
|
||||
predictions = self.get_encoded_prediction(generator.next()[1])
|
||||
while generator.batch_index <= (nb_batch - 1):
|
||||
predictions = np.concatenate((predictions, self.get_encoded_prediction(generator.next()[1]) ), axis = 0)
|
||||
|
||||
return predictions
|
||||
|
||||
|
|
|
@ -18,8 +18,6 @@ class DisplayPictureCallback(Callback):
|
|||
def on_epoch_end(self, epoch, logs):
|
||||
if epoch % self.epoch_laps == 0:
|
||||
|
||||
print("ok")
|
||||
|
||||
input_pict = self.data_loader.next()[0][1]
|
||||
output_pict = self.model_class.predict_one(input_pict)
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
|
||||
from keras.preprocessing.image import ImageDataGenerator
|
||||
|
||||
|
@ -14,8 +15,12 @@ class ImageDataGeneratorWrapper:
|
|||
|
||||
self.image_data_generator(config)
|
||||
|
||||
self.set_train_generator()
|
||||
self.set_test_generator()
|
||||
sampling_type = self.config.get('models')[self.model]['sampling']
|
||||
train_dir = os.path.join(self.config.get('sampling')[sampling_type]['directory']['train'], '..')
|
||||
test_dir = os.path.join(self.config.get('sampling')[sampling_type]['directory']['test'], '..')
|
||||
|
||||
self.set_train_generator(train_dir)
|
||||
self.set_test_generator(test_dir)
|
||||
|
||||
def image_data_generator(self, config):
|
||||
self.datagen = ImageDataGenerator(
|
||||
|
@ -34,16 +39,14 @@ class ImageDataGeneratorWrapper:
|
|||
batch_size = self.config.get('models')[self.model]['batch_size'],
|
||||
)
|
||||
|
||||
def set_train_generator(self):
|
||||
train_dir = self.config.get('directory')['autoencoder']['train'] + '/..'
|
||||
def set_train_generator(self, train_dir):
|
||||
self.train_generator = self.build_generator(directory = train_dir)
|
||||
return self
|
||||
|
||||
def get_train_generator(self):
|
||||
return self.train_generator
|
||||
|
||||
def set_test_generator(self):
|
||||
test_dir = self.config.get('directory')['autoencoder']['test'] + '/..'
|
||||
def set_test_generator(self, test_dir):
|
||||
self.test_generator = self.build_generator(directory = test_dir)
|
||||
return self
|
||||
|
||||
|
|
|
@ -59,12 +59,13 @@ class ModelTrainer:
|
|||
|
||||
def init_callbacks(self, config):
|
||||
|
||||
|
||||
if 'csv_logger' in config['callbacks']:
|
||||
log_dir = config['callbacks']['csv_logger']['directory']
|
||||
Tools.create_dir_if_not_exists(log_dir)
|
||||
|
||||
self.csv_logger = CSVLogger(
|
||||
filename = '{}/{}training.log'.format(log_dir, self.model.model_name),
|
||||
filename = '{}/{}_training.log'.format(log_dir, self.model.model_name),
|
||||
append = config['callbacks']['csv_logger']['append']
|
||||
)
|
||||
self.callbacks.extend([self.csv_logger])
|
||||
|
@ -86,5 +87,17 @@ class ModelTrainer:
|
|||
epoch_laps = config['callbacks']['display_picture']['epoch_laps']
|
||||
)
|
||||
self.callbacks.extend([self.picture_displayer])
|
||||
|
||||
if 'tensorboard' in config['callbacks']:
|
||||
log_dir = config['callbacks']['tensorboard']['log_dir']
|
||||
Tools.create_dir_if_not_exists(log_dir)
|
||||
self.callbacks.extend([keras.callbacks.TensorBoard(
|
||||
log_dir = log_dir,
|
||||
histogram_freq=0,
|
||||
batch_size=32,
|
||||
write_graph=False,
|
||||
write_images = True
|
||||
)])
|
||||
|
||||
|
||||
return self
|
||||
|
|
|
@ -16,14 +16,13 @@ class SimpleConvAutoEncoder(AbstractAutoEncoderModel):
|
|||
|
||||
super().__init__(save_directory, model_name)
|
||||
|
||||
np.random.seed(42)
|
||||
self.activation = config['activation']
|
||||
self.input_shape = (config['input_height'], config['input_width'], config['input_channel'])
|
||||
self.latent_shape = (config['latent_height'], config['latent_width'], config['latent_channel'])
|
||||
self.lr = config['learning_rate']
|
||||
self.build_model()
|
||||
|
||||
def load(self, which = 'final_model'):
|
||||
self.model = load_model('{}/{}.hdf5'.format(self.save_directory, which), custom_objects= {'my_loss':self.my_loss})
|
||||
|
||||
def build_model(self):
|
||||
input_shape = self.input_shape
|
||||
|
@ -80,8 +79,6 @@ class SimpleConvAutoEncoder(AbstractAutoEncoderModel):
|
|||
|
||||
optimizer = Adam(lr = self.lr, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
|
||||
|
||||
|
||||
|
||||
# self.model.compile(optimizer = optimizer, loss = 'binary_crossentropy')
|
||||
self.model.compile(optimizer = optimizer, loss = self.my_loss)
|
||||
|
||||
|
|
|
@ -8,3 +8,6 @@ ipython==7.3.0
|
|||
Pillow==5.4.1
|
||||
python-dotenv==0.10.1
|
||||
PyYAML==3.13
|
||||
matplotlib>=3.1.0
|
||||
umap-learn==0.3.10
|
||||
bokeh==0.13.0
|
Loading…
Reference in a new issue