From 1e06a7cea768408b0d1b144c8e0a4db0b6073078 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Vieille?= Date: Thu, 9 Aug 2018 01:25:19 +0200 Subject: [PATCH] test du notebook sur les refs --- notebooks/1.0-fvi-tests-sur-refs.ipynb | 699 +++++++++++++++++++++++++ 1 file changed, 699 insertions(+) create mode 100644 notebooks/1.0-fvi-tests-sur-refs.ipynb diff --git a/notebooks/1.0-fvi-tests-sur-refs.ipynb b/notebooks/1.0-fvi-tests-sur-refs.ipynb new file mode 100644 index 0000000..febbd05 --- /dev/null +++ b/notebooks/1.0-fvi-tests-sur-refs.ipynb @@ -0,0 +1,699 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 132, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "metadata": {}, + "outputs": [], + "source": [ + "refs = pd.read_csv('../data/external/refs/references_labels.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imagelabel
020170416-012001.jpg1
120170416-012001.jpg3
220170416-013001.jpg1
320170416-014001.jpg2
420170416-014001.jpg9
520170416-015001.jpg1
620170416-015001.jpg3
720170416-015001.jpg6
820170416-015001.jpg9
920170416-025001.jpg1
1020170416-025001.jpg8
1120170416-030001.jpg1
1220170416-030001.jpg3
1320170416-030001.jpg6
1420170416-030001.jpg9
1520170416-032001.jpg1
1620170416-044001.jpg1
1720170416-044001.jpg8
1820170416-045001.jpg2
1920170416-045001.jpg4
2020170416-050001.jpg1
2120170416-050001.jpg3
2220170416-050001.jpg6
2320170416-050001.jpg9
2420170416-051002.jpg1
2520170416-051002.jpg3
2620170416-051002.jpg7
2720170416-051002.jpg8
2820170416-062001.jpg1
2920170416-062001.jpg3
.........
64620180415-022001.jpg5
64720180415-022001.jpg9
64820170803-210001.jpg1
64920170803-210001.jpg3
65020170803-210001.jpg5
65120170803-210001.jpg9
65220180101-174001.jpg2
65320180101-174001.jpg3
65420180101-174001.jpg9
65520180327-112001.jpg1
65620180327-112001.jpg8
65720180418-060001.jpg1
65820170813-034001.jpg1
65920170813-034001.jpg3
66020170813-034001.jpg4
66120170716-094001.jpg1
66220170716-094001.jpg4
66320180501-120001.jpg2
66420180501-120001.jpg3
66520180501-120001.jpg4
66620170913-012001.jpg1
66720170726-033001.jpg2
66820171216-170001.jpg2
66920171224-144001.jpg2
67020171224-144001.jpg3
67120170423-003001.jpg1
67220170423-003001.jpg3
67320170424-092001.jpg1
67420170424-092001.jpg3
67520170424-092001.jpg8
\n", + "

676 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " image label\n", + "0 20170416-012001.jpg 1\n", + "1 20170416-012001.jpg 3\n", + "2 20170416-013001.jpg 1\n", + "3 20170416-014001.jpg 2\n", + "4 20170416-014001.jpg 9\n", + "5 20170416-015001.jpg 1\n", + "6 20170416-015001.jpg 3\n", + "7 20170416-015001.jpg 6\n", + "8 20170416-015001.jpg 9\n", + "9 20170416-025001.jpg 1\n", + "10 20170416-025001.jpg 8\n", + "11 20170416-030001.jpg 1\n", + "12 20170416-030001.jpg 3\n", + "13 20170416-030001.jpg 6\n", + "14 20170416-030001.jpg 9\n", + "15 20170416-032001.jpg 1\n", + "16 20170416-044001.jpg 1\n", + "17 20170416-044001.jpg 8\n", + "18 20170416-045001.jpg 2\n", + "19 20170416-045001.jpg 4\n", + "20 20170416-050001.jpg 1\n", + "21 20170416-050001.jpg 3\n", + "22 20170416-050001.jpg 6\n", + "23 20170416-050001.jpg 9\n", + "24 20170416-051002.jpg 1\n", + "25 20170416-051002.jpg 3\n", + "26 20170416-051002.jpg 7\n", + "27 20170416-051002.jpg 8\n", + "28 20170416-062001.jpg 1\n", + "29 20170416-062001.jpg 3\n", + ".. ... ...\n", + "646 20180415-022001.jpg 5\n", + "647 20180415-022001.jpg 9\n", + "648 20170803-210001.jpg 1\n", + "649 20170803-210001.jpg 3\n", + "650 20170803-210001.jpg 5\n", + "651 20170803-210001.jpg 9\n", + "652 20180101-174001.jpg 2\n", + "653 20180101-174001.jpg 3\n", + "654 20180101-174001.jpg 9\n", + "655 20180327-112001.jpg 1\n", + "656 20180327-112001.jpg 8\n", + "657 20180418-060001.jpg 1\n", + "658 20170813-034001.jpg 1\n", + "659 20170813-034001.jpg 3\n", + "660 20170813-034001.jpg 4\n", + "661 20170716-094001.jpg 1\n", + "662 20170716-094001.jpg 4\n", + "663 20180501-120001.jpg 2\n", + "664 20180501-120001.jpg 3\n", + "665 20180501-120001.jpg 4\n", + "666 20170913-012001.jpg 1\n", + "667 20170726-033001.jpg 2\n", + "668 20171216-170001.jpg 2\n", + "669 20171224-144001.jpg 2\n", + "670 20171224-144001.jpg 3\n", + "671 20170423-003001.jpg 1\n", + "672 20170423-003001.jpg 3\n", + "673 20170424-092001.jpg 1\n", + "674 20170424-092001.jpg 3\n", + "675 20170424-092001.jpg 8\n", + "\n", + "[676 rows x 2 columns]" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "refs" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "metadata": {}, + "outputs": [], + "source": [ + "refs2 = refs.pivot_table(index=\"image\", columns=\"label\", aggfunc=len, fill_value=0).reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['image', 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='object', name='label')" + ] + }, + "execution_count": 156, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "refs2.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [], + "source": [ + "refs2.index.name = None" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Length mismatch: Expected axis has 10 elements, new values have 9 elements", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mrefs2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"label\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcol\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrefs2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcol\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m'image'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/anaconda3/envs/py35/lib/python3.5/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__setattr__\u001b[0;34m(self, name, value)\u001b[0m\n\u001b[1;32m 3625\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3626\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3627\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3628\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3629\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mpandas/_libs/properties.pyx\u001b[0m in \u001b[0;36mpandas._libs.properties.AxisProperty.__set__\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/py35/lib/python3.5/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_set_axis\u001b[0;34m(self, axis, labels)\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 558\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_set_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 559\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 560\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_clear_item_cache\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/envs/py35/lib/python3.5/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mset_axis\u001b[0;34m(self, axis, new_labels)\u001b[0m\n\u001b[1;32m 3072\u001b[0m raise ValueError('Length mismatch: Expected axis has %d elements, '\n\u001b[1;32m 3073\u001b[0m \u001b[0;34m'new values have %d elements'\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3074\u001b[0;31m (old_len, new_len))\n\u001b[0m\u001b[1;32m 3075\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3076\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maxes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_labels\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Length mismatch: Expected axis has 10 elements, new values have 9 elements" + ] + } + ], + "source": [ + "refs2.columns = [\"label\" + str(col) for col in refs2.columns.tolist() if col != 'image']" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "metadata": {}, + "outputs": [], + "source": [ + "refs2.rename(columns=lambda x: \"label\" + str(x) if x != 'image' else 'image', inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=320, step=1)" + ] + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "refs2.index" + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "label\n", + "label1 1\n", + "label2 0\n", + "Name: 0, dtype: object\n" + ] + } + ], + "source": [ + "for i, row in refs2.iterrows():\n", + " print(row[{'label1', 'label2'}])\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "320" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "refs2.index.size" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(676, 2)" + ] + }, + "execution_count": 127, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "refs.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "refs2." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}