{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Reference labels: long to wide\n",
    "\n",
    "Load `references_labels.csv`, which holds one row per `image` / `label` pair, and reshape it into one row per image with one count column per label (`label1`, `label2`, ...).\n",
    "\n",
    "The notebook is written to run top to bottom on a fresh kernel (Restart Kernel, then Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Relative paths are resolved against the kernel's working directory.\n",
    "REFS_CSV = '../data/external/refs/references_labels.csv'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the reference labels\n",
    "\n",
    "The previously saved run of this notebook reported a shape of `(676, 2)` with the columns `image` and `label`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "refs = pd.read_csv(REFS_CSV)\n",
    "# Show a preview only; dumping the whole frame bloats the notebook file.\n",
    "refs.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "refs.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## One row per image, one count column per label\n",
    "\n",
    "`pd.crosstab` builds the frequency table directly, so the result does not depend on how `pivot_table` behaves when no value column is left to aggregate."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "refs2 = (\n",
    "    pd.crosstab(refs['image'], refs['label'])  # rows: image, columns: label, cells: counts\n",
    "    .add_prefix('label')                       # 1 -> 'label1', 2 -> 'label2', ...\n",
    "    .reset_index()                             # 'image' becomes a regular column again\n",
    ")\n",
    "# The leftover axis name 'label' sits on the columns axis, not on the row index.\n",
    "refs2.columns.name = None\n",
    "refs2.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sanity checks\n",
    "\n",
    "The previously saved run reported 320 rows in `refs2` (`RangeIndex(start=0, stop=320, step=1)`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# After the reshape every distinct image must appear exactly once.\n",
    "assert refs2['image'].is_unique\n",
    "assert len(refs2) == refs['image'].nunique()\n",
    "refs2.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspect two label counts of the first row.\n",
    "# The column selection must be a list: a set has no defined order and\n",
    "# pandas 2.x rejects sets as indexers.\n",
    "refs2.loc[0, ['label1', 'label2']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "refs2.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}