{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 2021-05-20 (G2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Variablen entstehen bei der ersten Zuweisung:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "666" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a = 666\n", "a" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d = dict()\n", "d" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dict" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(d)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d = {}\n", "d" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dict" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(d)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Literal?**\n", "Das ist ein Literal:" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{1: 'one', 2: 'two'}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d = {1:'one', 2:'two'}\n", "d" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{1: 'one', 2: 'two'}" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# dict() also accepts an iterable of (key, value) pairs\n", "pairs = [(1, 'one'), (2, 'two')]\n", "d = dict(pairs)\n", "d" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 
File I/O" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "filename = '../Exercises/files/students.txt'" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# Explicit encoding: the data contains non-ASCII names (e.g. 'Jörg'), and\n", "# open()'s default text encoding is platform-dependent.\n", "# NOTE: assumes students.txt is UTF-8 encoded - confirm.\n", "# f is left open on purpose: the cells below keep reading from it;\n", "# call f.close() once you are done with it.\n", "f = open(filename, encoding='utf-8')" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "_io.TextIOWrapper" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(f)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Reading File in One Big Swoop" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'1910420003;Corinna;Baumgartner\\n1910420002;Anzhelika;Chernykh\\n1910420005;Simon;Hu\\n1910420006;Matthias;Kappel\\n1810420009;Peter;Kolter\\n1920420033;Jan;Kornberger\\n1910420007;Luis;Kraker\\n1810420010;Leonid;Kudriaschov\\n1910420008;Michael;Lang\\n1910420010;Sonja;Lukas\\n1910420013;Behnaz;Mehrabadi\\n1910420014;Leo;Moser\\n1910420015;Moritz;Nagelschmied\\n1910420018;Bianca;Reimer\\n1910420020;Tim;Schmid\\n1910420021;Lukas;Schüttler\\n1910420022;Michael;Schweiger\\n1910420024;Paul;Sinabell\\n1910420025;Jan;Slovik\\n1910420026;Daniel;Söls\\n1910420027;Michael;Stangl\\n1910420028;Felix;Themessl\\n1920420035;Astrid;Vogel\\n1810420029;Florian;Zwittnigg\\n666;Jörg;Faschingbauer\\n'" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "content = f.read()\n", "content" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Hm: probably not the right way to read a 5G file" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['1910420003;Corinna;Baumgartner',\n", " '1910420002;Anzhelika;Chernykh',\n", " '1910420005;Simon;Hu',\n", " '1910420006;Matthias;Kappel',\n", " '1810420009;Peter;Kolter',\n", " '1920420033;Jan;Kornberger',\n", " '1910420007;Luis;Kraker',\n", " 
'1810420010;Leonid;Kudriaschov',\n", " '1910420008;Michael;Lang',\n", " '1910420010;Sonja;Lukas',\n", " '1910420013;Behnaz;Mehrabadi',\n", " '1910420014;Leo;Moser',\n", " '1910420015;Moritz;Nagelschmied',\n", " '1910420018;Bianca;Reimer',\n", " '1910420020;Tim;Schmid',\n", " '1910420021;Lukas;Schüttler',\n", " '1910420022;Michael;Schweiger',\n", " '1910420024;Paul;Sinabell',\n", " '1910420025;Jan;Slovik',\n", " '1910420026;Daniel;Söls',\n", " '1910420027;Michael;Stangl',\n", " '1910420028;Felix;Themessl',\n", " '1920420035;Astrid;Vogel',\n", " '1810420029;Florian;Zwittnigg',\n", " '666;Jörg;Faschingbauer',\n", " '']" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lines = content.split('\\n')\n", "lines" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "What can be done with a list? Iterate, as always ..." ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1910420003;Corinna;Baumgartner\n", "1910420002;Anzhelika;Chernykh\n", "1910420005;Simon;Hu\n", "1910420006;Matthias;Kappel\n", "1810420009;Peter;Kolter\n", "1920420033;Jan;Kornberger\n", "1910420007;Luis;Kraker\n", "1810420010;Leonid;Kudriaschov\n", "1910420008;Michael;Lang\n", "1910420010;Sonja;Lukas\n", "1910420013;Behnaz;Mehrabadi\n", "1910420014;Leo;Moser\n", "1910420015;Moritz;Nagelschmied\n", "1910420018;Bianca;Reimer\n", "1910420020;Tim;Schmid\n", "1910420021;Lukas;Schüttler\n", "1910420022;Michael;Schweiger\n", "1910420024;Paul;Sinabell\n", "1910420025;Jan;Slovik\n", "1910420026;Daniel;Söls\n", "1910420027;Michael;Stangl\n", "1910420028;Felix;Themessl\n", "1920420035;Astrid;Vogel\n", "1810420029;Florian;Zwittnigg\n", "666;Jörg;Faschingbauer\n", "\n" ] } ], "source": [ "for line in lines:\n", " print(line)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Reading File in Fixed Sized Chunks" ] }, { "cell_type": "markdown", "metadata": 
{}, "source": [ "Rewind file (read pointer is currently at the end)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.seek(0)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'1910420003;Corinna;B'" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.read(20)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'2;Anzhelika;Chernykh'" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.read(20)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'\\n1910420005;Simon;Hu\\n1910420006;Matthias;Kappel\\n1810420009;Peter;Kolter\\n1920420033;Jan;Kornberger\\n1910420007;Luis;Kraker\\n1810420010;Leonid;Kudriaschov\\n1910420008;Michael;Lang\\n1910420010;Sonja;Lukas\\n19'" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.read(200)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'10420013;Behnaz;Mehrabadi\\n1910420014;Leo;Moser\\n1910420015;Moritz;Nagelschmied\\n1910420018;Bianca;Reimer\\n1910420020;Tim;Schmid\\n1910420021;Lukas;Schüttler\\n1910420022;Michael;Schweiger\\n1910420024;Paul;Sinabell\\n1910420025;Jan;Slovik\\n1910420026;Daniel;Söls\\n1910420027;Michael;Stangl\\n1910420028;Felix;Themes'" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.read(300)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'sl\\n1920420035;Astrid;Vogel\\n181'" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.read(30)" ] 
}, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'0420029;Florian;Zwittnigg\\n666;'" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.read(30)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Jörg;Faschingbauer\\n'" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.read(30)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**End of File (EOF)**" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "''" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.read(30) # or 1000?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Reading File Line by Line" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.seek(0)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'1910420003;Corinna;Baumgartner\\n'" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.readline()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Note** the trailing ``\\n``" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'1910420002;Anzhelika;Chernykh\\n'" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.readline()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Skip the next 22 lines:" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "for _ in range(22):\n", " f.readline()" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ "'666;Jörg;Faschingbauer\\n'" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.readline()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**End of File (EOF)**" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "''" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.readline()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**A Loop, Respecting EOF**" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.seek(0)" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1910420003;Corinna;Baumgartner\n", "\n", "1910420002;Anzhelika;Chernykh\n", "\n", "1910420005;Simon;Hu\n", "\n", "1910420006;Matthias;Kappel\n", "\n", "1810420009;Peter;Kolter\n", "\n", "1920420033;Jan;Kornberger\n", "\n", "1910420007;Luis;Kraker\n", "\n", "1810420010;Leonid;Kudriaschov\n", "\n", "1910420008;Michael;Lang\n", "\n", "1910420010;Sonja;Lukas\n", "\n", "1910420013;Behnaz;Mehrabadi\n", "\n", "1910420014;Leo;Moser\n", "\n", "1910420015;Moritz;Nagelschmied\n", "\n", "1910420018;Bianca;Reimer\n", "\n", "1910420020;Tim;Schmid\n", "\n", "1910420021;Lukas;Schüttler\n", "\n", "1910420022;Michael;Schweiger\n", "\n", "1910420024;Paul;Sinabell\n", "\n", "1910420025;Jan;Slovik\n", "\n", "1910420026;Daniel;Söls\n", "\n", "1910420027;Michael;Stangl\n", "\n", "1910420028;Felix;Themessl\n", "\n", "1920420035;Astrid;Vogel\n", "\n", "1810420029;Florian;Zwittnigg\n", "\n", "666;Jörg;Faschingbauer\n", "\n" ] } ], "source": [ "while True:\n", " line = f.readline()\n", " if len(line) == 0: # EOF\n", " break\n", " print(line)" ] }, { "cell_type": 
"markdown", "metadata": {}, "source": [ "### Iterating a File" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f.seek(0)" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1910420003;Corinna;Baumgartner\n", "\n", "1910420002;Anzhelika;Chernykh\n", "\n", "1910420005;Simon;Hu\n", "\n", "1910420006;Matthias;Kappel\n", "\n", "1810420009;Peter;Kolter\n", "\n", "1920420033;Jan;Kornberger\n", "\n", "1910420007;Luis;Kraker\n", "\n", "1810420010;Leonid;Kudriaschov\n", "\n", "1910420008;Michael;Lang\n", "\n", "1910420010;Sonja;Lukas\n", "\n", "1910420013;Behnaz;Mehrabadi\n", "\n", "1910420014;Leo;Moser\n", "\n", "1910420015;Moritz;Nagelschmied\n", "\n", "1910420018;Bianca;Reimer\n", "\n", "1910420020;Tim;Schmid\n", "\n", "1910420021;Lukas;Schüttler\n", "\n", "1910420022;Michael;Schweiger\n", "\n", "1910420024;Paul;Sinabell\n", "\n", "1910420025;Jan;Slovik\n", "\n", "1910420026;Daniel;Söls\n", "\n", "1910420027;Michael;Stangl\n", "\n", "1910420028;Felix;Themessl\n", "\n", "1920420035;Astrid;Vogel\n", "\n", "1810420029;Florian;Zwittnigg\n", "\n", "666;Jörg;Faschingbauer\n", "\n" ] } ], "source": [ "for line in f:\n", " print(line)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## String Methods (Needed to Implement the Exercise)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ``str.strip()``, ``str.rstrip()``, ``str.lstrip()``" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "line = '\\n blah \\r \\t \\n'" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'blah'" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "line.strip()" ] }, { "cell_type": "code", 
"execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'\\n blah'" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "line.rstrip()" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'blah \\r \\t \\n'" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "line.lstrip()" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'666;Jörg;Faschingbauer'" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "line = '666;Jörg;Faschingbauer\\n'\n", "line.rstrip('\\n')" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'666;Jörg;Faschingbauer'" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "line = '666;Jörg;Faschingbauer\\n'\n", "line = line.rstrip('\\n')\n", "line" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ``str.split()``" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['666', 'Jörg', 'Faschingbauer']" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fields = line.split(';')\n", "fields" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Jörg'" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fields[1]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.4" } }, "nbformat": 4, "nbformat_minor": 4 }