diff options
author | Prabhu Ramachandran | 2017-02-20 13:43:49 +0530 |
---|---|---|
committer | Prabhu Ramachandran | 2017-02-20 13:43:49 +0530 |
commit | b1115371efd7edd4c536c1f5de1154acd1aec8d1 (patch) | |
tree | bee5f27f0345b15259038ee1269d12dab354b590 /basic_python | |
parent | 821db8fb9c894877afcbd73ea5a5f13025d57a4b (diff) | |
download | python-workshops-b1115371efd7edd4c536c1f5de1154acd1aec8d1.tar.gz python-workshops-b1115371efd7edd4c536c1f5de1154acd1aec8d1.tar.bz2 python-workshops-b1115371efd7edd4c536c1f5de1154acd1aec8d1.zip |
Adding content on files.
Diffstat (limited to 'basic_python')
-rw-r--r-- | basic_python/files.tex | 289 |
1 files changed, 289 insertions, 0 deletions
diff --git a/basic_python/files.tex b/basic_python/files.tex new file mode 100644 index 0000000..3646c51 --- /dev/null +++ b/basic_python/files.tex @@ -0,0 +1,289 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%Tutorial slides on Python. +% +% Author: FOSSEE +% Copyright (c) 2017, FOSSEE, IIT Bombay +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\documentclass[14pt,compress]{beamer} +\input{macros.tex} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Title page +\title[Files]{Python language: reading/writing files} + +\author[FOSSEE Team] {The FOSSEE Group} + +\institute[IIT Bombay] {Department of Aerospace Engineering\\IIT Bombay} +\date[] {Mumbai, India} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% DOCUMENT STARTS +\begin{document} + +\begin{frame} + \titlepage +\end{frame} + +\section{Reading files} + +\begin{frame}[fragile] + \frametitle{Opening files} + Recall: + \begin{lstlisting} +% pwd # present working directory +% cd /home/fossee # go to directory + \end{lstlisting} + {\small The file is in our present working directory} + \begin{lstlisting} + In []: f = open('pendulum.txt') + In []: f + \end{lstlisting} + \begin{itemize} + \item \texttt{f} is a file object + \item Shows the mode in which the file is open (read mode) + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Reading the whole file} + \begin{lstlisting} + In []: pend = f.read() + In []: print(pend) + \end{lstlisting} + \begin{itemize} + \item We have read the whole file into the variable \texttt{pend} + \end{itemize} + \begin{lstlisting} + In []: type(pend) + In []: pend_list = pend.splitlines() + In []: pend_list + \end{lstlisting} + \begin{itemize} + \item \texttt{pend} is a string variable + \item We can split it at the newline characters into a list of + strings + 
\item Close the file, when done; Also, if you want to read again + \end{itemize} + \begin{lstlisting} + In []: f.close() + In []: f + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Reading line-by-line} + \begin{lstlisting} +In []: for line in open('pendulum.txt'): +.....: print(line) + \end{lstlisting} + \begin{itemize} + \item The file object is an ``iterable'' + \item We iterate over it and print each line + \item Instead of printing, collect lines in a list + \end{itemize} + \begin{lstlisting} +In []: line_list = [ ] +In []: for line in open('pendulum.txt'): +.....: line_list.append(line) + \end{lstlisting} +\end{frame} + +\section{Writing files} + +\begin{frame}[fragile] + \frametitle{Writing files} + \begin{lstlisting} +In []: f = open('new_file.txt', 'w') +In []: f.write('Hello world!\n') +In []: f.close() +\end{lstlisting} +\begin{itemize} +\item Note the mode \typ{'w'} +\item Will clobber existing file! +\item \typ{write} will not add new lines +\item Always remember to call \typ{close} +\end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Using print to write files} + \begin{small} + On Python 2.x + \begin{lstlisting} +In []: from __future__ import print_function +\end{lstlisting} +\hrule +\end{small} + + \begin{lstlisting} +In []: f = open('new_file.txt', 'w') +In []: print('Hello world!', file=f) +In []: f.close() +\end{lstlisting} +\begin{itemize} +\item Just pass the \typ{file} keyword arg +\item \typ{print} works normally, so adds new lines +\end{itemize} +\end{frame} + + +\section{Exercise: parsing data from file} + +\begin{frame}[fragile,plain] + \frametitle{File parsing -- Problem} + \begin{lstlisting} +A;010002;AMY A;058;037;42;35;40;212;P;; + \end{lstlisting} + \begin{itemize} + \item File with records like the one above is given + \item Each record has fields separated by \verb+;+ + \item region code; roll number; name; + \item marks --- $1^{st}$ L; $2^{nd}$ L; math; science; social; total + \item pass/fail 
indicated by P/F; W if withheld, otherwise empty + \end{itemize} + + \begin{itemize} + \item We wish to calculate mean of math marks in region B + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Tokenization} + \begin{lstlisting} +In []: line = "parse this string" +In []: line.split() + \end{lstlisting} + \begin{itemize} + \item Original string is split on white-space + \item Returns a list of strings + \item It can be given an argument to split on that argument + \end{itemize} + \begin{lstlisting} +r = "A;01;JOSE R;083;042;47;AA;72;244;;;" +r.split(';') + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Tokenization \ldots} + \begin{itemize} + \item Since we split on semicolons, fields may have extra spaces at ends + \item We can strip out the spaces at the ends + \end{itemize} + \begin{lstlisting} + In []: word = " B " + In []: word.strip() + \end{lstlisting} + \begin{itemize} + \item \texttt{strip} returns a new string + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{\texttt{str} to \texttt{float}} + \begin{itemize} + \item After tokenizing, the marks we have are strings + \item We need numbers to perform math operations + \end{itemize} + \begin{lstlisting} + In []: mark_str = "1.25" + In []: mark = float(mark_str) + In []: type(mark_str) + In []: type(mark) + \end{lstlisting} + \begin{itemize} + \item \texttt{float} converts the string to a floating-point number + \end{itemize} +\end{frame} + +\begin{frame}[fragile, plain] + \frametitle{File parsing -- Solution} + \begin{lstlisting} +math_B = [] # empty list to store marks +for line in open("sslc1.txt"): + fields = line.split(";") + reg_code = fields[0] + reg_code = reg_code.strip() + + math_mark_str = fields[5] + math_mark = float(math_mark_str) + + if reg_code == "B": + math_B.append(math_mark) + +math_B_mean = sum(math_B) / len(math_B) +print(math_B_mean) + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile, plain] + \frametitle{File parsing -- Solution} + An Error! 
+ \small + \begin{lstlisting} +ValueError: could not convert string to float: AA + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile, plain] + \frametitle{File parsing -- debugging} + \begin{lstlisting} +math_B = [] # empty list to store marks +for line in open("sslc1.txt"): + fields = line.split(";") + reg_code = fields[0] + reg_code = reg_code.strip() + print(fields) # <-- Added + math_mark_str = fields[5] + math_mark = float(math_mark_str) + # ... + \end{lstlisting} +\end{frame} + + +\begin{frame}[fragile, plain] + \frametitle{File parsing -- debugging} + \begin{lstlisting} +math_B = [] # empty list to store marks +for i, line in enumerate(open("sslc1.txt")): + fields = line.split(";") + reg_code = fields[0] + reg_code = reg_code.strip() + print(i, fields) # <-- Added + math_mark_str = fields[5] + math_mark = float(math_mark_str) + # ... + \end{lstlisting} +\end{frame} + + +\begin{frame}[fragile, plain] + \frametitle{File parsing -- Solution} + \begin{lstlisting} +math_B = [] # empty list to store marks +for line in open("sslc1.txt"): + fields = line.split(";") + reg_code = fields[0].strip() + m = fields[5] + mark = float(m) if m != 'AA' else 0 + if reg_code == "B": + math_B.append(mark) + +math_B_mean = sum(math_B) / len(math_B) +print(math_B_mean) + \end{lstlisting} +\end{frame} + +\begin{frame} + \frametitle{Summary} + \begin{itemize} + \item Reading files + \item Writing files + \item Simple file parsing + \end{itemize} +\end{frame} + +\end{document} |