%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Tutorial slides on Python.
%
% Author: FOSSEE
% Copyright (c) 2017, FOSSEE, IIT Bombay
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\documentclass[14pt,compress]{beamer}

% Shared preamble. NOTE(review): \typ and the lstlisting setup are not defined
% in this file; presumably they come from macros.tex -- confirm.
\input{macros.tex}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Title page
\title[Files]{Python language: reading/writing files}

\author[FOSSEE Team] {The FOSSEE Group}

\institute[IIT Bombay] {Department of Aerospace Engineering\\IIT Bombay}
\date[] {Mumbai, India}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DOCUMENT STARTS
%
% Layout rules used throughout this file:
%   * every frame containing a listing is [fragile], and its \end{frame}
%     sits alone on a line starting in column 0;
%   * lstlisting bodies are verbatim, so their line breaks and indentation
%     are exactly what appears on the slide.
\begin{document}

\begin{frame}
  \titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Part 1: reading files
\section{Reading files}

\begin{frame}[fragile]
  \frametitle{Opening files}
  Recall:
\begin{lstlisting}
% pwd # present working directory
% cd /home/fossee # go to directory
\end{lstlisting}
  {\small The file is in our present working directory}
\begin{lstlisting}
In []: f = open('pendulum.txt')
In []: f
\end{lstlisting}
  \begin{itemize}
  \item \texttt{f} is a file object
  \item Shows the mode in which the file is open (read mode)
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Reading the whole file}
\begin{lstlisting}
In []: pend = f.read()
In []: print(pend)
\end{lstlisting}
  \begin{itemize}
  \item We have read the whole file into the variable \texttt{pend}
  \end{itemize}
\begin{lstlisting}
In []: type(pend)
In []: pend_list = pend.splitlines()
In []: pend_list
\end{lstlisting}
  \begin{itemize}
  \item \texttt{pend} is a string variable
  \item We can split it at the newline characters into a list of strings
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Closing the file}
  \begin{itemize}
  \item Close the file when done, and also if you want to read it again
  \end{itemize}
\begin{lstlisting}
In []: f.close()
In []: f
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Reading line-by-line}
\begin{lstlisting}
In []: for line in open('pendulum.txt'):
.....:     print(line)
\end{lstlisting}
  \begin{itemize}
  \item The file object is an ``iterable''
  \item We iterate over it and print each line
  \item Instead of printing, collect lines in a list
  \end{itemize}
\begin{lstlisting}
In []: line_list = [ ]
In []: for line in open('pendulum.txt'):
.....:     line_list.append(line)
\end{lstlisting}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Part 2: writing files
\section{Writing files}

\begin{frame}[fragile]
  \frametitle{Writing files}
\begin{lstlisting}
In []: f = open('new_file.txt', 'w')
In []: f.write('Hello world!\n')
In []: f.close()
\end{lstlisting}
  \begin{itemize}
  \item Note the mode \typ{'w'}
  \item Will clobber existing file!
  \item \typ{write} will not add new lines
  \item Always remember to call \typ{close}
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Using print to write files}
  \begin{small}
    On Python 2.x
\begin{lstlisting}
In []: from __future__ import print_function
\end{lstlisting}
    \hrule
  \end{small}
\begin{lstlisting}
In []: f = open('new_file.txt', 'w')
In []: print('Hello world!', file=f)
In []: f.close()
\end{lstlisting}
  \begin{itemize}
  \item Just pass the \typ{file} keyword arg
  \item \typ{print} works normally, so adds new lines
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Part 3: worked exercise -- parse ';'-separated records and average
% the math marks (field index 5) of region B.
\section{Exercise: parsing data from file}

\begin{frame}[fragile,plain]
  \frametitle{File parsing -- Problem}
\begin{lstlisting}
A;010002;AMY A;058;037;42;35;40;212;P;;
\end{lstlisting}
  \begin{itemize}
  \item A file with records like the one above is given
  \item Each record has fields separated by \verb+;+
  \item region code; roll number; name;
  \item marks --- 1\textsuperscript{st} L; 2\textsuperscript{nd} L; math;
    science; social; total
  \item pass/fail indicated by P/F; W if withheld, otherwise empty
  \end{itemize}

  \begin{itemize}
  \item We wish to calculate mean of math marks in region B
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Tokenization}
\begin{lstlisting}
In []: line = "parse this string"
In []: line.split()
\end{lstlisting}
  \begin{itemize}
  \item Original string is split on white-space
  \item Returns a list of strings
  \item It can be given an argument to split on that argument
  \end{itemize}
\begin{lstlisting}
r = "A;01;JOSE R;083;042;47;AA;72;244;;;"
r.split(';')
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Tokenization \ldots}
  \begin{itemize}
  % The records are split on ';' (see r.split(';') on the previous slide).
  \item Since we split on semicolons, fields may have extra spaces at ends
  \item We can strip out the spaces at the ends
  \end{itemize}
\begin{lstlisting}
In []: word = " B "
In []: word.strip()
\end{lstlisting}
  \begin{itemize}
  \item \texttt{strip} returns a new string
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{\texttt{str} to \texttt{float}}
  \begin{itemize}
  \item After tokenizing, the marks we have are strings
  \item We need numbers to perform math operations
  \end{itemize}
\begin{lstlisting}
In []: mark_str = "1.25"
In []: mark = float(mark_str)
In []: type(mark_str)
In []: type(mark)
\end{lstlisting}
\end{frame}

% First attempt: intentionally fails on the 'AA' entry; the failure is
% shown on the next slide and motivates the debugging slides.
\begin{frame}[fragile,plain]
  \frametitle{File parsing -- Solution}
\begin{lstlisting}
math_B = [] # empty list to store marks
for line in open("sslc1.txt"):
    fields = line.split(";")
    reg_code = fields[0]
    reg_code = reg_code.strip()
    math_mark_str = fields[5]
    math_mark = float(math_mark_str)
    if reg_code == "B":
        math_B.append(math_mark)
math_B_mean = sum(math_B) / len(math_B)
print(math_B_mean)
\end{lstlisting}
\end{frame}

\begin{frame}[fragile,plain]
  \frametitle{File parsing -- Solution}
  An Error!
  \small
\begin{lstlisting}
ValueError: could not convert string to float: AA
\end{lstlisting}
\end{frame}

% Debugging step 1: print the fields of every record.
\begin{frame}[fragile,plain]
  \frametitle{File parsing -- debugging}
\begin{lstlisting}
math_B = [] # empty list to store marks
for line in open("sslc1.txt"):
    fields = line.split(";")
    reg_code = fields[0]
    reg_code = reg_code.strip()
    print(fields) # <-- Added
    math_mark_str = fields[5]
    math_mark = float(math_mark_str)
    # ...
\end{lstlisting}
\end{frame}

% Debugging step 2: also print the record index via enumerate.
\begin{frame}[fragile,plain]
  \frametitle{File parsing -- debugging}
  \small
\begin{lstlisting}
math_B = [] # empty list to store marks
for i, line in enumerate(open("sslc1.txt")):
    fields = line.split(";")
    reg_code = fields[0]
    reg_code = reg_code.strip()
    print(i, fields) # <-- Added
    math_mark_str = fields[5]
    math_mark = float(math_mark_str)
    # ...
\end{lstlisting}
\end{frame}

% Final version: an 'AA' entry is counted as a mark of 0.
\begin{frame}[fragile,plain]
  \frametitle{File parsing -- Solution}
\begin{lstlisting}
math_B = [] # empty list to store marks
for line in open("sslc1.txt"):
    fields = line.split(";")
    reg_code = fields[0].strip()
    m = fields[5]
    mark = float(m) if m != 'AA' else 0
    if reg_code == "B":
        math_B.append(mark)
math_B_mean = sum(math_B) / len(math_B)
print(math_B_mean)
\end{lstlisting}
\end{frame}

\begin{frame}
  \frametitle{Summary}
  \begin{itemize}
  \item Reading files
  \item Writing files
  \item Simple file parsing
  \end{itemize}
\end{frame}

\end{document}