%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Tutorial slides on Python.
%
% Author: FOSSEE
% Copyright (c) 2017, FOSSEE, IIT Bombay
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\documentclass[14pt,compress]{beamer}
\input{macros.tex}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Title page
\title[Files]{Python language: reading/writing files}

\author[FOSSEE Team] {The FOSSEE Group}

\institute[IIT Bombay] {Department of Aerospace Engineering\\IIT Bombay}
\date[] {Mumbai, India}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DOCUMENT STARTS
\begin{document}

\begin{frame}
  \titlepage
\end{frame}

\section{Reading files}

\begin{frame}[fragile]
  \frametitle{Opening files}
  Recall:
  \begin{lstlisting}
% pwd # present working directory
% cd /home/fossee # go to directory
  \end{lstlisting}
  {\small The file is in our present working directory}
  \begin{lstlisting}
   In []: f = open('pendulum.txt')
   In []: f
  \end{lstlisting}
  \begin{itemize}
  \item \texttt{f} is a file object
  \item Shows the mode in which the file is open (read mode)
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Reading the whole file}
  \begin{lstlisting}
   In []: pend = f.read()
   In []: print(pend)
  \end{lstlisting}
  \begin{itemize}
  \item We have read the whole file into the variable \texttt{pend}
  \end{itemize}
  \begin{lstlisting}
   In []: type(pend)
   In []: pend_list = pend.splitlines()
   In []: pend_list
  \end{lstlisting}
  \begin{itemize}
  \item  \texttt{pend} is a string variable
  \item We can split it at the newline characters into a list of
    strings
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
 \frametitle{Closing the file}
 \begin{itemize}
  \item Close the file when done; also close (and reopen) it if you want to read it again
  \end{itemize}
  \begin{lstlisting}
   In []: f.close()
   In []: f
 \end{lstlisting}
\end{frame}


\begin{frame}[fragile]
  \frametitle{Reading line-by-line}
  \begin{lstlisting}
In []: for line in open('pendulum.txt'):
.....:     print(line)
  \end{lstlisting}
  \begin{itemize}
  \item The file object is an ``iterable''
  \item We iterate over it and print each line
  \item Instead of printing, collect lines in a list
  \end{itemize}
  \begin{lstlisting}
In []: line_list = [ ]
In []: for line in open('pendulum.txt'):
.....:     line_list.append(line)
  \end{lstlisting}
\end{frame}

\section{Writing files}

\begin{frame}[fragile]
  \frametitle{Writing files}
  \begin{lstlisting}
In []: f = open('new_file.txt', 'w')
In []: f.write('Hello world!\n')
In []: f.close()
\end{lstlisting}
\begin{itemize}
\item Note the mode \typ{'w'}
\item Will clobber existing file!
\item \typ{write} will not add a newline
\item Always remember to call \typ{close}
\end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Using print to write files}
  \begin{small}
    On Python 2.x
  \begin{lstlisting}
In []: from __future__ import print_function
\end{lstlisting}
\hrule
\end{small}

  \begin{lstlisting}
In []: f = open('new_file.txt', 'w')
In []: print('Hello world!', file=f)
In []: f.close()
\end{lstlisting}
\begin{itemize}
\item Just pass the \typ{file} keyword arg
\item \typ{print} works normally, so it adds a newline
\end{itemize}
\end{frame}


\section{Exercise: parsing data from file}

\begin{frame}[fragile,plain]
  \frametitle{File parsing -- Problem}
  \begin{lstlisting}
A;010002;AMY A;058;037;42;35;40;212;P;;
  \end{lstlisting}
  \begin{itemize}
  \item A file with records like the one above is given
  \item Each record has fields separated by \verb+;+
  \item region code; roll number; name;
  \item marks --- 1\textsuperscript{st} L; 2\textsuperscript{nd} L; math; science; social; total
  \item pass/fail indicated by P/F; W if withheld; empty otherwise
    \end{itemize}

  \begin{itemize}
  \item We wish to calculate mean of math marks in region B
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Tokenization}
  \begin{lstlisting}
In []: line = "parse this        string"
In []: line.split()
  \end{lstlisting}
  \begin{itemize}
  \item Original string is split on whitespace
  \item Returns a list of strings
  \item It can be given an argument to split on that argument
  \end{itemize}
  \begin{lstlisting}
r = "A;01;JOSE R;083;042;47;AA;72;244;;;"
r.split(';')
  \end{lstlisting}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Tokenization \ldots}
  \begin{itemize}
  \item Since we split on semicolons, fields may have extra spaces at the ends
  \item We can strip out the spaces at the ends
  \end{itemize}
  \begin{lstlisting}
   In []: word = "     B    "
   In []: word.strip()
  \end{lstlisting}
  \begin{itemize}
  \item \texttt{strip} returns a new string
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{\texttt{str} to \texttt{float}}
  \begin{itemize}
  \item After tokenizing, the marks we have are strings
  \item We need numbers to perform math operations
  \end{itemize}
  \begin{lstlisting}
  In []: mark_str = "1.25"
  In []: mark = float(mark_str)
  In []: type(mark_str)
  In []: type(mark)
  \end{lstlisting}
\end{frame}

\begin{frame}[fragile, plain]
  \frametitle{File parsing -- Solution}
  \begin{lstlisting}
math_B = [] # empty list to store marks
for line in open("sslc1.txt"):
    fields = line.split(";")
    reg_code = fields[0]
    reg_code = reg_code.strip()

    math_mark_str = fields[5]
    math_mark = float(math_mark_str)

    if reg_code == "B":
        math_B.append(math_mark)

math_B_mean = sum(math_B) / len(math_B)
print(math_B_mean)
  \end{lstlisting}
\end{frame}

\begin{frame}[fragile, plain]
  \frametitle{File parsing -- Solution}
  An Error!
  \small
  \begin{lstlisting}
ValueError: could not convert string to float: 'AA'
  \end{lstlisting}
\end{frame}

\begin{frame}[fragile, plain]
  \frametitle{File parsing -- debugging}
  \begin{lstlisting}
math_B = [] # empty list to store marks
for line in open("sslc1.txt"):
    fields = line.split(";")
    reg_code = fields[0]
    reg_code = reg_code.strip()
    print(fields) # <-- Added
    math_mark_str = fields[5]
    math_mark = float(math_mark_str)
    # ...
  \end{lstlisting}
\end{frame}


\begin{frame}[fragile, plain]
  \frametitle{File parsing -- debugging}
  \small
  \begin{lstlisting}
math_B = [] # empty list to store marks
for i, line in enumerate(open("sslc1.txt")):
    fields = line.split(";")
    reg_code = fields[0]
    reg_code = reg_code.strip()
    print(i, fields) # <-- Added
    math_mark_str = fields[5]
    math_mark = float(math_mark_str)
    # ...
  \end{lstlisting}
\end{frame}


\begin{frame}[fragile, plain]
  \frametitle{File parsing -- Solution}
  \begin{lstlisting}
math_B = [] # empty list to store marks
for line in open("sslc1.txt"):
    fields = line.split(";")
    reg_code = fields[0].strip()
    m = fields[5]
    mark = float(m) if m != 'AA' else 0
    if reg_code == "B":
        math_B.append(mark)

math_B_mean = sum(math_B) / len(math_B)
print(math_B_mean)
  \end{lstlisting}
\end{frame}

\begin{frame}
  \frametitle{Summary}
  \begin{itemize}
  \item Reading files
  \item Writing files
  \item Simple file parsing
  \end{itemize}
\end{frame}

\end{document}