summaryrefslogtreecommitdiff
path: root/basic_python
diff options
context:
space:
mode:
authorPrabhu Ramachandran2017-02-20 13:43:49 +0530
committerPrabhu Ramachandran2017-02-20 13:43:49 +0530
commitb1115371efd7edd4c536c1f5de1154acd1aec8d1 (patch)
treebee5f27f0345b15259038ee1269d12dab354b590 /basic_python
parent821db8fb9c894877afcbd73ea5a5f13025d57a4b (diff)
downloadpython-workshops-b1115371efd7edd4c536c1f5de1154acd1aec8d1.tar.gz
python-workshops-b1115371efd7edd4c536c1f5de1154acd1aec8d1.tar.bz2
python-workshops-b1115371efd7edd4c536c1f5de1154acd1aec8d1.zip
Adding content on files.
Diffstat (limited to 'basic_python')
-rw-r--r--basic_python/files.tex289
1 files changed, 289 insertions, 0 deletions
diff --git a/basic_python/files.tex b/basic_python/files.tex
new file mode 100644
index 0000000..3646c51
--- /dev/null
+++ b/basic_python/files.tex
@@ -0,0 +1,289 @@
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%Tutorial slides on Python.
+%
+% Author: FOSSEE
+% Copyright (c) 2017, FOSSEE, IIT Bombay
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\documentclass[14pt,compress]{beamer}
+\input{macros.tex}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Title page
+\title[Files]{Python language: reading/writing files}
+
+\author[FOSSEE Team] {The FOSSEE Group}
+
+\institute[IIT Bombay] {Department of Aerospace Engineering\\IIT Bombay}
+\date[] {Mumbai, India}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% DOCUMENT STARTS
+\begin{document}
+
+\begin{frame}
+ \titlepage
+\end{frame}
+
+\section{Reading files}
+
+\begin{frame}[fragile]
+ \frametitle{Opening files}
+ Recall:
+ \begin{lstlisting}
+% pwd # present working directory
+% cd /home/fossee # go to directory
+ \end{lstlisting}
+ {\small The file is in our present working directory}
+ \begin{lstlisting}
+ In []: f = open('pendulum.txt')
+ In []: f
+ \end{lstlisting}
+ \begin{itemize}
+ \item \texttt{f} is a file object
+ \item Shows the mode in which the file is open (read mode)
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Reading the whole file}
+ \begin{lstlisting}
+ In []: pend = f.read()
+ In []: print(pend)
+ \end{lstlisting}
+ \begin{itemize}
+ \item We have read the whole file into the variable \texttt{pend}
+ \end{itemize}
+ \begin{lstlisting}
+ In []: type(pend)
+ In []: pend_list = pend.splitlines()
+ In []: pend_list
+ \end{lstlisting}
+ \begin{itemize}
+ \item \texttt{pend} is a string variable
+ \item We can split it at the newline characters into a list of
+ strings
+ \item Close the file when done, and also before reading it again
+ \end{itemize}
+ \begin{lstlisting}
+ In []: f.close()
+ In []: f
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Reading line-by-line}
+ \begin{lstlisting}
+In []: for line in open('pendulum.txt'):
+.....:     print(line)
+ \end{lstlisting}
+ \begin{itemize}
+ \item The file object is an ``iterable''
+ \item We iterate over it and print each line
+ \item Instead of printing, collect lines in a list
+ \end{itemize}
+ \begin{lstlisting}
+In []: line_list = [ ]
+In []: for line in open('pendulum.txt'):
+.....:     line_list.append(line)
+ \end{lstlisting}
+\end{frame}
+
+\section{Writing files}
+
+\begin{frame}[fragile]
+ \frametitle{Writing files}
+ \begin{lstlisting}
+In []: f = open('new_file.txt', 'w')
+In []: f.write('Hello world!\n')
+In []: f.close()
+\end{lstlisting}
+\begin{itemize}
+\item Note the mode \typ{'w'}
+\item Will clobber existing file!
+\item \typ{write} will not add new lines
+\item Always remember to call \typ{close}
+\end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Using print to write files}
+ \begin{small}
+ On Python 2.x
+ \begin{lstlisting}
+In []: from __future__ import print_function
+\end{lstlisting}
+\hrule
+\end{small}
+
+ \begin{lstlisting}
+In []: f = open('new_file.txt', 'w')
+In []: print('Hello world!', file=f)
+In []: f.close()
+\end{lstlisting}
+\begin{itemize}
+\item Just pass the \typ{file} keyword arg
+\item \typ{print} works normally, so adds new lines
+\end{itemize}
+\end{frame}
+
+
+\section{Exercise: parsing data from file}
+
+\begin{frame}[fragile,plain]
+ \frametitle{File parsing -- Problem}
+ \begin{lstlisting}
+A;010002;AMY A;058;037;42;35;40;212;P;;
+ \end{lstlisting}
+ \begin{itemize}
+ \item File with records like the one above is given
+ \item Each record has fields separated by \verb+;+
+ \item region code; roll number; name;
+ \item marks --- 1\textsuperscript{st} L; 2\textsuperscript{nd} L; math; science; social; total
+ \item pass/fail indicated by P/F; W if withheld, otherwise empty
+ \end{itemize}
+
+ \begin{itemize}
+ \item We wish to calculate the mean of math marks in region B
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Tokenization}
+ \begin{lstlisting}
+In []: line = "parse this string"
+In []: line.split()
+ \end{lstlisting}
+ \begin{itemize}
+ \item Original string is split on white-space
+ \item Returns a list of strings
+ \item It can be given an argument, and will split on that argument
+ \end{itemize}
+ \begin{lstlisting}
+r = "A;01;JOSE R;083;042;47;AA;72;244;;;"
+r.split(';')
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Tokenization \ldots}
+ \begin{itemize}
+ \item Since we split on semicolons, fields may have extra spaces at ends
+ \item We can strip out the spaces at the ends
+ \end{itemize}
+ \begin{lstlisting}
+ In []: word = " B "
+ In []: word.strip()
+ \end{lstlisting}
+ \begin{itemize}
+ \item \texttt{strip} is returning a new string
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{\texttt{str} to \texttt{float}}
+ \begin{itemize}
+ \item After tokenizing, the marks we have are strings
+ \item We need numbers to perform math operations
+ \end{itemize}
+ \begin{lstlisting}
+ In []: mark_str = "1.25"
+ In []: mark = float(mark_str)
+ In []: type(mark_str)
+ In []: type(mark)
+ \end{lstlisting}
+ \begin{itemize}
+ \item \texttt{float} converts the string and returns a new number
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile, plain]
+ \frametitle{File parsing -- Solution}
+ \begin{lstlisting}
+math_B = [] # empty list to store marks
+for line in open("sslc1.txt"):
+    fields = line.split(";")
+    reg_code = fields[0]
+    reg_code = reg_code.strip()
+
+    math_mark_str = fields[5]
+    math_mark = float(math_mark_str)
+
+    if reg_code == "B":
+        math_B.append(math_mark)
+
+math_B_mean = sum(math_B) / len(math_B)
+print(math_B_mean)
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile, plain]
+ \frametitle{File parsing -- Solution}
+ An Error!
+ \small
+ \begin{lstlisting}
+ValueError: could not convert string to float: AA
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile, plain]
+ \frametitle{File parsing -- debugging}
+ \begin{lstlisting}
+math_B = [] # empty list to store marks
+for line in open("sslc1.txt"):
+    fields = line.split(";")
+    reg_code = fields[0]
+    reg_code = reg_code.strip()
+    print(fields) # <-- Added
+    math_mark_str = fields[5]
+    math_mark = float(math_mark_str)
+    # ...
+ \end{lstlisting}
+\end{frame}
+
+
+\begin{frame}[fragile, plain]
+ \frametitle{File parsing -- debugging}
+ \begin{lstlisting}
+math_B = [] # empty list to store marks
+for i, line in enumerate(open("sslc1.txt")):
+    fields = line.split(";")
+    reg_code = fields[0]
+    reg_code = reg_code.strip()
+    print(i, fields) # <-- Added
+    math_mark_str = fields[5]
+    math_mark = float(math_mark_str)
+    # ...
+ \end{lstlisting}
+\end{frame}
+
+
+\begin{frame}[fragile, plain]
+ \frametitle{File parsing -- Solution}
+ \begin{lstlisting}
+math_B = [] # empty list to store marks
+for line in open("sslc1.txt"):
+    fields = line.split(";")
+    reg_code = fields[0].strip()
+    m = fields[5]
+    mark = float(m) if m != 'AA' else 0
+    if reg_code == "B":
+        math_B.append(mark)
+
+math_B_mean = sum(math_B) / len(math_B)
+print(math_B_mean)
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Summary}
+ \begin{itemize}
+ \item Reading files
+ \item Writing files
+ \item Simple file parsing
+ \end{itemize}
+\end{frame}
+
+\end{document}