summaryrefslogtreecommitdiff
path: root/slides/basic-python/io_files_parsing.tex
diff options
context:
space:
mode:
Diffstat (limited to 'slides/basic-python/io_files_parsing.tex')
-rw-r--r--slides/basic-python/io_files_parsing.tex239
1 files changed, 239 insertions, 0 deletions
diff --git a/slides/basic-python/io_files_parsing.tex b/slides/basic-python/io_files_parsing.tex
new file mode 100644
index 0000000..cb46cbe
--- /dev/null
+++ b/slides/basic-python/io_files_parsing.tex
@@ -0,0 +1,239 @@
+\section{I/O}
+
+\begin{frame}[fragile]
+ \frametitle{Printing}
+ \begin{lstlisting}
+ In[]: a = "This is a string"
+ In[]: a
+ In[]: print a
+ \end{lstlisting}
+ \begin{itemize}
+ \item Both \texttt{a} and \texttt{print a} show the value
+ \item What is the difference?
+ \item Typing \texttt{a} shows the value; \texttt{print a} prints it
+ \item Typing \texttt{a} shows the value only in interpreter
+ \item In a script, it has no effect.
+ \end{itemize}
+ \begin{lstlisting}
+ In[]: b = "A line \n New line"
+ In[]: b
+ In[]: print b
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{String formatting}
+ \begin{lstlisting}
+ In[]: x = 1.5
+ In[]: y = 2
+ In[]: z = "zed"
+ In[]: print "x is %2.1f y is %d z is %s" %(x, y, z)
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{\texttt{print x} \& \texttt{print x,}}
+ \begin{itemize}
+ \item Open an editor
+ \item Type the following code
+ \item Save as \texttt{print\_example.py}
+ \end{itemize}
+ \begin{lstlisting}
+ print "Hello"
+ print "World"
+
+ print "Hello",
+ print "World"
+ \end{lstlisting}
+ \begin{itemize}
+ \item Run the script using \texttt{\%run print\_example.py}
+ \item \texttt{print x} adds a newline whereas \texttt{print x,} adds
+ a space
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{\texttt{raw\_input}}
+ \begin{lstlisting}
+ In[]: ip = raw_input()
+ \end{lstlisting}
+ \begin{itemize}
+ \item The cursor is blinking; waiting for input
+ \item Type \texttt{an input} and hit \texttt{<ENTER>}
+ \end{itemize}
+ \begin{lstlisting}
+ In[]: print ip
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{\texttt{raw\_input} \ldots}
+ \begin{lstlisting}
+ In[]: c = raw_input()
+ 5.6
+ In[]: c
+ In[]: type(c)
+ \end{lstlisting}
+ \begin{itemize}
+ \item \alert{\texttt{raw\_input} always returns a string}
+ \end{itemize}
+ \begin{lstlisting}
+ In[]: name = raw_input("Please enter your name: ")
+ George
+ \end{lstlisting}
+ \begin{itemize}
+ \item \texttt{raw\_input} can display a prompt string for the user
+ \end{itemize}
+\end{frame}
+
+\section{Files}
+
+\begin{frame}[fragile]
+ \frametitle{Opening files}
+ \begin{lstlisting}
+ pwd # present working directory
+ cd /home/fossee # go to location of the file
+ \end{lstlisting}
+ {\tiny The file is in our present working directory}
+ \begin{lstlisting}
+ In[]: f = open('pendulum.txt')
+ In[]: f
+ \end{lstlisting}
+ \begin{itemize}
+ \item \texttt{f} is a file object
+ \item Shows the mode in which the file is open (read mode)
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Reading the whole file}
+ \begin{lstlisting}
+ In[]: pend = f.read()
+ In[]: print pend
+ \end{lstlisting}
+ \begin{itemize}
+ \item We have read the whole file into the variable \texttt{pend}
+ \end{itemize}
+ \begin{lstlisting}
+ In[]: type(pend)
+ In[]: pend_list = pend.splitlines()
+ In[]: pend_list
+ \end{lstlisting}
+ \begin{itemize}
+ \item \texttt{pend} is a string variable
+ \item We can split it at the newline characters into a list of
+ strings
+ \item Close the file when done; reopen it if you want to read it again
+ \end{itemize}
+ \begin{lstlisting}
+ In[]: f.close()
+ In[]: f
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Reading line-by-line}
+ \begin{lstlisting}
+ In[]: for line in open('pendulum.txt'):
+ ....: print line
+ \end{lstlisting}
+ \begin{itemize}
+ \item The file object is an ``iterable''
+ \item We iterate over it and print each line
+ \item Instead of printing, collect lines in a list
+ \end{itemize}
+ \begin{lstlisting}
+ In[]: line_list = [ ]
+ In[]: for line in open('pendulum.txt'):
+ ....: line_list.append(line)
+ \end{lstlisting}
+\end{frame}
+
+
+\begin{frame}[fragile]
+ \frametitle{File parsing -- Problem}
+ \begin{lstlisting}
+ A;010002;ANAND R;058;037;42;35;40;212;P;;
+ \end{lstlisting}
+ \begin{itemize}
+ \item File with records like the one above is given
+ \item Each record has fields separated by ;
+ \item region code; roll number; name;
+ \item marks --- 1\textsuperscript{st} L; 2\textsuperscript{nd} L; math; science; social; total
+ \item pass/fail indicated by P/F; W if withheld; empty otherwise
+ \end{itemize}
+
+ \begin{itemize}
+ \item We wish to calculate mean of math marks in region B
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Tokenization}
+ \begin{lstlisting}
+ In[]: line = "parse this string"
+ In[]: line.split()
+ \end{lstlisting}
+ \begin{itemize}
+ \item Original string is split on white-space (if no argument)
+ \item Returns a list of strings
+ \item It can be given an argument to split on that argument
+ \end{itemize}
+ \begin{lstlisting}
+ In[]: record = "A;015163;JOSEPH RAJ S;083;042;47;AA;72;244;;;"
+ In[]: record.split(';')
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Tokenization \ldots}
+ \begin{itemize}
+ \item Since we split on semicolons, fields may have extra spaces at ends
+ \item We can strip out the spaces at the ends
+ \end{itemize}
+ \begin{lstlisting}
+ In[]: word = " B "
+ In[]: word.strip()
+ \end{lstlisting}
+ \begin{itemize}
+ \item \texttt{strip} is returning a new string
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{\texttt{str} to \texttt{float}}
+ \begin{itemize}
+ \item After tokenizing, the marks we have are strings
+ \item We need numbers to perform math operations
+ \end{itemize}
+ \begin{lstlisting}
+ In[]: mark_str = "1.25"
+ In[]: mark = float(mark_str)
+ In[]: type(mark_str)
+ In[]: type(mark)
+ \end{lstlisting}
+ \begin{itemize}
+ \item \texttt{float} converts the string into a floating-point number
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{File parsing -- Solution}
+ \begin{lstlisting}
+ In[]: math_B = [] # empty list to store marks
+ In[]: for line in open("sslc1.txt"):
+ ....: fields = line.split(";")
+
+ ....: reg_code = fields[0]
+ ....: reg_code_clean = reg_code.strip()
+
+ ....: math_mark_str = fields[5]
+ ....: math_mark = float(math_mark_str)
+
+ ....: if reg_code_clean == "B":
+ ....: math_B.append(math_mark)
+
+ In[]: math_B_mean = sum(math_B) / len(math_B)
+ In[]: math_B_mean
+ \end{lstlisting}
+\end{frame}