diff options
Diffstat (limited to 'lecture-notes/basic-python/slides/io_files_parsing.tex')
-rw-r--r-- | lecture-notes/basic-python/slides/io_files_parsing.tex | 239 |
1 files changed, 239 insertions, 0 deletions
diff --git a/lecture-notes/basic-python/slides/io_files_parsing.tex b/lecture-notes/basic-python/slides/io_files_parsing.tex new file mode 100644 index 0000000..cb46cbe --- /dev/null +++ b/lecture-notes/basic-python/slides/io_files_parsing.tex @@ -0,0 +1,239 @@ +\section{I/O} + +\begin{frame}[fragile] + \frametitle{Printing} + \begin{lstlisting} + In[]: a = "This is a string" + In[]: a + In[]: print a + \end{lstlisting} + \begin{itemize} + \item Both \texttt{a} and \texttt{print a} show the value + \item What is the difference? + \item Typing \texttt{a} shows the value; \texttt{print a} prints it + \item Typing \texttt{a} shows the value only in the interpreter + \item In a script, it has no effect. + \end{itemize} + \begin{lstlisting} + In[]: b = "A line \n New line" + In[]: b + In[]: print b + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{String formatting} + \begin{lstlisting} + In[]: x = 1.5 + In[]: y = 2 + In[]: z = "zed" + In[]: print "x is %2.1f y is %d z is %s" %(x, y, z) + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{\texttt{print x} \& \texttt{print x,}} + \begin{itemize} + \item Open an editor + \item Type the following code + \item Save as \texttt{print\_example.py} + \end{itemize} + \begin{lstlisting} + print "Hello" + print "World" + + print "Hello", + print "World" + \end{lstlisting} + \begin{itemize} + \item Run the script using \texttt{\% run print\_example.py} + \item \texttt{print x} adds a newline whereas \texttt{print x,} adds + a space + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{\texttt{raw\_input}} + \begin{lstlisting} + In[]: ip = raw_input() + \end{lstlisting} + \begin{itemize} + \item The cursor is blinking, waiting for input + \item Type \texttt{an input} and hit \texttt{<ENTER>} + \end{itemize} + \begin{lstlisting} + In[]: print ip + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{\texttt{raw\_input} \ldots} + 
\begin{lstlisting} + In[]: c = raw_input() + 5.6 + In[]: c + In[]: type(c) + \end{lstlisting} + \begin{itemize} + \item \alert{\texttt{raw\_input} always returns a string} + \end{itemize} + \begin{lstlisting} + In[]: name = raw_input("Please enter your name: ") + George + \end{lstlisting} + \begin{itemize} + \item \texttt{raw\_input} can display a prompt string for the user + \end{itemize} +\end{frame} + +\section{Files} + +\begin{frame}[fragile] + \frametitle{Opening files} + \begin{lstlisting} + pwd # present working directory + cd /home/fossee # go to location of the file + \end{lstlisting} + {\tiny The file is in our present working directory} + \begin{lstlisting} + In[]: f = open('pendulum.txt') + In[]: f + \end{lstlisting} + \begin{itemize} + \item \texttt{f} is a file object + \item Shows the mode in which the file is open (read mode) + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Reading the whole file} + \begin{lstlisting} + In[]: pend = f.read() + In[]: print pend + \end{lstlisting} + \begin{itemize} + \item We have read the whole file into the variable \texttt{pend} + \end{itemize} + \begin{lstlisting} + In[]: type(pend) + In[]: pend_list = pend.splitlines() + In[]: pend_list + \end{lstlisting} + \begin{itemize} + \item \texttt{pend} is a string variable + \item We can split it at the newline characters into a list of + strings + \item Close the file when done, or before reopening it to read again + \end{itemize} + \begin{lstlisting} + In[]: f.close() + In[]: f + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Reading line-by-line} + \begin{lstlisting} + In[]: for line in open('pendulum.txt'): + ....: print line + \end{lstlisting} + \begin{itemize} + \item The file object is an ``iterable'' + \item We iterate over it and print each line + \item Instead of printing, collect lines in a list + \end{itemize} + \begin{lstlisting} + In[]: line_list = [ ] + In[]: for line in open('pendulum.txt'): + ....: 
line_list.append(line) + \end{lstlisting} +\end{frame} + + +\begin{frame}[fragile] + \frametitle{File parsing -- Problem} + \begin{lstlisting} + A;010002;ANAND R;058;037;42;35;40;212;P;; + \end{lstlisting} + \begin{itemize} + \item A file with records like the one above is given + \item Each record has fields separated by ; + \item region code; roll number; name; + \item marks --- 1\textsuperscript{st} L; 2\textsuperscript{nd} L; math; science; social; total + \item pass/fail indicated by P/F; W if withheld; empty otherwise + \end{itemize} + + \begin{itemize} + \item We wish to calculate the mean of the math marks in region B + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Tokenization} + \begin{lstlisting} + In[]: line = "parse this string" + In[]: line.split() + \end{lstlisting} + \begin{itemize} + \item The original string is split on white-space (if no argument is given) + \item Returns a list of strings + \item It can be given an argument to split on that argument + \end{itemize} + \begin{lstlisting} + In[]: record = "A;015163;JOSEPH RAJ S;083;042;47;AA;72;244;;;" + In[]: record.split(';') + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Tokenization \ldots} + \begin{itemize} + \item Since we split on semicolons, fields may have extra spaces at ends + \item We can strip out the spaces at the ends + \end{itemize} + \begin{lstlisting} + In[]: word = " B " + In[]: word.strip() + \end{lstlisting} + \begin{itemize} + \item \texttt{strip} returns a new string + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{\texttt{str} to \texttt{float}} + \begin{itemize} + \item After tokenizing, the marks we have are strings + \item We need numbers to perform math operations + \end{itemize} + \begin{lstlisting} + In[]: mark_str = "1.25" + In[]: mark = float(mark_str) + In[]: type(mark_str) + In[]: type(mark) + \end{lstlisting} + \begin{itemize} + \item \texttt{float} converts the string into a floating point number + \end{itemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{File 
parsing -- Solution} + \begin{lstlisting} + In[]: math_B = [] # empty list to store marks + In[]: for line in open("sslc1.txt"): + ....: fields = line.split(";") + + ....: reg_code = fields[0] + ....: reg_code_clean = reg_code.strip() + + ....: math_mark_str = fields[5] + ....: math_mark = float(math_mark_str) + + ....: if reg_code_clean == "B": + ....: math_B.append(math_mark) + + In[]: math_B_mean = sum(math_B) / len(math_B) + In[]: math_B_mean + \end{lstlisting} +\end{frame} |