%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Tutorial slides on Python.
%
% Author: FOSSEE
% Copyright (c) 2009, FOSSEE, IIT Bombay
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\documentclass[14pt,compress]{beamer}
%\documentclass[draft]{beamer}
%\documentclass[compress,handout]{beamer}
%\usepackage{pgfpages}
%\pgfpagesuselayout{2 on 1}[a4paper,border shrink=5mm]

% Modified from: generic-ornate-15min-45min.de.tex
% Presentation-mode look and feel: theme, outer theme, and how covered
% (not yet revealed) overlay items are rendered.
\mode<presentation>
{
  \usetheme{Warsaw}
  \useoutertheme{split}
  \setbeamercovered{transparent}
}

% Language and input/font encoding.
\usepackage[english]{babel}
\usepackage[latin1]{inputenc}
%\usepackage{times}
\usepackage[T1]{fontenc}

% Font setup. Taken from Fernando's slides.
\usepackage{ae,aecompl}
\usepackage{mathpazo,courier,euler}
\usepackage[scaled=.95]{helvet}
\usepackage{amsmath}

% Colour used for string literals in code listings.
\definecolor{darkgreen}{rgb}{0,0.5,0}

% Code listings: Python highlighting in bold typewriter type.
\usepackage{listings}
\lstset{language=Python,
  basicstyle=\ttfamily\bfseries,
  commentstyle=\color{red}\itshape,
  stringstyle=\color{darkgreen},
  showstringspaces=false,
  keywordstyle=\color{blue}\bfseries}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Macros

% \emphbar{<text>}: <text> in a rounded, lightly shaded colour box.
% The trailing % signs keep the line ends inside the definition from
% turning into stray spaces around the box.
\setbeamercolor{emphbar}{bg=blue!20, fg=black}
\newcommand{\emphbar}[1]{%
  \begin{beamercolorbox}[rounded=true]{emphbar}%
    {#1}%
  \end{beamercolorbox}%
}

% \inctime{<minutes>}: add <minutes> to the running "time" counter and
% print the new total in tiny type, followed by "m".
\newcounter{time}
\setcounter{time}{0}
\newcommand{\inctime}[1]{\addtocounter{time}{#1}{\tiny \thetime\ m}}

% \typ{<code>}: inline code, typeset through listings.
% NOTE: because <code> is read as a macro argument before listings sees
% it, the characters \ { } % must be written with a leading backslash
% inside \typ (see the listings manual, "Listings inside arguments").
\newcommand{\typ}[1]{\lstinline{#1}}

% \kwrd{<word>}: a keyword in bold blue typewriter type.
% No spaces inside the definition, so \kwrd adds no extra space of its
% own around the word; \textcolor limits the colour to the argument.
\newcommand{\kwrd}[1]{\texttt{\textbf{\textcolor{blue}{#1}}}}

%%% This is from Fernando's setup.
% \usepackage{color}
% \definecolor{orange}{cmyk}{0,0.4,0.8,0.2}
%
% Use and configure listings package for nicely formatted code
% \usepackage{listings}
% \lstset{
%    language=Python,
%    basicstyle=\small\ttfamily,
%    commentstyle=\ttfamily\color{blue},
%    stringstyle=\ttfamily\color{orange},
%    showstringspaces=false,
%    breaklines=true,
%    postbreak = \space\dots
% }

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Title page
\title[]{Arrays \& Least Squares Fit}

\author[FOSSEE]{FOSSEE}

\institute[IIT Bombay]{Department of Aerospace Engineering\\IIT Bombay}
\date[]{31 October 2009\\Day 1, Session 3}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%\pgfdeclareimage[height=0.75cm]{iitmlogo}{iitmlogo}
%\logo{\pgfuseimage{iitmlogo}}

%% Delete this, if you do not want the table of contents to pop up at
%% the beginning of each subsection:
\AtBeginSubsection[]
{
  \begin{frame}<beamer>
    \frametitle{Outline}
    \tableofcontents[currentsection,currentsubsection]
  \end{frame}
}

%% Same outline frame at the beginning of each section.
\AtBeginSection[]
{
  \begin{frame}<beamer>
    \frametitle{Outline}
    \tableofcontents[currentsection,currentsubsection]
  \end{frame}
}

% \num: the word "numpy" in typewriter type.
\newcommand{\num}{\texttt{numpy}}

% If you wish to uncover everything in a step-wise fashion, uncomment
% the following command:
%\beamerdefaultoverlayspecification{<+->}

%\includeonlyframes{current,current1,current2,current3,current4,current5,current6}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DOCUMENT STARTS
\begin{document}

\begin{frame}
  \maketitle
\end{frame}

%% \begin{frame}
%%   \frametitle{Outline}
%%   \tableofcontents
%%   % You might wish to add the option [pausesections]
%% \end{frame}

% Session goal: least squares fit of T^2 against L for a simple pendulum.
\begin{frame}
\frametitle{Least Squares Fit}
In this session:
\begin{itemize}
\item We shall plot a least squares fit curve for the time period ($T$) squared vs.\ length ($L$) plot of a simple pendulum.
\item Given a file containing $L$ and $T$ values
\end{itemize}
\end{frame}

% NOTE: in [fragile] frames, \end{frame} must stay alone on its own line.
\begin{frame}[fragile]
\frametitle{Least Squares Fit \ldots}
Machinery required:
\begin{itemize}
  \item Reading files and parsing data
  \item Plotting points, lines
  \item Calculating the coefficients of the least squares fit curve
  \begin{itemize}
    \item Arrays
  \end{itemize}
\end{itemize}
\end{frame}

% Reading the two-column data file into two Python lists.
% Listing bodies are kept flush left: leading spaces are printed.
\begin{frame}[fragile]
\frametitle{Reading pendulum.txt}
\begin{itemize}
  \item The file has two columns
  \item Column 1: $L$; Column 2: $T$
\end{itemize}
\begin{lstlisting}
In []: L = []
In []: T = []
In []: for line in open('pendulum.txt'):
  ....     ln, t = line.split()
  ....     L.append(float(ln))
  ....     T.append(float(t))
\end{lstlisting}
We now have two lists, \typ{L} and \typ{T}
\end{frame}

% Squaring T element-wise by converting the lists to arrays.
\begin{frame}[fragile]
\frametitle{Calculating $T^2$}
\begin{itemize}
  \item Each element of the list \typ{T} must be squared
  \item Iterating over each element of the list works
  \item But it is very slow \ldots
  \item Instead, we use arrays
\end{itemize}
\begin{lstlisting}
In []: L = array(L)
In []: T = array(T)
In []: Tsq = T*T
In []: plot(L, Tsq, 'o')
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]
\frametitle{Arrays}
\begin{itemize}
  \item \typ{T} is now a \typ{numpy} array
  \item \typ{numpy} arrays are very efficient and powerful
  \item Very easy to perform element-wise operations
  \item \typ{+, -, *, /, \%}
  \item More about arrays later
\end{itemize}
\end{frame}

% The physics behind the fit: T^2 is linear in L, so a straight line
% (degree-1 polynomial) is fitted.
\begin{frame}[fragile]
\frametitle{Least Squares Polynomial}
\begin{enumerate}
\item $T^2 = \frac{4\pi^2}{g}L$
\item $T^2$ and $L$ have a linear relationship
\item We find an approximate solution to $Ax = y$, where $A$ is the Vandermonde matrix, to get the coefficients of the least squares fit line.
\end{enumerate}
\end{frame}

% The Vandermonde matrix built from the L values; vander(L,2) gives the
% two rightmost columns (l_i and 1), i.e. the straight-line case.
% NOTE: in [fragile] frames, \end{frame} must stay alone on its own line.
\begin{frame}[fragile]
\frametitle{Vandermonde Matrix}
Vandermonde matrix of order $M$
\begin{equation*}
  \begin{bmatrix}
    l_1^{M-1} & \cdots & l_1 & 1 \\
    l_2^{M-1} & \cdots & l_2 & 1 \\
    \vdots    & \ddots & \vdots & \vdots \\
    l_N^{M-1} & \cdots & l_N & 1
  \end{bmatrix}
\end{equation*}
\begin{lstlisting}
In []: A=vander(L,2)
\end{lstlisting}
\end{frame}

% lstsq and the four values it returns.
\begin{frame}[fragile]
\frametitle{Least Squares Fit Line}
\begin{itemize}
  \item We use the \typ{lstsq} function of pylab
  \item It returns:
  \begin{enumerate}
    \item Least squares solution
    \item Sum of squared residuals
    \item Rank of matrix $A$
    \item Singular values of $A$
  \end{enumerate}
\end{itemize}
\begin{lstlisting}
In []: coef, res, r, s = lstsq(A,Tsq)
\end{lstlisting}
\end{frame}

% Turning the fitted coefficients into a callable polynomial and
% evaluating it on L to obtain the fitted line.
\begin{frame}[fragile]
\frametitle{Least Squares Fit Line \ldots}
\begin{itemize}
\item Use the \typ{poly1d} function of pylab to create a function for the line equation using the coefficients obtained
\begin{lstlisting}
In []: p=poly1d(coef)
\end{lstlisting}
\item Get new $T^2$ values using the function \typ{p} obtained
\begin{lstlisting}
In []: Tline = p(L)
\end{lstlisting}
\item Now plot \typ{Tline} vs.\ \typ{L} to get the least squares fit line.
\begin{lstlisting}
In []: plot(L, Tline)
\end{lstlisting}
\end{itemize}
\end{frame}

% Second problem of the session: statistics over the SSLC results file.
\begin{frame}
\frametitle{Statistical Analysis and Parsing}
Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
\begin{itemize}
  \item Average total marks scored in each region
  \item Subject-wise average score of each region
  \item \alert{??Subject-wise average score for all regions combined??}
  \item Subject-wise standard deviation of scores for each region
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Statistical Analysis and Parsing \ldots}
Machinery required:
\begin{itemize}
  \item File reading and parsing
  \item NumPy arrays -- sum by rows and sum by columns
  \item Dictionaries
\end{itemize}
\end{frame}

% Layout of the input file: one ';'-separated record per line.
\begin{frame}
\frametitle{File reading and parsing}
Understanding the structure of sslc1.txt
\begin{itemize}
  \item Each line in the file, i.e.\ each row of the file, is a single record.
  \item Each record corresponds to a single student
  \item Each record consists of several fields separated by a \texttt{;}
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{File reading and parsing \ldots}
Each record consists of:
\begin{itemize}
  \item Region Code
  \item Roll Number
  \item Name
  \item Marks of 5 subjects
  \item Total marks
  \item Pass (P)
  \item Withdrawn (W)
  \item Fail (F)
\end{itemize}
\end{frame}

% NOTE: in [fragile] frames, \end{frame} must stay alone on its own line,
% and listing bodies are kept flush left because leading spaces are printed.
\begin{frame}[fragile]
\frametitle{File reading and parsing \ldots}
\begin{lstlisting}
for record in open('sslc1.txt'):
    fields = record.split(';')
\end{lstlisting}
\end{frame}

% Short introduction to dictionaries before they are used for parsing.
\begin{frame}[fragile]
\frametitle{Dictionary}
\begin{itemize}
  \item Lists are indexed by integers: 0 \ldots n
  \item Dictionaries are indexed by any hashable object
  \item d = \{ ``Hitchhiker's guide'' : 42, ``Terminator'' : ``I'll be back''\}
  \item d[``Terminator''] $\Rightarrow$ ``I'll be back''
  \item ``Terminator'' is called the key of \typ{d}
  \item ``I'll be back'' is called the value of the key ``Terminator''
\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{Dictionary - Building parsed data}
\begin{itemize}
  \item Let the parsed data be stored in the dictionary \typ{data}
  \item Keys of \typ{data} are strings -- the region codes
  \item The value of each key is another dictionary
  \item This dictionary contains:
  \begin{itemize}
    \item \typ{'marks'}: list of subject marks, one entry per student
    \item \typ{'total'}: total marks of each student
    \item \typ{'P'}: number of passes
    \item \typ{'F'}: number of failures
    \item \typ{'W'}: number of withdrawals
  \end{itemize}
\end{itemize}
\end{frame}

% The next four frames show ONE loop over the file, split across slides;
% the code on the continuation frames is therefore indented one level so
% that it reads as the body of "for record in ...".
% NOTE: in [fragile] frames, \end{frame} must stay alone on its own line,
% and listing bodies are kept flush left because leading spaces are printed.
%
% 'marks' and 'total' start as plain lists: the later frames call
% .append() on them, which NumPy arrays do not provide. They are
% converted to arrays on the "Calculations" frame.
\begin{frame}[fragile]
\frametitle{Dictionary - Building parsed data \ldots}
\small
\begin{lstlisting}
data = {}
for record in open('sslc1.txt'):
    fields = record.split(';')
    if fields[0] not in data:
        data[fields[0]] = {
            'marks': [],
            'total': [],
            'P': 0,
            'F': 0,
            'W': 0
        }
\end{lstlisting}
\end{frame}

% Subject marks: 'AA', 'AAA' and empty fields are counted as 0.
\begin{frame}[fragile]
\frametitle{Dictionary - Building parsed data \ldots}
\small
\begin{lstlisting}
    marks = []
    for field in fields[3:8]:
        score_str = field.strip()
        if score_str in ('AA', 'AAA', ''):
            score = 0
        else:
            score = int(score_str)
        marks.append(score)

    data[fields[0]]['marks'].append(marks)
\end{lstlisting}
\end{frame}

% Total marks: the test is made on the total field itself (fields[8]),
% not on score_str left over from the last subject of the loop above.
\begin{frame}[fragile]
\frametitle{Dictionary - Building parsed data \ldots}
\small
\begin{lstlisting}
    total_str = fields[8].strip()
    if total_str in ('AA', 'AAA', ''):
        total = 0
    else:
        total = int(total_str)
    data[fields[0]]['total'].append(total)
\end{lstlisting}
\end{frame}

% Pass / withdrawn / fail counters. Shown on this frame only, so that
% following the slides in order counts each record exactly once.
\begin{frame}[fragile]
\frametitle{Dictionary - Building parsed data \ldots}
\small
\begin{lstlisting}
    pfw_key = fields[9] or fields[10] or 'F'
    data[fields[0]][pfw_key] += 1
\end{lstlisting}
\end{frame}

% Per-region statistics. After array(), 'marks' has one row per student
% and one column per subject, so subject-wise statistics reduce over
% axis=0 (down the rows); sub_avg then has one entry per subject and
% broadcasts against marks without any reshaping.
\begin{frame}[fragile]
\frametitle{Calculations}
\small
\begin{lstlisting}
for k in data:
    data[k]['marks'] = array(data[k]['marks'])
    data[k]['total'] = array(data[k]['total'])
    data[k]['avg'] = average(
        data[k]['total'])
    marks = data[k]['marks']
    sub_avg = average(marks, axis=0)
    sub_std = sqrt(sum(square(
        sub_avg - marks), axis=0)
        / len(marks))
    data[k]['sub_avg'] = sub_avg
    data[k]['sub_std'] = sub_std
\end{lstlisting}
\end{frame}

\end{document}