summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSantosh G. Vattam2009-10-15 11:23:47 +0530
committerSantosh G. Vattam2009-10-15 11:23:47 +0530
commitcf39133fd33d273b145e2478357b1ba25bf3839e (patch)
treeb7f0182820fd1fd8f164e75cbd99e1e410424696
parent6a77605eb23bbd241dc5fa9fb0682536a0f36b88 (diff)
parent28f6ab429485cd97fdabc3f75e7142c5377a19a1 (diff)
downloadworkshops-more-scipy-cf39133fd33d273b145e2478357b1ba25bf3839e.tar.gz
workshops-more-scipy-cf39133fd33d273b145e2478357b1ba25bf3839e.tar.bz2
workshops-more-scipy-cf39133fd33d273b145e2478357b1ba25bf3839e.zip
Merged branches.
-rw-r--r--day1/session3.tex130
1 files changed, 130 insertions, 0 deletions
diff --git a/day1/session3.tex b/day1/session3.tex
index 525639c..bcfadcb 100644
--- a/day1/session3.tex
+++ b/day1/session3.tex
@@ -252,6 +252,136 @@ plot(L, Tline)
\end{itemize}
\end{frame}
+\begin{frame}
+ \frametitle{Statistical Analysis and Parsing}
+ Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
+ \begin{itemize}
+ \item Average total marks scored in each region
+ \item Subject wise average score of each region
+ \item ??Subject wise average score for all regions combined??
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Statistical Analysis and Parsing \ldots}
+ Machinery Required -
+ \begin{itemize}
+ \item File reading and parsing
+ \item NumPy arrays - sum by rows and sum by columns
+ \item Dictionaries
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{File reading and parsing}
+ Understanding the structure of sslc1.txt
+ \begin{itemize}
+ \item Each line in the file, i.e.\ each row of the file, is a single record.
+ \item Each record corresponds to a single student
+ \item Each record consists of several fields separated by a ';'
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \frametitle{File reading and parsing \ldots}
+ Each record consists of:
+ \begin{itemize}
+ \item Region Code
+ \item Roll Number
+ \item Name
+ \item Marks of 5 subjects
+ \item Total marks
+ \item Pass (P)
+ \item Withdrawn (W)
+ \item Fail (F)
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{File reading and parsing \ldots}
+ \begin{lstlisting}
+for record in open('sslc1.txt'):
+ fields = record.split(';')
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Dictionary}
+ \begin{itemize}
+ \item lists index: 0 \ldots n
+ \item dictionaries are indexed using any hashable object
+ \item d = \{ ``Hitchhiker's guide'' : 42, ``Terminator'' : ``I'll be back''\}
+ \item d[``Terminator''] => ``I'll be back''
+ \item ``Terminator'' is called the key of \typ{d}
+ \item ``I'll be back'' is called the value of the key ``Terminator''
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Dictionary - Building parsed data}
+ \begin{itemize}
+ \item Let the parsed data be stored in dictionary \typ{data}
+ \item Keys of \typ{data} are strings - region codes
+ \item Value of the key is another dictionary.
+ \item This dictionary contains:
+ \begin{itemize}
+ \item 'marks': A list of NumPy arrays
+ \item 'total': Total marks of each student
+ \item 'P': Number of passes
+ \item 'F': Number of failures
+ \item 'W': Number of withdrawals
+ \end{itemize}
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Dictionary - Building parsed data \ldots}
+ \small
+ \begin{lstlisting}
+data = {}
+for record in open('sslc1.txt'):
+ fields = record.split(';')
+ if fields[0] not in data:
+ data[fields[0]] = {
+ 'marks': array([]),
+ 'total': array([]),
+ 'P': 0,
+ 'F': 0,
+ 'W': 0
+ }
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Dictionary - Building parsed data \ldots}
+ \small
+ \begin{lstlisting}
+data[fields[0]]['marks'] = append(
+ data[fields[0]]['marks'],
+ [int(fields[3]), int(fields[4]),
+ int(fields[5]), int(fields[6]),
+ int(fields[7])
+ ])
+
+data[fields[0]]['total'].append(fields[8])
+
+pfw_key = fields[9] or fields[10] or fields[11]
+data[fields[0]][pfw_key] += 1
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Calculations}
+ \begin{lstlisting}
+all_sub_avg = array([])
+for k, v in data:
+ data[k]['avg'] = average(
+ data[k]['total'])
+ data[k]['sub_avg'] = average(
+ data[k]['marks'], axis=1)
+ \end{lstlisting}
+\end{frame}
+
\end{document}
Least squares: Smooth curve fit.