diff options
author | Madhusudan.C.S | 2009-10-27 19:25:25 +0530 |
---|---|---|
committer | Madhusudan.C.S | 2009-10-27 19:25:25 +0530 |
commit | 674d8aeaa55d7ccff6f491da58f1641892e9b288 (patch) | |
tree | 55cd0239aa67ba876a939cfa034981cf8a066768 /day1 | |
parent | c627f89e4c736c1b0701576af65b9a29124484a2 (diff) | |
download | workshops-674d8aeaa55d7ccff6f491da58f1641892e9b288.tar.gz workshops-674d8aeaa55d7ccff6f491da58f1641892e9b288.tar.bz2 workshops-674d8aeaa55d7ccff6f491da58f1641892e9b288.zip |
Session 3 day 1, Statistical operations finalized.
Diffstat (limited to 'day1')
-rw-r--r-- | day1/session3.tex | 222 |
1 files changed, 168 insertions, 54 deletions
diff --git a/day1/session3.tex b/day1/session3.tex index 491d4a3..acf1649 100644 --- a/day1/session3.tex +++ b/day1/session3.tex @@ -140,6 +140,7 @@ \begin{itemize} \item Draw a pie chart representing the number of students who scored more than 90\% in Science per region. \item Draw a pie chart representing the number of students who scored more than 90\% per subject(All regions combined). + \item Print mean, median, mode and standard deviation of math scores for all regions combined. \end{itemize} \end{frame} @@ -147,8 +148,11 @@ \frametitle{Statistical Analysis and Parsing \ldots} Machinery Required - \begin{itemize} - \item File reading and parsing + \item File reading + \item Parsing \item Dictionaries + \item NumPy arrays + \item Statistical operations \end{itemize} \end{frame} @@ -156,8 +160,8 @@ \frametitle{File reading and parsing} Understanding the structure of sslc1.txt \begin{itemize} - \item Each line in the file, i.e each row of a file is a single record. - \item Each record corresponds to a record of a single student + \item Each line in the file corresponds to one student's details + \item aka record \item Each record consists of several fields separated by a ';' \end{itemize} \end{frame} @@ -169,11 +173,10 @@ \item Region Code \item Roll Number \item Name - \item Marks of 5 subjects + \item Marks of 5 subjects: English, Hindi, Maths, Science, Social \item Total marks - \item Pass (P) + \item Pass/Fail (P/F) \item Withdrawn (W) - \item Fail (F) \end{itemize} \end{frame} @@ -186,44 +189,83 @@ for record in open('sslc1.txt'): \end{frame} \begin{frame}[fragile] - \frametitle{Dictionary - Building parsed data} + \frametitle{Dictionary: Introduction} \begin{itemize} - \item Let the parsed data be stored in list of dictionaries. 
- \item d = \{\} is an empty dictionary + \item lists index: 0 \ldots n + \item dictionaries index using strings \end{itemize} +\begin{block}{Example} +d = \{ ``Hitchhiker's guide'' : 42, + ``Terminator'' : ``I'll be back''\}\\ +d[``Terminator''] => ``I'll be back'' +\end{block} \end{frame} \begin{frame}[fragile] - \frametitle{Dictionary - Building parsed data} + \frametitle{Dictionary: Introduction} \begin{lstlisting} -ninety_percents = [{}, {}, {}, {}, {}] +In [1]: d = {"Hitchhiker's guide" : 42, + "Terminator" : "I'll be back"} + +In [2]: d["Hitchhiker's guide"] +Out[2]: 42 + +In [3]: "Hitchhiker's guide" in d +Out[3]: True + +In [4]: "Guido" in d +Out[4]: False \end{lstlisting} \end{frame} \begin{frame}[fragile] - \frametitle{Dictionary - Building parsed data} - \begin{itemize} - \item Index of a dictionary is called a \emph{key} - \item \emph{Keys} of these dictionaries are strings - region codes - \end{itemize} + \frametitle{Dictionary: Introduction} +\begin{lstlisting} +In [5]: d.keys() +Out[5]: ['Terminator', "Hitchhiker's + guide"] + +In [6]: d.values() +Out[6]: ["I'll be back", 42] +\end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{enumerate: Iterating through list indices} +\begin{lstlisting} +In [1]: names = ["Guido","Alex", "Tim"] + +In [2]: for i, name in enumerate(names): + ...: print i, name + ...: +0 Guido +1 Alex +2 Tim +\end{lstlisting} \end{frame} \begin{frame}[fragile] - \frametitle{Dictionary - Building parsed data \ldots} + \frametitle{Dictionary: Building parsed data} + Let our dictionary be: + \begin{lstlisting} +science = {} # is an empty dictionary + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Dictionary - Building parsed data} \begin{itemize} - \item Value of a \emph{key} can be any legal Python value - \item In this problem let the value of a \emph{key} be another an integer - \item This dictionary contains: + \item \emph{Keys} of \emph{science} will be region codes + \item Each value of 
\emph{science} will be the number of students who scored more than 90\% in that region \end{itemize} -'region code': Number of students who scored more than 90\% in this region for this subject \end{frame} \begin{frame}[fragile] \frametitle{Building parsed data \ldots} \begin{lstlisting} -from pylab import * +from pylab import pie -ninety_percents = [{}, {}, {}, {}, {}] +science = {} for record in open('sslc1.txt'): record = record.strip() @@ -235,29 +277,17 @@ for record in open('sslc1.txt'): \begin{frame}[fragile] \frametitle{Building parsed data \ldots} - \small \begin{lstlisting} -for i, field in enumerate(fields[3:8]): - - if region_code not in ninety_percents[i]: - ninety_percents[i][region_code] = 0 +if region_code not in science: + science[region_code] = 0 - score_str = field.strip() +score_str = fields[4].strip() - score = 0 if score_str == 'AA' else - int(score_str) - if score > 90: - ninety_percents[i][region_code] += 1 - \end{lstlisting} -\end{frame} +score = int(score_str) if + score_str != 'AA' else 0 -\begin{frame}[fragile] - \frametitle{Consolidating data} - \begin{lstlisting} -subj_total = [] -for subject in ninety_percents: - subj_total.append(sum( - subject.values())) +if score > 90: + science[region_code] += 1 \end{lstlisting} \end{frame} @@ -266,8 +296,8 @@ for subject in ninety_percents: \small \begin{lstlisting} figure(1) -pie(ninety_percents[4].values(), - labels=ninety_percents[1].keys()) +pie(science.values(), + labels=science.keys()) title('Students scoring 90% and above in science by region') savefig('/tmp/science.png') @@ -281,6 +311,74 @@ savefig('/tmp/science.png') \end{frame} \begin{frame}[fragile] + \frametitle{Building data for all subjects \ldots} + \begin{lstlisting} +from pylab import pie +from scipy import mean, median, std +from scipy import stats + +scores = [[]] * 5 +ninety_percents = [{}] * 5 + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Building data for all subjects \ldots} + \begin{lstlisting} 
+from pylab import pie +from scipy import mean, median, std +from scipy import stats + \end{lstlisting} + + \begin{block}{Repeating list items} + \begin{lstlisting} +scores = [[]] * 5 +ninety_percents = [{}] * 5 + \end{lstlisting} + \end{block} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Building data for all subjects \ldots} + \begin{lstlisting} +for record in open('sslc1.txt'): + record = record.strip() + fields = record.split(';') + + region_code = fields[0].strip() + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Building data for all subjects \ldots} + \small + \begin{lstlisting} +for i, field in enumerate(fields[3:8]): + if region_code not in ninety_percents[i]: + ninety_percents[i][region_code] = 0 + + score_str = field.strip() + score = int(score_str) if + score_str != 'AA' else 0 + + scores[i].append(score) + + if score > 90: + ninety_percents[i][region_code] += 1 + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Consolidating data} + \begin{lstlisting} +subj_total = [] +for subject in ninety_percents: + subj_total.append(sum( + subject.values())) + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] \frametitle{Pie charts} \begin{lstlisting} figure(2) @@ -300,6 +398,32 @@ savefig('/tmp/all_regions.png') \end{frame} \begin{frame}[fragile] + \frametitle{Obtaining statistics} + \begin{lstlisting} +math_scores = array(scores[2]) + +print "Mean: ", mean(math_scores) + +print "Median: ", median(math_scores) + +print "Mode: ", stats.mode(math_scores) + +print "Standard Deviation: ", + std(math_scores) + \end{lstlisting} +\end{frame} + +\begin{frame}[fragile] + \frametitle{What tools did we use?} + \begin{itemize} + \item Dictionaries for storing data + \item Facilities for drawing pie charts + \item NumPy arrays for efficient array manipulations + \item Functions for statistical computations - mean, median, mode, standard deviation + \end{itemize} +\end{frame} + +\begin{frame}[fragile] \frametitle{Dealing 
with data whole-sale} \begin{lstlisting} In []: for t in T: @@ -419,14 +543,4 @@ In []: plot(L, Tline) \end{lstlisting} \end{frame} -\begin{frame}[fragile] - \frametitle{What did we learn?} - \begin{itemize} - \item Dictionaries - \item Drawing pie charts - \item Arrays - \item Least Square fitting - \item Intro to Matrices - \end{itemize} -\end{frame} \end{document} |