summaryrefslogtreecommitdiff
path: root/day1
diff options
context:
space:
mode:
authorMadhusudan.C.S2009-10-27 19:25:25 +0530
committerMadhusudan.C.S2009-10-27 19:25:25 +0530
commit674d8aeaa55d7ccff6f491da58f1641892e9b288 (patch)
tree55cd0239aa67ba876a939cfa034981cf8a066768 /day1
parentc627f89e4c736c1b0701576af65b9a29124484a2 (diff)
downloadworkshops-674d8aeaa55d7ccff6f491da58f1641892e9b288.tar.gz
workshops-674d8aeaa55d7ccff6f491da58f1641892e9b288.tar.bz2
workshops-674d8aeaa55d7ccff6f491da58f1641892e9b288.zip
Session 3 day 1, Statistical operations finalized.
Diffstat (limited to 'day1')
-rw-r--r--day1/session3.tex222
1 files changed, 168 insertions, 54 deletions
diff --git a/day1/session3.tex b/day1/session3.tex
index 491d4a3..acf1649 100644
--- a/day1/session3.tex
+++ b/day1/session3.tex
@@ -140,6 +140,7 @@
\begin{itemize}
\item Draw a pie chart representing the number of students who scored more than 90\% in Science per region.
\item Draw a pie chart representing the number of students who scored more than 90\% per subject (all regions combined).
+ \item Print mean, median, mode and standard deviation of math scores for all regions combined.
\end{itemize}
\end{frame}
@@ -147,8 +148,11 @@
\frametitle{Statistical Analysis and Parsing \ldots}
Machinery Required -
\begin{itemize}
- \item File reading and parsing
+ \item File reading
+ \item Parsing
\item Dictionaries
+ \item NumPy arrays
+ \item Statistical operations
\end{itemize}
\end{frame}
@@ -156,8 +160,8 @@
\frametitle{File reading and parsing}
Understanding the structure of sslc1.txt
\begin{itemize}
- \item Each line in the file, i.e each row of a file is a single record.
- \item Each record corresponds to a record of a single student
+ \item Each line in the file corresponds to one student's details
+ \item Each such line is called a \emph{record}
\item Each record consists of several fields separated by a ';'
\end{itemize}
\end{frame}
@@ -169,11 +173,10 @@
\item Region Code
\item Roll Number
\item Name
- \item Marks of 5 subjects
+ \item Marks of 5 subjects: English, Hindi, Maths, Science, Social
\item Total marks
- \item Pass (P)
+ \item Pass/Fail (P/F)
\item Withdrawn (W)
- \item Fail (F)
\end{itemize}
\end{frame}
@@ -186,44 +189,83 @@ for record in open('sslc1.txt'):
\end{frame}
\begin{frame}[fragile]
- \frametitle{Dictionary - Building parsed data}
+ \frametitle{Dictionary: Introduction}
\begin{itemize}
- \item Let the parsed data be stored in list of dictionaries.
- \item d = \{\} is an empty dictionary
+ \item Lists are indexed by integers: 0 \ldots n
+ \item Dictionaries are indexed by keys, such as strings
\end{itemize}
+\begin{block}{Example}
+d = \{ ``Hitchhiker's guide'' : 42,
+ ``Terminator'' : ``I'll be back''\}\\
+d[``Terminator''] $\Rightarrow$ ``I'll be back''
+\end{block}
\end{frame}
\begin{frame}[fragile]
- \frametitle{Dictionary - Building parsed data}
+ \frametitle{Dictionary: Introduction}
\begin{lstlisting}
-ninety_percents = [{}, {}, {}, {}, {}]
+In [1]: d = {"Hitchhiker's guide" : 42,
+ "Terminator" : "I'll be back"}
+
+In [2]: d["Hitchhiker's guide"]
+Out[2]: 42
+
+In [3]: "Hitchhiker's guide" in d
+Out[3]: True
+
+In [4]: "Guido" in d
+Out[4]: False
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
- \frametitle{Dictionary - Building parsed data}
- \begin{itemize}
- \item Index of a dictionary is called a \emph{key}
- \item \emph{Keys} of these dictionaries are strings - region codes
- \end{itemize}
+ \frametitle{Dictionary: Introduction}
+\begin{lstlisting}
+In [5]: d.keys()
+Out[5]: ['Terminator', "Hitchhiker's
+ guide"]
+
+In [6]: d.values()
+Out[6]: ["I'll be back", 42]
+\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{enumerate: Iterating through list indices}
+\begin{lstlisting}
+In [1]: names = ["Guido","Alex", "Tim"]
+
+In [2]: for i, name in enumerate(names):
+ ...: print i, name
+ ...:
+0 Guido
+1 Alex
+2 Tim
+\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
- \frametitle{Dictionary - Building parsed data \ldots}
+ \frametitle{Dictionary: Building parsed data}
+ Let our dictionary be:
+ \begin{lstlisting}
+science = {} # is an empty dictionary
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Dictionary: Building parsed data}
\begin{itemize}
- \item Value of a \emph{key} can be any legal Python value
- \item In this problem let the value of a \emph{key} be another an integer
- \item This dictionary contains:
+ \item \emph{Keys} of \emph{science} will be region codes
+ \item \emph{Values} of \emph{science} will be the number of students who scored more than 90\% in that region
\end{itemize}
-'region code': Number of students who scored more than 90\% in this region for this subject
\end{frame}
\begin{frame}[fragile]
\frametitle{Building parsed data \ldots}
\begin{lstlisting}
-from pylab import *
+from pylab import pie
-ninety_percents = [{}, {}, {}, {}, {}]
+science = {}
for record in open('sslc1.txt'):
record = record.strip()
@@ -235,29 +277,17 @@ for record in open('sslc1.txt'):
\begin{frame}[fragile]
\frametitle{Building parsed data \ldots}
- \small
+  % Marks occupy fields[3:8] in the order English, Hindi, Maths, Science,
+  % Social, so the Science mark is fields[6]. The conditional expression is
+  % parenthesised so that it can legally wrap over two lines.
\begin{lstlisting}
-for i, field in enumerate(fields[3:8]):
-
- if region_code not in ninety_percents[i]:
- ninety_percents[i][region_code] = 0
+if region_code not in science:
+    science[region_code] = 0
- score_str = field.strip()
+score_str = fields[6].strip()
- score = 0 if score_str == 'AA' else
- int(score_str)
- if score > 90:
- ninety_percents[i][region_code] += 1
- \end{lstlisting}
-\end{frame}
+score = (int(score_str) if
+         score_str != 'AA' else 0)
-\begin{frame}[fragile]
- \frametitle{Consolidating data}
- \begin{lstlisting}
-subj_total = []
-for subject in ninety_percents:
- subj_total.append(sum(
- subject.values()))
+if score > 90:
+    science[region_code] += 1
\end{lstlisting}
\end{frame}
@@ -266,8 +296,8 @@ for subject in ninety_percents:
\small
\begin{lstlisting}
figure(1)
-pie(ninety_percents[4].values(),
- labels=ninety_percents[1].keys())
+pie(science.values(),
+ labels=science.keys())
title('Students scoring 90% and above
in science by region')
savefig('/tmp/science.png')
@@ -281,6 +311,74 @@ savefig('/tmp/science.png')
\end{frame}
\begin{frame}[fragile]
+  \frametitle{Building data for all subjects \ldots}
+  % One independent list/dict per subject: [[]] * 5 would repeat a
+  % reference to a single shared object five times, so every subject
+  % would end up with the same scores and counts.
+  \begin{lstlisting}
+from pylab import pie
+from scipy import mean, median, std
+from scipy import stats
+
+scores = [[] for i in range(5)]
+ninety_percents = [{} for i in range(5)]
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Building data for all subjects \ldots}
+  \begin{lstlisting}
+from pylab import pie
+from scipy import mean, median, std
+from scipy import stats
+  \end{lstlisting}
+
+  % Same initialisation as on the previous frame, highlighted in a block.
+  \begin{block}{One container per subject}
+  \begin{lstlisting}
+scores = [[] for i in range(5)]
+ninety_percents = [{} for i in range(5)]
+  \end{lstlisting}
+  \end{block}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Building data for all subjects \ldots}
+ \begin{lstlisting}
+for record in open('sslc1.txt'):
+ record = record.strip()
+ fields = record.split(';')
+
+ region_code = fields[0].strip()
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+  \frametitle{Building data for all subjects \ldots}
+  \small
+  % For each of the five marks columns: ensure the region has a counter,
+  % convert the mark ('AA' is treated as 0), record it, and count it if it
+  % exceeds 90. The conditional expression is parenthesised so that it can
+  % legally wrap over two lines.
+  \begin{lstlisting}
+for i, field in enumerate(fields[3:8]):
+    if region_code not in ninety_percents[i]:
+        ninety_percents[i][region_code] = 0
+
+    score_str = field.strip()
+    score = (int(score_str) if
+             score_str != 'AA' else 0)
+
+    scores[i].append(score)
+
+    if score > 90:
+        ninety_percents[i][region_code] += 1
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Consolidating data}
+ \begin{lstlisting}
+subj_total = []
+for subject in ninety_percents:
+ subj_total.append(sum(
+ subject.values()))
+ \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
\frametitle{Pie charts}
\begin{lstlisting}
figure(2)
@@ -300,6 +398,32 @@ savefig('/tmp/all_regions.png')
\end{frame}
\begin{frame}[fragile]
+  \frametitle{Obtaining statistics}
+  % Prints summary statistics for scores[2] (the third marks column).
+  % The backslash continues the last print statement so that the standard
+  % deviation is printed as part of it rather than left on a line of its own.
+  \begin{lstlisting}
+math_scores = array(scores[2])
+
+print "Mean: ", mean(math_scores)
+
+print "Median: ", median(math_scores)
+
+print "Mode: ", stats.mode(math_scores)
+
+print "Standard Deviation: ", \
+      std(math_scores)
+  \end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{What tools did we use?}
+ \begin{itemize}
+ \item Dictionaries for storing data
+ \item Facilities for drawing pie charts
+ \item NumPy arrays for efficient array manipulations
+ \item Functions for statistical computations - mean, median, mode, standard deviation
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
\frametitle{Dealing with data whole-sale}
\begin{lstlisting}
In []: for t in T:
@@ -419,14 +543,4 @@ In []: plot(L, Tline)
\end{lstlisting}
\end{frame}
-\begin{frame}[fragile]
- \frametitle{What did we learn?}
- \begin{itemize}
- \item Dictionaries
- \item Drawing pie charts
- \item Arrays
- \item Least Square fitting
- \item Intro to Matrices
- \end{itemize}
-\end{frame}
\end{document}