summaryrefslogtreecommitdiff
path: root/day1
diff options
context:
space:
mode:
authorAsokan Pichai2009-10-27 14:26:18 +0530
committerAsokan Pichai2009-10-27 14:26:18 +0530
commitebe7c77cfaf0f63de4162f94b3ae6f5ceca4bd10 (patch)
tree7e1f39c8fdc6e8209a16bcfcf5e04c0d2e559745 /day1
parent692d69642cdb2c4cbe1c81f6baaff097a33e15dd (diff)
parent45ea8d3f8db839d9288dacf10a9f014529f6d2f3 (diff)
downloadworkshops-ebe7c77cfaf0f63de4162f94b3ae6f5ceca4bd10.tar.gz
workshops-ebe7c77cfaf0f63de4162f94b3ae6f5ceca4bd10.tar.bz2
workshops-ebe7c77cfaf0f63de4162f94b3ae6f5ceca4bd10.zip
branched merge
Diffstat (limited to 'day1')
-rw-r--r--day1/data/all_regions.pngbin0 -> 41857 bytes
-rw-r--r--day1/data/science.pngbin0 -> 39160 bytes
-rw-r--r--day1/session1.tex2
-rw-r--r--day1/session3.tex180
-rw-r--r--day1/session4.tex30
5 files changed, 119 insertions, 93 deletions
diff --git a/day1/data/all_regions.png b/day1/data/all_regions.png
new file mode 100644
index 0000000..6a684a7
--- /dev/null
+++ b/day1/data/all_regions.png
Binary files differ
diff --git a/day1/data/science.png b/day1/data/science.png
new file mode 100644
index 0000000..82921ff
--- /dev/null
+++ b/day1/data/science.png
Binary files differ
diff --git a/day1/session1.tex b/day1/session1.tex
index 76b0de7..56b8265 100644
--- a/day1/session1.tex
+++ b/day1/session1.tex
@@ -161,7 +161,7 @@ Breaking out of loops
\begin{columns}
\column{0.25\textwidth}
\hspace*{-0.5in}
- \includegraphics[height=2in, interpolate=true]{data/firstplot}
+ \includegraphics[height=2in, interpolate=true]{data/firstplot}
\column{0.8\textwidth}
\begin{block}{}
\small
diff --git a/day1/session3.tex b/day1/session3.tex
index 1af46cd..5ee1b61 100644
--- a/day1/session3.tex
+++ b/day1/session3.tex
@@ -127,13 +127,19 @@
%% \end{frame}
\begin{frame}
+ \frametitle{More on data processing}
+ \begin{block}{}
+ What do we do if we want to draw Pie charts for the data in a huge data file?
+ \end{block}
+\end{frame}
+
+
+\begin{frame}
\frametitle{Statistical Analysis and Parsing}
Read the data supplied in \emph{sslc1.txt} and obtain the following statistics:
\begin{itemize}
- \item Average total marks scored in each region
- \item Subject wise average score of each region
- \item \alert{??Subject wise average score for all regions combined??}
- \item Find the subject wise standard deviation of scores for each region
+ \item Draw a pie chart representing the number of students who scored more than 90\% in Science per region.
+ \item Draw a pie chart representing the number of students who scored more than 90\% per subject (all regions combined).
\end{itemize}
\end{frame}
@@ -142,7 +148,6 @@
Machinery Required -
\begin{itemize}
\item File reading and parsing
- \item NumPy arrays - sum by rows and sum by coloumns
\item Dictionaries
\end{itemize}
\end{frame}
@@ -183,135 +188,122 @@ for record in open('sslc1.txt'):
\begin{frame}[fragile]
\frametitle{Dictionary - Building parsed data}
\begin{itemize}
- \item Let the parsed data be stored in dictionary \typ{data}
- \item \begin{lstlisting}
-data = {} # is an empty dictionary
+ \item Let the parsed data be stored in a list of dictionaries.
+ \item d = \{\} is an empty dictionary
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Dictionary - Building parsed data}
+\begin{lstlisting}
+ninety_percents = [{}, {}, {}, {}, {}]
\end{lstlisting}
+\end{frame}
+
+\begin{frame}[fragile]
+ \frametitle{Dictionary - Building parsed data}
+ \begin{itemize}
\item Index of a dictionary is called a \emph{key}
- \item \emph{Keys} of \typ{data} are strings - region codes
- \item Value of a \emph{key} can be any Python object
+ \item \emph{Keys} of these dictionaries are strings - region codes
\end{itemize}
\end{frame}
\begin{frame}[fragile]
- \frametitle{Dictionary - Building parsed data...}
+ \frametitle{Dictionary - Building parsed data \ldots}
\begin{itemize}
- \item In this problem let the value of a \emph{key} be another dictionary.
+ \item Value of a \emph{key} can be any legal Python value
+ \item In this problem let the value of a \emph{key} be an integer
\item This dictionary contains:
- \begin{itemize}
- \item 'marks': A \emph{List} of \emph{Lists} containing all marks
- \item 'total': A \emph{List} of total marks of each student
- \item 'P': Number of passes
- \item 'F': Number of failures
- \item 'W': Number of withdrawls
- \end{itemize}
\end{itemize}
+'region code': Number of students who scored more than 90\% in this region for this subject
\end{frame}
\begin{frame}[fragile]
- \frametitle{Dictionary - Building parsed data \ldots}
- \small
+ \frametitle{Building parsed data \ldots}
\begin{lstlisting}
-data = {}
+from pylab import *
+
+ninety_percents = [{}, {}, {}, {}, {}]
+
for record in open('sslc1.txt'):
+ record = record.strip()
fields = record.split(';')
- if fields[0] not in data:
- data[fields[0]] = {
- 'marks': [],
- 'total': [],
- 'P': 0,
- 'F': 0,
- 'W': 0
- }
+
+ region_code = fields[0].strip()
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
- \frametitle{Dictionary - Building parsed data \ldots}
+ \frametitle{Building parsed data \ldots}
+ \small
\begin{lstlisting}
-marks = []
-for field in fields[3:8]:
- score_str = field.strip()
- score = 0 if score_str == 'AA'
- or score_str == 'AAA'
- or score_str == ''
- else int(score_str)
- marks.append(score)
+for i, field in enumerate(fields[3:8]):
-data[fields[0]]['marks'].append(marks)
- \end{lstlisting}
-\end{frame}
+ if region_code not in ninety_percents[i]:
+ ninety_percents[i][region_code] = 0
-\begin{frame}[fragile]
- \frametitle{Dictionary - Building parsed data \ldots}
- \begin{lstlisting}
-total = 0 if score_str == 'AA'
- or score_str == 'AAA'
- or score_str == ''
- else int(fields[8])
-data[fields[0]]['total'].append(total)
+ score_str = field.strip()
+
+ score = 0 if score_str == 'AA' else
+ int(score_str)
+ if score > 90:
+ ninety_percents[i][region_code] += 1
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
- \frametitle{Dictionary - Building parsed data \ldots}
+ \frametitle{Consolidating data}
\begin{lstlisting}
-pfw_key = fields[9]
- or fields[10]
- or 'F'
-data[fields[0]][pfw_key] += 1
+subj_total = []
+for subject in ninety_percents:
+ subj_total.append(sum(
+ subject.values()))
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
- \frametitle{NumPy arrays}
- \centerline{\alert{But I lied!?!?!?}}
-\end{frame}
-
-\begin{frame}[fragile]
- \frametitle{Calculations}
+ \frametitle{Pie charts}
+ \small
\begin{lstlisting}
-for k in data:
- data[k]['marks'] = array(
- data[k]['marks'])
- data[k]['total'] = array(
- data[k]['total'])
+figure(1)
+pie(ninety_percents[3].values(),
+ labels=ninety_percents[3].keys())
+title('Students scoring 90% and above
+ in science by region')
+savefig('/tmp/science.png')
\end{lstlisting}
+\begin{columns}
+ \column{0.25\textwidth}
+ \hspace*{1.1in}
+\includegraphics[height=2in, interpolate=true]{data/science}
+ \column{0.8\textwidth}
+\end{columns}
\end{frame}
\begin{frame}[fragile]
- \frametitle{Calculations}
- \small
+ \frametitle{Pie charts}
\begin{lstlisting}
- data[k]['avg'] = average(
- data[k]['total'])
- marks = data[k]['marks']
- sub_avg = average(marks, axis=1)
- sub_std = sqrt(sum(square(
- sub_avg[:,newaxis] - marks), axis=0) /
- len(marks))
- data[k]['sub_avg'] = sub_avg
- data[k]['sub_std'] = sub_std
+figure(2)
+pie(subj_total, labels=['English',
+ 'Hindi', 'Maths', 'Science',
+ 'Social'])
+title('Students scoring more than
+ 90% by subject(All regions
+ combined).')
+savefig('/tmp/all_regions.png')
\end{lstlisting}
\end{frame}
\begin{frame}[fragile]
- \frametitle{New Concepts}
- \begin{itemize}
- \item Dictionaries
- \item Slicing lists
- \item New type of conditional
- \item NumPy arrays
- \item Slicing NumPy arrays
- \item NumPy array functions - square, average, sqrt
- \end{itemize}
+ \frametitle{Pie charts}
+ \includegraphics[height=3in, interpolate=true]{data/all_regions}
\end{frame}
\begin{frame}[fragile]
\frametitle{Dealing with data whole-sale}
\begin{lstlisting}
In []: for t in T:
- ....: Tsq.append(t*t)
+ ....: TSq.append(t*t)
\end{lstlisting}
\begin{itemize}
\item This is not very efficient
@@ -321,7 +313,7 @@ In []: for t in T:
\begin{lstlisting}
In []: L = array(L)
In []: T = array(T)
-In []: Tsq = T*T
+In []: TSq = T*T
\end{lstlisting}
\end{frame}
@@ -409,7 +401,7 @@ In []: A = vander(L,2)
\item Along with a lot of things, it returns the least squares solution
\end{itemize}
\begin{lstlisting}
-In []: coef, res, r, s = lstsq(A,Tsq)
+In []: coef, res, r, s = lstsq(A,TSq)
\end{lstlisting}
\end{frame}
@@ -427,4 +419,14 @@ In []: plot(L, Tline)
\end{lstlisting}
\end{frame}
+\begin{frame}[fragile]
+ \frametitle{What did we learn?}
+ \begin{itemize}
+ \item Dictionaries
+ \item Drawing pie charts
+ \item Arrays
+ \item Least squares fitting
+ \item Intro to Matrices
+ \end{itemize}
+\end{frame}
\end{document}
diff --git a/day1/session4.tex b/day1/session4.tex
index 0fe9c7a..4f1eb7d 100644
--- a/day1/session4.tex
+++ b/day1/session4.tex
@@ -255,7 +255,7 @@ matrix([[ 0.07734807, 0.01657459, 0.32044199],
\end{itemize}
\begin{lstlisting}
In []: x = 0
-In []: integrate.quad(sin(x)+x**2, 0, 1)
+In []: quad(sin(x)+x**2, 0, 1)
\end{lstlisting}
\alert{\typ{error:}}
\typ{First argument must be a callable function.}
@@ -266,7 +266,7 @@ In []: integrate.quad(sin(x)+x**2, 0, 1)
\begin{lstlisting}
In []: def f(x):
return sin(x)+x**2
-In []: integrate.quad(f, 0, 1)
+In []: quad(f, 0, 1)
\end{lstlisting}
\begin{itemize}
\item \typ{def}
@@ -338,9 +338,13 @@ Out[]: 1.0
\begin{frame}[fragile]
\frametitle{Quadrature \ldots}
\begin{lstlisting}
-In []: integrate.quad(f, 0, 1)
+In []: quad(f, 0, 1)
\end{lstlisting}
Returns the integral and an estimate of the absolute error in the result.
+\begin{itemize}
+\item Use \typ{dblquad} for Double integrals
+\item Use \typ{tplquad} for Triple integrals
+\end{itemize}
\end{frame}
\subsection{ODEs}
@@ -403,5 +407,25 @@ In []: pend_sol = odeint(pend_int,
\end{lstlisting}
\end{frame}
+\begin{frame}
+ \frametitle{Things we have learned}
+ \begin{itemize}
+ \item
+ \item
+ \item Functions
+ \begin{itemize}
+ \item Definition
+ \item Calling
+ \item Default Arguments
+ \item Keyword Arguments
+ \end{itemize}
+ \item Integration
+ \begin{itemize}
+ \item Quadrature
+ \item ODEs
+ \end{itemize}
+ \end{itemize}
+\end{frame}
+
\end{document}