summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorankitjavalkar2016-04-18 12:04:05 +0530
committerankitjavalkar2016-04-18 12:04:05 +0530
commit32b8712249d7fa7891576b5c15c902c6604ee3d5 (patch)
treefad1c66150da512716d5654419e0098590119025
parent8876df52d088a1de0ea769a46b82ad6fb0682a80 (diff)
parentdd0e366a19d89a249257e5e3f5bd61ad020a0430 (diff)
downloadPython-TBC-Interface-32b8712249d7fa7891576b5c15c902c6604ee3d5.tar.gz
Python-TBC-Interface-32b8712249d7fa7891576b5c15c902c6604ee3d5.tar.bz2
Python-TBC-Interface-32b8712249d7fa7891576b5c15c902c6604ee3d5.zip
Merge pull request #26 from maheshgudi/master
Adding Commenting and Error detection apps
-rw-r--r--PythonTBC/settings.py3
-rw-r--r--PythonTBC/urls.py7
-rw-r--r--commentingapp/.gitignore3
-rw-r--r--commentingapp/__init__.py0
-rw-r--r--commentingapp/commenting_new.py106
-rw-r--r--commentingapp/models.py55
-rw-r--r--commentingapp/templates/commenting.html50
-rw-r--r--commentingapp/templates/notified.html14
-rw-r--r--commentingapp/views.py40
-rw-r--r--requirements.txt1
-rw-r--r--scripts/crawler/scrapy.cfg11
-rw-r--r--scripts/crawler/tbc_web_crawler/__init__.py0
-rw-r--r--scripts/crawler/tbc_web_crawler/settings.py86
-rw-r--r--scripts/crawler/tbc_web_crawler/spiders/__init__.py4
-rw-r--r--scripts/crawler/tbc_web_crawler/spiders/items.py18
-rw-r--r--scripts/crawler/tbc_web_crawler/spiders/tbc_spider.py76
-rw-r--r--scripts/cron.sh23
-rw-r--r--scripts/database_updater.py78
-rw-r--r--scripts/split_json.py20
-rwxr-xr-xtbc/templates/base.html1
-rw-r--r--tbc/templates/tbc/admin-tools.html17
-rw-r--r--tbc/urls.py4
-rwxr-xr-xtbc/views.py9
-rw-r--r--tbc_error_page/models.py107
-rw-r--r--tbc_error_page/templates/broken.html28
-rw-r--r--tbc_error_page/templates/deliberate.html17
-rw-r--r--tbc_error_page/templates/error.html42
-rw-r--r--tbc_error_page/views.py56
28 files changed, 871 insertions, 5 deletions
diff --git a/PythonTBC/settings.py b/PythonTBC/settings.py
index bfcb2d5..4d99488 100644
--- a/PythonTBC/settings.py
+++ b/PythonTBC/settings.py
@@ -139,7 +139,8 @@ INSTALLED_APPS = (
'tbc',
'comments',
'south',
-
+ 'commentingapp',
+ 'tbc_error_page',
)
diff --git a/PythonTBC/urls.py b/PythonTBC/urls.py
index 78a7215..34bc0f6 100644
--- a/PythonTBC/urls.py
+++ b/PythonTBC/urls.py
@@ -18,10 +18,13 @@ urlpatterns = patterns('',
# url(r'^admin/doc/', include('django.contrib.admindocs.urls')),
# Uncomment the next line to enable the admin:
- url(r'^admin/', include(admin.site.urls)),
- url(r'^comments/', include('comments.urls')),
+ url(r'^admin', include(admin.site.urls)),
url(r'^', include('tbc.urls', namespace='tbc')),
url(r'^sitemap\.xml$', 'django.contrib.sitemaps.views.sitemap', {'sitemaps': sitemaps}),
+
+ url(r'^admin-tools/commenting', 'commentingapp.views.commenting', name = 'commenting'),
+ url(r'^admin-tools/error_page', 'tbc_error_page.views.error', name = 'error_page'),
+ url(r'^admin-tools/broken_page', 'tbc_error_page.views.broken', name = 'broken_page'),
)
diff --git a/commentingapp/.gitignore b/commentingapp/.gitignore
new file mode 100644
index 0000000..fad34df
--- /dev/null
+++ b/commentingapp/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+migrations/*
+
diff --git a/commentingapp/__init__.py b/commentingapp/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/commentingapp/__init__.py
diff --git a/commentingapp/commenting_new.py b/commentingapp/commenting_new.py
new file mode 100644
index 0000000..33f4923
--- /dev/null
+++ b/commentingapp/commenting_new.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+
+import requests
+import collections
+import os
+from urlparse import urljoin
+
+
+
+class DisqusCommenting(object):
+ """ A class for getting disqus comments per url, also features getting flagged comments."""
+
+ base_disqus_url = "http://disqus.com/api/"
+
+
+ def check_internet_connection(self):
+ """ Checks for the internet connection."""
+
+ try:
+ requests.get(self.base_disqus_url, timeout = 10)
+ self.internet_status = {"status":True, "message": "Connection Passed."}
+
+ except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
+ self.internet_status = {"status": False, "message": "Please Check the internet Connection."}
+
+ return self.internet_status["message"]
+
+ def check_authentication(self, public_key, forum_name, api_version=3.0):
+
+ """ Checks if public key and forum is valid. Returns the public key, forum name for Disqus API."""
+ # @TODO - Optional Authentication for read/write/moderate.
+ api_version = str(api_version)
+ try:
+ if self.internet_status["status"] == True:
+ url = urljoin(self.base_disqus_url,api_version)+"/forums/details.json" # get a better way to do this. Apparently urljoin doesnt work that way.
+ payload = {"api_key":public_key, "forum":forum_name}
+ connect_api = requests.get(url, params = payload).json()
+
+ if connect_api["code"]== 0:
+ self.public_key = public_key
+ self.forum_name = forum_name
+ self.api_version = api_version
+ self.api_connection_status = {"status": True, "message": "Your api key and forum name are valid."}
+ return self.api_connection_status["message"]
+
+ elif connect_api["code"] == 5:
+ self.api_connection_status = {"status": False, "message": "Your api key is invalid."}
+ return self.api_connection_status["message"]
+
+ else:
+ self.api_connection_status = {"status": False, "message": "Your forum name is invalid."}
+ return self.api_connection_status["message"]
+
+ else:
+ self.internet_status = {"status": False, "message": "You are still not connected to the internet. Please Check the internet Connection"}
+ return self.internet_status["message"]
+
+ except AttributeError:
+ self.api_connection_status = {"status": False, "message": "Check your internet connection first."}
+ return self.api_connection_status["message"]
+
+ def get_thread_ids(self):
+ """ Returns the counter for thread ids in a forum """
+
+ forum_url = urljoin(self.base_disqus_url,self.api_version)+"/forums/listPosts.json" # get a better way to do this. Apparently urljoin doesnt work that way.
+ payload = {"api_key":self.public_key,"forum": self.forum_name}
+ forum_data = requests.get(forum_url, params=payload).json()
+ thread_id_list = [thread_id["thread"] for thread_id in forum_data["response"]]
+ counter = collections.Counter(thread_id_list)
+ self.counter = counter
+ return counter
+
+ def get_comments(self):
+ """ Returns the comments and the url of a thread """
+
+ json_like_list = []
+
+ for thread_id in self.counter.keys(): # Find a better way to do this
+ comment_list = []
+ payload = {"api_key": self.public_key, "thread": thread_id}
+ thread_url = urljoin(self.base_disqus_url,self.api_version)+"/threads/list.json"
+ thread_data = requests.get(thread_url, params = payload).json()
+ comment_dict = {}
+ comment_dict["chapter_urls"] = thread_data["response"][0]["link"]
+ comment_url = urljoin(self.base_disqus_url,self.api_version)+"/threads/listPosts.json"
+ comment_data = requests.get(comment_url, params = payload).json()
+
+ for comments in comment_data["response"]:
+ comment_list.append(comments["raw_message"])
+ comment_dict["comment_list"] = comment_list
+
+
+ json_like_list.append(comment_dict)
+
+ return json_like_list
+
+
+if __name__ == "__main__":
+ x = DisqusCommenting()
+
+ y = x.check_internet_connection()
+ d = x.check_authentication("enter your disqus api PUBLIC key here", 'enter disqus forum name here ')
+ z = x.get_thread_ids()
+ z1 = x.get_comments()
+
+ print z1 # this will print out a json like list of all the urls and the comments on each url
diff --git a/commentingapp/models.py b/commentingapp/models.py
new file mode 100644
index 0000000..79e120e
--- /dev/null
+++ b/commentingapp/models.py
@@ -0,0 +1,55 @@
+from __future__ import unicode_literals
+from django.db import models
+from tbc.models import Chapters, Book
+from django.contrib.auth.models import User
+from django.db.models import Q
+import os
+import smtplib
+from email.mime.text import MIMEText
+
+
+
+class Url (models.Model):
+ id = models.AutoField(primary_key = True)
+ url = models.URLField()
+
+ def get_contributor_details(self, counter):
+ notebooks = [os.path.join(chapter_name.split("/")[-2], chapter_name.split('/')[-1]) for chapter_name in counter.keys()]
+ contributor_list = []
+ for notebook,url,number_of_comments in zip(notebooks, counter.keys(), counter.values()):
+ contributor_dict = {}
+ contributor_id = Book.objects.filter(Q(chapters__notebook = notebook)).values_list("contributor_id", flat = True)
+ contributor = User.objects.filter(id = contributor_id[0]).values("email", "first_name", "last_name")
+ contributor_dict ["contributor_email"] = contributor[0]["email"]
+ contributor_dict["contributor_name"] = contributor[0]["first_name"]+" "+ contributor[0]["last_name"]
+ contributor_dict["url"] = url
+ contributor_dict["number_of_comments"] = number_of_comments
+ contributor_list.append(contributor_dict)
+ return contributor_list
+
+ def send_mail_to_contributor(self, contributor_details):
+ me = 'put your localhost mail id'
+
+ for info in contributor_details:
+ body = """ Hi {0}, this mail is from TBC-Python Team. You have {1} unread comments for your chapter - {2}""".format(info["contributor_name"],
+ info["number_of_comments"],
+ info["url"]
+ )
+ you = info["contributor_email"]
+
+ message = MIMEText(body)
+ message["Subject"] = "You have {0} unread comment(s).".format(info["number_of_comments"])
+ message ["From"] = me
+ message ["To"] = you
+ smtp_instance = smtplib.SMTP('localhost')
+ smtp_instance.sendmail(me, you, message.as_string())
+ smtp_instance.quit()
+ return True
+
+
+
+class Comments(models.Model):
+ url = models.ForeignKey(Url, on_delete=models.CASCADE)
+ comments = models.TextField()
+ is_notified = models.BooleanField(default = False)
+
diff --git a/commentingapp/templates/commenting.html b/commentingapp/templates/commenting.html
new file mode 100644
index 0000000..dac6b37
--- /dev/null
+++ b/commentingapp/templates/commenting.html
@@ -0,0 +1,50 @@
+{% extends "base.html" %}
+
+{% block title %} TBC Commenting {% endblock %}
+
+{% block content %}
+ <h3><center> TBC Commenting </center></h3>
+ <h5> Hi <u> <b>{{user}} </b> </u> </h5><br/>
+ <a href = "https://pythontbc.disqus.com" target = "blank">Go to Disqus admin Page </a>
+
+ {% if not url_context %}
+ <center><h4> There are no new comments </h4></center>
+ {% else %}
+
+ <form name = "Send Email" action = "{% url 'commentingapp.views.commenting' %}" method = "POST">
+ {% csrf_token %}
+ <table id = "comment-table" border = 2 align = "center" style="empty-cells:hide;">
+ <tr>
+ <th style "width: 5%"> Sr. no </th>
+ <th style "width: 45%"> Url </th>
+ <th colspan = ""> Comments </th>
+ </tr>
+
+ {% for urls in url_context %}
+ <tr>
+ <td colspan = ""> {{ forloop.counter }}</td>
+ <td id = "urls" colspan = ""><a href = "{{ urls.url }}" target = "blank"> {{ urls.url }} </a> </td>
+ <td>
+ <table width = 100%>
+ {% for comments in urls.comments_set.all %}
+ {% if comments.is_notified == 0 %}
+ <tr>
+ <td style = "align:left;">{{comments.comments}}</td>
+ <td style = "align:right;"><input type = "checkbox" name = "comment" value = "{{ urls.url }}, {{comments.comments}}"></input></td>
+ </tr>
+ {% endif %}
+ {% endfor %}
+ </td>
+ </table>
+ </tr>
+ {% endfor %}
+ </table>
+ <br/>
+
+ <center> <input class = "btn" type = "submit" value = "Submit"> </input></center>
+
+ </form>
+ {% endif %}
+
+{% endblock %}
+
diff --git a/commentingapp/templates/notified.html b/commentingapp/templates/notified.html
new file mode 100644
index 0000000..c062d3f
--- /dev/null
+++ b/commentingapp/templates/notified.html
@@ -0,0 +1,14 @@
+{% extends "base.html" %}
+{% block title %} Success {% endblock %}
+
+{% block content %}
+
+<body>
+
+{% csrf_token %}
+<h5> {{ notified_comments }} </h5>
+<p></p>
+<p> <a href = "{% url 'commentingapp.views.commenting' %}"> << Go back to Commenting Page </a></p>
+
+</body>
+{% endblock %}
diff --git a/commentingapp/views.py b/commentingapp/views.py
new file mode 100644
index 0000000..b4c2b84
--- /dev/null
+++ b/commentingapp/views.py
@@ -0,0 +1,40 @@
+from django.shortcuts import render, render_to_response
+from django.contrib.auth.decorators import login_required
+from django.template import RequestContext
+from .models import Url, Comments
+from django.contrib.auth.decorators import user_passes_test
+from django.db.models import Q
+from tbc.models import Book, Chapters
+from django.contrib.auth.models import User
+from collections import Counter
+import os.path
+from email.mime.text import MIMEText
+
+@user_passes_test(lambda u:u.is_superuser, login_url="/admin/login/")
+
+def commenting(req):
+ ci = RequestContext(req)
+ url_instance = Url.objects.filter(Q(comments__is_notified = 0)).distinct()
+ context = {"url_context": url_instance, "user": req.user}
+
+ if req.method == "POST":
+ notified_comment_list = req.POST.getlist("comment")
+ url_list = []
+ for notified_comments in notified_comment_list:
+ url_comment_list= notified_comments.split(", ")
+ url_list.append(url_comment_list[0])
+ Comments.objects.filter(comments = url_comment_list[1]).update(is_notified = 1)
+
+ counter = Counter(url_list)
+ url_db_instance = Url()
+ contributor_details = url_db_instance.get_contributor_details(counter)
+ status = url_db_instance.send_mail_to_contributor(contributor_details)
+
+ if status == True:
+ context = {"notified_comments": "You have suceesfully notified the contributors"}
+ else:
+ context = {"notified_comments": "Mail couldnot be sent"}
+ return render_to_response("notified.html", context, ci)
+
+
+ return render_to_response ("commenting.html", context, ci)
diff --git a/requirements.txt b/requirements.txt
index 3184a23..6e46e4a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,3 +11,4 @@ reportlab==3.1.8
requests==2.6.0
urllib3==1.10.2
wsgiref==0.1.2
+scrapy==1.0.3
diff --git a/scripts/crawler/scrapy.cfg b/scripts/crawler/scrapy.cfg
new file mode 100644
index 0000000..b99853f
--- /dev/null
+++ b/scripts/crawler/scrapy.cfg
@@ -0,0 +1,11 @@
+# Automatically created by: scrapy startproject
+#
+# For more information about the [deploy] section see:
+# https://scrapyd.readthedocs.org/en/latest/deploy.html
+
+[settings]
+default = tbc_web_crawler.settings
+
+[deploy]
+#url = http://localhost:6800/
+project = tbc_web_crawler
diff --git a/scripts/crawler/tbc_web_crawler/__init__.py b/scripts/crawler/tbc_web_crawler/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/scripts/crawler/tbc_web_crawler/__init__.py
diff --git a/scripts/crawler/tbc_web_crawler/settings.py b/scripts/crawler/tbc_web_crawler/settings.py
new file mode 100644
index 0000000..03ba836
--- /dev/null
+++ b/scripts/crawler/tbc_web_crawler/settings.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+
+# Scrapy settings for tbc_web_crawler project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used. You can find more settings consulting the documentation:
+#
+# http://doc.scrapy.org/en/latest/topics/settings.html
+# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
+# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
+
+BOT_NAME = 'tbc_web_crawler'
+
+SPIDER_MODULES = ['tbc_web_crawler.spiders']
+NEWSPIDER_MODULE = 'tbc_web_crawler.spiders'
+
+
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
+#USER_AGENT = 'tbc_web_crawler (+http://www.yourdomain.com)'
+
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
+CONCURRENT_REQUESTS=100
+
+# Configure a delay for requests for the same website (default: 0)
+# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
+# See also autothrottle settings and docs
+#DOWNLOAD_DELAY=3
+# The download delay setting will honor only one of:
+#CONCURRENT_REQUESTS_PER_DOMAIN=16
+#CONCURRENT_REQUESTS_PER_IP=16
+
+# Disable cookies (enabled by default)
+#COOKIES_ENABLED=False
+
+# Disable Telnet Console (enabled by default)
+#TELNETCONSOLE_ENABLED=False
+
+# Override the default request headers:
+#DEFAULT_REQUEST_HEADERS = {
+# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+# 'Accept-Language': 'en',
+#}
+
+# Enable or disable spider middlewares
+# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
+#SPIDER_MIDDLEWARES = {
+# 'tbc_web_crawler.middlewares.MyCustomSpiderMiddleware': 543,
+#}
+
+# Enable or disable downloader middlewares
+# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
+#DOWNLOADER_MIDDLEWARES = {
+ #'scrapy.downloadermiddlewares.retry.RetryMiddleware': None
+# 'tbc_web_crawler.middlewares.MyCustomDownloaderMiddleware': 543,
+#}
+
+# Enable or disable extensions
+# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
+#EXTENSIONS = {
+# 'scrapy.telnet.TelnetConsole': None,
+#}
+
+# Configure item pipelines
+# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
+#ITEM_PIPELINES = {
+# 'tbc_web_crawler.pipelines.SomePipeline': 300,
+#}
+
+# Enable and configure the AutoThrottle extension (disabled by default)
+# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
+# NOTE: AutoThrottle will honour the standard settings for concurrency and delay
+#AUTOTHROTTLE_ENABLED=True
+# The initial download delay
+#AUTOTHROTTLE_START_DELAY=5
+# The maximum download delay to be set in case of high latencies
+#AUTOTHROTTLE_MAX_DELAY=60
+# Enable showing throttling stats for every response received:
+#AUTOTHROTTLE_DEBUG=False
+
+# Enable and configure HTTP caching (disabled by default)
+# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+#HTTPCACHE_ENABLED=True
+#HTTPCACHE_EXPIRATION_SECS=0
+#HTTPCACHE_DIR='httpcache'
+#HTTPCACHE_IGNORE_HTTP_CODES=[]
+#HTTPCACHE_STORAGE='scrapy.extensions.httpcache.FilesystemCacheStorage'
diff --git a/scripts/crawler/tbc_web_crawler/spiders/__init__.py b/scripts/crawler/tbc_web_crawler/spiders/__init__.py
new file mode 100644
index 0000000..ebd689a
--- /dev/null
+++ b/scripts/crawler/tbc_web_crawler/spiders/__init__.py
@@ -0,0 +1,4 @@
+# This package will contain the spiders of your Scrapy project
+#
+# Please refer to the documentation for information on how to create and manage
+# your spiders.
diff --git a/scripts/crawler/tbc_web_crawler/spiders/items.py b/scripts/crawler/tbc_web_crawler/spiders/items.py
new file mode 100644
index 0000000..9dda20f
--- /dev/null
+++ b/scripts/crawler/tbc_web_crawler/spiders/items.py
@@ -0,0 +1,18 @@
+import scrapy
+
+
+class TbcErrorItems(scrapy.Item):
+
+
+ chapter_name = scrapy.Field()
+ chapter_urls = scrapy.Field()
+ completed_book_urls = scrapy.Field()
+ number_of_errors = scrapy.Field()
+ error_messages = scrapy.Field()
+
+
+
+class TbcBrokenItems(scrapy.Item):
+
+ broken_url = scrapy.Field()
+ broken_status = scrapy.Field()
diff --git a/scripts/crawler/tbc_web_crawler/spiders/tbc_spider.py b/scripts/crawler/tbc_web_crawler/spiders/tbc_spider.py
new file mode 100644
index 0000000..9688e70
--- /dev/null
+++ b/scripts/crawler/tbc_web_crawler/spiders/tbc_spider.py
@@ -0,0 +1,76 @@
+import scrapy
+from items import TbcErrorItems, TbcBrokenItems
+from scrapy.utils.response import get_base_url
+from scrapy.utils.url import urljoin_rfc
+from scrapy.http import Request
+
+import os, json
+
+if os.path.isfile('items.json'):
+ os.remove('items.json')
+else:
+ pass
+
+class TbcSpider(scrapy.Spider):
+
+ name = "tbc_spider" # Name of the crawler. Use this name when crawling from the terminal, for eg - scrapy crawl tbc_spider
+
+ start_urls = ["http://tbc-python.fossee.aero.iitb.ac.in/completed-books/"]
+ handle_httpstatus_list = [404, 500, 502] # A list containing HTTP error codes.
+
+ def parse(self,response):
+ """ This function looks for book links and returns the url"""
+
+ for book_link in response.xpath('//a[contains(@href,"book-details")]/@href').extract():
+ """ Searches for links with "book-details" in it """
+
+ first_base_url = get_base_url(response)
+ first_relative_url = urljoin_rfc(first_base_url,book_link)
+ """creates a url to be returned to the next function."""
+
+ yield scrapy.Request(first_relative_url,callback=self.parse_book_contents)
+
+
+
+ def parse_book_contents(self, response):
+
+ """ This function looks for chapter links through each book link and returns the url"""
+
+ for chapter_link in response.xpath ('//a[contains(@href,"convert-notebook")]/@href').extract():
+ """ Searches for chapters in each book list"""
+ second_base_url = get_base_url(response).split('/book-details')[0]
+ second_relative_url = urljoin_rfc(second_base_url,chapter_link)
+ """creates a url to be returned to the next function."""
+
+ yield scrapy.Request(second_relative_url,callback=self.parse_chapter_details)
+
+
+
+ def parse_chapter_details(self, response):
+
+ if not response.xpath('//h1/text()').extract():
+ chapter_details = [response.url]
+ else:
+ chapter_details = response.xpath('//h1/text()').extract()
+
+
+ error_tag = response.xpath('//div[@class="output_subarea output_text output_error"]')
+ error_list = [error_notifications for error_notifications \
+ in response.xpath \
+ ('//div[@class="output_subarea output_text output_error"]/span/text()').extract()]
+
+ if response.status in self.handle_httpstatus_list:
+ broken_items = TbcBrokenItems()
+ broken_items['broken_url'] = response.url
+ broken_items['broken_status'] = response.status
+ yield broken_items
+ else:
+ if len(error_tag) != 0:
+ items = TbcErrorItems()
+ items ['chapter_name'] = chapter_details[0]
+ items ['chapter_urls'] = response.url
+ items ['number_of_errors'] = len (error_tag)
+ #items ['completed_book_urls'] = response.request.headers.get('Referer', None)
+ #items ['error_messages'] = error_list
+ yield items
+
diff --git a/scripts/cron.sh b/scripts/cron.sh
new file mode 100644
index 0000000..bf219be
--- /dev/null
+++ b/scripts/cron.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+DIR="$( cd "$( dirname "$0" )" && pwd )"
+cd $DIR
+
+python database_updater.py
+
+source ../../../bin/activate
+# this is for the test server. Might differ on different machines. Ideally it should be "source ../../bin/activate"
+
+
+
+cd crawler/
+
+scrapy crawl tbc_spider -o items.json -t json
+#sadly scrapy can only be run in the folders containing scrapy.cfg
+
+cd ../.
+
+python split_json.py
+
+deactivate
+
diff --git a/scripts/database_updater.py b/scripts/database_updater.py
new file mode 100644
index 0000000..71813ea
--- /dev/null
+++ b/scripts/database_updater.py
@@ -0,0 +1,78 @@
+import os
+import sys
+
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "PythonTBC.settings")
+base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.append(base_path)
+
+from commentingapp.models import Url, Comments
+from commentingapp.commenting_new import DisqusCommenting
+from tbc.models import Book, Chapters
+from django.contrib.auth.models import User
+
+class CronForCommenting(object):
+
+ def fetch_comments_from_script(self):
+ """ Fetches comment from Commenting script"""
+
+ commenting_instance = DisqusCommenting()
+ check_net = commenting_instance.check_internet_connection()
+ check_auth = commenting_instance.check_authentication("enter your disqus api public key here",
+ "enter your forum name here"
+ )
+ thread = commenting_instance.get_thread_ids()
+ self.comments_for_db = commenting_instance.get_comments()
+
+ return self.comments_for_db
+
+
+
+ def add_comments_to_db(self):
+
+ if not Url.objects.exists():
+ """ Populates the db if empty"""
+ for comment_details in self.comments_for_db:
+ url_instance = Url(url = comment_details["chapter_urls"]) #url_instance is actually an object
+ url_instance.save()
+ for comment in comment_details["comment_list"]:
+ Comments.objects.create(url = url_instance, comments = comment)
+ return "Database is created"
+
+ else:
+ """ if the db isnt empty"""
+ for comment_details in self.comments_for_db:
+ url_object, url_status = Url.objects.get_or_create(url = comment_details["chapter_urls"])
+ url_primary_key = url_object.pk
+ for comment in comment_details["comment_list"]:
+ Comments.objects.get_or_create(comments = comment, url_id = url_primary_key)
+ return "Database is updated."
+
+
+ def delete_redundant_comments(self):
+ "delete urls that have no comments in them anymore"
+
+ url_list = [urls["chapter_urls"] for urls in self.comments_for_db]
+ url_list_db = Url.objects.values_list("url", flat = True)
+ url_difference = set(url_list_db)-set(url_list)
+ for delete_url in url_difference:
+ Url.objects.filter(url = delete_url).delete()
+
+ "delete comments that have been deleted from tbc notebooks"
+ for comment_details in self.comments_for_db:
+ url_instance = Url.objects.get(url = comment_details["chapter_urls"])
+ comment_list_db = url_instance.comments_set.values_list("comments", flat = True)
+ redundant_comment_list = set(comment_list_db)-set(comment_details["comment_list"])
+ for delete_comment in redundant_comment_list:
+ url_instance.comments_set.filter(comments = delete_comment).delete()
+ return "Redundant Comments deleted."
+
+
+
+if __name__ == '__main__':
+
+ a = CronForCommenting()
+ b = a.fetch_comments_from_script()
+ c = a.add_comments_to_db() #This should always be before delete_redundant_comments
+ d = a.delete_redundant_comments() #This should always be after add_comments_to_db
+ print c
+ print d
diff --git a/scripts/split_json.py b/scripts/split_json.py
new file mode 100644
index 0000000..baa0b90
--- /dev/null
+++ b/scripts/split_json.py
@@ -0,0 +1,20 @@
+import cPickle
+import json
+from os.path import dirname, abspath,join
+try:
+ with open('crawler/items.json', "r") as json_dump:
+ json_data = json.load(json_dump)
+ json_dump.close()
+ a = [saved_data for saved_data in json_data if str(saved_data).startswith("{u'ch")]
+ with open(join(dirname(abspath(dirname(__file__))),'tbc_error_page/error.pickle'), "w+") as error_json:
+ cPickle.dump(a, error_json)
+ error_json.close()
+
+ b = [saved_data for saved_data in json_data if str(saved_data).startswith("{u'br")]
+ with open(join(dirname(abspath(dirname(__file__))),'tbc_error_page/broken.pickle'), "w+") as broken_json:
+ cPickle.dump(b, broken_json)
+ broken_json.close()
+
+
+except ValueError:
+ print "Couldn't find file"
diff --git a/tbc/templates/base.html b/tbc/templates/base.html
index 84e7136..a1b4c8f 100755
--- a/tbc/templates/base.html
+++ b/tbc/templates/base.html
@@ -133,6 +133,7 @@
<li><a href="{% url 'tbc:GetCertificate' %}">Get Certificate</a></li>
<li><a href="{% url 'tbc:UpdateProfile' %}">Update Profile</a></li>
<li><a href="{% url 'tbc:UpdatePassword' %}">Update Password</a></li>
+ <li><a href="{% url 'tbc:admin_tools' %}">Admin Tools </a></li>
<li><a href="{% url 'tbc:UserLogout' %}">Logout</a></li>
</ul>
</li>
diff --git a/tbc/templates/tbc/admin-tools.html b/tbc/templates/tbc/admin-tools.html
new file mode 100644
index 0000000..1c46b64
--- /dev/null
+++ b/tbc/templates/tbc/admin-tools.html
@@ -0,0 +1,17 @@
+{% extends "base.html" %}
+{% block title %} Admin Tools {% endblock %}
+{% block content %}
+
+<body>
+
+{% csrf_token %}
+
+<h4> Hi, {{ user }} </h4>
+
+<p></p>
+<p></p>
+<p><a href = "{% url 'commentingapp.views.commenting' %}"> Commenting </a></p>
+<p><a href = "{% url 'tbc_error_page.views.error' %}"> Error Page </a></p>
+<p><a href = "{% url 'tbc_error_page.views.broken' %}"> Broken Page </a></p>
+
+{% endblock %}
diff --git a/tbc/urls.py b/tbc/urls.py
index 747a77d..6d3cc17 100644
--- a/tbc/urls.py
+++ b/tbc/urls.py
@@ -12,8 +12,8 @@ urlpatterns = patterns('',
url(r'^profile/$', 'tbc.views.UserProfile', name='UserProfile'),
url(r'^update-profile/$', 'tbc.views.UpdateProfile', name='UpdateProfile'),
url(r'^forgot-password/$', 'tbc.views.ForgotPassword', name='ForgotPassword'),
- url(r'^update-password/$', 'tbc.views.UpdatePassword', name='UpdatePassword'),
-
+ url(r'^update-password/$', 'tbc.views.UpdatePassword', name='UpdatePassword'),
+ url(r'^admin-tools/$', 'tbc.views.admin_tools', name='admin_tools'),
url(r'^submit-proposal/$', 'tbc.views.SubmitProposal', name='SubmitProposal'),
url(r'^submit-aicte-proposal/$', 'tbc.views.ListAICTE', name='ListAICTE'),
diff --git a/tbc/views.py b/tbc/views.py
index 9e3a2e7..767dd4e 100755
--- a/tbc/views.py
+++ b/tbc/views.py
@@ -1413,3 +1413,12 @@ def link_image(request):
chapter.save()
context['success'] = True
return render_to_response('tbc/link_image.html', context, context_instance=ci)
+
+@login_required( login_url= "/admin")
+def admin_tools(request):
+ ci = RequestContext(request)
+ user = request.user
+ context = {"user":user}
+ return render_to_response('tbc/admin-tools.html', context, context_instance=ci)
+
+
diff --git a/tbc_error_page/models.py b/tbc_error_page/models.py
new file mode 100644
index 0000000..82c4da6
--- /dev/null
+++ b/tbc_error_page/models.py
@@ -0,0 +1,107 @@
+from django.db import models
+import os
+import cPickle
+
+def get_json_from_file(filename):
+ path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)
+ if os.path.isfile(path):
+ with open(path) as json_dump:
+ json_data =cPickle.load(json_dump)
+ return json_data
+ else:
+ return False
+
+
+
+class Error(models.Model):
+
+ chapter_url = models.URLField(max_length = 255)
+ number_of_errors = models.IntegerField()
+ chapter_name = models.CharField(max_length = 200,)
+ is_deliberate = models.IntegerField(default = False)
+
+ def create_new_error_data(self, error_json_data):
+ # Populates an empty table
+ for error_details in error_json_data:
+ Error.objects.create(chapter_url = error_details["chapter_urls"],
+ chapter_name = error_details["chapter_name"],
+ number_of_errors = int(error_details["number_of_errors"]),
+ is_deliberate = 0
+ )
+
+ def delete_redundant_error_data(self, error_json_data):
+ # delete errors which have been solved
+ for error_details in error_json_data:
+ db_url_list = Error.objects.values_list("chapter_url", flat=True)
+ json_url_list = [url_list["chapter_urls"] for url_list in error_json_data]
+ c = set(db_url_list)-set(json_url_list) #change variable name.
+ for somelist in c:
+ Error.objects.filter(chapter_url = somelist).delete()
+
+ def update_error_data(self, error_json_data):
+
+ # a little more refined.
+
+ for error_details in error_json_data:
+ original_value = Error.objects.get(chapter_url = error_details["chapter_urls"]).number_of_errors
+ # if number of errors have increased
+ if original_value < error_details["number_of_errors"]:
+
+ Error.objects.filter(chapter_url = error_details["chapter_urls"])\
+ .update(number_of_errors = error_details["number_of_errors"],
+ is_deliberate = 0
+ )
+ # if number of errors have decreased
+ elif original_value > error_details["number_of_errors"]:
+ Error.objects.filter(chapter_url = error_details["chapter_urls"])\
+ .update(number_of_errors = error_details["number_of_errors"], is_deliberate = 0)
+ else:
+ # if new errors have been added.
+ Error.objects.get_or_create(chapter_url = error_details["chapter_urls"],
+ number_of_errors = error_details["number_of_errors"]
+ )
+
+ Error.objects.filter(chapter_url = error_details["chapter_urls"])\
+ .update(chapter_url = error_details["chapter_urls"],
+ number_of_errors = error_details["number_of_errors"],
+ chapter_name = error_details["chapter_name"]
+ )
+
+
+
+
+
+ def update_deliberate_error(self, deliberate_error_list):
+
+ for deliberate_urls in deliberate_error_list:
+ a = Error.objects.filter(chapter_url = deliberate_urls).update(is_deliberate = 1)
+
+
+
+
+class Broken(models.Model):
+
+ broken_url = models.URLField(max_length = 255)
+ error_status = models.IntegerField()
+
+ def create_new_broken_data(self, broken_data):
+ for broken_details in broken_data:
+
+ Broken.objects.create(broken_url = broken_details["broken_url"],
+ error_status = broken_details["broken_status"])
+
+ def delete_redundant_broken_data(self, broken_data):
+ for broken_details in broken_data:
+ db_url_list = Broken.objects.values_list("broken_url", flat=True)
+ json_url_list = [url_list["broken_url"] for url_list in broken_data]
+ redundant_url = set(db_url_list)-set(json_url_list) #change variable name.
+ for delete_url in redundant_url:
+ Broken.objects.filter(broken_url = delete_url).delete()
+
+
+ def update_broken_data(self, broken_data):
+ for broken_details in broken_data:
+
+ Broken.objects.get_or_create(broken_url = broken_details["broken_url"],
+ error_status = broken_details["broken_status"]
+ )
diff --git a/tbc_error_page/templates/broken.html b/tbc_error_page/templates/broken.html
new file mode 100644
index 0000000..841069c
--- /dev/null
+++ b/tbc_error_page/templates/broken.html
@@ -0,0 +1,28 @@
+{% extends "base.html" %}
+{% block title %} TBC Broken Links {% endblock %}
+{% block content %}
+  {% if not broken %}
+    {# fixed copy-pasted message: this page lists broken links, not comments #}
+    <center><h4> There are no broken links </h4></center>
+  {% else %}
+    <h3><u><center>TBC Broken Links Page </center></u></h3>
+    {# closing tags were mismatched (<b><u> ... </b><u>) -- fixed #}
+    <h5> Hi <b><u> {{user}} </u></b> </h5>
+    <a href="{% url 'tbc_error_page.views.error' %}"> TBC Error Status Page </a>
+<p></p>
+    <table border=1>
+      <tr>
+        <th> Sr no. </th>
+        <th> Broken Urls </th>
+        <th> HTTP status error code </th>
+      </tr>
+      {% for broken_data in broken %}
+      <tr>
+        <td> {{ forloop.counter }} </td>
+        {# href quoted; target="_blank" (plain "blank" is a named window, not a new tab) #}
+        <td> <a href="{{ broken_data.broken_url }}" target="_blank"> {{ broken_data.broken_url }} </a> </td>
+        <td><b> {{ broken_data.error_status }} </b> error </td>
+      </tr>
+      {% endfor %}
+    </table>
+{% endif %}
+
+{% endblock %}
diff --git a/tbc_error_page/templates/deliberate.html b/tbc_error_page/templates/deliberate.html
new file mode 100644
index 0000000..89a8974
--- /dev/null
+++ b/tbc_error_page/templates/deliberate.html
@@ -0,0 +1,17 @@
+{% extends "base.html" %}
+{% block title %} Success {% endblock %}
+{% block content %}
+<p> You have added following urls as deliberate </p>
+<table border=1>
+<tr><th>Urls</th></tr>
+
+{% for deliberate_links in deliberate %}
+
+{# closing tags were reversed (</tr></td>) -- fixed #}
+<tr><td> {{ deliberate_links }} </td></tr>
+
+{% endfor %}
+</table>
+<p></p>
+{# raw '<<<' escaped so it cannot be parsed as markup #}
+<p><a href="{% url 'tbc_error_page.views.error' %}"> &lt;&lt;&lt; Go back to Error Page </a></p>
+{% endblock %}
diff --git a/tbc_error_page/templates/error.html b/tbc_error_page/templates/error.html
new file mode 100644
index 0000000..237c7f3
--- /dev/null
+++ b/tbc_error_page/templates/error.html
@@ -0,0 +1,42 @@
+{% extends "base.html" %}
+{% block title %} TBC Error Page {% endblock %}
+
+{# stray </head> and <body> removed: base.html already renders the document skeleton #}
+{% block content %}
+  <h3><u><center>TBC Error Page </center></u></h3>
+  <h5> Hi <b><u>{{ user }} </u></b></h5>
+  <p><a href="{% url 'tbc_error_page.views.broken' %}"> TBC Broken Links page </a></p>
+  {% if not context %}
+    <center><h4> There are no new errors </h4></center>
+  {% else %}
+  {# the form used to open inside <table> and close outside it -- now wraps the table #}
+  <form name="Send Email" action="{% url 'tbc_error_page.views.error' %}" method="POST"> {% csrf_token %}
+    <table border=2>
+      <tr>
+        <th> Chapters With errors</th>
+        <th> Number of errors</th>
+        <th> Deliberate Errors</th>
+      </tr>
+      {% for errors in context %}
+        {# <tr> moved inside the {% if %} so deliberate errors no longer emit empty rows;
+           the invalid <div class="error""> wrapper (double quote, div between rows)
+           became a class on the row itself #}
+        {% if errors.is_deliberate == 0 %}
+        <tr class="error">
+          <td><a href="{{ errors.chapter_url }}" target="_blank"> {{ errors.chapter_name }} </a></td>
+          <td> {{ errors.number_of_errors }} </td>
+          <td> <input type="checkbox" name="deliberate" value="{{ errors.chapter_url }}"> </td>
+        </tr>
+        {% endif %}
+      {% endfor %}
+    </table>
+    <br/>
+    {# <input> is a void element; the stray </input> is gone #}
+    <input class="btn" type="submit" value="Submit">
+  </form>
+{% endif %}
+{% endblock %}
diff --git a/tbc_error_page/views.py b/tbc_error_page/views.py
new file mode 100644
index 0000000..aa32453
--- /dev/null
+++ b/tbc_error_page/views.py
@@ -0,0 +1,56 @@
+from django.shortcuts import render_to_response
+from .models import Error, Broken, get_json_from_file
+from django.contrib.auth.decorators import user_passes_test
+from django.template import RequestContext
+import json
+import os
+
+
+# Only superusers may view the error dashboard; everyone else is redirected
+# to the admin login page.
+#@login_required(login_url="/admin/login/")
+@user_passes_test(lambda u:u.is_superuser, login_url="/admin/login")
+
+
+
+def error(req):
+    """Render the TBC error dashboard and handle 'deliberate error' marking.
+
+    GET  : sync the Error table against the crawler's dump (seed on first
+           run; otherwise prune stale rows and refresh counts), then list
+           every non-deliberate error.
+    POST : mark the checked chapter URLs as deliberate and show the
+           confirmation page.
+    """
+    ci = RequestContext(req)
+    db_instance = Error()
+    # NOTE(review): the file has a .pickle extension but the helper is named
+    # get_json_from_file -- confirm the on-disk format against the crawler.
+    error_json_data = get_json_from_file("error.pickle")
+
+    # Seed the table on first run; afterwards delete stale rows, then update.
+    # Order matters: update_error_data assumes redundant rows are gone.
+    if not Error.objects.exists():
+        db_instance.create_new_error_data(error_json_data)
+    else:
+        db_instance.delete_redundant_error_data(error_json_data)
+        db_instance.update_error_data(error_json_data)
+
+    # Deliberate errors (is_deliberate = 1) are hidden from the dashboard.
+    error_details = Error.objects.filter(is_deliberate = 0)
+
+    if req.method == "POST":
+        deliberate_urls_list = req.POST.getlist("deliberate")
+        db_instance.update_deliberate_error(deliberate_urls_list)
+
+        context = {"user":req.user, "deliberate" :deliberate_urls_list}
+
+        return render_to_response ("deliberate.html", context, ci)
+
+    context = {"context": error_details, "user": req.user}
+    return render_to_response ("error.html", context, ci)
+
+def broken(req):
+    """Render the broken-links page after syncing the Broken table.
+
+    Mirrors error(): seed on first run, otherwise drop rows whose URL is no
+    longer in the crawler dump and upsert the current entries.
+    """
+    ci = RequestContext(req)
+    db_instance = Broken()
+    # NOTE(review): .pickle extension read by a JSON-named helper -- confirm
+    # the actual format (same question as in error()).
+    broken_json_data = get_json_from_file("broken.pickle")
+
+    if not Broken.objects.exists():
+        db_instance.create_new_broken_data(broken_json_data)
+
+    else:
+        db_instance.delete_redundant_broken_data(broken_json_data)
+        db_instance.update_broken_data(broken_json_data)
+
+    broken = Broken.objects.all()
+    context = {"broken": broken, "user": req.user}
+    return render_to_response("broken.html", context, ci)
+
+