diff options
author | ankitjavalkar | 2016-04-18 12:04:05 +0530 |
---|---|---|
committer | ankitjavalkar | 2016-04-18 12:04:05 +0530 |
commit | 32b8712249d7fa7891576b5c15c902c6604ee3d5 (patch) | |
tree | fad1c66150da512716d5654419e0098590119025 | |
parent | 8876df52d088a1de0ea769a46b82ad6fb0682a80 (diff) | |
parent | dd0e366a19d89a249257e5e3f5bd61ad020a0430 (diff) | |
download | Python-TBC-Interface-32b8712249d7fa7891576b5c15c902c6604ee3d5.tar.gz Python-TBC-Interface-32b8712249d7fa7891576b5c15c902c6604ee3d5.tar.bz2 Python-TBC-Interface-32b8712249d7fa7891576b5c15c902c6604ee3d5.zip |
Merge pull request #26 from maheshgudi/master
Adding Commenting and Error detection apps
28 files changed, 871 insertions, 5 deletions
diff --git a/PythonTBC/settings.py b/PythonTBC/settings.py index bfcb2d5..4d99488 100644 --- a/PythonTBC/settings.py +++ b/PythonTBC/settings.py @@ -139,7 +139,8 @@ INSTALLED_APPS = ( 'tbc', 'comments', 'south', - + 'commentingapp', + 'tbc_error_page', ) diff --git a/PythonTBC/urls.py b/PythonTBC/urls.py index 78a7215..34bc0f6 100644 --- a/PythonTBC/urls.py +++ b/PythonTBC/urls.py @@ -18,10 +18,13 @@ urlpatterns = patterns('', # url(r'^admin/doc/', include('django.contrib.admindocs.urls')), # Uncomment the next line to enable the admin: - url(r'^admin/', include(admin.site.urls)), - url(r'^comments/', include('comments.urls')), + url(r'^admin', include(admin.site.urls)), url(r'^', include('tbc.urls', namespace='tbc')), url(r'^sitemap\.xml$', 'django.contrib.sitemaps.views.sitemap', {'sitemaps': sitemaps}), + + url(r'^admin-tools/commenting', 'commentingapp.views.commenting', name = 'commenting'), + url(r'^admin-tools/error_page', 'tbc_error_page.views.error', name = 'error_page'), + url(r'^admin-tools/broken_page', 'tbc_error_page.views.broken', name = 'broken_page'), ) diff --git a/commentingapp/.gitignore b/commentingapp/.gitignore new file mode 100644 index 0000000..fad34df --- /dev/null +++ b/commentingapp/.gitignore @@ -0,0 +1,3 @@ +*.pyc +migrations/* + diff --git a/commentingapp/__init__.py b/commentingapp/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/commentingapp/__init__.py diff --git a/commentingapp/commenting_new.py b/commentingapp/commenting_new.py new file mode 100644 index 0000000..33f4923 --- /dev/null +++ b/commentingapp/commenting_new.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- + +import requests +import collections +import os +from urlparse import urljoin + + + +class DisqusCommenting(object): + """ A class for getting disqus comments per url, also features getting flagged comments.""" + + base_disqus_url = "http://disqus.com/api/" + + + def check_internet_connection(self): + """ Checks for the internet 
connection.""" + + try: + requests.get(self.base_disqus_url, timeout = 10) + self.internet_status = {"status":True, "message": "Connection Passed."} + + except (requests.exceptions.Timeout, requests.exceptions.ConnectionError): + self.internet_status = {"status": False, "message": "Please Check the internet Connection."} + + return self.internet_status["message"] + + def check_authentication(self, public_key, forum_name, api_version=3.0): + + """ Checks if public key and forum is valid. Returns the public key, forum name for Disqus API.""" + # @TODO - Optional Authentication for read/write/moderate. + api_version = str(api_version) + try: + if self.internet_status["status"] == True: + url = urljoin(self.base_disqus_url,api_version)+"/forums/details.json" # get a better way to do this. Apparently urljoin doesnt work that way. + payload = {"api_key":public_key, "forum":forum_name} + connect_api = requests.get(url, params = payload).json() + + if connect_api["code"]== 0: + self.public_key = public_key + self.forum_name = forum_name + self.api_version = api_version + self.api_connection_status = {"status": True, "message": "Your api key and forum name are valid."} + return self.api_connection_status["message"] + + elif connect_api["code"] == 5: + self.api_connection_status = {"status": False, "message": "Your api key is invalid."} + return self.api_connection_status["message"] + + else: + self.api_connection_status = {"status": False, "message": "Your forum name is invalid."} + return self.api_connection_status["message"] + + else: + self.internet_status = {"status": False, "message": "You are still not connected to the internet. 
Please Check the internet Connection"} + return self.internet_status["message"] + + except AttributeError: + self.api_connection_status = {"status": False, "message": "Check your internet connection first."} + return self.api_connection_status["message"] + + def get_thread_ids(self): + """ Returns the counter for thread ids in a forum """ + + forum_url = urljoin(self.base_disqus_url,self.api_version)+"/forums/listPosts.json" # get a better way to do this. Apparently urljoin doesnt work that way. + payload = {"api_key":self.public_key,"forum": self.forum_name} + forum_data = requests.get(forum_url, params=payload).json() + thread_id_list = [thread_id["thread"] for thread_id in forum_data["response"]] + counter = collections.Counter(thread_id_list) + self.counter = counter + return counter + + def get_comments(self): + """ Returns the comments and the url of a thread """ + + json_like_list = [] + + for thread_id in self.counter.keys(): # Find a better way to do this + comment_list = [] + payload = {"api_key": self.public_key, "thread": thread_id} + thread_url = urljoin(self.base_disqus_url,self.api_version)+"/threads/list.json" + thread_data = requests.get(thread_url, params = payload).json() + comment_dict = {} + comment_dict["chapter_urls"] = thread_data["response"][0]["link"] + comment_url = urljoin(self.base_disqus_url,self.api_version)+"/threads/listPosts.json" + comment_data = requests.get(comment_url, params = payload).json() + + for comments in comment_data["response"]: + comment_list.append(comments["raw_message"]) + comment_dict["comment_list"] = comment_list + + + json_like_list.append(comment_dict) + + return json_like_list + + +if __name__ == "__main__": + x = DisqusCommenting() + + y = x.check_internet_connection() + d = x.check_authentication("enter your disqus api PUBLIC key here", 'enter disqus forum name here ') + z = x.get_thread_ids() + z1 = x.get_comments() + + print z1 # this will print out a json like list of all the urls and the comments on 
each url diff --git a/commentingapp/models.py b/commentingapp/models.py new file mode 100644 index 0000000..79e120e --- /dev/null +++ b/commentingapp/models.py @@ -0,0 +1,55 @@ +from __future__ import unicode_literals +from django.db import models +from tbc.models import Chapters, Book +from django.contrib.auth.models import User +from django.db.models import Q +import os +import smtplib +from email.mime.text import MIMEText + + + +class Url (models.Model): + id = models.AutoField(primary_key = True) + url = models.URLField() + + def get_contributor_details(self, counter): + notebooks = [os.path.join(chapter_name.split("/")[-2], chapter_name.split('/')[-1]) for chapter_name in counter.keys()] + contributor_list = [] + for notebook,url,number_of_comments in zip(notebooks, counter.keys(), counter.values()): + contributor_dict = {} + contributor_id = Book.objects.filter(Q(chapters__notebook = notebook)).values_list("contributor_id", flat = True) + contributor = User.objects.filter(id = contributor_id[0]).values("email", "first_name", "last_name") + contributor_dict ["contributor_email"] = contributor[0]["email"] + contributor_dict["contributor_name"] = contributor[0]["first_name"]+" "+ contributor[0]["last_name"] + contributor_dict["url"] = url + contributor_dict["number_of_comments"] = number_of_comments + contributor_list.append(contributor_dict) + return contributor_list + + def send_mail_to_contributor(self, contributor_details): + me = 'put your localhost mail id' + + for info in contributor_details: + body = """ Hi {0}, this mail is from TBC-Python Team. 
You have {1} unread comments for your chapter - {2}""".format(info["contributor_name"], + info["number_of_comments"], + info["url"] + ) + you = info["contributor_email"] + + message = MIMEText(body) + message["Subject"] = "You have {0} unread comment(s).".format(info["number_of_comments"]) + message ["From"] = me + message ["To"] = you + smtp_instance = smtplib.SMTP('localhost') + smtp_instance.sendmail(me, you, message.as_string()) + smtp_instance.quit() + return True + + + +class Comments(models.Model): + url = models.ForeignKey(Url, on_delete=models.CASCADE) + comments = models.TextField() + is_notified = models.BooleanField(default = False) + diff --git a/commentingapp/templates/commenting.html b/commentingapp/templates/commenting.html new file mode 100644 index 0000000..dac6b37 --- /dev/null +++ b/commentingapp/templates/commenting.html @@ -0,0 +1,50 @@ +{% extends "base.html" %} + +{% block title %} TBC Commenting {% endblock %} + +{% block content %} + <h3><center> TBC Commenting </center></h3> + <h5> Hi <u> <b>{{user}} </b> </u> </h5><br/> + <a href = "https://pythontbc.disqus.com" target = "blank">Go to Disqus admin Page </a> + + {% if not url_context %} + <center><h4> There are no new comments </h4></center> + {% else %} + + <form name = "Send Email" action = "{% url 'commentingapp.views.commenting' %}" method = "POST"> + {% csrf_token %} + <table id = "comment-table" border = 2 align = "center" style="empty-cells:hide;"> + <tr> + <th style "width: 5%"> Sr. 
no </th> + <th style "width: 45%"> Url </th> + <th colspan = ""> Comments </th> + </tr> + + {% for urls in url_context %} + <tr> + <td colspan = ""> {{ forloop.counter }}</td> + <td id = "urls" colspan = ""><a href = "{{ urls.url }}" target = "blank"> {{ urls.url }} </a> </td> + <td> + <table width = 100%> + {% for comments in urls.comments_set.all %} + {% if comments.is_notified == 0 %} + <tr> + <td style = "align:left;">{{comments.comments}}</td> + <td style = "align:right;"><input type = "checkbox" name = "comment" value = "{{ urls.url }}, {{comments.comments}}"></input></td> + </tr> + {% endif %} + {% endfor %} + </td> + </table> + </tr> + {% endfor %} + </table> + <br/> + + <center> <input class = "btn" type = "submit" value = "Submit"> </input></center> + + </form> + {% endif %} + +{% endblock %} + diff --git a/commentingapp/templates/notified.html b/commentingapp/templates/notified.html new file mode 100644 index 0000000..c062d3f --- /dev/null +++ b/commentingapp/templates/notified.html @@ -0,0 +1,14 @@ +{% extends "base.html" %} +{% block title %} Success {% endblock %} + +{% block content %} + +<body> + +{% csrf_token %} +<h5> {{ notified_comments }} </h5> +<p></p> +<p> <a href = "{% url 'commentingapp.views.commenting' %}"> << Go back to Commenting Page </a></p> + +</body> +{% endblock %} diff --git a/commentingapp/views.py b/commentingapp/views.py new file mode 100644 index 0000000..b4c2b84 --- /dev/null +++ b/commentingapp/views.py @@ -0,0 +1,40 @@ +from django.shortcuts import render, render_to_response +from django.contrib.auth.decorators import login_required +from django.template import RequestContext +from .models import Url, Comments +from django.contrib.auth.decorators import user_passes_test +from django.db.models import Q +from tbc.models import Book, Chapters +from django.contrib.auth.models import User +from collections import Counter +import os.path +from email.mime.text import MIMEText + +@user_passes_test(lambda u:u.is_superuser, 
login_url="/admin/login/") + +def commenting(req): + ci = RequestContext(req) + url_instance = Url.objects.filter(Q(comments__is_notified = 0)).distinct() + context = {"url_context": url_instance, "user": req.user} + + if req.method == "POST": + notified_comment_list = req.POST.getlist("comment") + url_list = [] + for notified_comments in notified_comment_list: + url_comment_list= notified_comments.split(", ") + url_list.append(url_comment_list[0]) + Comments.objects.filter(comments = url_comment_list[1]).update(is_notified = 1) + + counter = Counter(url_list) + url_db_instance = Url() + contributor_details = url_db_instance.get_contributor_details(counter) + status = url_db_instance.send_mail_to_contributor(contributor_details) + + if status == True: + context = {"notified_comments": "You have suceesfully notified the contributors"} + else: + context = {"notified_comments": "Mail couldnot be sent"} + return render_to_response("notified.html", context, ci) + + + return render_to_response ("commenting.html", context, ci) diff --git a/requirements.txt b/requirements.txt index 3184a23..6e46e4a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ reportlab==3.1.8 requests==2.6.0 urllib3==1.10.2 wsgiref==0.1.2 +scrapy==1.0.3 diff --git a/scripts/crawler/scrapy.cfg b/scripts/crawler/scrapy.cfg new file mode 100644 index 0000000..b99853f --- /dev/null +++ b/scripts/crawler/scrapy.cfg @@ -0,0 +1,11 @@ +# Automatically created by: scrapy startproject +# +# For more information about the [deploy] section see: +# https://scrapyd.readthedocs.org/en/latest/deploy.html + +[settings] +default = tbc_web_crawler.settings + +[deploy] +#url = http://localhost:6800/ +project = tbc_web_crawler diff --git a/scripts/crawler/tbc_web_crawler/__init__.py b/scripts/crawler/tbc_web_crawler/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/scripts/crawler/tbc_web_crawler/__init__.py diff --git a/scripts/crawler/tbc_web_crawler/settings.py 
b/scripts/crawler/tbc_web_crawler/settings.py new file mode 100644 index 0000000..03ba836 --- /dev/null +++ b/scripts/crawler/tbc_web_crawler/settings.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- + +# Scrapy settings for tbc_web_crawler project +# +# For simplicity, this file contains only settings considered important or +# commonly used. You can find more settings consulting the documentation: +# +# http://doc.scrapy.org/en/latest/topics/settings.html +# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html +# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html + +BOT_NAME = 'tbc_web_crawler' + +SPIDER_MODULES = ['tbc_web_crawler.spiders'] +NEWSPIDER_MODULE = 'tbc_web_crawler.spiders' + + +# Crawl responsibly by identifying yourself (and your website) on the user-agent +#USER_AGENT = 'tbc_web_crawler (+http://www.yourdomain.com)' + +# Configure maximum concurrent requests performed by Scrapy (default: 16) +CONCURRENT_REQUESTS=100 + +# Configure a delay for requests for the same website (default: 0) +# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay +# See also autothrottle settings and docs +#DOWNLOAD_DELAY=3 +# The download delay setting will honor only one of: +#CONCURRENT_REQUESTS_PER_DOMAIN=16 +#CONCURRENT_REQUESTS_PER_IP=16 + +# Disable cookies (enabled by default) +#COOKIES_ENABLED=False + +# Disable Telnet Console (enabled by default) +#TELNETCONSOLE_ENABLED=False + +# Override the default request headers: +#DEFAULT_REQUEST_HEADERS = { +# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', +# 'Accept-Language': 'en', +#} + +# Enable or disable spider middlewares +# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html +#SPIDER_MIDDLEWARES = { +# 'tbc_web_crawler.middlewares.MyCustomSpiderMiddleware': 543, +#} + +# Enable or disable downloader middlewares +# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 
+#DOWNLOADER_MIDDLEWARES = { + #'scrapy.downloadermiddlewares.retry.RetryMiddleware': None +# 'tbc_web_crawler.middlewares.MyCustomDownloaderMiddleware': 543, +#} + +# Enable or disable extensions +# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html +#EXTENSIONS = { +# 'scrapy.telnet.TelnetConsole': None, +#} + +# Configure item pipelines +# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html +#ITEM_PIPELINES = { +# 'tbc_web_crawler.pipelines.SomePipeline': 300, +#} + +# Enable and configure the AutoThrottle extension (disabled by default) +# See http://doc.scrapy.org/en/latest/topics/autothrottle.html +# NOTE: AutoThrottle will honour the standard settings for concurrency and delay +#AUTOTHROTTLE_ENABLED=True +# The initial download delay +#AUTOTHROTTLE_START_DELAY=5 +# The maximum download delay to be set in case of high latencies +#AUTOTHROTTLE_MAX_DELAY=60 +# Enable showing throttling stats for every response received: +#AUTOTHROTTLE_DEBUG=False + +# Enable and configure HTTP caching (disabled by default) +# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings +#HTTPCACHE_ENABLED=True +#HTTPCACHE_EXPIRATION_SECS=0 +#HTTPCACHE_DIR='httpcache' +#HTTPCACHE_IGNORE_HTTP_CODES=[] +#HTTPCACHE_STORAGE='scrapy.extensions.httpcache.FilesystemCacheStorage' diff --git a/scripts/crawler/tbc_web_crawler/spiders/__init__.py b/scripts/crawler/tbc_web_crawler/spiders/__init__.py new file mode 100644 index 0000000..ebd689a --- /dev/null +++ b/scripts/crawler/tbc_web_crawler/spiders/__init__.py @@ -0,0 +1,4 @@ +# This package will contain the spiders of your Scrapy project +# +# Please refer to the documentation for information on how to create and manage +# your spiders. 
diff --git a/scripts/crawler/tbc_web_crawler/spiders/items.py b/scripts/crawler/tbc_web_crawler/spiders/items.py new file mode 100644 index 0000000..9dda20f --- /dev/null +++ b/scripts/crawler/tbc_web_crawler/spiders/items.py @@ -0,0 +1,18 @@ +import scrapy + + +class TbcErrorItems(scrapy.Item): + + + chapter_name = scrapy.Field() + chapter_urls = scrapy.Field() + completed_book_urls = scrapy.Field() + number_of_errors = scrapy.Field() + error_messages = scrapy.Field() + + + +class TbcBrokenItems(scrapy.Item): + + broken_url = scrapy.Field() + broken_status = scrapy.Field() diff --git a/scripts/crawler/tbc_web_crawler/spiders/tbc_spider.py b/scripts/crawler/tbc_web_crawler/spiders/tbc_spider.py new file mode 100644 index 0000000..9688e70 --- /dev/null +++ b/scripts/crawler/tbc_web_crawler/spiders/tbc_spider.py @@ -0,0 +1,76 @@ +import scrapy +from items import TbcErrorItems, TbcBrokenItems +from scrapy.utils.response import get_base_url +from scrapy.utils.url import urljoin_rfc +from scrapy.http import Request + +import os, json + +if os.path.isfile('items.json'): + os.remove('items.json') +else: + pass + +class TbcSpider(scrapy.Spider): + + name = "tbc_spider" # Name of the crawler. Use this name when crawling from the terminal, for eg - scrapy crawl tbc_spider + + start_urls = ["http://tbc-python.fossee.aero.iitb.ac.in/completed-books/"] + handle_httpstatus_list = [404, 500, 502] # A list containing HTTP error codes. 
+ + def parse(self,response): + """ This function looks for book links and returns the url""" + + for book_link in response.xpath('//a[contains(@href,"book-details")]/@href').extract(): + """ Searches for links with "book-details" in it """ + + first_base_url = get_base_url(response) + first_relative_url = urljoin_rfc(first_base_url,book_link) + """creates a url to be returned to the next function.""" + + yield scrapy.Request(first_relative_url,callback=self.parse_book_contents) + + + + def parse_book_contents(self, response): + + """ This function looks for chapter links through each book link and returns the url""" + + for chapter_link in response.xpath ('//a[contains(@href,"convert-notebook")]/@href').extract(): + """ Searches for chapters in each book list""" + second_base_url = get_base_url(response).split('/book-details')[0] + second_relative_url = urljoin_rfc(second_base_url,chapter_link) + """creates a url to be returned to the next function.""" + + yield scrapy.Request(second_relative_url,callback=self.parse_chapter_details) + + + + def parse_chapter_details(self, response): + + if not response.xpath('//h1/text()').extract(): + chapter_details = [response.url] + else: + chapter_details = response.xpath('//h1/text()').extract() + + + error_tag = response.xpath('//div[@class="output_subarea output_text output_error"]') + error_list = [error_notifications for error_notifications \ + in response.xpath \ + ('//div[@class="output_subarea output_text output_error"]/span/text()').extract()] + + if response.status in self.handle_httpstatus_list: + broken_items = TbcBrokenItems() + broken_items['broken_url'] = response.url + broken_items['broken_status'] = response.status + yield broken_items + else: + if len(error_tag) != 0: + items = TbcErrorItems() + items ['chapter_name'] = chapter_details[0] + items ['chapter_urls'] = response.url + items ['number_of_errors'] = len (error_tag) + #items ['completed_book_urls'] = response.request.headers.get('Referer', None) + 
#items ['error_messages'] = error_list + yield items + diff --git a/scripts/cron.sh b/scripts/cron.sh new file mode 100644 index 0000000..bf219be --- /dev/null +++ b/scripts/cron.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +DIR="$( cd "$( dirname "$0" )" && pwd )" +cd $DIR + +python database_updater.py + +source ../../../bin/activate +# this is for the test server. Might differ on different machines. Ideally it should be "source ../../bin/activate" + + + +cd crawler/ + +scrapy crawl tbc_spider -o items.json -t json +#sadly scrapy can only be run in the folders containing scrapy.cfg + +cd ../. + +python split_json.py + +deactivate + diff --git a/scripts/database_updater.py b/scripts/database_updater.py new file mode 100644 index 0000000..71813ea --- /dev/null +++ b/scripts/database_updater.py @@ -0,0 +1,78 @@ +import os +import sys + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "PythonTBC.settings") +base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(base_path) + +from commentingapp.models import Url, Comments +from commentingapp.commenting_new import DisqusCommenting +from tbc.models import Book, Chapters +from django.contrib.auth.models import User + +class CronForCommenting(object): + + def fetch_comments_from_script(self): + """ Fetches comment from Commenting script""" + + commenting_instance = DisqusCommenting() + check_net = commenting_instance.check_internet_connection() + check_auth = commenting_instance.check_authentication("enter your disqus api public key here", + "enter your forum name here" + ) + thread = commenting_instance.get_thread_ids() + self.comments_for_db = commenting_instance.get_comments() + + return self.comments_for_db + + + + def add_comments_to_db(self): + + if not Url.objects.exists(): + """ Populates the db if empty""" + for comment_details in self.comments_for_db: + url_instance = Url(url = comment_details["chapter_urls"]) #url_instance is actually an object + url_instance.save() + for comment 
in comment_details["comment_list"]: + Comments.objects.create(url = url_instance, comments = comment) + return "Database is created" + + else: + """ if the db isnt empty""" + for comment_details in self.comments_for_db: + url_object, url_status = Url.objects.get_or_create(url = comment_details["chapter_urls"]) + url_primary_key = url_object.pk + for comment in comment_details["comment_list"]: + Comments.objects.get_or_create(comments = comment, url_id = url_primary_key) + return "Database is updated." + + + def delete_redundant_comments(self): + "delete urls that have no comments in them anymore" + + url_list = [urls["chapter_urls"] for urls in self.comments_for_db] + url_list_db = Url.objects.values_list("url", flat = True) + url_difference = set(url_list_db)-set(url_list) + for delete_url in url_difference: + Url.objects.filter(url = delete_url).delete() + + "delete comments that have been deleted from tbc notebooks" + for comment_details in self.comments_for_db: + url_instance = Url.objects.get(url = comment_details["chapter_urls"]) + comment_list_db = url_instance.comments_set.values_list("comments", flat = True) + redundant_comment_list = set(comment_list_db)-set(comment_details["comment_list"]) + for delete_comment in redundant_comment_list: + url_instance.comments_set.filter(comments = delete_comment).delete() + return "Redundant Comments deleted." 
+ + + +if __name__ == '__main__': + + a = CronForCommenting() + b = a.fetch_comments_from_script() + c = a.add_comments_to_db() #This should always be before delete_redundant_comments + d = a.delete_redundant_comments() #This should always be after add_comments_to_db + print c + print d diff --git a/scripts/split_json.py b/scripts/split_json.py new file mode 100644 index 0000000..baa0b90 --- /dev/null +++ b/scripts/split_json.py @@ -0,0 +1,20 @@ +import cPickle +import json +from os.path import dirname, abspath,join +try: + with open('crawler/items.json', "r") as json_dump: + json_data = json.load(json_dump) + json_dump.close() + a = [saved_data for saved_data in json_data if str(saved_data).startswith("{u'ch")] + with open(join(dirname(abspath(dirname(__file__))),'tbc_error_page/error.pickle'), "w+") as error_json: + cPickle.dump(a, error_json) + error_json.close() + + b = [saved_data for saved_data in json_data if str(saved_data).startswith("{u'br")] + with open(join(dirname(abspath(dirname(__file__))),'tbc_error_page/broken.pickle'), "w+") as broken_json: + cPickle.dump(b, broken_json) + broken_json.close() + + +except ValueError: + print "Couldn't find file" diff --git a/tbc/templates/base.html b/tbc/templates/base.html index 84e7136..a1b4c8f 100755 --- a/tbc/templates/base.html +++ b/tbc/templates/base.html @@ -133,6 +133,7 @@ <li><a href="{% url 'tbc:GetCertificate' %}">Get Certificate</a></li> <li><a href="{% url 'tbc:UpdateProfile' %}">Update Profile</a></li> <li><a href="{% url 'tbc:UpdatePassword' %}">Update Password</a></li> + <li><a href="{% url 'tbc:admin_tools' %}">Admin Tools </a></li> <li><a href="{% url 'tbc:UserLogout' %}">Logout</a></li> </ul> </li> diff --git a/tbc/templates/tbc/admin-tools.html b/tbc/templates/tbc/admin-tools.html new file mode 100644 index 0000000..1c46b64 --- /dev/null +++ b/tbc/templates/tbc/admin-tools.html @@ -0,0 +1,17 @@ +{% extends "base.html" %} +{% block title %} Admin Tools {% endblock %} +{% block content %} + 
+<body> + +{% csrf_token %} + +<h4> Hi, {{ user }} </h4> + +<p></p> +<p></p> +<p><a href = "{% url 'commentingapp.views.commenting' %}"> Commenting </a></p> +<p><a href = "{% url 'tbc_error_page.views.error' %}"> Error Page </a></p> +<p><a href = "{% url 'tbc_error_page.views.broken' %}"> Broken Page </a></p> + +{% endblock %} diff --git a/tbc/urls.py b/tbc/urls.py index 747a77d..6d3cc17 100644 --- a/tbc/urls.py +++ b/tbc/urls.py @@ -12,8 +12,8 @@ urlpatterns = patterns('', url(r'^profile/$', 'tbc.views.UserProfile', name='UserProfile'), url(r'^update-profile/$', 'tbc.views.UpdateProfile', name='UpdateProfile'), url(r'^forgot-password/$', 'tbc.views.ForgotPassword', name='ForgotPassword'), - url(r'^update-password/$', 'tbc.views.UpdatePassword', name='UpdatePassword'), - + url(r'^update-password/$', 'tbc.views.UpdatePassword', name='UpdatePassword'), + url(r'^admin-tools/$', 'tbc.views.admin_tools', name='admin_tools'), url(r'^submit-proposal/$', 'tbc.views.SubmitProposal', name='SubmitProposal'), url(r'^submit-aicte-proposal/$', 'tbc.views.ListAICTE', name='ListAICTE'), diff --git a/tbc/views.py b/tbc/views.py index 9e3a2e7..767dd4e 100755 --- a/tbc/views.py +++ b/tbc/views.py @@ -1413,3 +1413,12 @@ def link_image(request): chapter.save() context['success'] = True return render_to_response('tbc/link_image.html', context, context_instance=ci) + +@login_required( login_url= "/admin") +def admin_tools(request): + ci = RequestContext(request) + user = request.user + context = {"user":user} + return render_to_response('tbc/admin-tools.html', context, context_instance=ci) + + diff --git a/tbc_error_page/models.py b/tbc_error_page/models.py new file mode 100644 index 0000000..82c4da6 --- /dev/null +++ b/tbc_error_page/models.py @@ -0,0 +1,107 @@ +from django.db import models +import os +import cPickle + +def get_json_from_file(filename): + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename) + if os.path.isfile(path): + with open(path) as json_dump: 
+ json_data =cPickle.load(json_dump) + return json_data + else: + return False + + + +class Error(models.Model): + + chapter_url = models.URLField(max_length = 255) + number_of_errors = models.IntegerField() + chapter_name = models.CharField(max_length = 200,) + is_deliberate = models.IntegerField(default = False) + + def create_new_error_data(self, error_json_data): + # Populates an empty table + for error_details in error_json_data: + Error.objects.create(chapter_url = error_details["chapter_urls"], + chapter_name = error_details["chapter_name"], + number_of_errors = int(error_details["number_of_errors"]), + is_deliberate = 0 + ) + + def delete_redundant_error_data(self, error_json_data): + # delete errors which have been solved + for error_details in error_json_data: + db_url_list = Error.objects.values_list("chapter_url", flat=True) + json_url_list = [url_list["chapter_urls"] for url_list in error_json_data] + c = set(db_url_list)-set(json_url_list) #change variable name. + for somelist in c: + Error.objects.filter(chapter_url = somelist).delete() + + def update_error_data(self, error_json_data): + + # a little more refined. + + for error_details in error_json_data: + original_value = Error.objects.get(chapter_url = error_details["chapter_urls"]).number_of_errors + # if number of errors have increased + if original_value < error_details["number_of_errors"]: + + Error.objects.filter(chapter_url = error_details["chapter_urls"])\ + .update(number_of_errors = error_details["number_of_errors"], + is_deliberate = 0 + ) + # if number of errors have decreased + elif original_value > error_details["number_of_errors"]: + Error.objects.filter(chapter_url = error_details["chapter_urls"])\ + .update(number_of_errors = error_details["number_of_errors"], is_deliberate = 0) + else: + # if new errors have been added. 
+ Error.objects.get_or_create(chapter_url = error_details["chapter_urls"], + number_of_errors = error_details["number_of_errors"] + ) + + Error.objects.filter(chapter_url = error_details["chapter_urls"])\ + .update(chapter_url = error_details["chapter_urls"], + number_of_errors = error_details["number_of_errors"], + chapter_name = error_details["chapter_name"] + ) + + + + + + def update_deliberate_error(self, deliberate_error_list): + + for deliberate_urls in deliberate_error_list: + a = Error.objects.filter(chapter_url = deliberate_urls).update(is_deliberate = 1) + + + + +class Broken(models.Model): + + broken_url = models.URLField(max_length = 255) + error_status = models.IntegerField() + + def create_new_broken_data(self, broken_data): + for broken_details in broken_data: + + Broken.objects.create(broken_url = broken_details["broken_url"], + error_status = broken_details["broken_status"]) + + def delete_redundant_broken_data(self, broken_data): + for broken_details in broken_data: + db_url_list = Broken.objects.values_list("broken_url", flat=True) + json_url_list = [url_list["broken_url"] for url_list in broken_data] + redundant_url = set(db_url_list)-set(json_url_list) #change variable name. 
+ for delete_url in redundant_url: + Broken.objects.filter(broken_url = delete_url).delete() + + + def update_broken_data(self, broken_data): + for broken_details in broken_data: + + Broken.objects.get_or_create(broken_url = broken_details["broken_url"], + error_status = broken_details["broken_status"] + ) diff --git a/tbc_error_page/templates/broken.html b/tbc_error_page/templates/broken.html new file mode 100644 index 0000000..841069c --- /dev/null +++ b/tbc_error_page/templates/broken.html @@ -0,0 +1,28 @@ +{% extends "base.html" %} +{% block title %} TBC Broken Links {% endblock %} +{% block content %} + {% if not broken %} + <center><h4> There are no new comments </h4></center> + {% else %} + <h3><u><center>TBC Broken Links Page </center></u></h3> + <h5> Hi <b><u> {{user}} </b><u> </h5> + <a href = "{% url 'tbc_error_page.views.error' %}"> TBC Error Status Page </a> +<p></p> + <table border = 1> + <tr> + <th> Sr no. </th> + <th> Broken Urls </th> + <th> <b> HTTP status error code </th> + </tr> + {% for broken_data in broken %} + <tr> + <td> {{ forloop.counter }} </td> + <td> <a href = {{ broken_data.broken_url }} target = "blank"> {{ broken_data.broken_url }} </a> </td> + <td><b> {{ broken_data.error_status }} </b> error </td> + </tr> + {% endfor %} + + </table> +{% endif %} + +{% endblock %} diff --git a/tbc_error_page/templates/deliberate.html b/tbc_error_page/templates/deliberate.html new file mode 100644 index 0000000..89a8974 --- /dev/null +++ b/tbc_error_page/templates/deliberate.html @@ -0,0 +1,17 @@ +{% extends "base.html" %} +{% block title %} Success {% endblock %} +{% block content %} +<p> You have added following urls as deliberate </p> +<table border = 1> +<th>Urls</th> + +{% for deliberate_links in deliberate %} + +<tr><td> {{ deliberate_links }} </tr></td> + + +{% endfor %} +</table> +<p></p> +<p><a href = "{% url 'tbc_error_page.views.error' %}"> <<< Go back to Error Page </a></p> +{% endblock %} diff --git a/tbc_error_page/templates/error.html 
b/tbc_error_page/templates/error.html
new file mode 100644
index 0000000..237c7f3
--- /dev/null
+++ b/tbc_error_page/templates/error.html
@@ -0,0 +1,32 @@
+{% extends "base.html" %}
+{% block title %} TBC Error Page {% endblock %}
+
+{% block content %}
+    <h3><u><center>TBC Error Page </center></u></h3>
+    <h5> Hi <b><u>{{ user }} </u></b></h5>
+    <p><a href = "{% url 'tbc_error_page.views.broken' %}"> TBC Broken Links page </a></p>
+    {% if not context %}
+        <center><h4> There are no new errors </h4></center>
+    {% else %}
+    <form name = "Send Email" action = "{% url 'tbc_error_page.views.error' %}" method = "POST"> {% csrf_token %}
+    <table border = 2>
+    <tr>
+        <th> Chapters With errors</th>
+        <th> Number of errors</th>
+        <th> Deliberate Errors</th>
+    </tr>
+    {% for errors in context %}
+        {% if errors.is_deliberate == 0 %}
+        <tr>
+            <td><a href = "{{ errors.chapter_url }}" target = "blank"> {{ errors.chapter_name }} </a></td>
+            <td> {{ errors.number_of_errors }} </td>
+            <td> <input type = "checkbox" name = "deliberate" value = "{{ errors.chapter_url }}"> </td>
+        </tr>
+        {% endif %}
+    {% endfor %}
+    </table>
+    <br/>
+    <input class = "btn" type = "submit" value = "Submit">
+    </form>
+{% endif %}
+{% endblock %}
diff --git a/tbc_error_page/views.py b/tbc_error_page/views.py
new file mode 100644
index 0000000..aa32453
--- /dev/null
+++ b/tbc_error_page/views.py
@@ -0,0 +1,61 @@
+from django.shortcuts import render_to_response
+from .models import Error, Broken, get_json_from_file
+from django.contrib.auth.decorators import user_passes_test
+from django.template import RequestContext
+import json
+import os
+
+
+# Both views below rewrite the Error/Broken tables on every request, so they
+# are limited to superusers; everyone else is redirected to the admin login.
+superuser_required = user_passes_test(lambda u: u.is_superuser,
+                                      login_url="/admin/login")
+
+
+@superuser_required
+def error(req):
+    """Refresh the Error table, then list errors or mark them deliberate.
+
+    GET renders error.html with the rows whose is_deliberate is 0.
+    POST hands the urls ticked in the "deliberate" checkboxes to
+    Error.update_deliberate_error and renders deliberate.html with them.
+    """
+    ci = RequestContext(req)
+    db_instance = Error()
+    error_json_data = get_json_from_file("error.pickle")
+
+    # An empty table is seeded; otherwise it is reconciled with the loaded data.
+    if not Error.objects.exists():
+        db_instance.create_new_error_data(error_json_data)
+    else:
+        db_instance.delete_redundant_error_data(error_json_data)
+        db_instance.update_error_data(error_json_data)
+
+    if req.method == "POST":
+        deliberate_urls_list = req.POST.getlist("deliberate")
+        db_instance.update_deliberate_error(deliberate_urls_list)
+        context = {"user": req.user, "deliberate": deliberate_urls_list}
+        return render_to_response("deliberate.html", context, ci)
+
+    error_details = Error.objects.filter(is_deliberate=0)
+    context = {"context": error_details, "user": req.user}
+    return render_to_response("error.html", context, ci)
+
+
+@superuser_required
+def broken(req):
+    """Refresh the Broken table and render broken.html with every row."""
+    ci = RequestContext(req)
+    db_instance = Broken()
+    broken_json_data = get_json_from_file("broken.pickle")
+
+    # An empty table is seeded; otherwise it is reconciled with the loaded data.
+    if not Broken.objects.exists():
+        db_instance.create_new_broken_data(broken_json_data)
+    else:
+        db_instance.delete_redundant_broken_data(broken_json_data)
+        db_instance.update_broken_data(broken_json_data)
+
+    broken_links = Broken.objects.all()
+    context = {"broken": broken_links, "user": req.user}
+    return render_to_response("broken.html", context, ci)
|