-rw-r--r--  scripts/crawler/scrapy.cfg                             |  11
-rw-r--r--  scripts/crawler/tbc_web_crawler/__init__.py            |   0
-rw-r--r--  scripts/crawler/tbc_web_crawler/settings.py            |  86
-rw-r--r--  scripts/crawler/tbc_web_crawler/spiders/__init__.py    |   4
-rw-r--r--  scripts/crawler/tbc_web_crawler/spiders/items.py       |  18
-rw-r--r--  scripts/crawler/tbc_web_crawler/spiders/tbc_spider.py  |  76
-rw-r--r--  scripts/database_updater.py                            |  55
-rw-r--r--  tbc_error_page/broken.json                             | 130
-rw-r--r--  tbc_error_page/error.json                              | 194
-rw-r--r--  tbc_error_page/models.py                               | 107
-rw-r--r--  tbc_error_page/templates/broken.html                   |  28
-rw-r--r--  tbc_error_page/templates/deliberate.html               |  17
-rw-r--r--  tbc_error_page/templates/error.html                    |  41
-rw-r--r--  tbc_error_page/views.py                                |  56
14 files changed, 823 insertions, 0 deletions
diff --git a/scripts/crawler/scrapy.cfg b/scripts/crawler/scrapy.cfg
new file mode 100644
index 0000000..b99853f
--- /dev/null
+++ b/scripts/crawler/scrapy.cfg
@@ -0,0 +1,11 @@
+# Automatically created by: scrapy startproject
+#
+# For more information about the [deploy] section see:
+# https://scrapyd.readthedocs.org/en/latest/deploy.html
+
+[settings]
+default = tbc_web_crawler.settings
+
+[deploy]
+#url = http://localhost:6800/
+project = tbc_web_crawler
diff --git a/scripts/crawler/tbc_web_crawler/__init__.py b/scripts/crawler/tbc_web_crawler/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/scripts/crawler/tbc_web_crawler/__init__.py
diff --git a/scripts/crawler/tbc_web_crawler/settings.py b/scripts/crawler/tbc_web_crawler/settings.py
new file mode 100644
index 0000000..03ba836
--- /dev/null
+++ b/scripts/crawler/tbc_web_crawler/settings.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+
+# Scrapy settings for tbc_web_crawler project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used. You can find more settings by consulting the documentation:
+#
+# http://doc.scrapy.org/en/latest/topics/settings.html
+# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
+# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
+
+BOT_NAME = 'tbc_web_crawler'
+
+SPIDER_MODULES = ['tbc_web_crawler.spiders']
+NEWSPIDER_MODULE = 'tbc_web_crawler.spiders'
+
+
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
+#USER_AGENT = 'tbc_web_crawler (+http://www.yourdomain.com)'
+
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
+CONCURRENT_REQUESTS=100
+
+# Configure a delay for requests for the same website (default: 0)
+# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
+# See also autothrottle settings and docs
+#DOWNLOAD_DELAY=3
+# The download delay setting will honor only one of:
+#CONCURRENT_REQUESTS_PER_DOMAIN=16
+#CONCURRENT_REQUESTS_PER_IP=16
+
+# Disable cookies (enabled by default)
+#COOKIES_ENABLED=False
+
+# Disable Telnet Console (enabled by default)
+#TELNETCONSOLE_ENABLED=False
+
+# Override the default request headers:
+#DEFAULT_REQUEST_HEADERS = {
+# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+# 'Accept-Language': 'en',
+#}
+
+# Enable or disable spider middlewares
+# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
+#SPIDER_MIDDLEWARES = {
+# 'tbc_web_crawler.middlewares.MyCustomSpiderMiddleware': 543,
+#}
+
+# Enable or disable downloader middlewares
+# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
+#DOWNLOADER_MIDDLEWARES = {
+ #'scrapy.downloadermiddlewares.retry.RetryMiddleware': None
+# 'tbc_web_crawler.middlewares.MyCustomDownloaderMiddleware': 543,
+#}
+
+# Enable or disable extensions
+# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
+#EXTENSIONS = {
+# 'scrapy.telnet.TelnetConsole': None,
+#}
+
+# Configure item pipelines
+# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
+#ITEM_PIPELINES = {
+# 'tbc_web_crawler.pipelines.SomePipeline': 300,
+#}
+
+# Enable and configure the AutoThrottle extension (disabled by default)
+# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
+# NOTE: AutoThrottle will honour the standard settings for concurrency and delay
+#AUTOTHROTTLE_ENABLED=True
+# The initial download delay
+#AUTOTHROTTLE_START_DELAY=5
+# The maximum download delay to be set in case of high latencies
+#AUTOTHROTTLE_MAX_DELAY=60
+# Enable showing throttling stats for every response received:
+#AUTOTHROTTLE_DEBUG=False
+
+# Enable and configure HTTP caching (disabled by default)
+# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+#HTTPCACHE_ENABLED=True
+#HTTPCACHE_EXPIRATION_SECS=0
+#HTTPCACHE_DIR='httpcache'
+#HTTPCACHE_IGNORE_HTTP_CODES=[]
+#HTTPCACHE_STORAGE='scrapy.extensions.httpcache.FilesystemCacheStorage'
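
The settings above raise CONCURRENT_REQUESTS to 100 (the default is 16) while leaving all of the politeness knobs commented out. If the crawl ever needs to go easier on the TBC server, a minimal sketch of how those knobs combine is shown below; the values are illustrative assumptions, not part of this commit.

    # Hypothetical additions to tbc_web_crawler/settings.py -- not in this commit.
    CONCURRENT_REQUESTS = 32             # fewer parallel requests than the 100 configured above
    DOWNLOAD_DELAY = 0.5                 # pause between requests to the same site
    AUTOTHROTTLE_ENABLED = True          # let Scrapy adapt the delay to observed latency
    AUTOTHROTTLE_START_DELAY = 5
    AUTOTHROTTLE_MAX_DELAY = 60
    RETRY_HTTP_CODES = [502, 503, 504]   # retry transient gateway errors before reporting them
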
diff --git a/scripts/crawler/tbc_web_crawler/spiders/__init__.py b/scripts/crawler/tbc_web_crawler/spiders/__init__.py
new file mode 100644
index 0000000..ebd689a
--- /dev/null
+++ b/scripts/crawler/tbc_web_crawler/spiders/__init__.py
@@ -0,0 +1,4 @@
+# This package will contain the spiders of your Scrapy project
+#
+# Please refer to the documentation for information on how to create and manage
+# your spiders.
diff --git a/scripts/crawler/tbc_web_crawler/spiders/items.py b/scripts/crawler/tbc_web_crawler/spiders/items.py
new file mode 100644
index 0000000..9dda20f
--- /dev/null
+++ b/scripts/crawler/tbc_web_crawler/spiders/items.py
@@ -0,0 +1,18 @@
+import scrapy
+
+
+class TbcErrorItems(scrapy.Item):
+
+    chapter_name = scrapy.Field()
+    chapter_urls = scrapy.Field()
+    completed_book_urls = scrapy.Field()
+    number_of_errors = scrapy.Field()
+    error_messages = scrapy.Field()
+
+
+class TbcBrokenItems(scrapy.Item):
+
+    broken_url = scrapy.Field()
+    broken_status = scrapy.Field()
diff --git a/scripts/crawler/tbc_web_crawler/spiders/tbc_spider.py b/scripts/crawler/tbc_web_crawler/spiders/tbc_spider.py
new file mode 100644
index 0000000..9688e70
--- /dev/null
+++ b/scripts/crawler/tbc_web_crawler/spiders/tbc_spider.py
@@ -0,0 +1,76 @@
+import scrapy
+from items import TbcErrorItems, TbcBrokenItems
+from scrapy.utils.response import get_base_url
+from scrapy.utils.url import urljoin_rfc
+from scrapy.http import Request
+
+import os, json
+
+# Remove any stale feed output so each crawl starts with a fresh items.json.
+if os.path.isfile('items.json'):
+    os.remove('items.json')
+
+class TbcSpider(scrapy.Spider):
+
+    name = "tbc_spider"  # Name of the crawler. Use this name when crawling from the terminal, e.g. scrapy crawl tbc_spider
+
+    start_urls = ["http://tbc-python.fossee.aero.iitb.ac.in/completed-books/"]
+    handle_httpstatus_list = [404, 500, 502]  # HTTP error codes that are passed to the callbacks instead of being filtered out.
+
+    def parse(self, response):
+        """ Looks for book links on the completed-books page and follows each one."""
+
+        for book_link in response.xpath('//a[contains(@href,"book-details")]/@href').extract():
+            # Searches for links with "book-details" in them.
+            first_base_url = get_base_url(response)
+            first_relative_url = urljoin_rfc(first_base_url, book_link)
+            # Builds the absolute url that is handed to the next callback.
+            yield scrapy.Request(first_relative_url, callback=self.parse_book_contents)
+
+
+
+    def parse_book_contents(self, response):
+        """ Looks for chapter links inside each book page and follows them."""
+
+        for chapter_link in response.xpath('//a[contains(@href,"convert-notebook")]/@href').extract():
+            # Searches for chapters listed in each book.
+            second_base_url = get_base_url(response).split('/book-details')[0]
+            second_relative_url = urljoin_rfc(second_base_url, chapter_link)
+            # Builds the absolute url that is handed to the next callback.
+            yield scrapy.Request(second_relative_url, callback=self.parse_chapter_details)
+
+
+
+    def parse_chapter_details(self, response):
+        """ Collects error counts from a rendered chapter notebook, or reports a broken link."""
+
+        if not response.xpath('//h1/text()').extract():
+            chapter_details = [response.url]
+        else:
+            chapter_details = response.xpath('//h1/text()').extract()
+
+        error_tag = response.xpath('//div[@class="output_subarea output_text output_error"]')
+        error_list = [error_notifications for error_notifications in
+                      response.xpath('//div[@class="output_subarea output_text output_error"]/span/text()').extract()]
+
+        if response.status in self.handle_httpstatus_list:
+            # The server could not render the notebook; record it as a broken link.
+            broken_items = TbcBrokenItems()
+            broken_items['broken_url'] = response.url
+            broken_items['broken_status'] = response.status
+            yield broken_items
+        else:
+            if len(error_tag) != 0:
+                # The notebook rendered, but one or more code cells raised errors.
+                items = TbcErrorItems()
+                items['chapter_name'] = chapter_details[0]
+                items['chapter_urls'] = response.url
+                items['number_of_errors'] = len(error_tag)
+                #items['completed_book_urls'] = response.request.headers.get('Referer', None)
+                #items['error_messages'] = error_list
+                yield items
+
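
The spider deletes any stale items.json at import time but never writes it directly, so its output presumably comes from Scrapy's feed export, while models.py further down unpickles error.json and broken.json with cPickle. A sketch of one way the pieces could be glued together; the command, file names, and the split into two dump files are assumptions, not something this commit encodes.

    # Hypothetical glue script, not part of this commit (Python 2, like the rest of the repo).
    import json
    import cPickle
    import subprocess

    # Run from scripts/crawler/, where scrapy.cfg lives; -o/-t ask Scrapy to export a JSON feed.
    subprocess.check_call(["scrapy", "crawl", "tbc_spider", "-o", "items.json", "-t", "json"])

    with open("items.json") as feed:
        items = json.load(feed)

    # Split the feed into the two pickled dumps that tbc_error_page reads back with cPickle.
    errors = [item for item in items if "chapter_urls" in item]
    broken = [item for item in items if "broken_url" in item]
    cPickle.dump(errors, open("error.json", "w"))
    cPickle.dump(broken, open("broken.json", "w"))
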
diff --git a/scripts/database_updater.py b/scripts/database_updater.py
new file mode 100644
index 0000000..cf3801a
--- /dev/null
+++ b/scripts/database_updater.py
@@ -0,0 +1,55 @@
+import os
+import sys
+
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "PythonTBC.settings")
+base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.append(base_path)
+
+from commentingapp.models import Url, Comments
+from commentingapp.commenting_new import DisqusCommenting
+from tbc.models import Book, Chapters
+from django.contrib.auth.models import User
+
+class CronForCommenting(object):
+
+    def fetch_comments_from_script(self):
+        """ Fetches comments from the Disqus commenting script."""
+
+        commenting_instance = DisqusCommenting()
+        check_net = commenting_instance.check_internet_connection()
+        check_auth = commenting_instance.check_authentication("enter your disqus api public key here",
+                                                              "enter your forum name here"
+                                                              )
+        thread = commenting_instance.get_thread_ids()
+        self.comments_for_db = commenting_instance.get_comments()
+
+        return self.comments_for_db
+
+
+
+    def add_comments_to_db(self):
+
+        if not Url.objects.exists():
+            # Populates the db if it is empty.
+            for comment_details in self.comments_for_db:
+                url_instance = Url(url = comment_details["chapter_urls"])  # url_instance is a Url model instance
+                url_instance.save()
+                for comment in comment_details["comment_list"]:
+                    Comments.objects.create(url = url_instance, comments = comment)
+            return "Database is created"
+
+        else:
+            # If the db isn't empty, add only the new urls and comments.
+            for comment_details in self.comments_for_db:
+                url_object, url_status = Url.objects.get_or_create(url = comment_details["chapter_urls"])
+                url_primary_key = url_object.pk
+                for comment in comment_details["comment_list"]:
+                    Comments.objects.get_or_create(comments = comment, url_id = url_primary_key)
+            return "Database is updated."
+
+if __name__ == '__main__':
+
+    cron = CronForCommenting()
+    comments = cron.fetch_comments_from_script()
+    status = cron.add_comments_to_db()
+    print status
diff --git a/tbc_error_page/broken.json b/tbc_error_page/broken.json
new file mode 100644
index 0000000..4cfeb7b
--- /dev/null
+++ b/tbc_error_page/broken.json
@@ -0,0 +1,130 @@
+(lp1
+(dp2
+Vbroken_status
+p3
+I500
+sVbroken_url
+p4
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Fluid_Mechanics_by_John_F_Douglass/Chapter_3.ipynb
+p5
+sa(dp6
+Vbroken_status
+p7
+I500
+sVbroken_url
+p8
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Elements_of_Electric_drives/Chapter1_2.ipynb
+p9
+sa(dp10
+Vbroken_status
+p11
+I500
+sVbroken_url
+p12
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Fundamentals_Of_Physical_Chemistry_by_H._D._Crockford,_Samuel_B.Knight/Chapter_20_Radiochemistry.ipynb
+p13
+sa(dp14
+Vbroken_status
+p15
+I500
+sVbroken_url
+p16
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Elements_of_Electric_drives/Chapter3_2.ipynb
+p17
+sa(dp18
+Vbroken_status
+p19
+I500
+sVbroken_url
+p20
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Theory_Of_Machines/ch15.ipynb
+p21
+sa(dp22
+Vbroken_status
+p23
+I500
+sVbroken_url
+p24
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Principles_Of_Electronic_Communication_Systems_by_L_E_Frenzechapter13_1.ipynb
+p25
+sa(dp26
+Vbroken_status
+p27
+I500
+sVbroken_url
+p28
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Fundamentals_Of_Physical_Chemistry_by_H._D._Crockford,_Samuel_B.Knight/Chapter_12_Thermodynamics_Thermodynamic_chemistry.ipynb
+p29
+sa(dp30
+Vbroken_status
+p31
+I500
+sVbroken_url
+p32
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Fundamentals_Of_Physical_Chemistry_by_H._D._Crockford,_Samuel_B.Knight/Chapter_7_Conductivity.ipynb
+p33
+sa(dp34
+Vbroken_status
+p35
+I500
+sVbroken_url
+p36
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Principles_Of_Electronic_Communication_Systems_by_L_E_Frenze/chapter22_1.ipynb
+p37
+sa(dp38
+Vbroken_status
+p39
+I500
+sVbroken_url
+p40
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Fundamentals_Of_Physical_Chemistry_by_H._D._Crockford,_Samuel_B.Knight/Chapter_13_Thermodynamics_Entropy_and_Free_Energy.ipynb
+p41
+sa(dp42
+Vbroken_status
+p43
+I500
+sVbroken_url
+p44
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/_Diffusion:_Mass_Transfer_In_Fluid_Systems_by__E._L._Cussler/Chapter_10_Absorption.ipynb
+p45
+sa(dp46
+Vbroken_status
+p47
+I500
+sVbroken_url
+p48
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Aircraft_Structures_for_Engineering_Students/Chapter24_1.ipynb
+p49
+sa(dp50
+Vbroken_status
+p51
+I500
+sVbroken_url
+p52
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Aircraft_Structures_for_Engineering_Students/Chapter20_1.ipynb
+p53
+sa(dp54
+Vbroken_status
+p55
+I500
+sVbroken_url
+p56
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Fundamentals_Of_Physical_Chemistry_by_H._D._Crockford,_Samuel_B.Knight/Chapter_9_Ionic_Equilibria_and_Buffer_Action.ipynb
+p57
+sa(dp58
+Vbroken_status
+p59
+I500
+sVbroken_url
+p60
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/_Optical_Fiber_Communication_by_V._S._Bagad/Chapter02-Optical_Fiber_for_Telecommunication.ipynb
+p61
+sa(dp62
+Vbroken_status
+p63
+I500
+sVbroken_url
+p64
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Aircraft_Structures_for_Engineering_Students/Chapter01_1.ipynb
+p65
+sa. \ No newline at end of file
diff --git a/tbc_error_page/error.json b/tbc_error_page/error.json
new file mode 100644
index 0000000..56c952a
--- /dev/null
+++ b/tbc_error_page/error.json
@@ -0,0 +1,194 @@
+(lp1
+(dp2
+Vchapter_urls
+p3
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Beginning_C_By_Ivon_Horton/chapter13.ipynb
+p4
+sVnumber_of_errors
+p5
+I1
+sVchapter_name
+p6
+VChapter 13: The Preprocessor and Debugging
+p7
+sa(dp8
+Vchapter_urls
+p9
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Schaum's_Outlines:_Programming_with_C++/ch6.ipynb
+p10
+sVnumber_of_errors
+p11
+I2
+sVchapter_name
+p12
+VChapter 6: Arrays
+p13
+sa(dp14
+Vchapter_urls
+p15
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Schaum's_Outlines:_Programming_with_C++/ch4.ipynb
+p16
+sVnumber_of_errors
+p17
+I1
+sVchapter_name
+p18
+VChapter 4: Iteration
+p19
+sa(dp20
+Vchapter_urls
+p21
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/The_C_Book/Chapter2.ipynb
+p22
+sVnumber_of_errors
+p23
+I1
+sVchapter_name
+p24
+VChapter 2: Variables and Arithmetic
+p25
+sa(dp26
+Vchapter_urls
+p27
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Mastering_C/chapter8.ipynb
+p28
+sVnumber_of_errors
+p29
+I1
+sVchapter_name
+p30
+VChapter 8: Pointers
+p31
+sa(dp32
+Vchapter_urls
+p33
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Practical_C_Programming/Chapter_13_1.ipynb
+p34
+sVnumber_of_errors
+p35
+I1
+sVchapter_name
+p36
+VChapter 13: Simple pointers
+p37
+sa(dp38
+Vchapter_urls
+p39
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Programming_in_C/Chapter_17.ipynb
+p40
+sVnumber_of_errors
+p41
+I1
+sVchapter_name
+p42
+VChapter 17: Miscellaneous and Advanced Features
+p43
+sa(dp44
+Vchapter_urls
+p45
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Programming_in_C/Chapter_16.ipynb
+p46
+sVnumber_of_errors
+p47
+I2
+sVchapter_name
+p48
+VChapter 16: Input and Output Operations in Python
+p49
+sa(dp50
+Vchapter_urls
+p51
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/C++_Demystified:_A_Self-Teaching_Guide/chapter11.ipynb
+p52
+sVnumber_of_errors
+p53
+I1
+sVchapter_name
+p54
+VChapter 11 - What\u2019s the Address? Pointers
+p55
+sa(dp56
+Vchapter_urls
+p57
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/ANSI_C_Programming/chapter12.ipynb
+p58
+sVnumber_of_errors
+p59
+I3
+sVchapter_name
+p60
+VCHAPTER 12:FILE INPUT/OUTPUT
+p61
+sa(dp62
+Vchapter_urls
+p63
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/C_Programming:_A_Modern_Approach_by_K._N._King/Chapter9_1.ipynb
+p64
+sVnumber_of_errors
+p65
+I1
+sVchapter_name
+p66
+VChapter 9: Functions
+p67
+sa(dp68
+Vchapter_urls
+p69
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/_Mastering_C++_by_K_R_Venugopal_and_Rajkumar_Buyya/Chapter19-ExceptionHandling_1.ipynb
+p70
+sVnumber_of_errors
+p71
+I1
+sVchapter_name
+p72
+VChapter 19-Exception Handling
+p73
+sa(dp74
+Vchapter_urls
+p75
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Programming_With_Java_A_Primer/chapter17.ipynb
+p76
+sVnumber_of_errors
+p77
+I1
+sVchapter_name
+p78
+VChapter 17: Assertion & Design by Contract
+p79
+sa(dp80
+Vchapter_urls
+p81
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/How_to_think_like_a_computer_scientist_by_Allen_B_Downey/ch15.ipynb
+p82
+sVnumber_of_errors
+p83
+I2
+sVchapter_name
+p84
+VChapter 15 : File Input/Output and apmatrixes
+p85
+sa(dp86
+Vchapter_urls
+p87
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Structured_Programing_with_C++/Chapter5.ipynb
+p88
+sVnumber_of_errors
+p89
+I1
+sVchapter_name
+p90
+VChapter 5 : Strings
+p91
+sa(dp92
+Vchapter_urls
+p93
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Programming_With_Java_A_Primer/chapter13.ipynb
+p94
+sVnumber_of_errors
+p95
+I1
+sVchapter_name
+p96
+VChapter 13: Managing Errors & Exceptions
+p97
+sa. \ No newline at end of file
diff --git a/tbc_error_page/models.py b/tbc_error_page/models.py
new file mode 100644
index 0000000..ceab789
--- /dev/null
+++ b/tbc_error_page/models.py
@@ -0,0 +1,107 @@
+from django.db import models
+import os
+import cPickle
+
+def get_json_from_file(filename):
+    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)
+    if os.path.isfile(path):
+        with open(path) as json_dump:
+            # Despite the file names, the dumps are cPickle text-protocol dumps, not JSON.
+            json_data = cPickle.load(json_dump)
+            return json_data
+    else:
+        return False
+
+
+
+class Error(models.Model):
+
+    chapter_url = models.URLField(max_length = 255)
+    number_of_errors = models.IntegerField()
+    chapter_name = models.CharField(max_length = 200)
+    is_deliberate = models.IntegerField(default = 0)  # 0/1 flag; 1 marks the chapter's errors as deliberate.
+
+    def create_new_error_data(self, error_json_data):
+        # Populates an empty table.
+        for error_details in error_json_data:
+            Error.objects.create(chapter_url = error_details["chapter_urls"],
+                                 chapter_name = error_details["chapter_name"],
+                                 number_of_errors = int(error_details["number_of_errors"]),
+                                 is_deliberate = 0
+                                 )
+
+    def delete_redundant_error_data(self, error_json_data):
+        # Delete errors that no longer appear in the crawler output, i.e. have been fixed.
+        db_url_list = Error.objects.values_list("chapter_url", flat=True)
+        json_url_list = [url_list["chapter_urls"] for url_list in error_json_data]
+        stale_urls = set(db_url_list) - set(json_url_list)
+        for stale_url in stale_urls:
+            Error.objects.filter(chapter_url = stale_url).delete()
+
+    def update_error_data(self, error_json_data):
+
+        # Admittedly hacky at the moment. Will refine it.
+
+        for error_details in error_json_data:
+            # If the number of errors has increased, update the count and reset the deliberate flag.
+            if Error.objects.filter(chapter_url = error_details["chapter_urls"],
+                                    number_of_errors__lt = error_details["number_of_errors"]):
+
+                Error.objects.filter(chapter_url = error_details["chapter_urls"])\
+                    .update(number_of_errors = error_details["number_of_errors"],
+                            is_deliberate = 0
+                            )
+            # If the number of errors has decreased, do the same.
+            elif Error.objects.filter(chapter_url = error_details["chapter_urls"],
+                                      number_of_errors__gt = error_details["number_of_errors"]):
+
+                Error.objects.filter(chapter_url = error_details["chapter_urls"])\
+                    .update(number_of_errors = error_details["number_of_errors"], is_deliberate = 0)
+            else:
+                # Otherwise the chapter is new: create it, then fill in its details.
+                Error.objects.get_or_create(chapter_url = error_details["chapter_urls"],
+                                            number_of_errors = error_details["number_of_errors"]
+                                            )
+
+                Error.objects.filter(chapter_url = error_details["chapter_urls"])\
+                    .update(chapter_url = error_details["chapter_urls"],
+                            number_of_errors = error_details["number_of_errors"],
+                            chapter_name = error_details["chapter_name"]
+                            )
+
+
+
+    def update_deliberate_error(self, deliberate_error_list):
+
+        for deliberate_url in deliberate_error_list:
+            Error.objects.filter(chapter_url = deliberate_url).update(is_deliberate = 1)
+
+
+
+
+class Broken(models.Model):
+
+ broken_url = models.URLField(max_length = 255)
+ error_status = models.IntegerField()
+
+ def create_new_broken_data(self, broken_data):
+ for broken_details in broken_data:
+
+ Broken.objects.create(broken_url = broken_details["broken_url"],
+ error_status = broken_details["broken_status"])
+
+    def delete_redundant_broken_data(self, broken_data):
+        # Delete broken links that no longer appear in the crawler output.
+        db_url_list = Broken.objects.values_list("broken_url", flat=True)
+        json_url_list = [url_list["broken_url"] for url_list in broken_data]
+        redundant_urls = set(db_url_list) - set(json_url_list)
+        for delete_url in redundant_urls:
+            Broken.objects.filter(broken_url = delete_url).delete()
+
+
+ def update_broken_data(self, broken_data):
+ for broken_details in broken_data:
+
+ Broken.objects.get_or_create(broken_url = broken_details["broken_url"],
+ error_status = broken_details["broken_status"]
+ )
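
Despite their .json extension, error.json and broken.json above are cPickle text-protocol dumps of a list of dicts, which is why get_json_from_file() unpickles them instead of calling json.load. Purely illustrative, here is roughly what the loader hands back for the bundled dumps:

    from tbc_error_page.models import get_json_from_file

    broken = get_json_from_file("broken.json")
    # e.g. [{'broken_status': 500, 'broken_url': 'http://tbc-python.fossee.aero.iitb.ac.in/...'}, ...]

    errors = get_json_from_file("error.json")
    # e.g. [{'chapter_urls': '...', 'number_of_errors': 1,
    #        'chapter_name': 'Chapter 13: The Preprocessor and Debugging'}, ...]
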
diff --git a/tbc_error_page/templates/broken.html b/tbc_error_page/templates/broken.html
new file mode 100644
index 0000000..41449cd
--- /dev/null
+++ b/tbc_error_page/templates/broken.html
@@ -0,0 +1,28 @@
+{% extends "base.html" %}
+{% block title %} TBC Broken Links {% endblock %}
+{% block content %}
+ {% if not broken %}
+    <center><h4> There are no broken links </h4></center>
+ {% else %}
+ <h3><u><center>TBC Error Page </center></u></h3>
+    <h5> Hi <b><u>{{ user }}</u></b> </h5>
+ <a href = "{% url 'tbc_error_page.views.error' %}"> TBC Error Status Page </a>
+<p></p>
+ <table border = 1>
+ <tr>
+      <th><b> Sr no. </b></th>
+      <th><b> Broken Urls </b></th>
+      <th><b> HTTP status error code </b></th>
+ </tr>
+ {% for broken_data in broken %}
+ <tr>
+ <td> {{ forloop.counter }} </td>
+ <td> <a href = {{ broken_data.broken_url }} target = ""> {{ broken_data.broken_url }} </a> </td>
+ <td><b> {{ broken_data.error_status }} </b> error </td>
+ </tr>
+ {% endfor %}
+
+ </table>
+{% endif %}
+
+{% endblock %}
diff --git a/tbc_error_page/templates/deliberate.html b/tbc_error_page/templates/deliberate.html
new file mode 100644
index 0000000..89a8974
--- /dev/null
+++ b/tbc_error_page/templates/deliberate.html
@@ -0,0 +1,17 @@
+{% extends "base.html" %}
+{% block title %} Success {% endblock %}
+{% block content %}
+<p> You have added following urls as deliberate </p>
+<table border = 1>
+<tr><th>Urls</th></tr>
+
+{% for deliberate_links in deliberate %}
+
+<tr><td> {{ deliberate_links }} </td></tr>
+
+
+{% endfor %}
+</table>
+<p></p>
+<p><a href = "{% url 'tbc_error_page.views.error' %}"> <<< Go back to Error Page </a></p>
+{% endblock %}
diff --git a/tbc_error_page/templates/error.html b/tbc_error_page/templates/error.html
new file mode 100644
index 0000000..ee4c415
--- /dev/null
+++ b/tbc_error_page/templates/error.html
@@ -0,0 +1,41 @@
+{% extends "base.html" %}
+{% block title %} TBC Error Page {% endblock %}
+
+{% block content %}
+ <h3><u><center>TBC Error Page </center></u></h3>
+    <h5> Hi <b><u>{{ user }}</u></b></h5>
+ <p><a href = "{% url 'tbc_error_page.views.broken' %}"> TBC Broken Links page </a></p>
+ {% if not context %}
+ <center><h4> There are no new errors </h4></center>
+ {% else %}
+  <form name = "Send Email" action = "{% url 'tbc_error_page.views.error' %}" method = "POST"> {% csrf_token %}
+  <table border = 2>
+    <tr>
+      <th><b> Chapters With errors</b></th>
+      <th><b> Number of errors</b></th>
+      <th><b> Deliberate Errors</b></th>
+    </tr>
+
+    {% for errors in context %}
+      {% if errors.is_deliberate == 0 %}
+      <tr class = "error">
+        <td><a href = {{ errors.chapter_url }} target = "_blank"> {{ errors.chapter_name }} </a></td>
+        <td> {{ errors.number_of_errors }} </td>
+        <td> <input type = "checkbox" name = "deliberate" value = "{{ errors.chapter_url }}"> </td>
+      </tr>
+      {% endif %}
+    {% endfor %}
+ </table>
+    <input class = "btn" type = "submit" value = "Submit">
+ </form>
+{% endif %}
+{% endblock %}
diff --git a/tbc_error_page/views.py b/tbc_error_page/views.py
new file mode 100644
index 0000000..1b271ef
--- /dev/null
+++ b/tbc_error_page/views.py
@@ -0,0 +1,56 @@
+from django.shortcuts import render_to_response
+from .models import Error, Broken, get_json_from_file
+from django.contrib.auth.decorators import user_passes_test
+from django.template import RequestContext
+import json
+import os
+
+
+#@login_required(login_url="/admin/login/")
+@user_passes_test(lambda u:u.is_superuser, login_url="/admin/login")
+def error(req):
+ ci = RequestContext(req)
+ db_instance = Error()
+ error_json_data = get_json_from_file("error.json")
+
+ if not Error.objects.exists():
+ db_instance.create_new_error_data(error_json_data)
+ else:
+ db_instance.delete_redundant_error_data(error_json_data)
+ db_instance.update_error_data(error_json_data)
+
+ error_details = Error.objects.filter(is_deliberate = 0)
+
+ if req.method == "POST":
+ deliberate_urls_list = req.POST.getlist("deliberate")
+ db_instance.update_deliberate_error(deliberate_urls_list)
+
+ context = {"user":req.user, "deliberate" :deliberate_urls_list}
+
+ return render_to_response ("deliberate.html", context, ci)
+
+
+ context = {"context": error_details, "user": req.user}
+ return render_to_response ("error.html", context, ci)
+
+def broken(req):
+
+ ci = RequestContext(req)
+ db_instance = Broken()
+ broken_json_data = get_json_from_file("broken.json")
+
+ if not Broken.objects.exists():
+ db_instance.create_new_broken_data(broken_json_data)
+
+ else:
+ db_instance.delete_redundant_broken_data(broken_json_data)
+ db_instance.update_broken_data(broken_json_data)
+
+ broken = Broken.objects.all()
+ context = {"broken": broken, "user": req.user}
+ return render_to_response("broken.html", context, ci)
+
+
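
Neither view is wired into a URLconf in this commit, yet the templates reverse 'tbc_error_page.views.error' and 'tbc_error_page.views.broken' by dotted path, old-Django style. A sketch of the kind of entry that would make those {% url %} tags resolve; the URL patterns and their location in the project urls.py are assumptions:

    # Hypothetical addition to the project-level urls.py -- not part of this commit.
    from django.conf.urls import patterns, url

    urlpatterns = patterns('',
        url(r'^tbc-errors/$', 'tbc_error_page.views.error'),
        url(r'^tbc-errors/broken/$', 'tbc_error_page.views.broken'),
    )
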