diff options
-rw-r--r-- | scripts/crawler/scrapy.cfg | 11 | ||||
-rw-r--r-- | scripts/crawler/tbc_web_crawler/__init__.py | 0 | ||||
-rw-r--r-- | scripts/crawler/tbc_web_crawler/settings.py | 86 | ||||
-rw-r--r-- | scripts/crawler/tbc_web_crawler/spiders/__init__.py | 4 | ||||
-rw-r--r-- | scripts/crawler/tbc_web_crawler/spiders/items.py | 18 | ||||
-rw-r--r-- | scripts/crawler/tbc_web_crawler/spiders/tbc_spider.py | 76 | ||||
-rw-r--r-- | scripts/database_updater.py | 55 | ||||
-rw-r--r-- | tbc_error_page/broken.json | 130 | ||||
-rw-r--r-- | tbc_error_page/error.json | 194 | ||||
-rw-r--r-- | tbc_error_page/models.py | 107 | ||||
-rw-r--r-- | tbc_error_page/templates/broken.html | 28 | ||||
-rw-r--r-- | tbc_error_page/templates/deliberate.html | 17 | ||||
-rw-r--r-- | tbc_error_page/templates/error.html | 41 | ||||
-rw-r--r-- | tbc_error_page/views.py | 56 |
14 files changed, 823 insertions, 0 deletions
diff --git a/scripts/crawler/scrapy.cfg b/scripts/crawler/scrapy.cfg new file mode 100644 index 0000000..b99853f --- /dev/null +++ b/scripts/crawler/scrapy.cfg @@ -0,0 +1,11 @@ +# Automatically created by: scrapy startproject +# +# For more information about the [deploy] section see: +# https://scrapyd.readthedocs.org/en/latest/deploy.html + +[settings] +default = tbc_web_crawler.settings + +[deploy] +#url = http://localhost:6800/ +project = tbc_web_crawler diff --git a/scripts/crawler/tbc_web_crawler/__init__.py b/scripts/crawler/tbc_web_crawler/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/scripts/crawler/tbc_web_crawler/__init__.py diff --git a/scripts/crawler/tbc_web_crawler/settings.py b/scripts/crawler/tbc_web_crawler/settings.py new file mode 100644 index 0000000..03ba836 --- /dev/null +++ b/scripts/crawler/tbc_web_crawler/settings.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- + +# Scrapy settings for tbc_web_crawler project +# +# For simplicity, this file contains only settings considered important or +# commonly used. 
You can find more settings consulting the documentation: +# +# http://doc.scrapy.org/en/latest/topics/settings.html +# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html +# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html + +BOT_NAME = 'tbc_web_crawler' + +SPIDER_MODULES = ['tbc_web_crawler.spiders'] +NEWSPIDER_MODULE = 'tbc_web_crawler.spiders' + + +# Crawl responsibly by identifying yourself (and your website) on the user-agent +#USER_AGENT = 'tbc_web_crawler (+http://www.yourdomain.com)' + +# Configure maximum concurrent requests performed by Scrapy (default: 16) +CONCURRENT_REQUESTS=100 + +# Configure a delay for requests for the same website (default: 0) +# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay +# See also autothrottle settings and docs +#DOWNLOAD_DELAY=3 +# The download delay setting will honor only one of: +#CONCURRENT_REQUESTS_PER_DOMAIN=16 +#CONCURRENT_REQUESTS_PER_IP=16 + +# Disable cookies (enabled by default) +#COOKIES_ENABLED=False + +# Disable Telnet Console (enabled by default) +#TELNETCONSOLE_ENABLED=False + +# Override the default request headers: +#DEFAULT_REQUEST_HEADERS = { +# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', +# 'Accept-Language': 'en', +#} + +# Enable or disable spider middlewares +# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html +#SPIDER_MIDDLEWARES = { +# 'tbc_web_crawler.middlewares.MyCustomSpiderMiddleware': 543, +#} + +# Enable or disable downloader middlewares +# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html +#DOWNLOADER_MIDDLEWARES = { + #'scrapy.downloadermiddlewares.retry.RetryMiddleware': None +# 'tbc_web_crawler.middlewares.MyCustomDownloaderMiddleware': 543, +#} + +# Enable or disable extensions +# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html +#EXTENSIONS = { +# 'scrapy.telnet.TelnetConsole': None, +#} + +# Configure item 
pipelines +# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html +#ITEM_PIPELINES = { +# 'tbc_web_crawler.pipelines.SomePipeline': 300, +#} + +# Enable and configure the AutoThrottle extension (disabled by default) +# See http://doc.scrapy.org/en/latest/topics/autothrottle.html +# NOTE: AutoThrottle will honour the standard settings for concurrency and delay +#AUTOTHROTTLE_ENABLED=True +# The initial download delay +#AUTOTHROTTLE_START_DELAY=5 +# The maximum download delay to be set in case of high latencies +#AUTOTHROTTLE_MAX_DELAY=60 +# Enable showing throttling stats for every response received: +#AUTOTHROTTLE_DEBUG=False + +# Enable and configure HTTP caching (disabled by default) +# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings +#HTTPCACHE_ENABLED=True +#HTTPCACHE_EXPIRATION_SECS=0 +#HTTPCACHE_DIR='httpcache' +#HTTPCACHE_IGNORE_HTTP_CODES=[] +#HTTPCACHE_STORAGE='scrapy.extensions.httpcache.FilesystemCacheStorage' diff --git a/scripts/crawler/tbc_web_crawler/spiders/__init__.py b/scripts/crawler/tbc_web_crawler/spiders/__init__.py new file mode 100644 index 0000000..ebd689a --- /dev/null +++ b/scripts/crawler/tbc_web_crawler/spiders/__init__.py @@ -0,0 +1,4 @@ +# This package will contain the spiders of your Scrapy project +# +# Please refer to the documentation for information on how to create and manage +# your spiders. 
# === scripts/crawler/tbc_web_crawler/spiders/items.py ===
import scrapy


class TbcErrorItems(scrapy.Item):
    """Item for a chapter notebook that rendered with error output."""

    chapter_name = scrapy.Field()
    chapter_urls = scrapy.Field()
    completed_book_urls = scrapy.Field()
    number_of_errors = scrapy.Field()
    error_messages = scrapy.Field()


class TbcBrokenItems(scrapy.Item):
    """Item for a chapter link that answered with an HTTP error status."""

    broken_url = scrapy.Field()
    broken_status = scrapy.Field()


# === scripts/crawler/tbc_web_crawler/spiders/tbc_spider.py ===
import scrapy
from items import TbcErrorItems, TbcBrokenItems
from scrapy.utils.response import get_base_url
from scrapy.utils.url import urljoin_rfc
from scrapy.http import Request

import os
import json

# Start every crawl from a clean output file.
if os.path.isfile('items.json'):
    os.remove('items.json')


class TbcSpider(scrapy.Spider):
    """Crawl the completed-books listing and report chapters whose rendered
    notebook contains error output, plus links that return HTTP errors."""

    # Crawler name: run with ``scrapy crawl tbc_spider``.
    name = "tbc_spider"
    start_urls = ["http://tbc-python.fossee.aero.iitb.ac.in/completed-books/"]
    # HTTP error codes passed through to the callbacks instead of being
    # dropped by Scrapy's default error handling.
    handle_httpstatus_list = [404, 500, 502]

    def parse(self, response):
        """Request every book-details page linked from the listing."""
        base_url = get_base_url(response)
        for book_link in response.xpath(
                '//a[contains(@href,"book-details")]/@href').extract():
            yield scrapy.Request(urljoin_rfc(base_url, book_link),
                                 callback=self.parse_book_contents)

    def parse_book_contents(self, response):
        """Request every chapter (convert-notebook) page of one book."""
        # The chapter hrefs are joined against the host root, not the
        # book-details URL itself.
        base_url = get_base_url(response).split('/book-details')[0]
        for chapter_link in response.xpath(
                '//a[contains(@href,"convert-notebook")]/@href').extract():
            yield scrapy.Request(urljoin_rfc(base_url, chapter_link),
                                 callback=self.parse_chapter_details)

    def parse_chapter_details(self, response):
        """Yield a TbcBrokenItems for HTTP failures, otherwise a
        TbcErrorItems when the rendered notebook shows error output."""
        headings = response.xpath('//h1/text()').extract()
        # Fall back to the URL when the page carries no <h1> title.
        chapter_details = headings if headings else [response.url]

        error_tag = response.xpath(
            '//div[@class="output_subarea output_text output_error"]')

        if response.status in self.handle_httpstatus_list:
            broken_items = TbcBrokenItems()
            broken_items['broken_url'] = response.url
            broken_items['broken_status'] = response.status
            yield broken_items
        elif error_tag:
            items = TbcErrorItems()
            items['chapter_name'] = chapter_details[0]
            items['chapter_urls'] = response.url
            items['number_of_errors'] = len(error_tag)
            # NOTE(review): the referer and the per-error message texts were
            # collected but never stored; re-enable the corresponding fields
            # once a consumer needs them.
            yield items


# === scripts/database_updater.py ===
import os
import sys

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "PythonTBC.settings")
base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(base_path)

from commentingapp.models import Url, Comments
from commentingapp.commenting_new import DisqusCommenting
from tbc.models import Book, Chapters
from django.contrib.auth.models import User


class CronForCommenting(object):
    """Cron helper that pulls Disqus comments and mirrors them in the DB."""

    def fetch_comments_from_script(self):
        """Fetch comments through DisqusCommenting and cache them on self."""
        commenting_instance = DisqusCommenting()
        # Connectivity / auth checks run for their side effects; their
        # return values were never inspected by the original code either.
        commenting_instance.check_internet_connection()
        commenting_instance.check_authentication(
            "enter your disqus api public key here",
            "enter your forum name here")
        commenting_instance.get_thread_ids()
        self.comments_for_db = commenting_instance.get_comments()
        return self.comments_for_db

    def add_comments_to_db(self):
        """Create or update Url/Comments rows from the fetched comments.

        Requires fetch_comments_from_script() to have been called first.
        """
        if not Url.objects.exists():
            # Empty table: bulk-populate it.
            for comment_details in self.comments_for_db:
                url_instance = Url(url=comment_details["chapter_urls"])
                url_instance.save()
                for comment in comment_details["comment_list"]:
                    Comments.objects.create(url=url_instance,
                                            comments=comment)
            return "Database is created"
        else:
            # Existing data: insert only rows that are new.
            for comment_details in self.comments_for_db:
                url_object, _ = Url.objects.get_or_create(
                    url=comment_details["chapter_urls"])
                for comment in comment_details["comment_list"]:
                    Comments.objects.get_or_create(comments=comment,
                                                   url_id=url_object.pk)
            return "Database is updated."
+ +if __name__ == '__main__': + + a = CronForCommenting() + b = a.fetch_comments_from_script() + c = a.add_comments_to_db() + print c diff --git a/tbc_error_page/broken.json b/tbc_error_page/broken.json new file mode 100644 index 0000000..4cfeb7b --- /dev/null +++ b/tbc_error_page/broken.json @@ -0,0 +1,130 @@ +(lp1 +(dp2 +Vbroken_status +p3 +I500 +sVbroken_url +p4 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Fluid_Mechanics_by_John_F_Douglass/Chapter_3.ipynb +p5 +sa(dp6 +Vbroken_status +p7 +I500 +sVbroken_url +p8 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Elements_of_Electric_drives/Chapter1_2.ipynb +p9 +sa(dp10 +Vbroken_status +p11 +I500 +sVbroken_url +p12 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Fundamentals_Of_Physical_Chemistry_by_H._D._Crockford,_Samuel_B.Knight/Chapter_20_Radiochemistry.ipynb +p13 +sa(dp14 +Vbroken_status +p15 +I500 +sVbroken_url +p16 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Elements_of_Electric_drives/Chapter3_2.ipynb +p17 +sa(dp18 +Vbroken_status +p19 +I500 +sVbroken_url +p20 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Theory_Of_Machines/ch15.ipynb +p21 +sa(dp22 +Vbroken_status +p23 +I500 +sVbroken_url +p24 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Principles_Of_Electronic_Communication_Systems_by_L_E_Frenzechapter13_1.ipynb +p25 +sa(dp26 +Vbroken_status +p27 +I500 +sVbroken_url +p28 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Fundamentals_Of_Physical_Chemistry_by_H._D._Crockford,_Samuel_B.Knight/Chapter_12_Thermodynamics_Thermodynamic_chemistry.ipynb +p29 +sa(dp30 +Vbroken_status +p31 +I500 +sVbroken_url +p32 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Fundamentals_Of_Physical_Chemistry_by_H._D._Crockford,_Samuel_B.Knight/Chapter_7_Conductivity.ipynb +p33 +sa(dp34 +Vbroken_status +p35 +I500 +sVbroken_url +p36 
+Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Principles_Of_Electronic_Communication_Systems_by_L_E_Frenze/chapter22_1.ipynb +p37 +sa(dp38 +Vbroken_status +p39 +I500 +sVbroken_url +p40 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Fundamentals_Of_Physical_Chemistry_by_H._D._Crockford,_Samuel_B.Knight/Chapter_13_Thermodynamics_Entropy_and_Free_Energy.ipynb +p41 +sa(dp42 +Vbroken_status +p43 +I500 +sVbroken_url +p44 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/_Diffusion:_Mass_Transfer_In_Fluid_Systems_by__E._L._Cussler/Chapter_10_Absorption.ipynb +p45 +sa(dp46 +Vbroken_status +p47 +I500 +sVbroken_url +p48 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Aircraft_Structures_for_Engineering_Students/Chapter24_1.ipynb +p49 +sa(dp50 +Vbroken_status +p51 +I500 +sVbroken_url +p52 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Aircraft_Structures_for_Engineering_Students/Chapter20_1.ipynb +p53 +sa(dp54 +Vbroken_status +p55 +I500 +sVbroken_url +p56 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Fundamentals_Of_Physical_Chemistry_by_H._D._Crockford,_Samuel_B.Knight/Chapter_9_Ionic_Equilibria_and_Buffer_Action.ipynb +p57 +sa(dp58 +Vbroken_status +p59 +I500 +sVbroken_url +p60 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/_Optical_Fiber_Communication_by_V._S._Bagad/Chapter02-Optical_Fiber_for_Telecommunication.ipynb +p61 +sa(dp62 +Vbroken_status +p63 +I500 +sVbroken_url +p64 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Aircraft_Structures_for_Engineering_Students/Chapter01_1.ipynb +p65 +sa.
\ No newline at end of file diff --git a/tbc_error_page/error.json b/tbc_error_page/error.json new file mode 100644 index 0000000..56c952a --- /dev/null +++ b/tbc_error_page/error.json @@ -0,0 +1,194 @@ +(lp1 +(dp2 +Vchapter_urls +p3 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Beginning_C_By_Ivon_Horton/chapter13.ipynb +p4 +sVnumber_of_errors +p5 +I1 +sVchapter_name +p6 +VChapter 13: The Preprocessor and Debugging +p7 +sa(dp8 +Vchapter_urls +p9 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Schaum's_Outlines:_Programming_with_C++/ch6.ipynb +p10 +sVnumber_of_errors +p11 +I2 +sVchapter_name +p12 +VChapter 6: Arrays +p13 +sa(dp14 +Vchapter_urls +p15 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Schaum's_Outlines:_Programming_with_C++/ch4.ipynb +p16 +sVnumber_of_errors +p17 +I1 +sVchapter_name +p18 +VChapter 4: Iteration +p19 +sa(dp20 +Vchapter_urls +p21 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/The_C_Book/Chapter2.ipynb +p22 +sVnumber_of_errors +p23 +I1 +sVchapter_name +p24 +VChapter 2: Variables and Arithmetic +p25 +sa(dp26 +Vchapter_urls +p27 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Mastering_C/chapter8.ipynb +p28 +sVnumber_of_errors +p29 +I1 +sVchapter_name +p30 +VChapter 8: Pointers +p31 +sa(dp32 +Vchapter_urls +p33 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Practical_C_Programming/Chapter_13_1.ipynb +p34 +sVnumber_of_errors +p35 +I1 +sVchapter_name +p36 +VChapter 13: Simple pointers +p37 +sa(dp38 +Vchapter_urls +p39 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Programming_in_C/Chapter_17.ipynb +p40 +sVnumber_of_errors +p41 +I1 +sVchapter_name +p42 +VChapter 17: Miscellaneous and Advanced Features +p43 +sa(dp44 +Vchapter_urls +p45 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Programming_in_C/Chapter_16.ipynb +p46 +sVnumber_of_errors +p47 +I2 +sVchapter_name +p48 +VChapter 16: Input and Output Operations in Python +p49 +sa(dp50 
+Vchapter_urls +p51 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/C++_Demystified:_A_Self-Teaching_Guide/chapter11.ipynb +p52 +sVnumber_of_errors +p53 +I1 +sVchapter_name +p54 +VChapter 11 - What\u2019s the Address? Pointers +p55 +sa(dp56 +Vchapter_urls +p57 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/ANSI_C_Programming/chapter12.ipynb +p58 +sVnumber_of_errors +p59 +I3 +sVchapter_name +p60 +VCHAPTER 12:FILE INPUT/OUTPUT +p61 +sa(dp62 +Vchapter_urls +p63 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/C_Programming:_A_Modern_Approach_by_K._N._King/Chapter9_1.ipynb +p64 +sVnumber_of_errors +p65 +I1 +sVchapter_name +p66 +VChapter 9: Functions +p67 +sa(dp68 +Vchapter_urls +p69 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/_Mastering_C++_by_K_R_Venugopal_and_Rajkumar_Buyya/Chapter19-ExceptionHandling_1.ipynb +p70 +sVnumber_of_errors +p71 +I1 +sVchapter_name +p72 +VChapter 19-Exception Handling +p73 +sa(dp74 +Vchapter_urls +p75 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Programming_With_Java_A_Primer/chapter17.ipynb +p76 +sVnumber_of_errors +p77 +I1 +sVchapter_name +p78 +VChapter 17: Assertion & Design by Contract +p79 +sa(dp80 +Vchapter_urls +p81 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/How_to_think_like_a_computer_scientist_by_Allen_B_Downey/ch15.ipynb +p82 +sVnumber_of_errors +p83 +I2 +sVchapter_name +p84 +VChapter 15 : File Input/Output and apmatrixes +p85 +sa(dp86 +Vchapter_urls +p87 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Structured_Programing_with_C++/Chapter5.ipynb +p88 +sVnumber_of_errors +p89 +I1 +sVchapter_name +p90 +VChapter 5 : Strings +p91 +sa(dp92 +Vchapter_urls +p93 +Vhttp://tbc-python.fossee.aero.iitb.ac.in/convert-notebook/Programming_With_Java_A_Primer/chapter13.ipynb +p94 +sVnumber_of_errors +p95 +I1 +sVchapter_name +p96 +VChapter 13: Managing Errors & Exceptions +p97 +sa.
\ No newline at end of file diff --git a/tbc_error_page/models.py b/tbc_error_page/models.py new file mode 100644 index 0000000..ceab789 --- /dev/null +++ b/tbc_error_page/models.py @@ -0,0 +1,107 @@ +from django.db import models +import os +import cPickle + +def get_json_from_file(filename): + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename) + if os.path.isfile(path): + with open(path) as json_dump: + json_data =cPickle.load(json_dump) + return json_data + else: + return False + + + +class Error(models.Model): + + chapter_url = models.URLField(max_length = 255) + number_of_errors = models.IntegerField() + chapter_name = models.CharField(max_length = 200,) + is_deliberate = models.IntegerField(default = False) + + def create_new_error_data(self, error_json_data): + # Populates an empty table + for error_details in error_json_data: + Error.objects.create(chapter_url = error_details["chapter_urls"], + chapter_name = error_details["chapter_name"], + number_of_errors = int(error_details["number_of_errors"]), + is_deliberate = 0 + ) + + def delete_redundant_error_data(self, error_json_data): + # delete errors which have been solved + for error_details in error_json_data: + db_url_list = Error.objects.values_list("chapter_url", flat=True) + json_url_list = [url_list["chapter_urls"] for url_list in error_json_data] + c = set(db_url_list)-set(json_url_list) #change variable name. + for somelist in c: + Error.objects.filter(chapter_url = somelist).delete() + + def update_error_data(self, error_json_data): + + # Agreeably hacky at the moment. Will refine it. 
+ + for error_details in error_json_data: + # if number of errors have increased + if Error.objects.filter(chapter_url = error_details["chapter_urls"], + number_of_errors__lt = error_details["number_of_errors"]): + + Error.objects.filter(chapter_url = error_details["chapter_urls"])\ + .update(number_of_errors = error_details["number_of_errors"], + is_deliberate = 0 + ) + # if number of errors have decreased + elif Error.objects.filter(chapter_url = error_details["chapter_urls"], + number_of_errors__gt = error_details["number_of_errors"]): + + Error.objects.filter(chapter_url = error_details["chapter_urls"])\ + .update(number_of_errors = error_details["number_of_errors"], is_deliberate = 0) + else: + # if new errors have been added. + Error.objects.get_or_create(chapter_url = error_details["chapter_urls"], + number_of_errors = error_details["number_of_errors"] + ) + + Error.objects.filter(chapter_url = error_details["chapter_urls"])\ + .update(chapter_url = error_details["chapter_urls"], + number_of_errors = error_details["number_of_errors"], + chapter_name = error_details["chapter_name"] + ) + + + + def update_deliberate_error(self, deliberate_error_list): + + for deliberate_urls in deliberate_error_list: + a = Error.objects.filter(chapter_url = deliberate_urls).update(is_deliberate = 1) + + + + +class Broken(models.Model): + + broken_url = models.URLField(max_length = 255) + error_status = models.IntegerField() + + def create_new_broken_data(self, broken_data): + for broken_details in broken_data: + + Broken.objects.create(broken_url = broken_details["broken_url"], + error_status = broken_details["broken_status"]) + + def delete_redundant_broken_data(self, broken_data): + for broken_details in broken_data: + db_url_list = Broken.objects.values_list("broken_url", flat=True) + json_url_list = [url_list["broken_url"] for url_list in broken_data] + redundant_url = set(db_url_list)-set(json_url_list) #change variable name. 
+ for delete_url in redundant_url: + Broken.objects.filter(broken_url = delete_url).delete() + + + def update_broken_data(self, broken_data): + for broken_details in broken_data: + + Broken.objects.get_or_create(broken_url = broken_details["broken_url"], + error_status = broken_details["broken_status"] + ) diff --git a/tbc_error_page/templates/broken.html b/tbc_error_page/templates/broken.html new file mode 100644 index 0000000..41449cd --- /dev/null +++ b/tbc_error_page/templates/broken.html @@ -0,0 +1,28 @@ +{% extends "base.html" %} +{% block title %} TBC Broken Links {% endblock %} +{% block content %} + {% if not broken %} + <center><h4> There are no new comments </h4></center> + {% else %} + <h3><u><center>TBC Error Page </center></u></h3> + <h5> Hi <b><u> {{user}} </b><u> </h5> + <a href = "{% url 'tbc_error_page.views.error' %}"> TBC Error Status Page </a> +<p></p> + <table border = 1> + <tr> + <th> Sr no. </b></th> + <th> Broken Urls </b></th> + <th> <b> HTTP status error code </th> + </tr> + {% for broken_data in broken %} + <tr> + <td> {{ forloop.counter }} </td> + <td> <a href = {{ broken_data.broken_url }} target = ""> {{ broken_data.broken_url }} </a> </td> + <td><b> {{ broken_data.error_status }} </b> error </td> + </tr> + {% endfor %} + + </table> +{% endif %} + +{% endblock %} diff --git a/tbc_error_page/templates/deliberate.html b/tbc_error_page/templates/deliberate.html new file mode 100644 index 0000000..89a8974 --- /dev/null +++ b/tbc_error_page/templates/deliberate.html @@ -0,0 +1,17 @@ +{% extends "base.html" %} +{% block title %} Success {% endblock %} +{% block content %} +<p> You have added following urls as deliberate </p> +<table border = 1> +<th>Urls</th> + +{% for deliberate_links in deliberate %} + +<tr><td> {{ deliberate_links }} </tr></td> + + +{% endfor %} +</table> +<p></p> +<p><a href = "{% url 'tbc_error_page.views.error' %}"> <<< Go back to Error Page </a></p> +{% endblock %} diff --git a/tbc_error_page/templates/error.html 
b/tbc_error_page/templates/error.html new file mode 100644 index 0000000..ee4c415 --- /dev/null +++ b/tbc_error_page/templates/error.html @@ -0,0 +1,41 @@ +{% extends "base.html" %} +{% block title %} TBC Error Page {% endblock %} + + + </head> + +{% block content %} + <body> + <h3><u><center>TBC Error Page </center></u></h3> + <h5> Hi <b><u>{{ user }} </b></u></h5> + <p><a href = "{% url 'tbc_error_page.views.broken' %}"> TBC Broken Links page </a></p> + {% if not context %} + <center><h4> There are no new errors </h4></center> + {% else %} + <table border = 2> + <tr> + <td><b> Chapters With errors</b></td> + <td><b> Number of errors</b></td> + <td><b> Delibrate Errors</b></td> + </tr> + + <form name = "Send Email" action = "{% url 'tbc_error_page.views.error' %}" method = "POST"> {% csrf_token %} + {% for errors in context %} + <div class = "error""> + + <tr> + + {% if errors.is_deliberate == 0 %} + <td><a href = {{ errors.chapter_url }} target = "_blank"> {{ errors.chapter_name }} </a></td> + <td> {{ errors.number_of_errors }} </td> + <td> <input type = "checkbox" name = "deliberate" value = "{{ errors.chapter_url }}"> + {% endif %} + </tr> + </div> + + {% endfor %} + </table> + <input class = "btn" type = "submit" value = "Submit"> </input> + </form> +{% endif %} +{% endblock %} diff --git a/tbc_error_page/views.py b/tbc_error_page/views.py new file mode 100644 index 0000000..1b271ef --- /dev/null +++ b/tbc_error_page/views.py @@ -0,0 +1,56 @@ +from django.shortcuts import render_to_response +from .models import Error, Broken, get_json_from_file +from django.contrib.auth.decorators import user_passes_test +from django.template import RequestContext +import json +import os + + +#@login_required(login_url="/admin/login/") +@user_passes_test(lambda u:u.is_superuser, login_url="/admin/login") + + + +def error(req): + ci = RequestContext(req) + db_instance = Error() + error_json_data = get_json_from_file("error.json") + + if not Error.objects.exists(): + 
db_instance.create_new_error_data(error_json_data) + else: + db_instance.delete_redundant_error_data(error_json_data) + db_instance.update_error_data(error_json_data) + + error_details = Error.objects.filter(is_deliberate = 0) + + if req.method == "POST": + deliberate_urls_list = req.POST.getlist("deliberate") + db_instance.update_deliberate_error(deliberate_urls_list) + + context = {"user":req.user, "deliberate" :deliberate_urls_list} + + return render_to_response ("deliberate.html", context, ci) + + + context = {"context": error_details, "user": req.user} + return render_to_response ("error.html", context, ci) + +def broken(req): + + ci = RequestContext(req) + db_instance = Broken() + broken_json_data = get_json_from_file("broken.json") + + if not Broken.objects.exists(): + db_instance.create_new_broken_data(broken_json_data) + + else: + db_instance.delete_redundant_broken_data(broken_json_data) + db_instance.update_broken_data(broken_json_data) + + broken = Broken.objects.all() + context = {"broken": broken, "user": req.user} + return render_to_response("broken.html", context, ci) + + |