summary | refs | log | tree | commit | diff
path: root/scripts/crawler/tbc_web_crawler/spiders/items.py
diff options
context:
space:
mode:
author mahesh 2016-04-01 01:41:16 +0530
committer mahesh 2016-04-01 01:41:16 +0530
commit cf1209063bf68e01d312bc870280cf43b2a616a9 (patch)
tree ee77994fa6e79822c7ef812519e9f2f171d482b5 /scripts/crawler/tbc_web_crawler/spiders/items.py
parent 9c84c0ab3a780b8055212d613776f28907535b62 (diff)
download Python-TBC-Interface-cf1209063bf68e01d312bc870280cf43b2a616a9.tar.gz
Python-TBC-Interface-cf1209063bf68e01d312bc870280cf43b2a616a9.tar.bz2
Python-TBC-Interface-cf1209063bf68e01d312bc870280cf43b2a616a9.zip
A crawler to crawl the tbc website to find errors on tbc notebooks and broken links
Diffstat (limited to 'scripts/crawler/tbc_web_crawler/spiders/items.py')
-rw-r--r-- scripts/crawler/tbc_web_crawler/spiders/items.py 18
1 files changed, 18 insertions, 0 deletions
diff --git a/scripts/crawler/tbc_web_crawler/spiders/items.py b/scripts/crawler/tbc_web_crawler/spiders/items.py
new file mode 100644
index 0000000..9dda20f
--- /dev/null
+++ b/scripts/crawler/tbc_web_crawler/spiders/items.py
@@ -0,0 +1,18 @@
+import scrapy
+
+
+class TbcErrorItems(scrapy.Item):
+
+
+ chapter_name = scrapy.Field()
+ chapter_urls = scrapy.Field()
+ completed_book_urls = scrapy.Field()
+ number_of_errors = scrapy.Field()
+ error_messages = scrapy.Field()
+
+
+
+class TbcBrokenItems(scrapy.Item):
+
+ broken_url = scrapy.Field()
+ broken_status = scrapy.Field()