diff options
author | mahesh | 2016-04-01 01:41:16 +0530 |
---|---|---|
committer | mahesh | 2016-04-01 01:41:16 +0530 |
commit | cf1209063bf68e01d312bc870280cf43b2a616a9 (patch) | |
tree | ee77994fa6e79822c7ef812519e9f2f171d482b5 /scripts/crawler/tbc_web_crawler/spiders/items.py | |
parent | 9c84c0ab3a780b8055212d613776f28907535b62 (diff) | |
download | Python-TBC-Interface-cf1209063bf68e01d312bc870280cf43b2a616a9.tar.gz Python-TBC-Interface-cf1209063bf68e01d312bc870280cf43b2a616a9.tar.bz2 Python-TBC-Interface-cf1209063bf68e01d312bc870280cf43b2a616a9.zip |
A crawler to crawl the tbc website to find errors on tbc notebooks and broken links
Diffstat (limited to 'scripts/crawler/tbc_web_crawler/spiders/items.py')
-rw-r--r-- | scripts/crawler/tbc_web_crawler/spiders/items.py | 18 |
1 files changed, 18 insertions, 0 deletions
```diff
diff --git a/scripts/crawler/tbc_web_crawler/spiders/items.py b/scripts/crawler/tbc_web_crawler/spiders/items.py
new file mode 100644
index 0000000..9dda20f
--- /dev/null
+++ b/scripts/crawler/tbc_web_crawler/spiders/items.py
@@ -0,0 +1,18 @@
+import scrapy
+
+
+class TbcErrorItems(scrapy.Item):
+
+
+    chapter_name = scrapy.Field()
+    chapter_urls = scrapy.Field()
+    completed_book_urls = scrapy.Field()
+    number_of_errors = scrapy.Field()
+    error_messages = scrapy.Field()
+
+
+
+class TbcBrokenItems(scrapy.Item):
+
+    broken_url = scrapy.Field()
+    broken_status = scrapy.Field()
```