summary refs log tree commit diff
path: root/scripts/split_json.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/split_json.py')
-rw-r--r-- scripts/split_json.py 20
1 files changed, 20 insertions, 0 deletions
diff --git a/scripts/split_json.py b/scripts/split_json.py
new file mode 100644
index 0000000..baa0b90
--- /dev/null
+++ b/scripts/split_json.py
@@ -0,0 +1,20 @@
+import cPickle
+import json
+from os.path import dirname, abspath,join
+try:
+    with open('crawler/items.json', "r") as json_dump:  # path is relative to the CWD
+        json_data = json.load(json_dump)
+except IOError:  # open() raises IOError (not ValueError) when the file is missing
+    print "Couldn't find file"
+except ValueError:  # json.load() raises ValueError on malformed JSON
+    print "Couldn't parse crawler/items.json"
+else:
+    # Both pickles go to tbc_error_page/ under the parent of this script's directory.
+    out_dir = join(dirname(abspath(dirname(__file__))), 'tbc_error_page')
+    # Items are split by the prefix of their str() form (presumably a Python 2 dict repr).
+    a = [saved_data for saved_data in json_data if str(saved_data).startswith("{u'ch")]
+    with open(join(out_dir, 'error.pickle'), "wb") as error_json:  # binary mode for pickle
+        cPickle.dump(a, error_json)
+    b = [saved_data for saved_data in json_data if str(saved_data).startswith("{u'br")]
+    with open(join(out_dir, 'broken.pickle'), "wb") as broken_json:
+        cPickle.dump(b, broken_json)