diff options
Diffstat (limited to 'duplicate.py')
-rw-r--r-- | duplicate.py | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/duplicate.py b/duplicate.py new file mode 100644 index 00000000..a95cee35 --- /dev/null +++ b/duplicate.py @@ -0,0 +1,138 @@ +import os +import shutil +import sys + + +def remove_duplcates(book_path): + """ + This function will find latest version and remove duplicates of a chapter + to backup folder in current path. function returns number of files moved. + """ + + moved_file_count = 0 + flag = 0 + dic={} + keep=[] + delete = [] + latest_chapter = [] + temp_list =[] + temp_list_updated =[] + chapter_name=[] + for path,dirname,filenames in os.walk(book_path,topdown=False): + dic[path]={} + for files in filenames: + if files.endswith('.ipynb'): + dot_index = files.index('.') + chapter = files[:dot_index]+'_' + chapter_name.append(chapter) + chapter_name.sort() + + + for name in chapter_name: + x =[i for i in chapter_name if name in i] + if len(x)==1: + temp_list.append(x[0]) + continue + x.sort() + key_name = x.pop(0) + for j in x: + if dic[path].has_key(key_name): + dic[path][key_name].append(j) + else: + dic[path][key_name]=[j] + + for i in temp_list: + for key in dic[path].keys(): + if key in i: + temp_list_updated.append(i) + + single_chapter = set(temp_list).difference(set(temp_list_updated)) + + + for key in dic[path].keys(): + if len(dic[path][key])==0: + latest_chapter.append(key) + else: + latest_chapter.append(max(dic[path][key])) + + latest_chapter_updated = set(latest_chapter).union(single_chapter) + + for path,dirname,filenames in os.walk(book_path,topdown=False): + + for files in filenames: + if files.rfind('.')>0: + dot_index = files.rindex('.') + files_update = files[:dot_index]+'_' + else: + files_update = files + + for latest in latest_chapter_updated: + if files_update.__contains__(latest): + flag = 1 + keep.append(os.path.join(path,files)) + break + else: + flag = 0 + + if not flag and ('.ipynb' in files or '.html' in files): + delete.append(os.path.join(path,files)) + + moved_file_count = moved_file_count+len(delete) + + book_name = path.split('/')[-1] + + try: + os.mkdir(os.path.join(path,book_name+'_version_backup')) + except OSError: + pass + + while delete: + try: + shutil.move(delete.pop(),os.path.join(path,book_name+'_version_backup')) + except Exception: + pass + + return moved_file_count + +def remove_emptyfolders(book_path): + + for path,dirname,filenames in os.walk(book_path,topdown=False): + if len(os.listdir(path))==0: + os.rmdir(path) + +def move_backupfolders(book_path,root_backup): + + for path,dirname,filenames in os.walk(book_path,topdown=False): + if 'version_backup' in path: + shutil.move(path,root_backup) + break + else: + pass + + +def main(book_path,root_backup): + remove_emptyfolders(book_path) + print "Removing duplicates in given path:\n" + number_of_moved = remove_duplcates(book_path) + print ("moved {0} files to backup folder in given path\n".format(number_of_moved)) + print "Removing empty folders in given path:\n" + remove_emptyfolders(book_path) + print "Moving version_backup to backup folder:\n" + move_backupfolders(book_path,root_backup) + + +if __name__=='__main__': + cwd = os.getcwd() + try: + os.mkdir(os.path.join(cwd,'backup')) + except OSError: + pass + root_backup = os.path.join(cwd,'backup') + #main(os.sep.join((cwd,'Analog_and_Digital_Communications_by_H_P_Hsu')),root_backup=root_backup) + + dirs = os.listdir(cwd) + for k in dirs: + if os.path.isdir(os.path.join(cwd,k)) and (k != 'backup'): + print '----------------------------------------------------------' + print k + main(os.path.join(cwd,k),root_backup) |