Mercurial > hg > config
comparison python/find_duplicate_files.py @ 799:dbd2562cb03e
remove old way of doing things; note TODO on replacing
| author | Jeff Hammel <k0scist@gmail.com> |
|---|---|
| date | Fri, 28 Oct 2016 16:06:11 -0700 |
| parents | ab831c7621e9 |
| children | bea4dd61ae45 |
comparison
Legend: equal | deleted | inserted | replaced
| 798:720e51cb0edb | 799:dbd2562cb03e |
|---|---|
| 9 import argparse | 9 import argparse |
| 10 import os | 10 import os |
| 11 import subprocess | 11 import subprocess |
| 12 import sys | 12 import sys |
| 13 | 13 |
| 14 # module globals | |
| 15 __all__ = ['main', 'Parser'] | |
| 16 | 14 |
| 17 class Parser(argparse.ArgumentParser): | 15 class DuplicateFilesParser(argparse.ArgumentParser): |
| 18 """CLI option parser""" | 16 """CLI option parser""" |
| 17 | |
| 19 def __init__(self, **kwargs): | 18 def __init__(self, **kwargs): |
| 20 kwargs.setdefault('description', __doc__) | 19 kwargs.setdefault('description', __doc__) |
| 21 argparse.ArgumentParser.__init__(self, **kwargs) | 20 argparse.ArgumentParser.__init__(self, **kwargs) |
| 22 self.add_argument('directory') | 21 self.add_argument('directory') |
| 23 self.options = None | 22 self.options = None |
| 35 | 34 |
| 36 def main(args=sys.argv[1:]): | 35 def main(args=sys.argv[1:]): |
| 37 """CLI""" | 36 """CLI""" |
| 38 | 37 |
| 39 # parse command line options | 38 # parse command line options |
| 40 parser = Parser() | 39 parser = DuplicateFilesParser() |
| 41 options = parser.parse_args(args) | 40 options = parser.parse_args(args) |
| 42 | 41 |
| 43 output = subprocess.check_output(['ls', '-l', options.directory]).strip() | 42 # get all files |
| 44 rows = [row.strip().split() for row in output.splitlines()[1:]] | 43 raise NotImplementedError('TODO') # -> record TODO items |
| 45 | |
| 46 sizes = {} | |
| 47 for row in rows: | |
| 48 size = int(row[4]) | |
| 49 filename = row[-1] | |
| 50 sizes.setdefault(size, []).append(filename) | |
| 51 | |
| 52 duplicates = {} | |
| 53 for size, filenames in sizes.items(): | |
| 54 if len(filenames) < 2: | |
| 55 continue | |
| 56 duplicates[size] = filenames | |
| 57 | |
| 58 for size in sorted(duplicates.keys()): | |
| 59 print ('{} : '.format(size)) | |
| 60 print ('\n'.join(duplicates[size])) | |
| 61 print ('\n') | |
| 62 | 44 |
| 63 if __name__ == '__main__': | 45 if __name__ == '__main__': |
| 64 main() | 46 main() |
