Mercurial > hg > config
comparison python/url.py @ 754:f011ec45b8e8
add example load type interface
| author | Jeff Hammel <k0scist@gmail.com> |
|---|---|
| date | Fri, 03 Jul 2015 21:07:03 -0700 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 753:05fef8e5b8a9 | 754:f011ec45b8e8 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 | |
| 4 """ | |
| 5 url manipulation | |
| 6 """ | |
| 7 | |
| 8 import argparse | |
| 9 import os | |
| 10 import shutil | |
| 11 import subprocess | |
| 12 import sys | |
| 13 import tempfile | |
| 14 import urlparse | |
| 15 import urllib2 | |
| 16 | |
# public names exported by a star-import of this module
__all__ = ['load', 'main']

# the string types to test against with isinstance
string = (str, unicode)
| 19 | |
def ensure_dir(directory):
    """
    Make sure `directory` exists as a directory and return it.

    A missing path is created (including intermediate directories);
    an existing path is asserted to be a directory.
    """
    if not os.path.exists(directory):
        # nothing there yet: create the whole chain
        os.makedirs(directory)
        return directory
    assert os.path.isdir(directory)
    return directory
| 27 | |
def isURL(url):
    """Return True when `url` contains a '://' scheme separator."""
    has_separator = '://' in url
    return has_separator
| 30 | |
def read_s3(url):
    """
    Return the contents of the S3 object at `url`.

    The object is downloaded with the external `s3cmd` program into a
    private temporary directory and read back; the directory is removed
    afterwards whether or not the download succeeded.

    Raises subprocess.CalledProcessError if `s3cmd` exits non-zero.
    """
    # mkdtemp() creates a directory atomically, avoiding the
    # predictable-name race of the deprecated tempfile.mktemp().
    # The target file itself is deliberately not pre-created, so that
    # `s3cmd get` writes to a fresh path as it did before.
    directory = tempfile.mkdtemp()
    name = os.path.join(directory, 'download')
    try:
        subprocess.check_output(['s3cmd', 'get', url, name])
        with open(name) as f:
            return f.read()
    finally:
        # single cleanup point for both success and failure
        shutil.rmtree(directory, ignore_errors=True)
| 42 | |
def read_http(url):
    """
    Return the body of the HTTP(S) resource at `url`.

    Errors raised by `urllib2.urlopen` are propagated to the caller.
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # close the connection explicitly instead of relying on
        # garbage collection
        response.close()
| 45 | |
def read_file(url):
    """
    Return the contents of a local file.

    `url` may be a plain filesystem path or a 'file://' URL; in the
    latter case the 'file://' prefix is stripped before opening.
    """
    # named `prefix` so it does not shadow the module-level scheme()
    prefix = 'file://'
    if url.startswith(prefix):
        url = url[len(prefix):]
    # context manager closes the handle deterministically
    with open(url) as f:
        return f.read()
| 51 | |
# dispatch table: URL scheme -> function returning the contents of a URL
loaders = dict(s3=read_s3,
               http=read_http,
               https=read_http,
               file=read_file)
| 57 | |
def scheme(url):
    """
    Return the scheme of `url`.

    Anything without a '://' separator is treated as a local path
    and reported as 'file'.
    """
    if '://' not in url:
        return 'file'
    return urlparse.urlsplit(url).scheme
| 63 | |
def parent(url):
    """
    Return the parent location of `url`.

    For a URL, this is everything before the last '/'; for a local
    path, it is the absolute path of the containing directory.
    """
    if '://' not in url:
        # local file
        containing = os.path.dirname(url)
        return os.path.abspath(containing)
    head = url.rsplit('/', 1)
    return head[0]
| 70 | |
def basename(url):
    """
    Return the final component of `url`.

    For a URL, this is the text after the last '/'; a local path is
    handed to os.path.basename.
    """
    if '://' not in url:
        # local file
        return os.path.basename(url)
    pieces = url.rsplit('/', 1)
    return pieces[-1]
| 77 | |
def loader(url):
    """
    Return the loader function registered for the scheme of `url`.

    Raises KeyError when no loader is registered for that scheme.
    """
    kind = scheme(url)
    return loaders[kind]
| 80 | |
def load(url):
    """Return the contents of `url`, read by the loader for its scheme."""
    read = loader(url)
    return read(url)
| 84 | |
def get_file(src, dest):
    """
    Copy the local file `src` to `dest`, preserving file metadata.

    `src` may be a plain path or a 'file://' URL. Both forms are
    dispatched here because both have the 'file' scheme, so the
    'file://' prefix is stripped first, as `read_file` does.
    """
    prefix = 'file://'
    if src.startswith(prefix):
        src = src[len(prefix):]
    shutil.copy2(src, dest)
| 87 | |
def get_s3(src, dest):
    """
    Download the S3 object `src` to the local path `dest` using the
    external `s3cmd` program.
    """
    command = ['s3cmd', 'get', src, dest]
    subprocess.check_output(command)
| 90 | |
def default_getter(src, dest):
    """
    Fetch `src` with `load` and write its contents to the local file `dest`.

    Used for schemes that have no dedicated getter. `dest` must be a
    local path, not a URL; its parent directory is created if needed.
    """
    # isURL is this module's helper; os.path has no such function
    assert not isURL(dest)
    # fetch first so a failed load does not leave an empty `dest` behind
    contents = load(src)
    ensure_dir(parent(dest))
    with open(dest, 'w') as f:
        f.write(contents)
| 97 | |
# dispatch table: URL scheme -> function copying a source to a local file;
# schemes not listed here fall back to `default_getter`
getters = dict(file=get_file,
               s3=get_s3)
| 101 | |
def get(src, dest):
    """
    Get `src` to a local file.

    When `dest` is an existing directory, the file is placed inside it
    under the basename of `src`. Returns whatever the chosen getter
    returns.
    """
    if os.path.isdir(dest):
        dest = os.path.join(dest, basename(src))
    fetch = getters.get(scheme(src), default_getter)
    return fetch(src, dest)
| 107 | |
def rel(base, path):
    """
    Return the remainder of `path` after the leading `base`;
    return None when `path` does not start with `base`.
    """
    if not path.startswith(base):
        return None
    offset = len(base)
    return path[offset:]
| 116 | |
def main(args=None):
    """
    CLI

    `args` is the list of command line arguments; when None (the
    default), argparse reads `sys.argv[1:]` at call time.

    With -o/--output the URL is copied to that location and the
    process exits; otherwise the contents of the URL are printed.
    """

    # parse command line
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('url', help='URL to read')
    parser.add_argument('-o', '--output', dest='output',
                        help="get to this location")
    # a default of `sys.argv[1:]` in the signature would be frozen at
    # import time; parse_args(None) looks at sys.argv when called
    options = parser.parse_args(args)

    if options.output:
        # copy src to this location
        get(options.url, options.output)
        sys.exit()

    # read location
    contents = load(options.url)

    # output
    print (contents)
| 137 | |
# run the command line interface when executed as a script
if __name__ == '__main__':
    main()
