Mercurial > hg > config
annotate python/url.py @ 929:7c4be71a560b default tip
remove old aliases
| author | Jeff Hammel <k0scist@gmail.com> | 
|---|---|
| date | Mon, 20 Oct 2025 15:22:19 -0700 | 
| parents | f011ec45b8e8 | 
| children | 
| rev | line source | 
|---|---|
| 754 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 2 # -*- coding: utf-8 -*- | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 3 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 4 """ | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 5 url manipulation | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 6 """ | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 7 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 8 import argparse | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 9 import os | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 10 import shutil | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 11 import subprocess | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 12 import sys | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 13 import tempfile | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 14 import urlparse | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 15 import urllib2 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 16 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
# Names exported by a star-import of this module.
__all__ = ['load', 'main']
# Tuple of the Python 2 string types (byte strings and unicode strings).
string = (str, unicode)
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 19 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def ensure_dir(directory):
    """
    ensure `directory` is a directory, creating it (and any missing
    parents) if needed

    Returns `directory` unchanged.
    Raises AssertionError if the path exists but is not a directory.
    """
    if os.path.isdir(directory):
        return directory
    if os.path.exists(directory):
        # explicit raise rather than `assert`, which is stripped under -O
        raise AssertionError("not a directory: %s" % directory)
    try:
        os.makedirs(directory)
    except OSError:
        # tolerate another process creating the directory concurrently
        if not os.path.isdir(directory):
            raise
    return directory
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 27 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def isURL(url):
    """return whether `url` contains a '://' scheme separator"""
    separator = '://'
    return separator in url
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 30 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def read_s3(url):
    """
    return the contents of the S3 object at `url`

    The object is fetched with `s3cmd get` into a private temporary
    directory, read back, and the directory is removed afterwards.
    Raises subprocess.CalledProcessError if `s3cmd` exits non-zero.
    """
    # mkdtemp creates the directory atomically, avoiding the race that
    # tempfile.mktemp has between choosing a name and using it; the
    # download target inside it does not exist yet
    workdir = tempfile.mkdtemp()
    try:
        name = os.path.join(workdir, 'download')
        subprocess.check_output(['s3cmd', 'get', url, name])
        with open(name) as f:
            return f.read()
    finally:
        # single cleanup point for both success and failure
        shutil.rmtree(workdir, ignore_errors=True)
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 42 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def read_http(url):
    """
    return the body of the HTTP(S) resource at `url`

    The response is closed explicitly once it has been read.
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 45 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def read_file(url):
    """
    return the contents of a local file

    `url` may be a plain path or carry a leading 'file://' prefix,
    which is stripped before opening.
    """
    # named `prefix` so the module-level `scheme` function is not shadowed
    prefix = 'file://'
    if url.startswith(prefix):
        url = url[len(prefix):]
    # context manager closes the file handle deterministically
    with open(url) as f:
        return f.read()
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 51 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
# map of URL scheme -> function that returns the contents at a URL
loaders = dict(
    s3=read_s3,
    http=read_http,
    https=read_http,
    file=read_file,
)
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 57 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def scheme(url):
    """return the scheme of `url`; anything without '://' counts as 'file'"""
    if '://' not in url:
        # no scheme separator: treat as a local path
        return 'file'
    return urlparse.urlsplit(url).scheme
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 63 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def parent(url):
    """
    return the parent location of `url`:
    everything before the last '/' for a URL,
    or the absolute containing directory for a local path
    """
    if '://' not in url:
        # local file
        containing = os.path.dirname(url)
        return os.path.abspath(containing)
    head, _, _ = url.rpartition('/')
    return head
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 70 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def basename(url):
    """
    return the final component of `url`:
    the text after the last '/' for a URL,
    or `os.path.basename` for a local path
    """
    if '://' not in url:
        # local file
        return os.path.basename(url)
    _, _, tail = url.rpartition('/')
    return tail
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 77 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def loader(url):
    """
    return the function from `loaders` registered for the scheme of `url`

    Raises KeyError when the scheme has no entry in `loaders`.
    """
    kind = scheme(url)
    return loaders[kind]
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 80 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def load(url):
    """return the contents found at `url`, using the loader for its scheme"""
    read = loader(url)
    return read(url)
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 84 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def get_file(src, dest):
    """
    copy the local file `src` to `dest` (contents and metadata)

    `src` may be a plain path or carry a leading 'file://' prefix;
    the prefix is stripped, as `read_file` does, because `scheme()`
    routes 'file://' URLs to this getter as well.
    """
    prefix = 'file://'
    if src.startswith(prefix):
        src = src[len(prefix):]
    shutil.copy2(src, dest)
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 87 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def get_s3(src, dest):
    """
    download the S3 object `src` to the local path `dest` via `s3cmd get`

    Raises subprocess.CalledProcessError if `s3cmd` exits non-zero.
    """
    command = ['s3cmd', 'get', src, dest]
    subprocess.check_output(command)
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 90 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def default_getter(src, dest):
    """
    fallback getter: load the contents of `src` and write them to
    the local file `dest`, creating its parent directory if needed

    Raises AssertionError if `dest` is itself a URL.
    """
    # `isURL` is this module's helper; `os.path` has no such attribute.
    # Raised explicitly so the check survives `python -O`.
    if isURL(dest):
        raise AssertionError("destination must be a local path: %s" % dest)
    dirname = parent(dest)
    ensure_dir(dirname)
    # fetch before opening so a failed load does not leave an empty file
    contents = load(src)
    with open(dest, 'w') as f:
        f.write(contents)
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 97 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
# map of URL scheme -> function that fetches a source to a local file;
# schemes absent here fall back to `default_getter` in `get`
getters = dict(
    file=get_file,
    s3=get_s3,
)
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 101 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def get(src, dest):
    """
    fetch `src` to a local file

    When `dest` is an existing directory, the file is placed inside it
    under the basename of `src`. Returns whatever the chosen getter returns.
    """
    target = dest
    if os.path.isdir(target):
        target = os.path.join(target, basename(src))
    # scheme-specific getter, falling back to load-and-write
    fetch = getters.get(scheme(src), default_getter)
    return fetch(src, target)
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 107 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def rel(base, path):
    """
    return the remainder of `path` after the prefix `base`;
    return None when `path` does not start with `base`
    """

    if not path.startswith(base):
        return None
    return path[len(base):]
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 116 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
def main(args=sys.argv[1:]):
    """command line entry point: print a URL's contents, or save them with -o"""

    # build and run the argument parser
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument('url', help='URL to read')
    cli.add_argument('-o', '--output', dest='output',
                     help="get to this location")
    parsed = cli.parse_args(args)

    if not parsed.output:
        # no destination given: read the location and write it to stdout
        print (load(parsed.url))
        return

    # copy the source to the requested location, then exit
    get(parsed.url, parsed.output)
    sys.exit()
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
changeset | 137 | 
| 
f011ec45b8e8
add example load type interface
 Jeff Hammel <k0scist@gmail.com> parents: diff
# Run the command line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()
