#!/usr/bin/env python
import sys
import os
import glob
import hashlib
from abstrys.aws_utils import *
from abstrys.cmd_utils import *

#
# Some runtime variables...
#

# Module-level state. process_args() fills most of these in from the command
# line; the script section at the bottom of the file then acts on them.

config_file = 'aws-config.txt'  # default credentials file (see --aws_config)
argfile = None  # path of the file list given with -f, if any
files_to_upload = []  # local filespecs taken from the command line
s3 = None  # the AWS::S3 client
s3_destination = ""  # target S3 path; stays "" when none was given
switches = []  # single-character switches that are in effect
# Long option name (as typed after '--') -> equivalent single-character switch.
switch_aliases = {
    'archive': 'a',
    'private': 'p',
    'public': 'P',
    'recurse': 'r',
    'sync': 's',
    'update': 'u',
    'yes': 'y',
    'file': 'f' }

# Credentials given with --access_key / --secret_key; None when not supplied.
access_key_id = None
secret_access_key = None
# Shared progress display: upload_file() sets its target and
# report_progress() updates it while a transfer runs.
progress_bar = ProgressBar(size=20, outputs=['bar', 'val:B', 'pct'])

# Help text in reStructuredText. The {s3up} and {configfile} placeholders are
# filled in by str.format() below, so any literal brace added to this text
# must be doubled.
USAGE = """
s3up
====

Upload files to Amazon S3. This script can also unpublish files (make them
private), upload files and publish them in one step, or just upload files while
keeping them private.

Usage
-----

There are three forms of usage:

* The basic form, which uploads local files to S3::

    {s3up} <options> <filespec1> [filespec2] [...] <s3_path>

  Any number of files can be specified to upload. s3up will attempt to save
  them all to the location specified in *s3_path*.

* The argfile form, which takes a file of paths to publish to S3::

    {s3up} <options> -f <argfile> [s3_path]

  This form is essentially the same as the basic form, but the list of files is
  not specified on the command-line. Useful when you're sending many discrete
  files from different places on your file system.

* The publish/unpublish form, which changes the access of files already on S3::

    {s3up} -p|-P <s3_path>

  This does no copying of data. Only the provided s3 path (which must exist
  first) will be affected.

Switches
--------

Switches, if provided, always precede the accompanying arguments.

**-a, --archive**
    Archives files. This sets the -s (sync) and -p (private) options
    automatically.

**-f, --file <filepath>**
    Specifies a file to load the list of local paths to copy to S3.

**-h, --help**
    Prints help. You can pipe the output of this command to rst2*.py to
    generate HTML, manpage, or other versions of this help.

**-r, --recurse**
    Can be specified when you also specify local filespecs to publish or upload
    to S3. If one of the filespecs is a directory, then {s3up} will
    recursively upload the directory's contents. If it contains subdirectories,
    their contents will also be uploaded. The directory structure will be
    preserved on S3.

**-s, --sync**
    Causes {s3up} to sync files--files will not be overwritten unless they have
    different md5 hash digest values. If a directory (or recurse) is specified,
    then any files in S3 that do not exist in the local directory will be
    removed from S3.

**-u, --update**
    Causes {s3up} to update files--files will not be overwritten unless they
    have different md5 hash digest values. Files already on S3 that do not
    exist in the local directory will still be retained on S3.

**-P, --public**
    When specified, gives the file public access (publishes it).

**-p, --private**
    When applied to a file already in S3, makes the file private. When
    uploading files, this is the default.

**-y**
    The ``-y`` switch, when specified, tells {s3up} to automatically answer
    'yes' to any queries (of the form "are you sure you want to do this?"). If
    ``-y`` is specified:

    * s3 buckets that don't exist will automatically be created if necessary.

    * s3 objects with a different md5 digest than the local file will be
      overwritten without any confirmation.

    * local files will not be uploaded if the s3 version has the same md5
      digest.

    With great power comes great responsibility. Play carefully.

Arguments
---------

**filespec**
    A local file or directory, file-glob, or list of files to publish.  If a
    directory is specified, the ``-r`` switch can be used to copy all of the
    files in the directory recursively. Files are uploaded as private unless
    the ``-P`` switch is given.

**s3_path**
    A bucket or path on S3. When a path is provided to s3up by itself, it is
    assumed that the file already exists on S3 and that its access should be
    changed: it is made public if the ``-P`` switch is provided, and private
    otherwise.

**argfile**
    A file containing local filepaths and s3 paths for publishing files, one
    set per line. To specify the argfile, you *must* precede it with the ``-f``
    switch, or it will either be interpreted as an s3 path or as a file to
    upload.

    Files are uploaded as private unless the ``-P`` switch is given.

    You can specify *both* an argfile and an s3 path. If so, the
    argfile is considered to be a simple list of filespecs to upload.

Notes
-----

Before using this script, you must provide your AWS credentials to the program,
with any one of the following methods:

* Set the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables
  with your AWS credentials.

* Provide a YAML-formatted file called `{configfile}` in the current
  directory with your aws credentials specified like this::

      ---
      access_key_id:     ACCESSKEYID
      secret_access_key: SECRETACCESSKEY

  Replace ACCESSKEYID and SECRETACCESSKEY with your own AWS credentials.

  You can also specify the path to the config file by passing the
  ``--aws_config <config_file_path>`` argument with the path to the config file
  specified. You can use this argument to point to a config file with any name.

* Provide the command-line arguments: ``--access_key ACCESSKEYID`` and/or
  ``--secret_key SECRETACCESSKEY``, providing your own AWS credentials in place
  of ACCESSKEYID and SECRETACCESSKEY.

""".format(configfile=config_file, s3up=__file__)


def show_help():
    """Print the full usage text, run through format_doc(), then exit."""
    help_text = format_doc(USAGE)
    print(help_text)
    sys.exit()


def get_help():
    """Print a one-line hint on how to display the help, then exit.

    Always terminates the program through sys.exit(), so nothing placed after
    a call to this function is executed.
    """
    # Parenthesised single-argument form: identical output under the Python 2
    # print statement and the Python 3 print function, and consistent with
    # show_help().
    print("Use '%s -h' to get help." % __file__)
    sys.exit()


def process_args(args):
    """Interpret the command-line arguments and record them in module state.

    Leading arguments that start with '-' are switches. A long option
    (``--name``) either takes a value (``--access_key``, ``--secret_key``,
    ``--aws_config``) or maps to a single-character switch through
    ``switch_aliases``; short switches may be bundled (``-rP``). Whatever
    follows the switches is read as local filespecs, an argfile and/or the S3
    destination.

    Sets the globals ``argfile``, ``config_file``, ``files_to_upload``,
    ``s3_destination``, ``access_key_id``, ``secret_access_key`` and
    ``switches``.

    Exits the program when help is requested (``-h`` / ``--help``), when a
    long option is missing its value, or when nothing but switches was given.

    args -- the argument list, without the program name.
    """
    global argfile, config_file, files_to_upload, s3_destination
    global access_key_id, secret_access_key, switches

    def option_value(index, option):
        """Return the value following a long option; exit if it is missing."""
        if index >= len(args):
            print("** option --%s requires a value." % option)
            sys.exit(1)
        return args[index]

    # First, iterate through any switches. These always precede the other
    # arguments. An empty string or a bare '-' is not a switch, so it ends
    # the switch section instead of raising an IndexError.
    i = 0
    while i < len(args) and len(args[i]) > 1 and args[i][0] == '-':
        arg = args[i]
        if arg[1] == '-':
            # an extended (long) option.
            ext_cmd = arg[2:]
            if ext_cmd == 'access_key':
                i += 1  # the value is the next argument.
                access_key_id = option_value(i, ext_cmd)
            elif ext_cmd == 'secret_key':
                i += 1
                secret_access_key = option_value(i, ext_cmd)
            elif ext_cmd == 'aws_config':
                i += 1
                config_file = option_value(i, ext_cmd)
            elif ext_cmd == 'help':
                show_help()
            elif ext_cmd == 'archive':
                # archive is shorthand for sync + private.
                switches += ['s', 'p']
            elif ext_cmd in switch_aliases:
                switches += switch_aliases[ext_cmd]
            else:
                print("** ignoring unknown option: %s" % arg)
        else:
            # one or more bundled single-character switches.
            arg_set = arg[1:]
            if 'h' in arg_set:
                show_help()
            if 'a' in arg_set:
                switches += ['s', 'p']
            switches += list(arg_set)

        # move on to the next argument.
        i += 1

    # remove any duplicates
    switches = list(set(switches))

    # You can't specify both 'p' and 'P'; fall back to the safer one.
    if 'p' in switches and 'P' in switches:
        print("\nBoth public (P) and private (p) switches set. "
              "Defaulting to the safer option:\n*private*.\n")
        switches.remove('P')

    # 'a' has already been expanded into 's' and 'p' above.
    if 'a' in switches:
        switches.remove('a')

    remaining = len(args) - i
    if remaining == 1:
        # If there is only one argument remaining, then it must either be an
        # argfile...
        if 'f' in switches:
            argfile = args[i]
        else:
            # ...or the s3 destination, and there are no files to upload. In
            # other words, we're making a file that's already hosted on S3
            # either public or private.
            s3_destination = args[i]
    elif remaining > 1:
        if 'f' in switches:
            # in argfile mode you can still set an s3 destination; the first
            # remaining argument is the list of files to upload.
            argfile = args[i]
        else:
            # otherwise all remaining arguments but the last are files to
            # upload.
            files_to_upload = args[i:-1]

        # In either case, the last arg is the s3 destination, minus a
        # trailing slash. endswith() is also safe for an empty string.
        s3_destination = args[-1]
        if s3_destination.endswith('/'):
            s3_destination = s3_destination[:-1]
    else:
        print("** Hmm, I seem to have more, or fewer arguments than I know "
              "what to do with!")
        sys.exit(1)


def report_progress(bytes_transferred, total_bytes):
    """Transfer callback: show the amount sent so far on the progress bar.

    total_bytes is accepted as part of the callback signature but is not
    used here.
    """
    # scaled by 1000 to match the target set in upload_file()
    amount_shown = bytes_transferred / 1000
    progress_bar.show(amount_shown)


def change_object_privacy(s3_path):
    """Make the objects at the given S3 path public or private.

    The mode is chosen by the command-line switches: with '-P' the objects
    get the 'public-read' canned ACL, otherwise 'private'. All keys that
    match the object part of *s3_path* are affected; when the path has no
    object part, every key in the bucket is.

    Returns a list with one URL per object that was changed, or None if the
    bucket does not exist.
    """
    bucket_name, object_name = split_s3_path(s3_path)

    # lookup() hands back the bucket itself (or None when it doesn't exist),
    # so a second get_bucket() round trip is not needed.
    bucket = s3.lookup(bucket_name)
    if bucket is None:
        print("** bucket %s doesn't exist!" % bucket_name)
        return None

    canned_acl_name = 'public-read' if 'P' in switches else 'private'

    # operate on all keys that match the "object" part of the path.
    if object_name is None:
        objects = bucket.list()
    else:
        objects = bucket.list(object_name)

    public_urls = []
    for s3obj in objects:
        s3obj.set_acl(canned_acl_name)
        # expiry 0 with query_auth=False asks for a plain, unsigned URL.
        public_urls.append(s3obj.generate_url(0, query_auth=False))

    return public_urls


def sync_file(local_path, s3_path):
    """Sync a local file with S3.

    NOTE(review): this function is unfinished. It resolves the bucket and
    object for *s3_path* and prints the MD5 digest of the S3 object if it
    exists, but it does not transfer or delete anything yet.

    local_path -- path of the local file.
    s3_path -- bucket, or bucket/object path, on S3.
    """
    # first, get the bucket and object.
    (s3_bucket_name, s3_object_name) = split_s3_path(s3_path)

    # if no object name was provided (appended to the bucket name after '/'),
    # then use the local file name as the s3 object name.
    if s3_object_name is None:
        s3_object_name = os.path.basename(local_path)

    # Both flags are inputs for the sync decision that is still to be
    # written; nothing reads them yet.
    local_file_exists = os.path.exists(local_path)

    try:
        # create (or get) the s3 bucket.
        s3_bucket = s3.create_bucket(s3_bucket_name)
    except S3Error:
        # NOTE(review): S3Error is not defined in this file; presumably it
        # comes from one of the abstrys star imports -- confirm.
        sys.exit(1)

    # does the s3 object exist? get_key() returns None when it doesn't.
    s3_object = s3_bucket.get_key(s3_object_name)
    s3_object_exists = s3_object is not None
    if s3_object_exists:
        # report its hash, read the same way upload_file() reads it.
        print(get_s3_md5_hex_digest(s3_object=s3_object))


def upload_file(file_path, s3_path):
    """Upload one local file to an S3 location.

    The object is given the 'public-read' canned ACL when the '-P' switch is
    set and 'private' otherwise. Unless '-y' is set, the user is asked before
    a missing bucket is created and before the object is written. An object
    whose stored MD5 digest equals the local one is skipped when '-s', '-u'
    or '-y' is set; otherwise the user is asked whether to overwrite it.

    file_path -- path of the local file; the program exits if it is missing.
    s3_path -- bucket, or bucket/object path. Without an object part the
        local file name is used as the object name.

    Returns the public URL when the object was made public, the
    'S3://bucket/object' path when it was uploaded privately, or None when
    the upload was skipped or declined.
    """
    # first, get the bucket and object.
    (bucket_name, object_name) = split_s3_path(s3_path)

    if not os.path.exists(file_path):
        print("** local file does not exist: %s" % file_path)
        sys.exit(1)

    # collect the local md5--we'll use it to see if the file on S3 is the same.
    local_md5 = get_local_md5_hex_digest(file_path)

    # if no object name was provided (appended to the bucket name after '/'),
    # then use the local file name as the s3 object name.
    if object_name is None:
        object_name = os.path.basename(file_path)

    # lookup() returns the bucket itself (or None), so the result is reused
    # instead of fetching the bucket a second time.
    bucket = s3.lookup(bucket_name)
    if bucket is None:
        if 'y' not in switches:
            print("** no such bucket: %s" % bucket_name)
            if not confirm("Do you want me to create it?"):
                sys.exit(1)
        bucket = s3.create_bucket(bucket_name)

    # does the object already exist? If so, we should update it. If not, we'll
    # create a new object.
    s3obj = bucket.get_key(object_name)

    print("%s" % file_path)
    sys.stdout.write(" ->  S3://%s/%s" % (bucket_name, object_name))
    confirm_upload = 'y' not in switches

    if s3obj is None:
        print(" [create]")
        # imported here because boto's availability is only checked by the
        # script section at the bottom of the file.
        from boto.s3.key import Key
        s3obj = Key(bucket)
        s3obj.key = object_name
    else:
        print(" [overwrite]")
        s3_md5 = get_s3_md5_hex_digest(s3_object=s3obj)
        if s3_md5 == local_md5:
            # In the case of 's', 'u', or 'y', files with MD5 equality are
            # ignored by default.
            if 's' in switches or 'u' in switches or 'y' in switches:
                sys.stdout.write("** Local file is identical per MD5 digest.  Ignoring %s\n   " % s3_path)
                return None
            if not confirm("File is identical per MD5 digest. Still overwrite?"):
                return None
            # the user has just agreed; don't ask a second time below.
            confirm_upload = False

    if confirm_upload:
        if not confirm("OK?"):
            return None

    s3_path = 'S3://%s/%s' % (bucket_name, object_name)
    sys.stdout.write("** writing %s ...\n   " % s3_path)
    canned_acl = 'public-read' if 'P' in switches else 'private'
    total_bytes = os.path.getsize(file_path)
    # scaled by 1000, matching what report_progress() passes to the bar.
    progress_bar.set_target(total_bytes / 1000)
    # store our own md5 tag with the object (we can't rely on S3's ETag).
    s3obj.set_metadata('md5-hexdigest', local_md5)
    s3obj.set_contents_from_filename(file_path, headers=None, replace=True,
            cb=report_progress, num_cb=-1, policy=canned_acl)
    print("\n** complete! Object has %s access." % canned_acl)

    # no need to generate a URL for a non-public upload. Just return the S3
    # path.
    if 'P' not in switches:
        return s3_path

    public_url = s3obj.generate_url(0, query_auth=False)
    print("   public URL: %s" % public_url)
    return public_url


def upload_files(filespec, s3_path):
    """Upload every local path that matches *filespec* to S3.

    Each match is stored under *s3_path* using its base name. A matching
    directory is descended into when the '-r' switch is set, which preserves
    the directory structure on S3; without '-r' it is skipped with a message,
    because a directory cannot be uploaded as a single object.

    filespec -- a local path or glob pattern.
    s3_path -- the S3 bucket or path to upload into.
    """
    for filepath in glob.glob(filespec):
        destination = '%s/%s' % (s3_path, os.path.basename(filepath))
        if os.path.isdir(filepath):
            if 'r' in switches:
                upload_files('%s/*' % filepath, destination)
            else:
                print("** skipping directory (use -r to recurse): %s" % filepath)
            continue
        upload_file(filepath, destination)
        print("")

#
# THE SCRIPT
#

# Check dependencies... these modules might not be installed on the system.
try:
    import boto
except ImportError:
    print("boto is not installed. Run 'pip install boto' on the command-line")
    print("to install it first.")
    sys.exit(1)

if len(sys.argv) > 1:
    process_args(sys.argv[1:])
else:
    print("** you must supply at least one argument!")
    get_help()
    # get_help() exits by itself; this is only a guard.
    sys.exit()

s3 = get_s3(access_key_id, secret_access_key, config_file)
if s3 is None:
    print("** could not make a connection to Amazon S3.")
    get_help()
    # get_help() exits by itself; this is only a guard.
    sys.exit(1)

# now, operate based on the mode...

#
# argfile mode.
#
if 'f' in switches:
    # Read the whole list up front so the file is closed before any upload
    # starts. Lines are stripped (a trailing newline would keep the glob from
    # matching anything) and blank lines are dropped.
    with open(argfile, 'r') as f:
        arglines = [line.strip() for line in f if line.strip()]

    if s3_destination != "":
        # file is a list of filespecs.
        for filespec in arglines:
            upload_files(filespec, s3_destination)
    else:
        # file is a list of filespec / destination lines.
        for argline in arglines:
            parts = argline.split()
            if len(parts) != 2:
                print("** skipping malformed argfile line: %s" % argline)
                continue
            filespec, s3dest = parts
            for filepath in glob.glob(filespec):
                upload_file(filepath, s3dest)
                print("")

#
# file upload + publish mode
#
elif len(files_to_upload) > 0 and s3_destination != "":
    for filespec in files_to_upload:
        upload_files(filespec, s3_destination)

#
# s3 publish/unpublish mode
#
elif s3_destination != "":
    urls = change_object_privacy(s3_destination)
    # None means the bucket is missing; an empty list means nothing matched.
    if not urls:
        print("** no matching objects found")
        sys.exit()

    n = len(urls)
    # change_object_privacy() picks the ACL from the 'P' switch, so the
    # summary has to be derived from the same switch.
    access = 'public' if 'P' in switches else 'private'
    print("Made %d file%s %s:" % (n, '' if n == 1 else 's', access))
    for url in urls:
        print("  %s" % url)

else:
    # Hmmm, something weird happened here...
    print("""** argument error: I don't have any files to upload *or* an s3
    destination to publish to! C'mon, give me *something* to work with here!""")
    get_help()

# Fin!
