#!python
"""A script for sending files to VOSpace via multiple connection streams."""

import vos, sys
import threading, Queue
import datetime, optparse

# Shared work queue of (source, destination) pairs: filled by copy() and
# drained by the ThreadCopy worker threads.
queue=Queue.Queue()
# VOSpace directories that mkdirs() has already found or created, so they
# are not checked again.
goodDirs=[]
# Maps a destination path to its node object; populated by mkdirs() only
# when the --cache_nodes option is set.
nodeDict={}
        

class ThreadCopy(threading.Thread):
  """Worker thread that copies queued local files into VOSpace.

  Each worker creates its own ``vos.Client`` and loops forever, pulling
  ``(src, dest)`` pairs off the queue it was given.  The per-thread counters
  ``filesSent``, ``filesSkipped``, ``bytesSent``, ``bytesSkipped`` and
  ``filesErrored`` are summed by the main program for the transfer report.

  While processing a file the worker reads the module-level ``opt`` (parsed
  command-line options) and ``nodeDict`` (cache of destination nodes).
  """

  def __init__(self,queue):
    """Create the VOSpace client and remember the work queue.

    queue -- object providing get() and task_done(); get() must return
             (src, dest) tuples.

    Exits the program with a non-zero status if the client cannot be created.
    """
    super( ThreadCopy, self ).__init__()

    try:
      self.client=vos.Client()
    except Exception as e:
      # Report on stderr like the rest of this script and never exit with a
      # success status: not every exception carries a usable errno.
      sys.stderr.write("Connection failed:  %s\n" %  (str(e)))
      sys.exit(getattr(e,'errno',None) or 1)
    self.queue=queue
    self.filesSent=0
    self.filesSkipped=0
    self.bytesSent=0
    self.bytesSkipped=0
    self.filesErrored=0


  def fileMD5(self,filename,block_size=8192):
    """Return the hexadecimal MD5 digest of the file's contents.

    filename   -- path of the local file to hash.
    block_size -- number of bytes read per chunk.
    """
    import hashlib
    md5 = hashlib.md5()
    # Binary mode so the digest covers the exact bytes on disk; the with
    # block closes the file even if reading fails.
    with open(filename,'rb') as f:
      for data in iter(lambda: f.read(block_size), b''):
        md5.update(data)
    return md5.hexdigest()

  def run(self):
    """Process queued (src, dest) pairs until the program exits.

    Every item taken from the queue is acknowledged with task_done() exactly
    once, after all work on it (including the post-copy checksum check) is
    finished and even when processing fails unexpectedly; otherwise a join()
    on the queue would never return.
    """
    while True:
      (src,dest)=self.queue.get()
      try:
        self._process(src,dest)
      except Exception as e:
        # e.g. OSError from os.stat(): record the failure and keep this
        # worker alive for the remaining items.
        sys.stderr.write("Error processing %s, skipping\n" % ( src))
        sys.stderr.write(str(e)+"\n")
        self.filesErrored+=1
      finally:
        self.queue.task_done()

  def _process(self,src,dest):
    """Copy src to dest unless the remote copy is already up to date."""
    stat=os.stat(src)
    srcMD5=None
    if not opt.ignore_checksum:
      srcMD5=self.fileMD5(src)

    if not opt.overwrite and self._matchesRemote(dest,stat,srcMD5):
      if opt.verbose:
        sys.stdout.write( "skipping: %s  matches %s\n" % (src,dest) )
      self.filesSkipped+=1
      self.bytesSkipped+=stat.st_size
      return

    if opt.verbose:
      sys.stdout.write("%s -> %s\n" % (src,dest))
    try:
      self.client.copy(src,dest)
    except IOError as e:
      sys.stderr.write("Error writing %s to server, skipping\n" % ( src))
      sys.stderr.write(str(e)+"\n")
      self.filesErrored+=1
      # Nothing was written, so there is nothing to verify.
      return
    self.filesSent+=1
    self.bytesSent+=stat.st_size

    if not opt.ignore_checksum:
      destMD5=None
      try:
        destMD5=self.client.getNode(dest).props.get('MD5',None)
      except IOError as e:
        sys.stderr.write(str(e)+"\n")
      if destMD5!=srcMD5:
        # NOTE: despite the wording the file is only reported here; it is
        # not put back on the queue.
        sys.stderr.write("Problem writing to vospace. requeuing: %s\n" % (src))

  def _matchesRemote(self,dest,stat,srcMD5):
    """Return True when the node at dest already matches the local file.

    With checksums enabled the MD5 values are compared; otherwise the remote
    node must be at least as new as the local file and have the same size.
    An IOError while fetching the node is treated as "no match".
    """
    try:
      # Only ask the server when the node is not already cached.
      node=nodeDict.get(dest)
      if node is None:
        node=self.client.getNode(dest)
      # Return value unused; the call is kept in case it refreshes the
      # node's attributes -- TODO confirm against the vos API.
      node.getInfo()
      destMD5=node.props.get('MD5',None)
      destLength=node.attr['st_size']
      destTime=node.attr['st_ctime']
    except IOError:
      return False
    if opt.ignore_checksum:
      return destTime >= stat.st_ctime and destLength==stat.st_size
    return srcMD5==destMD5

def _rememberDir(dirs):
  """Mark dirs as known-good and, with --cache_nodes, cache its child nodes."""
  goodDirs.append(dirs)
  if opt.cache_nodes:
    for node in c.getNode(dirs).getNodeList():
      nodeDict[os.path.join(dirs,node.name)]=node

def mkdirs(dirs):
  """Ensure the VOSpace directory dirs exists, creating missing parents.

  Directories that were found or created are recorded in goodDirs so later
  calls return immediately.  Uses the module-level client ``c`` and the
  parsed options ``opt``.

  Exits the program when a path component is not a directory, when dirs is
  an existing file, or when no existing parent can be found.
  """
  if dirs in goodDirs:
    return
  if c.isdir(dirs):
    _rememberDir(dirs)
    return

  (parent,subdir) = os.path.split(dirs)

  if not c.access(parent):
    # os.path.split() stops shortening the path at '' or '/'; recursing on
    # an unchanged path would never terminate.
    if parent==dirs:
      sys.exit("Unable to find an existing parent directory (%s)" % ( dirs))
    mkdirs(parent)
  if not c.isdir(parent):
    sys.exit("Part of the path isn't a directy? (%s)" % ( parent))
  if c.isfile(dirs):
    sys.exit("Destination location a file when directory expected (%s)" % (subdir))
  c.mkdir(dirs)

  _rememberDir(dirs)
  return

import os
def copy(source,dest):
  """Queue source for transfer to dest, creating dest's directory first.

  dest must include the file name; its parent directory is created in
  VOSpace (along with any missing ancestors) before the pair is queued.
  """
  mkdirs(os.path.dirname(dest))
  queue.put((source,dest))
                              
def startStreams(nstreams):
  """Start nstreams daemon ThreadCopy workers on the shared queue.

  Returns the list of started threads so the caller can read their
  counters afterwards.
  """
  workers=[]
  for streamId in range(nstreams):
    if opt.verbose:
      sys.stdout.write("Launching vospace connection stream %d\n" % ( streamId ))
    worker=ThreadCopy(queue)
    # Daemon threads do not keep the program alive once the main thread ends.
    worker.setDaemon(True)
    worker.start()
    workers.append(worker)
  return workers

def buildFileList(basePath,destRoot='',recursive=False,ignore=None):
  """Build a list of files that should be copied into VOSpace

  basePath  -- local directory to scan.
  destRoot  -- destination prefix under which the files are placed.
  recursive -- when False only the files directly inside basePath are
               listed; when True the whole tree is walked.
  ignore    -- if not None, files whose name contains this string are
               skipped.

  Returns a list of (source path, destination path) tuples.  As a side
  effect the destination directory of every listed file is created through
  mkdirs().
  """
  # NOTE(review): only file names are matched against ignore, although the
  # --exclude help text also mentions directories.
  filelist=[]
  for (root,dirs,filenames) in os.walk(basePath):
    for thisfilename in filenames:
      if ignore is not None and ignore in thisfilename:
        continue
      srcfilename=os.path.normpath(os.path.join(root,thisfilename))
      # relpath compares whole path components, so a basePath such as
      # "./data" or "a/b/../c" still yields the path below basePath; a
      # character-wise common prefix against the normalised name does not.
      relname=os.path.relpath(srcfilename,basePath)
      destfilename=os.path.normpath(destRoot+"/"+relname)
      mkdirs(os.path.dirname(destfilename))
      filelist.append((srcfilename,destfilename))
    if not recursive:
      # os.walk yields basePath itself first; stop before any subdirectory.
      break
  return filelist

if __name__=='__main__':
  # Command-line entry point: parse the options, build the list of
  # (source, destination) pairs, queue them, run the worker threads and
  # print a transfer report.  ``opt`` and ``c`` are deliberately left as
  # module-level names because the functions above read them.
  import re
  import time
  startTime=time.time()
  usage = "%prog [options] files vos:Destination/"
  parser=optparse.OptionParser(usage=usage)
  parser.add_option('--verbose','-v',action="store_true",help='run in verbose mode')
  parser.add_option('--cache_nodes',action="store_true",help='caches the contents of the vospace at the start of transfer, not safe in running multiple instances of vsync, but safe for multiple streams in one instance')
  parser.add_option('--ignore-checksum',action="store_true",help='dont check MD5 sum, forces transfer')
  parser.add_option('--recursive','-r',help="Do a recursive sync",action="store_true")
  parser.add_option('--nstreams',type=int,help="Number of streams to run (MAX: 10)",default=1)
  parser.add_option('--exclude',help="ignore directories or files containing this pattern",default=None)
  parser.add_option('--overwrite',help="overwrite copy on server regardless of modification/size/md5 checks",action="store_true")
  parser.add_option('--load_test',action="store_true",help="Used to stress test the VOServer, also set --nstreams to a large value")
  (opt,args)=parser.parse_args()
  if len(args)<2:
    parser.error("requires one or more source files and a single destination directory")

  # Without at least one worker nothing drains the queue and the
  # queue.join() below would block forever.
  if opt.nstreams<1:
    parser.error("Number of streams must be at least 1")
  if opt.nstreams>10 and not opt.load_test:
    parser.error("Maximum of 10 streams exceeded")

  dest=args.pop()
  if dest[0:4]!="vos:":
    parser.error("Only allows sync FROM local copy TO VOSpace")
  ## Currently we don't create nodes in sync and we don't sync onto files
  if opt.verbose:
    sys.stdout.write("Connecting to VOSpace\n")
  c=vos.Client()
  destIsDir=c.isdir(dest)

  ### build a complete file list given all the things on the command line
  filelist=[]
  for filename in args:
    thisRoot=dest
    if os.path.isdir(filename):
      # A trailing "/" means "copy the directory's contents"; without it
      # the directory itself is recreated below the destination.
      if filename[-1]!="/" :
        thisRoot=os.path.join(dest,os.path.basename(filename))
      filelist.extend(buildFileList(filename,destRoot=thisRoot,recursive=opt.recursive,ignore=opt.exclude))
    elif os.path.isfile(filename):
      if destIsDir:
        thisRoot=os.path.join(dest,os.path.basename(filename))
      filelist.append((filename,thisRoot))
    else:
      sys.stderr.write("%s: No such file or directory.\n" % ( filename))

  # Compiled once here instead of on every pass through the loop.
  legalName=re.compile('^[A-Za-z0-9\\._\\-\\(\\);:&\\*\\$@!+=\\/]*$')
  for (src,target) in filelist:
    if os.path.islink(src):
       sys.stderr.write("%s is a link, skipping\n" % ( src))
       continue
    if not os.access(src,os.R_OK):
       sys.stderr.write("Failed to open file %s, skipping\n" % ( src))
       continue
    if legalName.match(src) is None:
       sys.stderr.write("filename %s contains illegal characters, skipping\n" % ( src))
       continue
    copy(src,target)

  streams=startStreams(opt.nstreams)
  queue.join()
  endTime=time.time()

  # Add up the per-thread counters for the report.
  bytesSent=sum(stream.bytesSent for stream in streams)
  bytesSkipped=sum(stream.bytesSkipped for stream in streams)
  filesSent=sum(stream.filesSent for stream in streams)
  filesSkipped=sum(stream.filesSkipped for stream in streams)
  filesErrored=sum(stream.filesErrored for stream in streams)

  sys.stdout.write("\n\n==== TRANSFER REPORT ====\n\n")

  if bytesSent>0:
    rate=bytesSent/(endTime-startTime)/1024.0
    sys.stdout.write("Sent %d files (%8.1f kbytes @ %8.3f kBytes/s)\n" %(filesSent,bytesSent/1024.0,rate))
    # float() keeps the ratio fractional; both byte counts are integers.
    speedUp=(bytesSkipped+bytesSent)/float(bytesSent)
    sys.stdout.write("Speedup:  %f (skipped %d files)\n" % (speedUp,filesSkipped))
  else:
    sys.stdout.write("No files needed sending \n")

  if filesErrored > 0:
    sys.stdout.write("Error transferring %d files, please try again\n" % ( filesErrored))


