#!/usr/bin/env python

# Copyright (c) 2010- The University of Notre Dame.
# This software is distributed under the GNU General Public License.
# See the file COPYING for details.

# This program reads catalog history data from disk.
# It accepts arguments: catalog history directory, starting point, ending point.
# It outputs the checkpoint at the starting point, followed by all deltas until the ending point.

import sys
import os
import time
import datetime

current_version = "1.0"


# Set to a true value to have logDebug() messages written out.
debug = 0
def logDebug(str):
  """Forward a diagnostic message to logError(), but only when `debug` is set.

  The parameter is named `str` (shadowing the builtin) in the original
  interface; the name is kept so existing callers are unaffected.
  """
  if not debug:
    return
  logError(str)
def logError(str):
  """Write the message, followed by a newline, to standard error."""
  line = str+"\n"
  stream = sys.stderr
  stream.write(line)
def badSyntax(str):
  """Print an optional error message plus the usage text to stderr, then exit.

  str -- the problem to report; pass '' to show only the usage text (-h).

  The process exits with status 1 when a message was given, so that calling
  scripts can detect bad arguments, and with status 0 for a plain help
  request. Always raises SystemExit.
  """
  sys.stderr.write(str+"\n\n")
  sys.stderr.write("Please use the following syntax:\n")
  sys.stderr.write("catalog_history_select [source_directory] [starting_point] [ending_point]\n\n")
  sys.stderr.write("See man page for more details and examples.\n\n")
  # A non-empty message means the arguments were wrong: report failure.
  if str:
    sys.exit(1)
  sys.exit(0)

def timeToFileName(ts, days_offset=0, base_dir=None):
  """Return the history file path prefix (without extension) for a given day.

  ts          -- Unix timestamp, interpreted in local time.
  days_offset -- number of calendar days to move forward (or back, if
                 negative) from the day containing `ts`.
  base_dir    -- directory prefix, expected to end with '/'; defaults to the
                 module-level `directory`.

  The result has the form <base_dir><year>/<day of year>.
  """
  if base_dir is None:
    base_dir = directory
  # Shift by whole calendar days rather than by multiples of 86400 seconds:
  # a local day is 23 or 25 hours long when daylight saving time changes, so
  # adding seconds could skip a day or land on the same day twice.
  day = datetime.datetime.fromtimestamp(ts) + datetime.timedelta(days=days_offset)
  tt = day.timetuple()
  return base_dir+str(tt.tm_year)+'/'+str(tt.tm_yday)


# Get source directory
# The first argument is either a flag (-v for version, -h for help) or the
# directory that holds the catalog history files.
if len(sys.argv)<=1 or not sys.argv[1]:
  badSyntax("You must specify a source directory. Use '.' for the current directory.")
elif sys.argv[1]=='-v':
  program = sys.argv[0].rsplit('/',1)[-1]
  print(program+" version CCTOOLS_VERSION (released CCTOOLS_RELEASEDATE)")
  sys.exit(0)
elif sys.argv[1]=='-h':
  badSyntax('')
else:
  arg = sys.argv[1]
  # Arguments starting with '.' are prefixed with the current working directory.
  if arg.startswith('.'):
    prefix = os.getcwd()+'/'
  else:
    prefix = ''
  directory = prefix+arg+'/'
  if not os.path.isdir(directory):
    badSyntax('Source directory does not exist...')


# Get starting point
# Accepted forms for the second argument:
#   YYYY-MM-DD-HH-MM-SS  a date and time in local time (14 to 19 characters)
#   <integer>            a Unix timestamp
# Without the argument, the start defaults to January 1st of the current year.
begin = 0
if len(sys.argv)>2 and sys.argv[2]:
  arg = sys.argv[2]
  try:
    if 14 <= len(arg) <= 19:
      parsed = time.strptime(arg, '%Y-%m-%d-%H-%M-%S')
      dt = datetime.datetime(*(parsed[0:6]))
      begin = int( time.mktime(dt.timetuple()))
    else:
      begin = int(arg)
  except ValueError:
    # Neither a valid date nor an integer: report it instead of a traceback.
    badSyntax('Invalid starting point: '+arg)
else:
  logError("No starting point specified. Starting at the beginning of this year.")
  year_start = datetime.date(datetime.date.today().year, 1, 1)
  begin = int( time.mktime(year_start.timetuple()))


# Get ending point
# Accepted forms for the third argument:
#   YYYY-MM-DD-HH-MM-SS  a date and time in local time (14 to 19 characters)
#   y<N> w<N> d<N> h<N> m<N> s<N>  a duration after the starting point
#   +<N>                 N seconds after the starting point
#   <integer>            a Unix timestamp
# Without the argument, the end defaults to the current time.

# Seconds per duration unit; a year is counted as 365.25 days.
duration_seconds = {
  'y': 31557600,
  'w': 604800,
  'd': 86400,
  'h': 3600,
  'm': 60,
  's': 1,
  '+': 1,
}

if len(sys.argv)>3 and sys.argv[3]:
  arg = sys.argv[3]
  try:
    if 14 <= len(arg) <= 19:
      dt = datetime.datetime(*(time.strptime(arg, '%Y-%m-%d-%H-%M-%S')[0:6]))
      end = int( time.mktime(dt.timetuple()) )
    elif arg[0] in duration_seconds:
      end = begin + int(arg[1:])*duration_seconds[arg[0]]
    else:
      end = int(arg)
  except ValueError:
    # Neither a valid date, duration nor integer: report it instead of a traceback.
    badSyntax('Invalid ending point: '+arg)
else:
  logError("No ending point specified. Ending with data from today (if available).")
  end = int( time.mktime(datetime.datetime.now().timetuple()) )


logDebug('Start:'+str(begin)+' End:'+str(end))



def read_in_chunks(file_object, chunk_size=1024):
    """Generate successive pieces of a file, each at most chunk_size long.

    Reads lazily via file_object.read(chunk_size) and stops at the first
    empty read. The default chunk size is 1k.
    """
    piece = file_object.read(chunk_size)
    while piece:
        yield piece
        piece = file_object.read(chunk_size)



# Print the checkpoint file for the day of the starting point, one stripped
# line at a time. A missing or unreadable checkpoint is not fatal.
# NOTE(review): an earlier comment here mentioned starting a day earlier
# (day_offset=-1) to catch auto-deleted series that resume reporting; the
# code starts at offset 0 -- confirm which is intended.
day_offset = 0
filename = timeToFileName(begin,day_offset)
filesFound = 0
checkpoint_file = None
try:
  checkpoint_file = open(filename+'.ckpt', 'r')
  for line in checkpoint_file:
    print(line.strip())
except IOError:
  logDebug('File does not exist? '+filename+'.ckpt')
finally:
  # Close the handle explicitly instead of leaving it to garbage collection.
  if checkpoint_file is not None:
    checkpoint_file.close()

# Read each of the log files
# One .log file per day is printed, starting with the day of the starting
# point. The file for the current day is read before logTime is compared
# with `end`, so the loop also covers the day on which the ending point is
# passed. Missing or unreadable files are skipped.
logTime = begin
while True:

  logDebug(filename+'.log')
  log_file = None
  try:
    log_file = open(filename+'.log', 'r')
    # Count the file only once it has actually been opened, so that the
    # "no files found" report below can trigger.
    filesFound += 1
    for line in log_file:
      print(line.strip())
  except IOError:
    logDebug('File does not exist? '+filename+'.log')
  finally:
    # Close the handle explicitly instead of leaving it to garbage collection.
    if log_file is not None:
      log_file.close()

  if logTime>end:
    break
  day_offset += 1
  logTime += 86400
  filename = timeToFileName(begin,day_offset)

if filesFound==0:
  logError("No files found in that source directory for the specified time frame.")
