import codecs
import sys
import operator
from sets import Set
import pickle
import imp


def is_digit(n):
  """Return True if *n* can be converted to a float, False otherwise.

  Despite the name, this is not a "digits only" check: anything accepted by
  ``float()`` passes, including signed values, decimals, exponent notation
  and the special strings ``"nan"`` / ``"inf"``.

  Values that ``float()`` rejects by type (e.g. ``None`` or a list) are
  reported as non-numeric instead of raising ``TypeError``.
  """
  try:
    float(n)
  except (TypeError, ValueError):
    # ValueError: text that is not a number; TypeError: unsupported type.
    return False
  return True


# Read plane ages: build a tail-number -> age lookup from the plane CSV.
filename = './plane-data.csv'
delim = ','
plane_age_dict = {}
# `with` guarantees the file is closed even if a row fails to parse
# (e.g. int() raising on an unexpected value).
with codecs.open(filename, mode='r', encoding='utf-8') as fin:
  fin.readline()  # skip the header line
  for line in fin:
    line = line.strip()
    parts = line.split(delim)
    # Only rows with exactly 9 columns whose last column is not the literal
    # 'None' are usable; everything else is skipped.
    # (`!=` replaces the `<>` operator, which no longer exists in Python 3.)
    if len(parts) != 9 or parts[8] == 'None':
      continue
    tailnum = parts[0]
    # The last column is presumably the year the plane was built
    # (TODO confirm against the CSV header); the age is measured relative
    # to the hard-coded reference year 2015.
    age = 2015 - int(parts[8])
    plane_age_dict[tailnum] = age
print("#plane: {0}".format(len(plane_age_dict)))

#####

# Read data: filter the flight records in ./2008.csv and write, for every
# accepted row, the plane's age followed by the selected columns to
# ./2008.data.prep (each value followed by a single space, one row per line).
filename = './2008.csv'
delim = ','
cand_cols = [1,2,3,4,7,13,14,18]          # column indices copied to the output
cand_mon = [1,2,3,4,5,6,7,8,9,10,11,12]   # month values that are kept
mon_col = 1                               # index of the month column
tailnum_col = 10                          # index of the tail-number column
# A row needs at least this many fields for every index used below to exist.
min_cols = max(cand_cols + [mon_col, tailnum_col]) + 1

line_id = 0   # rows written to the output file
nunkage = 0   # rows dropped because the tail number has no known age
nna = 0       # rows dropped because a selected column is 'NA'
nmon = 0      # rows whose month is in cand_mon (counted once per row)

# Nested `with` blocks close both files even if a row raises part-way through.
with codecs.open(filename, mode='r', encoding='utf-8') as fin:
  with codecs.open("./2008.data.prep", mode='w') as fout:
    fin.readline()  # skip the header line
    for line in fin:
      line = line.strip()
      parts = line.split(delim)

      # Blank or truncated rows cannot be indexed safely; skip them, the
      # same way the plane-data loop skips malformed rows.
      if len(parts) < min_cols:
        continue

      if int(parts[mon_col]) not in cand_mon:
        continue
      nmon += 1

      fields = [parts[i] for i in cand_cols]

      # Missing values are checked across all selected columns first, so a
      # row containing 'NA' is counted here and never reported as "Err".
      if 'NA' in fields:
        nna += 1
        continue

      # Report the first selected value that does not parse as a number.
      bad = next((f for f in fields if not is_digit(f)), None)
      if bad is not None:
        print("Err {0}".format(bad))
        continue

      tailnum = parts[tailnum_col]
      if tailnum not in plane_age_dict:
        # nmon was already counted above; it must not be bumped again here.
        nunkage += 1
        continue

      # One write per row; every value (including the last) is followed by
      # a space to keep the output format unchanged. No per-row flush is
      # needed: closing the file flushes everything.
      row = ["{0}".format(plane_age_dict[tailnum])] + fields
      fout.write(' '.join(row) + ' \n')
      line_id += 1
print("#lines {0}".format(line_id))

