# -*- coding: utf-8 -*-
"""
Created on Mon Sep 29 13:41:22 2014

@author: s4493222
"""
'''
Code to support SI for 
Reconciling the signal and noise of atmospheric warming on decadal timescales
Roger N Jones* and James H Ricketts

Victoria Institute of Strategic Economic Studies, Victoria University, Melbourne, Victoria 8001, Australia
Correspondence to: Roger N. Jones (roger.jones@vu.edu.au)

And related publications

This code made available for informational purposes under the Creative Commons Attribution 3.0 License (enabling electronic and paper copies);
'''

'''
This is very much an experimenter's code, and it wins no prizes for style.
Please discuss with me jim.ricketts@gmail.com 
'''

#CB_Reports.py - takes convergent_breaks results and formats a report as a csv file with the shuffle_test results added
#convergentBreaks returns   initialBreaks, newbreaks, statlist
#statlist is a list of [(ystats, tstats, shiftstats)]
#where each of the three elements is (mean, stdev)
#e.g ([1850.0, 1920.0, 1996.0, 2012.0], [1850.0, 1920.0, 1996.0, 2012.0], [((1920.0, 0.0), (42.682625, 0.76312649), (0.20108237061765552, 0.0020284819355781795)), ((1996.0, 0.0), (52.760872, 0.6639939), (0.55426463964501127, 0.004255255432305689))])

#shuffle cut returns   stats.norm.fit(TiPosIndex), stats.norm.fit(TiList),stats.norm.fit(ShiftList), float(bins[mode])/iterations, mode, 

#grep Returning *.trace|awk 'BEGIN {FS="->"}; {print $2}' |awk 'BEGIN {FS = "\\[\\("}; {print $1}' |sort|uniq -c
#MyDocuments/abrupt/4Roger_Nature_SVN_264/had4_krig_annual_v2_0_0
#grep Returning *.trace|awk 'BEGIN {FS="->"}; {print $2}' |awk 'BEGIN {FS = "\\[\\("}; {print $1}' |sort|uniq -c|sed 's/\[//g' |sed 's/\]//g'|sed 's/,//g'
# grep Returning *.trace|awk 'BEGIN {FS="->"}; {print $2}' |awk 'BEGIN {FS = "\\[\\("}; {print $1}' |sort|uniq -c|sed 's/\[//g' |sed 's/\]//g'|sed 's/,//g'|awk '{for (i=1;i<=NF;i++) {a[$i] +=$0;}} END {for (y in a) {print y " " a[y];}}'
# for d in `ls -d */`;do pushd $d; echo $d;grep Returning *.trace|awk 'BEGIN {FS="->"}; {print $2}' |awk 'BEGIN {FS = "\\[\\("}; {print $1}' |sort|uniq -c|sed 's/\[//g' |sed 's/\]//g'|sed 's/,//g'|awk -v dr=$d '{for (i=2;i<=NF;i++) {a[$i] +=$0;}} END {for (y in a) {print dr " " y " " a[y];}}';popd ;done >tabulated.rpt
import numpy as np
from CMIP3 import CMIP3gw
from CMIP5 import CMIP5gw
from NOAAascii import NOAAascii
from ICMP import ICMP
from HADCRUT4 import HADCRUT4
TRENDS=False
import ConvergentBreaks as convergent_breaks  
import shuffle
import matplotlib.pyplot as plt
import datetime
import os
import glob
#import csv
import errno
import PATHS
# Revision stamp in "$Revision: N $" form (looks like a Subversion keyword
# expansion -- TODO confirm); it is not read anywhere in the visible code.
SVNRevision="$Revision: 401 $"
# Passed as the ``screenpr`` keyword to convergent_breaks.convergentBreaks by
# the __main__ driver in this file.
SCREENPR=0.01


def mkdir_p(path):
    """Create *path* and any missing parents, like the shell's ``mkdir -p``.

    An OSError is re-raised unless it reports EEXIST and *path* is an
    existing directory; in that one case the call is a silent no-op.
    """
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        already_a_directory = exc.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_directory:
            raise
def applyShuffle(cb, xs, ys, Years):
  """Run shuffle.shuffle_cut on the window surrounding every break in *cb*.

  cb     -- result of convergentBreaks; only cb[2] is read here.  Each element
            b of cb[2] supplies a break year as b[0][0].
  xs, ys -- sequences sliced in parallel with Years (lists or numpy arrays).
  Years  -- sequence of years; every rounded break year must occur in it,
            otherwise list.index raises ValueError.

  Returns a list with one shuffle_cut result per element of cb[2].  For break
  number k the window runs from the index after the previous break (or 0) up
  to the index of the next break (or the end of the data), and the fourth
  argument handed to shuffle_cut is the offset of the break inside that window.
  """
  # Build the lookup list once instead of once per break.
  year_list = list(Years)
  breaks = [0]
  for b in cb[2]:
    breaks.append(year_list.index(round(b[0][0])) + 1)
  breaks.append(len(Years))

  results = []
  for b in range(1, len(breaks) - 1):
    lo = breaks[b-1]
    hi = breaks[b+1]
    # len() works for lists and numpy arrays alike; comparing a numpy slice
    # with [] does not give a usable truth value, so the old test never fired.
    if len(xs[lo:hi]) == 0:
      # Single-argument form prints the same text under Python 2 and 3.
      print("oops %s %s" % (breaks, b))
    results.append(shuffle.shuffle_cut(xs[lo:hi], ys[lo:hi], Years[lo:hi], breaks[b]-lo))
  return results

def report(filename,cb, xs, ys, Years, HeaderComments):
  """Write the break analysis *cb*, with shuffle statistics, to a csv file.

  filename       -- output file, opened for writing (truncated if present).
  cb             -- convergentBreaks result: cb[0] initial breaks, cb[1]
                    refined break years, cb[2] per-break statistics.
  xs, ys, Years  -- the analysed series, forwarded to applyShuffle.
  HeaderComments -- iterable whose items are written first, one per line,
                    each wrapped in double quotes.

  After the comment lines come the quoted initial breaks plus a timestamp,
  the quoted refined breaks, a column-name row, and one data row per entry
  of cb[2].
  """
  import bivariate_multi as bivariate
  shuffled=applyShuffle(cb, xs, ys, Years)
  initial_text='"%s"' % cb[0] #initial bisection result
  refined_text='"%s"' % cb[1] #after refinement list of years
  refined_years=cb[1]
  with open(filename,"w") as out:
    for comment in HeaderComments:
      print >>out,'"%s"' % (str(comment),)
    print >>out, initial_text, datetime.datetime.now().strftime(" %X,%a,%d-%b-%Y")
    print >>out, refined_text
    print >>out,"BreakDate,CritTi, BreakMean, BreakStDev, BreakTi0, BreakTi0StDev, Shift, ShiftStd, ShuffledBreakdate, ShuffledBreakStDev, ShuffledBreakTi0, ShuffledBreakTi0StDev, ShuffledShift, ShuffledShiftStd, ShuffledYearHitRatio, ShuffledModeYear"
    # pos is the 1-based position of the break inside refined_years, whose
    # first and last elements are the ends of the record.
    for pos, entry in enumerate(cb[2], 1):
      shuffled_entry=shuffled[pos-1]
      # Critical value for the span between the neighbouring breaks.
      span=int(refined_years[pos+1]-refined_years[pos-1])
      crit=bivariate.critTi(0.01, span)
      observed=(entry[0][0],entry[0][1],entry[1][0],entry[1][1],entry[2][0],entry[2][1])
      resampled=(shuffled_entry[0][0],shuffled_entry[0][1],shuffled_entry[1][0],shuffled_entry[1][1],shuffled_entry[2][0],shuffled_entry[2][1])
      row=(int(round(refined_years[pos])),crit)+observed+resampled+(shuffled_entry[3], int(shuffled_entry[4] -1))
      print >>out,'%d,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%d' % row

def gatherNOAANames(path):
  """Return the paths of all ``*.asc`` files directly inside *path*.

  The pattern is built with os.path.join, so the search works with the
  platform's own separator and does not double the separator when *path*
  already ends with one.  The order is whatever glob.glob yields; a missing
  directory gives an empty list.
  """
  return glob.glob(os.path.join(path, "*.asc"))

def gatherCMIP5Names(path):
  """Pair every historical ``.GW`` file in *path* with its RCP counterparts.

  A historical file is any name matching ``*historical*.GW``.  Its base name
  must split on "_" into exactly six parts (var, sort, gcm, experiment, rep,
  remainder); any other shape raises ValueError from the tuple unpacking.
  For each of rcp26, rcp45, rcp60 and rcp85 the directory is searched for
  ``<var>_<sort>_<gcm>_<rcp>_<rep>_*.GW``.

  Returns a list of ``[historical_path, rcp_path]`` pairs.  Patterns are
  built with os.path.join so the platform separator is used.
  """
  filelist=[]
  for h in glob.glob(os.path.join(path, "*historical*.GW")):
    (var,sort, gcm, _, rep, _)=os.path.basename(h).split("_")
    for rcp in ["rcp26", "rcp45", "rcp60", "rcp85"]:
      pattern=os.path.join(path, "%s_%s_%s_%s_%s_*.GW" % (var, sort, gcm, rcp,rep))
      for c in glob.glob(pattern):
        if h != c:
          filelist.append([h,c])
  return filelist

def gatherCMIP5ZonalNames(path):
  """Pair every historical ``.ZW`` file in *path* with its RCP counterparts.

  A historical file is any name matching ``*historical*.ZW``.  Its base name
  must split on "_" into exactly eight parts (var, sort, gcm, experiment,
  rep, one ignored part, lo, hi); any other shape raises ValueError from the
  tuple unpacking.  The last part, hi, still carries the file extension.
  For each of rcp26, rcp45, rcp60 and rcp85 the directory is searched for
  ``<var>_<sort>_<gcm>_<rcp>_<rep>_*_<lo>_<hi>``.

  Returns a list of ``[historical_path, rcp_path]`` pairs.  Patterns are
  built with os.path.join so the platform separator is used.
  """
  filelist=[]
  for h in glob.glob(os.path.join(path, "*historical*.ZW")):
    (var,sort, gcm, _, rep, _, lo,hi)=os.path.basename(h).split("_")
    for rcp in ["rcp26", "rcp45", "rcp60", "rcp85"]:
      pattern=os.path.join(path, "%s_%s_%s_%s_%s_*_%s_%s" % (var, sort, gcm, rcp,rep,lo,hi))
      for c in glob.glob(pattern):
        if h != c:
          filelist.append([h,c])
  return filelist

def gatherCMIP3Names(path):
  """Return the paths of all ``*_gw.txt`` files directly inside *path*.

  The pattern is built with os.path.join so the platform separator is used;
  the order is whatever glob.glob yields.
  """
  return glob.glob(os.path.join(path, "*_gw.txt"))
  
def gatherNCDCZonalNames(path):
  """Return the paths of all files in *path* whose names end in ``asc``.

  The pattern is ``*asc`` (no dot), so any name with that suffix matches.
  It is built with os.path.join so the platform separator is used; the
  order is whatever glob.glob yields.
  """
  return glob.glob(os.path.join(path, "*asc"))
  
def gatherNCDCcsvNames(path):
  """Return the paths of all ``*4.csv`` files directly inside *path*.

  The pattern is built with os.path.join so the platform separator is used;
  the order is whatever glob.glob yields.
  """
  return glob.glob(os.path.join(path, "*4.csv"))

def collated(path, intype, initial_header=0):
  """Collate the per-trace break reports found in *path* into collated.csv.

  path           -- directory (joined with backslash separators) holding the
                    *.trace files and the .csv reports of the same base name.
  intype         -- must not be None: an Exception is raised while scanning
                    the trace files if it is.  Its value has no other effect
                    here, because ``intype in []`` can never be true.
  initial_header -- passed as skip_header when the source data file named in
                    a report's first header line is opened.

  The output holds four tables one after another: a combined table with
  value, Ti0 and shift per model and year, followed by three single-quantity
  tables (value minus the model's 1961-1990 mean, Ti0, shift).  graph() is
  called once per trace as a side effect.  Returns None.
  """
  import glob
  import os
  #import csvfile
  # NOTE(review): csvfile is imported only inside the ``if __name__ ==
  # "__main__":`` section of this file, so this function relies on the file
  # being run as a script.
  outf=open(path+"\\collated.csv","w",0) #open with forced flush because of huger line lengths
  tracenames=glob.glob(path+"\\*.trace")
  initialfn=None
  #now open the csv file f
  
  i = 0
  # Scan the traces until one report can be opened together with the source
  # data file it names; that source file (c3) supplies the master Years list.
  while i in range(len(tracenames)) and initialfn == None: #find the initial file
    tn = tracenames[i]
   # bn=os.path.basename(tn)
    csvfn=os.path.splitext(tn)[0]+".csv"
    csvfn=csvfn.replace("\\\\","\\") #fudge to overcome an error I made earlier
    if intype in []:
      n = 3
    else:
      n = 4
    if intype==None:
      raise Exception("Specify type of input")
    try:
      analysis=csvfile.CSVfile(csvfn, timevar="BreakDate",skip_header=n)
      initialfn=analysis.header()[0].replace("\\\\","\\").replace("\n","").replace('"','').replace("'","")
      c3=csvfile.CSVfile(initialfn,skip_header=initial_header)
      Years=c3.data4name("Year")
    except:
      # NOTE(review): every error is swallowed and the next trace is tried.
      # If no trace succeeds, n/c3/Years may be unbound or incomplete and the
      # code below fails with a NameError instead of a clear message.
      pass
    i +=1
    
  #now a reporting pass  
  # rept maps model name -> (years, values, Ti0-per-year, shift-per-year).
  rept={}
  for tn in tracenames:
    #bn=os.path.basename(tn)
    csvfn=os.path.splitext(tn)[0]+".csv"
    csvfn=csvfn.replace("\\\\","\\") #fudge to overcome an error I made earlier
    # n is whatever the scan loop above last assigned.
    analysis=csvfile.CSVfile(csvfn, timevar="BreakDate",skip_header=n)
    # The second header line of a report is used as the model (column) name.
    model=analysis.header()[1].replace("\n","").replace('"','').replace("'","")
#    if model == "BCCA2r1":
#      print model
    ys=c3.data4name(model)
    Yrs=c3.time4name(model)
    Tis=np.zeros(np.shape(Yrs),dtype=np.float32)
    shifts=np.zeros(np.shape(Yrs),dtype=np.float32)
    bdates=analysis["BreakDate"]
    pltbreaks=[Yrs[0]]
    # Copy BreakTi0 and Shift into the slot of each year that equals a break
    # date; all other years keep 0.
    for i in range(len(bdates)):
      for y in range(len(Yrs)):
        if Yrs[y] == bdates[i]:
          Tis[y] = analysis["BreakTi0"][i]
          shifts[y]=analysis["Shift"][i]
          pltbreaks.append(Yrs[y])
    rept[model]=(Yrs,ys,Tis,shifts)
    pltbreaks.append(Yrs[-1])
    graph(ys, Yrs, pltbreaks,model,csvfn)
  # Combined table: header row first.  The trailing commas on the print
  # statements keep everything on one output line until the bare print.
  print >>outf, "Year",
  headerfields=c3.fields()
  headerfields.sort()
  for model in headerfields:
    if model != "Year":
      print >>outf,","+model+",",model+"_Ti0,",model+"_shift",
  print >>outf
  # NOTE(review): header columns follow c3.fields() while data columns follow
  # rept's keys; the two line up only when every field has a trace report.
  keys=rept.keys()
  keys.sort()
  for y in range(len(Years)):
    print >>outf,"%d" % (Years[y],),
    for k in keys:
      res=rept[k]
      ry=res[0].searchsorted(Years[y])
#      if k == "BCCA2r1":
#        print k
      if ry in range(len(res[0])) and Years[y] == res[0][ry]: #not the case if search goes out of bounds
        try:
          print >>outf,",%f,%f,%f" % (res[1][ry], res[2][ry], res[3][ry]),
        except:
          # A value that cannot be formatted becomes three blank cells.
          print >>outf,", , ,",
          pass
      else:
        print >>outf,", , ,",
    print >>outf

  headerfields=c3.fields()
  headerfields.sort()
  # Column-name suffix for table j; index 0 is unused.
  extent=["","","_Ti0","_shift"]
  
  # Per-model mean of the values between the positions of 1961 and 1990.
  # NOTE(review): rept[k] raises KeyError for a field without a trace report.
  tanoms={}
  for k in headerfields:
    if k !="Year":
      res=rept[k]
      lo=res[0].searchsorted(1961)
      hi=res[0].searchsorted(1990)+1
      tanoms[k] = np.mean(res[1][lo:hi])
    
  # Three further tables: j=1 values minus the model's mean, j=2 Ti0, j=3 shift.
  for j in [1,2,3]:
    print >>outf, "Year",
    for model in headerfields:
      if model != "Year":
        print >>outf,","+model+extent[j],
    print >>outf
    keys=rept.keys()
    keys.sort()
    for y in range(len(Years)):
      print >>outf,"%d" % (Years[y],),
      for k in keys:
        res=rept[k]
        ry=res[0].searchsorted(Years[y])
  #      if k == "BCCA2r1":
  #        print k
        if ry in range(len(res[0])) and Years[y] == res[0][ry]: #not the case if search goes out of bounds
          try:
            if j == 1:
              print >>outf,",%f" % (res[j][ry]-tanoms[k], ),
            else:
              print >>outf,",%f" % (res[j][ry], ),
          except:
            print >>outf,",",
            pass
        else:
          print >>outf,",",
      print >>outf
  # NOTE(review): outf is closed only on the success path.
  outf.close()
  
def collatedCMIP5(path, intype):
  """Collate the CMIP5 break reports for one *intype* into a single CSV file.

  path   -- directory (joined with backslash separators) holding the trace
            files and the .csv reports of the same base name.
  intype -- selects the traces matching ``*_<intype>_*.trace`` and is part of
            the output name ``collated_<intype>_.csv``.  It is concatenated
            directly into the output name, so it has to be a string.

  For every trace the first two header lines of its report are passed to
  CMIP5gw, which supplies the series (Warming()) and its years (Years()).
  The output holds a combined value/Ti0/shift table followed by three
  single-quantity tables (value minus the model's 1961-1990 mean, Ti0,
  shift), over the years from the smallest first year to the largest last
  year seen, capped at 2100.  graph() is called once per trace.
  Returns None.
  """
  import glob
  import os
  #import csvfile
  # NOTE(review): csvfile is imported only inside the ``if __name__ ==
  # "__main__":`` section of this file, so this function relies on the file
  # being run as a script.
  outf=open(path+"\\collated_"+intype+"_.csv","w",0) #open with forced flush because of huger line lengths
  tracenames=glob.glob(path+"\\*_"+str(intype)+"_*.trace")

  # rept maps model name -> (years, values, Ti0-per-year, shift-per-year).
  rept={}
  headerfields=[]
  minyr=10000
  maxyr=0
  for tn in tracenames:
    #bn=os.path.basename(tn)
    csvfn=os.path.splitext(tn)[0]+".csv"
    csvfn=csvfn.replace("\\\\","\\") #fudge to overcome an error I made earlier
    analysis=csvfile.CSVfile(csvfn, timevar="BreakDate",skip_header=4)
    # First two header lines, stripped of newlines and quotes, go to CMIP5gw.
    fn=analysis.header()[:2]
    fn[0]=fn[0].replace("\n","").replace('"','').replace("'","")
    fn[1]=fn[1].replace("\n","").replace('"','').replace("'","")
    
    # Model label = 3rd, 4th and 5th "_"-separated parts of the base name of
    # the second header line.
    model=analysis.header()[1].replace("\n","").replace('"','').replace("'","")
    model=os.path.basename(model).split("_")
    model=model[2]+"_"+model[3]+"_"+model[4]
    c3=CMIP5gw(fn)
    ys =  c3.Warming()
    Yrs= c3.Years()
    minyr=min(minyr,np.min(Yrs))
    maxyr=min(2100,max(maxyr,np.max(Yrs)))
#    if model == "BCCA2r1":
#      print model
    Tis=np.zeros(np.shape(Yrs),dtype=np.float32)
    shifts=np.zeros(np.shape(Yrs),dtype=np.float32)
    bdates=analysis["BreakDate"]
    pltbreaks=[Yrs[0]]
    # Copy BreakTi0 and Shift into the slot of each year that equals a break
    # date; all other years keep 0.
    for i in range(len(bdates)):
      for y in range(len(Yrs)):
        if Yrs[y] == bdates[i]:
          Tis[y] = analysis["BreakTi0"][i]
          shifts[y]=analysis["Shift"][i]
          pltbreaks.append(Yrs[y])
    # NOTE(review): two traces yielding the same model label overwrite each
    # other in rept but are both appended to headerfields.
    rept[model]=(Yrs,ys,Tis,shifts)
    pltbreaks.append(Yrs[-1])
    graph(ys, Yrs, pltbreaks,model,csvfn)
    headerfields.append(model)
  Years=range(int(round(minyr)), int(round(maxyr)) + 1)
  # Combined table: header row first.  The trailing commas on the print
  # statements keep everything on one output line until the bare print.
  print >>outf, "Year",

  headerfields.sort()
  for model in headerfields:
    if model != "Year":
      print >>outf,","+model+",",model+"_Ti0,",model+"_shift",
  print >>outf
  
  keys=headerfields
  for y in range(len(Years)):
    print >>outf,"%d" % (Years[y],),
    for k in keys:
      res=rept[k]
      ry=res[0].searchsorted(Years[y])
#      if k == "BCCA2r1":
#        print k
      if ry in range(len(res[0])) and Years[y] == res[0][ry]: #not the case if search goes out of bounds
        try:
          print >>outf,",%f,%f,%f" % (res[1][ry], res[2][ry], res[3][ry]),
        except:
          # A value that cannot be formatted becomes three blank cells.
          print >>outf,", , ,",
          pass
      else:
        print >>outf,", , ,",
    print >>outf

#  headerfields=c3.fields()
#  headerfields.sort()
  # Column-name suffix for table j; index 0 is unused.
  extent=["","","_Ti0","_shift"]
  
  # Per-model mean of the values between the positions of 1961 and 1990.
  tanoms={}
  for k in headerfields:
    if k !="Year":
      res=rept[k]
      lo=res[0].searchsorted(1961)
      hi=res[0].searchsorted(1990)+1
      tanoms[k] = np.mean(res[1][lo:hi])
    
  # Three further tables: j=1 values minus the model's mean, j=2 Ti0, j=3 shift.
  for j in [1,2,3]:
    print >>outf, "Year",
    for model in headerfields:
      if model != "Year":
        print >>outf,","+model+extent[j],
    print >>outf
    for y in range(len(Years)):
      print >>outf,"%d" % (Years[y],),
      for k in keys:
        res=rept[k]
        ry=res[0].searchsorted(Years[y])
  #      if k == "BCCA2r1":
  #        print k
        if ry in range(len(res[0])) and Years[y] == res[0][ry]: #not the case if search goes out of bounds
          try:
            if j == 1:
              print >>outf,",%f" % (res[j][ry]-tanoms[k], ),
            else:
              print >>outf,",%f" % (res[j][ry], ),
          except:
            print >>outf,",",
            pass
        else:
          print >>outf,",",
      print >>outf
  # NOTE(review): outf is closed only on the success path.
  outf.close()

def collatedNCDC(path, intype, part):
  """Collate the NCDC break reports whose names start with *part* into a CSV.

  path   -- directory (joined with backslash separators) holding the trace
            files and the .csv reports of the same base name.
  intype -- selects the number of header lines skipped in each report: 3 when
            intype is in [0], otherwise 4.  None raises an Exception while
            the trace files are scanned.
  part   -- file-name prefix; traces matching ``<part>*1880-2014.csv*.trace``
            are used and the output is ``<part>_collated.csv``.

  Each report's first header line names the source data file, which is
  opened with skip_header=2 and read through its "Value" column.  The
  output holds a combined value/Ti0/shift table followed by three
  single-quantity tables (value minus the series' 1961-1990 mean, Ti0,
  shift).  graph() is called once per trace.  Returns None.
  """
  import glob
  import os
  #import csvfile
  # NOTE(review): csvfile is imported only inside the ``if __name__ ==
  # "__main__":`` section of this file, so this function relies on the file
  # being run as a script.
  outf=open(path+"\\"+part+"_collated.csv","w",0) #open with forced flush because of huger line lengths
  tracenames=glob.glob(path+"\\"+part+"*1880-2014.csv*.trace")
  initialfn=None
  #now open the csv file f
  
  i = 0
  # Scan the traces until one report can be opened together with the source
  # data file it names; that source file supplies the master Years list.
  while i in range(len(tracenames)) and initialfn == None: #find the initial file
    tn = tracenames[i]
   # bn=os.path.basename(tn)
    csvfn=os.path.splitext(tn)[0]+".csv"
    csvfn=csvfn.replace("\\\\","\\") #fudge to overcome an error I made earlier
    if intype in [0]:
      n = 3
    else:
      n = 4
    if intype==None:
      raise Exception("Specify type of input")
    try:
      analysis=csvfile.CSVfile(csvfn, timevar="BreakDate",skip_header=n)
      initialfn=analysis.header()[0].replace("\\\\","\\").replace("\n","").replace('"','').replace("'","")
      c3=csvfile.CSVfile(initialfn,skip_header=2)
      Years=c3.data4name("Year")
    except:
      # NOTE(review): every error is swallowed and the next trace is tried.
      # If no trace succeeds, n/Years may be unbound and the code below fails
      # with a NameError instead of a clear message.
      pass
    i +=1
    
  #now a reporting pass  
  # rept maps model name -> (years, values, Ti0-per-year, shift-per-year).
  rept={}
  headerfields=[]
  for tn in tracenames:
    #bn=os.path.basename(tn)
    csvfn=os.path.splitext(tn)[0]+".csv"
    csvfn=csvfn.replace("\\\\","\\") #fudge to overcome an error I made earlier
    # n is whatever the scan loop above last assigned.
    analysis=csvfile.CSVfile(csvfn, timevar="BreakDate",skip_header=n)
    initialfn=analysis.header()[0].replace("\\\\","\\").replace("\n","").replace('"','').replace("'","")
    c3=csvfile.CSVfile(initialfn,skip_header=2)
    # NOTE(review): the header-derived name on the next line is overwritten
    # immediately; the label actually used is the text after the last "_" in
    # the report file name.
    model=os.path.basename(analysis.header()[0].replace("\n","").replace('"','').replace("'","")).split('.')[0]
    model=csvfn.split("_")[-1]
    headerfields.append(model)
#    if model == "BCCA2r1":
#      print model
    
    ys=c3.data4name("Value")
    Yrs=c3.time4name("Value")
    Tis=np.zeros(np.shape(Yrs),dtype=np.float32)
    shifts=np.zeros(np.shape(Yrs),dtype=np.float32)
    bdates=analysis["BreakDate"]
    pltbreaks=[Yrs[0]]
    # Copy BreakTi0 and Shift into the slot of each year that equals a break
    # date; all other years keep 0.
    for i in range(len(bdates)):
      for y in range(len(Yrs)):
        if Yrs[y] == bdates[i]:
          Tis[y] = analysis["BreakTi0"][i]
          shifts[y]=analysis["Shift"][i]
          pltbreaks.append(Yrs[y])
    rept[model]=(Yrs,ys,Tis,shifts)
    pltbreaks.append(Yrs[-1])
    graph(ys, Yrs, pltbreaks,model,csvfn)
  # Combined table: header row first.  The trailing commas on the print
  # statements keep everything on one output line until the bare print.
  print >>outf, "Year",
  headerfields.sort()
  for model in headerfields:
    if model != "Year":
      print >>outf,","+model+",",model+"_Ti0,",model+"_shift",
  print >>outf
  keys=rept.keys()
  keys.sort()
  for y in range(len(Years)):
    print >>outf,"%d" % (Years[y],),
    for k in keys:
      res=rept[k]
      ry=res[0].searchsorted(Years[y])
#      if k == "BCCA2r1":
#        print k
      if ry in range(len(res[0])) and Years[y] == res[0][ry]: #not the case if search goes out of bounds
        try:
          print >>outf,",%f,%f,%f" % (res[1][ry], res[2][ry], res[3][ry]),
        except:
          # A value that cannot be formatted becomes three blank cells.
          print >>outf,", , ,",
          pass
      else:
        print >>outf,", , ,",
    print >>outf

  headerfields.sort()
  # Column-name suffix for table j; index 0 is unused.
  extent=["","","_Ti0","_shift"]
  
  # Per-model mean of the values between the positions of 1961 and 1990.
  tanoms={}
  for k in headerfields:
    if k !="Year":
      res=rept[k]
      lo=res[0].searchsorted(1961)
      hi=res[0].searchsorted(1990)+1
      tanoms[k] = np.mean(res[1][lo:hi])
    
  # Three further tables: j=1 values minus the model's mean, j=2 Ti0, j=3 shift.
  for j in [1,2,3]:
    print >>outf, "Year",
    for model in headerfields:
      if model != "Year":
        print >>outf,","+model+extent[j],
    print >>outf
    keys=rept.keys()
    keys.sort()
    for y in range(len(Years)):
      print >>outf,"%d" % (Years[y],),
      for k in keys:
        res=rept[k]
        ry=res[0].searchsorted(Years[y])
  #      if k == "BCCA2r1":
  #        print k
        if ry in range(len(res[0])) and Years[y] == res[0][ry]: #not the case if search goes out of bounds
          try:
            if j == 1:
              print >>outf,",%f" % (res[j][ry]-tanoms[k], ),
            else:
              print >>outf,",%f" % (res[j][ry], ),
          except:
            print >>outf,",",
            pass
        else:
          print >>outf,",",
      print >>outf
  # NOTE(review): outf is closed only on the success path.
  outf.close()

def collatedNOAA(path, intype):
  """Collate the NOAA break reports found in *path* into collatedNOAA.csv.

  path   -- directory (joined with backslash separators) holding the
            ``*asc.trace`` files and the .csv reports of the same base name.
  intype -- not used by this function.

  Each report's first header line names the source file, which is read with
  NOAAascii(...).annually(); element 0 of every row is taken as the year and
  element 2 as the value.  The output holds a combined value/Ti0/shift table
  followed by three single-quantity tables (value minus the series'
  1961-1990 mean, Ti0, shift).  The model label is printed to stdout and
  graph() is called once per trace.  Returns None.
  """
#  for fn in files:
#    print fn
#    data=NOAAascii(fn).annually()
#    #print data.annually()
#    ys=np.array([row[2] for row in data]) #2014 not complete
#    
#    Years=np.array([row[0] for row in data])
  import glob
  import os
  #import csvfile
  # NOTE(review): csvfile is imported only inside the ``if __name__ ==
  # "__main__":`` section of this file, so this function relies on the file
  # being run as a script.
  outf=open(path+"\\collatedNOAA.csv","w",0) #open with forced flush because of huger line lengths
  tracenames=glob.glob(path+"\\*asc.trace")
  initialfn=None
  #now open the csv file f
  
  #now a reporting pass  
  # rept maps model name -> (years, values, Ti0-per-year, shift-per-year).
  rept={}
  headerfields=[]
  for tn in tracenames:
    #bn=os.path.basename(tn)
    csvfn=os.path.splitext(tn)[0]+".csv"
    csvfn=csvfn.replace("\\\\","\\") #fudge to overcome an error I made earlier
    #print csvfn
    # NOTE(review): the except branch repeats the identical call, so this
    # try/except only retries once and then lets the error propagate.
    try:
      analysis=csvfile.CSVfile(csvfn, timevar="BreakDate",skip_header=3)
    except:
      analysis=csvfile.CSVfile(csvfn, timevar="BreakDate",skip_header=3)
    initialfn=analysis.header()[0].replace("\\\\","\\").replace("\n","").replace('"','').replace("'","")
    data=NOAAascii(initialfn).annually()
#    #print data.annually()
    ys=np.array([row[2] for row in data]) #2014 not complete
#    
    Yrs=np.array([row[0] for row in data]) 
    # The master Years list is simply the years of the last trace processed.
    # NOTE(review): Years stays unbound when there are no trace files.
    Years=Yrs
    # Label = dot-separated parts 3 and 4 of the source file's base name,
    # followed by the text of part 5 up to its first "_"; part 6 is dropped
    # by the split unless part 5 has no "_".
    model=os.path.basename(analysis.header()[0].replace("\n","").replace('"','').replace("'","")).split(".")
    model=(model[3]+"."+model[4]+"_"+model[5]+"."+model[6]).split("_")[0]
    print model
    headerfields.append(model)
#    if model == "BCCA2r1":
#      print model
    
    Tis=np.zeros(np.shape(Yrs),dtype=np.float32)
    shifts=np.zeros(np.shape(Yrs),dtype=np.float32)
    bdates=analysis["BreakDate"]
    pltbreaks=[Yrs[0]]
    # Copy BreakTi0 and Shift into the slot of each year that equals a break
    # date; all other years keep 0.
    for i in range(len(bdates)):
      for y in range(len(Yrs)):
        if Yrs[y] == bdates[i]:
          Tis[y] = analysis["BreakTi0"][i]
          shifts[y]=analysis["Shift"][i]
          pltbreaks.append(Yrs[y])
    rept[model]=(Yrs,ys,Tis,shifts)
    pltbreaks.append(Yrs[-1])
    graph(ys, Yrs, pltbreaks,model,csvfn)
  # Combined table: header row first.  The trailing commas on the print
  # statements keep everything on one output line until the bare print.
  print >>outf, "Year",
  
  headerfields.sort()
  for model in headerfields:
    if model != "Year":
      print >>outf,","+model+",",model+"_Ti0,",model+"_shift",
  print >>outf
  keys=headerfields
  for y in range(len(Years)):
    print >>outf,"%d" % (Years[y],),
    for k in keys:
      res=rept[k]
      ry=res[0].searchsorted(Years[y])
#      if k == "BCCA2r1":
#        print k
      if ry in range(len(res[0])) and Years[y] == res[0][ry]: #not the case if search goes out of bounds
        try:
          print >>outf,",%f,%f,%f" % (res[1][ry], res[2][ry], res[3][ry]),
        except:
          # A value that cannot be formatted becomes three blank cells.
          print >>outf,", , ,",
          pass
      else:
        print >>outf,", , ,",
    print >>outf

  # Column-name suffix for table j; index 0 is unused.
  extent=["","","_Ti0","_shift"]
  
  # Per-model mean of the values between the positions of 1961 and 1990.
  tanoms={}
  for k in headerfields:
    if k !="Year":
      res=rept[k]
      lo=res[0].searchsorted(1961)
      hi=res[0].searchsorted(1990)+1
      tanoms[k] = np.mean(res[1][lo:hi])
    
  # Three further tables: j=1 values minus the model's mean, j=2 Ti0, j=3 shift.
  for j in [1,2,3]:
    print >>outf, "Year",
    for model in headerfields:
      if model != "Year":
        print >>outf,","+model+extent[j],
    print >>outf
    keys=rept.keys()
    keys.sort()
    for y in range(len(Years)):
      print >>outf,"%d" % (Years[y],),
      for k in keys:
        res=rept[k]
        ry=res[0].searchsorted(Years[y])
  #      if k == "BCCA2r1":
  #        print k
        if ry in range(len(res[0])) and Years[y] == res[0][ry]: #not the case if search goes out of bounds
          try:
            if j == 1:
              print >>outf,",%f" % (res[j][ry]-tanoms[k], ),
            else:
              print >>outf,",%f" % (res[j][ry], ),
          except:
            print >>outf,",",
            pass
        else:
          print >>outf,",",
      print >>outf
  # NOTE(review): outf is closed only on the success path.
  outf.close()

def collatedICMP(path, intype, initial_header=0):
  """Collate the per-trace break reports found in *path* into collated.csv.

  NOTE(review): apart from its name, this body matches collated() statement
  for statement.

  path           -- directory (joined with backslash separators) holding the
                    *.trace files and the .csv reports of the same base name.
  intype         -- must not be None: an Exception is raised while scanning
                    the trace files if it is.  Its value has no other effect
                    here, because ``intype in []`` can never be true.
  initial_header -- passed as skip_header when the source data file named in
                    a report's first header line is opened.

  The output holds four tables one after another: a combined table with
  value, Ti0 and shift per model and year, followed by three single-quantity
  tables (value minus the model's 1961-1990 mean, Ti0, shift).  graph() is
  called once per trace as a side effect.  Returns None.
  """
  import glob
  import os
  #import csvfile
  # NOTE(review): csvfile is imported only inside the ``if __name__ ==
  # "__main__":`` section of this file, so this function relies on the file
  # being run as a script.
  outf=open(path+"\\collated.csv","w",0) #open with forced flush because of huger line lengths
  tracenames=glob.glob(path+"\\*.trace")
  initialfn=None
  #now open the csv file f
  
  i = 0
  # Scan the traces until one report can be opened together with the source
  # data file it names; that source file (c3) supplies the master Years list.
  while i in range(len(tracenames)) and initialfn == None: #find the initial file
    tn = tracenames[i]
   # bn=os.path.basename(tn)
    csvfn=os.path.splitext(tn)[0]+".csv"
    csvfn=csvfn.replace("\\\\","\\") #fudge to overcome an error I made earlier
    if intype in []:
      n = 3
    else:
      n = 4
    if intype==None:
      raise Exception("Specify type of input")
    try:
      analysis=csvfile.CSVfile(csvfn, timevar="BreakDate",skip_header=n)
      initialfn=analysis.header()[0].replace("\\\\","\\").replace("\n","").replace('"','').replace("'","")
      c3=csvfile.CSVfile(initialfn,skip_header=initial_header)
      Years=c3.data4name("Year")
    except:
      # NOTE(review): every error is swallowed and the next trace is tried.
      # If no trace succeeds, n/c3/Years may be unbound or incomplete and the
      # code below fails with a NameError instead of a clear message.
      pass
    i +=1
    
  #now a reporting pass  
  # rept maps model name -> (years, values, Ti0-per-year, shift-per-year).
  rept={}
  for tn in tracenames:
    #bn=os.path.basename(tn)
    csvfn=os.path.splitext(tn)[0]+".csv"
    csvfn=csvfn.replace("\\\\","\\") #fudge to overcome an error I made earlier
    # n is whatever the scan loop above last assigned.
    analysis=csvfile.CSVfile(csvfn, timevar="BreakDate",skip_header=n)
    # The second header line of a report is used as the model (column) name.
    model=analysis.header()[1].replace("\n","").replace('"','').replace("'","")
#    if model == "BCCA2r1":
#      print model
    ys=c3.data4name(model)
    Yrs=c3.time4name(model)
    Tis=np.zeros(np.shape(Yrs),dtype=np.float32)
    shifts=np.zeros(np.shape(Yrs),dtype=np.float32)
    bdates=analysis["BreakDate"]
    pltbreaks=[Yrs[0]]
    # Copy BreakTi0 and Shift into the slot of each year that equals a break
    # date; all other years keep 0.
    for i in range(len(bdates)):
      for y in range(len(Yrs)):
        if Yrs[y] == bdates[i]:
          Tis[y] = analysis["BreakTi0"][i]
          shifts[y]=analysis["Shift"][i]
          pltbreaks.append(Yrs[y])
    rept[model]=(Yrs,ys,Tis,shifts)
    pltbreaks.append(Yrs[-1])
    graph(ys, Yrs, pltbreaks,model,csvfn)
  # Combined table: header row first.  The trailing commas on the print
  # statements keep everything on one output line until the bare print.
  print >>outf, "Year",
  headerfields=c3.fields()
  headerfields.sort()
  for model in headerfields:
    if model != "Year":
      print >>outf,","+model+",",model+"_Ti0,",model+"_shift",
  print >>outf
  # NOTE(review): header columns follow c3.fields() while data columns follow
  # rept's keys; the two line up only when every field has a trace report.
  keys=rept.keys()
  keys.sort()
  for y in range(len(Years)):
    print >>outf,"%d" % (Years[y],),
    for k in keys:
      res=rept[k]
      ry=res[0].searchsorted(Years[y])
#      if k == "BCCA2r1":
#        print k
      if ry in range(len(res[0])) and Years[y] == res[0][ry]: #not the case if search goes out of bounds
        try:
          print >>outf,",%f,%f,%f" % (res[1][ry], res[2][ry], res[3][ry]),
        except:
          # A value that cannot be formatted becomes three blank cells.
          print >>outf,", , ,",
          pass
      else:
        print >>outf,", , ,",
    print >>outf

  headerfields=c3.fields()
  headerfields.sort()
  # Column-name suffix for table j; index 0 is unused.
  extent=["","","_Ti0","_shift"]
  
  # Per-model mean of the values between the positions of 1961 and 1990.
  # NOTE(review): rept[k] raises KeyError for a field without a trace report.
  tanoms={}
  for k in headerfields:
    if k !="Year":
      res=rept[k]
      lo=res[0].searchsorted(1961)
      hi=res[0].searchsorted(1990)+1
      tanoms[k] = np.mean(res[1][lo:hi])
    
  # Three further tables: j=1 values minus the model's mean, j=2 Ti0, j=3 shift.
  for j in [1,2,3]:
    print >>outf, "Year",
    for model in headerfields:
      if model != "Year":
        print >>outf,","+model+extent[j],
    print >>outf
    keys=rept.keys()
    keys.sort()
    for y in range(len(Years)):
      print >>outf,"%d" % (Years[y],),
      for k in keys:
        res=rept[k]
        ry=res[0].searchsorted(Years[y])
  #      if k == "BCCA2r1":
  #        print k
        if ry in range(len(res[0])) and Years[y] == res[0][ry]: #not the case if search goes out of bounds
          try:
            if j == 1:
              print >>outf,",%f" % (res[j][ry]-tanoms[k], ),
            else:
              print >>outf,",%f" % (res[j][ry], ),
          except:
            print >>outf,",",
            pass
        else:
          print >>outf,",",
      print >>outf
  # NOTE(review): outf is closed only on the success path.
  outf.close()

def graph(ys, Years, breaks,title, savename):
  """Plot a series with a fitted line and a mean level for every segment.

  ys       -- values to plot; sliced in parallel with Years.
  Years    -- x values; must provide searchsorted (e.g. a numpy array).
  breaks   -- segment boundaries, including the first and last year.  Each
              boundary b is mapped to the position Years.searchsorted(b+1).
  title    -- figure title; a timestamp is appended on a second line.
  savename -- file name without extension; the figure is saved to
              savename+'.png' unless savename is None.

  For each pair of consecutive boundaries the slice is passed to
  regress.analysed_regress and regress.residuals; the first value returned
  by residuals is drawn in red and its mean as a black horizontal line.
  The figure is always closed, even when fitting, plotting or saving fails.
  Returns None.
  """
  import regress
  fig=plt.figure()
  try:
    plt.plot(Years, ys)

    segments=[Years.searchsorted(b+1) for b in breaks]
    for lo, following in zip(segments[:-1], segments[1:]):
      # The slice runs one position past the next boundary's start.
      hi=following+1
      stats=regress.analysed_regress(ys[lo:hi], Years[lo:hi])
      yhat, _=regress.residuals(ys[lo:hi], Years[lo:hi], stats)
      plt.plot(Years[lo:hi], yhat,'r-')
      # Compute the mean once instead of once per point.
      level=np.mean(yhat)
      plt.plot(Years[lo:hi], [level]*len(yhat), 'k-')
    fig.suptitle(title+"\n"+datetime.datetime.now().strftime("%X,%a,%d-%b-%Y "))
    if savename is not None:
      fig.savefig(savename+'.png')
  finally:
    # Release the figure on every path so repeated calls do not pile up
    # open figures.
    plt.close(fig)
  

if __name__ == "__main__":
  import random
  import csvfile


  
  c3=csvfile.CSVfile(PATHS.CMIP3CSVFILE)
  for fn in c3.fields():
    if fn != "Year":
      dirname = os.path.basename(c3.filename())+"_"+fn 
      newdir = dirname
      mkdir_p(newdir)
#       fn1=os.path.basename(c3.filename())
      picfilename=dirname+"_pic.png"
      if os.path.exists(picfilename):
        print picfilename,"exists!"
      else:
        ys=c3.data4name(fn)
        Years=c3.time4name(fn)       
        counts={}
        for iteration in range(100):
          xs = np.array([random.random() for y in ys])
          fn1=dirname+"//"+dirname+"_"+str(iteration)
          convergent_breaks.TraceFile = fn1+'.trace'
          cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
          report(fn1+'.csv',cb, xs, ys, Years, [fn1,fn])
          breaks=cb[1]
          sbreaks=str(breaks)
          if not sbreaks in counts:
            counts[sbreaks] = 0
          counts[sbreaks] += 1
        maxc=0
        maxb=''
        for b in counts.keys():
          if maxb=='':
            maxb=b
            maxc=counts[b]
          else:
            if counts[b] > maxc:
              maxb=b
              maxc=counts[b]
        graph(ys, Years, eval(maxb),fn1+" "+str(maxc), dirname+"_pic")
        plt.close("all")
#==============================================================================

  import sys
  sys.exit() #just halt

#####################################################################################################

#The following block of code for ICMP data set sent by Roger
#controls
  files=glob.glob(PATHS.ICMIP5PATH+'r*_piCont*.dat')
  for fn1 in files:
    if os.path.exists(os.path.basename(fn1)+"_pic.png"):
      print os.path.basename(fn1)+"_pic.png","exists!"
    else:
        
      counts={}
      newdir= os.path.splitext((os.path.basename(fn1)))[0]
      mkdir_p(newdir)
      icmp=ICMP(fn1)
      Years=icmp.years()
      ys=icmp.annual_means()
      fn = newdir+"\\"+os.path.basename(icmp.filename())
      for iteration in range(100):
        xs = np.array([random.random() for y in ys])
        convergent_breaks.TraceFile = fn+"_"+str(iteration)+'.trace'
        cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
        report(fn+"_"+str(iteration)+'.csv',cb, xs, ys, Years, [icmp.filename(),fn])
        breaks=cb[1]
        sbreaks=str(breaks)
        if not sbreaks in counts:
          counts[sbreaks] = 0
        counts[sbreaks] += 1
      maxc=0
      maxb=''
      for b in counts.keys():
        if maxb=='':
          maxb=b
          maxc=counts[b]
        else:
          if counts[b] > maxc:
            maxb=b
            maxc=counts[b]
      graph(ys, Years, eval(maxb),os.path.basename(fn)+" "+str(maxc), os.path.basename(fn)+"_pic")
      plt.close("all")
  #then all RCPs 
  files=glob.glob(PATHS.ICMIP5PATH+'r*_RCP*.dat')
  for fn1 in files:
    if os.path.exists(os.path.basename(fn1)+"_pic.png"):
      print os.path.basename(fn1)+"_pic.png","exists!"
    else:
        
      counts={}
      newdir= os.path.splitext((os.path.basename(fn1)))[0]
      mkdir_p(newdir)
      icmp=ICMP(fn1)
      Years=icmp.years()
      ys=icmp.annual_means()
      fn = newdir+"\\"+os.path.basename(icmp.filename())
      for iteration in range(100):
        xs = np.array([random.random() for y in ys])
        convergent_breaks.TraceFile = fn+"_"+str(iteration)+'.trace'
        cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
        report(fn+"_"+str(iteration)+'.csv',cb, xs, ys, Years, [icmp.filename(),fn])
        breaks=cb[1]
        sbreaks=str(breaks)
        if not sbreaks in counts:
          counts[sbreaks] = 0
        counts[sbreaks] += 1
      maxc=0
      maxb=''
      for b in counts.keys():
        if maxb=='':
          maxb=b
          maxc=counts[b]
        else:
          if counts[b] > maxc:
            maxb=b
            maxc=counts[b]
      graph(ys, Years, eval(maxb),os.path.basename(fn)+" "+str(maxc), os.path.basename(fn)+"_pic")
      plt.close("all")
      
####################################################################################################
#The following block of code for KNMI data downloaded by jim to match set sent by Roger

  files=glob.glob(PATHS.ICMIP5PATH+'rcp26\\r*.dat.txt')
  for fn1 in files:
    if os.path.exists(os.path.basename(fn1)+"_pic.png"):
      print os.path.basename(fn1)+"_pic.png","exists!"
    else:
        
      counts={}
      newdir= os.path.splitext((os.path.basename(fn1)))[0]
      mkdir_p(newdir)
      icmp=ICMP(fn1)
      Years=icmp.years()
      ys=icmp.annual_means()
      fn = newdir+"\\"+os.path.basename(icmp.filename())
      for iteration in range(100):
        xs = np.array([random.random() for y in ys])
        convergent_breaks.TraceFile = fn+"_"+str(iteration)+'.trace'
        cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
        report(fn+"_"+str(iteration)+'.csv',cb, xs, ys, Years, [icmp.filename(),fn])
        breaks=cb[1]
        sbreaks=str(breaks)
        if not sbreaks in counts:
          counts[sbreaks] = 0
        counts[sbreaks] += 1
      maxc=0
      maxb=''
      for b in counts.keys():
        if maxb=='':
          maxb=b
          maxc=counts[b]
        else:
          if counts[b] > maxc:
            maxb=b
            maxc=counts[b]
      graph(ys, Years, eval(maxb),os.path.basename(fn)+" "+str(maxc), os.path.basename(fn)+"_pic")
      plt.close("all")
      
####################################################################################################
#The following block of code is for NOAA's banded observations
      
  files = gatherNOAANames(PATHS.NCDCBANDED)

  for fn in files:
    newdir= os.path.splitext(os.path.basename(fn))[0]
    fn1=newdir+"//"+os.path.basename(fn)
    if os.path.exists(os.path.basename(fn1)+"_pic.png"):
      print os.path.basename(fn1)+"_pic.png","exists!"
    else:        
      data=NOAAascii(fn).annually()
      #print data.annually()
      ys=np.array([row[2] for row in data]) #2014 not complete
      Years=np.array([row[0] for row in data])
      counts={}
      mkdir_p(newdir)
      for iteration in range(100):
        fn2=fn1+"_"+str(iteration)
        xs = np.array([random.random() for y in ys])
        convergent_breaks.TraceFile = fn2+'.trace'
        cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
        report(fn2+'.csv',cb, xs, ys, Years, [fn])
        breaks=cb[1]
        sbreaks=str(breaks)
        if not sbreaks in counts:
          counts[sbreaks] = 0
        counts[sbreaks] += 1
      maxc=0
      maxb=''
      for b in counts.keys():
        if maxb=='':
          maxb=b
          maxc=counts[b]
        else:
          if counts[b] > maxc:
            maxb=b
            maxc=counts[b]
      graph(ys, Years, eval(maxb),os.path.basename(fn1)+" "+str(maxc), os.path.basename(fn1)+"_pic")

#
#######################################################################################################
###The following block of code for GISSTEMP3 banded observation
  c3=csvfile.CSVfile("GISSTEMPto6-2013b.csv",skip_header=9)
  for fn in c3.fields():
    if fn != "Year":
      if os.path.exists(c3.filename()+"_"+fn+"_pic.png"):
        print c3.filename()+"_"+fn+"_pic.png","exists!"
      else:
        counts={}
        newdir= os.path.splitext((os.path.basename(c3.filename())))[0]+"_"+fn
        print newdir
        mkdir_p(newdir)
        ys=c3.data4name(fn)
        Years=c3.time4name(fn)
        for iteration in range(100):
          xs = np.array([random.random() for y in ys])
          convergent_breaks.TraceFile = newdir+"//"+c3.filename()+"_"+fn+"_"+str(iteration)+'.trace'
          cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
          report(newdir+"//"+c3.filename()+"_"+fn+"_"+str(iteration)+'.csv',cb, xs, ys, Years, [c3.filename(),fn])
          breaks=cb[1]
          sbreaks=str(breaks)
          if not sbreaks in counts:
            counts[sbreaks] = 0
          counts[sbreaks] += 1
        maxc=0
        maxb=''
        for b in counts.keys():
          if maxb=='':
            maxb=b
            maxc=counts[b]
          else:
            if counts[b] > maxc:
              maxb=b
              maxc=counts[b]
        graph(ys, Years, eval(maxb),c3.filename()+"_"+fn+"_"+str(99)+'.csv'+" "+str(maxc), c3.filename()+"_"+fn+"_pic")
#
##  collated(".\\",0,initial_header=9)      
#
##==============================================================================
##Cowtan and Way
##==============================================================================
  from COWTAN_WAY import COWTAN_WAY
  fn=PATHS.CWDATAPATH+'\\had4_krig_annual_v2_0_0.txt'
  data = COWTAN_WAY(fn)
  ys =  data.annual()
  Years= data.years()
  counts={}
  newdir= os.path.splitext((os.path.basename(data.filename())))[0]
  mkdir_p(newdir)
  fn1=newdir+'//'+os.path.basename(data.filename())
  picfn=os.path.basename(data.filename())+"_pic"  
  if os.path.exists(picfn+".png"):
    print picfn+".png", "exists!!"
  else:
    for iteration in range(100):
      data = COWTAN_WAY(fn)
      ys =  data.annual()
      xs = np.array([random.random() for y in ys])
      convergent_breaks.TraceFile = fn1+"_"+str(iteration)+'.trace'
      cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
      report(fn1+"_"+str(iteration)+'.csv',cb, xs, ys, Years, [fn1])
      breaks=cb[1]
      sbreaks=str(breaks)
      if not sbreaks in counts:
        counts[sbreaks] = 0
      counts[sbreaks] += 1
    maxc=0
    maxb=''
    for b in counts.keys():
      if maxb=='':
        maxb=b
        maxc=counts[b]
      else:
        if counts[b] > maxc:
          maxb=b
          maxc=counts[b]
    graph(ys, Years, eval(maxb),fn1+"_"+str(iteration)+'.csv'+" "+str(maxc), picfn)
    plt.close("all")
    
##    
###==============================================================================
###BEST
###==============================================================================
##    
  import BERKLEY
  data = BERKLEY.BERKLEY()
  fn=data.filename()
  ys =  data.annual()
  Years= data.years()
  counts={}
  newdir= os.path.splitext((os.path.basename(data.filename())))[0]
  mkdir_p(newdir)
  fn1=newdir+'//'+os.path.basename(data.filename())
  picfn=os.path.basename(data.filename())+"_pic"  
  if os.path.exists(picfn+".png"):
    print picfn+".png", "exists!!"
  else:
    for iteration in range(100):
      xs = np.array([random.random() for y in ys])
      convergent_breaks.TraceFile = fn1+"_"+str(iteration)+'.trace'
      cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
      report(fn1+"_"+str(iteration)+'.csv',cb, xs, ys, Years, [fn1])
      breaks=cb[1]
      sbreaks=str(breaks)
      if not sbreaks in counts:
        counts[sbreaks] = 0
      counts[sbreaks] += 1
    maxc=0
    maxb=''
    for b in counts.keys():
      if maxb=='':
        maxb=b
        maxc=counts[b]
      else:
        if counts[b] > maxc:
          maxb=b
          maxc=counts[b]
    graph(ys, Years, eval(maxb),fn1+"_"+str(iteration)+'.csv'+" "+str(maxc), picfn)
    plt.close("all")
##    
###==============================================================================
###FOSTER RAHMSTORF 2011
###==============================================================================
  fnin=PATHS.FRDATAPATH+"adjusted.csv"  
  c3=csvfile.CSVfile(fnin)
  for field in c3.fields():
    if field != "t":
      if TRENDS:
        fn=fnin+"_T2_SCREEN_"+field
      else:
        fn=fnin+"_"+field
      ys =  c3[field]
      Years= c3["t"]
      Years=np.array([int(np.mean(Years[i * 12: (i+1)*12])) for i in range(int(len(ys)/12))])
      ys=np.array([np.mean(ys[i * 12: (i+1)*12]) for i in range(len(Years))])
      counts={}
      newdir= os.path.basename(fn)
      mkdir_p(newdir)
      fn1=newdir+'//'+os.path.basename(fn)
      picfn=os.path.basename(fn)+"_pic"  
      if os.path.exists(picfn+".png"):
        print picfn+".png", "exists!!"
      else:
        for iteration in range(100):
          xs = np.array([random.random() for y in ys])
          convergent_breaks.TraceFile = fn1+"_"+str(iteration)+'.trace'
          #cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
          cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
          report(fn1+"_"+str(iteration)+'.csv',cb, xs, ys, Years, [fnin])
          breaks=cb[1]
          sbreaks=str(breaks)
          if not sbreaks in counts:
            counts[sbreaks] = 0
          counts[sbreaks] += 1
        maxc=0
        maxb=''
        for b in counts.keys():
          if maxb=='':
            maxb=b
            maxc=counts[b]
          else:
            if counts[b] > maxc:
              maxb=b
              maxc=counts[b]
        graph(ys, Years, eval(maxb),os.path.basename(fn) +" "+str(maxc), picfn)
        plt.close("all")
#  
##==============================================================================
##Roger's test set
##==============================================================================
#    
  fnin=PATHS.ARTIFICIALDATA
  c3=csvfile.CSVfile(fnin)
  for field in c3.fields():
    if field != "Year":
      if TRENDS:
        fn=fnin+"_T2_SCREEN_"+field
      else:
        fn=fnin+"_"+field
      ys =  c3[field]
      Years= c3["Year"]
      counts={}
      newdir= os.path.basename(fn)
      mkdir_p(newdir)
      fn1=newdir+'//'+os.path.basename(fn)
      picfn=os.path.basename(fn)+"_pic"  
      if os.path.exists(picfn+".png"):
        print picfn+".png", "exists!!"
      else:
        for iteration in range(100):
          xs = np.array([random.random() for y in ys])
          convergent_breaks.TraceFile = fn1+"_"+str(iteration)+'.trace'
#          cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
          cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
          report(fn1+"_"+str(iteration)+'.csv',cb, xs, ys, Years, [fnin])
          breaks=cb[1]
          sbreaks=str(breaks)
          if not sbreaks in counts:
            counts[sbreaks] = 0
          counts[sbreaks] += 1
        maxc=0
        maxb=''
        for b in counts.keys():
          if maxb=='':
            maxb=b
            maxc=counts[b]
          else:
            if counts[b] > maxc:
              maxb=b
              maxc=counts[b]
        graph(ys, Years, eval(maxb),os.path.basename(fn) +" "+str(maxc), picfn)
        plt.close("all")

##==============================================================================
##cmip 3 files from QCCCE
##==============================================================================
#    
  files =gatherCMIP3Names(PATHS.DATAROOT+"CSIRO_Model_Set\\gwFiles")
  for fnx in files:
    fn1=fnx
    if os.path.exists(os.path.basename(fn1)+"_pic.png"):
      print os.path.basename(fn1)+"_pic.png","exists!"
    else:
      
      print fnx
      counts={}
      newdir= os.path.splitext((os.path.basename(fn1)))[0]
      mkdir_p(newdir)
      c3=CMIP3gw(fnx)
      ys =  c3.Warming()
      xs = np.array([random.random() for y in ys])
      Years= c3.Years()
      fn = newdir+"\\"+os.path.basename(fn1)
      for iteration in range(100):
        xs = np.array([random.random() for y in ys])
        convergent_breaks.TraceFile = fn+"_"+str(iteration)+'.trace'
        cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
        report(fn+"_"+str(iteration)+'.csv',cb, xs, ys, Years, [fn1,fn])
        breaks=cb[1]
        sbreaks=str(breaks)
        if not sbreaks in counts:
          counts[sbreaks] = 0
        counts[sbreaks] += 1
      maxc=0
      maxb=''
      for b in counts.keys():
        if maxb=='':
          maxb=b
          maxc=counts[b]
        else:
          if counts[b] > maxc:
            maxb=b
            maxc=counts[b]
      graph(ys, Years, eval(maxb),os.path.basename(fn)+" "+str(maxc), os.path.basename(fn)+"_pic")
      plt.close("all")

##==============================================================================
##cmip 5 files from QCCCE
##==============================================================================
#    

  files=gatherCMIP5Names(PATHS.QCCCEROOT[:-2])
  for fnx in files:
    fn1=fnx[1]
    if os.path.exists(os.path.basename(fn1)+"_pic.png"):
      print os.path.basename(fn1)+"_pic.png","exists!"
    else:
      print fnx
      counts={}
      newdir= os.path.splitext((os.path.basename(fn1)))[0]
      mkdir_p(newdir)
      c3=CMIP5gw(fnx)
      ys =  c3.Warming()
      xs = np.array([random.random() for y in ys])
      Years= c3.Years()
      fn = newdir+"\\"+os.path.basename(fn1)
      for iteration in range(100):
        xs = np.array([random.random() for y in ys])
        convergent_breaks.TraceFile = fn+"_"+str(iteration)+'.trace'
        cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
        report(fn+"_"+str(iteration)+'.csv',cb, xs, ys, Years, [fn1,fn])
        breaks=cb[1]
        sbreaks=str(breaks)
        if not sbreaks in counts:
          counts[sbreaks] = 0
        counts[sbreaks] += 1
      maxc=0
      maxb=''
      for b in counts.keys():
        if maxb=='':
          maxb=b
          maxc=counts[b]
        else:
          if counts[b] > maxc:
            maxb=b
            maxc=counts[b]
      graph(ys, Years, eval(maxb),os.path.basename(fn)+" "+str(maxc), os.path.basename(fn)+"_pic")
      plt.close("all")
   
#######################################################################################################
#The following code for HADCRUT4
#For 4.2
  files=glob.glob(HADLEYROOT+'HADCRUT4\\HadCRUT.4.2.0.0.annual*.txt')
  for fn1 in files:
    if os.path.exists(os.path.basename(fn1)+"_pic.png"):
      print os.path.basename(fn1)+"_pic.png","exists!"
    else:
        
      counts={}
      newdir= os.path.splitext((os.path.basename(fn1)))[0]
      mkdir_p(newdir)
      had=HADCRUT4(fn1)
      Years=had.years()
      ys=had.annual()
      fn = newdir+"\\"+os.path.basename(had.filename())
      for iteration in range(100):
        xs = np.array([random.random() for y in ys])
        convergent_breaks.TraceFile = fn+"_"+str(iteration)+'.trace'
        cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
        report(fn+"_"+str(iteration)+'.csv',cb, xs, ys, Years, [had.filename(),fn])
        breaks=cb[1]
        sbreaks=str(breaks)
        if not sbreaks in counts:
          counts[sbreaks] = 0
        counts[sbreaks] += 1
      maxc=0
      maxb=''
      for b in counts.keys():
        if maxb=='':
          maxb=b
          maxc=counts[b]
        else:
          if counts[b] > maxc:
            maxb=b
            maxc=counts[b]
      graph(ys, Years, eval(maxb),os.path.basename(fn)+" "+str(maxc), os.path.basename(fn)+"_pic")
      plt.close("all")

#For 4.3.0.0
  files=glob.glob(HADLEYROOT+'HADCRUT4\\HadCRUT.4.3.0.0.annual*.txt')
  
  for fn1 in files:
    if os.path.exists(os.path.basename(fn1)+"_pic.png"):
      print os.path.basename(fn1)+"_pic.png","exists!"
    else:
        
      counts={}
      newdir= os.path.splitext((os.path.basename(fn1)))[0]
      mkdir_p(newdir)
      had=HADCRUT4(fn1)
      Years=had.years()
      ys=had.annual()
      fn = newdir+"\\"+os.path.basename(had.filename())
      for iteration in range(100):
        xs = np.array([random.random() for y in ys])
        convergent_breaks.TraceFile = fn+"_"+str(iteration)+'.trace'
        cb=convergent_breaks.convergentBreaks(ys, xs, Years, 'GWAnom', mode="control", guide="Stability",screenpr=SCREENPR, pr=0.01, trace=True)
        report(fn+"_"+str(iteration)+'.csv',cb, xs, ys, Years, [had.filename(),fn])
        breaks=cb[1]
        sbreaks=str(breaks)
        if not sbreaks in counts:
          counts[sbreaks] = 0
        counts[sbreaks] += 1
      maxc=0
      maxb=''
      for b in counts.keys():
        if maxb=='':
          maxb=b
          maxc=counts[b]
        else:
          if counts[b] > maxc:
            maxb=b
            maxc=counts[b]
      graph(ys, Years, eval(maxb),os.path.basename(fn)+" "+str(maxc), os.path.basename(fn)+"_pic")
      plt.close("all")


