#!/usr/bin/python
import os, fnmatch, collections, re

# Ask for the folder count, then work out the number of atoms: first from the
# ATOMS line of any *.dat file in folder "1", otherwise by asking the user.
print("Folders must be numbered 1,2,3,4,...Run from parent dir of the folders")

# Python 2's input() evaluates whatever is typed; read plain text and convert
# it explicitly instead. The fallback keeps this section usable on Python 3.
try:
    _read_line = raw_input
except NameError:
    _read_line = input

folnum = int(_read_line("Enter number of folders: "))

# Start from 0 so the "could not detect" fallback below works instead of
# failing with NameError when no ATOMS line is found.
numatoms = 0
# Text after "ATOMS:" is taken as a whitespace-separated list, one entry per atom.
atoms_line = re.compile(r'ATOMS\s*\:+\s*([\w\s-]*)')

os.chdir("1") #auto-detect # of atoms, or ask user for input
for fname in os.listdir('.'):
    if fnmatch.fnmatch(fname, '*.dat'):
        with open(fname, 'r') as datfile:  # closed even if parsing fails
            for line in datfile.read().splitlines():
                found = atoms_line.search(line)
                if found:
                    # the last matching line of the last matching file wins
                    numatoms = len(found.group(1).split())
os.chdir("..")
if not numatoms: #ask user for number of atoms if can't detect automatically
    numatoms = int(_read_line("Enter number of atoms: "))

print("using " + str(numatoms) + " atoms")


# Running averages, keyed by column index; each value ends up as a list with
# one entry per data row. Mind the underscore: the *_pdata dicts hold the
# partials, the ones without it hold the totals.
avgpdf_pdata = collections.defaultdict(list)  # PDF partials
avgsq_pdata = collections.defaultdict(list)   # SQ partials
avgpdfdata = collections.defaultdict(list)    # PDF
avgsqdata = collections.defaultdict(list)     # SQ

# Number of pairs: 1 + 2 + ... + numatoms.
pairs = sum(count + 1 for count in range(numatoms))


def _accumulate(averages, lines, ncols, weight):
    """Add the first ncols columns of lines, each divided by weight, to averages.

    averages maps a column index to a list of running sums with one entry
    per line; lines are rows of whitespace-separated numeric text.
    Raises IndexError if a row has fewer than ncols fields and ValueError
    if a field cannot be converted to float.
    """
    rows = [line.split() for line in lines]  # split each row once, not once per column
    for col in range(ncols):
        running = averages[col]
        for i, row in enumerate(rows):
            running[i] = running[i] + float(row[col]) / weight


# File-name patterns of the four exports expected in every folder.
_PDF_PARTIALS = '*_PDFpartials.csv'
_SQ_PARTIALS = '*_SQpartials.csv'
_PDF_TOTAL = '*_PDF1.csv'
_SQ_TOTAL = '*_SQ1.csv'
_PATTERNS = (_PDF_PARTIALS, _SQ_PARTIALS, _PDF_TOTAL, _SQ_TOTAL)

for num in range(folnum):
    folder = str(num + 1)  # folders are named 1, 2, 3, ...
    os.chdir(folder)

#*********************************CLEANING*****
    # In place: delete every line containing "Ang" (the header), then strip
    # all commas so the rows can be split on whitespace.
    for suffix in ("PDFpartials", "PDF1", "SQ1", "SQpartials"):
        os.system("sed '/Ang/d' *_" + suffix + ".csv -i")
        os.system("sed 's/,//g' *_" + suffix + ".csv -i")

#*******************************FILE OPENING, READING*****
    # Collected afresh for every folder, so a missing file is reported
    # instead of silently reusing the lines read from the previous folder.
    found = {}
    for fname in os.listdir('.'):
        for pattern in _PATTERNS:
            if fnmatch.fnmatch(fname, pattern):
                with open(fname, 'r') as handle:
                    found[pattern] = handle.read().splitlines()

    missing = [pattern for pattern in _PATTERNS if pattern not in found]
    if missing:
        raise IOError("folder " + folder + " has no file matching: " + ", ".join(missing))

    datapdf_p = found[_PDF_PARTIALS]
    datasq_p = found[_SQ_PARTIALS]
    datapdf = found[_PDF_TOTAL]
    datasq = found[_SQ_TOTAL]

    if num == 0:	#initialize avgdata to the right length if it's the first loop
        for pair in range(pairs + 1):  # the "r" column plus one column per pair
            avgpdf_pdata[pair] = [0] * len(datapdf_p)
            avgsq_pdata[pair] = [0] * len(datasq_p)
        for i in range(3):  # the totals files are read as three columns
            avgpdfdata[i] = [0] * len(datapdf)
            avgsqdata[i] = [0] * len(datasq)

    # Every folder must supply as many rows as the first one; a shorter file
    # would otherwise leave the tail of the average under-weighted.
    for pattern, lines, averages in ((_PDF_PARTIALS, datapdf_p, avgpdf_pdata),
                                     (_SQ_PARTIALS, datasq_p, avgsq_pdata),
                                     (_PDF_TOTAL, datapdf, avgpdfdata),
                                     (_SQ_TOTAL, datasq, avgsqdata)):
        if len(lines) != len(averages[0]):
            raise ValueError("folder " + folder + ": " + pattern + " has "
                             + str(len(lines)) + " rows, expected "
                             + str(len(averages[0])))

#******************************PARSING AND THE AVERAGING*****
    # Each value is divided by the folder count before being added, so the
    # running sums are the averages once the last folder has been processed.
    weight = float(folnum)
    _accumulate(avgpdf_pdata, datapdf_p, pairs + 1, weight)
    _accumulate(avgsq_pdata, datasq_p, pairs + 1, weight)
    _accumulate(avgpdfdata, datapdf, 3, weight)
    _accumulate(avgsqdata, datasq, 3, weight)
    os.chdir('..')


#*****************************WRITING THE AVERAGE FILES*****
def _write_columns(path, columns, ncols):
    """Write the first ncols columns of columns to path, one row per line.

    columns maps a column index to a list of values; the row count is taken
    from column 0. Every value is followed by a single space and every row
    ends with a newline. The file is closed on exit, including on error.
    """
    with open(path, 'w') as out:
        for i in range(len(columns[0])):
            out.write(''.join(str(columns[col][i]) + ' ' for col in range(ncols)))
            out.write('\n')


# Partials carry the "r" column plus one column per pair; totals carry three columns.
_write_columns("PDF_avg_partials", avgpdf_pdata, pairs + 1)
_write_columns("SQ_avg_partials", avgsq_pdata, pairs + 1)
_write_columns("PDF_avg", avgpdfdata, 3)
_write_columns("SQ_avg", avgsqdata, 3)

print("great success!")