require(picante)

## Edit first line to your local directory, then set working directory; this directory should have a set of files in the sample format with local species abundances. When this script is done, there will be two files for each site and each scale of analysis
dir.name = '/Users/david/Documents/Projects/NCEAS-Phylo-Trait-LTER/CleanedCommunityData/May19/may19.split.low'
setwd(dir.name)

site.meta = read.delim('site-metadata.txt',as.is=TRUE)

## read in list of files
files = dir('low.samples.to.process')

## loop through files for lowest level sampling units
for (i in 1:length(files)) {
#for (i in 1:1) {
	print(files[i])
	dat = read.table(paste('low.samples.to.process/',files[i],sep=''),header=FALSE,sep='\t',as.is=TRUE)
	## process names to aggregate
	nrec = nrow(dat)
	names(dat) = c('sample','abundance','species')

	## split names of sampling units
	spl.sample = strsplit(dat$sample,'-',fixed=TRUE)
	site = tolower(spl.sample[[1]][1])
	meta = subset(site.meta,tolower(site.meta$Acronym)==site)
	nlevels = nrow(meta)
	levels = meta$Our.Label

	
	# write out copy of abundance and pa files to all.samples directory
## copy abundance file and write pa file into all.samples dir
	## this is not necessary: if (length(grep('plot',files[i]))==1) low.level = 'plot' else if (length(grep('patch',files[i]))==1) low.level = 'patch' else if (length(grep('vegtype',files[i]))==1) low.level = 'vegtype'
	low.level = tolower(levels[nlevels])
	outname = paste('all.samples/',tolower(site),'.',low.level,'.s.mab.txt',sep='')
	write.table(dat,outname,sep='\t',quote=FALSE,row.names=FALSE,col.names=FALSE)

	outname = paste('all.samples/',tolower(site),'.',low.level,'.s.pa.txt',sep='')
	pa = data.frame(dat$sample,1,dat$species)
	write.table(pa,outname,sep='\t',quote=FALSE,row.names=FALSE,col.names=FALSE)

	## output matrix files for EcoPhyl
	dat.m = t(sample2matrix(dat))
	outname = paste('all.matrices/',tolower(site),'.',low.level,'.m.mab.txt',sep='')
	head = matrix(c('species',colnames(dat.m)),nrow=1)
	write.table(head,outname,sep='\t',quote=FALSE,col.names=FALSE,row.names=FALSE)
	write.table(dat.m,outname,sep='\t',quote=FALSE,col.names=FALSE,append=TRUE)

	pa.m = t(sample2matrix(pa))
	outname = paste('all.matrices/',tolower(site),'.',low.level,'.m.pa.txt',sep='')
	head = matrix(c('species',colnames(pa.m)),nrow=1)
	write.table(head,outname,sep='\t',quote=FALSE,col.names=FALSE,row.names=FALSE)
	write.table(pa.m,outname,sep='\t',quote=FALSE,col.names=FALSE,append=TRUE)
	
	## add expanded names of levels to new columns
	dat = cbind(matrix(NA,nrow=nrec,ncol=nlevels),dat)
	names(dat)[1:nlevels] = levels
	for (j in 1:nrec) {
		dat[j,1:nlevels] = spl.sample[[j]]
	}

	## calculate diversity and area per low.level unit
	div = data.frame(table(dat$sample))
	low.area = meta$ScaleArea.m2[nlevels]
	sparea = data.frame(site,rownames(div),1,div$Freq,low.area)
	names(sparea) = c('site','sample','nunits','richness','area')
	
	## CREATE FILE WITH PLOT NESTING STRUCTURE
	samples = unique(dat$sample)
	nsamples = length(samples)	
	tmp = matrix(NA,nrow=nsamples,ncol=nlevels)
	spl.sample = strsplit(samples,'-',fixed=TRUE)
	for (n in 1:nsamples) {
		tmp[n,] = spl.sample[[n]]
	}
	plot.structure = tmp
	for (j in 2:nlevels) plot.structure[,j] = paste(plot.structure[,(j-1)],plot.structure[,j],sep='-')
	colnames(plot.structure) = levels
	outname = paste('plot.structure.files/',tolower(site),'.plotstruct.txt',sep='')
	write.table(plot.structure,outname,sep='\t',quote=FALSE,row.names=FALSE)
	
	
## step through higher level units in each site and create files for mean abundance, occupancy and presence/absence	
	for (v in (nlevels-1):1) {

		# step up through appropriate number of levels to identify units
		units = dat[,1]
		if (v>1) for (j in 2:v) units = paste(units,dat[,j],sep='-')

	## calculate mean abundance at higher level
		sum.abund = aggregate(dat$abundance,by=list(units,dat$species),sum)
		names(sum.abund) = c(levels[v],'species','tot.abund')
		sort.order = order(sum.abund[,1],sum.abund[,2])
		sum.abund = sum.abund[sort.order,]
		nunits = data.frame(table(plot.structure[,v]))
		#nunits = aggregate(sum.abund$tot.abund,by=list(sum.abund[,1]),length)
		n2s = match(sum.abund[,1],rownames(nunits))
		sum.abund$nunits = nunits$Freq[n2s]
		sum.abund$mean.abund = sum.abund$tot.abund/sum.abund$nunits
		outname = paste('all.samples/',tolower(site),'.',tolower(levels[v]),'.s.mab.txt',sep='')
		write.table(sum.abund[,c(1,5,2)],outname,sep='\t',quote=FALSE,row.names=FALSE,col.names=FALSE)
	# abundance, matrix file
		ab.m = t(sample2matrix(sum.abund[,c(1,5,2)]))
		outname = paste('all.matrices/',tolower(site),'.',tolower(levels[v]),'.m.mab.txt',sep='')
		head = matrix(c('species',colnames(ab.m)),nrow=1)
		write.table(head,outname,sep='\t',quote=FALSE,col.names=FALSE,row.names=FALSE)
		write.table(ab.m,outname,sep='\t',quote=FALSE,col.names=FALSE,append=TRUE)


	## calculate occupancy
		occ = aggregate(dat$abundance,by=list(units,dat$species),length)
		names(occ) = c(levels[v],'species','tot.occ')
		sort.order = order(occ[,1],occ[,2])
		occ = occ[sort.order,]
		occ$nunits = nunits$Freq[n2s]
		occ$prop.occ = occ$tot.occ/occ$nunits
		outname = paste('all.samples/',tolower(site),'.',tolower(levels[v]),'.s.occ.txt',sep='')
		write.table(occ[,c(1,5,2)],outname,sep='\t',quote=FALSE,row.names=FALSE,col.names=FALSE)

	#occupancy, matrix file
		occ.m = t(sample2matrix(occ[,c(1,5,2)]))
		outname = paste('all.matrices/',tolower(site),'.',tolower(levels[v]),'.m.occ.txt',sep='')
		head = matrix(c('species',colnames(occ.m)),nrow=1)
		write.table(head,outname,sep='\t',quote=FALSE,col.names=FALSE,row.names=FALSE)
		write.table(occ.m,outname,sep='\t',quote=FALSE,col.names=FALSE,append=TRUE)

	
	## calculate presence/absence
		pa = occ
		pa$prop.occ = 1
		outname = paste('all.samples/',tolower(site),'.',tolower(levels[v]),'.s.pa.txt',sep='')
		write.table(pa[,c(1,5,2)],outname,sep='\t',quote=FALSE,row.names=FALSE,col.names=FALSE)

	#pa, matrix file
		pa.m = t(sample2matrix(pa[,c(1,5,2)]))
		outname = paste('all.matrices/',tolower(site),'.',tolower(levels[v]),'.m.pa.txt',sep='')
		head = matrix(c('species',colnames(pa.m)),nrow=1)
		write.table(head,outname,sep='\t',quote=FALSE,col.names=FALSE,row.names=FALSE)
		write.table(pa.m,outname,sep='\t',quote=FALSE,col.names=FALSE,append=TRUE)


	## calculate species-area data
		div = aggregate(sum.abund$nunits,by=list(sum.abund[,1]),length)
		area = nunits$Freq * low.area
		tmp = data.frame(site,div[,1],nunits$Freq,div$x,area)
		names(tmp) = c('site','sample','nunits','richness','area')
		sparea = rbind(sparea,tmp)
	}
	
	if (!(site %in% c('yosemite'))) {
	outname = paste('sparea.files/',tolower(site),'.sparea.txt',sep='')
	write.table(sparea,outname,sep='\t',quote=FALSE,row.names=FALSE)	}
	
}