Commit 742e917f authored by Evelyne Deplazes's avatar Evelyne Deplazes

new UI repository

parent b4549530
*.pyc
*~
.project
.pydevproject
.settings/
from samplingConv import blkAvErrorEst, findConvergedClusters
from genLibMData import timeToIndexRel
KS_STAT_THRESHOLD = 0.95
KS_ANALYSIS_STEP_SIZE = 1 #%
def burger_with_the_lot(time, observable, print_me_some_details=False, figure_name=None):
    """One-stop analysis: trim the series to its largest converged region,
    then compute the mean with a block-averaged error estimate.

    If figure_name is given, the diagnostic figure is saved to that path.
    Returns (mean, error_estimate).
    """
    # `is not None` replaces the non-idiomatic `!= None` comparison
    # NOTE(review): if converged_region finds no converged region it returns
    # a 2-tuple, which makes this 3-way unpack raise -- confirm upstream fix.
    time, observable, fig = converged_region(time, observable, print_me_some_details=print_me_some_details, return_figure=(figure_name is not None))
    mu, error_est = mean_with_error_est(time, observable, fig=fig)
    if figure_name:
        fig.tight_layout()
        fig.savefig(figure_name)
    return mu, error_est
def converged_region(time, observable, print_me_some_details=False, return_figure=False):
converged_regions, converged_region_ks_scores, fig = findConvergedClusters(time, observable, KS_STAT_THRESHOLD, KS_ANALYSIS_STEP_SIZE, returnGraph=return_figure)
if not converged_regions:
print "WARNING! could not find any converged regions. The data will be returned as is. All bets are off!"
return time, observable
# sorry for this nasty one-liner
largest_converged_region, largest_converged_region_ks_scores = sorted(zip(converged_regions, converged_region_ks_scores), key=lambda x:x[0][0]-x[0][-1])[-1]
start_index = timeToIndexRel(time, time[-1] - largest_converged_region[0])
if print_me_some_details:
print "Largest converged region length: {0} ps (size matters here... big numbers good, small numbers bad!)".format(largest_converged_region[0]-largest_converged_region[-1])
print "Maximum KS score: {0:.3f} (close to 1 is good, threshold for convergence set to {1})".format(max(largest_converged_region_ks_scores), KS_STAT_THRESHOLD)
print "Data from {0} ps onward will be returned as the converged region".format(largest_converged_region[0])
return time[start_index:], observable[start_index:], fig
def mean_with_error_est(time, observable, fig=None):
    """Thin wrapper around blkAvErrorEst; returns (mean, error_estimate)."""
    average, error_estimate, _ = blkAvErrorEst(time, observable, fig=fig)
    return average, error_estimate
def sloppy_data_parser(file_name):
    """Parse a whitespace-separated two-column data file.

    Lines that start with '#' or '@' (comment/xmgrace headers) or do not
    contain exactly two fields are skipped.  Returns (xs, ys) as tuples of
    floats; a file with no usable lines yields two empty tuples (previously
    this raised a ValueError from zip()).
    """
    xs, ys = [], []
    with open(file_name) as fh:
        for line in fh:
            fields = line.split()
            # skip comments, headers and malformed rows
            if len(fields) != 2 or line[0] in ("#", "@"):
                continue
            xs.append(float(fields[0]))
            ys.append(float(fields[1]))
    return tuple(xs), tuple(ys)
# Script entry point: read a two-column time series (path from argv, or a
# hard-coded test file) and run the full convergence + error analysis.
if __name__=="__main__":
    import sys
    if len(sys.argv) > 1:
        # user-supplied data file
        xs, ys = sloppy_data_parser(sys.argv[1])
    else:
        # fall-back test data set (cluster-specific path)
        test_case_data = "/home/uqmstroe/mddata2/gromos_job_data/LEU/TI_CHE/lambda_CHE_0.000/dvdl.dat"
        xs, ys = sloppy_data_parser(test_case_data)
    mu, err = burger_with_the_lot(xs, ys, print_me_some_details=True, figure_name="test.png")
    print "{0:.2f} +/- {1:.2f}".format(mu, err)
\ No newline at end of file
This diff is collapsed.
import pylab, numpy
from scipy.optimize import curve_fit
CONV_TOL = 0.0001
def readDvdl(path):
    # Parse a two-column dv/dl time series file, skipping '#' comment lines.
    # Returns the (times, values) columns (Python 2 zip -> list of tuples).
    print "Reading data"
    return zip(*[(float(l.split()[0]), float(l.split()[1])) for l in open(path).readlines() if l and not l.startswith("#")])
def getPlotAxis():
    """Create a new pylab figure and return its single full-size axis."""
    figure = pylab.figure()
    figure.hold(True)
    axis = figure.add_subplot(111)
    return axis
def cumulativeAv(ys):
    """Running (cumulative) mean of ys, computed incrementally in one pass."""
    running = [ys[0]]
    for count, value in enumerate(ys[1:], 2):
        previous = running[-1]
        running.append(previous + (value - previous) / float(count))
    return running
def cumulativeAvSlow(ys):
    """Running mean of ys via explicit re-summation (O(n^2) reference version)."""
    return [sum(ys[:k]) / float(k) for k in range(1, len(ys) + 1)]
def timeToIndexRel(timeSeries, t1):
    """Index of the first sample whose offset from timeSeries[0] reaches t1.

    Returns 0 when the series is too short or t1 is within one step;
    implicitly returns None when t1 exceeds the covered range (callers use
    the result as a slice start, where None means "from the beginning").
    """
    if len(timeSeries) < 2:
        return 0
    if t1 <= timeSeries[1] - timeSeries[0]:
        return 0
    origin = timeSeries[0]
    for index, stamp in enumerate(timeSeries):
        if stamp - origin >= t1:
            return index
def getBlocks(y, blockIndexSize):
    """Chop y into consecutive blocks of blockIndexSize items, dropping any
    partial block at the tail."""
    starts = range(0, len(y) + 1 - blockIndexSize, blockIndexSize)
    return [y[s:s + blockIndexSize] for s in starts]
def blockAverage(x, y, blockIndexSize=50):
    """Block-average a time series.

    Splits y into blocks of blockIndexSize samples (default 50, the
    previously hard-coded value, kept for backward compatibility) and
    returns (mid-block times, per-block means).
    """
    blocks = getBlocks(y, blockIndexSize)
    dvdlBlks = [numpy.mean(b) for b in blocks]
    # mid-point time of each block, truncated to the number of full blocks
    timeBlks = [x[j] + (x[j+1] - x[j])/2. for j in range(0, len(x), blockIndexSize)][:len(dvdlBlks)]
    return timeBlks, dvdlBlks
def blkAvs(time, dvdl):
    """Scan increasing block sizes and record the spread of block means.

    Returns (blkSizeList, stds, N): block sizes tried (in time units), the
    standard deviation of per-block means at each size, and the number of
    blocks at each size.
    """
    deltaBlockSize = 1
    # need at least 2 values to start with
    blkSize = time[2] - time[0]
    blkSizeList = []
    N = []
    stds = []
    # while block size is less than 1/20 of the total run length
    # (floor division keeps the index integral under Python 2 and 3;
    # the old comment claimed 1/10 but the code uses len(time)//20)
    while blkSize < time[len(time) // 20] - time[0]:
        blkSizeIndex = timeToIndexRel(time, blkSize)
        dvdlBlks = getBlocks(dvdl, blkSizeIndex)
        blkMeans = [numpy.mean(b) for b in dvdlBlks]
        blkSizeList.append(blkSize)
        stds.append(numpy.std(blkMeans))
        N.append(len(dvdlBlks))
        blkSize += deltaBlockSize
    return blkSizeList, stds, N
def expFunc(x, a, b, c):
    """Single exponential decay model: b * exp(-a*x) + c."""
    return c + b * numpy.exp(-a * x)
def doubleExpFunc(x, a1, a2, b1, b2, c):
    """Sum of two exponential decays plus a constant offset."""
    first = b1 * numpy.exp(-a1 * x)
    second = b2 * numpy.exp(-a2 * x)
    return first + second + c
def derDoubleExpFunc(x, a1, a2, b1, b2, c):
    """x-derivative of doubleExpFunc (c vanishes; kept for signature parity)."""
    return -(a1 * b1) * numpy.exp(-a1 * x) - (a2 * b2) * numpy.exp(-a2 * x)
def oneOver(x, a):
    """Reciprocal model a/x."""
    return a / x
def fittingFunc(x, a1, a2, b1, b2, c):
    """Double-exponential-plus-offset envelope scaled by sqrt(x)."""
    envelope = b1 * numpy.exp(-a1 * x) + b2 * numpy.exp(-a2 * x) + c
    return envelope * numpy.sqrt(x)
def derFittingFunc(x, a1, a2, b1, b2, c):
    """Analytic x-derivative of fittingFunc (product rule on envelope*sqrt(x))."""
    rootX = numpy.sqrt(x)
    envelope = b1 * numpy.exp(-a1 * x) + b2 * numpy.exp(-a2 * x) + c
    envelopeSlope = -a1 * b1 * numpy.exp(-a1 * x) - a2 * b2 * numpy.exp(-a2 * x)
    return envelope / (2. * rootX) + envelopeSlope * rootX
def oneOverSqrtOneOver(x, a):
    """1/sqrt(a/x), i.e. sqrt(x/a)."""
    return 1. / numpy.sqrt(a / x)
def derOneOversQrtOneOver(x, a):
    """x-derivative of oneOverSqrtOneOver: 1/(2*sqrt(a*x))."""
    return 1. / (2 * numpy.sqrt(a * x))
def blockAvFit(sizes, stds, ns, ax=None, label=None):
    # Fit the block-averaged error curve (std/sqrt(N) vs block size) with a
    # double-exponential-times-sqrt model and locate the block size at which
    # the fitted curve's derivative drops below CONV_TOL (the plateau).
    # Returns (convergedBlockSize, errorEstimate), or (None, None) when the
    # fit fails or never flattens out.  If `ax` is given, diagnostic plots
    # are drawn in all cases.
    # fit a/x to Ns data to get a continuous function
    xRange = numpy.linspace(min(sizes), max(sizes), 600)
    # fit 2 exponentials to to sizes vs. stds
    try:
        poptFit, _ = curve_fit(fittingFunc, numpy.array(sizes), numpy.array([s/numpy.sqrt(n) for s, n in zip(stds, ns)]), p0=[0.5, 1, 0.5, 1, 0], maxfev=10000)
    except:
        # no fit
        print "Fitting exponentials to data failed"
        return None, None
    convBlkSize = None
    for x in xRange:
        # first sampled x where the fitted curve's slope is below tolerance
        if derFittingFunc(x, *poptFit) < CONV_TOL:
            convBlkSize = x
            break
    if not convBlkSize:
        print "DID NOT CONVERGE TO GRADIDENT LESS THAN {0}".format(CONV_TOL)
        if ax:
            # still plot raw data, fit and derivative for diagnosis
            yfitted = [fittingFunc(x, *poptFit) for x in xRange]
            yDerFitted = [derFittingFunc(x, *poptFit) for x in xRange]
            ax.plot(sizes, [s/numpy.sqrt(n) for s, n in zip(stds, ns)], linestyle='--', label=label)
            ax.plot(xRange, yfitted, color="g")
            ax.plot(xRange, yDerFitted, color="r")
        return None, None
    # calculate corresponding converged error estimate
    errorEst = fittingFunc(convBlkSize, *poptFit)
    if ax:
        yfitted = [fittingFunc(x, *poptFit) for x in xRange]
        ax.plot(sizes, [s/numpy.sqrt(n) for s, n in zip(stds, ns)], linestyle='--', label=label)
        ax.plot(xRange, yfitted, color="g")
        # cross-hairs marking the converged block size / error estimate
        ax.plot([convBlkSize, convBlkSize], [0, errorEst], color="r")
        ax.plot([0, convBlkSize], [errorEst, errorEst], color="r")
    return convBlkSize, errorEst
def plotCumulative(data, title, splitPt):
    # Plot the cumulative average of each labelled (x, y) series from index
    # splitPt onward on a shared axis, then show the figure.
    ax11 = getPlotAxis()
    for label, (x, y) in data:
        print "Plotting data"
        #x, y = blockAverage(x, y)
        ax11.plot(x[splitPt:], cumulativeAv(y[splitPt:]), linestyle='-', label=label)
    ax11.legend(loc = 'lower right',numpoints = 1)#, frameon = False)
    ax11.set_title(title)
    ax11.set_xlabel("Time (ps)")
    ax11.set_ylabel("DV/DL (kJ/mol)")
    pylab.show()
#data_cavity = (
# ("SD fric=0", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/1634/dvdl.dat")),
# ("SD fric=1", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/1634_1/dvdl.dat")),
# ("SD fric=10", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/1634_10/dvdl.dat")),
# ("SD fric=91", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/1634_91/dvdl.dat")),
#
# ("SD all fric=0.1", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/1634_all_0.1/dvdl.dat")),
# ("SD all fric=1", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/1634_all_1/dvdl.dat")),
# ("SD all fric=10", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/1634_all_10/dvdl.dat")),
# ("SD all fric=91", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/1634_all_91/dvdl.dat")),
# )
# (
# ("SD fric=0 G11", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/meth_SD0/dvdl.dat")),
# ("SD fric=0 G96", readDvdl("/mddata1/uqamalde/FE_test/G11_G96_SD/G96/lam_0.75/meth_SD0/l075_dGdl.dat")),
#
# ("SD fric=10 solute G11", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/meth_SDSolute/dvdl.dat")),
# ("SD fric=10 solute G96", readDvdl("/mddata1/uqamalde/FE_test/G11_G96_SD/G96/lam_0.75/meth_SDSolute/l075_dGdl.dat")),
#
# ("SD fric=10 all G11", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/meth_SDAll/dvdl.dat")),
# ("SD fric=10 all G96", readDvdl("/mddata1/uqamalde/FE_test/G11_G96_SD/G96/lam_0.75/meth_SDAll/l075_dGdl.dat")),
# )
#data_real = (
#
# ("SD fric=0 G11", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/meth_realSD0/dvdl.dat")),
# ("No-SD G11", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/meth_realNoSD/dvdl.dat")),
#
# ("SD No-Berendsen fric=91 all G11", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/meth_realSD91/dvdl.dat")),
# ("SD fric=10 all G11", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/meth_realSD10_all/dvdl.dat")),
# ("SD fric=10 solute G11", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/meth_realSD10_solute/dvdl.dat")),
#
#
# ("SD fric=10 solute G96", readDvdl("/mddata1/uqamalde/FE_test/G11_G96_SD/G96/lam_0.0/meth_SDSolute/l000_dGdl.dat")),
# ("SD fric=10 all G96", readDvdl("/mddata1/uqamalde/FE_test/G11_G96_SD/G96/lam_0.0/meth_SDAll/l000_dGdl.dat")),
#
#)
# Script entry point: load several dv/dl runs (cluster-specific paths), plot
# their cumulative averages, then their block-averaged error estimates.
if __name__=="__main__":
    data_real_chloroEther = (
        ("SD fric=0", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/1634_real/1634_0/dvdl.dat")),
        ("SD fric=1", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/1634_real/1634_1/dvdl.dat")),
        ("SD fric=10", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/1634_real/1634_10/dvdl.dat")),
        ("SD all fric=1", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/1634_real/1634_all_1/dvdl.dat")),
        ("SD all fric=10", readDvdl("/mddata/uqmstroe/gromosBugInvestigation/SDTest/detailedComp/1634_real/1634_all_10/dvdl.dat")),
        )
    splitPt = 0#len(data_real[0][1][0])/2
    #plotCumulative(data_real, "Real system", splitPt)
    #plotCumulative(data_cavity, "Cavity system, lam = 0.75", splitPt)
    plotCumulative(data_real_chloroEther, "dichloroether real system", splitPt)
    # block-averaged error estimate for each data set on a shared axis
    ax11 = getPlotAxis()
    for label, (xs, ys) in data_real_chloroEther:
        sizes, stds, ns = blkAvs(xs, ys)
        convBlkSize, blkAvErrorEst = blockAvFit(sizes, stds, ns, ax=ax11, label=label)
        if convBlkSize:
            print "{3}: {0:.3f} +/- {1:.3f}, blkSize: {2}".format(numpy.mean(ys), blkAvErrorEst, convBlkSize, label)
    ax11.legend(loc = 'upper left',numpoints = 1, frameon = False)
    ax11.set_title("Block averaged error estimate")
    ax11.set_ylabel("error est dV/d${\lambda}$ (kJ/mol)")
    ax11.set_xlabel("block size (ps)")
    pylab.show()
\ No newline at end of file
from numpy import mean
from genLibMData import blockAvAnalysis, linReg, timeToIndex, timeToIndexRel, getBlocks, blockAv, getCorTime, blockAvIndex, minSizeBlockAnalysis
from pylab import polyfit, poly1d
from scipy.stats import shapiro
def plotDetectDrift(time, dvdl, N, graphing):
    # Visualize the drift test: block means with their widths as x-error
    # bars plus a linear fit; the title reports r^2 from detectDrift.
    mids, mu, blkSizes = _detectDriftHelper(time, dvdl, N)
    r,minBlkSize,actualN = detectDrift(time, dvdl, N)
    ax = graphing.getPlotAxis()
    # half the block width either side of each block's midpoint
    ax.errorbar(mids, mu, xerr=[b/2.0 for b in blkSizes], marker ='o', linestyle='',color ='k')
    fit = polyfit(mids,mu,1)
    fit_fn = poly1d(fit)
    domain = [min(mids), max(mids)]
    ax.plot(domain, fit_fn(domain), color='r')
    ax.set_title("Block average trends r^2={0:5.3f} (N={1}, minBlk={2:5.3f}ps)".format(r**2, actualN, minBlkSize))
    ax.set_ylabel("dv/dl")
    ax.set_xlabel("Time (ps)")
    ax.set_xlim(time[0])
def calcDriftTime(time, dvdl, N, minR, snipPercent):
    # Repeatedly snip snipPercent of the run off the front until the drift
    # regression r^2 drops to minR.  Returns (totalSnippedTime, finalR).
    r, _, _ = detectDrift(time, dvdl, N)
    snipTime = (time[-1]-time[0])*(snipPercent/100.0)
    cumulativeDriftTime = time[0]
    # NOTE(review): len(time) never changes inside this loop, so the second
    # condition only guards an initially empty series; termination relies on
    # detectDrift returning r=0 once the remainder is shorter than N samples
    # -- confirm intended.
    while r**2 > minR and len(time) > 0:
        cumulativeDriftTime += snipTime
        cutPt = timeToIndex(time, cumulativeDriftTime)
        r, _, _ = detectDrift(time[cutPt:], dvdl[cutPt:], N)
    return cumulativeDriftTime, r
def detectDrift(time, dvdl, N):
    """Linear-regression drift test over block means.

    Returns (r, smallestBlockSize, numberOfMeans), or (0, 0, 0) when the
    series holds fewer than N samples.
    """
    if len(time) < N:
        return 0, 0, 0
    midpoints, blockMeans, blockSizes = _detectDriftHelper(time, dvdl, N)
    correlation = linReg(midpoints, blockMeans)
    return correlation, min(blockSizes), len(blockMeans)
def _detectDriftHelper(time, dvdl, N):
    # Collect block midpoints, block means and block sizes across all block
    # sizes produced by blockAvAnalysis, flattened into parallel lists.
    def means(timeBlks, dvdlBlks):
        # midpoint sample of each time block, mean of each dvdl block
        # (len(tb)/2 relies on Python 2 integer division)
        return [tb[len(tb)/2] for tb in timeBlks], [mean(b) for b in dvdlBlks]
    results = blockAvAnalysis(time, dvdl, means, N)
    mu = []
    mids = []
    blkSizes = []
    for blkSize, res in results.items():
        mids.extend(res[0])
        mu.extend(res[1])
        blkSizes.extend([blkSize]*len(res[0])) # block size is twice the distance from the beginning to the middle of the first block
    return mids, mu, blkSizes
###
###
def detectInitialOutliers(time, dvdl, outlierThreshold, trimPercent):
    # Trim the start of the series in trimPercent steps until the remaining
    # first half passes a Shapiro-Wilk normality test (W >= outlierThreshold).
    # Returns the amount of time trimmed from the start.
    trimTime = (time[-1]-time[0])*(trimPercent/100.0)
    trimRegionIndex = 0
    # only look at the first half of the run
    # (len(time)/2 relies on Python 2 integer division)
    testRegionIndex = timeToIndexRel(time, time[len(time)/2] - time[0])
    while trimRegionIndex < testRegionIndex and shapiro(dvdl[trimRegionIndex:testRegionIndex])[0] < outlierThreshold:
        trimRegionIndex += timeToIndexRel(time, trimTime)
    return time[trimRegionIndex] - time[0]
def plotDetectInitialOutliers(time, dvdl, trimPercent, graphing):
    # Plot the Shapiro-Wilk W statistic of the first half of the run as a
    # function of how many leading samples have been trimmed.
    trimTime = (time[-1]-time[0])*(trimPercent/100.0)
    trimRegionIndex = 0
    # only look at the first half of the run
    testRegionIndex = timeToIndexRel(time, time[len(time)/2] - time[0])
    trimAmount = []
    gaussianList = []
    while trimRegionIndex < testRegionIndex:
        gaussianList.append(shapiro(dvdl[trimRegionIndex:testRegionIndex])[0])
        trimAmount.append(trimRegionIndex)
        trimRegionIndex += timeToIndexRel(time, trimTime)
    ax = graphing.getPlotAxis()
    ax.plot(trimAmount, gaussianList)
###
###
def plotDetectDriftMinBlk(time, dvdl, minBlkSize, graphing):
    # Same visualization as plotDetectDrift, but driven by a minimum block
    # size (detectDriftMinBlk / _detectDriftHelperMinBlk) instead of N.
    mids, mu, blkSizes = _detectDriftHelperMinBlk(time, dvdl, minBlkSize)
    r,minBlkSize,actualN = detectDriftMinBlk(time, dvdl, minBlkSize)
    ax = graphing.getPlotAxis()
    ax.errorbar(mids, mu, xerr=[b/2.0 for b in blkSizes], marker ='o', linestyle='',color ='k')
    fit = polyfit(mids,mu,1)
    fit_fn = poly1d(fit)
    domain = [min(mids), max(mids)]
    ax.plot(domain, fit_fn(domain), color='r')
    ax.set_title("Block average trends r^2={0:5.3f} (N={1}, minBlk={2:5.3f}ps)".format(r**2, actualN, minBlkSize))
    ax.set_ylabel("dv/dl")
    ax.set_xlabel("Time (ps)")
    ax.set_xlim(time[0])
def calcDriftTimeMinBlk(time, dvdl, minBlkSize, minR, snipPercent):
    # Minimum-block-size variant of calcDriftTime: snip the front of the
    # series until the drift regression r^2 drops to minR.
    r, _, _ = detectDriftMinBlk(time, dvdl, minBlkSize)
    snipTime = (time[-1]-time[0])*(snipPercent/100.0)
    cumulativeDriftTime = time[0]
    # NOTE(review): len(time) is loop-invariant here (as in calcDriftTime);
    # termination depends on the regression weakening as data is removed.
    while r**2 > minR and len(time) > 0:
        cumulativeDriftTime += snipTime
        cutPt = timeToIndex(time, cumulativeDriftTime)
        r, _, _ = detectDriftMinBlk(time[cutPt:], dvdl[cutPt:], minBlkSize)
    return cumulativeDriftTime, r
def detectDriftMinBlk(time, dvdl, minBlkSize):
    """Drift test like detectDrift, but driven by a minimum block size."""
    midpoints, blockMeans, blockSizes = _detectDriftHelperMinBlk(time, dvdl, minBlkSize)
    correlation = linReg(midpoints, blockMeans)
    return correlation, min(blockSizes), len(blockMeans)
def _detectDriftHelperMinBlk(time, dvdl, minBlkSize):
    # Collect block midpoints, block means and block sizes across all block
    # sizes produced by minSizeBlockAnalysis, flattened into parallel lists.
    def means(timeBlks, dvdlBlks):
        # midpoint sample of each time block, mean of each dvdl block
        # (len(tb)/2 relies on Python 2 integer division)
        return [tb[len(tb)/2] for tb in timeBlks], [mean(b) for b in dvdlBlks]
    results = minSizeBlockAnalysis(time, dvdl, means, minBlkSize)
    mu = []
    mids = []
    blkSizes = []
    for blkSize, res in results.items():
        mids.extend(res[0])
        mu.extend(res[1])
        blkSizes.extend([blkSize]*len(res[0])) # block size is twice the distance from the beginning to the middle of the first block
    return mids, mu, blkSizes
##
##
def plotAutoCor(time, dvdl, autoCorTol, graphing):
    """Iteratively block-average the series at its autocorrelation time and
    plot the autocorrelation time found at each iteration.

    The loop stops once the autocorrelation index reaches half the current
    series length or getCorTime finds no lag below tolerance (index < 1).
    The unused `time_n`/`dvdl_n` copies from the original have been removed.
    """
    ACRes = []
    # initialize to ensure at least one iteration of the loop
    ACIndex = 2
    while ACIndex < len(time)/2 and ACIndex >= 1:
        ACIndex, ACTime = getCorTime(time, dvdl, autoCorTol)
        # NOTE(review): getCorTime can return -1, making time[ACIndex] the
        # last sample before the loop exits -- confirm intended.
        ACRes.append(time[ACIndex])
        # coarsen the series by averaging over one correlation time
        time, dvdl = blockAvIndex(time, dvdl, ACIndex)
    ax = graphing.getPlotAxis()
    ax.plot(range(len(ACRes)), ACRes, linestyle='-',color="k",marker ='o')
from numpy import mean
from genLibMData import timeToIndex, getBlocks
from scipy.stats import ks_2samp
from pylab import polyfit, poly1d
from copy import copy
def initialEquilibr(time, dvdl, minBlkPercent, equilibKS, snipPercent):
    # Estimate how much initial (un-equilibrated) time to discard: repeatedly
    # run the KS-based helper on the remaining series, accumulating the
    # suggested trim, until the helper suggests no further trimming (0.0).
    minBlkSize = (time[-1]-time[0])*(minBlkPercent/100.0)
    snipTime = (time[-1]-time[0])*(snipPercent/100.0)
    equilibTime = _initEquilibrHelper(time[:], dvdl[:], minBlkSize, equilibKS, snipTime)
    cumulativeEquilibTime = copy(equilibTime)
    # also have to stop if we get to the end of the time series
    # NOTE(review): no explicit end-of-series check is implemented; the loop
    # only stops when the helper returns 0.0 -- confirm intended.
    while equilibTime > 0.0:
        equilibIndex = timeToIndex(time, cumulativeEquilibTime)
        equilibTime = _initEquilibrHelper(time[equilibIndex:], dvdl[equilibIndex:], minBlkSize, equilibKS, snipTime)
        cumulativeEquilibTime += equilibTime
    return cumulativeEquilibTime
def _initEquilibrHelper(time, dvdl, minBlkSize, equilibKS, snipTime):
    # Return how much time to snip from the front of this series so that no
    # block's KS statistic against the reference half exceeds equilibKS;
    # returns 0.0 when the series already looks equilibrated or no block
    # data could be produced.
    equilibTime = 0.0
    probData = _initEquilibGetData(time, dvdl, minBlkSize)
    if not probData:
        return equilibTime
    KStats, _, _, _ = _initEquilibAnalyse(probData)
    sigDiffs = [ks for ks in KStats if ks > equilibKS]
    if not sigDiffs:
        return equilibTime
    #while _resultImproving(diffHistory, equilibSigDiff):
    # keep snipping until no block size shows a significant difference
    while [ks for ks in KStats if ks > equilibKS]:
        equilibTime += snipTime
        cutPt = timeToIndex(time, equilibTime)
        probData = _initEquilibGetData(time[cutPt:], dvdl[cutPt:], minBlkSize)
        KStats, _, _, _ = _initEquilibAnalyse(probData)
    return equilibTime
def _initEquilibAnalyse(probData):
    # Extract, for each block size, the KS statistic and p-value of the
    # FIRST block (the one compared against the reference half).
    # Returns (KStats, pVals, mids, blkSizes) as parallel lists.
    if len(probData) < 1:
        print "WARNING method terminated abnormally"
        return [], [], [], []
    # compare mean probability including and excluding first block
    blkSizes = []
    mids = []
    KStats = []
    pVals = []
    for size, data in probData.items():
        mids.append( data["mids"][0])
        blkSizes.append( size )
        KStats.append( data["probs"][0][0] )
        pVals.append( data["probs"][0][1] )
    return KStats, pVals, mids, blkSizes
def _initEquilibGetData(time, dvdl, minBlkSize):
    """Run the reference block analysis and repackage its results keyed by
    block size as {"probs": ..., "mids": ...} dicts."""
    analysis = refBlockAnalysis(time, dvdl, minBlkSize)
    probData = {}
    for blockSize, result in analysis.items():
        probData[blockSize] = {"probs": result[1], "mids": result[0]}
    return probData
def sameDistTest(timeBlks, dvdlBlks, ref):
    """KS-test every dvdl block against the reference sample.

    Returns (block mid-times, per-block ks_2samp results).  Floor division
    is used for the midpoint index so the code behaves identically under
    Python 2 and Python 3 (plain `/` is float division in Python 3).
    """
    mids = [tb[len(tb) // 2] for tb in timeBlks]
    tests = [ks_2samp(b, ref) for b in dvdlBlks]
    return mids, tests
def refBlockAnalysis(time, dvdl, minBlkSize):
    # Compare blocks of the first half of the series against the second half
    # (the reference) with a KS test, at successively halved block sizes.
    # Returns {blockSize: (mids, ks_results)}.
    # for refBlock Analysis minBlkSize needs to be doubled to account for the
    # fact that only half the time series is being considered
    minBlkSize *= 2
    # Python 2 integer division splits the series in half
    testdvdl, refDvdl = dvdl[:len(dvdl)/2], dvdl[len(dvdl)/2:]
    testtime, refTime = time[:len(time)/2], time[len(time)/2:]
    if len(testtime) < 2:
        return {}
    blkSize = testtime[-1] - testtime[0]
    # do the first iteration "manually"
    res = {}
    numBlks = 2
    while blkSize >= minBlkSize:
        blkSizeIndex = len(testdvdl)/numBlks
        blkSize = testtime[blkSizeIndex] - testtime[0]
        timeBlks = getBlocks(testtime, blkSizeIndex)
        dvdlBlks = getBlocks(testdvdl, blkSizeIndex)
        res[blkSize] = sameDistTest(timeBlks,dvdlBlks, refDvdl)
        numBlks *= 2
    return res
def plotInitialEquilibr(time, dvdl, minBlkPercent, equilibKS, snipPercent, graphing):
    # Plot the per-block-size KS statistics, then re-plot them after each
    # snip until no block exceeds equilibKS (mirrors initialEquilibr).
    minBlkSize = (time[-1]-time[0])*(minBlkPercent/100.0)
    results = _initEquilibGetData(time, dvdl, minBlkSize)
    KStats, _, mids, blkSizes = _initEquilibAnalyse(results)
    ax = graphing.getPlotAxis()
    #ax.errorbar(mids, KStats, xerr=[b/2.0 for b in blkSizes], marker ='o', linestyle='',color ='k')
    ax.plot(blkSizes, KStats, marker ='o', linestyle='',color ='k')
    #ax.set_yscale("log")
    ax.set_title("Measure of 'sameness'")
    ax.set_ylabel("KS statistic")
    ax.set_xlabel("Block size (ps)")
    ax.set_xlim(time[0])
    snipTime = (time[-1]-time[0])*(snipPercent/100.0)
    cutPt = timeToIndex(time, snipTime)
    while [ks for ks in KStats if ks > equilibKS]:
        # drop another snipTime's worth of samples and re-analyse
        time = time[cutPt:]
        dvdl = dvdl[cutPt:]
        results = _initEquilibGetData(time, dvdl, minBlkSize)
        KStats, _, _, _ = _initEquilibAnalyse(results)
        #ax.errorbar(mids, KStats, xerr=[b/2.0 for b in blkSizes], marker ='o', linestyle='',color ='k')
        graphing.addToPlot(ax, blkSizes[:len(KStats)], KStats)
import numpy
from scipy.stats import shapiro, linregress
try:
import pylab
except:
pass
class Data(object):
    """Container for a (time, dvdl) time series loaded from a file or from
    in-memory sequences."""
    # NOTE(review): these class-level names hold the numpy.array *function*
    # until __init__ overwrites them per-instance -- presumably intended as
    # type hints; confirm before relying on them.
    dvdl=numpy.array
    time=numpy.array
    def __init__(self, timeScale="ps", datafile=None, xs=None, ys=None):
        # timeScale selects a multiplier applied to the time axis so that
        # downstream analysis always works in ps
        if timeScale == "ns":
            self.adjustTime = 1000
        elif timeScale == "ps":
            self.adjustTime = 1.0
        else:
            raise Exception("timeScale not recognized: {0}".format(timeScale))
        self.dvdl = []
        self.time = []
        if datafile:
            self.parse_datafile(datafile)
        elif xs is not None and ys is not None:
            self.dvdl = ys
            self.time = xs
        else:
            raise Exception("No data provided")
        self.dvdl = numpy.array(self.dvdl)
        self.time = numpy.array(self.time)*self.adjustTime
        # assumes at least two samples and a uniform step -- TODO confirm
        self.stepSize = self.time[1] - self.time[0]
    def parse_datafile(self, datafile):
        # Parse a two-column text file; '#'/'@' lines are comments/headers.
        with open(datafile,'r') as fh:
            raw = fh.read()
        for line in raw.splitlines():
            if line.startswith("#") or line.startswith("@"):
                continue
            cols = line.split()
            if len(cols) == 2:
                t = cols[0]
                d = cols[1]
            # this occures when dvdl has too many significant figures and the columns merge (ene_ana bug)
            elif len(cols) == 1:
                t = line[:15]
                d = line[15:]
            else:
                print "Unexpected line: " + line
            # NOTE(review): after an unexpected line, t/d from the previous
            # row are appended again (or NameError on the first row) --
            # confirm intended.
            self.time.append(float(t))
            self.dvdl.append(float(d))
class Graphing(object):
    """Hands out numbered pylab figures/axes and cycles through a fixed
    colour palette for overlay plots.

    The stray no-op ``self._figCounter`` expression statements in the
    original getPlotAxis/getFig have been removed (dead code).
    """
    def __init__(self):
        self._figCounter = 0   # next figure number to allocate
        self._colors = ("r","g","c","y","m","b")
        self._colorIndex = 0   # rotating index into _colors
    def getPlotAxis(self):
        """Create a new numbered figure and return its single axis."""
        fig = pylab.figure(self._figCounter)
        self._figCounter += 1
        fig.hold(True)
        ax11 = fig.add_subplot(111)
        return ax11
    def getFig(self):
        """Create and return a new numbered figure."""
        fig = pylab.figure(self._figCounter)
        self._figCounter += 1
        fig.hold(True)
        return fig
    def addToPlot(self, ax, x, y):
        """Scatter x/y on ax in the next palette colour."""
        ax.plot(x, y, color =self._colors[self._colorIndex],marker ='o',linestyle='',zorder=3)
        self._colorIndex += 1;self._colorIndex %= len(self._colors)
    def addToMeansPlot(self, ax, times, means, blkSize):
        """Plot block means with horizontal bars showing the block width."""
        xer = [blkSize/2]*len(times) # not really errors but block widths
        ax.errorbar(times, means, xerr=xer, color =self._colors[self._colorIndex],marker ='o',linestyle='',zorder=3, linewidth=1.8)
        self._colorIndex += 1;self._colorIndex %= len(self._colors)
def getMeanData(timeBlks, dvdlBlks):
    """Per-block midpoint time, mean and standard deviation.

    Uses floor division for the midpoint index so behaviour is identical
    under Python 2 and Python 3 (plain `/` is float division in Python 3).
    """
    x = [tb[len(tb) // 2] for tb in timeBlks]
    y = [numpy.mean(b) for b in dvdlBlks]
    std = [numpy.std(b) for b in dvdlBlks]
    return x, y, std
def linReg(x, y):
    """Pearson correlation coefficient r from a linear regression of y on x."""
    regression = linregress(x, y)
    return regression[2]
def testNormality(blocks):
res = {}
for i, b in enumerate(blocks):
w, _ = shapiro(b)
res[i]= w
return res
def timeToIndex(timeSeries, t1):
    """Index of the first sample whose absolute time reaches t1.

    Returns 0 for short series or when t1 is within the first step; falls
    through to None when t1 lies beyond the last sample.
    """
    if len(timeSeries) < 2:
        return 0
    if t1 <= timeSeries[1] - timeSeries[0]:
        return 0
    for position, stamp in enumerate(timeSeries):
        if stamp >= t1:
            return position
def timeToIndexRel(timeSeries, t1):
    """Index of the first sample whose offset from timeSeries[0] reaches t1.

    Returns 0 for short series or when t1 is within the first step; falls
    through to None when t1 exceeds the covered range.
    """
    if len(timeSeries) < 2:
        return 0
    if t1 <= timeSeries[1] - timeSeries[0]:
        return 0
    start = timeSeries[0]
    for position, stamp in enumerate(timeSeries):
        if stamp - start >= t1:
            return position
def subset(time, dvdl, start, end):
    """Slice both series to the half-open time window [start, end).

    Indices fall back to the full length when no sample reaches the bound
    (the original raised StopIteration from next() in that case).
    """
    startIndex = next((i for i, t in enumerate(time) if t >= start), len(time))
    endIndex = next((i for i, t in enumerate(time) if t >= end), len(time))
    return time[startIndex:endIndex], dvdl[startIndex:endIndex]
def getBlocks(y, blockIndexSize):
    """Chop y into full blocks of blockIndexSize samples.

    Returns a 2-D numpy array of shape (numBlocks, blockIndexSize); any
    partial tail block is dropped.
    """
    boundaries = range(0, len(y) + 1, blockIndexSize)
    numBlocks = len(boundaries) - 1
    blocks = numpy.empty((numBlocks, blockIndexSize))
    for blockNumber in range(numBlocks):
        start = boundaries[blockNumber]
        blocks[blockNumber] = y[start:start + blockIndexSize]
    return blocks
def makeBlocks(time, dvdl, timeWindow):
    """Split both series into blocks spanning timeWindow of simulation time."""
    windowIndex = timeToIndex(time, timeWindow)
    return getBlocks(time, windowIndex), getBlocks(dvdl, windowIndex)
def blockAvAnalysis(time, dvdl, method, N):
    # Apply `method` to the series split into 1, 2, 4, 8... blocks, stopping
    # once at least N results have accumulated.  Returns {blockSize: result}.
    blkSize = time[-1] - time[0]
    res = {blkSize: method([time],[dvdl])}
    numBlks = 2
    sampleSize = 1
    # Python 2 integer division gives the per-block index width
    blkSizeIndex = len(dvdl)/numBlks
    # if blkSizeIndex < 1 then blks will have size 0
    while sampleSize < N and blkSizeIndex > 0:
        sampleSize += numBlks
        blkSizeIndex = len(dvdl)/numBlks
        blkSize = time[blkSizeIndex] - time[0]
        timeBlks = getBlocks(time, blkSizeIndex)
        dvdlBlks = getBlocks(dvdl, blkSizeIndex)
        res[blkSize] = method(timeBlks,dvdlBlks)
        numBlks *= 2
    return res
def minSizeBlockAnalysis(time, dvdl, method, minBlkSize):
    # Apply `method` to the series split into 1, 2, 4, 8... blocks, stopping
    # once the block size drops below minBlkSize.  Returns {blockSize: result}.
    blkSize = time[-1] - time[0]
    res = {blkSize: method([time],[dvdl])}
    numBlks = 2
    while blkSize >= minBlkSize:
        # Python 2 integer division gives the per-block index width
        blkSizeIndex = len(dvdl)/numBlks
        blkSize = time[blkSizeIndex] - time[0]
        timeBlks = getBlocks(time, blkSizeIndex)
        dvdlBlks = getBlocks(dvdl, blkSizeIndex)
        res[blkSize] = method(timeBlks,dvdlBlks)
        numBlks *= 2
    return res
def refBlockAnalysis(time, dvdl, method, minBlkSize):
    # Like minSizeBlockAnalysis, but only the first half of the series is
    # blocked; the second half is passed through as a reference sample.
    # for refBlock Analysis minBlkSize needs to be doubled to account for the
    # fact that only half the time series is being considered
    minBlkSize *= 2
    # Python 2 integer division splits the series in half
    testdvdl, refDvdl = dvdl[:len(dvdl)/2], dvdl[len(dvdl)/2:]
    testtime, refTime = time[:len(time)/2], time[len(time)/2:]
    if len(testtime) < 2:
        return {}
    blkSize = testtime[-1] - testtime[0]
    # do the first iteration "manually"
    res = {blkSize:method([testtime, refTime],[testdvdl, refDvdl])}
    numBlks = 2
    while blkSize >= minBlkSize:
        blkSizeIndex = len(testdvdl)/numBlks
        blkSize = testtime[blkSizeIndex] - testtime[0]
        timeBlks = getBlocks(testtime, blkSizeIndex)
        dvdlBlks = getBlocks(testdvdl, blkSizeIndex)
        res[blkSize] = method(timeBlks,dvdlBlks)
        numBlks *= 2
    return res
def blockAv(x_orig, y, blockSize):
    """Block-average y over windows spanning blockSize in x units.

    Returns (block mid-x values, block means) as numpy arrays; the partial
    tail block is dropped.
    """
    strideIndex = timeToIndexRel(x_orig, blockSize)
    boundaries = range(0, len(y), strideIndex)
    midpoints = []
    averages = []
    for lower, upper in zip(boundaries[:-1], boundaries[1:]):
        midpoints.append(x_orig[lower] + (x_orig[upper] - x_orig[lower]) / 2.0)
        averages.append(numpy.mean(y[lower:upper]))
    return numpy.array(midpoints), numpy.array(averages)
def blockAvIndex(x_orig, y, blockIndex):
    """Block-average y over windows of blockIndex samples.

    Returns (block mid-x values, block means) as numpy arrays; the partial
    tail block is dropped.
    """
    boundaries = range(0, len(y), blockIndex)
    midpoints = []
    averages = []
    for lower, upper in zip(boundaries[:-1], boundaries[1:]):
        midpoints.append(x_orig[lower] + (x_orig[upper] - x_orig[lower]) / 2.0)
        averages.append(numpy.mean(y[lower:upper]))
    return numpy.array(midpoints), numpy.array(averages)
def getCorTime(time, y, autoCorTol):
    """First lag at which the estimated autocorrelation drops to autoCorTol.

    Returns (lagIndex, lagTime), or (-1, -1) when the autocorrelation never
    falls below the tolerance.
    """
    correlation = _estimated_autocorrelation(y)
    #ax = getPlotAxis()
    #ax.plot(time,corData)
    #pylab.show()
    for lag, value in enumerate(correlation):
        if value <= autoCorTol:
            return lag, time[lag]
    return -1, -1
def _estimated_autocorrelation(y):
"""
http://stackoverflow.com/q/14297012/190597
http://en.wikipedia.org/wiki/Autocorrelation#Estimation
"""
n = len(y)
variance = y.var()
y = y-y.mean()
r = numpy.correlate(y, y, mode = 'full')[-n:]
assert numpy.allclose(r, numpy.array([(y[:n-k]*y[-(n-k):]).sum() for k in range(n)]))
result = r/(variance*(numpy.arange (n, 0, -1)))
return result
import os
import matplotlib
# Fall back to the non-interactive Agg backend on headless machines.
# ("DISPLAY" in os.environ works on Python 2 and 3; the old
# os.environ.has_key() call is Python-2-only.)
if "DISPLAY" not in os.environ:
    matplotlib.use("Agg")
import pylab
from genLibMData import Data, Graphing, timeToIndexRel
import logging
from numpy import mean, std
from samplingConv import blkAvErrorEst, findConvergedClusters, plotFindConvergedClusters,\
    plotEstError
import sys
from optparse import OptionParser

# analysis tunables
PLOT = True
minN = 15
convKStat = 0.95
convRegStepSize = 1 #%
def run(timeScale="ps", xs=None, ys=None, dataPath=None, graphing=None, log=None, fig=None):
    # Full analysis pipeline: load the data, estimate the error on the raw
    # series, locate converged regions via the KS cluster search, then redo
    # the estimate on the largest converged region (optionally with plots).
    # Returns (mean, errorEstimate, continuity, startIndex, convBlkIndex);
    # the first two are None when no converged region is found.
    if not log:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s: %(message)s', datefmt='%d-%m-%Y %H:%M:%S')
        log = logging
    if dataPath:
        log.info( "Processing {0}\n...".format(dataPath))
    feData = Data(datafile=dataPath, xs=xs, ys=ys, timeScale=timeScale)
    # error estimate on the full, untrimmed series
    mu, errEst, convBlkSize = blkAvErrorEst(feData.time[:], feData.dvdl[:], log=log, fig=fig)
    errEst = "{0:5.2g}".format(errEst) if errEst is not None else "NA"
    convBlkSize = "{0:5.2g}".format(convBlkSize) if convBlkSize is not None else "NA"
    log.debug("Result on raw data: {0:5.2f} +/- {1} dv/dl (blockSize={2}ps)".format(mu, errEst, convBlkSize))
    convClusters, convergedKSVals, _ = findConvergedClusters(feData.time[:], feData.dvdl[:], convKStat, convRegStepSize)
    if convClusters:
        if len(convClusters) > 1:
            log.debug("Multiple possible converged regions found!")
            for i, cluster in enumerate(convClusters):
                log.debug("Possible converged region starting from {0:5.1f}ps (max(1-ks)={1:5.3f} continuity={2:5.1f}ps)".format(feData.time[-1] - cluster[0], max(convergedKSVals[i]),cluster[0]-cluster[-1]))
        # pick the cluster covering the longest continuous stretch of time
        maxContinuity = -1
        maxContCluster = []
        maxContKS = -1
        for i, cluster in enumerate(convClusters):
            if cluster[0] - cluster[-1] > maxContinuity:
                maxContCluster = cluster
                maxContinuity = cluster[0] - cluster[-1]
                maxContKS = convergedKSVals[i]
        log.debug("\nLargest converged region starts from {0:5.1f}ps (max(1-ks)={1:5.3g} continuity={2:5.1f}ps)".format(feData.time[-1] - maxContCluster[0], max(maxContKS),maxContinuity))
        # cluster offsets are measured back from the end of the run
        startIndex = timeToIndexRel(feData.time, feData.time[-1] - maxContCluster[0])
        fig = graphing.getFig() if graphing else None
        xs_converged = feData.time[startIndex:]
        ys_converged = feData.dvdl[startIndex:]
        muInit, errEstInit, convBlkSize = blkAvErrorEst(xs_converged, ys_converged, fig=fig, log=log)
        errEstInit_str = "{0:5.2g}".format(errEstInit) if errEstInit is not None else "NA"
        convBlkSize_str = "{0:5.2f}".format(convBlkSize) if convBlkSize is not None else "NA"
        log.info("Result for largest converged region ({3}ps): {0:5.2f} +/- {1} dv/dl (blockSize={2}ps)".format(muInit, errEstInit_str, convBlkSize_str, maxContinuity))
    else:
        log.info("No converged regions found")
        muInit, errEstInit, maxContinuity, startIndex, convBlkSize = None, None, 0.0, len(feData.time), None
    if graphing is not None:
        plotFindConvergedClusters(feData.time[:], feData.dvdl[:],convKStat, convRegStepSize,graphing)
        #plotEstError(feData.time[:], feData.dvdl[:], graphing)
        #plotConvTest(feData.dvdl[:], graphing)
        # graphical overview
        # NOTE(review): reloads from dataPath here, so plotting requires
        # dataPath rather than xs/ys -- confirm intended.
        feData = Data(datafile=dataPath, timeScale=timeScale)
        ax = graphing.getPlotAxis()
        ax.plot(feData.time, feData.dvdl, color="k",alpha=.5)
        if convClusters:
            # plot the largest converged region
            midCluster = (feData.time[-1] - maxContCluster[0]) + (maxContCluster[0])/2.0
            clusterStartIndex = timeToIndexRel(feData.time, feData.time[-1] - maxContCluster[0])
            meanCluster = mean(feData.dvdl[clusterStartIndex:])
            ax.errorbar([midCluster],[meanCluster],xerr=[maxContCluster[0]/2.0], color ="g",marker ='o',linestyle='',zorder=4, linewidth=2)
            # plot the continuity
            clusterStartIndexSm = timeToIndexRel(feData.time, feData.time[-1] - maxContCluster[-1])
            meanClusterSm = mean(feData.dvdl[clusterStartIndexSm:])
            ax.errorbar([(feData.time[-1] - maxContCluster[-1]) + (maxContCluster[-1])/2.0],[meanClusterSm],xerr=[maxContCluster[-1]/2.0], color ="b",marker ='o',linestyle='',zorder=4, linewidth=2)
            # plot the block average convergence for largest converged region
            #plotEstError(feData.time[clusterStartIndex:], feData.dvdl[clusterStartIndex:], graphing)
            #plotConvTest(feData.dvdl[clusterStartIndex:], graphing)
        ax.set_title("Result summary")
        ax.set_ylabel("Property")
        ax.set_xlabel("Time (ps)")
        ax.set_xlim(0)
        #pylab.savefig("uncertainty_analysis.png", format="png", dpi=600)
    # NOTE(review): convBlkSize is None on the no-cluster path; passing it
    # to timeToIndexRel relies on Python 2's None-number comparisons.
    return muInit, errEstInit, maxContinuity, startIndex, timeToIndexRel(feData.time, convBlkSize)
# Command-line entry point: parse options, run the analysis and (optionally)
# show the diagnostic plots.
if __name__=="__main__":
    timeScale = "ps"
    graphing = None
    parser = OptionParser(usage="python %prog -d <timeseriesDataFile> [options]")
    # required
    parser.add_option("-d", "--data", dest="data", help="Time series data file", action="store", type="string")
    # optional
    parser.add_option("-g", "--graph", dest="graph", help="Display graph", action="store_true")
    parser.add_option("-n", "--nano", dest="nano", help="Set time scale of data to ns (default=ps)", action="store_true")
    (opts, args) = parser.parse_args()
    if not opts.data:
        # the data file is mandatory
        parser.print_help()
        sys.exit()
    if opts.nano:
        timeScale = "ns"
    if opts.graph:
        graphing = Graphing()
    run(timeScale, dataPath=opts.data, graphing=graphing)
    pylab.show()
This diff is collapsed.
This diff is collapsed.
*.pyc
.project
.pydevproject
# Default output file for the integration error analysis figure.
DEFAULT_FIGURE_NAME = "integration_error_analysis.svg"
# Tolerance used by the 2nd-derivative imbalance check, in percent
# (presumably -- name and the '%' marker suggest it; confirm at call site).
IMBALANCED_2ND_DERIVATIVE_TOL = 40 #%
# Scaling factor applied to the convergence rate (semantics defined at the
# call site).
CONVERGENCE_RATE_SCALING = 1
import numpy as np
def round_sigfigs(num, sig_figs):
    """Round to specified number of sigfigs.

    >>> round_sigfigs(0, sig_figs=4)
    0
    >>> int(round_sigfigs(12345, sig_figs=2))
    12000
    >>> int(round_sigfigs(-12345, sig_figs=2))
    -12000
    >>> int(round_sigfigs(1, sig_figs=2))
    1
    >>> '{0:.3}'.format(round_sigfigs(3.1415, sig_figs=2))
    '3.1'
    >>> '{0:.3}'.format(round_sigfigs(-3.1415, sig_figs=2))
    '-3.1'
    >>> '{0:.5}'.format(round_sigfigs(0.00098765, sig_figs=2))
    '0.00099'
    >>> '{0:.6}'.format(round_sigfigs(0.00098765, sig_figs=3))
    '0.000988'
    """
    if num == 0:
        return 0  # Can't take the log of 0
    magnitude = np.floor(np.log10(abs(num)))
    decimals = (sig_figs - 1) - int(magnitude)
    return round(num, decimals)
def rss(values):
    """Return the root-sum-of-squares of *values*."""
    squared = np.asarray(values) ** 2
    return np.sqrt(squared.sum())
def calc_y_intersection_pt(pt1, pt2, x_c):
    '''
    Return the point at the intersection of the linear function defined by pt1,pt2 and the line x = x_c

    Each point is an (x, y, error) tuple; the error component is ignored.
    '''
    x1, y1, _ = pt1
    x2, y2, _ = pt2
    # slope and y-intercept of the line through the two points
    slope = (y2 - y1) / float(x2 - x1)
    intercept = (x1 * y2 - x2 * y1) / float(x1 - x2)
    # evaluate the line at x = x_c
    return slope * x_c + intercept
def second_derivative_with_uncertainty(pts):
    """Three-point finite-difference estimate of the second derivative on a
    (possibly non-uniform) grid, with the uncertainty propagated from the
    per-point errors via root-sum-of-squares.

    pts: three (x, y, error) tuples. Returns (second_derivative, uncertainty).
    """
    (x1, y1, e1), (x2, y2, e2), (x3, y3, e3) = pts
    left_gap = x2 - x1
    right_gap = x3 - x2
    span = left_gap + right_gap
    # standard non-uniform 3-point stencil weights
    w1 = 2. / (left_gap * span)
    w2 = -2. / (left_gap * right_gap)
    w3 = 2. / (right_gap * span)
    second_der = w1 * y1 + w2 * y2 + w3 * y3
    return second_der, rss([w1 * e1, w2 * e2, w3 * e3])
def parse_user_data(data):
    """Parse whitespace-separated numeric columns from *data*.

    Blank lines and lines starting with '#' are skipped.
    Returns (xs, ys, es) tuples; when the data has no third column the
    errors default to 0.0 for every point.
    """
    rows = [l for l in data.splitlines() if l and not l.startswith("#")]
    # first attempt to get xs, ys and errors
    try:
        xs, ys, es = zip(*[tuple(map(float, l.split()[:3])) for l in rows])
    except ValueError:
        # now try just xs and ys with errors set to 0.0
        # (bug fix: the fallback previously called map() without the float
        # conversion function and added a list to a map, so it always raised;
        # the bare `except:` is also narrowed to ValueError)
        xs, ys, es = zip(*[tuple(map(float, l.split()[:2])) + (0.0,) for l in rows])
    return xs, ys, es
# \ No newline at end of file  (diff-viewer artifact, not code)
import sys
import argparse
import numpy as np
from helpers import round_sigfigs, rss, calc_y_intersection_pt, second_derivative_with_uncertainty, parse_user_data
from config import DEFAULT_FIGURE_NAME
DO_NOT_PLOT = "DO_NOT_PLOT"
def point_error_calc(xs, es):
    '''
    This method is based on propagation of point uncertainty for Trapezoidal algorithm on
    non-uniformly distributed points: https://en.wikipedia.org/wiki/Trapezoidal_rule#Non-uniform_grid

    Each point contributes half the width of the span it touches times its
    uncertainty: boundary points span one interval, interior points span the
    two adjacent intervals.  Returns the list of per-point error contributions.
    '''
    n = len(xs)
    # width of the region each point influences
    spans = [xs[1] - xs[0]]
    spans.extend(xs[i + 1] - xs[i - 1] for i in range(1, n - 1))
    spans.append(xs[-1] - xs[-2])
    return [0.5 * span * e for span, e in zip(spans, es)]
def interval_errors(xs, ys, es, forward=True):
    '''
    Based on analytical Trapezoidal error function with 2nd derivative estimated numerically:
    https://en.wikipedia.org/wiki/Trapezoidal_rule#Error_analysis

    Returns (gap_xs, gap_ys, gap_es): for each interval between consecutive
    points, its midpoint, the linearly interpolated y at that midpoint, and
    the estimated truncation error.  `forward` selects which 3-point stencil
    estimates the 2nd derivative for interior intervals (pts[i:i+3] vs
    pts[i+1:i+4]); the boundary intervals always use the nearest 3 points.
    '''
    pts = zip(xs, ys, es)
    # first interval: midpoint and interpolated y-value
    gap_xs = [ (xs[0] + xs[1])/2. ]
    gap_ys = [ calc_y_intersection_pt(pts[0], pts[1], gap_xs[0]) ]
    # if there are only 2 points, the interval error will be zero
    if len(pts) == 2:
        return gap_xs, gap_ys, [0]
    gap_es = [ trapz_interval_error(pts[:3], (xs[1] - xs[0])) ]
    # interior intervals, i.e. between xs[i+1] and xs[i+2]
    for i in range(len(xs)-3):
        gap_xs.append( (xs[i+1] + xs[i+2])/2. )
        gap_ys.append( calc_y_intersection_pt(pts[i+1], pts[i+2], gap_xs[i+1]) )
        dx = xs[i+2] - xs[i+1]
        if forward:
            gap_es.append( trapz_interval_error(pts[i:i+3], dx) )
        else:
            # reverse
            gap_es.append( trapz_interval_error(pts[i+1:i+4], dx) )
        #print gap_es[-1]
    # last interval: always the final 3-point stencil
    gap_xs.append( (xs[-1] + xs[-2])/2. )
    gap_ys.append( calc_y_intersection_pt(pts[-2], pts[-1], gap_xs[-1]) )
    gap_es.append( trapz_interval_error(pts[-3:], (xs[-1] - xs[-2])) )
    return gap_xs, gap_ys, gap_es
def trapz_interval_error(pts, dx):
    """Analytic trapezoidal truncation error for one interval of width *dx*:
    (dx^3 / 12) * f'', with f'' estimated from the 3-point stencil *pts*.
    The uncertainty on the 2nd derivative is discarded here.
    """
    second_der, _unused_uncertainty = second_derivative_with_uncertainty(pts)
    return np.array(second_der) * dx**3 / 12.
def plot_error_analysis(xs, ys, es, gap_xs, gap_ys, gap_errors, figure_name=None, title="", show=True, x_label="x", y_label="y"):
    """Plot the integration points (with error bars) plus the per-interval
    truncation errors at interval midpoints.  Saves a PNG to *figure_name*
    when given; shows the window when *show* is true.
    """
    import os
    # headless machines: fall back to the non-interactive Agg backend
    if not os.environ.has_key("DISPLAY"):
        import matplotlib
        matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    fig.hold(True)
    ax.errorbar(xs, ys, es, marker="o", label="Integration Points")
    # interval errors scaled by 12 — presumably undoing the 1/12 factor of the
    # trapezoidal error term for visibility; TODO confirm the intended scaling
    ax.errorbar(gap_xs, gap_ys, 12.*np.array(gap_errors), linestyle="", label="Relative Interval Errors")
    plt.ylabel(y_label, fontweight="bold")
    plt.xlabel(x_label, fontweight="bold")
    plt.title(title, fontweight="bold")
    plt.legend(loc = 'upper right', prop={'size':11}, numpoints = 1, frameon = False)
    fig.tight_layout()
    if figure_name:
        plt.savefig(figure_name, format="png")
    if show:
        plt.show()
def trapz_integrate_with_uncertainty(xs, ys, es, be_conservative=True):
    """Trapezoidal integration with a combined uncertainty estimate.

    The total error is the RSS-propagated point uncertainty plus the net
    truncation error of whichever derivative stencil direction (forward or
    backward) is worse; when *be_conservative*, an additional safety margin
    (max single-interval error vs forward/backward disagreement) is added.

    Returns (integral, total_error, gap_xs, gap_ys, gap_errors,
             integration_point_errors, error_safety_policy).
    """
    integration_point_errors = point_error_calc(xs, es)
    forward_results = interval_errors(xs, ys, es, forward=True)
    backwards_results = interval_errors(xs, ys, es, forward=False)
    def _net_error(result):
        # magnitude of the summed interval errors
        return np.abs(np.sum(result[-1]))
    # keep the direction with the larger net truncation error (ties -> backward,
    # matching the original stable-sort behaviour)
    if _net_error(backwards_results) >= _net_error(forward_results):
        gap_xs, gap_ys, gap_errors = backwards_results
    else:
        gap_xs, gap_ys, gap_errors = forward_results
    point_uncertainty_error = rss(list(integration_point_errors))
    sum_gap_errors = np.abs(np.sum(gap_errors))
    all_interval_errors = np.concatenate((forward_results[-1], backwards_results[-1]))
    max_interval_error = np.max(np.abs(all_interval_errors))
    forward_reverse_diff = np.abs(_net_error(forward_results) - _net_error(backwards_results))
    error_safety_policy = max(max_interval_error, forward_reverse_diff)
    total_error = point_uncertainty_error + sum_gap_errors
    if be_conservative:
        total_error += error_safety_policy
    return np.trapz(ys, xs), total_error, gap_xs, gap_ys, gap_errors, integration_point_errors, error_safety_policy
def config_argparse():
    """Build the argparse parser shared by the integration scripts."""
    parser = argparse.ArgumentParser(description='Integration Error Estimate')
    parser.add_argument('-d', '--data', nargs='?', type=argparse.FileType('r'), default=sys.stdin,
        help="File containing data to integrate. Line format:<x> <y> [<error>]. Data can also be provided via stdin within argument.")
    parser.add_argument('-p', '--plot', nargs='?', type=str, default=DO_NOT_PLOT,
        help="Show plot of integration errors, requires matplotlib. Optional argument will determine where and in what format the figure will be saved in.")
    parser.add_argument('-s', '--sigfigs', type=int, default=3,
        help="Number of significant figures in output. Default=3")
    parser.add_argument('-v', '--verbose', action="store_true",
        help="Output error breakdown.")
    parser.add_argument('-b', '--be_conservative', action="store_true",
        help="Make a conservative estimate of the uncertainty.")
    return parser
def process_plot_argument(args):
    """Turn the -p/--plot option into a figure filename.

    Returns False when plotting is disabled; otherwise the requested filename
    (defaulting to DEFAULT_FIGURE_NAME, with .png appended when no extension
    was given).
    """
    if args.plot is None:
        # -p given without an argument: use the default figure name
        figure_name = DEFAULT_FIGURE_NAME
    else:
        figure_name = args.plot
    if figure_name == DO_NOT_PLOT:
        return False
    if figure_name and "." not in figure_name:
        figure_name = "{0}.png".format(figure_name)
    return figure_name
def parse_args():
    """Parse the CLI; returns (data, figure_name, sigfigs, verbose, be_conservative)."""
    parser = config_argparse()
    args = parser.parse_args()
    fig_name = process_plot_argument(args)
    user_data = parse_user_data(args.data.read())
    return user_data, fig_name, args.sigfigs, args.verbose, args.be_conservative
def main():
data, figure_name, sigfigs, verbose, be_conservative = parse_args()
xs, ys, es = np.array(data)
integral, total_error, gap_xs, gap_ys, gap_errors, integration_point_errors, conservative_error_adjustment = trapz_integrate_with_uncertainty(xs, ys, es, be_conservative=be_conservative)
round_sf = lambda x:round_sigfigs(x, sigfigs)
result_string = "{0:g} +/- {1:g}".format(round_sf(integral), round_sf(total_error))
if verbose:
value_error = round_sf(rss(integration_point_errors))
print "Error from y-value uncertainty: +/- {0:g}".format(value_error)
truncation_error = round_sf(np.sum(gap_errors) + conservative_error_adjustment if be_conservative else np.sum(gap_errors))
print "Estimated truncation error: {1:g}".format(truncation_error)
if be_conservative:
print "Additional error component: {0:g}".format(round_sf(conservative_error_adjustment))
print "Integral: {0}".format(result_string)
else:
print result_string
if figure_name:
plot_error_analysis(xs, ys, es, gap_xs, gap_ys, gap_errors, figure_name, title="Integral: {0}".format(result_string), show=True)
if __name__=="__main__":
main()
import numpy as np
from config import CONVERGENCE_RATE_SCALING
from integration_error_estimate import config_argparse, process_plot_argument, parse_user_data, \
plot_error_analysis, trapz_integrate_with_uncertainty
from helpers import round_sigfigs, rss
def reduce_error_on_residual_error(error_pts, residule_error, convergence_rate_scaling, be_conservative):
    """Select the entries with the largest error magnitudes until their
    accumulated (scaled) error reduction covers the residual error.

    error_pts: tuples whose first element is the error magnitude; interval
    ("gap") entries carry the string "gap" as their last element.
    Returns the selected entries, largest error first.
    """
    if be_conservative:
        # since pts are already sorted we can simply filter out all non-gap points and take the first which remains
        # NOTE(review): raises IndexError when error_pts has no "gap" entries,
        # and the "already sorted" assumption holds only for some callers
        # (simulate_updates sorts; get_updates does not) — confirm.
        largest_gap_error_pt = [pt for pt in error_pts if pt[-1] == "gap"][0]
    largest_error_pts = []
    for pt in sorted(error_pts, key=lambda x:abs(x[0]))[::-1]:
        if residule_error < 0:
            break
        largest_error_pts.append(pt)
        # Add special case for largest gap error if we're being conservative
        if be_conservative and pt == largest_gap_error_pt:
            residule_error -= abs(pt[0])
        # Since addition of a point to an interval reduces error by a factor of 1/4 according
        # to the truncation error estimate method we're using => (e - e/4) = 0.75*e.
        # We simply assume the same is true for reducing the uncertainty on an existing point.
        # The convergence rate scaling factor allows for additional control over the number of points
        # that will be added on each iteration.
        residule_error -= (1./convergence_rate_scaling)*0.75*abs(pt[0])
    return largest_error_pts
def get_updates(xs, integration_point_errors, gap_xs, gap_errors, trapz_est_error, target_uncertainty, convergence_rate_scaling, be_conservative=True):
    """Decide where to add new points (worst intervals) and which existing
    points should be re-measured to reach *target_uncertainty*.

    Returns (new_pts, update_xs) as lists of [error, x] float pairs.
    """
    gap_entries = [(err, x, "gap") for err, x in zip(gap_errors, gap_xs)]
    point_entries = [(err, x) for err, x in zip(integration_point_errors, xs)]
    residule_error = abs(trapz_est_error) - target_uncertainty
    largest_error_pts = reduce_error_on_residual_error(
        gap_entries + point_entries, residule_error, convergence_rate_scaling, be_conservative)
    def _is_gap(entry):
        return entry[-1] == "gap"
    update_xs = [map(float, entry) for entry in largest_error_pts if not _is_gap(entry)]
    new_pts = [map(float, entry[:-1]) for entry in largest_error_pts if _is_gap(entry)]
    return new_pts, update_xs
def parse_args():
    """Parse CLI options; adds -t/-c on top of the shared integration options.

    Returns (data, figure_name, be_conservative, sigfigs, verbose,
             target_error, convergence_rate_scaling).
    """
    parser = config_argparse()
    parser.add_argument('-t', '--target_error', type=float, required=True, nargs=1,
        help="Target error of integration, used to determine how to reduce integration uncertainty.")
    parser.add_argument('-c', '--convergence_rate_scaling', type=float, nargs=1, default=CONVERGENCE_RATE_SCALING,
        help="Scaling factor for rate of convergence to target error i.e. determines iterations required to reach target error. "\
        "e.g. A value < 1 will result in more iterations but fewer overall points as the points will be concentrated in regions of high "\
        "uncertainty; conversely values > 1 will result it more points but fewer iterations required to reach a given target error. Default=1.")
    args = parser.parse_args()
    fig_name = process_plot_argument(args)
    user_data = parse_user_data(args.data.read())
    return user_data, fig_name, args.be_conservative, args.sigfigs, args.verbose, args.target_error, args.convergence_rate_scaling
def main():
data, figure_name, be_conservative, sigfigs, verbose, target_error, convergence_rate_scaling = parse_args()
xs, ys, es = np.array(data)
integral, total_error, gap_xs, gap_ys, gap_errors, integration_point_errors, conservative_error_adjustment = trapz_integrate_with_uncertainty(xs, ys, es, be_conservative=be_conservative)
round_sf = lambda x:round_sigfigs(x, sigfigs)
result_string = "{0:g} +/- {1:g}".format(round_sf(integral), round_sf(total_error))
if verbose:
value_error = round_sf(rss(integration_point_errors))
print "Error from y-value uncertainty: +/- {0:g}".format(value_error)
truncation_error = round_sf(np.sum(gap_errors) if be_conservative else np.sum(gap_errors))
print "Estimated truncation error: {0:g}".format(truncation_error)
if be_conservative:
print "Additional error component: {0:g}".format(round_sf(conservative_error_adjustment))
print "Integral: {0}".format(result_string)
print "Truncation errors: interval midpoint -> error"
print "\n".join(["{1:<6.4f} -> {0:>7.4f}".format(*map(round_sf, d)) for d in sorted(zip(gap_errors, gap_xs), key=lambda x:abs(x[0]))[::-1]])
print "Point errors: point -> +/- error"
print "\n".join(["{1:<6.4f} -> +/- {0:6.4f}".format(*map(round_sf, d)) for d in sorted(zip(integration_point_errors, xs))[::-1]])
else:
print result_string
if total_error > target_error:
new_pts, update_xs = get_updates(xs, integration_point_errors, gap_xs, gap_errors, total_error, target_error, convergence_rate_scaling, be_conservative=be_conservative)
if new_pts:
print "Suggested new points:"
print ",".join(["{0:g}".format(round_sf(p[1])) for p in new_pts])
if update_xs:
print "Suggested to reduce uncertainty in existing points:"
print ",".join(["{0:g}".format(round_sf(p[1])) for p in update_xs])
else:
print "Target error has been reached."
if figure_name:
plot_error_analysis(xs, ys, es, gap_xs, gap_ys, gap_errors, figure_name, title="Integral: {0}".format(result_string))
if __name__=="__main__":
main()
# This diff is collapsed.  (GitLab diff-viewer artifact, not code)
# This diff is collapsed.  (GitLab diff-viewer artifact, not code)
import sys
from numpy import array, mean, std, random, ones, trapz
from scipy.integrate import simps
import pylab as pl
# Convergence tolerance applied to both the running mean and std of MC trials.
CONV_TOT = 0.01
# Number of random trials drawn per Monte-Carlo error estimate.
MAX_ITER = 100000
# Size of the trailing window excluded when testing std stability.
MIN_ITER = 20
def mc_trapz(xs, ys, es):
    """Monte-Carlo integral and error estimate using the trapezoidal rule.
    Warns on stderr when the MC estimate has not converged."""
    mc_integral, error_estimate, did_converge = est_error_mc(xs, ys, es, trapz)
    if not did_converge:
        sys.stderr.write("WARNING: MC error estimate did not fully converge.\n")
    return mc_integral, error_estimate
def mc_simps(xs, ys, es):
    """Monte-Carlo integral and error estimate using Simpson's rule.
    Warns on stderr when the MC estimate has not converged."""
    mc_integral, error_estimate, did_converge = est_error_mc(xs, ys, es, simps)
    if not did_converge:
        sys.stderr.write("WARNING: MC error estimate did not fully converge.\n")
    return mc_integral, error_estimate
def est_error_mc(xs, ys, es, method, plot=False):
    """Monte-Carlo error estimate for an integration *method* (e.g. trapz/simps).

    Draws MAX_ITER resampled y-vectors from N(ys, es), integrates each, and
    returns (mean, std, converged_flag) over the trials.
    """
    # integral of the unperturbed data, used as the convergence reference
    expctValue = method(ys, xs)
    y_trials = samplY(ys, es, MAX_ITER)
    trialResults = [method(y_trial, xs) for y_trial in y_trials]
    if plot:
        # diagnostic plot: running std (red, left axis) and running mean
        # (blue, right axis) vs trial count, with the reference integral in green
        fig = pl.figure()
        ax = fig.add_subplot(111)
        std_conv, mean_conv = zip(*[(std(trialResults[:i+1]), mean(trialResults[:i+1])) for i in range(len(trialResults)-1)])
        ax.plot(range(len(trialResults)-1), std_conv, "r")
        ax1 = ax.twinx()
        ax1.plot(range(len(trialResults)-1), mean_conv, "b")
        ax1.plot([0, len(trialResults) - 1], [expctValue]*2, "g")
        pl.show()
    return mean(trialResults), std(trialResults), converged(trialResults, expctValue)
def samplY(ys, es, N):
    """Draw N Gaussian samples per data point: column i ~ N(ys[i], es[i]).
    Points with zero uncertainty yield a constant column.
    Returns an (N, len(ys)) array — one resampled y-vector per row."""
    columns = []
    for mu, sig in zip(ys, es):
        if sig != 0:
            columns.append(random.normal(mu, sig, N))
        else:
            columns.append(mu * ones(N))
    return array(columns).transpose()
def converged(trialResults, expctValue):
    """True when the MC trials have stabilised: the running mean is within
    CONV_TOT of the reference value AND the std changed by no more than
    CONV_TOT over the last MIN_ITER trials."""
    mean_drift = abs(mean(trialResults) - expctValue)
    std_drift = abs(std(trialResults) - std(trialResults[:-MIN_ITER]))
    return mean_drift <= CONV_TOT and std_drift <= CONV_TOT
# This diff is collapsed.  (GitLab diff-viewer artifact, not code)
from scipy.integrate import trapz
import numpy as np
from scipy import interpolate
import sys
sys.path.append("../")
from reduce_integration_uncertainty import reduce_error_on_residual_error
from method_validation import TARGET_ERROR
import simulated_iterative_integration
from integration_error_estimate import trapz_integrate_with_uncertainty, interval_errors, point_error_calc
from helpers import rss
from method_validation import DATA_FILE, get_data, parse_data_dir, get_realistic_function
def integrate_with_point_uncertinaty(xs, ys, es):
    """Trapezoidal integral with the RSS of the propagated per-point errors.
    (Name typo preserved for caller compatibility.)"""
    per_point_errors = point_error_calc(xs, es)
    return np.trapz(ys, xs), rss(per_point_errors)
def integrate_with_gap_uncertinaty(xs, ys, es):
    """Trapezoidal integral with a truncation-error estimate: the larger of
    the forward/reverse net interval errors plus their disagreement."""
    _, _, forward_es = interval_errors(xs, ys, es)
    _, _, reverse_es = interval_errors(xs, ys, es, forward=False)
    net = lambda errs: np.abs(np.sum(errs))
    # worse direction wins; ties resolve to the reverse stencil (stable-sort parity)
    gap_errors = reverse_es if net(reverse_es) >= net(forward_es) else forward_es
    truncation_error = net(gap_errors) + np.abs(net(forward_es) - net(reverse_es))
    return np.trapz(ys, xs), truncation_error
def run_test(xs, ys, es, x_fine=None, y_fine=None):
    """Integrate (xs, ys, es), print the error diagnostics and plot the data.

    x_fine/y_fine, when supplied, give a densely sampled reference curve used
    to report the true truncation error.
    Returns (gap_errors, gap_xs, total_error).
    """
    if x_fine is not None:
        integral = trapz(y_fine, x_fine)
        print "Integral: {0}".format(integral)
        print "True truncation error: {0}".format(trapz(ys, xs) - integral)
    print "Truncation error estimate: {0} +/- {1}".format(*integrate_with_gap_uncertinaty(xs, ys, es))
    print "Analytical integral point uncertainty: {0} +/- {1}".format(*integrate_with_point_uncertinaty(xs, ys, es))
    trapz_integral, total_error, gap_xs, _, gap_errors, _, _ = trapz_integrate_with_uncertainty(xs, ys, es)
    print "Combined error estimate: {0} +/- {1}".format(trapz_integral, total_error)
    #plot_error_analysis(xs, ys, es, gap_xs, gap_ys, zip(*gap_errors)[0])
    plot_data(xs, ys, es, x_fine, y_fine, figure_name="example_integration_{0}.eps".format(len(xs)))
    #plot_data_error_propagation(xs, ys, es, x_fine, figure_name="error_prop.png")
    return gap_errors, gap_xs, total_error
def plot_data(xs, ys, es, x_fine=None, y_fine=None, figure_name=None, title=""):
    """Plot the integration points with error bars; when a fine reference curve
    is given, shade the area between it and the linear interpolant (the
    visual truncation error)."""
    import os
    # headless machines: fall back to the non-interactive Agg backend
    if not os.environ.has_key("DISPLAY"):
        import matplotlib
        matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=(5,4))
    ax = fig.add_subplot(111)
    fig.hold(True)
    # linear interpolant through the integration points
    f = interpolate.interp1d(xs, ys, kind=1)
    if x_fine is not None and y_fine is not None:
        ax.fill_between(x_fine, y_fine, map(f, x_fine), facecolor='red', alpha=0.8)
        ax.plot(x_fine, y_fine, "r")
    ax.set_title("N={0}".format(len(xs)), fontweight="bold")
    ax.errorbar(xs, ys, es, marker="o", label="Integration Points")
    ax.set_xlabel(r'$\mathbf{\lambda}$')
    ax.set_ylabel(r'<$\mathbf{dV/d\lambda}$> (kJ/mol)')
    #ax.set_title("Measurement Error Propagation", fontweight="bold")
    #plt.legend(loc = 'upper right', prop={'size':11}, numpoints = 1, frameon = False)
    fig.tight_layout()
    if figure_name:
        plt.savefig("{0}".format(figure_name), dpi=300)
    plt.show()
def plot_data_error_propagation(xs, ys, es, x_fine, figure_name=None, title=""):
    """Visualise how per-point uncertainties propagate into the integral:
    for each interior point, shade the area swept when that point's y-value
    is raised by its error bar."""
    import os
    # headless machines: fall back to the non-interactive Agg backend
    if not os.environ.has_key("DISPLAY"):
        import matplotlib
        matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=(5,4))
    ax = fig.add_subplot(111)
    fig.hold(True)
    ax.errorbar(xs, ys, es, marker="o", label="Integration Points")
    ax.set_xlabel(r'$\mathbf{\lambda}$')
    ax.set_ylabel(r'<$\mathbf{dV/d\lambda}$> (kJ/mol)')
    #ax.set_title("Measurement Error Propagation", fontweight="bold")
    #ax.fill_between(xs, np.zeros(len(xs)), ys, facecolor='blue', alpha=0.2)
    for i in range(1,len(xs)-1):
        # area between the data polyline and the polyline with y[i] shifted by e[i]
        ax.fill_between(xs[i-1:i+2], ys[i-1:i+2], [ys[i-1], ys[i]+es[i], ys[i+1]], facecolor='red', alpha=0.2)
    #plt.legend(loc = 'upper right', prop={'size':11}, numpoints = 1, frameon = False)
    fig.tight_layout()
    if figure_name:
        plt.savefig("{0}".format(figure_name), dpi=300)
    plt.show()
def plot_data_tapz_simps(xs, ys, es, x_fine, figure_name=None, title=""):
    """Side-by-side comparison of linear (trapezoid-style) and cubic
    interpolation of the integration points, with the integrated area shaded."""
    #mc_integral, mc_error = mc_simps(xs, ys, es)
    #print "Monte Carlo Simps integral point uncertainty: {0} +/- {1}".format(mc_integral, mc_error)
    import os
    # headless machines: fall back to the non-interactive Agg backend
    if not os.environ.has_key("DISPLAY"):
        import matplotlib
        matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=(9,4))
    ax = fig.add_subplot(121)
    fig.hold(True)
    ax.errorbar(xs, ys, es, marker="o", label="Integration Points")
    ax.set_xlabel(r'$\mathbf{\lambda}$')
    ax.set_ylabel(r'<$\mathbf{dV/d\lambda}$> (kJ/mol)')
    ax.set_title("Linear interpolation", fontweight="bold")
    ax.fill_between(xs, np.zeros(len(xs)), ys, facecolor='blue', alpha=0.2)
    # right panel shares the y-axis with the linear panel
    ax3 = fig.add_subplot(122, sharey=ax)
    ax3.set_xlim((0,1))
    ax3.errorbar(xs, ys, es, marker="o", linestyle="", label="Integration Points")
    f = interpolate.interp1d(xs, ys, kind=3)
    ax3.plot(x_fine, map(f, x_fine), "b", label="Integration Points")
    ax3.fill_between(x_fine, np.zeros(len(x_fine)), map(f, x_fine), facecolor='blue', alpha=0.2)
    ax3.get_yaxis().set_visible(False)
    ax3.set_title("Cubic interpolation", fontweight="bold")
    ax3.set_xlabel(r'$\mathbf{\lambda}$')
    #plt.legend(loc = 'upper right', prop={'size':11}, numpoints = 1, frameon = False)
    fig.tight_layout()
    if figure_name:
        plt.savefig("{0}".format(figure_name), dpi=300)
    plt.show()
def simple_test_function():
    """Minimal triangle test case with uniform point uncertainties."""
    run_test(xs=[0, 1, 2], ys=[0, 10, 0], es=[0.5, 0.5, 0.5])
def periodic_test_function():
    """Noisy periodic test function on [0, pi]."""
    n_points = 11
    lower, upper = 0, np.pi
    f = lambda x: 100*(np.sin(x) + 2*np.cos(3*x-.5) + 2*np.sin(x-.2))
    xs = list(np.linspace(lower, upper, n_points))
    es = np.random.uniform(0, 10, n_points)
    ys = map(f, xs)
    # densely sampled reference curve for the true-error report
    x_fine = np.linspace(lower, upper, n_points*100)
    y_fine = map(f, x_fine)
    run_test(xs, ys, es, x_fine, y_fine)
def interpolated_test_function():
    """Test against the spline interpolant of the example dV/dlambda data."""
    n_points = 11
    lower, upper = 0, 1
    f = simulated_iterative_integration.get_realistic_function()
    xs = list(np.linspace(lower, upper, n_points))
    es = np.random.uniform(0, 10, n_points)
    ys = map(f, xs)
    # densely sampled reference curve for the true-error report
    x_fine = np.linspace(lower, upper, n_points*100)
    y_fine = map(f, x_fine)
    run_test(xs, ys, es, x_fine, y_fine)
def real_function():
    """Iteratively refine the integration of a spline fitted to real
    dV/dlambda data: on each of 6 iterations, add exact (zero-uncertainty)
    points at the midpoints of the worst intervals."""
    N = 5
    a = 0
    b = 1
    data = get_data(DATA_FILE, parse_data_dir)
    # use only the first dataset from the parsed directory
    for _, dvdl in data.items():
        f = get_realistic_function(dvdl[0], dvdl[1])
        break
    xs = list(np.linspace(a, b, N))
    es = np.random.uniform(0, 5, N)
    ys = map(f, xs)
    x_fine = np.linspace(a, b, N*100)
    y_fine = map(f, x_fine)
    #generated_pts = [xs, ys, list(np.zeros(len(xs)))]
    generated_pts = [xs, ys, es]
    for _ in range(6):
        gap_errors, gap_xs, total_error = run_test(*(generated_pts + [x_fine, y_fine]))
        gap_error_pts = zip(gap_errors, gap_xs, ["gap"]*len(gap_errors))
        # choose the intervals whose refinement covers the residual error
        largest_gap_error = reduce_error_on_residual_error(gap_error_pts, total_error-TARGET_ERROR, 0.5, False)
        if largest_gap_error:
            largest_gap_errors_x = zip(*largest_gap_error)[1]
        else:
            largest_gap_errors_x = []
        # insert the new midpoints (evaluated exactly, zero uncertainty) and re-sort by x
        generated_pts = zip(*sorted(zip(*generated_pts) + zip(largest_gap_errors_x, map(f, largest_gap_errors_x), list(np.zeros(len(largest_gap_errors_x))) )))
if __name__=="__main__":
    #simple_test_function()
    #periodic_test_function()
    #interpolated_test_function()
    real_function()
# \ No newline at end of file  (diff-viewer artifact, not code)
import numpy as np
from integration_error_estimate import trapz_integrate_with_uncertainty,\
plot_error_analysis, point_error_calc
from scipy import interpolate
from reduce_integration_uncertainty import reduce_error_on_residual_error
from helpers import parse_user_data, rss
from config import CONVERGENCE_RATE_SCALING
# Upper bound of the uniform range used to simulate measurement noise.
SIGMA = 5
# Number of points in the initial integration grid.
N_INIT = 11
# Example <dV/dlambda> free-energy perturbation data:
# columns are lambda, dvdl average, error estimate.
EXAMPLE_DVDL_DATA = '''
# lambda dvdl_average error_est
0.000 99.84849 0.36028
0.025 155.28053 0.90581
0.050 186.13209 1.11968
0.075 188.50786 1.42872
0.100 177.67623 1.45245
0.125 149.29522 1.37675
0.150 125.41653 1.41022
0.200 83.82993 0.78686
0.250 63.47751 0.54568
0.300 50.74429 0.35037
0.400 32.61911 0.41166
0.500 17.44440 0.62932
0.600 -2.15131 1.13114
0.649 -16.64929 1.29989
0.700 -32.44893 1.58581
0.750 -71.44813 3.10821
0.775 -94.12352 3.47328
0.800 -110.41922 3.80674
0.825 -127.66568 2.59015
0.850 -113.45883 1.90801
0.875 -97.05920 1.65601
0.900 -71.38330 0.95010
0.950 -30.13460 0.54041
1.000 3.15286 0.21252'''
def integrate_with_point_uncertinaty(xs, ys, es):
    """Trapezoidal integral paired with the RSS of the propagated point errors."""
    propagated = point_error_calc(xs, es)
    return np.trapz(ys, xs), rss(propagated)
def simulate_updates(xs, es, integration_point_errors, gap_xs, gap_errors, trapz_est_error, target_uncertainty):
    """Simulate one refinement step: add new points (with random simulated
    uncertainties) in the worst intervals and zero the uncertainty of
    re-measured existing points.  Returns the updated (xs, es)."""
    n_gaps = len(gap_xs)
    # gap entries carry a simulated measurement error for the prospective new point
    gap_error_pts = zip(gap_errors, gap_xs, np.random.uniform(0, SIGMA, n_gaps), ["gap"]*n_gaps)
    pts_errors = zip(integration_point_errors, xs, es)
    combined_pts_errors = sorted( gap_error_pts + pts_errors )[::-1]
    residule_error = trapz_est_error - target_uncertainty
    largest_error_pts = reduce_error_on_residual_error(combined_pts_errors, residule_error, CONVERGENCE_RATE_SCALING, be_conservative=True)
    # (xs, es)
    new_pts = [(pt[1], pt[2]) for pt in largest_error_pts if pt[-1] == "gap"]
    # NOTE(review): updated_xs collects the es values (pt[2]) of selected existing
    # points, and the membership test below compares against pt[2] — internally
    # consistent, but the name suggests x values; confirm intent.
    updated_xs = [pt[2] for pt in largest_error_pts if pt[-1] != "gap"]
    # re-measured points get zero uncertainty
    updated_pts = [(pt[1], 0) if pt[2] in updated_xs else (pt[1], pt[2]) for pt in pts_errors]
    sorted_pts = sorted(updated_pts + new_pts)
    print "Intervals which received new points: {0:.1f}%".format(len(new_pts)/float(len(gap_xs))*100)
    print "Updated points: {0:.1f}%".format(len(updated_xs)/float(len(xs))*100)
    xs, es = zip(*sorted_pts)
    return xs, es
def show_results(xs, es, ys, fine_integral, trapz_integral, trapz_est_error, gap_xs, gap_ys, gap_errors):
    """Print a comparison of the analytical error estimates against the
    fine-grid reference integral and plot the error analysis."""
    #print "Monte Carlo integral point uncertainty: {0} +/- {1}".format(mc_integral, mc_error)
    trap_integral, point_uncertainty = integrate_with_point_uncertinaty(xs, ys, es)
    print "Analytical integral point uncertainty: {0} +/- {1}".format(trap_integral, point_uncertainty)
    # NOTE(review): this line prints the integral alongside the summed gap
    # errors although the label reads like an error-only line — confirm intent.
    print "Truncation error estimate: {0} +/- {1}".format(trap_integral, np.abs(np.sum(gap_errors)))
    print "True truncation error: {0}".format(trapz_integral - fine_integral)
    print
    print "Estimated integral: {0} +/- {1}".format(trapz_integral, trapz_est_error)
    print "Fine scale integral: {0} true error {1}".format(fine_integral, abs(fine_integral - trapz_integral) + point_uncertainty)
    plot_error_analysis(xs, ys, es, gap_xs, gap_ys, np.abs(gap_errors))
def do_iteration(function, target_uncertainty, xs, es, ys, fine_integral):
    """Evaluate the integrand on the current grid, integrate with uncertainty,
    print a report, and return the pieces needed for the next refinement step."""
    ys = map(function, xs)
    (trapz_integral, trapz_est_error, gap_xs, gap_ys, gap_errors,
     integration_point_errors, _safety) = trapz_integrate_with_uncertainty(xs, ys, es)
    #mc_integral, mc_error = mc_trapz(xs, ys, es)
    show_results(xs, es, ys, fine_integral, trapz_integral, trapz_est_error, gap_xs, gap_ys, gap_errors)
    return integration_point_errors, gap_xs, gap_errors, trapz_est_error
def trapz_errorbased_integration(function, a, b, target_uncertainty, plot=True):
    """Iteratively integrate *function* over [a, b], simulating grid updates
    until the estimated uncertainty drops below *target_uncertainty*."""
    # bug fix: the initial grid was hard-coded to np.linspace(0, 1, 11),
    # ignoring the a/b arguments and the N_INIT constant
    xs = np.linspace(a, b, N_INIT)
    es = np.random.uniform(0, SIGMA, N_INIT)
    ys = map(function, xs)
    # densely sampled reference for reporting the true error
    x_fine = np.linspace(a, b, 1000)
    y_fine = map(function, x_fine)
    fine_integral = np.trapz(y_fine, x_fine)
    integration_point_errors, gap_xs, gap_errors, trapz_est_error = do_iteration(function, target_uncertainty, xs, es, ys, fine_integral)
    while trapz_est_error > target_uncertainty:
        xs, es = simulate_updates(xs, es, integration_point_errors, gap_xs, np.abs(gap_errors), trapz_est_error, target_uncertainty)
        integration_point_errors, gap_xs, gap_errors, trapz_est_error = do_iteration(function, target_uncertainty, xs, es, ys, fine_integral)
def get_realistic_function():
    """Return a cubic-spline interpolant of the (subsampled) example
    dV/dlambda data, usable as a realistic test integrand."""
    xs, ys, _ = parse_user_data(EXAMPLE_DVDL_DATA)
    xs, ys = filter_(xs, ys)
    return interpolate.interp1d(xs, ys, kind=3)
def filter_(xs, ys):
    """Keep only the points whose x lies on the coarse grid 0.0, 0.2, ..., 1.0.
    Returns the filtered (xs, ys) as two lists."""
    coarse_grid = [x/10. for x in range(0, 11, 2)]
    kept_pairs = [(x, ys[i]) for i, x in enumerate(xs) if x in coarse_grid]
    if not kept_pairs:
        return [], []
    newxs, newys = map(list, zip(*kept_pairs))
    return newxs, newys
def simulated_trapz_integration():
    """Drive the simulated iterative integration of the realistic example
    function on [0, 1] down to a target uncertainty of 0.5."""
    integrand = get_realistic_function()
    trapz_errorbased_integration(integrand, 0, 1, 0.5)
if __name__=="__main__":
    simulated_trapz_integration()
# \ No newline at end of file  (diff-viewer artifact, not code)
from argparse import ArgumentParser
from os.path import basename, dirname, join
import numpy as np
import sys
import glob
import gzip
import itertools
# Characters that mark comment/metadata lines in input trace files.
SKIP_LINE_CHARACTERS = ["#", "@"]
# Header and row template for the native UI input format.
UI_INPUT_FILE_HEADER = "# time(ps) position(nm) equilibrium_position(nm) force_constant(kJmol^-1nm^-<N>)"
UI_INPUT_LINE_TEMPLATE = "{0:>10g} {1:>10g} {2:>10g} {3:>10g}"
# Header and row template for the JK umbrella-integration format (headerless output).
JK_UI_INPUT_FILE_HEADER = "# position(nm) force_constant(kJmol^-1nm^-<N>) equilibrium_position(nm)"
JK_UI_INPUT_LINE_TEMPLATE = " {0:>10g} {1:>10g} {2:>10g}"
def generate_ui_input_lines(input_file, eq_pos, fc, shift=False, jk_ui=False, fast=True):
if fast:
time, pos = parse_input_file_quickly(input_file, keep_every=1)
else:
time, pos = parse_input_file(input_file)
if shift:
pos = pos + eq_pos
n = len(time)
print "constructing new file format"
if jk_ui:
return [JK_UI_INPUT_LINE_TEMPLATE.format(p, f, eq_p, ) for p, f, eq_p in zip(pos, [fc]*n, [eq_pos]*n)]
return [UI_INPUT_LINE_TEMPLATE.format(t, p, eq_p, f) for t, p, eq_p, f in zip(time, pos, [eq_pos]*n, [fc]*n)]
def generate_ui_input_lines_merged(run_data, shift=False):
ui_input_lines = []
# sort data on eq_pos first and then input_file
for input_file, eq_pos, fc in sorted(run_data, key=lambda x:(x[2], x[0])):
print "Processing {0}".format(input_file)
ui_input_lines.extend(generate_ui_input_lines(input_file, eq_pos, fc, shift))
return ui_input_lines
def write_output_file(output_file, ui_input_lines, jk_ui=False):
    """Write the UI input lines to *output_file*, prepending the standard
    header unless the JK (headerless) format is requested."""
    if not jk_ui:
        ui_input_lines = [UI_INPUT_FILE_HEADER] + ui_input_lines
    with open(output_file, "w") as fh:
        fh.write("\n".join(ui_input_lines))
def parse_input_file(input_file):
open_function = gzip.open if input_file.endswith(".gz") else open
fh = open_function(input_file)
lines = fh.read().splitlines()
fh.close()
# warn if first non-commented line contains more than 2 columns
for l in lines:
if l and l.strip()[0] not in SKIP_LINE_CHARACTERS:
if len(l.split()) > 2:
print "WARNING: more than 2 columns were found in {0}: Proceeding with => col1=time, col2=reaction coordinate position".format(input_file)
break
return np.array([map(float, l.split()[:2]) for l in lines if l and l.strip()[0] not in SKIP_LINE_CHARACTERS]).T
def parse_input_file_quickly(input_file, keep_every=1):
open_function = gzip.open if input_file.endswith(".gz") else open
fh = open_function(input_file)
lines = fh.read().splitlines()
print "Parsing file, keeping ever {0}(st/nd/rd/th) value".format(keep_every)
line_iterator = itertools.islice(lines, None, None, keep_every)
lines = np.array([map(float, l.split()[:2]) for l in line_iterator if l.strip()[0] not in SKIP_LINE_CHARACTERS])
fh.close()
return lines.T
def parse_commandline():
    """Parse options for the single-file conversion tool.

    Returns (input_file, output_file, force_constant, eq_position, shift_flag).
    """
    arg_parser = ArgumentParser(prog=basename(__file__))
    #"python -i <input data file> -r <reaction coordinate equilibrium position> -f <force constant> [-s]")
    # required
    arg_parser.add_argument("-i", "--input_file",
        help="Input data file: col1=time (ps), col2=position (nm)",
        action="store", required=True)
    arg_parser.add_argument("-o", "--output_file",
        help="Output data file: col1=time (ps), col2=position (nm), col5=umbrella_equilibrium_position (nm), col4=force_constant (kJ mol^-1 nm^-<N>)",
        action="store", required=True)
    arg_parser.add_argument("-f", "--force_constant",
        help="Force constant used for the umbrella potential (kJ mol-1 nm-2)",
        action="store", required=True, type=float)
    arg_parser.add_argument("-r", "--reaction_coordinate_position",
        help="Equilibrium position of the umbrella potential along the reaction coordinate (nm)",
        action="store", required=True, type=float)
    arg_parser.add_argument("-s", "--shift_positions_by",
        help="Shift all reaction coordinate positions by value set for reaction_coordinate_position (helpful since some packages output positions relative to the umbrella equilibrium position)",
        action="store_true", required=False)
    parsed = arg_parser.parse_args()
    return (parsed.input_file, parsed.output_file, parsed.force_constant,
            parsed.reaction_coordinate_position, parsed.shift_positions_by)
def main():
    """CLI entry point: convert a single time/position trace into a UI input file."""
    input_file, output_file, fc, eq_pos, shift = parse_commandline()
    converted_lines = generate_ui_input_lines(input_file, eq_pos, fc, shift)
    write_output_file(output_file, converted_lines)
if __name__=="__main__":
    # only run the CLI when arguments were actually supplied
    if len(sys.argv) > 1:
        main()
# another example script for generating merged ui input file
# NOTE(review): this block sits OUTSIDE the __main__ guard above, so it runs on
# import and writes to hard-coded user-specific paths — confirm whether it
# should be disabled with `if False:` like the example blocks below.
fc = 500000.0 # J mol-1 nm-2
fc /= 1000.0 # kJ mol-1 nm-2
run_data = []
for f in glob.glob("/Users/e.deplazes/Desktop/MD/peptides_lipids/ProTx2/pmf_popc/analysis/pdo_ProTx2_popc_*_reduced.dat"):
    # window centre is encoded in the filename (4th underscore-separated field)
    window_center = float(basename(f).split("_")[3])
    run_data.append( (f, window_center, fc) )
ui_input_lines = generate_ui_input_lines_merged(run_data, shift=True)
write_output_file("/Users/e.deplazes/Desktop/MD/peptides_lipids/ProTx2/pmf_popc/analysis/pdo_ProTx2_popc_merged_ui_input_0to10ns.dat", ui_input_lines)
# The `if False:` blocks below are kept as disabled usage examples.
if False:
    # JK umbrella integration example of multiple input file generation
    fc = 500000.0 # J mol-1 nm-2
    fc /= 1000.0 # kJ mol-1 nm-2
    for f in glob.glob("test_data/com_distance_vs_time_files/Nafc500*.dat"):
        window_center = float(basename(f).split("_")[3])
        ui_input_lines = generate_ui_input_lines(f, window_center, fc, shift=False, jk_ui=True)
        write_output_file(join(dirname(f), "{0}.jk_ui".format(basename(f)[:-4])), ui_input_lines, jk_ui=True)
if False:
    # example script for generating merged ui input file
    fc = 500000.0 # J mol-1 nm-2
    fc /= 1000.0 # kJ mol-1 nm-2
    run_data = []
    for f in glob.glob("test_data/protein_system/pdo*.dat"):
        # window centre is the midpoint of the two bounds in the filename
        window_left, window_right = map(float, basename(f).split("_")[1:3])
        window_center = np.mean([window_left, window_right])
        run_data.append( (f, window_center, fc) )
    ui_input_lines = generate_ui_input_lines_merged(run_data, shift=True)
    write_output_file("protien_system_merged_ui_input.dat", ui_input_lines)
    # another example script for generating merged ui input file
    fc = 500000.0 # J mol-1 nm-2
    fc /= 1000.0 # kJ mol-1 nm-2
    run_data = []
    for f in glob.glob("test_data/com_distance_vs_time_files/Nafc500*.dat"):
        window_center = float(basename(f).split("_")[3])
        run_data.append( (f, window_center, fc) )
    ui_input_lines = generate_ui_input_lines_merged(run_data, shift=False)
    write_output_file("na_ion_merged_ui_input.dat", ui_input_lines)
    # example of multiple input file generation
    fc = 500000.0 # J mol-1 nm-2
    fc /= 1000.0 # kJ mol-1 nm-2
    for f in glob.glob("test_data/protein_system/pdo*.dat"):
        window_left, window_right = map(float, basename(f).split("_")[1:3])
        window_center = np.mean([window_left, window_right])
        ui_input_lines = generate_ui_input_lines(f, window_center, fc, shift=True)
        write_output_file(join(dirname(f), "{0}.ui_dat".format(basename(f)[:-4])), ui_input_lines)
    # another example of multiple input file generation
    fc = 500000.0 # J mol-1 nm-2
    fc /= 1000.0 # kJ mol-1 nm-2
    for f in glob.glob("test_data/com_distance_vs_time_files/Nafc500*.dat"):
        window_center = float(basename(f).split("_")[3])
        ui_input_lines = generate_ui_input_lines(f, window_center, fc, shift=False)
        write_output_file(join(dirname(f), "{0}.ui_dat".format(basename(f)[:-4])), ui_input_lines)
from itertools import product
import colorsys
import os
import matplotlib
if not os.environ.has_key("DISPLAY"):
matplotlib.use("Agg")
import pylab
import numpy as np
try:
from scipy.stats.kde import gaussian_kde
CAN_PLOT_KDE = True
except:
print "KDE will be omitted from window distribution plot due to missing scipy library."
CAN_PLOT_KDE = False
def get_more_colors():
    """Return 7 fully saturated, full-value RGB tuples spread over the hue wheel.

    Hues are given in degrees and converted to the [0, 1) range expected by
    colorsys before conversion to RGB.
    """
    hues_in_degrees = [0, 60, 80, 180, 240, 270, 300]
    saturations = [1]
    brightnesses = [1]
    hsv_triples = [
        (h / 360., s, v)
        for h in hues_in_degrees
        for s in saturations
        for v in brightnesses
    ]
    return [colorsys.hsv_to_rgb(h, s, v) for h, s, v in hsv_triples]
def create_figure(figsize=(10,8)):
    """Create and return a new pyplot Figure of the given size in inches."""
    figure = matplotlib.pyplot.figure(figsize=figsize)
    # NOTE(review): this assigns True over Figure.hold (a method in older
    # matplotlib) rather than calling fig.hold(True) -- looks suspicious,
    # but it is preserved here exactly as written.  Confirm intent.
    figure.hold = True
    return figure
def add_axis_to_figure(fig, subplot_layout=111, sharex=None, sharey=None):
    """Add a subplot axis to fig (full-figure 111 layout by default) and return it.

    sharex/sharey are forwarded so the new axis can share limits with an
    existing one.
    """
    axis = fig.add_subplot(subplot_layout, sharex=sharex, sharey=sharey)
    return axis
def save_figure(fig, fig_name):
    """Write fig to the file fig_name (format chosen by matplotlib from the extension)."""
    fig.savefig(fig_name)
def plot(ax, xs, ys, es, xlabel=None, ylabel=None, label=None, linewidth=1, alpha=1, line_style="-", show=True):
    """Draw xs vs ys with error bars es on ax in the module's house style.

    Styling: black circle markers, bold 12pt labels and ticks, thickened
    axis spines, frameless upper-right legend.  xlabel/ylabel/label fall
    back to "x"/"y"/"" when not supplied.  Pass show=False when the figure
    is going to be saved rather than displayed interactively.
    """
    marker_size = 5
    width = 1.25  # axis spine line width
    tick_width = 0.75
    tick_length = 4
    font_size = 12
    font_weight = "bold"
    # Only the circle marker and black colouring are actually used; the
    # original symbol/colour/line-style palettes were dead locals and have
    # been removed.
    symbol = u'o'
    line_color = "k"
    fill_color = "k"
    if not ylabel:
        ylabel = "y"
    if not xlabel:
        xlabel = "x"
    if not label:
        label = ""
    ax.errorbar(xs, ys, yerr=es, marker=symbol, zorder=1, linestyle=line_style, color=line_color, markerfacecolor=fill_color, label=label, markersize=marker_size, alpha=alpha, linewidth=linewidth)
    pylab.ylabel(ylabel, fontsize=font_size, fontweight=font_weight)
    pylab.xlabel(xlabel, fontsize=font_size, fontweight=font_weight)
    pylab.xticks(fontsize=font_size, fontweight=font_weight)
    pylab.yticks(fontsize=font_size, fontweight=font_weight)
    # Thicken all four axis spines (Python 2 dict iteration).
    [i.set_linewidth(width) for i in ax.spines.itervalues()]
    pylab.tick_params(which='major', length=tick_length, color=line_color, width=tick_width)
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles, labels, loc='upper right', prop={'size': 12}, numpoints=1, frameon=False)
    if show:
        pylab.show()
def plot_different_colors(ax, xs, ys, xlabel=None, ylabel=None, label=None, show=True, kde=None, color="blue"):
    """Plot xs vs ys as a coloured line on ax, optionally overlaying a KDE.

    kde, when given, must be callable on a numpy array of xs (e.g. a scipy
    gaussian_kde); its curve is drawn as a half-transparent filled region in
    the same colour.  xlabel/ylabel/label default to "x"/"y"/"".  Pass
    show=False when composing a figure for saving.
    """
    width = 1.25  # axis spine line width
    tick_width = 0.75
    tick_length = 4
    font_size = 12
    font_weight = "bold"
    if not ylabel:
        ylabel = "y"
    if not xlabel:
        xlabel = "x"
    if not label:
        label = ""
    ax.plot(xs, ys, color=color, label=label)
    # Explicit None check (rather than truthiness) makes the optional-KDE
    # contract clear; removed a commented-out debug plot line here.
    if kde is not None:
        kde_curve = kde(np.array(xs))
        ax.fill_between(xs, kde_curve, alpha=0.5, facecolor=color)
    pylab.ylabel(ylabel)
    pylab.xlabel(xlabel)
    pylab.xticks(fontsize=font_size, fontweight=font_weight)
    pylab.yticks(fontsize=font_size, fontweight=font_weight)
    # Thicken all four axis spines (Python 2 dict iteration).
    [i.set_linewidth(width) for i in ax.spines.itervalues()]
    pylab.tick_params(which='major', length=tick_length, width=tick_width)
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles, labels, loc='upper right', prop={'size': 12}, numpoints=1, frameon=False)
    if show:
        pylab.show()
def generate_histogram(values, n_bins=50):
    """Histogram `values` into n_bins bins normalised to unit area.

    Returns (bin_centers, bin_heights, kde) where kde is a scipy
    gaussian_kde fitted to `values`, or None when scipy is unavailable
    (module flag CAN_PLOT_KDE is False).
    """
    # density=True replaces normed=True, which was long deprecated and has
    # been removed from modern numpy; the normalisation is identical.
    his, bins = np.histogram(values, bins=n_bins, density=True)
    centers = (bins[:-1] + bins[1:]) / 2  # bin edges -> bin midpoints
    kde = gaussian_kde(values) if CAN_PLOT_KDE else None
    return centers, his, kde
def plot_hist_to_axis(ax, centers, his, kde=None):
    """Draw a pre-computed histogram (bin centers/heights) onto ax without showing it."""
    plot_different_colors(ax, centers, his, kde=kde, show=False)
def finalise_figure(fig, ax, xlabel="x", ylabel="y", fig_name=None, show=False):
    """Label, tighten, optionally save and/or show fig, then always close it.

    Labels are applied through pylab's current-axes state machine; `ax` is
    accepted for interface symmetry with the other helpers but not used
    directly here.
    """
    pylab.xlabel(xlabel)
    pylab.ylabel(ylabel)
    fig.tight_layout()
    if fig_name:
        save_figure(fig, fig_name)
    if show:
        pylab.show()
    # Close unconditionally so repeated calls do not leak open figures.
    matplotlib.pyplot.close(fig)
def plot_histogram(values, xlabel="x", ylabel="y", n_bins=100, fig_name=None, show=False):
    """Histogram `values` (unit-area normalised, n_bins bins) and plot the bin
    centers as a curve with zero-length error bars.

    The figure is saved to fig_name when given, shown when show=True, and
    always closed afterwards.
    """
    # density=True replaces normed=True, which was long deprecated and has
    # been removed from modern numpy; the normalisation is identical.
    his, bins = np.histogram(values, bins=n_bins, density=True)
    centers = (bins[:-1] + bins[1:]) / 2  # bin edges -> bin midpoints
    fig = create_figure((8, 6))
    ax = add_axis_to_figure(fig)
    plot(ax, centers, his, np.zeros(len(his)), xlabel=xlabel, ylabel=ylabel, show=False)
    fig.tight_layout()
    if fig_name:
        save_figure(fig, fig_name)
    if show:
        pylab.show()
    matplotlib.pyplot.close(fig)
def simple_plot(xs, ys, es, xlabel="x", ylabel="y"):
    """One-call convenience: new figure + single axis + styled error-bar plot.

    xlabel/ylabel now default to "x"/"y" (a backward-compatible change) so
    callers that pass only the data -- such as the __main__ demo below,
    which previously raised TypeError -- work as intended.
    """
    fig = create_figure()
    ax = add_axis_to_figure(fig)
    plot(ax, xs, ys, es, linewidth=1.0, xlabel=xlabel, ylabel=ylabel)
if __name__ == "__main__":
    # Smoke test: plot sin over integer x in [0, 10) with random "error" bars.
    xs = range(0, 10)
    ys = np.sin(xs)
    es = np.random.normal(size=len(xs))
    # BUG FIX: simple_plot's signature requires xlabel and ylabel; the
    # original call omitted them and raised TypeError on every run.
    simple_plot(xs, ys, es, "x", "sin(x)")
\ No newline at end of file
3.200000 -116.072078 294.308939
3.275510 -90.304672 162.882568
3.351020 -61.743422 65.735079
3.426531 -32.201216 15.212402
3.502041 -2.766319 5.380849
3.577551 15.232815 6.234374
3.653061 8.664140 6.932854
3.728571 4.541066 3.653015
3.804082 11.119442 5.597466
3.879592 30.350830 5.269813
3.955102 41.930283 5.695798
4.030612 27.026861 2.714112
4.106122 23.076543 3.926382
4.181633 29.120829 2.017760
4.257143 37.454855 3.661284
4.332653 42.289226 2.311139
4.408163 44.254787 2.070010
4.483673 45.503484 2.182223
4.559184 46.235702 2.540846
4.634694 46.676007 2.855445
4.710204 41.220291 2.829754
4.785714 31.954112 2.993838
4.861224 26.354959 2.297394
4.936735 23.226705 2.758823
5.012245 19.096750 4.793819
5.087755 18.339840 6.059476
5.163265 21.642748 5.731446
5.238776 15.966710 3.892959
5.314286 9.328172 2.944553
5.389796 7.902187 2.543292
5.465306 13.302217 2.161543
5.540816 8.291992 1.579911
5.616327 -0.417806 2.174626
5.691837 -5.593003 2.583971
5.767347 1.024172 2.256632
5.842857 4.403962 1.411625
5.918367 1.654431 1.148086
5.993878 -0.977121 1.638442
6.069388 -3.040181 2.629587
6.144898 3.074812 2.272368
6.220408 5.748131 1.641147
6.295918 1.025184 1.999060
6.371429 0.968172 2.608820
6.446939 -1.207633 1.451116
6.522449 -4.050394 1.663572
6.597959 -4.403209 2.533582
6.673469 -3.732687 4.192894
6.748980 -4.361065 4.447660
6.824490 -7.386986 4.012766
6.900000 -12.835403 4.194462
\ No newline at end of file
# reaction_coordinate(nm) energy(kJ/mol) error(kJ/mol)
3.200000 18.399459 0.000000
3.275510 10.607684 0.722387
3.351020 4.867092 1.747046
3.426531 1.320203 2.098647
3.502041 0.000000 2.283409
3.577551 0.470674 2.406245
3.653061 1.372906 2.497285
3.728571 1.871470 2.561102
3.804082 2.462734 2.616525
3.879592 4.028448 2.662471
3.955102 6.757429 2.703181
4.030612 9.360913 2.732264
4.106122 11.252572 2.760656
4.181633 13.223289 2.781764
4.257143 15.736861 2.804575
4.332653 18.747607 2.822398
4.408163 22.015085 2.838183
4.483673 25.403918 2.853025
4.559184 28.867540 2.867708
4.634694 32.375431 2.882285
4.710204 35.693965 2.896063
4.785714 38.456672 2.909600
4.861224 40.658137 2.920913
4.936735 42.530097 2.932849
5.012245 44.128024 2.949226
5.087755 45.541446 2.968227
5.163265 47.050993 2.986079
5.238776 48.470942 2.999242
5.314286 49.425952 3.009880
5.389796 50.076486 3.019320
5.465306 50.877061 3.027635
5.540816 51.692352 3.034373
5.616327 51.989643 3.042334
5.691837 51.762704 3.051094
5.767347 51.590208 3.058925
5.842857 51.795148 3.064624
5.918367 52.023883 3.069573
5.993878 52.049455 3.075559
6.069388 51.897781 3.083767
6.144898 51.899089 3.091029
6.220408 52.232200 3.096712
6.295918 52.487927 3.103146
6.371429 52.563186 3.110924
6.446939 52.554145 3.115912
6.522449 52.355628 3.121324
6.597959 52.036461 3.128702
6.673469 51.729290 3.139878
6.748980 51.423709 3.151566
6.824490 50.980161 3.162165
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment