#!/bin/env python
# Copyright 2013 National Renewable Energy Laboratory, Golden CO, USA
# This file is part of NREL MatDB.
#
# NREL MatDB is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# NREL MatDB is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with NREL MatDB. If not, see <http://www.gnu.org/licenses/>.
import datetime, json, math, os, pwd, re
import shutil, socket, stat, subprocess, sys, time, traceback
import numpy as np
# Make numpy raise on every floating point error category,
# except underflow, which is silently ignored.
np.seterr( all='raise', under='ignore')
# Version string; doUpload writes it into the flag file next to the archive.
version = '1.0.0'
# Name of the metadata file
metadataName = 'metadata'
# Name of the subdir that doUpload creates inside workDir to hold
# the file list, the tar archive, and the flag file.
digestDirName = 'wrapUpload.archive'
#====================================================================
def badparms(msg):
    '''
    Prints an error message followed by the command line usage,
    then terminates the program.

    **Parameters**:

    * msg (str): Description of the problem with the command line.

    **Returns**

    * (Never returns)

    **Raises**

    * SystemExit with status 1 (via ``sys.exit``).
    '''

    # Each entry: (option name, value type, description lines).
    options = [
        ('-bugLev', '<int>', [
            'Debug level. Default: 0']),
        ('-readType', '<string>', [
            'outcar / xml']),
        ('-requireInput', '<boolean>', [
            'no/yes: do we require that the files',
            'INCAR, KPOINTS, POSCAR exist.']),
        ('-requireIcsd', '<boolean>', [
            'no/yes: do we require that the file',
            'path names contain ICSD info.']),
        ('-keepList', '<string>', [
            'File containing the absolute paths',
            'of the dirs to be uploaded.',
            'Still topDir must be specified,',
            'and all paths in keepList must',
            'start with the specified topDir.',
            'If keepList is specified,',
            'keepPatterns and omitPatterns',
            'must not be specified.']),
        ('-keepPatterns', '<string>', [
            'Comma separated list of',
            'regular expressions matching',
            'the relative paths of those directories',
            'to be kept. If specified,',
            'keepList must not be specified.',
            'If none of keepList, keepPatterns,',
            'or omitPatterns are specified, all dirs',
            'below topDir containing a metadata',
            'file will be archived.']),
        ('-omitPatterns', '<string>', [
            'Comma separated list of',
            'regular expressions matching',
            'the relative paths of those directories',
            'to be omitted. If specified,',
            'keepList must not be specified.']),
        ('-topDir', '<string>', [
            'Top of dir tree to upload.']),
        ('-workDir', '<string>', [
            'Work dir']),
    ]

    lines = [
        '\nError: %s' % (msg,),
        '',
        'There are three ways to run wrapUpload.py.',
        '',
        '1. Specify a list of directories to upload, using -keepList.',
        '',
        '2. Specify regular expressions matching the names of directories',
        'to upload, using -keepPatterns and -omitPatterns.',
        '',
        '3. Do not specify -keepList or -keepPatterns or -omitPatterns.',
        'In this case wrapUpload will upload every directory containing',
        'a metadata file in the tree rooted at topDir.',
        '',
        'Parameters:',
        '',
    ]

    # Continuation lines are indented to the description column:
    # 2 (margin) + 15 (option) + 11 (type) = 28.
    contIndent = ' ' * 28
    for (name, kind, descs) in options:
        lines.append('  %-15s%-11s%s' % (name, kind, descs[0]))
        for desc in descs[1:]:
            lines.append(contIndent + desc)
        lines.append('')

    # A single-argument print call behaves the same on Python 2 and 3.
    print('\n'.join(lines))
    sys.exit(1)
#====================================================================
def main():
    '''
    Parses the command line, builds the lists of required and
    optional file names, and calls :func:`doUpload`, which
    locates model runs, checks dir contents, and uses ``tar``
    to build the archive intended for the server running
    :mod:`wrapReceive`.

    Command line parameters:

    =================  =========  ===========================================
    Parameter          Type       Description
    =================  =========  ===========================================
    **-bugLev**        integer    Debug level.  Normally 0.
    **-readType**      string     If 'outcar', read the OUTCAR files.
                                  Else if 'xml', read the vasprun.xml files.
    **-requireInput**  boolean    no/yes: do we require that the files
                                  INCAR, KPOINTS, POSCAR exist.
    **-requireIcsd**   boolean    no/yes: do we require that the file
                                  path names contain ICSD info.
                                  See notes below.
    **-keepList**      string     File containing the absolute paths
                                  of the dirs to be uploaded.
                                  Still ``topDir`` must be specified,
                                  and all paths in ``keepList`` must
                                  start with the specified ``topDir``.
                                  If ``keepList`` is specified,
                                  ``keepPatterns`` and ``omitPatterns``
                                  must not be specified.
                                  If none of keepList, keepPatterns,
                                  or omitPatterns are specified, all dirs
                                  below topDir containing a metadata
                                  file will be archived.
    **-keepPatterns**  string     Comma separated list of
                                  regular expressions matching
                                  the relative paths of those directories
                                  to be kept.  If specified,
                                  ``keepList`` must not be specified.
    **-omitPatterns**  string     Comma separated list of
                                  regular expressions matching
                                  the relative paths of those directories
                                  to be omitted.  If specified,
                                  ``keepList`` must not be specified.
    **-topDir**        string     Top of dir tree to upload.
    **-workDir**       string     Work dir
    =================  =========  ===========================================

    If requireIcsd is true, :func:`getIcsdMap` must be able
    to extract ICSD info from the file names.  File names must be like: ::

      .../icsd_083665/icsd_083665.cif/ls-anti-ferro-7/relax_cellshape/1
               ^^^^^^                 ^^^^^^^^^^^^^ ^ ^^^^^^^^^^^^^^^ ^
               icsdNum                magType magNum  relaxType relaxNum

    **Returns**

    * None.  Exits via :func:`badparms` on any command line error.
    '''

    bugLev = 0
    readType = None
    requireInput = None
    requireIcsd = None
    keepList = None
    keepPatterns = None
    omitPatterns = None
    topDir = None
    workDir = None

    # argv[0] plus key/value pairs gives an odd length.
    if len(sys.argv) % 2 != 1:
        badparms('Parms must be key/value pairs')
    for iarg in range(1, len(sys.argv), 2):
        key = sys.argv[iarg]
        val = sys.argv[iarg + 1]
        if key == '-bugLev':
            # Report a bad integer through the usage message
            # instead of an uncaught ValueError traceback.
            try:
                bugLev = int(val)
            except ValueError:
                badparms('invalid integer for -bugLev: "%s"' % (val,))
        elif key == '-readType':
            readType = val
        elif key == '-requireInput':
            requireInput = parseBoolean(val)
        elif key == '-requireIcsd':
            requireIcsd = parseBoolean(val)
        elif key == '-keepList':
            keepList = val
        elif key == '-keepPatterns':
            keepPatterns = val.split(',')
        elif key == '-omitPatterns':
            omitPatterns = val.split(',')
        elif key == '-topDir':
            topDir = val
        elif key == '-workDir':
            workDir = val
        else:
            badparms('unknown key: "%s"' % (key,))

    if readType is None:
        badparms('parm not specified: -readType')
    if requireInput is None:
        badparms('missing parameter: -requireInput')
    if requireIcsd is None:
        badparms('missing parameter: -requireIcsd')
    # keepList, keepPatterns and omitPatterns are optional,
    # but keepList excludes the two pattern options.
    if keepList is not None \
            and (keepPatterns is not None or omitPatterns is not None):
        badparms('with keepList, may not spec keepPatterns or omitPatterns')
    if topDir is None:
        badparms('missing parameter: -topDir')
    if workDir is None:
        badparms('missing parameter: -workDir')

    absTopDir = os.path.abspath(topDir)
    print('wrapUpload: readType: %s' % (readType,))
    print('wrapUpload: keepList: %s' % (keepList,))
    print('wrapUpload: keepPatterns: %s' % (keepPatterns,))
    print('wrapUpload: omitPatterns: %s' % (omitPatterns,))
    print('wrapUpload: topDir: %s' % (topDir,))
    print('wrapUpload: absTopDir: %s' % (absTopDir,))
    print('wrapUpload: workDir: %s' % (workDir,))

    # Names of required files
    requireNames = [
        metadataName,
    ]

    # Names of optional files
    optionNames = [
        'pbserr',
        'pbsout',
        'pbsscript',
        'stderr',
        'stdout',
        'DOSCAR',
        'POTCAR',
    ]

    # The VASP input files are required or optional per -requireInput.
    inputNames = ['INCAR', 'KPOINTS', 'POSCAR']
    if requireInput:
        requireNames += inputNames
    else:
        optionNames += inputNames

    # The file named by readType is required; the other one is optional.
    if readType == 'xml':
        requireNames.append('vasprun.xml')
        optionNames.append('OUTCAR')
    elif readType == 'outcar':
        requireNames.append('OUTCAR')
        optionNames.append('vasprun.xml')
    else:
        badparms('invalid readType: %s' % (readType,))

    doUpload(bugLev, requireNames, optionNames, requireIcsd,
             keepList, keepPatterns, omitPatterns,
             topDir, workDir)
#====================================================================
def doUpload(
        bugLev,
        requireNames,
        optionNames,
        requireIcsd,    # require icsd info in the relative path
        keepList,
        keepPatterns,
        omitPatterns,
        topDir,
        workDir):
    '''
    Locates model runs, checks dir contents, and uses ``tar``
    to archive the selected files under ``workDir``.

    If ``keepList`` is specified, creates keepAbsPaths = unique absolute
    paths and calls :func:`iterateDirs`.
    Otherwise calls :func:`searchDirs` to recursively search the
    directory tree starting at ``topDir``.

    On success the subdir ``digestDirName`` of ``workDir`` contains
    ``digest.list`` (relative paths of the archived files),
    ``<uui>.tgz`` (the archive) and ``<uui>.zzflag``
    (one line: version and uui).

    **Parameters**:

    * bugLev (int): Debug level.  Normally 0.
    * requireNames (str[]): names of required files.
    * optionNames (str[]): names of optional files.
    * requireIcsd (boolean): if True, each relative dir path must
      contain ICSD info that :func:`getIcsdMap` can extract.
    * keepList (str):
      Name of a file containing the absolute paths
      of the dirs to be uploaded, one per line, or None.
      Blank lines and lines starting with ``#`` are ignored.
      Still ``topDir`` must be specified,
      and all paths in ``keepList`` must
      start with the specified ``topDir``.
      If ``keepList`` is specified,
      ``keepPatterns`` and ``omitPatterns``
      must not be specified.
    * keepPatterns (str[]):
      List of regular expressions matching
      the relative paths of those directories
      to be kept, or None.
    * omitPatterns (str[]):
      List of regular expressions matching
      the relative paths of those directories
      to be omitted, or None.
    * topDir (str): Top of dir tree to upload.
    * workDir (str): Work dir.  Must exist and be empty.

    **Returns**

    * None

    **Raises**

    * Exception (via :func:`throwerr`) if workDir is missing or not
      empty, if a keepList line is invalid, if any warnings were
      collected, or if the ``tar`` subprocess fails.
    '''

    absTopDir = os.path.abspath(topDir)
    if not os.path.isdir(workDir):
        throwerr('workDir does not exist: %s' % (workDir,))
    if len(os.listdir(workDir)) != 0:
        throwerr('workDir is not empty: %s' % (workDir,))

    # Get keepAbsPaths from file keepList.
    # Use a set so that duplicate entries are archived only once.
    keepAbsPaths = None
    if keepList is not None:
        keepAbsPathSet = set()
        with open(keepList) as fin:
            for (iline, rawLine) in enumerate(fin, 1):
                line = rawLine.strip()
                if len(line) == 0 or line.startswith('#'):
                    continue
                apath = line
                if apath.endswith('/'):
                    apath = apath[:-1]
                # abspath also normalizes, so this rejects relative
                # paths and paths containing '..', '//' and the like.
                if apath != os.path.abspath(apath):
                    throwerr(
                        'keepList line is not an abs path. iline: %d line: %s'
                        % (iline, line,))
                if not os.path.isdir(apath):
                    throwerr(
                        'keepList line is not a dir. iline: %d line: %s'
                        % (iline, line,))
                keepAbsPathSet.add(apath)
        keepAbsPaths = sorted(keepAbsPathSet)
        if bugLev >= 1:
            print('doUpload: len(keepAbsPaths): %d' % (len(keepAbsPaths),))
        if bugLev >= 5:
            print('doUpload: keepAbsPaths: %s' % (keepAbsPaths,))

    # Use an absolute path: tar runs with cwd = absTopDir, so a path
    # relative to our own cwd would resolve to the wrong place.
    digestDir = os.path.join(os.path.abspath(workDir), digestDirName)
    if os.path.lexists(digestDir):
        throwerr('workDir is not empty: subdir already exists: %s'
                 % (digestDir,))

    warnings = []
    relDirs = []        # list of dirs we archive
    relFiles = []       # list of files to archive

    if keepAbsPaths is not None:
        # Get the relative paths of all files to be archived,
        # using the keepAbsPaths list.
        iterateDirs(
            bugLev,
            requireNames,
            optionNames,
            keepAbsPaths,
            absTopDir,
            requireIcsd,
            warnings,     # may append to warnings
            relDirs,      # appends to list
            relFiles)     # appends to list
    else:
        # Get the relative paths of all files to be archived,
        # starting at absTopDir.
        searchDirs(
            bugLev,
            requireNames,
            optionNames,
            keepPatterns,
            omitPatterns,
            absTopDir,
            '',           # relative path so far
            requireIcsd,
            warnings,     # may append to warnings
            relDirs,      # appends to list
            relFiles)     # appends to list

    # Any warning (for example a missing required file) is fatal,
    # but only after all of them have been listed.
    numWarn = len(warnings)
    if numWarn > 0:
        print('')
        for warn in warnings:
            print('Warning: %s' % (warn,))
        print('')
        throwerr('\nFound %d warnings. See above.' % (numWarn,))

    numKeptDir = len(relDirs)
    numKeptFile = len(relFiles)
    if bugLev >= 0:
        print('wrapUpload: numKeptDir: %d' % (numKeptDir,))
        print('wrapUpload: numKeptFile: %d' % (numKeptFile,))

    os.mkdir(digestDir)
    listFile = os.path.join(digestDir, 'digest.list')
    with open(listFile, 'w') as fout:
        for path in relFiles:
            fout.write(path + '\n')

    print('wrapUpload: beginning tar (this could take several minutes)')
    curDate = datetime.datetime.now()
    userId = pwd.getpwuid(os.getuid())[0]
    # NOTE(review): formatUui is not defined in the visible part of
    # this file; it presumably builds the wrapId that parseUui
    # parses -- confirm it is available at run time.
    uui = formatUui(curDate, userId, absTopDir)

    fBase = os.path.join(digestDir, uui)
    tarFile = fBase + '.tgz'
    flagFile = fBase + '.zzflag'

    # Create tarFile = tar of the files to be saved.
    # Use -h to deref symlinks.
    # The names in listFile are relative to absTopDir, which is why
    # the subprocess runs with absTopDir as its working dir.
    args = ['/bin/tar', '-chzf', tarFile, '-T', listFile, '--mode=660']
    runSubprocess(bugLev, absTopDir, args, False)   # showStdout = False

    # Create flagFile with version num
    with open(flagFile, 'w') as fout:
        fout.write(version + ' ' + uui + '\n')

    print('wrapUpload: Completed. numKeptDir: %d numKeptFile: %d'
          % (numKeptDir, numKeptFile,))
#====================================================================
def searchDirs(
        bugLev,
        requireNames,
        optionNames,
        keepPatterns,
        omitPatterns,
        absTopDir,
        relPath,        # relative path so far
        requireIcsd,    # require icsd info in the relative path
        warnings,       # may append to warnings
        relDirs,        # appends to list
        relFiles):      # appends to list
    '''
    Recursive: locates model runs, checks dir contents,
    and appends names to lists of dirs and files.

    A dir is archived when it contains a metadata file, matches no
    omitPattern, and (if keepPatterns is not None) matches some
    keepPattern.  Subdirs of a dir matching an omitPattern are not
    searched at all.

    **Parameters**:

    * bugLev (int): Debug level.  Normally 0.
    * requireNames (str[]): names of required files.
    * optionNames (str[]): names of optional files.
    * keepPatterns (str[]):
      List of regular expressions matching
      the relative paths of those directories
      to be kept, or None.
    * omitPatterns (str[]):
      List of regular expressions matching
      the relative paths of those directories
      to be omitted, or None.
    * absTopDir (str): Absolute path of the original top of dir tree
      to upload.
    * relPath (str): Relative path so far, somewhere below absTopDir.
    * requireIcsd (boolean): if True, the relative path must
      contain ICSD info that :func:`getIcsdMap` can extract.
    * warnings (str[]): :func:`processDir` may append warnings.
    * relDirs (str[]): We append dirs to be archived.
    * relFiles (str[]): We append file names to be archived.

    **Returns**

    * None
    '''

    inDir = os.path.abspath(os.path.join(absTopDir, relPath))
    if bugLev >= 5:
        print('searchDirs: relPath: %s' % (relPath,))
        print('searchDirs: inDir: %s' % (inDir,))
    if not os.path.isdir(inDir):
        throwerr('not a dir: %s' % (inDir,))

    # Check for keepPattern and omitPattern matches.
    # If any keepPatterns exist:
    #   archive it = (not any omitPattern) and some keepPattern
    # Else:
    #   archive it = not any omitPattern
    keepPat = _firstMatch(keepPatterns, relPath)
    if keepPat is not None and bugLev >= 5:
        print('searchDirs: match keepPattern: %s for relPath: %s'
              % (keepPat, relPath,))
    keepIt = (keepPatterns is None) or (keepPat is not None)

    omitPat = _firstMatch(omitPatterns, relPath)
    if omitPat is not None and bugLev >= 5:
        print('searchDirs: match omitPattern: %s for relPath: %s'
              % (omitPat, relPath,))
    omitIt = omitPat is not None

    hasMetadata = False
    mpath = os.path.join(inDir, metadataName)
    if os.path.isfile(mpath):
        # NOTE(review): parseMetadata is not defined in the visible
        # part of this file; it is called here only to check validity.
        parseMetadata(mpath)
        hasMetadata = True

    if bugLev >= 5:
        print('searchDirs: relPath: %s keepIt: %s omitIt: %s hasMetadata: %s'
              % (relPath, keepIt, omitIt, hasMetadata,))

    if keepIt and (not omitIt) and hasMetadata:
        relDirs.append(relPath)
        processDir(bugLev, requireNames, optionNames,
                   absTopDir, relPath, requireIcsd, warnings, relFiles)

    # Recurse to subdirs, in sorted order so results are reproducible.
    # Note a dir that fails keepPatterns is still searched: its
    # subdirs may match.
    if omitIt:
        print('wrapUpload: %-18s %s' % ('skip subTree', inDir,))
    else:
        for subName in sorted(os.listdir(inDir)):
            subPath = os.path.join(relPath, subName)
            if os.path.isdir(os.path.join(absTopDir, subPath)):
                searchDirs(
                    bugLev,
                    requireNames,
                    optionNames,
                    keepPatterns,
                    omitPatterns,
                    absTopDir,
                    subPath,      # relPath: relative path so far
                    requireIcsd,
                    warnings,     # may append to warnings
                    relDirs,      # appends to list
                    relFiles)     # appends to list


def _firstMatch(patterns, relPath):
    '''
    Returns the first regular expression in patterns that
    ``re.search`` finds in relPath, or None if patterns is None
    or nothing matches.
    '''
    if patterns is not None:
        for pat in patterns:
            if re.search(pat, relPath):
                return pat
    return None
#====================================================================
def iterateDirs(
        bugLev,
        requireNames,
        optionNames,
        keepAbsPaths,
        absTopDir,
        requireIcsd,    # require icsd info in the relative path
        warnings,       # may append to warnings
        relDirs,        # appends to list
        relFiles):      # appends to list
    '''
    For each path in keepAbsPaths, checks dir contents,
    and appends names to lists of dirs and files.

    **Parameters**:

    * bugLev (int): Debug level.  Normally 0.
    * requireNames (str[]): names of required files.
    * optionNames (str[]): names of optional files.
    * keepAbsPaths (str[]):
      List of absolute paths of dirs to archive.
      Each must be absTopDir itself or lie below it.
    * absTopDir (str): Absolute path of the original top of dir tree
      to upload.
    * requireIcsd (boolean): if True, each relative path must
      contain ICSD info that :func:`getIcsdMap` can extract.
    * warnings (str[]): :func:`processDir` may append warnings.
    * relDirs (str[]): We append dirs to be archived.
    * relFiles (str[]): We append file names to be archived.

    **Returns**

    * None

    **Raises**

    * Exception (via :func:`throwerr`) if a path is not under absTopDir.
    '''

    # Compare against absTopDir plus a separator: a bare string prefix
    # test would accept a sibling such as /data/top2 for /data/top.
    topPrefix = absTopDir.rstrip('/') + '/'

    for inDir in keepAbsPaths:
        if bugLev >= 5:
            print('iterateDirs: inDir: %s' % (inDir,))
        if inDir != absTopDir and not inDir.startswith(topPrefix):
            throwerr(
                'inDir does not start with absTopDir.'
                + ' inDir: %s absTopDir: %s' % (inDir, absTopDir,))
        relPath = inDir[len(absTopDir):].lstrip('/')
        if bugLev >= 5:
            print('iterateDirs: relPath: %s' % (relPath,))

        relDirs.append(relPath)
        processDir(bugLev, requireNames, optionNames,
                   absTopDir, relPath, requireIcsd, warnings, relFiles)
#====================================================================
def processDir(
        bugLev,
        requireNames,
        optionNames,
        absTopDir,
        relPath,        # relative path so far
        requireIcsd,    # require icsd info in the relative path
        warnings,       # may append to warnings
        relFiles):      # appends to list
    '''
    Prepares to archive a single directory:
    appends the relative paths of its required and optional
    files to relFiles, and a warning for each missing required file.

    **Parameters**:

    * bugLev (int): Debug level.  Normally 0.
    * requireNames (str[]): names of required files.
    * optionNames (str[]): names of optional files.
    * absTopDir (str): Absolute path of the original top of dir tree
      to upload.
    * relPath (str): Relative path of the dir, below absTopDir.
    * requireIcsd (boolean): if True, relPath must
      contain ICSD info that :func:`getIcsdMap` can extract.
    * warnings (str[]): We append a message per missing required file.
    * relFiles (str[]): We append file names to be archived.

    **Returns**

    * None

    **Raises**

    * Exception (via :func:`throwerr`) if the dir does not exist, or
      if requireIcsd is set and the ICSD info cannot be extracted.
    '''

    inDir = os.path.abspath(os.path.join(absTopDir, relPath))
    if bugLev >= 5:
        print('processDir: relPath: %s' % (relPath,))
        print('processDir: inDir: %s' % (inDir,))
    if not os.path.isdir(inDir):
        throwerr('not a dir: %s' % (inDir,))

    # Required files: archive if present, else warn.
    for nm in requireNames:
        subFile = os.path.join(inDir, nm)
        if os.path.isfile(subFile):
            relFiles.append(os.path.join(relPath, nm))
        else:
            warnings.append('missing file: %s' % (subFile,))

    # Optional files: archive if present, silently skip otherwise.
    for nm in optionNames:
        if os.path.isfile(os.path.join(inDir, nm)):
            relFiles.append(os.path.join(relPath, nm))

    if requireIcsd:
        # The map itself is not used here; the call only validates
        # that relPath carries the ICSD info.
        try:
            getIcsdMap(bugLev, relPath)
        except Exception:
            tback = traceback.format_exc()
            print('===== start traceback')
            print(tback)
            print('===== end traceback')
            throwerr(
                'icsd info not found. absTopDir: %s relPath: %s traceback:\n%s'
                % (absTopDir, relPath, tback,))
#====================================================================
# Creates a map of ICSD info based on the dir name
def getIcsdMap(bugLev, relPath):
    '''
    Creates a map of ICSD info based on the dir name.

    Example parsing relPath: ::

      .../icsd_083665/icsd_083665.cif/ls-anti-ferro-7/relax_cellshape/1
               ^^^^^^                 ^^^^^^^^^^^^^ ^ ^^^^^^^^^^^^^^^ ^
               icsdNum                magType magNum  relaxType relaxNum

    Resulting map: ::

      icsdMap = {
        'icsdNum'   : 83665,
        'magType'   : 'lsaf',
        'magNum'    : 7,
        'relaxType' : 'rc',
        'relaxNum'  : 1,
      }

    magNum is parsed only for the ``hs-anti-ferro`` and
    ``ls-anti-ferro`` types; otherwise it is 0.
    If relPath names no relaxation dir, relaxType is 'std'
    and relaxNum is 0.

    **Parameters**:

    * bugLev (int): Debug level.  Normally 0.  (Not used here.)
    * relPath (str): Relative path.

    **Returns**

    * map similar to the above.

    **Raises**

    * Exception (via :func:`throwerr`) if the icsd number, the
      magnetic type, a required magNum, or the relaxation subfolder
      number cannot be found in relPath.
    '''

    # Extract the ICSD number from the path '.../icsd_dddddd/...'
    mat = re.match(r'^(.*/)?icsd_(\d{6})/.*$', relPath)
    if mat is None:
        throwerr('no icsd id found in relPath: "%s"' % (relPath,))
    icsdNum = int(mat.group(2))

    # Extract magnetic moment type from the path like
    # '.../ls-anti-ferro-33/...'.
    # Every name starts with '/', so '/ferro' cannot match inside
    # '/hs-ferro' or '/ls-anti-ferro'.
    magPairs = [
        ['/anti-ferro', 'afer'],
        ['/ferro', 'fer'],
        ['/hs-ferro', 'hsf'],
        ['/hs-anti-ferro', 'hsaf'],
        ['/ls-ferro', 'lsf'],
        ['/ls-anti-ferro', 'lsaf'],
        ['/non-magnetic', 'nm'],
    ]
    magType = None
    magNum = 0
    for (tname, tcode) in magPairs:
        ix = relPath.find(tname)
        if ix < 0:
            continue
        magType = tcode
        if tcode in ('hsaf', 'lsaf'):
            rest = relPath[ix + len(tname):]
            mat = re.match(r'^-(\d+).*$', rest)
            if mat is None:
                throwerr('no magNum found in relPath: "%s"' % (relPath,))
            magNum = int(mat.group(1))
        break
    if magType is None:
        throwerr('no magType found in relPath: "%s"' % (relPath,))

    # Extract relaxType from relPath.
    # If relPath contains 'relax_cellshape', set relaxType = 'rc'
    # and relaxNum = the number of the subfolder.
    # Similarly for 'relax_ions'.
    relaxPairs = [
        ['relax_cellshape', 'rc'],
        ['relax_ions', 'ri'],
    ]
    relaxType = 'std'
    relaxNum = 0
    for (tname, tcode) in relaxPairs:
        ix = relPath.find(tname)
        if ix < 0:
            continue
        relaxType = tcode
        rest = relPath[ix + len(tname):]
        mat = re.match(r'^/(\d+).*$', rest)
        if mat is None:
            throwerr('no subfolder found in relPath: "%s"' % (relPath,))
        relaxNum = int(mat.group(1))
        break

    # Save the info extracted from the path name
    icsdMap = {
        'icsdNum': icsdNum,
        'magType': magType,
        'magNum': magNum,
        'relaxType': relaxType,
        'relaxNum': relaxNum,
    }
    return icsdMap
#====================================================================
def checkFileFull(fname):
    '''
    Ensures that fname exists, is readable, and has length > 0.

    **Parameters**:

    * fname (str): Name of the input file.

    **Returns**

    * None

    **Raises**:

    * Exception if fname fails :func:`checkFile` or has length == 0.
    '''

    # Existence and readability first, so getsize cannot fail
    # on a missing file.
    checkFile(fname)
    if os.path.getsize(fname) == 0:
        throwerr('file is empty: "%s"' % (fname,))
#====================================================================
def checkFile(fname):
    '''
    Ensures that fname names an existing, readable regular file.
    The file may have length == 0.

    **Parameters**:

    * fname (str): Name of the input file.

    **Returns**

    * None

    **Raises**:

    * Exception if fname is not a non-empty string, does not exist
      as a regular file, or is not readable.
    '''

    # Test the type by name so the same check covers Python 2
    # 'unicode' without referencing a name missing in Python 3.
    isString = type(fname).__name__ in ('str', 'unicode')
    if not isString or len(fname) == 0:
        throwerr('invalid file name: "%s"' % (fname,))
    if not os.path.isfile(fname):
        throwerr('file not found: "%s"' % (fname,))
    if not os.access(fname, os.R_OK):
        throwerr('file is not readable: "%s"' % (fname,))
#====================================================================
def runSubprocess(bugLev, wkDir, args, showStdout):
    '''
    Calls the executable indicated by args and waits for completion.

    If bugLev >= 5 the subprocess output is not captured: it goes
    directly to our own stdout and stderr.  Otherwise it is captured
    and shown only on failure or if showStdout is set.

    **Parameters**:

    * bugLev (int): Debug level.  Normally 0.
    * wkDir (str): The working directory to use for the subprocess.
    * args (str[]): The executable name (in args[0]) followed by
      the command line arguments in args[1:].
    * showStdout (boolean): If True, print the captured stdout
      and stderr from the subprocess.

    **Returns**

    * None

    **Raises**:

    * Exception if subprocess rc != 0.
    '''

    if bugLev >= 1:
        print('runSubprocess: args: %s' % (args,))
        print('runSubprocess: cmd: %s' % (' '.join(args),))

    if bugLev >= 5:
        pipe = None               # inherit our stdout and stderr
    else:
        pipe = subprocess.PIPE    # capture the output

    proc = subprocess.Popen(
        args,
        shell=False,
        cwd=wkDir,
        stdin=subprocess.PIPE, stdout=pipe, stderr=pipe,
        bufsize=10*1000*1000)
    (stdout, stderr) = proc.communicate()

    # communicate() returns None for a stream that was not piped;
    # substitute empty text so len() and formatting below are safe.
    if stdout is None:
        stdout = ''
    if stderr is None:
        stderr = ''

    rc = proc.returncode
    if rc != 0:
        msg = 'subprocess failed.\n'
        msg += 'wkDir: %s\n' % (wkDir,)
        msg += 'args: %s\n' % (args,)
        msg += 'rc: %d\n' % (rc,)
        msg += '\n===== stdout:\n%s\n' % (stdout,)
        msg += '\n===== stderr:\n%s\n' % (stderr,)
        throwerr(msg)

    if showStdout:
        print('runSubProcess: len(stderr): %d' % (len(stderr),))
        print('runSubProcess: len(stdout): %d' % (len(stdout),))
        if len(stderr) > 0:
            print('\n===== stderr:\n%s\n' % (stderr,))
        if len(stdout) > 0:
            print('\n===== stdout:\n%s\n' % (stdout,))
#====================================================================
def findNumFiles(tag, dir):
    '''
    Finds the number of directory entries having name == tag
    in the tree at dir.

    Entries of any kind are counted (files and subdirs alike),
    and every subdir is searched recursively.

    **Parameters**:

    * tag (str): The name to search for.
    * dir (str): The root of the directory tree to search.

    **Returns**

    * integer number of matches.

    **Raises**

    * Exception (via :func:`throwerr`) if dir is not a directory.
    '''

    if not os.path.isdir(dir):
        throwerr('not a dir: %s' % (dir,))
    count = 0
    for nm in sorted(os.listdir(dir)):
        if nm == tag:
            count += 1
        subDir = os.path.join(dir, nm)
        if os.path.isdir(subDir):
            count += findNumFiles(tag, subDir)    # recursion
    return count
#====================================================================
# Coord with formatUui, which doUpload calls to build these strings.
def parseUui(stg):
    '''
    Parses a uui (wrapId).

    A wrapId looks like: ::

      @2013.08.13@12.58.22.735311@someUser@home.someUser.redmesa.old.td.testlada.2013.04.06.Fe.O@

    The fields are date, time with microseconds, user id, and a
    directory name, each delimited by ``@``.
    The input string may have subdirs info after the initial wrapId.

    **Parameters**:

    * stg (str): The string to be parsed.

    **Returns**

    * If stg starts with a valid wrapId, returns that wrapId.
      Else returns None.
    '''

    uuiPattern = (r'^(@(\d{4}\.\d{2}\.\d{2})'
                  r'@(\d{2}\.\d{2}\.\d{2}\.\d{6})'
                  r'@([^./@]*)@([^/@]*)@)')
    mat = re.match(uuiPattern, stg)
    if mat is None:
        return None

    wrapId = mat.group(1)
    dateStg = mat.group(2)
    timeStg = mat.group(3)

    # The regex only checks digit counts.  Let strptime reject
    # impossible dates or times such as month 13, and report them
    # as "not a wrapId" rather than raising.
    try:
        datetime.datetime.strptime(
            dateStg + ' ' + timeStg, '%Y.%m.%d %H.%M.%S.%f')
    except ValueError:
        return None
    return wrapId
#====================================================================
# Used by deepCompare to unify types
def fixType(val):
    '''
    Unifies value types so :func:`deepCompare` can compare
    values that differ only in representation.

    * Python ints and numpy integer or floating scalars become float.
    * numpy string scalars become str.
    * lists become numpy arrays.
    * Anything else (including bool and None) is returned unchanged.

    **Parameters**:

    * val: The value to convert.

    **Returns**

    * The converted value.
    '''

    tname = type(val).__name__
    if isinstance(val, (np.floating, np.integer)):
        # Covers float64 and int64 and the other numpy scalar widths.
        val = float(val)
    elif tname in ('int', 'long'):
        # Matched by name so that bool, a subclass of int, stays bool.
        val = float(val)
    elif tname in ('string_', 'str_'):
        val = str(val)
    elif tname == 'list':
        val = np.array(val)
    return val
#====================================================================
# Returns None if match; else returns error msg.
def deepCompare(bugLev, epsilon, taga, va, tagb, vb):
    '''
    Compares two objects.
    Returns None if the same; else returns error msg.

    Both values are first passed through :func:`fixType`, so
    lists are compared as numpy arrays and integers as floats.

    **Parameters**:

    * bugLev (int): Debug level.  Normally 0.
    * epsilon (float): Max allowed absolute difference between floats.
    * taga (str): Name of the first value, for messages.
    * va: The first value.
    * tagb (str): Name of the second value, for messages.
    * vb: The second value.

    **Returns**

    * None if the values match; else a string describing the
      first mismatch found.

    **Raises**

    * Exception (via :func:`throwerr`) if the type is not handled.
    '''

    va = fixType(va)
    vb = fixType(vb)
    tpa = type(va).__name__
    tpb = type(vb).__name__

    cmsg = None
    if tpa != tpb:
        cmsg = 'types differ: %s: %s %s: %s' % (taga, tpa, tagb, tpb,)
    elif va is None:
        # Types are equal, so vb is None too.  Test identity:
        # '== None' on an ndarray compares elementwise.
        pass
    elif tpa == 'dict':
        cmsg = compareMaps(bugLev, taga, va, tagb, vb)
    elif tpa == 'instance':
        # Python 2 old-style class instances: compare attributes.
        cmsg = compareMaps(bugLev, taga, va.__dict__, tagb, vb.__dict__)
    elif tpa == 'ndarray':
        # Lists also arrive here, since fixType converts them.
        if va.shape != vb.shape:
            cmsg = 'shape mismatch: %s: %s %s: %s' \
                % (taga, va.shape, tagb, vb.shape,)
        else:
            fa = va.flatten()
            fb = vb.flatten()
            for ii in range(fa.size):
                cmsg = deepCompare(
                    bugLev, epsilon, taga, fa[ii], tagb, fb[ii])
                if cmsg is not None:
                    break
    elif tpa == 'float':
        if abs(va - vb) > epsilon:
            # Relative error is undefined when va is 0.
            if va == 0:
                relerr = float('NaN')
            else:
                relerr = (vb - va) / va
            cmsg = ('float value mismatch: %s: %g %s: %g vb-va: %g'
                    + ' (vb-va)/va: %g') \
                % (taga, va, tagb, vb, vb - va, relerr)
    elif tpa in ['bool', 'datetime', 'int', 'str']:
        if va != vb:
            cmsg = 'scalar value mismatch: %s: %s %s: %s' \
                % (taga, repr(va), tagb, repr(vb),)
    else:
        throwerr('unknown type: %s' % (type(va),))
    return cmsg
#====================================================================
def compareMaps(bugLev, taga, va, tagb, vb, epsilon=5.e-5):
    '''
    Compares two dictionaries.
    Returns None if the same; else returns error message.

    Keys are visited in sorted order.  A key found in only one map
    yields a "Unique key" line; a key found in both has its values
    compared with :func:`deepCompare`.

    **Parameters**:

    * bugLev (int): Debug level.  Normally 0.
    * taga (str): Name of the first map, for messages.
    * va (dict): The first map.
    * tagb (str): Name of the second map, for messages.
    * vb (dict): The second map.
    * epsilon (float): Max allowed absolute difference between
      float values.  Default: 5.e-5.

    **Returns**

    * None if the maps match; else a string with one or more
      newline-terminated messages, one per difference.
    '''

    msgs = []
    # Sorted union of the keys gives the same order as merging
    # the two sorted key lists.
    for key in sorted(set(va) | set(vb)):
        if key not in vb:
            cm = 'Unique key for %s: %s\n' % (taga, key,)
        elif key not in va:
            cm = 'Unique key for %s: %s\n' % (tagb, key,)
        else:
            aval = va[key]
            bval = vb[key]
            compMsg = deepCompare(bugLev, epsilon, taga, aval, tagb, bval)
            if compMsg is None:
                if bugLev >= 5:
                    print('compareMaps: value match. key: %s' % (key,))
                continue
            cm = ('Value mismatch: key: %s msg: %s\n'
                  + ' %s: %s\n %s: %s\n') \
                % (key, compMsg, taga, aval, tagb, bval,)
        msgs.append(cm)
        if bugLev >= 5:
            print('compareMaps: %s' % (cm,))

    if len(msgs) == 0:
        return None
    return ''.join(msgs)
#====================================================================
def printMap(tag, vmap, maxLen):
    '''
    Prints a map, one ``key: value`` line per entry, in sorted
    key order.

    **Parameters**:

    * tag (str): Explanatory name of the map.
    * vmap (map): The map to print, or None.
    * maxLen (int): The max length to use in printing a value;
      longer values are cut and marked with ``...``.
      If 0, values are never cut.

    **Returns**

    * None
    '''

    print('\n%s' % (tag,))
    if vmap is None:
        print(' Map is None.')
        return

    print(' Map len: %d' % (len(vmap),))
    for key in sorted(vmap.keys()):
        val = str(vmap[key])
        if maxLen > 0 and len(val) > maxLen:
            val = val[:maxLen] + '...'
        print(' %s: %s' % (key, val,))
#====================================================================
def parseBoolean(stg):
    '''
    Parses a string to get a boolean.

    **Parameters**:

    * stg (str): One of false / no / true / yes, in any letter case.

    **Returns**

    * False for false / no; True for true / yes.

    **Raises**

    * Exception (via :func:`throwerr`) for any other string.
    '''

    lowered = stg.lower()
    if lowered in ('false', 'no'):
        return False
    if lowered in ('true', 'yes'):
        return True
    throwerr('invalid boolean: "%s"' % (stg,))
#====================================================================
def throwerr(msg):
    '''
    Raises Exception with the given message.
    Nothing is printed here; the caller or the interpreter
    reports the message.

    **Parameters**:

    * msg (str): Error message.

    **Returns**

    * (Never returns)

    **Raises**

    * Exception
    '''

    raise Exception(msg)
#====================================================================
# Run the uploader only when executed as a script, not when imported.
if __name__ == '__main__': main()