# Source code for nrelmat.wrapUpload

#!/bin/env python
# Copyright 2013 National Renewable Energy Laboratory, Golden CO, USA
# This file is part of NREL MatDB.
#
# NREL MatDB is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# NREL MatDB is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with NREL MatDB.  If not, see <http://www.gnu.org/licenses/>.

import datetime, json, math, os, pwd, re
import shutil, socket, stat, subprocess, sys, time, traceback

import numpy as np
np.seterr( all='raise', under='ignore')


version = '1.0.0'

# Name of the metadata file
metadataName = 'metadata'



# Work and archive subdir of the top level dir.
digestDirName = 'wrapUpload.archive'


#====================================================================

def badparms( msg):
  '''
  Prints an error message followed by the command line usage text,
  then terminates the program.

  **Parameters**:

  * msg (str): Description of the command line problem.

  **Returns**

  * Never returns.

  **Raises**:

  * SystemExit with exit code 1, via ``sys.exit(1)``.
  '''

  usageLines = [
    '',
    'There are three ways to run wrapUpload.py.',
    '',
    '1. Specify a list of directories to upload, using -keepList.',
    '',
    '2. Specify regular expressions matching the names of directories',
    'to upload, using -keepPatterns and -omitPatterns.',
    '',
    '3. Do not specify -keepList or -keepPatterns or -omitPatterns.',
    'In this case wrapUpload will upload every directory containing',
    'a metadata file in the tree rooted at topDir.',
    '',
    'Parameters:',
    '',
    '  -bugLev     <int>       Debug level.  Default: 0',
    '',
    '  -readType     <string>  outcar / xml',
    '',
    '  -requireInput <boolean> no/yes: do we require that the files',
    '                          INCAR, KPOINTS, POSCAR exist.',
    '',
    '  -requireIcsd  <boolean> no/yes: do we require that the file',
    '                          paths names contain ICSD info.',
    '',
    '  -keepList   <string>    File containing the absolute paths',
    '                          of the dirs to be uploaded.',
    '                          Still topDir must be specified,',
    '                          and all paths in keepList must',
    '                          start with the specified topDir.',
    '                          If keepList is specified,',
    '                          keepPatterns and omitPatterns',
    '                          must not be specified.',
    '',
    '  -keepPatterns <string>  Comma separated list of',
    '                          regular expressions matching',
    '                          the relative paths of those directories',
    '                          to be kept.  If specified,',
    '                          keepList must not be specified.',
    '                          If none of keepList, keepPatterns,',
    '                          or omitPatterns are specified, all dirs',
    '                          below topDir containing a metadata',
    '                          file will be archived.',
    '',
    '  -omitPatterns <string>  Comma separated list of',
    '                          regular expressions matching',
    '                          the relative paths of those directories',
    '                          to be omitted.  If specified,',
    '                          keepList must not be specified.',
    '',
    '  -topDir     <string>    Top of dir tree to upload.',
    '',
    '  -workDir    <string>    Work dir',
    '',
  ]

  # Single-argument print(...) behaves the same under Python 2 and 3.
  print('\nError: %s' % (msg,))
  for usageLine in usageLines:
    print(usageLine)

  sys.exit(1)


#====================================================================


def main():
  '''
  Command line entry point: parses the key/value command line
  parameters, builds the lists of required and optional file names,
  and calls :func:`doUpload`.

  NOTE(review): earlier documentation said the data is sent to the
  server running :mod:`wrapReceive` using ``scp``; no transfer step
  is visible in this function -- confirm where it happens.

  Command line parameters:

  ================= ========= ===========================================
  Parameter         Type      Description
  ================= ========= ===========================================
  **-bugLev**       integer   Debug level.  Normally 0.
  **-readType**     string    If 'outcar', read the OUTCAR files.
                              Else if 'xml', read the vasprun.xml files.
  **-requireInput** boolean   no/yes: do we require that the files
                              INCAR, KPOINTS, POSCAR exist.
  **-requireIcsd**  boolean   no/yes: do we require that the file
                              path names contain ICSD info.
                              See notes below.
  **-keepList**     string    File containing the absolute paths
                              of the dirs to be uploaded.
                              Still ``topDir`` must be specified,
                              and all paths in ``keepList`` must
                              start with the specified ``topDir``.
                              If ``keepList`` is specified,
                              ``keepPatterns`` and ``omitPatterns``
                              must not be specified.
                              If none of keepList, keepPatterns,
                              or omitPatterns are specified, all dirs
                              below topDir containing a metadata
                              file will be archived.
  **-keepPatterns** string    Comma separated list of
                              regular expressions matching
                              the relative paths of those directories
                              to be kept.  If specified,
                              ``keepList`` must not be specified.
  **-omitPatterns** string    Comma separated list of
                              regular expressions matching
                              the relative paths of those directories
                              to be omitted.  If specified,
                              ``keepList`` must not be specified.
  **-topDir**       string    Top of dir tree to upload.
  **-workDir**      string    Work dir
  ================= ========= ===========================================

  If requireIcsd is true, :func:`getIcsdMap` must be able to
  extract ICSD info from the file names.
  File names must be like: ::

    .../icsd_083665/icsd_083665.cif/ls-anti-ferro-7/relax_cellshape/1
             ^^^^^^                 ^^^^^^^^^^^^^ ^ ^^^^^^^^^^^^^^^ ^
             icsdNum                magType magNum  relaxType relaxNum
  '''

  bugLev = 0
  readType = None
  requireInput = None
  requireIcsd = None
  keepList = None
  keepPatterns = None
  omitPatterns = None
  topDir = None
  workDir = None

  # argv[0] is the program name, so key/value pairs give an odd length.
  if len(sys.argv) % 2 != 1:
    badparms('Parms must be key/value pairs')
  for iarg in range( 1, len(sys.argv), 2):
    key = sys.argv[iarg]
    val = sys.argv[iarg+1]
    if key == '-bugLev':
      try:
        bugLev = int( val)
      except ValueError:
        # Report a usage error instead of dying with a traceback.
        badparms('invalid integer for -bugLev: "%s"' % (val,))
    elif key == '-readType': readType = val
    elif key == '-requireInput': requireInput = parseBoolean( val)
    elif key == '-requireIcsd': requireIcsd = parseBoolean( val)
    elif key == '-keepList': keepList = val
    elif key == '-keepPatterns': keepPatterns = val.split(',')
    elif key == '-omitPatterns': omitPatterns = val.split(',')
    elif key == '-topDir': topDir = val
    elif key == '-workDir': workDir = val
    else: badparms('unknown key: "%s"' % (key,))

  if readType is None: badparms('parm not specified: -readType')
  if requireInput is None: badparms('missing parameter: -requireInput')
  if requireIcsd is None: badparms('missing parameter: -requireIcsd')
  # keepList is optional
  # keepPatterns is optional
  # omitPatterns is optional
  if keepList is not None \
    and (keepPatterns is not None or omitPatterns is not None):
    badparms('with keepList, may not spec keepPatterns or omitPatterns')
  if topDir is None: badparms('missing parameter: -topDir')
  if workDir is None: badparms('missing parameter: -workDir')

  absTopDir = os.path.abspath( topDir)

  print('wrapUpload: readType: %s' % (readType,))
  print('wrapUpload: keepList: %s' % (keepList,))
  print('wrapUpload: keepPatterns: %s' % (keepPatterns,))
  print('wrapUpload: omitPatterns: %s' % (omitPatterns,))
  print('wrapUpload: topDir: %s' % (topDir,))
  print('wrapUpload: absTopDir: %s' % (absTopDir,))
  print('wrapUpload: workDir: %s' % (workDir,))

  # Names of required files
  requireNames = [
    metadataName,
  ]

  # Names of optional files
  optionNames = [
    'pbserr',
    'pbsout',
    'pbsscript',
    'stderr',
    'stdout',
    'DOSCAR',
    'POTCAR',
  ]

  # The VASP input files are either required or optional.
  inputNames = [ 'INCAR', 'KPOINTS', 'POSCAR']
  if requireInput: requireNames += inputNames
  else: optionNames += inputNames

  # The file we read is required; the other result file is optional.
  if readType == 'xml':
    requireNames.append('vasprun.xml')
    optionNames.append('OUTCAR')
  elif readType == 'outcar':
    requireNames.append('OUTCAR')
    optionNames.append('vasprun.xml')
  else: badparms('invalid readType: %s' % (readType,))

  doUpload( bugLev, requireNames, optionNames, requireIcsd,
    keepList, keepPatterns, omitPatterns, topDir, workDir)


#====================================================================
def _readKeepList( keepList):
  ''' Reads file keepList; returns the sorted unique abs dir paths. '''

  pathSet = set()      # a set, so that duplicate entries collapse
  with open( keepList) as fin:
    for iline, rawLine in enumerate( fin, 1):
      line = rawLine.strip()
      if len(line) == 0 or line.startswith('#'):
        continue       # ignore blank lines and comments
      apath = line
      if apath.endswith('/'): apath = apath[:-1]
      if apath != os.path.abspath( apath):
        throwerr('keepList line is not an abs path. iline: %d line: %s' \
          % (iline, line,))
      if not os.path.isdir( apath):
        throwerr( 'keepList line is not a dir. iline: %d line: %s' \
          % (iline, line,))
      pathSet.add( apath)
  return sorted( pathSet)


def doUpload(
  bugLev,
  requireNames,
  optionNames,
  requireIcsd,         # require icsd info in absTopDir string
  keepList,
  keepPatterns,
  omitPatterns,
  topDir,
  workDir):
  '''
  Locates model runs, checks dir contents, and creates a ``tar``
  archive of the selected files, plus a list file and a flag file,
  in the subdir ``wrapUpload.archive`` (digestDirName) of ``workDir``.

  If ``keepList`` is specified, creates keepAbsPaths = unique
  absolute paths and calls :func:`iterateDirs`.
  Otherwise calls :func:`searchDirs` to recursively search the
  directory tree starting at ``topDir``.

  NOTE(review): earlier documentation said the archive is sent to
  the server running :mod:`wrapReceive` using ``scp``; no transfer
  step is visible in this function -- confirm where it happens.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.
  * requireNames (str[]): names of required files.
  * optionNames (str[]): names of optional files.
  * requireIcsd (boolean): if True, the relative path of each
    archived dir must contain ICSD info that
    :func:`getIcsdMap` can extract.
  * keepList (str): Name of a file containing the absolute paths
    of the dirs to be uploaded, one per line, or None.
    Blank lines and lines starting with ``#`` are ignored.
    Still ``topDir`` must be specified,
    and all paths in ``keepList`` must
    start with the specified ``topDir``.
    If ``keepList`` is specified,
    ``keepPatterns`` and ``omitPatterns``
    must not be specified.
  * keepPatterns (str[]): List of
    regular expressions matching
    the relative paths of those directories
    to be kept.  If specified,
    ``keepList`` must not be specified.
  * omitPatterns (str[]): List of
    regular expressions matching
    the relative paths of those directories
    to be omitted.  If specified,
    ``keepList`` must not be specified.
  * topDir (str): Top of dir tree to upload.
  * workDir (str): Work dir.  Must exist and be empty.

  **Returns**

  * None

  **Raises**:

  * Whatever :func:`throwerr` raises, if workDir is missing or
    not empty, a keepList line is invalid, or any warnings
    (such as missing required files) were collected.
  '''

  absTopDir = os.path.abspath( topDir)
  if not os.path.isdir( workDir):
    throwerr('workDir does not exist: %s' % (workDir,))
  if len( os.listdir( workDir)) != 0:
    throwerr('workDir is not empty: %s' % (workDir,))

  # Get keepAbsPaths from file keepList
  keepAbsPaths = None
  if keepList is not None:
    keepAbsPaths = _readKeepList( keepList)
    if bugLev >= 1:
      print('doUpload: len(keepAbsPaths):  %d' % (len(keepAbsPaths),))
    if bugLev >= 5:
      print('doUpload: keepAbsPaths:  %s' % (keepAbsPaths,))

  digestDir = os.path.join( workDir, digestDirName)
  if os.path.lexists( digestDir):
    throwerr('workDir is not empty: subdir already exists: %s' \
      % (digestDir,))

  warnings = []
  relDirs = []         # list of dirs we archive
  relFiles = []        # list of files to archive

  if keepAbsPaths is not None:
    # Get the relative paths of all files to be archived,
    # using the keepAbsPaths list.
    iterateDirs(
      bugLev,
      requireNames,
      optionNames,
      keepAbsPaths,
      absTopDir,
      requireIcsd,     # require icsd info in absTopDir string
      warnings,        # may append to warnings
      relDirs,         # appends to list
      relFiles)        # appends to list
  else:
    # Get the relative paths of all files to be archived,
    # starting at absTopDir.
    searchDirs(
      bugLev,
      requireNames,
      optionNames,
      keepPatterns,
      omitPatterns,
      absTopDir,
      '',              # relative path so far
      requireIcsd,     # require icsd info in absTopDir string
      warnings,        # may append to warnings
      relDirs,         # appends to list
      relFiles)        # appends to list

  # Any warning is fatal: print them all, then quit.
  numWarn = len( warnings)
  if numWarn > 0:
    print('')
    for warn in warnings:
      print('Warning: %s' % (warn,))
    print('')
    throwerr('\nFound %d warnings. See above.' % (numWarn,))

  numKeptDir = len( relDirs)
  numKeptFile = len( relFiles)
  if bugLev >= 0:
    print('wrapUpload: numKeptDir: %d' % (numKeptDir,))
    print('wrapUpload: numKeptFile: %d' % (numKeptFile,))

  os.mkdir( digestDir)

  # Write the list of files for "tar -T"
  listFile = os.path.join( digestDir, 'digest.list')
  with open( listFile, 'w') as fout:
    for path in relFiles:
      fout.write( path + '\n')

  print('wrapUpload: beginning tar (this could take several minutes)')

  curDate = datetime.datetime.now()
  userId = pwd.getpwuid(os.getuid())[0]
  uui = formatUui( curDate, userId, absTopDir)
  fBase = os.path.join( digestDir, uui)
  tarFile = fBase + '.tgz'
  flagFile = fBase + '.zzflag'

  # Create tarFile = tar of the files to be saved.
  # Use -h to deref symlinks.
  # The paths in listFile are relative, so tar runs with cwd=absTopDir.
  args = ['/bin/tar', '-chzf', tarFile, '-T', listFile, '--mode=660']
  runSubprocess( bugLev, absTopDir, args, False)   # showStdout = False

  # Create flagFile with version num
  with open( flagFile, 'w') as fout:
    fout.write( version + ' ' + uui + '\n')

  print('wrapUpload: Completed. numKeptDir: %d numKeptFile: %d' \
    % (numKeptDir, numKeptFile,))


#====================================================================
def searchDirs(
  bugLev,
  requireNames,
  optionNames,
  keepPatterns,
  omitPatterns,
  absTopDir,
  relPath,             # relative path so far
  requireIcsd,         # require icsd info in absTopDir string
  warnings,            # may append to warnings
  relDirs,             # appends to list
  relFiles):           # appends to list
  '''
  Recursive: locates model runs, checks dir contents,
  and appends names to lists of dirs and files.

  A dir is archived if it matches some keepPattern (when
  keepPatterns is specified), matches no omitPattern, and contains
  a metadata file.  The subtree below a dir matching an omitPattern
  is skipped entirely; the subtree below a dir that merely fails
  the keepPatterns is still searched.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.
  * requireNames (str[]): names of required files.
  * optionNames (str[]): names of optional files.
  * keepPatterns (str[]): List of
    regular expressions matching
    the relative paths of those directories
    to be kept, or None.
  * omitPatterns (str[]): List of
    regular expressions matching
    the relative paths of those directories
    to be omitted, or None.
  * absTopDir (str): Absolute path of the original top of dir tree
    to upload.
  * relPath (str): Relative path so far, somewhere below absTopDir.
  * requireIcsd (boolean): if True, the relative path of each
    archived dir must contain ICSD info that
    :func:`getIcsdMap` can extract.
  * warnings (str[]): :func:`processDir` may append warnings.
  * relDirs (str[]): We append dirs to be archived.
  * relFiles (str[]): We append file names to be archived.

  **Returns**

  * None
  '''

  inDir = os.path.abspath( os.path.join( absTopDir, relPath))
  if bugLev >= 5:
    print('searchDirs: relPath: %s' % (relPath,))
    print('searchDirs: inDir: %s' % (inDir,))
  if not os.path.isdir( inDir): throwerr('not a dir')

  # Check for keepPattern and omitPattern matches.
  # If any keepPatterns exist:
  #   keepIt = (not any omitPattern) and some keepPattern
  # Else:
  #   keepIt = not any omitPattern

  keepIt = True
  if keepPatterns is not None:
    keepIt = False
    for pat in keepPatterns:
      if re.search( pat, relPath):
        if bugLev >= 5:
          print('searchDirs: match keepPattern: %s for relPath: %s' \
            % (pat, relPath,))
        keepIt = True
        break

  omitIt = False
  if omitPatterns is not None:
    # No break here: at bugLev >= 5 every matching pattern is reported.
    for pat in omitPatterns:
      if re.search( pat, relPath):
        if bugLev >= 5:
          print('searchDirs: match omitPattern: %s for relPath: %s' \
            % (pat, relPath,))
        omitIt = True

  hasMetadata = False
  mpath = os.path.join( inDir, metadataName)
  if os.path.isfile( mpath):
    parseMetadata( mpath)      # check validity
    hasMetadata = True

  if bugLev >= 5:
    print('searchDirs: relPath: %s keepIt: %s omitIt: %s hasMetadata: %s' \
      % (relPath, keepIt, omitIt, hasMetadata,))

  if keepIt and (not omitIt) and hasMetadata:
    relDirs.append( relPath)
    processDir( bugLev, requireNames, optionNames, absTopDir,
      relPath, requireIcsd, warnings, relFiles)

  # Recurse to subdirs
  if not omitIt:
    for subName in sorted( os.listdir( inDir)):
      subPath = os.path.join( relPath, subName)
      if os.path.isdir( os.path.join( absTopDir, subPath)):
        searchDirs(
          bugLev,
          requireNames,
          optionNames,
          keepPatterns,
          omitPatterns,
          absTopDir,
          subPath,         # relPath: relative path so far
          requireIcsd,     # require icsd info in absTopDir string
          warnings,        # may append to warnings
          relDirs,         # appends to list
          relFiles)        # appends to list
  else:
    print('wrapUpload: %-18s %s' % ('skip subTree', inDir,))


#====================================================================
def iterateDirs(
  bugLev,
  requireNames,
  optionNames,
  keepAbsPaths,
  absTopDir,
  requireIcsd,         # require icsd info in absTopDir string
  warnings,            # may append to warnings
  relDirs,             # appends to list
  relFiles):           # appends to list
  '''
  For each path in keepAbsPaths, checks dir contents,
  and appends names to lists of dirs and files.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.
  * requireNames (str[]): names of required files.
  * optionNames (str[]): names of optional files.
  * keepAbsPaths (str[]): List of absolute paths of dirs to archive.
    Each must be absTopDir itself or a path below it.
  * absTopDir (str): Absolute path of the original top of dir tree
    to upload.
  * requireIcsd (boolean): if True, the relative path of each dir
    must contain ICSD info that :func:`getIcsdMap` can extract.
  * warnings (str[]): :func:`processDir` may append warnings.
  * relDirs (str[]): We append dirs to be archived.
  * relFiles (str[]): We append file names to be archived.

  **Returns**

  * None
  '''

  # Compare against absTopDir plus a trailing separator.  A plain
  # startswith( absTopDir) would also accept a sibling like
  # "/data/top2/x" for absTopDir "/data/top", giving relPath "2/x".
  topPrefix = absTopDir.rstrip('/') + '/'

  for inDir in keepAbsPaths:
    if bugLev >= 5:
      print('iterateDirs: inDir: %s' % (inDir,))
    if inDir != absTopDir and not inDir.startswith( topPrefix):
      throwerr('inDir does not start with absTopDir')

    # Relative path = the part after absTopDir, without leading slashes
    relPath = inDir[len(absTopDir) : ].lstrip('/')
    if bugLev >= 5:
      print('iterateDirs: relPath: %s' % (relPath,))

    relDirs.append( relPath)
    processDir( bugLev, requireNames, optionNames, absTopDir,
      relPath, requireIcsd, warnings, relFiles)


#====================================================================
def processDir(
  bugLev,
  requireNames,
  optionNames,
  absTopDir,
  relPath,             # relative path so far
  requireIcsd,         # require icsd info in absTopDir string
  warnings,            # may append to warnings
  relFiles):           # appends to list
  '''
  Prepares to archive a single directory:
  appends the relative paths of its files to relFiles,
  and appends a warning for each missing required file.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.
  * requireNames (str[]): names of required files.
  * optionNames (str[]): names of optional files.
  * absTopDir (str): Absolute path of the original top of dir tree
    to upload.
  * relPath (str): Relative path of the dir, somewhere below absTopDir.
  * requireIcsd (boolean): if True, relPath must contain
    ICSD info that :func:`getIcsdMap` can extract.
  * warnings (str[]): We append a message for each missing
    required file.
  * relFiles (str[]): We append file names to be archived.

  **Returns**

  * None

  **Raises**:

  * Whatever :func:`throwerr` raises, if the dir does not exist
    or if requireIcsd and the ICSD info cannot be extracted.
  '''

  inDir = os.path.abspath( os.path.join( absTopDir, relPath))
  if bugLev >= 5:
    print('processDir: relPath: %s' % (relPath,))
    print('processDir: inDir: %s' % (inDir,))
  if not os.path.isdir( inDir): throwerr('not a dir')

  # Check for requireNames: a missing one gives a warning
  for nm in requireNames:
    subFile = os.path.join( inDir, nm)
    if os.path.isfile( subFile):
      relFiles.append( os.path.join( relPath, nm))
    else:
      warnings.append('missing file: %s' % (subFile,))

  # Check for optionNames: a missing one is silently skipped
  for nm in optionNames:
    if os.path.isfile( os.path.join( inDir, nm)):
      relFiles.append( os.path.join( relPath, nm))

  if requireIcsd:
    # Only checks that the info can be extracted; the map is not kept.
    try:
      getIcsdMap( bugLev, relPath)
    except Exception:
      tback = traceback.format_exc( limit=None)
      print('===== start traceback')
      print(tback)
      print('===== end traceback')
      throwerr('icsd info not found. absTopDir: %s relPath: %s traceback:\n%s' \
        % (absTopDir, relPath, tback,))


#====================================================================

# Creates a map of ICSD info based on the dir name
def getIcsdMap( bugLev, relPath):
  '''
  Creates a map of ICSD info based on the dir name.

  Example parsing inDir: ::

    .../icsd_083665/icsd_083665.cif/ls-anti-ferro-7/relax_cellshape/1
             ^^^^^^                 ^^^^^^^^^^^^^ ^ ^^^^^^^^^^^^^^^ ^
             icsdNum                magType magNum  relaxType relaxNum

  Resulting map: ::

    icsdMap = {
      'icsdNum'   : 83665,
      'magType'   : 'lsaf',
      'magNum'    : 7,
      'relaxType' : 'rc',
      'relaxNum'  : 1,
    }

  magNum is extracted only for magType 'hsaf' and 'lsaf';
  otherwise it is 0.  If the path contains neither
  'relax_cellshape' nor 'relax_ions', relaxType is 'std'
  and relaxNum is 0.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.  Not used here.
  * relPath (str): Relative path.

  **Returns**

  * map similar to the above.

  **Raises**:

  * Whatever :func:`throwerr` raises, if the icsd id, the magType,
    a required magNum, or the relax subfolder number is not found.
  '''

  # Extract the ICSD number from the path '.../icsd_dddddd/...'
  mat = re.match(r'^(.*/)?icsd_(\d{6})/.*$', relPath)
  if mat is None:
    throwerr('no icsd id found in relPath: "%s"' % (relPath,))
  icsdNum = int( mat.group(2))

  # Extract magnetic moment type from the path
  # like '.../ls-anti-ferro-33/...'
  # The leading '/' in each name keeps '/ferro' from matching
  # inside '/hs-ferro', and so on.
  magType = None
  magNum = 0
  magPairs = [
    ['/anti-ferro',    'afer'],
    ['/ferro',         'fer'],
    ['/hs-ferro',      'hsf'],
    ['/hs-anti-ferro', 'hsaf'],
    ['/ls-ferro',      'lsf'],
    ['/ls-anti-ferro', 'lsaf'],
    ['/non-magnetic',  'nm'],
  ]
  for (tname, tcode) in magPairs:
    ix = relPath.find( tname)
    if ix >= 0:
      magType = tcode
      if tcode in ['hsaf', 'lsaf']:
        rest = relPath[(ix+len(tname)):]
        mat = re.match(r'^-(\d+).*$', rest)
        if mat is None:
          throwerr('no magNum found in relPath: "%s"' % (relPath,))
        magNum = int( mat.group(1))
      break
  if magType is None:
    throwerr('no magType found in relPath: "%s"' % (relPath,))

  # Extract relaxType from relPath
  # If relPath contains 'relax_cellshape', set relaxType = 'rc'
  # and relaxNum = the number of the subfolder.
  # Similarly for 'relax_ions'.
  relaxType = 'std'
  relaxNum = 0
  relaxPairs = [['relax_cellshape', 'rc'], ['relax_ions', 'ri']]
  for (tname, tcode) in relaxPairs:
    ix = relPath.find( tname)
    if ix >= 0:
      relaxType = tcode
      rest = relPath[(ix+len(tname)):]
      mat = re.match(r'^/(\d+).*$', rest)
      if mat is None:
        throwerr('no subfolder found in relPath: "%s"' % (relPath,))
      relaxNum = int( mat.group(1))
      break

  # Save the info from the path name
  icsdMap = {
    'icsdNum'   : icsdNum,
    'magType'   : magType,
    'magNum'    : magNum,
    'relaxType' : relaxType,
    'relaxNum'  : relaxNum,
  }
  return icsdMap


#====================================================================
def unused_extractPotcar( fname):
  '''
  (No longer used): Reads and saves the header sections from
  a POTCAR file.

  Each saved section starts at a line matching 'PAW_PBE <specie> ...'
  (which is included) and ends at the next line consisting of
  'Description' (which is not included).

  **Parameters**:

  * fname (str): Name of the input POTCAR file.

  **Returns**

  * List of pairs: [ specieName, listOfSavedLines].
    The saved lines are not stripped.
  '''

  startPat = re.compile(r'^\s*PAW_PBE\s+(\S+)\s+.*$')
  endPat = re.compile(r'^\s*Description\s*$')

  pseudos = []         # list of pairs: [ specieName, listOfSavedLines]
  capturing = False
  specie = None
  saves = []
  with open( fname) as fin:
    for line in fin:
      startMat = startPat.match( line)
      if startMat:
        # A new section begins with this line
        capturing = True
        specie = startMat.group(1)

      if endPat.match( line):
        # Close the current section and reset for the next one
        pseudos.append( [ specie, saves])
        capturing = False
        specie = None
        saves = []

      if capturing:
        saves.append( line)
  return pseudos


#====================================================================
def parseMetadata( fpath):
  '''
  Parses a metadata file and returns a corresponding map.

  Each field starts with a line like ``:fieldName: value`` and
  extends up to the next field line or comment line (starting
  with ``#``).  For the list fields the continuation lines are
  joined with commas.

  **Parameters**:

  * fpath (str): Name of the input metadata file.

  **Returns**

  * Map of metadata values.  The map structure is:

    ============= ========== ===========================================
    Key           Value Type Source description
    ============= ========== ===========================================
    firstName     str        Researcher first name
    lastName      str        Researcher last name
    spacegroup    str        international space group number
    parents       str[]      comma separated list of digests
                             of the vasprun.xml files of previous
                             runs, if any.  Each must be 128
                             lowercase hex chars.
    publications  str[]      comma separated list of DOIs
                             without the leading ``http://``
    standards     str[]      comma separated list of standardized
                             keywords
    keywords      str[]      comma separated list of any keywords
    notes         str        textual notes
    ============= ========== ===========================================

    All fields except spacegroup and parents are required.

  **Raises**:

  * Whatever :func:`throwerr` raises, if the file is invalid.
  '''

  firstNameTag = 'firstName'
  keywordsTag = 'keywords'
  lastNameTag = 'lastName'
  spacegroupTag = 'spacegroup'
  notesTag = 'notes'
  parentsTag = 'parents'
  publicationsTag = 'publications'
  standardsTag = 'standards'

  # Fields whose value is a comma separated list
  listTags = [parentsTag, publicationsTag, standardsTag, keywordsTag]

  requiredFields = [firstNameTag, lastNameTag, publicationsTag,
    standardsTag, keywordsTag, notesTag]

  checkFileFull( fpath)
  with open( fpath) as fin:
    lines = fin.readlines()    # not stripped.  Includes final \n.

  metaMap = {}
  if len(lines) < 2:
    throwerr('invalid metadata. file: "%s"' % ( fpath,))

  iline = 0
  while iline < len(lines):
    line = lines[iline]
    if re.match(r'^\s*$', line) or line.startswith('#'):
      iline += 1               # ignore blank lines and comments
    else:
      mat = re.match(r'^:(\w+):(.*)$', line)
      if not mat:
        throwerr(('invalid metadata.'
          + ' file: "%s" approx iline: %d line: "%s"')
          % (fpath, iline, line,))
      field = mat.group(1)
      value = mat.group(2)     # init value
      if field in listTags:
        # Strip before we test for trailing comma below
        value = value.strip()
      else:
        # The regex drops the line's \n.  Restore it so a continuation
        # line is not glued onto the last word of the first line.
        # The final strip() removes it for single line values.
        value += '\n'

      # Append lines onto value as we scan for
      # the next comment or field
      iline += 1
      while iline < len(lines):
        line = lines[iline]
        if line.startswith('#') or re.match(r'^:(\w+):(.*)', line):
          break
        if field in listTags:
          # If user forgot a trailing comma, help them out.
          if len(value.strip()) > 0 and not value.endswith(','):
            value += ','
          value += line.strip()
        else:
          value += line        # line includes \n
        iline += 1

      value = value.strip()    # get rid of whitespace at ends

      if field in [firstNameTag, lastNameTag]:
        # Test for empty first: value[0] below needs at least one char.
        if len(value) == 0 \
          or re.search(r'[^-a-zA-Z]', value) \
          or (not re.match('^[A-Z]$', value[0])) \
          or (not re.match('^[a-zA-Z]$', value[-1])):
          throwerr(('invalid name: "%s"'
            + ' file: "%s" approx iline: %d line: "%s"')
            % (value, fpath, iline, line,))
        if field in metaMap:
          throwerr(('multiple spec of field: "%s"'
            + ' file: "%s" approx iline: %d line: "%s"')
            % (field, fpath, iline, line,))
        metaMap[field] = value

      elif field in [spacegroupTag]:
        # NOTE(review): this only requires a leading digit, so
        # "12abc" passes.  Confirm whether that is intended.
        if not re.match(r'[0-9]+', value):
          throwerr(('invalid number: "%s"'
            + ' file: "%s" approx iline: %d line: "%s"')
            % (value, fpath, iline, line,))
        if field in metaMap:
          throwerr(('multiple spec of field: "%s"'
            + ' file: "%s" approx iline: %d line: "%s"')
            % (field, fpath, iline, line,))
        metaMap[field] = value

      elif field in listTags:
        # Convert value to list of keywords.
        # Insure keywords don't contain illegal chars.
        vals = []
        if len( value) > 0:
          for tok in value.split(','):
            tok = tok.strip()
            errMsg = ''
            if field == parentsTag:
              # If sha1: 40 chars; if sha512: 128 chars
              if len(tok) != 128 or re.search(r'[^a-f0-9]', tok):
                errMsg += 'Invalid parent (must be 128 chars).\n'
            if field == publicationsTag:
              # Test the item itself, not the field name.
              if tok.startswith('http'):
                errMsg += 'Specify DOI without the initial http://\n'
            if field in [standardsTag, keywordsTag]:
              if len(tok) < 1 \
                or re.search(r'[^-+_=.@%&a-zA-Z0-9]', tok) \
                or (not re.match('^[a-zA-Z]$', tok[0])):
                errMsg += 'Invalid keyword: "%s"\n' % (tok,)

            if len(errMsg) > 0:
              errMsg += ' Invalid item: "%s"\n' % (tok,) \
                + ' Containing value: "%s"\n' % (value,) \
                + ' file: "%s" approx iline: %d line: "%s"\n' \
                % (fpath, iline, line,)
              throwerr( errMsg)
            vals.append( tok)
        # Append values to previous ones
        if field not in metaMap: metaMap[field] = []
        metaMap[field] += vals           # set list

      elif field == notesTag:
        if field in metaMap:
          metaMap[field] += '\n' + value   # append text
        else: metaMap[field] = value       # set text

      else: throwerr('unknown field: \"%s\" approx iline: %d file: \"%s\"' \
        % (field, iline, fpath,))

  for nm in requiredFields:
    if nm not in metaMap:
      throwerr('missing field: \"%s\" in file: \"%s\"' % (nm, fpath,))

  legalStandards = [
    'fere',
    'gwvd',
    'wave',
    'aexp',
    'post-lopt',
    'post-chi',
    'post-chibs',
    'post-dos',
    'post-lopt',
    'wave',
    'vexp',
  ]
  for key in metaMap[ standardsTag]:
    if key not in legalStandards:
      # Two placeholders for the two arguments.
      throwerr('illegal standard "%s" in file: "%s"' % (key, fpath,))

  return metaMap


#====================================================================
def checkFileFull( fname):
  '''
  Insures that fname passes :func:`checkFile` and is not empty.

  **Parameters**:

  * fname (str): Name of the input file.

  **Returns**

  * None

  **Raises**:

  * Whatever :func:`checkFile` raises for an unusable file,
    and whatever :func:`throwerr` raises if the file has length == 0.
  '''

  checkFile( fname)
  numBytes = os.path.getsize( fname)
  if numBytes == 0:
    throwerr('file is empty: \"%s\"' % (fname,))


#====================================================================
def checkFile( fname):
  '''
  Insures that fname names an existing, readable regular file.
  The file may have length == 0.

  **Parameters**:

  * fname (str): Name of the input file.

  **Returns**

  * None

  **Raises**:

  * Whatever :func:`throwerr` raises, if fname is not a non-empty
    string, or the file does not exist, or it is not readable.
  '''

  # Only exactly str or unicode is accepted, judged by type name.
  isNameString = type(fname).__name__ in ('str', 'unicode')
  if not isNameString or len(fname) == 0:
    throwerr('invalid file name: "%s"' % (fname,))

  if not os.path.isfile( fname):
    throwerr('file not found: \"%s\"' % (fname,))

  readable = os.access( fname, os.R_OK)
  if not readable:
    throwerr('file is not readable: \"%s\"' % (fname,))


#====================================================================
def runSubprocess( bugLev, wkDir, args, showStdout):
  '''
  Calls the executable indicated by args and waits for completion.

  If bugLev >= 5 the subprocess stdout and stderr are not captured:
  they go to our own stdout and stderr.
  Otherwise they are captured through pipes.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.
  * wkDir (str): The working directory to use for the subprocess.
  * args (str[]): The executable name (in args[0]) followed by
    the command line arguments in args[1:].
  * showStdout (boolean): If True, print the captured stdout
    and stderr from the subprocess.

  **Returns**

  * None

  **Raises**:

  * Whatever :func:`throwerr` raises, if subprocess rc != 0.
  '''

  if bugLev >= 1:
    print('runSubprocess: args: %s' % (args,))
    print('runSubprocess: cmd: %s' % (' '.join(args),))

  if bugLev >= 5: pipe = None
  else: pipe = subprocess.PIPE

  proc = subprocess.Popen(
    args,
    shell=False,
    cwd=wkDir,
    stdin=subprocess.PIPE,
    stdout=pipe,
    stderr=pipe,
    bufsize=10*1000*1000)
  (stdout, stderr) = proc.communicate()

  # communicate() returns None for a stream that was not piped
  # (the bugLev >= 5 case).  Use empty strings so the len() calls
  # and the messages below still work.
  if stdout is None: stdout = ''
  if stderr is None: stderr = ''

  rc = proc.returncode
  if rc != 0:
    msg = 'subprocess failed.\n'
    msg += 'wkDir: %s\n' % (wkDir,)
    msg += 'args: %s\n' % (args,)
    msg += 'rc: %d\n' % (rc,)
    msg += '\n===== stdout:\n%s\n' % (stdout,)
    msg += '\n===== stderr:\n%s\n' % (stderr,)
    throwerr( msg)

  if showStdout:
    print('runSubProcess: len(stderr): %d' % (len(stderr),))
    print('runSubProcess: len(stdout): %d' % (len(stdout),))
    if len(stderr) > 0:
      print('\n===== stderr:\n%s\n' % (stderr,))
    if len(stdout) > 0:
      print('\n===== stdout:\n%s\n' % (stdout,))


#====================================================================
def findNumFiles( tag, dir):
  '''
  Counts the directory entries having name == tag
  in the tree rooted at dir.

  Every entry is tested, whether it is a file or a dir,
  and every subdir is searched recursively.

  **Parameters**:

  * tag (str): The name to search for.
  * dir (str): The root of the directory tree to search.

  **Returns**

  * integer number of matches.

  **Raises**:

  * Whatever :func:`throwerr` raises, if dir is not a directory.
  '''

  if not os.path.isdir( dir):
    throwerr('not a dir: %s' % (dir,))

  total = 0
  for entry in sorted( os.listdir( dir)):
    if entry == tag:
      total += 1
    childPath = dir + '/' + entry
    if os.path.isdir( childPath):
      total += findNumFiles( tag, childPath)   # recursion
  return total


#====================================================================

# Coord with parseUui, below.
def formatUui( curDate, userId, absTopDir):
  '''
  Formats a uui (wrapId).

  A wrapId looks like: ::

    @2013.08.13@12.58.22.735311@someUser@home.someUser.redmesa.old.td.testlada.2013.04.06.Fe.O@

  The last part is absTopDir without its initial ``/``,
  with every ``/`` and ``@`` replaced by ``.``.

  **Parameters**:

  * curDate (datetime.datetime): The current date.
  * userId (str): The current user id.
  * absTopDir (str): The absolute path of the top dir.

  **Returns**

  * A wrapId

  **Raises**:

  * Whatever :func:`throwerr` raises, if absTopDir does not
    start with ``/``.
  '''

  if not absTopDir.startswith('/'): throwerr('absTopDir not abs')

  # Drop the initial /, then get rid of every / and @
  modDir = absTopDir[1:].replace('/', '.').replace('@', '.')
  stamp = curDate.strftime('%Y.%m.%d@%H.%M.%S.%f')
  return '@%s@%s@%s@' % (stamp, userId, modDir,)


#==========

# Coord with formatUui, above.
# If matches, returns wrapId == stg.
# Else returns None.
def parseUui( stg):
  '''
  Parses a uui (wrapId).

  A wrapId looks like: ::

    @2013.08.13@12.58.22.735311@someUser@home.someUser.redmesa.old.td.testlada.2013.04.06.Fe.O@

  The input string may have subdirs info after the initial wrapId.

  **Parameters**:

  * stg (str): The string to be parsed.

  **Returns**

  * If stg starts with a wrapId, returns that leading wrapId.
    Else returns None.

  **Raises**:

  * ValueError (from ``strptime``) if the pattern matches but
    the digits do not form a valid date and time.
  '''

  uuiPattern = r'^(@(\d{4}\.\d{2}\.\d{2})@(\d{2}\.\d{2}\.\d{2}\.\d{6})@([^./@]*)@([^/@]*)@)'
  mat = re.match( uuiPattern, stg)
  if not mat:
    return None

  # Groups: 1 = wrapId, 2 = date, 3 = time, 4 = userid, 5 = dir.
  # The parsed date is not returned; strptime is called to validate.
  datetime.datetime.strptime(
    mat.group(2) + ' ' + mat.group(3), '%Y.%m.%d %H.%M.%S.%f')
  return mat.group(1)


#====================================================================

# Used by deepCompare to unify types
def fixType( val):
  '''
  Normalizes a value so equivalent representations get the same type.

  The dispatch is on the type name:
  numpy float64, numpy int64 and Python int become Python float;
  numpy string_ becomes str; list becomes a numpy array.
  Any other value is returned unchanged.
  '''

  tpa = type(val).__name__
  if tpa in ('float64', 'int64', 'int'):
    val = float( val)
  elif tpa == 'string_':
    val = str( val)
  elif tpa == 'list':
    val = np.array( val)
  return val


#====================================================================


# Returns None if match; else returns error msg.

def deepCompare( bugLev, epsilon, taga, va, tagb, vb):
  '''
  Compares two objects.
  Returns None if the same; else returns error msg.
  Used by :func:`readVasp.readVasp` to compare the results
  of parsing vasprun.xml and OUTCAR.

  **Parameters**:

  * bugLev (int): Debug level, passed on to compareMaps.
  * epsilon (float): Max allowed absolute difference between floats.
  * taga (str): Name of the first object, used in messages.
  * va: The first object.
  * tagb (str): Name of the second object, used in messages.
  * vb: The second object.

  **Returns**

  * None if va and vb match; else a string describing the first
    mismatch found.

  **Raises**

  * Exception (via throwerr) if the values have a type that is
    not handled here.
  '''

  cmsg = None
  va = fixType( va)
  vb = fixType( vb)
  tpa = type(va).__name__
  tpb = type(vb).__name__

  if tpa != tpb:
    cmsg = 'types differ: %s: %s %s: %s' % ( taga, tpa, tagb, tpb,)

  # Identity test, not "== None": on a numpy array "== None" is
  # elementwise, and using that array in "and" raises ValueError.
  elif va is None and vb is None:
    pass

  elif tpa == 'dict':
    cmsg = compareMaps( bugLev, taga, va, tagb, vb)

  elif tpa == 'instance':
    cmsg = compareMaps( bugLev, taga, va.__dict__, tagb, vb.__dict__)

  # No 'list' case is needed: fixType has turned lists into ndarrays.
  elif tpa == 'ndarray':
    if va.shape != vb.shape:
      cmsg = 'shape mismatch: %s: %s %s: %s' \
        % (taga, va.shape, tagb, vb.shape,)
    else:
      # Compare element by element; stop at the first mismatch.
      fa = va.flatten()
      fb = vb.flatten()
      for ii in range( fa.size):
        cmsg = deepCompare( bugLev, epsilon, taga, fa[ii], tagb, fb[ii])
        if cmsg is not None:
          break

  elif tpa == 'float':
    if abs( va - vb) > epsilon:
      # Relative error is undefined when the reference value is 0.
      if va == 0:
        relerr = float('NaN')
      else:
        relerr = (vb - va) / va
      cmsg = ('float value mismatch: %s: %g %s: %g vb-va: %g'
        + ' (vb-va)/va: %g') % (taga, va, tagb, vb, vb - va, relerr)

  elif tpa in [ 'bool', 'datetime', 'int', 'str']:
    if va != vb:
      cmsg = 'scalar value mismatch: %s: %s %s: %s' \
        % (taga, repr(va), tagb, repr(vb),)

  else:
    throwerr('unknown type: %s' % (type(va),))

  return cmsg
#====================================================================


def compareMaps( bugLev, taga, va, tagb, vb):
  '''
  Compares two dictionaries.
  Returns None if the same; else returns error message.

  The keys of both maps are walked in sorted order.  A key found in
  only one map, or a key whose two values differ according to
  deepCompare, adds one entry to the returned message.

  **Parameters**:

  * bugLev (int): Debug level.  At 5 or more, each finding is printed.
  * taga (str): Name of the first map, used in messages.
  * va (dict): The first map.
  * tagb (str): Name of the second map, used in messages.
  * vb (dict): The second map.

  **Returns**

  * None if the maps match; else a string listing every difference.
  '''

  # Tolerance handed to deepCompare for float values.
  epsilon = 5.e-5

  # sorted() works on both Python 2 key lists and Python 3 key views.
  akeys = sorted( va.keys())
  bkeys = sorted( vb.keys())
  numA = len( akeys)
  numB = len( bkeys)
  iaa = 0      # index into akeys
  ibb = 0      # index into bkeys
  msgs = []    # one entry per difference; joined at the end

  def note( cm):
    # Records one difference, and echoes it at high debug levels.
    msgs.append( cm)
    if bugLev >= 5:
      print('compareMaps: %s' % (cm,))

  # Merge-walk the two sorted key lists.
  while iaa < numA or ibb < numB:
    if iaa < numA and ibb < numB and akeys[iaa] == bkeys[ibb]:
      # Key is in both maps: compare the values.
      akey = akeys[iaa]
      aval = va[akey]
      bval = vb[bkeys[ibb]]
      compMsg = deepCompare( bugLev, epsilon, taga, aval, tagb, bval)
      if compMsg is None:
        if bugLev >= 5:
          print('compareMaps: value match. key: %s' % (akey,))
      else:
        note( ('Value mismatch: key: %s msg: %s\n'
          + ' %s: %s\n %s: %s\n')
          % (akey, compMsg, taga, aval, tagb, bval,))
      iaa += 1
      ibb += 1
    elif ibb >= numB or (iaa < numA and akeys[iaa] < bkeys[ibb]):
      # Key is only in va.
      note( 'Unique key for %s: %s\n' % (taga, akeys[iaa],))
      iaa += 1
    else:
      # Key is only in vb.
      note( 'Unique key for %s: %s\n' % (tagb, bkeys[ibb],))
      ibb += 1

  if not msgs:
    return None
  return ''.join( msgs)


#====================================================================
def printMap( tag, vmap, maxLen):
  '''
  Prints a map, one key per line, in sorted key order.

  **Parameters**:

  * tag (str): Explanatory name of the map.
  * vmap (map): The map to print.  May be None.
  * maxLen (int): The max length to use in printing a value, or 0.
    A longer value is cut to maxLen characters and followed by '...'.
    With 0, values are printed in full.

  **Returns**

  * None
  '''

  # Single-argument print(...) behaves the same on Python 2 and 3.
  print('\n%s' % (tag,))
  if vmap is None:
    print(' Map is None.')
  else:
    print(' Map len: %d' % (len(vmap),))
    # sorted() works on both Python 2 key lists and Python 3 key views.
    for key in sorted( vmap.keys()):
      val = str( vmap[key])
      if maxLen > 0 and len(val) > maxLen:
        val = val[:maxLen] + '...'
      print(' %s: %s' % (key, val,))


#====================================================================
def formatMatrix( mat):
  '''
  Renders a 2D matrix as text, one line per row.

  Each line starts with the row index in brackets, followed by the
  row's values in fixed-width format with four decimals.

  **Parameters**:

  * mat (float[][] or numpy 2D array): input matrix

  **Returns**

  * string representation of the matrix.
  '''

  lines = []
  for rowIx, row in enumerate( mat):
    cells = ''.join( ' %8.4f' % (elem,) for elem in row)
    lines.append( ' [%2d] ' % (rowIx,) + cells + '\n')
  return ''.join( lines)


#====================================================================
def parseBoolean( stg):
  '''
  Converts a string to a boolean, ignoring case.

  'false' and 'no' give False; 'true' and 'yes' give True.
  Any other string is rejected through throwerr.
  '''

  meanings = {
    'false': False,
    'no': False,
    'true': True,
    'yes': True,
  }
  word = stg.lower()
  if word not in meanings:
    throwerr('invalid boolean: "%s"' % (stg,))
  return meanings[word]


#====================================================================
def throwerr( msg):
  '''
  Raises an Exception carrying the given message.

  **Parameters**:

  * msg (str): Error message.

  **Returns**

  * (Never returns)

  **Raises**

  * Exception
  '''

  err = Exception( msg)
  raise err


#====================================================================
# Script entry point: main() is called only when this file is executed
# directly, not when it is imported as a module.
# NOTE(review): main is not defined in this part of the file;
# presumably it is defined earlier -- confirm.
if __name__ == '__main__': main()