# Source code for nrelmat.wrapUpload

#!/bin/env python
# Copyright 2013 National Renewable Energy Laboratory, Golden CO, USA
# This file is part of NREL MatDB.
#
# NREL MatDB is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# NREL MatDB is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with NREL MatDB.  If not, see <http://www.gnu.org/licenses/>.

import datetime, json, math, os, pwd, re
import shutil, socket, stat, subprocess, sys, time, traceback

import numpy as np
np.seterr( all='raise', under='ignore')


# Version string of this module.  doUpload writes it, followed by the
# upload identifier, into the flag file.
version = '1.0.0'

# Name of the metadata file.  searchDirs treats a directory containing
# a file with this name as a candidate for archiving.
metadataName = 'metadata'



# Name of the subdir that doUpload creates under workDir to hold
# the list file, the tar archive, and the flag file.
digestDirName = 'wrapUpload.archive'


#====================================================================

def badparms( msg):
  '''
  Prints an error message followed by the usage summary to stdout,
  then terminates the program with exit status 1.

  **Parameters**:

  * msg (str): Description of the command line problem.

  **Returns**

  * Does not return: always calls ``sys.exit(1)``.
  '''

  # The text is assembled first and emitted with a single one-argument
  # print call, which produces the same output whether ``print`` is the
  # Python 2 statement or the Python 3 function.
  usageLines = [
    '\nError: %s' % (msg,),
    '',
    'There are three ways to run wrapUpload.py.',
    '',
    '1. Specify a list of directories to upload, using -keepList.',
    '',
    '2. Specify regular expressions matching the names of directories',
    'to upload, using -keepPatterns and -omitPatterns.',
    '',
    '3. Do not specify -keepList or -keepPatterns or -omitPatterns.',
    'In this case wrapUpload will upload every directory containing',
    'a metadata file in the tree rooted at topDir.',
    '',
    'Parameters:',
    '',
    '  -bugLev     <int>       Debug level.  Default: 0',
    '',
    '  -readType     <string>  outcar / xml',
    '',
    '  -requireInput <boolean> no/yes: do we require that the files',
    '                          INCAR, KPOINTS, POSCAR exist.',
    '',
    '  -requireIcsd  <boolean> no/yes: do we require that the file',
    '                          path names contain ICSD info.',
    '',
    '  -keepList   <string>    File containing the absolute paths',
    '                          of the dirs to be uploaded.',
    '                          Still topDir must be specified,',
    '                          and all paths in keepList must',
    '                          start with the specified topDir.',
    '                          If keepList is specified,',
    '                          keepPatterns and omitPatterns',
    '                          must not be specified.',
    '',
    '  -keepPatterns <string>  Comma separated list of',
    '                          regular expressions matching',
    '                          the relative paths of those directories',
    '                          to be kept.  If specified,',
    '                          keepList must not be specified.',
    '                          If none of keepList, keepPatterns,',
    '                          or omitPatterns are specified, all dirs',
    '                          below topDir containing a metadata',
    '                          file will be archived.',
    '',
    '  -omitPatterns <string>  Comma separated list of',
    '                          regular expressions matching',
    '                          the relative paths of those directories',
    '                          to be omitted.  If specified,',
    '                          keepList must not be specified.',
    '',
    '  -topDir     <string>    Top of dir tree to upload.',
    '',
    '  -workDir    <string>    Work dir',
    '',
  ]
  print('\n'.join( usageLines))

  sys.exit(1)


#====================================================================


def main():
  '''
  Command line entry point: parses ``sys.argv``, builds the lists of
  required and optional file names, and calls :func:`doUpload`.

  Any command line problem is reported through :func:`badparms`,
  which prints the usage summary and exits with status 1.

  Command line parameters:

  =================   =========    ===========================================
  Parameter           Type         Description
  =================   =========    ===========================================
  **-bugLev**         integer      Debug level.  Normally 0.

  **-readType**       string       If 'outcar', read the OUTCAR files.
                                   Else if 'xml', read the vasprun.xml files.

  **-requireInput**   boolean      no/yes: do we require that the files
                                   INCAR, KPOINTS, POSCAR exist.

  **-requireIcsd**    boolean      no/yes: do we require that the file
                                   path names contain ICSD info.
                                   See notes below.

  **-keepList**       string       File containing the absolute paths
                                   of the dirs to be uploaded.
                                   Still ``topDir`` must be specified,
                                   and all paths in ``keepList`` must
                                   start with the specified ``topDir``.
                                   If ``keepList`` is specified,
                                   ``keepPatterns`` and ``omitPatterns``
                                   must not be specified.
                                   If none of keepList, keepPatterns,
                                   or omitPatterns are specified, all dirs
                                   below topDir containing a metadata
                                   file will be archived.

  **-keepPatterns**   string       Comma separated list of
                                   regular expressions matching
                                   the relative paths of those directories
                                   to be kept.  If specified,
                                   ``keepList`` must not be specified.

  **-omitPatterns**   string       Comma separated list of
                                   regular expressions matching
                                   the relative paths of those directories
                                   to be omitted.  If specified,
                                   ``keepList`` must not be specified.

  **-topDir**         string       Top of dir tree to upload.

  **-workDir**        string       Work dir

  =================   =========    ===========================================

  The parameters readType, requireInput, requireIcsd, topDir and
  workDir are mandatory.  The file named ``metadata`` and the file
  selected by readType are always required in each archived dir;
  the other of OUTCAR / vasprun.xml is optional.

  If requireIcsd is true, :func:`getIcsdMap` must be able
  to extract ICSD info from the file names.  File names must be like: ::

    .../icsd_083665/icsd_083665.cif/ls-anti-ferro-7/relax_cellshape/1
             ^^^^^^                 ^^^^^^^^      ^ ^^^^^^^^^^^^^^^ ^
            icsdNum                 magType  magNum relaxType       relaxNum
  '''


  bugLev = 0
  readType = None
  requireInput = None
  requireIcsd = None
  keepList = None
  keepPatterns = None
  omitPatterns = None
  topDir = None
  workDir = None

  # argv[0] is the program name, so a well formed list of key/value
  # pairs always gives an odd total length.
  argv = sys.argv
  if len(argv) % 2 != 1:
    badparms('Parms must be key/value pairs')
  for iarg in range( 1, len(argv), 2):
    key = argv[iarg]
    val = argv[iarg+1]
    if key == '-bugLev':
      # Report a non-numeric debug level as a usage error
      # instead of letting the ValueError escape as a traceback.
      try:
        bugLev = int( val)
      except ValueError:
        badparms('invalid integer for -bugLev: "%s"' % (val,))
    elif key == '-readType': readType = val
    elif key == '-requireInput': requireInput = parseBoolean( val)
    elif key == '-requireIcsd': requireIcsd = parseBoolean( val)
    elif key == '-keepList': keepList = val
    elif key == '-keepPatterns': keepPatterns = val.split(',')
    elif key == '-omitPatterns': omitPatterns = val.split(',')
    elif key == '-topDir': topDir = val
    elif key == '-workDir': workDir = val
    else: badparms('unknown key: "%s"' % (key,))

  if readType is None: badparms('parm not specified: -readType')
  if requireInput is None: badparms('missing parameter: -requireInput')
  if requireIcsd is None: badparms('missing parameter: -requireIcsd')
  # keepList, keepPatterns and omitPatterns are optional,
  # but keepList excludes the two pattern parameters.
  if keepList is not None \
    and (keepPatterns is not None or omitPatterns is not None):
    badparms('with keepList, may not spec keepPatterns or omitPatterns')
  if topDir is None: badparms('missing parameter: -topDir')
  if workDir is None: badparms('missing parameter: -workDir')
  absTopDir = os.path.abspath( topDir)     # only echoed below

  print('wrapUpload: readType: %s' % (readType,))
  print('wrapUpload: keepList: %s' % (keepList,))
  print('wrapUpload: keepPatterns: %s' % (keepPatterns,))
  print('wrapUpload: omitPatterns: %s' % (omitPatterns,))
  print('wrapUpload: topDir: %s' % (topDir,))
  print('wrapUpload: absTopDir: %s' % (absTopDir,))
  print('wrapUpload: workDir: %s' % (workDir,))


  # Names of required files
  requireNames = [
    metadataName,
  ]

  # Names of optional files
  optionNames = [
    'pbserr',
    'pbsout',
    'pbsscript',
    'stderr',
    'stdout',
    'DOSCAR',
    'POTCAR',
  ]

  # The VASP input files are required or optional, per -requireInput.
  inputNames = [ 'INCAR', 'KPOINTS', 'POSCAR']
  if requireInput: requireNames += inputNames
  else: optionNames += inputNames

  # The output file selected by readType is required;
  # the other one is archived only if present.
  if readType == 'xml':
    requireNames.append('vasprun.xml')
    optionNames.append('OUTCAR')
  elif readType == 'outcar':
    requireNames.append('OUTCAR')
    optionNames.append('vasprun.xml')
  else: badparms('invalid readType: %s' % (readType,))

  doUpload( bugLev, requireNames, optionNames, requireIcsd,
    keepList, keepPatterns, omitPatterns,
    topDir, workDir)



#====================================================================


def doUpload(
  bugLev,
  requireNames,
  optionNames,
  requireIcsd,                # require icsd info in the relative paths
  keepList,
  keepPatterns,
  omitPatterns,
  topDir,
  workDir):
  '''
  Locates model runs, checks dir contents, and creates a compressed
  ``tar`` archive of the selected files in a new subdir of ``workDir``.

  If ``keepList`` is specified, reads that file to create
  keepAbsPaths = sorted unique absolute paths and calls
  :func:`iterateDirs`.

  Otherwise calls :func:`searchDirs` to recursively search the
  directory tree starting at ``topDir``.

  If the scan collected any warnings they are printed and
  ``throwerr`` is called before anything is written.  Otherwise the
  subdir ``wrapUpload.archive`` is created in ``workDir`` containing:

  * ``digest.list``: the relative paths of the archived files,
    one per line.
  * ``<uui>.tgz``: the tar archive, created with ``/bin/tar``.
  * ``<uui>.zzflag``: one line holding the module version and uui.

  Here ``uui`` is the identifier returned by ``formatUui`` (defined
  elsewhere in this module) for the current time, user and topDir.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.

  * requireNames (str[]): names of required files.

  * optionNames (str[]): names of optional files.

  * requireIcsd (boolean): passed on to :func:`iterateDirs` or
    :func:`searchDirs`; if True, each archived relative path must
    contain ICSD info that :func:`getIcsdMap` can extract.

  * keepList (str):
    Name of a file listing the absolute paths
    of the dirs to be uploaded, one per line, or None.
    Blank lines and lines starting with ``#`` are ignored.
    Still ``topDir`` must be specified,
    and all paths in ``keepList`` must
    start with the specified ``topDir``.
    If ``keepList`` is specified,
    ``keepPatterns`` and ``omitPatterns``
    must not be specified.

  * keepPatterns (str[]):
    List of regular expressions matching
    the relative paths of those directories
    to be kept.  If specified,
    ``keepList`` must not be specified.

  * omitPatterns (str[]):
    List of regular expressions matching
    the relative paths of those directories
    to be omitted.  If specified,
    ``keepList`` must not be specified.

  * topDir (str):       Top of dir tree to upload.

  * workDir (str):      Work dir.  Must exist and be empty.

  **Returns**

  * None

  '''

  absTopDir = os.path.abspath( topDir)

  if not os.path.isdir( workDir):
    throwerr('workDir does not exist: %s' % (workDir,))
  if len( os.listdir( workDir)) != 0:
    throwerr('workDir is not empty: %s' % (workDir,))

  # Get keepAbsPaths from file keepList
  # Use a set to make sure entries are unique.
  keepAbsPaths = None
  if keepList is not None:
    keepAbsPathSet = set()
    with open( keepList) as fin:
      for (iline, rawLine) in enumerate( fin, 1):     # iline is 1-based
        line = rawLine.strip()
        if len(line) == 0 or line.startswith('#'):
          continue                 # ignore blank lines and comments
        apath = line
        if apath.endswith('/'): apath = apath[:-1]
        # Only accept paths that are already absolute and normalized
        if apath != os.path.abspath( apath):
          throwerr('keepList line is not an abs path.  iline: %d  line: %s' \
            % (iline, line,))
        if not os.path.isdir( apath):
          throwerr(
            'keepList line is not a dir.  iline: %d  line: %s' \
            % (iline, line,))
        keepAbsPathSet.add( apath)

    keepAbsPaths = sorted( keepAbsPathSet)
    if bugLev >= 1:
      print('doUpload: len(keepAbsPaths):  %d' % (len(keepAbsPaths),))
    if bugLev >= 5:
      print('doUpload: keepAbsPaths:  %s' % (keepAbsPaths,))

  digestDir = os.path.join( workDir, digestDirName)

  if os.path.lexists( digestDir):
    throwerr('workDir is not empty: subdir already exists: %s' \
      % (digestDir,))

  warnings = []

  relDirs = []           # list of dirs we archive
  relFiles = []          # list of files to archive

  if keepAbsPaths is not None:
    # Get the relative paths of all files to be archived,
    # using the keepAbsPaths list.
    iterateDirs(
      bugLev,
      requireNames,
      optionNames,
      keepAbsPaths,
      absTopDir,
      requireIcsd,                # require icsd info in the relative paths
      warnings,                   # may append to warnings
      relDirs,                    # appends to list
      relFiles)                   # appends to list

  else:
    # Get the relative paths of all files to be archived,
    # starting at absTopDir.
    searchDirs(
      bugLev,
      requireNames,
      optionNames,
      keepPatterns,
      omitPatterns,
      absTopDir,
      '',                         # relative path so far
      requireIcsd,                # require icsd info in the relative paths
      warnings,                   # may append to warnings
      relDirs,                    # appends to list
      relFiles)                   # appends to list

  # Any warning is fatal: report them all, then stop before writing.
  numWarn = len( warnings)
  if numWarn > 0:
    print('')
    for warn in warnings:
      print('Warning: %s' % (warn,))
    print('')
    throwerr('\nFound %d warnings.  See above.' % (numWarn,))

  numKeptDir = len( relDirs)
  numKeptFile = len( relFiles)
  if bugLev >= 0:
    print('wrapUpload: numKeptDir: %d' % (numKeptDir,))
    print('wrapUpload: numKeptFile: %d' % (numKeptFile,))

  os.mkdir( digestDir)

  # Write the list of relative file names that tar reads via -T.
  listFile = os.path.join( digestDir, 'digest.list')
  with open( listFile, 'w') as fout:
    for path in relFiles:
      fout.write( path + '\n')

  print('wrapUpload: beginning tar (this could take several minutes)')
  curDate = datetime.datetime.now()
  userId = pwd.getpwuid(os.getuid())[0]
  uui = formatUui( curDate, userId, absTopDir)
  fBase = os.path.join( digestDir, uui)

  tarFile = fBase + '.tgz'
  flagFile = fBase + '.zzflag'

  # Create tarFile = tar of the files to be saved.
  # Use -h to deref symlinks.
  # The names in listFile are relative to absTopDir; presumably
  # runSubprocess uses its second argument as the working dir.
  args = ['/bin/tar', '-chzf', tarFile, '-T', listFile, '--mode=660']
  runSubprocess( bugLev, absTopDir, args, False)  # showStdout = False

  # Create flagFile with version num
  with open( flagFile, 'w') as fout:
    fout.write( version + ' ' + uui + '\n')

  print('wrapUpload: Completed.  numKeptDir: %d  numKeptFile: %d' \
    % (numKeptDir, numKeptFile,))



#====================================================================


def searchDirs(
  bugLev,
  requireNames,
  optionNames,
  keepPatterns,
  omitPatterns,
  absTopDir,
  relPath,                    # relative path so far
  requireIcsd,                # require icsd info in the relative path
  warnings,                   # may append to warnings
  relDirs,                    # appends to list
  relFiles):                  # appends to list
  '''
  Recursive: locates model runs, checks dir contents,
  and appends names to lists of dirs and files.

  A directory is archived when it contains a metadata file,
  matches some keepPattern (if any were given), and matches no
  omitPattern.  A directory matching an omitPattern is not descended
  into; all other directories are, whether archived or not.

  A metadata file found in any visited directory is parsed with
  :func:`parseMetadata` to check its validity, even when the
  directory is not archived.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.

  * requireNames (str[]): names of required files.

  * optionNames (str[]): names of optional files.

  * keepPatterns (str[]):
    List of regular expressions matching
    the relative paths of those directories
    to be kept, or None.  If specified,
    ``keepList`` must not be specified.

  * omitPatterns (str[]):
    List of regular expressions matching
    the relative paths of those directories
    to be omitted, or None.  If specified,
    ``keepList`` must not be specified.

  * absTopDir (str): Absolute path of the original top of dir tree to upload.

  * relPath (str): Relative path so far, somewhere below absTopDir.

  * requireIcsd (boolean): passed on to :func:`processDir`; if True,
    relPath must contain ICSD info that :func:`getIcsdMap` can extract.

  * warnings (str[]): :func:`processDir` may append warning messages.

  * relDirs (str[]): We append dirs to be archived.

  * relFiles (str[]): We append file names to be archived.

  **Returns**

  * None
  '''

  inDir = os.path.abspath( os.path.join( absTopDir, relPath))
  if bugLev >= 5:
    print('searchDirs: relPath: %s' % (relPath,))
    print('searchDirs: inDir: %s' % (inDir,))
  if not os.path.isdir( inDir):
    throwerr('not a dir: "%s"' % (inDir,))

  # Check for keepPattern and omitPattern matches.
  # If any keepPatterns exist:
  #   keepIt = (not any omitPattern) and some keepPattern
  # Else:
  #   keepIt = not any omitPattern

  keepIt = True
  if keepPatterns is not None:
    keepIt = False
    for pat in keepPatterns:
      if re.search( pat, relPath):
        if bugLev >= 5:
          print('searchDirs: match keepPattern: %s  for relPath: %s' \
            % (pat, relPath,))
        keepIt = True
        break

  # Every omitPattern is tested so that, at bugLev >= 5,
  # each matching pattern is reported.
  omitIt = False
  if omitPatterns is not None:
    for pat in omitPatterns:
      if re.search( pat, relPath):
        if bugLev >= 5:
          print('searchDirs: match omitPattern: %s  for relPath: %s' \
            % (pat, relPath,))
        omitIt = True

  hasMetadata = False
  mpath = os.path.join( inDir, metadataName)
  if os.path.isfile( mpath):
    parseMetadata( mpath)        # check validity; result not needed here
    hasMetadata = True

  if bugLev >= 5:
    print('searchDirs: relPath: %s  keepIt: %s  omitIt: %s  hasMetadata: %s' \
      % (relPath, keepIt, omitIt, hasMetadata,))

  if keepIt and (not omitIt) and hasMetadata:
    relDirs.append( relPath)
    processDir( bugLev, requireNames, optionNames,
      absTopDir, relPath, requireIcsd, warnings, relFiles)

  if omitIt:
    # An omitted dir hides its whole subtree.
    print('wrapUpload: %-18s %s' % ('skip subTree', inDir,))
    return

  # Recurse to subdirs, in sorted order
  for subName in sorted( os.listdir( inDir)):
    subPath = os.path.join( relPath, subName)
    if os.path.isdir( os.path.join( absTopDir, subPath)):
      searchDirs(
        bugLev,
        requireNames,
        optionNames,
        keepPatterns,
        omitPatterns,
        absTopDir,
        subPath,                    # relPath: relative path so far
        requireIcsd,                # require icsd info in the relative path
        warnings,                   # may append to warnings
        relDirs,                    # appends to list
        relFiles)                   # appends to list


#====================================================================


def iterateDirs(
  bugLev,
  requireNames,
  optionNames,
  keepAbsPaths,
  absTopDir,
  requireIcsd,                # require icsd info in the relative path
  warnings,                   # may append to warnings
  relDirs,                    # appends to list
  relFiles):                  # appends to list
  '''
  For each path in keepAbsPaths, checks dir contents,
  and appends names to lists of dirs and files.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.

  * requireNames (str[]): names of required files.

  * optionNames (str[]): names of optional files.

  * keepAbsPaths (str[]):
    List of absolute paths of dirs to archive.  Each must be
    absTopDir itself or a path below it.

  * absTopDir (str): Absolute path of the original top of dir tree to upload.

  * requireIcsd (boolean): passed on to :func:`processDir`; if True,
    each relative path must contain ICSD info that
    :func:`getIcsdMap` can extract.

  * warnings (str[]): :func:`processDir` may append warning messages.

  * relDirs (str[]): We append dirs to be archived.

  * relFiles (str[]): We append file names to be archived.

  **Returns**

  * None
  '''

  # Compare against absTopDir plus a path separator.  A plain string
  # prefix test would wrongly accept a sibling such as '/data/top2/x'
  # for absTopDir '/data/top' and produce the relative path '2/x'.
  topPrefix = absTopDir.rstrip('/') + '/'

  for inDir in keepAbsPaths:
    if bugLev >= 5:
      print('iterateDirs: inDir: %s' % (inDir,))
    if inDir != absTopDir and not inDir.startswith( topPrefix):
      throwerr('inDir does not start with absTopDir.'
        + '  inDir: "%s"  absTopDir: "%s"' % (inDir, absTopDir,))
    relPath = inDir[len(absTopDir) : ].lstrip('/')
    if bugLev >= 5:
      print('iterateDirs: relPath: %s' % (relPath,))

    relDirs.append( relPath)
    processDir( bugLev, requireNames, optionNames,
      absTopDir, relPath, requireIcsd, warnings, relFiles)



#====================================================================


def processDir(
  bugLev,
  requireNames,
  optionNames,
  absTopDir,
  relPath,                    # relative path so far
  requireIcsd,                # require icsd info in the relative path
  warnings,                   # may append to warnings
  relFiles):                  # appends to list
  '''
  Prepares to archive a single directory:
  appends the relative names of its files to relFiles.

  Each required file that exists is appended to relFiles; each one
  that is missing adds a message to warnings.  Each optional file
  that exists is appended to relFiles; missing ones are ignored.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.

  * requireNames (str[]): names of required files.

  * optionNames (str[]): names of optional files.

  * absTopDir (str): Absolute path of the original top of dir tree to upload.

  * relPath (str): Relative path of the directory, below absTopDir.

  * requireIcsd (boolean): if True, relPath must
    contain ICSD info that :func:`getIcsdMap` can extract;
    otherwise the traceback is printed and ``throwerr`` is called.

  * warnings (str[]): We append a message for each missing
    required file.

  * relFiles (str[]): We append file names to be archived,
    relative to absTopDir.

  **Returns**

  * None
  '''


  inDir = os.path.abspath( os.path.join( absTopDir, relPath))
  if bugLev >= 5:
    print('processDir: relPath: %s' % (relPath,))
    print('processDir: inDir: %s' % (inDir,))
  if not os.path.isdir( inDir):
    throwerr('not a dir: "%s"' % (inDir,))

  # Check for requireNames
  for nm in requireNames:
    subFile = os.path.join( inDir, nm)
    if os.path.isfile( subFile):
      relFiles.append( os.path.join( relPath, nm))
    else:
      warnings.append('missing file: %s' % (subFile,))

  # Check for optionNames
  for nm in optionNames:
    if os.path.isfile( os.path.join( inDir, nm)):
      relFiles.append( os.path.join( relPath, nm))

  if requireIcsd:
    # Only the ability to extract the info is checked here;
    # the resulting map is not used.
    try:
      getIcsdMap( bugLev, relPath)
    except Exception:
      tbText = traceback.format_exc( limit=None)
      print('===== start traceback')
      print(tbText)
      print('===== end traceback')
      throwerr('icsd info not found.  absTopDir: %s  relPath: %s  traceback:\n%s' \
        % (absTopDir, relPath, tbText,))



#====================================================================


# Creates a map of ICSD info based on the dir name

def getIcsdMap( bugLev, relPath):
  '''
  Creates a map of ICSD info based on the dir name.

  Example parsing relPath: ::

    .../icsd_083665/icsd_083665.cif/ls-anti-ferro-7/relax_cellshape/1
             ^^^^^^                 ^^^^^^^^      ^ ^^^^^^^^^^^^^^^ ^
            icsdNum                 magType  magNum relaxType       relaxNum

  Resulting map: ::

    icsdMap = {
      'icsdNum'   : 83665,
      'magType'   : 'lsaf',
      'magNum'    : 7,
      'relaxType' : 'rc',
      'relaxNum'  : 1,
    }

  magNum is only extracted for the ``hs-anti-ferro`` and
  ``ls-anti-ferro`` types; otherwise it is 0.  If relPath contains
  neither ``relax_cellshape`` nor ``relax_ions``, relaxType is
  ``'std'`` and relaxNum is 0.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.  Not used here.

  * relPath (str): Relative path.

  **Returns**

  * map similar to the above.

  Calls ``throwerr`` if the ICSD number, the magnetic type, a
  required magNum, or a required relax subfolder number is not found.
  '''

  # Extract the ICSD number from the path '.../icsd_dddddd/...'
  mat = re.match(r'^(.*/)?icsd_(\d{6})/.*$', relPath)
  if mat is None:
    throwerr('no icsd id found in relPath: "%s"' % (relPath,))
  icsdNum = int( mat.group(2))

  # Extract magnetic moment type from the path like '.../ls-anti-ferro-33/...'
  # The leading '/' in each name keeps '/ferro' and '/anti-ferro' from
  # matching inside the longer 'hs-...' and 'ls-...' names.
  magType = None
  magNum = 0
  magPairs = [
    ['/anti-ferro',    'afer'],
    ['/ferro',         'fer'],
    ['/hs-ferro',      'hsf'],
    ['/hs-anti-ferro', 'hsaf'],
    ['/ls-ferro',      'lsf'],
    ['/ls-anti-ferro', 'lsaf'],
    ['/non-magnetic',  'nm'],
  ]
  for (tname,tcode) in magPairs:
    ix = relPath.find( tname)
    if ix < 0: continue
    magType = tcode
    if tcode in ('hsaf', 'lsaf'):
      # The configuration number follows the name, like '-7'
      rest = relPath[(ix+len(tname)):]
      mat = re.match(r'^-(\d+).*$', rest)
      if mat is None:
        throwerr('no magNum found in relPath: "%s"' % (relPath,))
      magNum = int( mat.group(1))
    break                    # the first matching name wins
  if magType is None:
    throwerr('no magType found in relPath: "%s"' % (relPath,))

  # Extract relaxType from relPath

  # If relPath contains 'relax_cellshape', set relaxType = 'rc'
  # and relaxNum = the number of the subfolder.  Similarly for 'relax_ions'.
  relaxType = 'std'
  relaxNum = 0
  relaxPairs = [['relax_cellshape', 'rc'],
                ['relax_ions',      'ri']]
  for (tname,tcode) in relaxPairs:
    ix = relPath.find( tname)
    if ix < 0: continue
    relaxType = tcode
    rest = relPath[(ix+len(tname)):]
    mat = re.match(r'^/(\d+).*$', rest)
    if mat is None:
      throwerr('no subfolder found in relPath: "%s"' % (relPath,))
    relaxNum = int( mat.group(1))
    break

  # Save the info from the path name
  icsdMap = {
    'icsdNum'   : icsdNum,
    'magType'   : magType,
    'magNum'    : magNum,
    'relaxType' : relaxType,
    'relaxNum'  : relaxNum,
  }

  return icsdMap



#====================================================================



def unused_extractPotcar( fname):
  '''
  (No longer used): Reads and saves the header sections from a POTCAR file.

  Saves every section starting with 'PAW_PBE' to the following
  line 'Description'.  The 'PAW_PBE' line is part of the saved
  section; the 'Description' line is not.  A 'Description' line that
  is not preceded by a 'PAW_PBE' line produces no entry, and a
  section still open at end of file is discarded.

  **Parameters**:

  * fname (str): Name of the input POTCAR file.

  **Returns**

  * List of pairs: [ specieName, listOfSavedLines]
    The saved lines keep their trailing newline.
  '''

  headerPat = re.compile(r'^\s*PAW_PBE\s+(\S+)\s+.*$')
  endPat = re.compile(r'^\s*Description\s*$')

  pseudos = []          # list of pairs: [ specieName, listOfSavedLines]
  capturing = False
  specie = None
  saves = []
  with open( fname) as fin:
    for line in fin:
      mat = headerPat.match( line)
      if mat:
        capturing = True
        specie = mat.group(1)

      if endPat.match( line):
        # Only a section opened by a PAW_PBE header is recorded;
        # otherwise we would emit a meaningless [None, []] pair.
        if capturing:
          pseudos.append( [ specie, saves])
        capturing = False
        specie = None
        saves = []

      if capturing:
        saves.append( line)

  return pseudos


#====================================================================


def parseMetadata( fpath):
  '''
  Parses a metadata file and returns a corresponding map.

  Each field starts on a line like ``:fieldName: value`` and
  continues up to the next field line or comment line (a line
  starting with ``#``).  Blank lines between fields are ignored.

  **Parameters**:

  * fpath (str): Name of the input metadata file.

  **Returns**

  * Map of metadata values.  The map structure is:

    =============  ==========  ===========================================
    Key            Value Type  Source description
    =============  ==========  ===========================================
    firstName      str         Researcher first name
    lastName       str         Researcher last name
    spacegroup     str         international space group number
                               (optional)
    parents        str[]       comma separated list of digests of
                               previous runs, if any (optional).
                               Each must be 128 lowercase hex chars.
    publications   str[]       comma separated list of DOIs
                               without the leading ``http://``
    standards      str[]       comma separated list of standardized
                               keywords
    keywords       str[]       comma separated list of any keywords
    notes          str         textual notes
    =============  ==========  ===========================================

  The names and spacegroup may be specified only once.  The list
  fields and notes may be repeated; their values accumulate.

  Calls ``throwerr`` for a malformed line, an unknown or missing
  field, or an invalid value.
  '''

  firstNameTag = 'firstName'
  keywordsTag = 'keywords'
  lastNameTag = 'lastName'
  spacegroupTag = 'spacegroup'
  notesTag = 'notes'
  parentsTag = 'parents'
  publicationsTag = 'publications'
  standardsTag = 'standards'

  nameTags = [firstNameTag, lastNameTag]
  listTags = [parentsTag, publicationsTag, standardsTag, keywordsTag]

  requiredFields = [firstNameTag, lastNameTag,
    publicationsTag, standardsTag, keywordsTag, notesTag]

  checkFileFull( fpath)
  with open( fpath) as fin:
    lines = fin.readlines()       # not stripped.  Includes final \n.

  metaMap = {}
  if len(lines) < 2:
    throwerr('invalid metadata.  file: "%s"' % ( fpath,))

  iline = 0
  while iline < len(lines):
    line = lines[iline]
    if re.match(r'^\s*$', line) or line.startswith('#'):
      iline += 1           # ignore blank lines and comments
      continue

    mat = re.match(r'^:(\w+):(.*)$', line)
    if not mat:
      throwerr(('invalid metadata.'
        + '  file: "%s"  approx iline: %d  line: "%s"')
        % (fpath, iline, line,))
    field = mat.group(1)
    value = mat.group(2)        # init value
    isList = field in listTags

    if isList:
      # Strip before we test for trailing comma below
      value = value.strip()

    # Append lines onto value as we scan for
    # the next comment or field
    iline += 1
    while iline < len(lines):
      line = lines[iline]
      if line.startswith('#') or re.match(r'^:(\w+):(.*)', line): break
      if isList:
        piece = line.strip()
        # Skip blank continuation lines: joining one on would leave a
        # dangling comma and hence an empty list item.
        if len(piece) > 0:
          # If user forgot a trailing comma, help them out.
          if len(value.strip()) > 0 and not value.endswith(','):
            value += ','
          value += piece
      else:
        value += line        # line includes \n
      iline += 1

    value = value.strip()     # get rid of whitespace at ends

    # Note: in the messages below, iline and line refer to the line
    # following the field's value, hence "approx".

    if field in nameTags:
      # An empty name is rejected here rather than raising
      # IndexError on value[0].
      if len(value) == 0 \
        or re.search(r'[^-a-zA-Z]', value) \
        or (not re.match(r'^[A-Z]$', value[0])) \
        or (not re.match(r'^[a-zA-Z]$', value[-1])):
        throwerr(('invalid name: "%s"'
          + '  file: "%s"  approx iline: %d  line: "%s"')
          % (value, fpath, iline, line,))
      if field in metaMap:
        throwerr(('multiple spec of field: "%s"'
          + '  file: "%s"  approx iline: %d  line: "%s"')
          % (field, fpath, iline, line,))
      metaMap[field] = value

    elif field == spacegroupTag:
      # NOTE(review): only the start of the value is checked, so
      # trailing non-digits are accepted -- confirm whether intended.
      if not re.match(r'[0-9]+', value):
        throwerr(('invalid number: "%s"'
          + '  file: "%s"  approx iline: %d  line: "%s"')
          % (value, fpath, iline, line,))
      if field in metaMap:
        throwerr(('multiple spec of field: "%s"'
          + '  file: "%s"  approx iline: %d  line: "%s"')
          % (field, fpath, iline, line,))
      metaMap[field] = value

    elif isList:
      # Convert value to list of keywords.
      # Insure keywords don't contain illegal chars.
      vals = []
      if len( value) > 0:
        for tok in value.split(','):
          tok = tok.strip()
          errMsg = ''
          if field == parentsTag:
            # If sha1: 40 chars; if sha512: 128 chars
            if len(tok) != 128 or re.search(r'[^a-f0-9]', tok):
              errMsg += 'Invalid parent (must be 128 chars).\n'
          if field == publicationsTag:
            # Test the item itself, not the field name.
            if tok.startswith('http'):
              errMsg += 'Specify DOI without the initial http://\n'
          if field in [standardsTag, keywordsTag]:
            if len(tok) < 1  \
              or re.search(r'[^-+_=.@%&a-zA-Z0-9]', tok) \
              or (not re.match(r'^[a-zA-Z]$', tok[0])):
              errMsg += 'Invalid keyword: "%s"\n' % (tok,)
          if len(errMsg) > 0:
            errMsg += '  Invalid item: "%s"\n' % (tok,)
            errMsg += '  Containing value: "%s"\n' % (value,)
            errMsg += '  file: "%s"  approx iline: %d  line: "%s"\n' \
              % (fpath, iline, line,)
            throwerr( errMsg)
          vals.append( tok)

      # Append values to previous ones
      if field not in metaMap: metaMap[field] = []
      metaMap[field] += vals

    elif field == notesTag:
      if field in metaMap:
        metaMap[field] += '\n' + value       # append text
      else: metaMap[field] = value           # set text

    else:
      throwerr('unknown field: \"%s\"  approx iline: %d  file: \"%s\"' \
        % (field, iline, fpath,))

  for nm in requiredFields:
    if nm not in metaMap:
      throwerr('missing field: \"%s\" in file: \"%s\"' % (nm, fpath,))

  legalStandards = [
    'fere',
    'gwvd',
    'wave',
    'aexp',
    'post-lopt',
    'post-chi',
    'post-chibs',
    'post-dos',
    'vexp',
  ]
  for key in metaMap[ standardsTag]:
    if key not in legalStandards:
      throwerr('illegal standard "%s" in file: "%s"' % (key, fpath,))

  return metaMap


#====================================================================


def checkFileFull( fname):
  '''
  Verifies that fname passes the checkFile tests and is not empty.

  **Parameters**:

  * fname (str): Name of the input file.

  **Returns**

  * None

  **Raises**:

  * Exception if checkFile rejects fname, or if the file has length == 0.
  '''

  # Name validity, existence and readability are delegated to checkFile.
  checkFile( fname)

  # Anything with at least one byte is acceptable.
  if os.path.getsize( fname) > 0: return
  throwerr('file is empty: \"%s\"' % (fname,))

#====================================================================

def checkFile( fname):
  '''
  Verifies that fname names an existing, readable regular file.
  The file is allowed to be empty.

  **Parameters**:

  * fname (str): Name of the input file.

  **Returns**

  * None

  **Raises**:

  * Exception if fname is not a non-empty str/unicode value,
    is not a regular file, or is not readable.
  '''

  # The tests run in order; the first one that fails picks the message.
  typeName = type( fname).__name__
  nameIsText = typeName == 'str' or typeName == 'unicode'
  if not nameIsText or len( fname) == 0:
    problem = 'invalid file name: "%s"'
  elif not os.path.isfile( fname):
    problem = 'file not found: \"%s\"'
  elif not os.access( fname, os.R_OK):
    problem = 'file is not readable: \"%s\"'
  else:
    problem = None

  if problem is not None:
    throwerr( problem % (fname,))

#====================================================================

def runSubprocess( bugLev, wkDir, args, showStdout):
  '''
  Calls the executable indicated by args and waits for completion.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.
    If >= 1, the command is printed before it is run.
    If >= 5, the subprocess stdout and stderr are not captured:
    the child writes directly to this process's stdout and stderr.

  * wkDir (str): The working directory to use for the subprocess.

  * args (str[]): The executable name (in args[0]) followed by
    the command line arguments in args[1:].

  * showStdout (boolean): If True, print the captured stdout and
    stderr from the subprocess.

  **Returns**

  * None

  **Raises**:

  * Exception if subprocess rc != 0.
  '''

  if bugLev >= 1:
    print('runSubprocess: args: %s' % (args,))
    print('runSubprocess: cmd: %s' % (' '.join(args),))

  # pipe == None means the child inherits our stdout and stderr.
  if bugLev >= 5: pipe = None
  else: pipe = subprocess.PIPE
  proc = subprocess.Popen(
    args,
    shell=False,
    cwd=wkDir,
    stdin=subprocess.PIPE, stdout=pipe, stderr=pipe,
    bufsize=10*1000*1000)
  (stdout, stderr) = proc.communicate()

  # communicate() returns None for a stream that was not piped
  # (the bugLev >= 5 case).  Normalize to empty strings so the
  # len() calls and the messages below work in both modes.
  if stdout is None: stdout = ''
  if stderr is None: stderr = ''

  rc = proc.returncode
  if rc != 0:
    msg = 'subprocess failed.\n'
    msg += 'wkDir: %s\n' % (wkDir,)
    msg += 'args: %s\n' % (args,)
    msg += 'rc: %d\n' % (rc,)
    msg += '\n===== stdout:\n%s\n' % (stdout,)
    msg += '\n===== stderr:\n%s\n' % (stderr,)
    throwerr( msg)
  if showStdout:
    print('runSubProcess: len(stderr): %d' % (len(stderr),))
    print('runSubProcess: len(stdout): %d' % (len(stdout),))
    if len(stderr) > 0:
      print('\n===== stderr:\n%s\n' % (stderr,))
    if len(stdout) > 0:
      print('\n===== stdout:\n%s\n' % (stdout,))

#====================================================================



def findNumFiles( tag, dir):
  '''
  Counts the directory entries named tag in the tree rooted at dir.
  An entry is counted when its name equals tag, whether it is a
  file or a subdirectory.  Subdirectories are searched recursively.

  **Parameters**:

  * tag (str): The name to search for.

  * dir (str): The root of the directory tree to search.

  **Returns**

  * integer number of matches.

  **Raises**:

  * Exception if dir is not a directory.
  '''

  if not os.path.isdir( dir): throwerr('not a dir: %s' % (dir,))

  entries = sorted( os.listdir( dir))
  # Matches at this level.
  total = entries.count( tag)
  # Matches in each subdirectory, visited in sorted order.
  for entry in entries:
    child = dir + '/' + entry
    if os.path.isdir( child):
      total += findNumFiles( tag, child)      # recursion
  return total

#====================================================================

# Coord with parseUui, below.
def formatUui( curDate, userId, absTopDir):
  '''
  Builds a uui (wrapId) from a timestamp, a user id and a directory.

  A wrapId looks like: ::

    @2013.08.13@12.58.22.735311@someUser@home.someUser.redmesa.old.td.testlada.2013.04.06.Fe.O@

  **Parameters**:

  * curDate (datetime.datetime): The current date.

  * userId (str): The current user id.

  * absTopDir (str): The absolute path of the top dir.

  **Returns**

  * A wrapId

  **Raises**:

  * Exception if absTopDir does not start with '/'.
  '''

  if not absTopDir.startswith('/'): throwerr('absTopDir not abs')

  # Date and time fields, separated by '@'.
  stamp = curDate.strftime('%Y.%m.%d@%H.%M.%S.%f')

  # Drop the leading '/', then turn every '/' and '@' into '.'.
  flatDir = absTopDir[1:].replace('/', '.').replace('@', '.')

  return '@%s@%s@%s@' % (stamp, userId, flatDir,)

#==========

# Coord with formatUui, above.
# If matches, returns wrapId == stg.
# Else returns None.

def parseUui( stg):
  '''
  Parses a uui (wrapId).

  A wrapId looks like: ::

    @2013.08.13@12.58.22.735311@someUser@home.someUser.redmesa.old.td.testlada.2013.04.06.Fe.O@

  The input string may have subdirs info after the initial wrapId.

  **Parameters**:

  * stg (str): The string to be parsed.

  **Returns**

  * If stg starts with a valid wrapId, returns that wrapId
    (the leading part of stg, without any trailing subdirs info).
    Else returns None.  A string that has the right layout but an
    impossible date or time (such as month 13) is not valid.
  '''

  # Groups: 1 = whole wrapId, 2 = date, 3 = time, 4 = user id, 5 = dir.
  uuiPattern = r'^(@(\d{4}\.\d{2}\.\d{2})@(\d{2}\.\d{2}\.\d{2}\.\d{6})@([^./@]*)@([^/@]*)@)'
  mat = re.match( uuiPattern, stg)
  if mat is None: return None

  (wrapId, dateStg, timeStg) = mat.group( 1, 2, 3)

  # The pattern only checks digit counts.  strptime checks that the
  # fields form a real date and time; it raises ValueError otherwise.
  try:
    datetime.datetime.strptime(
      dateStg + ' ' + timeStg, '%Y.%m.%d %H.%M.%S.%f')
  except ValueError:
    return None

  return wrapId

#====================================================================

# Used by deepCompare to unify types

def fixType( val):
  '''
  Converts val to a common type, selected by the name of its type:

  * float64, int64, int: converted with float()
  * string_: converted with str()
  * list: converted with np.array()

  Any other value is returned unchanged.
  '''

  typeName = type( val).__name__
  if typeName in ('float64', 'int64', 'int'): return float( val)
  if typeName == 'string_': return str( val)
  if typeName == 'list': return np.array( val)
  return val

#====================================================================


# Returns None if match; else returns error msg.

def deepCompare( bugLev, epsilon, taga, va, tagb, vb):
  '''
  Compares two objects.

  Returns None if the same; else returns error msg.

  Used by :func:`readVasp.readVasp` to compare the results
  of parsing vasprun.xml and OUTCAR.

  **Parameters**:

  * bugLev (int): Debug level, passed on to compareMaps.

  * epsilon (float): Two floats match if abs( va - vb) <= epsilon.

  * taga (str): Name of va, used in messages.

  * va (any): First value.

  * tagb (str): Name of vb, used in messages.

  * vb (any): Second value.

  **Returns**

  * None if va and vb match; else a string describing the first
    difference found.

  **Raises**:

  * Exception if the type of the values is not handled.
  '''

  cmsg = None

  # Unify types first: numeric scalars become float, lists become ndarray.
  va = fixType( va)
  vb = fixType( vb)

  tpa = type(va).__name__
  tpb = type(vb).__name__

  if tpa != tpb:
    cmsg = 'types differ: %s: %s  %s: %s' % ( taga, tpa, tagb, tpb,)

  # Identity test, not ==.  This branch is evaluated for ndarrays too,
  # and an ndarray == None comparison is elementwise in current numpy,
  # so its truth value raises ValueError for arrays with more than
  # one element.
  elif va is None and vb is None:
    pass

  # NOTE: compareMaps takes no epsilon argument, so the epsilon passed
  # to this function is not forwarded to the values inside maps.
  elif tpa == 'dict':
    cmsg = compareMaps( bugLev, taga, va, tagb, vb)

  elif tpa == 'instance':
    cmsg = compareMaps( bugLev, taga, va.__dict__, tagb, vb.__dict__)

  # fixType converts lists to ndarray, so this branch is only a
  # safeguard in case that conversion is ever changed.
  elif tpa == 'list':
    if len(va) != len(vb):
      cmsg = 'len mismatch: %s: %d  %s: %d' % (taga, len(va), tagb, len(vb),)
    else:
      for ii in range(len(va)):
        cmsg = deepCompare( bugLev, epsilon, taga, va[ii], tagb, vb[ii])
        if cmsg is not None: break

  elif tpa == 'ndarray':
    if va.shape != vb.shape:
      cmsg = 'shape mismatch: %s: %s  %s: %s' \
        % (taga, va.shape, tagb, vb.shape,)
    else:
      # Compare element by element; stop at the first mismatch.
      fa = va.flatten()
      fb = vb.flatten()
      for ii in range( fa.size):
        cmsg = deepCompare( bugLev, epsilon, taga, fa[ii], tagb, fb[ii])
        if cmsg is not None: break

  elif tpa == 'float':
    if abs( va - vb) > epsilon:
      # Relative error is undefined when the reference value is 0.
      if va == 0: relerr = float('NaN')
      else: relerr = (vb - va) / va
      cmsg = ('float value mismatch: %s: %g  %s: %g  vb-va: %g'
        + '  (vb-va)/va: %g') % (taga, va, tagb, vb, vb - va, relerr)

  elif tpa in [ 'bool', 'datetime', 'int', 'str']:
    if va != vb:
      cmsg = 'scalar value mismatch: %s: %s  %s: %s' \
        % (taga, repr(va), tagb, repr(vb),)

  else: throwerr('unknown type: %s' % (type(va),))

  return cmsg


#====================================================================


def compareMaps( bugLev, taga, va, tagb, vb):
  '''
  Compares two dictionaries, key by key, in sorted key order.

  A key found in only one map is reported as unique to that map.
  The values of a key found in both maps are compared with
  deepCompare, using a fixed tolerance of 5.e-5.

  If bugLev >= 5, each finding is also printed as it is made.

  Returns None if the same; else returns error message
  (one entry per difference, concatenated).
  '''

  epsilon = 5.e-5
  findings = []      # one message per difference

  # Every key of either map, visited once in sorted order.
  for key in sorted( set( va) | set( vb)):
    if key not in vb:
      cm = 'Unique key for %s: %s\n' % (taga, key,)
    elif key not in va:
      cm = 'Unique key for %s: %s\n' % (tagb, key,)
    else:
      aval = va[key]
      bval = vb[key]
      compMsg = deepCompare( bugLev, epsilon, taga, aval, tagb, bval)
      if compMsg is None:
        if bugLev >= 5:
          print('compareMaps: value match.  key: %s' % (key,))
        continue
      cm = ('Value mismatch: key: %s  msg: %s\n' \
        + '  %s: %s\n  %s: %s\n') \
        % (key, compMsg, taga, aval, tagb, bval,)

    findings.append( cm)
    if bugLev >= 5: print('compareMaps: %s' % (cm,))

  if len( findings) == 0: return None
  return ''.join( findings)


#====================================================================

def printMap( tag, vmap, maxLen):
  '''
  Prints the entries of a map, one per line, in sorted key order.

  **Parameters**:

  * tag (str): Explanatory name of the map.

  * vmap (map): The map to print.  May be None.

  * maxLen (int): If > 0, values whose string form is longer than
    maxLen are cut to maxLen characters followed by '...'.
    If 0, values are printed in full.

  **Returns**

  * None
  '''

  print('\n%s' % (tag,))
  if vmap is None:
    print('    Map is None.')
    return

  print('    Map len: %d' % (len(vmap),))
  for key in sorted( vmap.keys()):
    text = str( vmap[key])
    # Truncate only when a limit is set and the text exceeds it.
    if 0 < maxLen < len( text): text = text[:maxLen] + '...'
    print('    %s: %s' % (key, text,))

#====================================================================

def formatMatrix( mat):
  '''
  Formats a 2D matrix as text, one line per row.
  Each line starts with the row index in brackets, followed by
  the row's values printed with 4 decimal places.

  **Parameters**:

  * mat (float[][] or numpy 2D array): input matrix

  **Returns**

  * string representation of the matrix.
  '''

  lines = []
  for irow, row in enumerate( mat):
    cells = ''.join( '  %8.4f' % (val,) for val in row)
    lines.append( '  [%2d]  ' % (irow,) + cells + '\n')
  return ''.join( lines)


#====================================================================

def parseBoolean( stg):
  '''
  Converts a string to a boolean, ignoring case.

  'false' and 'no' give False; 'true' and 'yes' give True.
  Any other string raises Exception.
  '''

  meanings = {
    'false': False,
    'no': False,
    'true': True,
    'yes': True,
  }
  word = stg.lower()
  if word not in meanings:
    throwerr('invalid boolean: "%s"' % (stg,))
  return meanings[word]

#====================================================================


def throwerr( msg):
  '''
  Raises Exception carrying the given error message.

  **Parameters**:

  * msg (str): Error message.

  **Returns**

  * (Never returns)

  **Raises**

  * Exception
  '''

  err = Exception( msg)
  raise err


#====================================================================

# Run main only when this file is executed as a script.
if __name__ == '__main__':
  main()
# Navigation
#
# Source code for nrelmat.wrapUpload
#
# Quick search
#
# Navigation