Source code for nrelmat.ScanXml

#!/usr/bin/env python
# Copyright 2013 National Renewable Energy Laboratory, Golden CO, USA
# This file is part of NREL MatDB.
#
# NREL MatDB is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# NREL MatDB is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with NREL MatDB.  If not, see <http://www.gnu.org/licenses/>.


import datetime, re, sys, traceback, os.path
import xml.etree.cElementTree as etree

import numpy as np
np.seterr( all='raise', under='ignore')

import readVasp


#====================================================================

def badparms( msg):
  print '\nError: %s' % (msg,)
  print 'Parms:'
  print '  -bugLev    <int>      debug level'
  print '  -inFile    <string>   input file'
  print '  -maxLev    <int>      max xml print level'
  print ''
  sys.exit(1)

#====================================================================

[docs]def main():
  '''
  Test driver: Extracts info from a VASP vasprun.xml file.

  Command line parameters:

  ================  =========    ==============================================
  Parameter         Type         Description
  ================  =========    ==============================================
  **-bugLev**       integer      Debug level.  Normally 0.
  **-inFile**       string       Input file
  **-maxLev**       int          max xml print level
  ================  =========    ==============================================
  '''

  bugLev = 0
  inFile = None
  maxLev = 0

  if len(sys.argv) % 2 != 1:
    badparms('Parms must be key/value pairs')
  for iarg in range( 1, len(sys.argv), 2):
    key = sys.argv[iarg]
    val = sys.argv[iarg+1]
    if key == '-bugLev': bugLev = int( val)
    elif key == '-inFile': inFile = val
    elif key == '-maxLev': maxLev = int( val)
    else: badparms('unknown key: "%s"' % (key,))

  if bugLev == None: badparms('parm not specified: -bugLev')
  if inFile == None: badparms('parm not specified: -inFile')
  if maxLev == None: badparms('parm not specified: -maxLev')

  resObj = ResClass()
  parseXml( bugLev, inFile, maxLev, resObj)


#====================================================================
#====================================================================

[docs]class ResClass:
  '''
  An empty class used as a data container for results.
  '''

  def __str__(self):
    keys = self.__dict__.keys()
    keys.sort()
    msg = ''
    for key in keys:
      val = self.__dict__[key]
      stg = str( val)
      if stg.find('\n') >= 0: sep = '\n'
      else: sep = ' '
      msg += '  %s:  type: %s  val:%s%s\n' % (key, type(val), sep, val,)
    return msg

#====================================================================
#====================================================================


# Fills resObj.

[docs]def parseXml( bugLev, inFile, maxLev, resObj):
  '''
  Extracts info from the vasprun.xml file from a VASP run,
  using the Python xml.etree.cElementTree API.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.
  * inFile (str): Path of the input vasprun.xml file.
  * resObj (class ResClass): data object: we set attributes here.

  **Returns**:

  * None
  '''

  try:
    tree = etree.parse( inFile)
  except Exception, exc:
    throwerr(('parseXml: invalid xml in file: "%s"\n'
      + '  Msg: %s\n') % (inFile, repr(exc),))

  root = tree.getroot()
  if bugLev >= 1: printNode( root, 0, maxLev)      # node, curLev, maxLev

  if bugLev >= 5: print '\n===== program, version, date etc =====\n'

  # xxx program, version, subversion, etc

  # PyLada: vasp/extract/base.py: datetime()
  # OUTCAR: use the 1 occurance of:
  #   executed on             LinuxIFC date 2013.03.11  09:32:24
  dtStg = getString( root, 'generator/i[@name=\'date\']')
  tmStg = getString( root, 'generator/i[@name=\'time\']')
  dateFmtIn = '%Y %m %d %H:%M:%S'
  dateFmtOut = '%Y-%m-%d %H:%M:%S'
  resObj.runDate = datetime.datetime.strptime(
    '%s %s' % (dtStg, tmStg), dateFmtIn)
  if bugLev >= 5: print 'runDate: %s' % (resObj.runDate.strftime( dateFmtOut),)


  # iterTimes
  # Each node is has cpuTime, wallTime:
  #       <time name='totalsc'>22.49 24.43</time>
  nodes = root.findall('calculation/time[@name=\'totalsc\']')
  iterCpuTimes = []
  iterRealTimes = []
  for node in nodes:
    txt = node.text
    toks = txt.split()
    if len(toks) == 1:
      # Kluge: sometimes the two time fields run together:
      #     <time name="totalsc">18560.1718566.89</time>
      # should be:
      #     <time name="totalsc">18560.17 18566.89</time>
      # In this case, try to split it,
      # or just use the first time for both values.
      tok = toks[0]
      ix = tok.find('.')
      if ix < 0: throwerr('invalid times: %s' % (etree.tostring(node),))
      iy = tok.find('.', ix + 1)
      if iy < 0 or iy != len(tok) - 3:
        throwerr('invalid times: %s' % (etree.tostring(node),))
      tmStga = tok[:ix+3]
      tmStgb = tok[ix+3:]
    elif len(toks) == 2:
      tmStga = toks[0]
      tmStgb = toks[1]
    else: throwerr('invalid times: %s' % (etree.tostring(node),))
    iterCpuTimes.append( float( tmStga))
    iterRealTimes.append( float( tmStgb))

  resObj.iterCpuTimes = iterCpuTimes
  resObj.iterRealTimes = iterRealTimes
  if bugLev >= 5:
    print 'iterCpuTimes: %s' % (resObj.iterCpuTimes,)
    print 'iterRealTimes: %s' % (resObj.iterRealTimes,)


  if bugLev >= 5: print '\n===== incar parameters =====\n'

  # algo
  # PyLada: vasp/extract/base.py: algo()
  # OUTCAR: use the 1 occurance of:
  #   ALGO = Fast
  algoPath = 'incar/i[@name=\'ALGO\']'
  if len( root.findall( algoPath)) == 0:
    resObj.algo = 'n/a'
  else:
    resObj.algo = getString( root, algoPath)
  if bugLev >= 5: print 'algo: "%s"' % (resObj.algo,)

  # encut
  # PyLada: vasp/extract/base.py: encut()
  # OUTCAR: use the first occurance of:
  #   ENCUT  =  252.0 eV  18.52 Ry    4.30 a.u.   4.08  4.08 15.92*2*pi/ulx,y,z
  #   ENCUT = 252.0
  resObj.encut_ev = getScalar( root, 'incar/i[@name=\'ENCUT\']', float)
  if bugLev >= 5: print 'encut_ev: %g' % (resObj.encut_ev,)

  # ldauType
  # PyLada: vasp/extract/base.py: LDAUType()
  # OUTCAR: use the first occurance of:
  #   LDA+U is selected, type is set to LDAUTYPE =  2
  #   LDAUTYPE = 2
  #rawLdauType = getScalar( root, 'incar/v[@name=\'LDAUTYPE\']', int)
  #if rawLdauType == 1: resObj.ldauType = 'liechtenstein'
  #elif rawLdauType == 2: resObj.ldauType = 'dudarev'
  #else: throwerr('unknown rawLdauType: %d' % (rawLdauType,))
  #if bugLev >= 5:
  #  print 'rawLdauType: %d  ldauType: %s' % (rawLdauType, resObj.ldauType,)

  tstg = getString( root, 'incar/i[@name=\'SYSTEM\']')
  resObj.systemName = tstg.strip('\'"')   # get rid of extraneous quotes
  if bugLev >= 5: print 'systemName: "%s"' % (resObj.systemName,)



  if bugLev >= 5: print '\n===== general parameters =====\n'

  resObj.generalName = getString(
    root, 'parameters/separator[@name=\'general\']/i[@name=\'SYSTEM\']')
  if bugLev >= 5: print 'generalName: "%s"' % (resObj.generalName,)



  if bugLev >= 5: print '\n===== electronic parameters =====\n'

  lst = root.findall('parameters/separator[@name=\'electronic\']')
  if len(lst) != 1: throwerr('electronic parameters not found')
  elecNode = lst[0]

  # ialgo
  # PyLada: use the 1 occurance of:
  #   Electronic relaxation 2 (details)
  #     IALGO  =     68    algorithm
  resObj.ialgo = getScalar( elecNode, 'i[@name=\'IALGO\']', int)
  resObj.ediff = getScalar( elecNode, 'i[@name=\'EDIFF\']', float)
  if bugLev >= 5:
    print 'ialgo: %d' % (resObj.ialgo,)
    print 'ediff: %g' % (resObj.ediff,)

  # numBand = nbands
  # Caution: in some cases NBANDS != eigenMrr['eigene'].shape[2]
  # So we use the eigene dimension instead.
  # See further below.
  prmNumBand = getScalar( elecNode, 'i[@name=\'NBANDS\']', int)
  if bugLev >= 5: print 'prmNumBand: %d' % (prmNumBand,)

  # numElectron = nelect
  # PyLada: vasp/extract/base.py: nelect()
  # OUTCAR: use the 1 occurance of:
  #     NELECT =      48.0000    total number of electrons
  resObj.numElectron = getScalar( elecNode, 'i[@name=\'NELECT\']', float)
  if bugLev >= 5: print 'numElectron: %d' % (resObj.numElectron,)

  # icharg
  resObj.icharg = getScalar(
    elecNode,
    'separator[@name=\'electronic startup\']/i[@name=\'ICHARG\']',
    int)
  if bugLev >= 5: print 'icharg: %g' % (resObj.icharg,)

  # numSpin == ispin
  resObj.numSpin = getScalar(
    elecNode, 'separator[@name=\'electronic spin\']/i[@name=\'ISPIN\']', int)
  if bugLev >= 5: print 'numSpin: %g' % (resObj.numSpin,)



  if bugLev >= 5: print '\n===== ionic parameters =====\n'

  # Some parameters like IBRION are also found in INCAR, sometimes.
  # But apparently they are always in the ionic parameters section.
  lst = root.findall('parameters/separator[@name=\'ionic\']')
  if len(lst) != 1: throwerr('ionic parameters not found')
  ionNode = lst[0]
  resObj.ibrion = getScalar( ionNode, 'i[@name=\'IBRION\']', int)
  resObj.isif = getScalar( ionNode, 'i[@name=\'ISIF\']', int)
  resObj.posScaleFactor = getScalar( ionNode, 'i[@name=\'SCALEE\']', float)
  if bugLev >= 5:
    print 'ibrion: %d' % (resObj.ibrion,)
    print 'isif: %d' % (resObj.isif,)
    print 'posScaleFactor: %g' % (resObj.posScaleFactor,)


  if bugLev >= 5: print '\n===== symmetry parameters =====\n'

  # Some parameters like ISYM are also found in INCAR, sometimes.
  # But apparently they are always in the symmetry parameters section.
  lst = root.findall('parameters/separator[@name=\'symmetry\']')
  if len(lst) != 1: throwerr('symmetry parameters not found')
  symNode = lst[0]
  resObj.isym = getScalar( symNode, 'i[@name=\'ISYM\']', int)
  if bugLev >= 5:
    print 'isym: %d' % (resObj.isym,)


  if bugLev >= 5: print '\n===== writing parameters =====\n'

  # Some parameters like LOPTICS are also found in INCAR, sometimes.
  # But apparently they are always in the writing parameters section.
  lst = root.findall('parameters/separator[@name=\'writing\']')
  if len(lst) != 1: throwerr('writing parameters not found')
  writeNode = lst[0]
  resObj.loptics = getScalar( writeNode, 'i[@name=\'LOPTICS\']', bool)
  resObj.lwave = getScalar( writeNode, 'i[@name=\'LWAVE\']', bool)
  if bugLev >= 5:
    print 'loptics: %s' % (resObj.loptics,)
    print 'lwave: %s' % (resObj.lwave,)



  if bugLev >= 5: print '\n===== atom info =====\n'

  # atomTypeMrr = map containing array.  Example (some whitespace omitted):
  #   _dimLens: [2]
  #   _dimNames: ['type']
  #   _fieldNames: ['atomspertype' 'element' 'mass' 'valence' 'pseudopotential']
  #   _fieldTypes: ['i' 's' 'f' 'f' 's']
  #   atomspertype: [1 4]
  #   element: ['C ' 'Fe']
  #   mass: [ 12.011  55.847]
  #   valence: [ 4.  8.]
  #   pseudopotential: [' PAW_PBE C_s 06Sep2000 ' ' PAW_PBE Fe 06Sep2000 ']

  atomTypeMrr = getArrayByPath(
    bugLev, root, 'atominfo/array[@name=\'atomtypes\']')
  resObj.typeNames       = atomTypeMrr['element']
  resObj.typeNums        = atomTypeMrr['atomspertype']
  resObj.typeMasses_amu  = atomTypeMrr['mass']
  resObj.typeValences    = atomTypeMrr['valence']
  resObj.typePseudos     = atomTypeMrr['pseudopotential']
  resObj.numAtom = sum( resObj.typeNums)

  if bugLev >= 5:
    print '\natomTypeMrr:'
    printMrr( atomTypeMrr)
    print '\natomTypes:'
    print 'typeNames: %s' % ( resObj.typeNames,)
    print 'typeNums: %s' % ( resObj.typeNums,)
    print 'typeMasses_amu: %s' % ( resObj.typeMasses_amu,)
    print 'typeValences: %s' % ( resObj.typeValences,)
    print 'typePseudos: %s' % ( resObj.typePseudos,)
    print 'numAtom: %d' % ( resObj.numAtom,)

  numType = len( resObj.typeNames)
  if len( resObj.typeNums) != numType: 
    self.throwerr('len( typeNums) mismatch')
  if len( resObj.typeMasses_amu) != numType: 
    self.throwerr('len( typeMasses_amu) mismatch')
  if len( resObj.typeValences) != numType: 
    self.throwerr('len( typeValences) mismatch')
  if len( resObj.typePseudos) != numType: 
    self.throwerr('len( typePseudos) mismatch')

  ## Sort parallel arrays typeNames, typeNums, etc,
  ## by typeNames alphabetic order.
  ## Use an index sort with typeIxs.
  ## In rare cases like icsd_024360.cif/hs-ferro
  ## and icsd_060845_GW, the names are out of order.

  ## Sort to set typeIxs[newIx] == oldIx.

  #typeIxs = range( numType)
  #def sortFunc( ia, ib):
  #  return cmp( resObj.typeNames[ia], resObj.typeNames[ib])
  #typeIxs.sort( sortFunc)
  #if bugLev >= 5:
  #  print 'typeIxs: %s' % (typeIxs,)

  #resObj.typeIxs        = typeIxs
  #resObj.typeNames      = [resObj.typeNames[ix] for ix in typeIxs]
  #resObj.typeNums       = [resObj.typeNums[ix] for ix in typeIxs]
  #resObj.typeMasses_amu = [resObj.typeMasses_amu[ix] for ix in typeIxs]
  #resObj.typeValences   = [resObj.typeValences[ix] for ix in typeIxs]
  #resObj.typePseudos    = [resObj.typePseudos[ix] for ix in typeIxs]

  #if bugLev >= 5:
  #  print '\nsorted atomTypes:'
  #  print 'typeIxs: %s' % ( resObj.typeIxs,)
  #  print 'typeNames: %s' % ( resObj.typeNames,)
  #  print 'typeNums: %s' % ( resObj.typeNums,)
  #  print 'typeMasses_amu: %s' % ( resObj.typeMasses_amu,)
  #  print 'typeValences: %s' % ( resObj.typeValences,)
  #  print 'typePseudos: %s' % ( resObj.typePseudos,)








  ## Old way to get atomNames, atomMasses, atomValences, atomPseudos.
  ## Retained in case some day we need it.
  ## But the real values are set in "New way", below.
  ## atomMrr = map containing array.  Example:
  ##   _dimLens: [5]
  ##   _dimNames: ['ion']
  ##   _fieldNames: ['element' 'atomtype']
  ##   _fieldTypes: ['s' 'i']
  ##   element: ['C ' 'Fe' 'Fe' 'Fe' 'Fe']
  ##   atomtype: [1 2 2 2 2]

  #atomMrr = getArrayByPath(
  #  bugLev, root, 'atominfo/array[@name=\'atoms\']')
  #oatomNames = atomMrr['element']

  ## oatomTypes are indices into typeNames, typeNums, typeMasses,
  ## typeValences, typePseudos
  #oatomTypes = [ix - 1 for ix in atomMrr['atomtype']]  # change to origin 0
  #natom = len( oatomTypes)

  #if bugLev >= 5:
  #  print '\natomMrr:'
  #  printMrr( atomMrr)
  #  print '\nunsorted atoms:'
  #  print 'oatomNames: %s' % ( oatomNames,)
  #  print 'oatomTypes: %s' % ( oatomTypes,)

  ## The permutation array typeIxs maps typeIxs[newIx] = oldIx.
  ## Invert it to get typeIxInvs[oldIx] = newIx.
  #typeIxInvs = numType * [0]
  #for ii in range( numType):
  #  typeIxInvs[ typeIxs[ii]] = ii
  #if bugLev >= 5:
  #  print 'typeIxInvs: %s' % (typeIxInvs,)

  ## Sort oatomNames, oatomTypes by typeIxInvs[atomtype] so they
  ## are in the same order as typenames, typenums, etc, above.
  ## Currently oatomTypes[i] = old index num into type*.
  ## We want to sort by new index num into type*.

  ## Sort to set oatomIxs[newIx] == oldIx.

  #oatomIxs = range( natom)
  #def sortFunc( ia, ib):
  #  return cmp( typeIxInvs[ oatomTypes[ ia]], typeIxInvs[ oatomTypes[ ib]])
  #oatomIxs.sort( sortFunc)
  #oatomNames = [oatomNames[ix] for ix in oatomIxs]
  #oatomTypes = [typeIxInvs[ oatomTypes[ix]] for ix in oatomIxs]

  #if bugLev >= 5:
  #  print '\noatomIxs: %s' % (oatomIxs,)
  #  print '\nsorted atoms:'
  #  print 'oatomNames: %s' % ( oatomNames,)
  #  print 'oatomTypes: %s' % ( oatomTypes,)
  #    # indices into typeNames, typeNums

  #oatomMasses_amu = natom * [None]
  #oatomValences = natom * [None]
  #oatomPseudos = natom * [None]
  #for ii in range( natom):
  #  ix = oatomTypes[ii]
  #  if oatomNames[ii] != resObj.typeNames[ix]:
  #    throwerr('name mismatch')
  #  oatomMasses_amu[ii] = resObj.typeMasses_amu[ix]
  #  oatomValences[ii] = resObj.typeValences[ix]
  #  oatomPseudos[ii] = resObj.typePseudos[ix]
  #if bugLev >= 5:
  #  print 'oatomNames: %s' % ( oatomNames,)
  #  print 'oatomTypes: %s' % ( oatomTypes,)
  #  print 'oatomMasses_amu: %s' % ( oatomMasses_amu,)
  #  print 'oatomValences: %s' % ( oatomValences,)
  #  print 'oatomPseudos: %s' % ( oatomPseudos,)

  ## Make sure typenames are in alphabetic order
  #for ii in range(len(resObj.typeNames) - 1):
  #  if resObj.typeNames[ii] > resObj.typeNames[ii+1]:
  #    throwerr('typeNames not in order')

  ## Make sure atomnames are in alphabetic order
  #for ii in range(len(resObj.atomNames) - 1):
  #  if resObj.atomNames[ii] > resObj.atomNames[ii+1]:
  #    throwerr('atomNames not in order')




  # New way to get atomNames, atomMasses, atomValences, atomPseudos

  resObj.atomNames = []
  resObj.atomMasses_amu = []
  resObj.atomValences = []
  resObj.atomPseudos = []

  for ii in range( numType):
    nn = resObj.typeNums[ ii]
    resObj.atomNames += nn * [resObj.typeNames[ii]]
    resObj.atomMasses_amu += nn * [resObj.typeMasses_amu[ii]]
    resObj.atomValences += nn * [resObj.typeValences[ii]]
    resObj.atomPseudos += nn * [resObj.typePseudos[ii]]
  numAtom = len( resObj.atomNames)

  if bugLev >= 5: print '\n===== initial structure =====\n'

  # Initial structure
  # PyLada: vasp/extract/base.py: initial_structure()
  # OUTCAR: uses the appended INITIAL STRUCTURE section.
  lst = root.findall(
    'structure[@name=\'initialpos\']/crystal/varray[@name=\'basis\']/v')
  if bugLev >= 5: print 'len(lst) a:', len(lst)

  # initial_structure
  # POSCAR specifies each basis vector as one row.
  # So does vasprun.xml.
  # But PyLada's structure.cell is the transpose: each basis vec is a column.
  resObj.initialBasisMat = getRawArray(
    root,
    'structure[@name=\'initialpos\']/crystal/varray[@name=\'basis\']/v',
    3, 3, float)
  resObj.initialRecipBasisMat = getRawArray(
    root,
    'structure[@name=\'initialpos\']/crystal/varray[@name=\'rec_basis\']/v',
    3, 3, float)

  # Get initialFracPosMat
  resObj.initialFracPosMat = getRawArray(
    root,
    'structure[@name=\'initialpos\']/varray[@name=\'positions\']/v',
    0, 3, float)
  # xxx Check: nrow should be numAtom

  resObj.initialCartPosMat = np.dot(
    resObj.initialFracPosMat, resObj.initialBasisMat)
  # xxx mult by scale factor?

  if bugLev >= 5:
    print 'initialBasisMat:\n%s' % (repr(resObj.initialBasisMat),)
    print 'initialRecipBasisMat:\n%s' % (repr(resObj.initialRecipBasisMat),)
    print 'initialFracPosMat:\n%s' % (repr(resObj.initialFracPosMat),)
    print 'initialCartPosMat:\n%s' % (repr(resObj.initialCartPosMat),)



  if bugLev >= 5: print '\n===== final structure =====\n'

  # structure == final pos
  # POSCAR and OUTCAR specify each basis vector as one row.
  # So does vasprun.xml.
  # But PyLada's structure.cell is the transpose: each basis vec is a column.
  #
  # In vasprun.xml and OUTCAR, the basis vectors are rows.
  finalPath = 'structure[@name=\'finalpos\']/crystal/varray[@name=\'basis\']/v'
  nodes = root.findall( finalPath)
  if len(nodes) == 0:
    resObj.finalBasisMat = None
    resObj.finalRecipBasisMat = None
    resObj.finalFracPosMat = None
    resObj.finalCartPosMat = None
  else:
    resObj.finalBasisMat = getRawArray(
      root,
      finalPath,
      3, 3, float)
    resObj.finalRecipBasisMat = getRawArray(
      root,
      'structure[@name=\'finalpos\']/crystal/varray[@name=\'rec_basis\']/v',
      3, 3, float)

    # Get finalFracPosMat
    resObj.finalFracPosMat = getRawArray(
      root,
      'structure[@name=\'finalpos\']/varray[@name=\'positions\']/v',
      0, 3, float)    # xxx nrow should be numAtom
    # xxx Check: nrow should be numAtom

    resObj.finalCartPosMat = np.dot(
      resObj.finalFracPosMat, resObj.finalBasisMat)
    # xxx mult by scale factor?

  if bugLev >= 5:
    print 'finalBasisMat:\n%s' % (repr(resObj.finalBasisMat),)
    print 'finalRecipBasisMat:\n%s' % (repr(resObj.finalRecipBasisMat),)
    print 'finalFracPosMat:\n%s' % (repr(resObj.finalFracPosMat),)
    print 'finalCartPosMat:\n%s' % (repr(resObj.finalCartPosMat),)


  if bugLev >= 5: print '\n===== magmom =====\n'
  # Apparently the magnetization values are only in OUTCAR
  # See ScanOutcar.py.
  resObj.magmoms = None
  if bugLev >= 5: print 'magmoms: %s' % (resObj.magmoms,)



  if bugLev >= 5: print '\n===== kpoints =====\n'

  # kpoint coordinates.
  # In gw the kpoints get updated, so we use the last.
  # Not in PyLada?
  lst = root.findall( 'kpoints/varray[@name=\'kpointlist\']')
  if len(lst) == 0: throwerr('kpoints varray not found')
  kpointsNode = lst[-1]
  resObj.kpointFracMat = getRawArray( kpointsNode, 'v', 0, 3, float)
  resObj.numKpoint = resObj.kpointFracMat.shape[0]

  resObj.kpointCartMat = resObj.posScaleFactor * np.dot(
    resObj.kpointFracMat, resObj.initialRecipBasisMat)
  if bugLev >= 5:
    print 'numKpoint: %g' % (resObj.numKpoint,)
    print 'kpointFracMat:\n%s' % (repr(resObj.kpointFracMat),)
    print 'kpointCartMat:\n%s' % (repr(resObj.kpointCartMat),)

  # This is what PyLada calls multiplicity.
  # The only diff is the scaling.
  #   sum( Pylada multiplicity) = numKpoint
  #   sum( our kpointWeights) = 1.0
  lst = root.findall( 'kpoints/varray[@name=\'weights\']')
  if len(lst) == 0: throwerr('weights varray not found')
  weightsNode = lst[-1]
  resObj.kpointWeights = getRawArray( weightsNode, 'v', 0, 1, float)
  resObj.kpointWeights = resObj.kpointWeights[:,0]   # Only 1 col in 2d array
  if resObj.kpointWeights.shape[0] != resObj.numKpoint:
    throwerr('numKpoint mismatch')
  if bugLev >= 5:
    print 'kpointWeights:\n%s' % (repr(resObj.kpointWeights),)
    print 'kpointWeights sum: %g' % (sum(resObj.kpointWeights),)



  if bugLev >= 5: print '\n===== final volume =====\n'

  volPath = 'structure[@name=\'finalpos\']/crystal/i[@name=\'volume\']'
  nodes = root.findall( volPath)
  if len(nodes) == 0:
    resObj.finalVolume_ang3 = None
  else:
    resObj.finalVolume_ang3 = getScalar( root, volPath, float)
  if bugLev >= 5:
    print 'finalVolume_ang3: %s' % (resObj.finalVolume_ang3,)



  if bugLev >= 5: print '\n===== last calc forces =====\n'

  forcePath = 'calculation[last()]/varray[@name=\'forces\']/v'
  nodes = root.findall( forcePath)
  if len(nodes) == 0:
    resObj.finalForceMat_ev_ang = None
    resObj.finalStressMat_kbar = None
  else:
    # Get forceMat
    resObj.finalForceMat_ev_ang = getRawArray(
      root, forcePath,
      0, 3, float)
    # Sometimes forceMat can have a few NaN elements,
    # which postgresql calls invalid.
    if not all( np.isfinite( resObj.finalForceMat_ev_ang.flat)):
      resObj.finalForceMat_ev_ang = None
      resObj.finalStressMat_kbar = None
    else:
      # Get stressMat
      resObj.finalStressMat_kbar = getRawArray(
        root, 'calculation[last()]/varray[@name=\'stress\']/v',
        3, 3, float)
  if bugLev >= 5:
    print 'finalForceMat_ev_ang:\n%s' % (repr(resObj.finalForceMat_ev_ang),)
    print 'finalStressMat_kbar:\n%s' % (repr(resObj.finalStressMat_kbar),)



  if bugLev >= 5: print '\n===== eigenvalues and occupancies =====\n'

  # PyLada: eigenvalues
  eigenPath = 'calculation[last()]/eigenvalues/array'
  nodes = root.findall( eigenPath)
  if len(nodes) == 0:
    resObj.numBand = None
    resObj.eigenMat = None
    resObj.occupMat = None

  else:
    eigenMrr = getArrayByPath( bugLev, root, eigenPath)
    if bugLev >= 5:
      print '\neigenMrr beg =====:'
      printMrr( eigenMrr)
      print '\neigenMrr end =====:'
      for isp in range( resObj.numSpin):
        print '\neigenMrr: eigene[isp=%d][0]\n%s' \
          % (isp, repr(eigenMrr['eigene'][isp][0]),)
        print '\neigenMrr: occ[isp=%d][0]\n%s' \
          % (isp, repr(eigenMrr['occ'][isp][0]),)

    shp = eigenMrr['_dimLens']
    if bugLev >= 5: print 'eigenMrr shape: %s' % (shp,)
    if shp[0] != resObj.numSpin: throwerr('numSpin mismatch')
    if shp[1] != resObj.numKpoint: throwerr('numKpoint mismatch')
    if shp[2] != prmNumBand:     # see caution at prmNumBand, above
      print('numBand mismatch: prm: %d  shape: %d  inFile: %s' \
        % (prmNumBand, shp[2], inFile,))
    resObj.numBand = shp[2]

    resObj.eigenMat = eigenMrr['eigene']
    # Caution: for non-magnetic (numSpin==1),
    #   OUTCAR has occupMat values = 2, while vasprun.xml has values = 1.
    # For magnetic (numSpin==2), both OUTCAR and vasprun.xml have 1.
    resObj.occupMat = eigenMrr['occ']
    if resObj.numSpin == 1: resObj.occupMat *= 2
    if bugLev >= 5:
      print 'resObj.eigenMat.shape: ', resObj.eigenMat.shape
      print 'resObj.occupMat.shape: ', resObj.occupMat.shape
    
    # Compare projected and standard eigenvalues
    getProjected = False
    if getProjected:
      for isp in range( resObj.numSpin):
        projEigenMrr = getArrayByPath(
          bugLev, root, 'calculation[last()]/projected/eigenvalues/array')
        
        # eigs and projected eigs are identical
        eigs = resObj.eigenMrr['eigene'][isp]
        peigs = projEigenMrr['eigene'][isp]
        if bugLev >= 5:
          print 'Compare iegs, peigs for isp: %d' % (isp,)
          print '  eigs.shape:  ', eigs.shape
          print '  peigs.shape: ', peigs.shape
          print '  eigs[0,:]: ', eigs[0,:]
          print '  peigs[0,:]: ', peigs[0,:]
          print '  Diff projeigs - eigs: max maxabs: %g' \
            % (max( map( max, abs(peigs - eigs))),)

        # occs and projected occs are identical
        occs = resObj.eigenMrr['occ'][isp]
        poccs = projEigenMrr['occ'][isp]
        if bugLev >= 5:
          print 'Compare occs, poccs for isp: %d' % (isp,)
          print '  occs.shape:  ', occs.shape
          print '  poccs.shape: ', poccs.shape
          print '  occs[0,:]: ', occs[0,:]
          print '  poccs[0,:]: ', poccs[0,:]
          print '  Diff projoccs - occs: max maxabs: %g' \
            % (max( map( max, abs(poccs - occs))),)



  if bugLev >= 5: print '\n===== dielectric function =====\n'

  imagPath = 'calculation[last()]/dielectricfunction/imag/array'
  nodes = root.findall( imagPath)
  if len(nodes) == 0:
    resObj.dielectricImag = None
    resObj.dielectricReal = None

  else:
    dielImagMrr = getArrayByPath( bugLev, root, imagPath)
    dielRealMrr = getArrayByPath(
      bugLev, root, 'calculation[last()]/dielectricfunction/real/array')
    ndielectric = dielImagMrr['energy'].shape[0]
    dielectricImag = np.zeros((ndielectric, 7))    # E,xx,yy,zz,xy,yz,zx
    dielectricReal = np.zeros((ndielectric, 7))    # E,xx,yy,zz,xy,yz,zx

    dietags = ['energy', 'xx', 'yy', 'zz', 'xy', 'yz', 'zx']
    for jj in range( len( dietags)):
      tag = dietags[jj]
      immat = dielImagMrr[tag]
      remat = dielRealMrr[tag]
      for ii in range( ndielectric):
        dielectricImag[ii,jj] = immat[ii]
        dielectricReal[ii,jj] = remat[ii]

    resObj.dielectricImag = dielectricImag
    resObj.dielectricReal = dielectricReal

    if bugLev >= 5:
      print '\ndielImagMrr:'
      printMrr( dielImagMrr)
      print '\ndielRealMrr:'
      printMrr( dielRealMrr)

      print '\ndielectricImag:'
      for ii in range( ndielectric):
        print '  [%d]: %s' % (ii, resObj.dielectricImag[ii,:],)
      print ''
      print '\ndielectricReal:'
      for ii in range( ndielectric):
        print '  [%d]: %s' % (ii, resObj.dielectricReal[ii,:],)
      print ''
      
  if bugLev >= 5: print '\n===== misc junk =====\n'


  # functional: comes from appended FUNCTIONAL.

  # success: look for final section
  #   General timing and accounting informations for this job:

  # xxx skip: Hubbard / NLEP

  

  if bugLev >= 5: print '\n===== energy, efermi0 =====\n'

  energyPath = 'calculation/energy/i[@name=\'e_wo_entrp\']'
  nodes = root.findall( energyPath)
  if len(nodes) == 0:
    resObj.energyNoEntrps = None
  else:
    resObj.energyNoEntrps = []
    for node in nodes:
      txt = node.text.strip()
      resObj.energyNoEntrps.append( float( txt))
  if bugLev >= 5: print 'energyNoEntrps: %s' % (resObj.energyNoEntrps,)

  # efermi0
  # PyLada uses an algorithm to compare the sum of occupancies
  # to the valence.
  # We get it from the xml file here.
  #   PyLada: 5.8574
  #   XML:    5.93253

  efermiPath = 'calculation[last()]/dos/i[@name=\'efermi\']'
  nodes = root.findall( efermiPath)
  if len(nodes) == 0:
    resObj.efermi0 = None
  else:
    resObj.efermi0 = getScalar( root, efermiPath, float)
  if bugLev >= 5: print 'efermi0: %s' % (resObj.efermi0,)

  return


  ########################### End of parseXml ###############################


  # The following code was used for initial testing,
  # and who knows, someday may be useful again.

  #print '\n'
  #print '\ntesta:'
  #lst = root.findall('kpoints/generation/v[@name=\'genvec2\']')
  #amat = []
  #for ele in lst:
  #  text = ele.text
  #  print '  ele.text: %s' % (text,)
  #  toks = text.split()
  #  vals = map( float, toks)
  #  amat.append( vals)
  #print 'amat: %s' % (amat,)

  #amat = np.array( amat, dtype=float)
  #print 'amat:\n%s' % (amat,)

  #vec = getVec( root, 'kpoints/generation/v[@name=\'genvec2\']', 0, 0, float)
  #print 'vec:\n%s' % (vec,)

  #amat = getRawArray( root, 'kpoints/generation/v', 0, 0, float)
  #print 'amat:\n%s' % (amat,)

  #calcNodes = root.findall('calculation')
  #print '\nlen(calcNodes): %d' % (len(calcNodes,))

  ## pairs: (itot, en_wo_entrp) for the energy of each scstep
  #scstep_withouts = []
  ## pairs: (itot, en_wo_entrp) for the last energy of each calculation step
  #calcstep_withouts = []

  #basisMats = []
  #recipBasisMats = []
  #posMats = []
  #forceMats = []
  #stressMats = []

  #itot = 0     # index all scsteps, across calculations

  #ncalc = len( calcNodes)
  #for icalc in range( ncalc):
  #  cnode = calcNodes[icalc]
  #  forceMat = getRawArray( cnode, 'varray[@name=\'forces\']/v', 0, 0, float)
  #  print '\nforceMat for calcNodes[%d]:\n%s' % (icalc, forceMat,)
  #  scNodes = cnode.findall('scstep')
  #  print '    len(scNodes): %d' % (len(scNodes,))
  #  for isc in range(len(scNodes)):
  #    snode = scNodes[isc]
  #    sc_e_fr = getScalar( snode, 'energy/i[@name=\'e_fr_energy\']', float)
  #    sc_e_wo = getScalar( snode, 'energy/i[@name=\'e_wo_entrp\']', float)
  #    sc_e_0  = getScalar( snode, 'energy/i[@name=\'e_0_energy\']', float)
  #    print '    scNodes[%d]: sc_e_fr: %g  sc_e_wo: %g  sc_e_0: %g' \
  #      % (isc, sc_e_fr, sc_e_wo, sc_e_0,)

  #    scstep_withouts.append( (itot, sc_e_wo,))
  #    itot += 1

  #  # Structure for this calculation step
  #  strucNodes = cnode.findall('structure')
  #  if len(strucNodes) != 1: throwerr('calc structure not found')
  #  snode = strucNodes[0]
  #  basisMat = getRawArray(
  #    snode, 'crystal/varray[@name=\'basis\']/v', 3, 3, float)
  #  recipBasisMat = getRawArray(
  #    snode, 'crystal/varray[@name=\'rec_basis\']/v', 3, 3, float)
  #  # xxx should be nrow = num atoms
  #  posMat = getRawArray(
  #    snode, 'varray[@name=\'positions\']/v', 0, 3, float)
  #  print '  Calc final: basisMat:\n%s' % (basisMat,)
  #  print '  Calc final: recipBasisMat:\n%s' % (recipBasisMat,)
  #  print '  Calc final: posMat:\n%s' % (posMat,)
  #  basisMats.append( basisMat)
  #  recipBasisMats.append( recipBasisMat)
  #  posMats.append( posMat)

  #  # Forces for this calculation step
  #  forceNodes = cnode.findall('varray[@name=\'forces\']')
  #  if len(forceNodes) != 1: throwerr('calc forces not found')
  #  forceMat = getRawArray( forceNodes[0], 'v', 0, 3, float)
  #  print '  Calc final: forceMat:\n%s' % (forceMat,)
  #  forceMats.append( forceMat)

  #  # Stress for this calculation step
  #  stressNodes = cnode.findall('varray[@name=\'stress\']')
  #  if len(stressNodes) != 1: throwerr('calc stresses not found')
  #  stressMat = getRawArray( stressNodes[0], 'v', 3, 3, float)
  #  print '  Calc final: stressMat:\n%s' % (stressMat,)
  #  stressMats.append( stressMat)

  #  # Final energy for this calculation step
  #  enNodes = cnode.findall('energy')
  #  if len(enNodes) != 1: throwerr('calc energy not found')
  #  enode = enNodes[0]
  #  c_e_fr = getScalar( enode, 'i[@name=\'e_fr_energy\']', float)
  #  c_e_wo = getScalar( enode, 'i[@name=\'e_wo_entrp\']', float)
  #  c_e_0  = getScalar( enode, 'i[@name=\'e_0_energy\']', float)
  #  print '  Calc final: c_e_fr: %g  c_e_wo: %g  c_e_0: %g' \
  #    % (c_e_fr, c_e_wo, c_e_0,)
  #  calcstep_withouts.append( (itot - 1, c_e_wo,))

  #print ''
  #print 'scstep_withouts: %s' % (scstep_withouts,)
  #print ''
  #print 'calcstep_withouts: %s' % (calcstep_withouts,)

  #scmat = np.array( scstep_withouts, dtype=float)
  #print ''
  #print 'scmat:\n%s' % (scmat,)

  #calcmat = np.array( calcstep_withouts, dtype=float)
  #print ''
  #print 'calcmat:\n%s' % (calcmat,)


  #print ''
  #print 'Investigate DOS'
  #icals = len(calcNodes) - 1
  #cnode = calcNodes[icalc]
  #setNodes = cnode.findall('dos/total/array/set/set[@comment=\'spin 1\']')
  #print '    len(total setNodes): %d' % (len(setNodes),)
  #print '    setNodes[0]: %s' % (setNodes[0],)
  #if len(setNodes) != 1: throwerr('dos/total not found')
  #dosTotalMat = getRawArray( setNodes[0], 'r', 0, 0, float)
  #print ''
  #print 'type(dosTotalMat): ', type(dosTotalMat)
  #print 'dosTotalMat.shape: ', dosTotalMat.shape
  #print ''
  #print 'dosTotalMat:\n%s' % (dosTotalMat,)

  #dosPartialMats = []
  #partialSetNodes = cnode.findall('dos/partial/array/set')
  #print '    len(partialSetNodes): %d' % (len(partialSetNodes),)
  #if len(partialSetNodes) != 1: throwerr('dos/partial not found')
  #partialSet = partialSetNodes[0]

  #ionNodes = partialSet.findall('set')
  #print '    len(ionNodes): %d' % (len(ionNodes),)
  ## xxx should be nrow = num atoms
  #for ii in range(len(ionNodes)):
  #  dosPartialMat = getRawArray(
  #    ionNodes[ii], 'set[@comment=\'spin 1\']/r', 0, 0, float)
  #  print ''
  #  print 'dosPartialMat %d:' % (ii,)
  #  print 'type(dosPartialMat): ', type(dosPartialMat)
  #  print 'dosPartialMat.shape: ', dosPartialMat.shape
  #  print ''
  #  print 'dosPartialMat:\n%s' % (dosPartialMat,)
  #  dosPartialMats.append( dosPartialMat)

  #print 'len(dosPartialMats): %d' % (len(dosPartialMats),)




  #print '\nbasisMats:  len: %d' % (len(basisMats),)
  #for mat in basisMats: print '%s' % (mat,)

  #print '\nrecipBasisMats:  len: %d' % (len(recipBasisMats),)
  #for mat in recipBasisMats: print '%s' % (mat,)

  #print '\nposMats:  len: %d' % (len(posMats),)
  #for mat in posMats: print '%s' % (mat,)

  #print '\nforceMats:  len: %d' % (len(forceMats),)
  #for mat in forceMats: print '%s' % (mat,)

  #print '\nstressMats:  len: %d' % (len(stressMats),)
  #for mat in stressMats: print '%s' % (mat,)

  #basisDeltas = calcMatDeltas( basisMats)
  #recipBasisDeltas = calcMatDeltas( recipBasisMats)
  #posDeltas = calcMatDeltas( posMats)
  #forceDeltas = calcMatDeltas( forceMats)
  #stressDeltas = calcMatDeltas( stressMats)

  #print 'basisDeltas: %s' % ( basisDeltas,)
  #print 'recipBasisDeltas: %s' % ( recipBasisDeltas,)
  #print 'posDeltas: %s' % ( posDeltas,)
  #print 'forceDeltas: %s' % ( forceDeltas,)
  #print 'stressDeltas: %s' % ( stressDeltas,)


  #import matplotlib
  #matplotlib.use('tkagg')
  #import matplotlib.pyplot as plt

  #fig, axes = plt.subplots( 1, 1)
  ###ax00 = axes[0,0]
  #ax00 = axes
  #ax00.plot( dosTotalMat[:,0], dosTotalMat[:,1], color='r', linestyle='-',
  #  marker=None)
  #ax00.set_xlabel('Energy, eV')
  #ax00.set_ylabel('Number of states per unit cell')
  #ax00.set_title('Density of states')
  #ax00.xaxis.grid(color='lightblue', linestyle='solid')
  #ax00.yaxis.grid(color='lightblue', linestyle='solid')
  ##plt.show()


  ##fig, ax = plt.subplots()
  ##
  ##ax.plot( scmat[:,0], scmat[:,1], 'b-')
  ##ax.plot( calcmat[:,0], calcmat[:,1], 'bo')
  ##ax.set_ylim( calcmat[-1,1] - 5, calcmat[-1,1] + 5)
  ##ax.xaxis.grid(color='lightblue', linestyle='solid')
  ##ax.yaxis.grid(color='lightblue', linestyle='solid')
  ##
  ##savefig('tempa.png', dpi=100, orientation='landscape', papertype='letter')
  ##
  ##plt.show()


  #tnodes = root.findall('calculation[last()]')
  #printNode( tnodes[0], 0, 1)

  #tnodes = root.findall('calculation[last()]/eigenvalues')
  #printNode( tnodes[0], 0, 1)

  #tnodes = root.findall('calculation[last()]/eigenvalues/array')
  #printNode( tnodes[0], 0, 1)

  #res = getArrayByPath(
  #  bugLev, root, 'calculation[last()]/eigenvalues/array')
  #print '\ncalculation[last()]/eigenvalues:\n%s' % (res,)

  #print '\n'

#====================================================================


[docs]def printNode( node, curLev, maxLev):
  '''
  Recursively prints an XML tree, given an xml.etree.cElementTree node.

  **Parameters**:

  * node (xml.etree.ElementTree.Element): The root of the XML tree.
  * curLev (int): The current recursion level.  Starts at 0 and
    is incremented for each recursive call.
  * maxLev (int): The max number of levels to print

  **Returns**:

  * None
  '''

  if curLev <= maxLev:
    if node.tail == None: tail = 'None'
    else: tail = '"%s"' % (node.tail.strip(),)

    if node.text == None: text = 'None'
    else: text = '"%s"' % (node.text.strip(),)

    print '%stag: %s  attrib: %s  tail: %s  text: %s' \
      % (curLev * '  ', node.tag, node.attrib, tail, text,)

    for kid in node:
      printNode( kid, curLev + 1, maxLev)

#====================================================================

[docs]def parseText( path, nmin, nmax, dtype, text):
  '''
  Splits ``text`` into tokens, and converts each token to ``dtype``.

  Called by getVec, getRawArray.

  **Parameters**:

  * path (str): the XML tree path to the current node, for error msgs.
  * nmin (int): the minimum num tokens.  If fewer are found, throwerr.
  * nmax (int): the maximum num tokens.  If more are found, throwerr.
  * dtype (python type): Either int, float, or str: the tokens
    are converted to dtype.
  * text (str): the text string to be split.

  **Returns**:

  * list of tokens each having type = dtype.
  '''

  toks = text.split()
  ntok = len( toks)
  if ntok < nmin:
    throwerr('ntok < nmin for path: "%s"  text: "%s"' % (path, text,))
  if nmax > 0 and ntok > nmax:
    throwerr('ntok > nmax for path: "%s"  text: "%s"' % (path, text,))

  vals = ntok * [None]
  for ii in range(ntok):
    tok = toks[ii]
    if dtype == bool:
      tok = tok.lower()
      if tok in ['f', 'false', '.false.']: val = False
      elif tok in ['t', 'true', '.true.']: val = True
      else: throwerr('invalid bool in path: "%s"  text: "%s"' % (path, text,))
    elif dtype == int:
      try: val = int( tok)
      except ValueError, exc:
        throwerr('invalid int in path: "%s"  text: "%s"' % (path, text,))
    elif dtype == float:
      try: val = float( tok)
      except ValueError, exc:
        throwerr('invalid float in path: "%s"  text: "%s"' % (path, text,))
    elif dtype == str: val = tok
    else: throwerr('unknown dtype for path: "%s"' % (path,))
    vals[ii] = val
  return vals


#====================================================================

[docs]def getVec( root, path, nmin, nmax, dtype):
  '''
  Gets text at the specified XML path, splits, and converts tokens ``dtype``.

  **Parameters**:

  * root (xml.etree.ElementTree.Element): The current XML node.
  * path (str): the XML path from the current node.
  * nmin (int): the minimum num tokens.  If fewer are found, throwerr.
  * nmax (int): the maximum num tokens.  If more are found, throwerr.
  * dtype (python type): Either int, float, or str: the tokens
    are converted to dtype.

  **Returns**:

  * list of tokens each having type = dtype.
  '''

  text = getString( root, path)
  vals = parseText( path, nmin, nmax, dtype, text)
  return vals

#====================================================================

# Return stripped string

[docs]def getString( root, path):
  '''
  Gets text at the specified XML path, insures there's just 1, and returns it.

  **Parameters**:

  * root (xml.etree.ElementTree.Element): The current XML node.
  * path (str): the XML path from the current node.

  **Returns**:

  * stripped string.
  '''

  lst = root.findall( path)
  if len(lst) == 0:
    throwerr('path not found: "%s"' % (path,))
  if len(lst) > 1:
    throwerr('multiple matches for path: "%s"' % (path,))
  ele = lst[0]
  text = ele.text
  return text.strip()

#====================================================================

[docs]def getScalar( root, path, dtype):
  '''
  Gets text at the specified XML path, and converts it to ``dtype``.

  **Parameters**:

  * root (xml.etree.ElementTree.Element): The current XML node.
  * path (str): the XML path from the current node.
  * dtype (python type): Either int, float, or str: the token
    is converted to dtype.

  **Returns**:

  * item having type = dtype.
  '''

  lst = getVec( root, path, 1, 1, dtype)
  return lst[0]

#====================================================================

[docs]def getRawArray( root, path, nrow, ncol, dtype):
  '''
  Gets text at the specified XML path, and converts to a
  2D numpy array of ``dtype``.

  The text must be organized as one text element per row.

  **Parameters**:

  * root (xml.etree.ElementTree.Element): The current XML node.
  * path (str): the XML path from the current node.
  * nrow (int): the number of rows.  If 0, allow any number.
  * ncol (int): the number of columns.  If 0, allow any number.
  * dtype (python type): Either int, float, or str: the tokens
    are converted to dtype.

  **Returns**:

  * A regular 2-dimensional numpy array of dtype.
  '''

  lst = root.findall( path)
  nlst = len( lst)
  if nlst == 0:
    throwerr('path not found: "%s"' % (path,))
  if nrow > 0 and nlst != nrow:
    throwerr('nrow mismatch for path: "%s".  expected: %d  found: %d' \
      % (path, nrow, nlst,))

  rows = []
  for ii in range(nlst):

    ele = lst[ii]
    text = ele.text
    vals = parseText( path, 0, 0, dtype, text)
    if len(rows) == 0: ncolActual = len( vals)
    if len(vals) != ncolActual:
      throwerr('irregular array for path: "%s"' % (path,))
    if ncol > 0 and ncolActual != ncol:
      throwerr('ncol mismatch path: "%s"' % (path,))
    rows.append( vals)

  if dtype == int:
    amat = np.array( rows, dtype=int)
  elif dtype == float:
    amat = np.array( rows, dtype=float)
  else: throwerr('unknown dtype for path: "%s"' % (path,))

  return amat


#====================================================================

[docs]def getArrayByPath( bugLev, baseNode, path):
  '''
  Converts an XML ``<array>`` element in vasprun.xml
  to a map with an array.

  See :func:`getArrayByNode` for details.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.
  * baseNode (xml.etree.ElementTree.Element): current XML node
  * path (str): XML path from baseNode for the ``<array>`` element.

  **Returns**:

  * A Python array
  '''


  arrNodes = baseNode.findall( path)
  if len(arrNodes) != 1:
    throwerr('path not found: "%s"' % (path,))
  res = getArrayByNode( bugLev, arrNodes[0])
  return res

#====================================================================


# Returns Mrr == map containing array, like:
#   atomMrr:
#     _dimLens: [6]
#     _dimNames: ['ion']
#     _fieldNames: ['element', 'atomtype']
#     _fieldTypes: ['s', 'i']
#     element: ['Mo' 'Mo' 'S' 'S' 'S' 'S']
#     atomtype: [1 1 2 2 2 2]

[docs]def getArrayByNode( bugLev, arrNode):
  '''
  Converts an XML ``<array>`` element in vasprun.xml
  to a map with an array.

  Calls getArraySub to extract each field.
  The output Python map has the following structure:

  =============   ========================================================
  key             value
  =============   ========================================================
  _dimLens        numpy vec of dimension lengths.
                  len( dimLens) == n == numDimensions.
  _dimNames       numpy vec of dimension names.
                  len( dimLens) == n == numDimensions.
  _fieldNames     numpy vec of field names in the parallel arrays.
                  len( fieldNames) == numVariables.
  _fieldTypes     numpy vec of field types in the parallel arrays.
                  len( fieldTypes) == numVariables.
                  The types are: 'i': int, 'f': float, 's': str

  <fieldName>     numpy n-dimensional array of the field <fieldName>
  <fieldName>     numpy n-dimensional array of the field <fieldName>
  <fieldName>     numpy n-dimensional array of the field <fieldName>
  ...
  =============   ========================================================

  Example XML for a 1-dimensional array with 2 fields: ::

    <array name="atoms" >
     <dimension dim="1">ion</dimension>
     <field type="string">element</field>
     <field type="int">atomtype</field>
     <set>
      <rc><c>C </c><c>   1</c></rc>
      <rc><c>Fe</c><c>   2</c></rc>
      <rc><c>Fe</c><c>   2</c></rc>
      <rc><c>Fe</c><c>   2</c></rc>
      <rc><c>Fe</c><c>   2</c></rc>
     </set>
    </array>

  Example resulting map: ::

    _dimLens: [5]
    _dimNames: ['ion']
    _fieldNames: ['element' 'atomtype']
    _fieldTypes: ['s' 'i']
    element: ['C' 'Fe' 'Fe' 'Fe' 'Fe']
    atomtype: [1 2 2 2 2]

  Multiple dimension arrays also are supported.

  The vasprun.xml handling of dimensions is unusual.
  What they claim is ``dim="1"`` actually is the least
  significant dimension and varies fastest, both
  in the XML data and in our resulting Python array.

  So the XML ``<dimension dim="1">band</dimension>``
  becomes the last dimension in the resulting Python array.

  Example XML for a 3 dimensional array with 2 fields: ::

    <array>
     <dimension dim="1">band</dimension>
     <dimension dim="2">kpoint</dimension>
     <dimension dim="3">spin</dimension>
     <field>eigene</field>
     <field>occ</field>
     <set>
      <set comment="spin 1">
       <set comment="kpoint 1">
        <r>   -6.5058    1.0000 </r>
        <r>    0.2537    1.0000 </r>
        <r>    0.7101    1.0000 </r>
        ...
        <r>    8.1390    0.0000 </r>
       </set>
       <set comment="kpoint 2">
        <r>   -6.3718    1.0000 </r>
        <r>   -0.0841    1.0000 </r>
        <r>    0.7508    1.0000 </r>
       ...
       </set>
       <set comment="kpoint 101">
        <r>   -5.8567    1.0000 </r>
        <r>   -0.0854    1.0000 </r>
        <r>    0.9602    1.0000 </r>
        <r>    7.7174    0.0000 </r>
        <r>    7.8556    0.0000 </r>
       </set>
      </set>
     </set>
    </array>

  Example resulting map: ::

    _dimLens: [  1 101  22]
    _dimNames: ['spin' 'kpoint' 'band']
    _fieldNames: ['eigene' 'occ']
    _fieldTypes: ['f' 'f']
    eigene: [[[-6.5058  0.2537  0.7101 ...,  7.6096  7.8817  8.139 ]
        [-6.3718 -0.0841  0.7508 ...,  7.481   7.8491  7.9595]
        [-6.1332 -0.611   1.0672 ...,  7.0857  7.8655  7.9314]
        ...,
        [-5.8462  0.3687  0.9498 ...,  7.1721  7.4739  7.6631]
        [-5.8016  0.5503  0.5886 ...,  7.4113  7.5794  7.7332]
        [-5.8567 -0.0854  0.9602 ...,  7.2729  7.7174  7.8556]]]
    occ: [[[ 1.      1.      1.     ...,  0.      0.      0.    ]
        [ 1.      1.      1.     ...,  0.      0.      0.    ]
        [ 1.      1.      1.     ...,  1.      0.      0.    ]
        ...,
        [ 1.      1.      1.     ...,  1.      0.      0.    ]
        [ 1.      1.      1.     ...,  0.      0.      0.    ]
        [ 1.      1.      1.     ...,  0.9751  0.      0.    ]]]


  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.
  * node (xml.etree.ElementTree.Element):
    The XML node for the ``<array>`` element.

  **Returns**:

  * A Python array
  '''

  dimNodes = arrNode.findall('dimension')
  ndim = len( dimNodes)
  if ndim == 0: throwerr('no dimensions found')
  dimNames = [nd.text for nd in dimNodes]
  dimNames.reverse()         # dimNames are in reverse order in XML
  dimNames = np.array( dimNames, dtype=str)
  dimLens = np.zeros( [ndim], dtype=int)

  fieldNodes = arrNode.findall('field')
  nfield = len( fieldNodes)
  if nfield == 0: throwerr('no fields found')
  fieldNames = [nd.text for nd in fieldNodes]
  fieldNames = np.array( fieldNames, dtype=str)

  # We set fieldTypes[ifield] to max( all found types for ifield)
  # Types are: 0:int, 1:float, 2:string
  fieldTypes = nfield * [0]

  setNodes = arrNode.findall('set')
  if len(setNodes) != 1: throwerr('wrong len for primary set')
  setNode = setNodes[0]
  resList = nfield * [None]
  for ifield in range( nfield):
    amat = getArraySub(
      bugLev,
      setNode,
      ifield,
      fieldTypes,
      0,            # idim
      dimLens)

    # Convert all elements of each field ifield to fieldTypes[ifield].
    if   fieldTypes[ifield] == 0: amat = np.array( amat, dtype=int)
    elif fieldTypes[ifield] == 1: amat = np.array( amat, dtype=float)
    elif fieldTypes[ifield] == 2: amat = np.array( amat, dtype=str)
    else: throwerr('unknown fieldType')

    resList[ifield] = amat

  # Convert fieldTypes from 0,1,2 to 'i', 'f', 's'
  fldMap = { 0:'i', 1:'f', 2:'s'}
  fieldTypeStgs = map( lambda x: fldMap[x], fieldTypes)
  fieldTypeStgs = np.array( fieldTypeStgs, dtype=str)

  resMap = {
    '_dimNames': dimNames,
    '_dimLens': dimLens,
    '_fieldNames': fieldNames,
    '_fieldTypes': fieldTypeStgs,
  }
  for ii in range(len(fieldNames)):
    ar = resList[ii]
    if not all(ar.shape == np.array(dimLens)): throwerr('dimLens mismatch')
    resMap[fieldNames[ii]] = ar

  return resMap


#====================================================================


[docs]def getArraySub(
  bugLev,
  setNode,
  ifield,
  fieldTypes,
  idim,
  dimLens):
  '''
  Decodes the XML for one field (one variable) for an
  ``<array>``.

  Called by getArrayByNode.  See :func:`getArrayByNode` for details.

  **Parameters**:

  * bugLev (int): Debug level.  Normally 0.
  * setNode (xml.etree.ElementTree.Element): the element for ``<set>``.
  * ifield (int): the index number of the field.
  * fieldTypes (int[]): the numeric field types so far.
    The numeric types are: 0: int, 1: float, 2: str.
    We take the max of the field types.
  * tp (Python type): The desired type.
  * idim (int): dimension number == recursion level == array nest level.
    0 on the first call, 1 for the next level array, etc.
  * dimLens (int[]): list of dimension lengths.  Updated.

  **Returns**:

  * A Python array with elements of type str.
    The caller converts them to the correct type.
  '''


  nfield = len(fieldTypes)
  ndim = len(dimLens)

  # If we're at the last dimension, decode the element values.
  if idim == ndim - 1:
    # Try long form:
    #   <set>
    #     <rc>
    #       <c>2</c>
    #       <c>Mo</c>
    rcNodes = setNode.findall('rc')     # long form: <rc> <c>

    # Try short form:
    #   <set comment='spin 1'>
    #     <set comment='kpoint 1'>
    #       <r>-30.3711 1.0000</r>
    #       <r>-30.3709 1.0000</r>
    rNodes = setNode.findall('r')       # short form: <r>

    nval = max( len( rcNodes), len( rNodes))
    if dimLens[idim] == 0: dimLens[idim] = nval
    if nval != dimLens[idim]: throwerr('irregular array')
    resVec = nval * [None]

    if len(rcNodes) > 0:                # long form: <rc> <c>
      for ival in range( nval):
        cNodes = rcNodes[ival].findall('c')
        if len(cNodes) != nfield: throwerr('wrong num fields')
        stg = cNodes[ifield].text
        resVec[ival] = stg

    elif len(rNodes) > 0:               # short form: <r>
      for ival in range( nval):
        txt = rNodes[ival].text
        toks = txt.split()
        if len(toks) != nfield: throwerr('wrong num fields')
        resVec[ival] = toks[ifield]

    else: throwerr('unknown array structure')

    # Strip all strings.
    # Set fieldTypes[ifield] to max( current type, all found types)
    # Types are: 0:int, 1:float, 2:string
    for ival in range( nval):
      resVec[ival] = resVec[ival].strip()
      stg = resVec[ival]
      ftype = 2            # assume worst case: string
      try:
        float( stg)
        ftype = 1
      except ValueError: pass
      try:
        int( stg)
        ftype = 0
      except ValueError: pass
      fieldTypes[ifield] = max( fieldTypes[ifield], ftype)


  else:    # else idim < ndim - 1.  Recursion.
    setNodes = setNode.findall('set')
    nset = len( setNodes)
    if dimLens[idim] == 0: dimLens[idim] = nset
    if nset != dimLens[idim]: throwerr('irregular array')
    resVec = nset * [None]
    for iset in range(nset):
      resVec[iset] = getArraySub(          # recursion
        bugLev,
        setNodes[iset],
        ifield,
        fieldTypes,
        idim + 1,
        dimLens)

  return resVec
 

#====================================================================

# Not used

def convertTypesUnused( tp, vec):
  '''
  Recursively converts the elements of an array ``vec``
  from str to the specified type.

  **Parameters**:

  * tp (Python type): The desired type.
  * vec (str[] or str[][] or ...): the array to be converted.

  **Returns**:

  * A Python array with elements of type ``tp``.
  '''

  if isinstance( vec[0], str):
    for ii in range(len(vec)):
      vec[ii] = tp( vec[ii])
  elif isinstance( vec[0], list):
    for subVec in vec:
      convertTypes( tp, subVec)             # recursion
  else: throwerr('unknown array structure')

#====================================================================


[docs]def maxAbsDiff( mata, matb):
  '''
  Returns the max abs diff between two 2D numpy matrices.

  **Parameters**:

  * mata (numpy 2D array): Array to be compared.
  * matb (numpy 2D array): Array to be compared.

  **Returns**:

  * float scalar: max_i( max_j( abs( mata[i][j] - matb[i][j]))
  '''

  (nrow,ncol) = mata.shape
  if matb.shape != mata.shape: throwerr('maxAbsDiff: shape mismatch')
  diffMat =  abs( matb - mata)
  res = max( map( max, diffMat))
  return res

#====================================================================

[docs]def calcMatDeltas( mats):
  '''
  Returns the max abs diffs between adjacent pairs of a
  list of 2D numpy matrices.

  **Parameters**:

  * mats (list of 2D numpy matrices)

  **Returns**:

  * deltas (float[]): deltas[k] = maxAbsDiff( mats[k-1], mats[k])
  '''

  nmat = len( mats)
  deltas = []
  for ii in range( 1, nmat):
    delta = maxAbsDiff( mats[ii-1], mats[ii])
    deltas.append( delta)
  return deltas

#====================================================================

[docs]def printMrr( vmap):
  '''
  Prints the Mrr map returned by getArrayByPath or getArrayByNode.

  **Parameters**:

  * vmap (map): the MRR map

  **Returns**:

  * None
  '''

  keys = vmap.keys()
  keys.sort()
  for key in keys:
    if key.startswith('_'):
      val = vmap[key]
      print '  %s: %s' % (key, val,)
  for key in vmap['_fieldNames']:
    val = vmap[key]
    print '  %s: %s' % (key, val,)
  print ''

#====================================================================

[docs]def throwerr( msg):
  '''
  Prints an error message and raises Exception.

  **Parameters**:

  * msg (str): Error message.

  **Returns**

  * (Never returns)
  
  **Raises**

  * Exception
  '''

  print msg
  print >> sys.stderr, msg
  raise Exception( msg)

#====================================================================

if __name__ == '__main__': main()

#====================================================================
Navigation

Source code for nrelmat.ScanXml

Quick search

Navigation