Source code for nrelmat.ScanXml

#!/usr/bin/env python
# Copyright 2013 National Renewable Energy Laboratory, Golden CO, USA
# This file is part of NREL MatDB.
#
# NREL MatDB is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# NREL MatDB is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with NREL MatDB.  If not, see <http://www.gnu.org/licenses/>.


import datetime, re, sys, traceback, os.path
import xml.etree.cElementTree as etree

import numpy as np
np.seterr( all='raise', under='ignore')

import readVasp


#====================================================================

def badparms( msg):
  print '\nError: %s' % (msg,)
  print 'Parms:'
  print '  -bugLev    <int>      debug level'
  print '  -inFile    <string>   input file'
  print '  -maxLev    <int>      max xml print level'
  print ''
  sys.exit(1)

#====================================================================

[docs]def main(): ''' Test driver: Extracts info from a VASP vasprun.xml file. Command line parameters: ================ ========= ============================================== Parameter Type Description ================ ========= ============================================== **-bugLev** integer Debug level. Normally 0. **-inFile** string Input file **-maxLev** int max xml print level ================ ========= ============================================== ''' bugLev = 0 inFile = None maxLev = 0 if len(sys.argv) % 2 != 1: badparms('Parms must be key/value pairs') for iarg in range( 1, len(sys.argv), 2): key = sys.argv[iarg] val = sys.argv[iarg+1] if key == '-bugLev': bugLev = int( val) elif key == '-inFile': inFile = val elif key == '-maxLev': maxLev = int( val) else: badparms('unknown key: "%s"' % (key,)) if bugLev == None: badparms('parm not specified: -bugLev') if inFile == None: badparms('parm not specified: -inFile') if maxLev == None: badparms('parm not specified: -maxLev') resObj = ResClass() parseXml( bugLev, inFile, maxLev, resObj) #==================================================================== #====================================================================
[docs]class ResClass: ''' An empty class used as a data container for results. ''' def __str__(self): keys = self.__dict__.keys() keys.sort() msg = '' for key in keys: val = self.__dict__[key] stg = str( val) if stg.find('\n') >= 0: sep = '\n' else: sep = ' ' msg += ' %s: type: %s val:%s%s\n' % (key, type(val), sep, val,) return msg #==================================================================== #==================================================================== # Fills resObj.
[docs]def parseXml( bugLev, inFile, maxLev, resObj): ''' Extracts info from the vasprun.xml file from a VASP run, using the Python xml.etree.cElementTree API. **Parameters**: * bugLev (int): Debug level. Normally 0. * inFile (str): Path of the input vasprun.xml file. * resObj (class ResClass): data object: we set attributes here. **Returns**: * None ''' try: tree = etree.parse( inFile) except Exception, exc: throwerr(('parseXml: invalid xml in file: "%s"\n' + ' Msg: %s\n') % (inFile, repr(exc),)) root = tree.getroot() if bugLev >= 1: printNode( root, 0, maxLev) # node, curLev, maxLev if bugLev >= 5: print '\n===== program, version, date etc =====\n' # xxx program, version, subversion, etc # PyLada: vasp/extract/base.py: datetime() # OUTCAR: use the 1 occurance of: # executed on LinuxIFC date 2013.03.11 09:32:24 dtStg = getString( root, 'generator/i[@name=\'date\']') tmStg = getString( root, 'generator/i[@name=\'time\']') dateFmtIn = '%Y %m %d %H:%M:%S' dateFmtOut = '%Y-%m-%d %H:%M:%S' resObj.runDate = datetime.datetime.strptime( '%s %s' % (dtStg, tmStg), dateFmtIn) if bugLev >= 5: print 'runDate: %s' % (resObj.runDate.strftime( dateFmtOut),) # iterTimes # Each node is has cpuTime, wallTime: # <time name='totalsc'>22.49 24.43</time> nodes = root.findall('calculation/time[@name=\'totalsc\']') iterCpuTimes = [] iterRealTimes = [] for node in nodes: txt = node.text toks = txt.split() if len(toks) == 1: # Kluge: sometimes the two time fields run together: # <time name="totalsc">18560.1718566.89</time> # should be: # <time name="totalsc">18560.17 18566.89</time> # In this case, try to split it, # or just use the first time for both values. tok = toks[0] ix = tok.find('.') if ix < 0: throwerr('invalid times: %s' % (etree.tostring(node),)) iy = tok.find('.', ix + 1) if iy < 0 or iy != len(tok) - 3: throwerr('invalid times: %s' % (etree.tostring(node),)) tmStga = tok[:ix+3] tmStgb = tok[ix+3:] elif len(toks) == 2: tmStga = toks[0] tmStgb = toks[1] else: throwerr('invalid times: %s' % (etree.tostring(node),)) iterCpuTimes.append( float( tmStga)) iterRealTimes.append( float( tmStgb)) resObj.iterCpuTimes = iterCpuTimes resObj.iterRealTimes = iterRealTimes if bugLev >= 5: print 'iterCpuTimes: %s' % (resObj.iterCpuTimes,) print 'iterRealTimes: %s' % (resObj.iterRealTimes,) if bugLev >= 5: print '\n===== incar parameters =====\n' # algo # PyLada: vasp/extract/base.py: algo() # OUTCAR: use the 1 occurance of: # ALGO = Fast algoPath = 'incar/i[@name=\'ALGO\']' if len( root.findall( algoPath)) == 0: resObj.algo = 'n/a' else: resObj.algo = getString( root, algoPath) if bugLev >= 5: print 'algo: "%s"' % (resObj.algo,) # encut # PyLada: vasp/extract/base.py: encut() # OUTCAR: use the first occurance of: # ENCUT = 252.0 eV 18.52 Ry 4.30 a.u. 4.08 4.08 15.92*2*pi/ulx,y,z # ENCUT = 252.0 resObj.encut_ev = getScalar( root, 'incar/i[@name=\'ENCUT\']', float) if bugLev >= 5: print 'encut_ev: %g' % (resObj.encut_ev,) # ldauType # PyLada: vasp/extract/base.py: LDAUType() # OUTCAR: use the first occurance of: # LDA+U is selected, type is set to LDAUTYPE = 2 # LDAUTYPE = 2 #rawLdauType = getScalar( root, 'incar/v[@name=\'LDAUTYPE\']', int) #if rawLdauType == 1: resObj.ldauType = 'liechtenstein' #elif rawLdauType == 2: resObj.ldauType = 'dudarev' #else: throwerr('unknown rawLdauType: %d' % (rawLdauType,)) #if bugLev >= 5: # print 'rawLdauType: %d ldauType: %s' % (rawLdauType, resObj.ldauType,) tstg = getString( root, 'incar/i[@name=\'SYSTEM\']') resObj.systemName = tstg.strip('\'"') # get rid of extraneous quotes if bugLev >= 5: print 'systemName: "%s"' % (resObj.systemName,) if bugLev >= 5: print '\n===== general parameters =====\n' resObj.generalName = getString( root, 'parameters/separator[@name=\'general\']/i[@name=\'SYSTEM\']') if bugLev >= 5: print 'generalName: "%s"' % (resObj.generalName,) if bugLev >= 5: print '\n===== electronic parameters =====\n' lst = root.findall('parameters/separator[@name=\'electronic\']') if len(lst) != 1: throwerr('electronic parameters not found') elecNode = lst[0] # ialgo # PyLada: use the 1 occurance of: # Electronic relaxation 2 (details) # IALGO = 68 algorithm resObj.ialgo = getScalar( elecNode, 'i[@name=\'IALGO\']', int) resObj.ediff = getScalar( elecNode, 'i[@name=\'EDIFF\']', float) if bugLev >= 5: print 'ialgo: %d' % (resObj.ialgo,) print 'ediff: %g' % (resObj.ediff,) # numBand = nbands # Caution: in some cases NBANDS != eigenMrr['eigene'].shape[2] # So we use the eigene dimension instead. # See further below. prmNumBand = getScalar( elecNode, 'i[@name=\'NBANDS\']', int) if bugLev >= 5: print 'prmNumBand: %d' % (prmNumBand,) # numElectron = nelect # PyLada: vasp/extract/base.py: nelect() # OUTCAR: use the 1 occurance of: # NELECT = 48.0000 total number of electrons resObj.numElectron = getScalar( elecNode, 'i[@name=\'NELECT\']', float) if bugLev >= 5: print 'numElectron: %d' % (resObj.numElectron,) # icharg resObj.icharg = getScalar( elecNode, 'separator[@name=\'electronic startup\']/i[@name=\'ICHARG\']', int) if bugLev >= 5: print 'icharg: %g' % (resObj.icharg,) # numSpin == ispin resObj.numSpin = getScalar( elecNode, 'separator[@name=\'electronic spin\']/i[@name=\'ISPIN\']', int) if bugLev >= 5: print 'numSpin: %g' % (resObj.numSpin,) if bugLev >= 5: print '\n===== ionic parameters =====\n' # Some parameters like IBRION are also found in INCAR, sometimes. # But apparently they are always in the ionic parameters section. lst = root.findall('parameters/separator[@name=\'ionic\']') if len(lst) != 1: throwerr('ionic parameters not found') ionNode = lst[0] resObj.ibrion = getScalar( ionNode, 'i[@name=\'IBRION\']', int) resObj.isif = getScalar( ionNode, 'i[@name=\'ISIF\']', int) resObj.posScaleFactor = getScalar( ionNode, 'i[@name=\'SCALEE\']', float) if bugLev >= 5: print 'ibrion: %d' % (resObj.ibrion,) print 'isif: %d' % (resObj.isif,) print 'posScaleFactor: %g' % (resObj.posScaleFactor,) if bugLev >= 5: print '\n===== symmetry parameters =====\n' # Some parameters like ISYM are also found in INCAR, sometimes. # But apparently they are always in the symmetry parameters section. lst = root.findall('parameters/separator[@name=\'symmetry\']') if len(lst) != 1: throwerr('symmetry parameters not found') symNode = lst[0] resObj.isym = getScalar( symNode, 'i[@name=\'ISYM\']', int) if bugLev >= 5: print 'isym: %d' % (resObj.isym,) if bugLev >= 5: print '\n===== writing parameters =====\n' # Some parameters like LOPTICS are also found in INCAR, sometimes. # But apparently they are always in the writing parameters section. lst = root.findall('parameters/separator[@name=\'writing\']') if len(lst) != 1: throwerr('writing parameters not found') writeNode = lst[0] resObj.loptics = getScalar( writeNode, 'i[@name=\'LOPTICS\']', bool) resObj.lwave = getScalar( writeNode, 'i[@name=\'LWAVE\']', bool) if bugLev >= 5: print 'loptics: %s' % (resObj.loptics,) print 'lwave: %s' % (resObj.lwave,) if bugLev >= 5: print '\n===== atom info =====\n' # atomTypeMrr = map containing array. Example (some whitespace omitted): # _dimLens: [2] # _dimNames: ['type'] # _fieldNames: ['atomspertype' 'element' 'mass' 'valence' 'pseudopotential'] # _fieldTypes: ['i' 's' 'f' 'f' 's'] # atomspertype: [1 4] # element: ['C ' 'Fe'] # mass: [ 12.011 55.847] # valence: [ 4. 8.] # pseudopotential: [' PAW_PBE C_s 06Sep2000 ' ' PAW_PBE Fe 06Sep2000 '] atomTypeMrr = getArrayByPath( bugLev, root, 'atominfo/array[@name=\'atomtypes\']') resObj.typeNames = atomTypeMrr['element'] resObj.typeNums = atomTypeMrr['atomspertype'] resObj.typeMasses_amu = atomTypeMrr['mass'] resObj.typeValences = atomTypeMrr['valence'] resObj.typePseudos = atomTypeMrr['pseudopotential'] resObj.numAtom = sum( resObj.typeNums) if bugLev >= 5: print '\natomTypeMrr:' printMrr( atomTypeMrr) print '\natomTypes:' print 'typeNames: %s' % ( resObj.typeNames,) print 'typeNums: %s' % ( resObj.typeNums,) print 'typeMasses_amu: %s' % ( resObj.typeMasses_amu,) print 'typeValences: %s' % ( resObj.typeValences,) print 'typePseudos: %s' % ( resObj.typePseudos,) print 'numAtom: %d' % ( resObj.numAtom,) numType = len( resObj.typeNames) if len( resObj.typeNums) != numType: self.throwerr('len( typeNums) mismatch') if len( resObj.typeMasses_amu) != numType: self.throwerr('len( typeMasses_amu) mismatch') if len( resObj.typeValences) != numType: self.throwerr('len( typeValences) mismatch') if len( resObj.typePseudos) != numType: self.throwerr('len( typePseudos) mismatch') ## Sort parallel arrays typeNames, typeNums, etc, ## by typeNames alphabetic order. ## Use an index sort with typeIxs. ## In rare cases like icsd_024360.cif/hs-ferro ## and icsd_060845_GW, the names are out of order. ## Sort to set typeIxs[newIx] == oldIx. #typeIxs = range( numType) #def sortFunc( ia, ib): # return cmp( resObj.typeNames[ia], resObj.typeNames[ib]) #typeIxs.sort( sortFunc) #if bugLev >= 5: # print 'typeIxs: %s' % (typeIxs,) #resObj.typeIxs = typeIxs #resObj.typeNames = [resObj.typeNames[ix] for ix in typeIxs] #resObj.typeNums = [resObj.typeNums[ix] for ix in typeIxs] #resObj.typeMasses_amu = [resObj.typeMasses_amu[ix] for ix in typeIxs] #resObj.typeValences = [resObj.typeValences[ix] for ix in typeIxs] #resObj.typePseudos = [resObj.typePseudos[ix] for ix in typeIxs] #if bugLev >= 5: # print '\nsorted atomTypes:' # print 'typeIxs: %s' % ( resObj.typeIxs,) # print 'typeNames: %s' % ( resObj.typeNames,) # print 'typeNums: %s' % ( resObj.typeNums,) # print 'typeMasses_amu: %s' % ( resObj.typeMasses_amu,) # print 'typeValences: %s' % ( resObj.typeValences,) # print 'typePseudos: %s' % ( resObj.typePseudos,) ## Old way to get atomNames, atomMasses, atomValences, atomPseudos. ## Retained in case some day we need it. ## But the real values are set in "New way", below. ## atomMrr = map containing array. Example: ## _dimLens: [5] ## _dimNames: ['ion'] ## _fieldNames: ['element' 'atomtype'] ## _fieldTypes: ['s' 'i'] ## element: ['C ' 'Fe' 'Fe' 'Fe' 'Fe'] ## atomtype: [1 2 2 2 2] #atomMrr = getArrayByPath( # bugLev, root, 'atominfo/array[@name=\'atoms\']') #oatomNames = atomMrr['element'] ## oatomTypes are indices into typeNames, typeNums, typeMasses, ## typeValences, typePseudos #oatomTypes = [ix - 1 for ix in atomMrr['atomtype']] # change to origin 0 #natom = len( oatomTypes) #if bugLev >= 5: # print '\natomMrr:' # printMrr( atomMrr) # print '\nunsorted atoms:' # print 'oatomNames: %s' % ( oatomNames,) # print 'oatomTypes: %s' % ( oatomTypes,) ## The permutation array typeIxs maps typeIxs[newIx] = oldIx. ## Invert it to get typeIxInvs[oldIx] = newIx. #typeIxInvs = numType * [0] #for ii in range( numType): # typeIxInvs[ typeIxs[ii]] = ii #if bugLev >= 5: # print 'typeIxInvs: %s' % (typeIxInvs,) ## Sort oatomNames, oatomTypes by typeIxInvs[atomtype] so they ## are in the same order as typenames, typenums, etc, above. ## Currently oatomTypes[i] = old index num into type*. ## We want to sort by new index num into type*. ## Sort to set oatomIxs[newIx] == oldIx. #oatomIxs = range( natom) #def sortFunc( ia, ib): # return cmp( typeIxInvs[ oatomTypes[ ia]], typeIxInvs[ oatomTypes[ ib]]) #oatomIxs.sort( sortFunc) #oatomNames = [oatomNames[ix] for ix in oatomIxs] #oatomTypes = [typeIxInvs[ oatomTypes[ix]] for ix in oatomIxs] #if bugLev >= 5: # print '\noatomIxs: %s' % (oatomIxs,) # print '\nsorted atoms:' # print 'oatomNames: %s' % ( oatomNames,) # print 'oatomTypes: %s' % ( oatomTypes,) # # indices into typeNames, typeNums #oatomMasses_amu = natom * [None] #oatomValences = natom * [None] #oatomPseudos = natom * [None] #for ii in range( natom): # ix = oatomTypes[ii] # if oatomNames[ii] != resObj.typeNames[ix]: # throwerr('name mismatch') # oatomMasses_amu[ii] = resObj.typeMasses_amu[ix] # oatomValences[ii] = resObj.typeValences[ix] # oatomPseudos[ii] = resObj.typePseudos[ix] #if bugLev >= 5: # print 'oatomNames: %s' % ( oatomNames,) # print 'oatomTypes: %s' % ( oatomTypes,) # print 'oatomMasses_amu: %s' % ( oatomMasses_amu,) # print 'oatomValences: %s' % ( oatomValences,) # print 'oatomPseudos: %s' % ( oatomPseudos,) ## Make sure typenames are in alphabetic order #for ii in range(len(resObj.typeNames) - 1): # if resObj.typeNames[ii] > resObj.typeNames[ii+1]: # throwerr('typeNames not in order') ## Make sure atomnames are in alphabetic order #for ii in range(len(resObj.atomNames) - 1): # if resObj.atomNames[ii] > resObj.atomNames[ii+1]: # throwerr('atomNames not in order') # New way to get atomNames, atomMasses, atomValences, atomPseudos resObj.atomNames = [] resObj.atomMasses_amu = [] resObj.atomValences = [] resObj.atomPseudos = [] for ii in range( numType): nn = resObj.typeNums[ ii] resObj.atomNames += nn * [resObj.typeNames[ii]] resObj.atomMasses_amu += nn * [resObj.typeMasses_amu[ii]] resObj.atomValences += nn * [resObj.typeValences[ii]] resObj.atomPseudos += nn * [resObj.typePseudos[ii]] numAtom = len( resObj.atomNames) if bugLev >= 5: print '\n===== initial structure =====\n' # Initial structure # PyLada: vasp/extract/base.py: initial_structure() # OUTCAR: uses the appended INITIAL STRUCTURE section. lst = root.findall( 'structure[@name=\'initialpos\']/crystal/varray[@name=\'basis\']/v') if bugLev >= 5: print 'len(lst) a:', len(lst) # initial_structure # POSCAR specifies each basis vector as one row. # So does vasprun.xml. # But PyLada's structure.cell is the transpose: each basis vec is a column. resObj.initialBasisMat = getRawArray( root, 'structure[@name=\'initialpos\']/crystal/varray[@name=\'basis\']/v', 3, 3, float) resObj.initialRecipBasisMat = getRawArray( root, 'structure[@name=\'initialpos\']/crystal/varray[@name=\'rec_basis\']/v', 3, 3, float) # Get initialFracPosMat resObj.initialFracPosMat = getRawArray( root, 'structure[@name=\'initialpos\']/varray[@name=\'positions\']/v', 0, 3, float) # xxx Check: nrow should be numAtom resObj.initialCartPosMat = np.dot( resObj.initialFracPosMat, resObj.initialBasisMat) # xxx mult by scale factor? if bugLev >= 5: print 'initialBasisMat:\n%s' % (repr(resObj.initialBasisMat),) print 'initialRecipBasisMat:\n%s' % (repr(resObj.initialRecipBasisMat),) print 'initialFracPosMat:\n%s' % (repr(resObj.initialFracPosMat),) print 'initialCartPosMat:\n%s' % (repr(resObj.initialCartPosMat),) if bugLev >= 5: print '\n===== final structure =====\n' # structure == final pos # POSCAR and OUTCAR specify each basis vector as one row. # So does vasprun.xml. # But PyLada's structure.cell is the transpose: each basis vec is a column. # # In vasprun.xml and OUTCAR, the basis vectors are rows. finalPath = 'structure[@name=\'finalpos\']/crystal/varray[@name=\'basis\']/v' nodes = root.findall( finalPath) if len(nodes) == 0: resObj.finalBasisMat = None resObj.finalRecipBasisMat = None resObj.finalFracPosMat = None resObj.finalCartPosMat = None else: resObj.finalBasisMat = getRawArray( root, finalPath, 3, 3, float) resObj.finalRecipBasisMat = getRawArray( root, 'structure[@name=\'finalpos\']/crystal/varray[@name=\'rec_basis\']/v', 3, 3, float) # Get finalFracPosMat resObj.finalFracPosMat = getRawArray( root, 'structure[@name=\'finalpos\']/varray[@name=\'positions\']/v', 0, 3, float) # xxx nrow should be numAtom # xxx Check: nrow should be numAtom resObj.finalCartPosMat = np.dot( resObj.finalFracPosMat, resObj.finalBasisMat) # xxx mult by scale factor? if bugLev >= 5: print 'finalBasisMat:\n%s' % (repr(resObj.finalBasisMat),) print 'finalRecipBasisMat:\n%s' % (repr(resObj.finalRecipBasisMat),) print 'finalFracPosMat:\n%s' % (repr(resObj.finalFracPosMat),) print 'finalCartPosMat:\n%s' % (repr(resObj.finalCartPosMat),) if bugLev >= 5: print '\n===== magmom =====\n' # Apparently the magnetization values are only in OUTCAR # See ScanOutcar.py. resObj.magmoms = None if bugLev >= 5: print 'magmoms: %s' % (resObj.magmoms,) if bugLev >= 5: print '\n===== kpoints =====\n' # kpoint coordinates. # In gw the kpoints get updated, so we use the last. # Not in PyLada? lst = root.findall( 'kpoints/varray[@name=\'kpointlist\']') if len(lst) == 0: throwerr('kpoints varray not found') kpointsNode = lst[-1] resObj.kpointFracMat = getRawArray( kpointsNode, 'v', 0, 3, float) resObj.numKpoint = resObj.kpointFracMat.shape[0] resObj.kpointCartMat = resObj.posScaleFactor * np.dot( resObj.kpointFracMat, resObj.initialRecipBasisMat) if bugLev >= 5: print 'numKpoint: %g' % (resObj.numKpoint,) print 'kpointFracMat:\n%s' % (repr(resObj.kpointFracMat),) print 'kpointCartMat:\n%s' % (repr(resObj.kpointCartMat),) # This is what PyLada calls multiplicity. # The only diff is the scaling. # sum( Pylada multiplicity) = numKpoint # sum( our kpointWeights) = 1.0 lst = root.findall( 'kpoints/varray[@name=\'weights\']') if len(lst) == 0: throwerr('weights varray not found') weightsNode = lst[-1] resObj.kpointWeights = getRawArray( weightsNode, 'v', 0, 1, float) resObj.kpointWeights = resObj.kpointWeights[:,0] # Only 1 col in 2d array if resObj.kpointWeights.shape[0] != resObj.numKpoint: throwerr('numKpoint mismatch') if bugLev >= 5: print 'kpointWeights:\n%s' % (repr(resObj.kpointWeights),) print 'kpointWeights sum: %g' % (sum(resObj.kpointWeights),) if bugLev >= 5: print '\n===== final volume =====\n' volPath = 'structure[@name=\'finalpos\']/crystal/i[@name=\'volume\']' nodes = root.findall( volPath) if len(nodes) == 0: resObj.finalVolume_ang3 = None else: resObj.finalVolume_ang3 = getScalar( root, volPath, float) if bugLev >= 5: print 'finalVolume_ang3: %s' % (resObj.finalVolume_ang3,) if bugLev >= 5: print '\n===== last calc forces =====\n' forcePath = 'calculation[last()]/varray[@name=\'forces\']/v' nodes = root.findall( forcePath) if len(nodes) == 0: resObj.finalForceMat_ev_ang = None resObj.finalStressMat_kbar = None else: # Get forceMat resObj.finalForceMat_ev_ang = getRawArray( root, forcePath, 0, 3, float) # Sometimes forceMat can have a few NaN elements, # which postgresql calls invalid. if not all( np.isfinite( resObj.finalForceMat_ev_ang.flat)): resObj.finalForceMat_ev_ang = None resObj.finalStressMat_kbar = None else: # Get stressMat resObj.finalStressMat_kbar = getRawArray( root, 'calculation[last()]/varray[@name=\'stress\']/v', 3, 3, float) if bugLev >= 5: print 'finalForceMat_ev_ang:\n%s' % (repr(resObj.finalForceMat_ev_ang),) print 'finalStressMat_kbar:\n%s' % (repr(resObj.finalStressMat_kbar),) if bugLev >= 5: print '\n===== eigenvalues and occupancies =====\n' # PyLada: eigenvalues eigenPath = 'calculation[last()]/eigenvalues/array' nodes = root.findall( eigenPath) if len(nodes) == 0: resObj.numBand = None resObj.eigenMat = None resObj.occupMat = None else: eigenMrr = getArrayByPath( bugLev, root, eigenPath) if bugLev >= 5: print '\neigenMrr beg =====:' printMrr( eigenMrr) print '\neigenMrr end =====:' for isp in range( resObj.numSpin): print '\neigenMrr: eigene[isp=%d][0]\n%s' \ % (isp, repr(eigenMrr['eigene'][isp][0]),) print '\neigenMrr: occ[isp=%d][0]\n%s' \ % (isp, repr(eigenMrr['occ'][isp][0]),) shp = eigenMrr['_dimLens'] if bugLev >= 5: print 'eigenMrr shape: %s' % (shp,) if shp[0] != resObj.numSpin: throwerr('numSpin mismatch') if shp[1] != resObj.numKpoint: throwerr('numKpoint mismatch') if shp[2] != prmNumBand: # see caution at prmNumBand, above print('numBand mismatch: prm: %d shape: %d inFile: %s' \ % (prmNumBand, shp[2], inFile,)) resObj.numBand = shp[2] resObj.eigenMat = eigenMrr['eigene'] # Caution: for non-magnetic (numSpin==1), # OUTCAR has occupMat values = 2, while vasprun.xml has values = 1. # For magnetic (numSpin==2), both OUTCAR and vasprun.xml have 1. resObj.occupMat = eigenMrr['occ'] if resObj.numSpin == 1: resObj.occupMat *= 2 if bugLev >= 5: print 'resObj.eigenMat.shape: ', resObj.eigenMat.shape print 'resObj.occupMat.shape: ', resObj.occupMat.shape # Compare projected and standard eigenvalues getProjected = False if getProjected: for isp in range( resObj.numSpin): projEigenMrr = getArrayByPath( bugLev, root, 'calculation[last()]/projected/eigenvalues/array') # eigs and projected eigs are identical eigs = resObj.eigenMrr['eigene'][isp] peigs = projEigenMrr['eigene'][isp] if bugLev >= 5: print 'Compare iegs, peigs for isp: %d' % (isp,) print ' eigs.shape: ', eigs.shape print ' peigs.shape: ', peigs.shape print ' eigs[0,:]: ', eigs[0,:] print ' peigs[0,:]: ', peigs[0,:] print ' Diff projeigs - eigs: max maxabs: %g' \ % (max( map( max, abs(peigs - eigs))),) # occs and projected occs are identical occs = resObj.eigenMrr['occ'][isp] poccs = projEigenMrr['occ'][isp] if bugLev >= 5: print 'Compare occs, poccs for isp: %d' % (isp,) print ' occs.shape: ', occs.shape print ' poccs.shape: ', poccs.shape print ' occs[0,:]: ', occs[0,:] print ' poccs[0,:]: ', poccs[0,:] print ' Diff projoccs - occs: max maxabs: %g' \ % (max( map( max, abs(poccs - occs))),) if bugLev >= 5: print '\n===== dielectric function =====\n' imagPath = 'calculation[last()]/dielectricfunction/imag/array' nodes = root.findall( imagPath) if len(nodes) == 0: resObj.dielectricImag = None resObj.dielectricReal = None else: dielImagMrr = getArrayByPath( bugLev, root, imagPath) dielRealMrr = getArrayByPath( bugLev, root, 'calculation[last()]/dielectricfunction/real/array') ndielectric = dielImagMrr['energy'].shape[0] dielectricImag = np.zeros((ndielectric, 7)) # E,xx,yy,zz,xy,yz,zx dielectricReal = np.zeros((ndielectric, 7)) # E,xx,yy,zz,xy,yz,zx dietags = ['energy', 'xx', 'yy', 'zz', 'xy', 'yz', 'zx'] for jj in range( len( dietags)): tag = dietags[jj] immat = dielImagMrr[tag] remat = dielRealMrr[tag] for ii in range( ndielectric): dielectricImag[ii,jj] = immat[ii] dielectricReal[ii,jj] = remat[ii] resObj.dielectricImag = dielectricImag resObj.dielectricReal = dielectricReal if bugLev >= 5: print '\ndielImagMrr:' printMrr( dielImagMrr) print '\ndielRealMrr:' printMrr( dielRealMrr) print '\ndielectricImag:' for ii in range( ndielectric): print ' [%d]: %s' % (ii, resObj.dielectricImag[ii,:],) print '' print '\ndielectricReal:' for ii in range( ndielectric): print ' [%d]: %s' % (ii, resObj.dielectricReal[ii,:],) print '' if bugLev >= 5: print '\n===== misc junk =====\n' # functional: comes from appended FUNCTIONAL. # success: look for final section # General timing and accounting informations for this job: # xxx skip: Hubbard / NLEP if bugLev >= 5: print '\n===== energy, efermi0 =====\n' energyPath = 'calculation/energy/i[@name=\'e_wo_entrp\']' nodes = root.findall( energyPath) if len(nodes) == 0: resObj.energyNoEntrps = None else: resObj.energyNoEntrps = [] for node in nodes: txt = node.text.strip() resObj.energyNoEntrps.append( float( txt)) if bugLev >= 5: print 'energyNoEntrps: %s' % (resObj.energyNoEntrps,) # efermi0 # PyLada uses an algorithm to compare the sum of occupancies # to the valence. # We get it from the xml file here. # PyLada: 5.8574 # XML: 5.93253 efermiPath = 'calculation[last()]/dos/i[@name=\'efermi\']' nodes = root.findall( efermiPath) if len(nodes) == 0: resObj.efermi0 = None else: resObj.efermi0 = getScalar( root, efermiPath, float) if bugLev >= 5: print 'efermi0: %s' % (resObj.efermi0,) return ########################### End of parseXml ############################### # The following code was used for initial testing, # and who knows, someday may be useful again. #print '\n' #print '\ntesta:' #lst = root.findall('kpoints/generation/v[@name=\'genvec2\']') #amat = [] #for ele in lst: # text = ele.text # print ' ele.text: %s' % (text,) # toks = text.split() # vals = map( float, toks) # amat.append( vals) #print 'amat: %s' % (amat,) #amat = np.array( amat, dtype=float) #print 'amat:\n%s' % (amat,) #vec = getVec( root, 'kpoints/generation/v[@name=\'genvec2\']', 0, 0, float) #print 'vec:\n%s' % (vec,) #amat = getRawArray( root, 'kpoints/generation/v', 0, 0, float) #print 'amat:\n%s' % (amat,) #calcNodes = root.findall('calculation') #print '\nlen(calcNodes): %d' % (len(calcNodes,)) ## pairs: (itot, en_wo_entrp) for the energy of each scstep #scstep_withouts = [] ## pairs: (itot, en_wo_entrp) for the last energy of each calculation step #calcstep_withouts = [] #basisMats = [] #recipBasisMats = [] #posMats = [] #forceMats = [] #stressMats = [] #itot = 0 # index all scsteps, across calculations #ncalc = len( calcNodes) #for icalc in range( ncalc): # cnode = calcNodes[icalc] # forceMat = getRawArray( cnode, 'varray[@name=\'forces\']/v', 0, 0, float) # print '\nforceMat for calcNodes[%d]:\n%s' % (icalc, forceMat,) # scNodes = cnode.findall('scstep') # print ' len(scNodes): %d' % (len(scNodes,)) # for isc in range(len(scNodes)): # snode = scNodes[isc] # sc_e_fr = getScalar( snode, 'energy/i[@name=\'e_fr_energy\']', float) # sc_e_wo = getScalar( snode, 'energy/i[@name=\'e_wo_entrp\']', float) # sc_e_0 = getScalar( snode, 'energy/i[@name=\'e_0_energy\']', float) # print ' scNodes[%d]: sc_e_fr: %g sc_e_wo: %g sc_e_0: %g' \ # % (isc, sc_e_fr, sc_e_wo, sc_e_0,) # scstep_withouts.append( (itot, sc_e_wo,)) # itot += 1 # # Structure for this calculation step # strucNodes = cnode.findall('structure') # if len(strucNodes) != 1: throwerr('calc structure not found') # snode = strucNodes[0] # basisMat = getRawArray( # snode, 'crystal/varray[@name=\'basis\']/v', 3, 3, float) # recipBasisMat = getRawArray( # snode, 'crystal/varray[@name=\'rec_basis\']/v', 3, 3, float) # # xxx should be nrow = num atoms # posMat = getRawArray( # snode, 'varray[@name=\'positions\']/v', 0, 3, float) # print ' Calc final: basisMat:\n%s' % (basisMat,) # print ' Calc final: recipBasisMat:\n%s' % (recipBasisMat,) # print ' Calc final: posMat:\n%s' % (posMat,) # basisMats.append( basisMat) # recipBasisMats.append( recipBasisMat) # posMats.append( posMat) # # Forces for this calculation step # forceNodes = cnode.findall('varray[@name=\'forces\']') # if len(forceNodes) != 1: throwerr('calc forces not found') # forceMat = getRawArray( forceNodes[0], 'v', 0, 3, float) # print ' Calc final: forceMat:\n%s' % (forceMat,) # forceMats.append( forceMat) # # Stress for this calculation step # stressNodes = cnode.findall('varray[@name=\'stress\']') # if len(stressNodes) != 1: throwerr('calc stresses not found') # stressMat = getRawArray( stressNodes[0], 'v', 3, 3, float) # print ' Calc final: stressMat:\n%s' % (stressMat,) # stressMats.append( stressMat) # # Final energy for this calculation step # enNodes = cnode.findall('energy') # if len(enNodes) != 1: throwerr('calc energy not found') # enode = enNodes[0] # c_e_fr = getScalar( enode, 'i[@name=\'e_fr_energy\']', float) # c_e_wo = getScalar( enode, 'i[@name=\'e_wo_entrp\']', float) # c_e_0 = getScalar( enode, 'i[@name=\'e_0_energy\']', float) # print ' Calc final: c_e_fr: %g c_e_wo: %g c_e_0: %g' \ # % (c_e_fr, c_e_wo, c_e_0,) # calcstep_withouts.append( (itot - 1, c_e_wo,)) #print '' #print 'scstep_withouts: %s' % (scstep_withouts,) #print '' #print 'calcstep_withouts: %s' % (calcstep_withouts,) #scmat = np.array( scstep_withouts, dtype=float) #print '' #print 'scmat:\n%s' % (scmat,) #calcmat = np.array( calcstep_withouts, dtype=float) #print '' #print 'calcmat:\n%s' % (calcmat,) #print '' #print 'Investigate DOS' #icals = len(calcNodes) - 1 #cnode = calcNodes[icalc] #setNodes = cnode.findall('dos/total/array/set/set[@comment=\'spin 1\']') #print ' len(total setNodes): %d' % (len(setNodes),) #print ' setNodes[0]: %s' % (setNodes[0],) #if len(setNodes) != 1: throwerr('dos/total not found') #dosTotalMat = getRawArray( setNodes[0], 'r', 0, 0, float) #print '' #print 'type(dosTotalMat): ', type(dosTotalMat) #print 'dosTotalMat.shape: ', dosTotalMat.shape #print '' #print 'dosTotalMat:\n%s' % (dosTotalMat,) #dosPartialMats = [] #partialSetNodes = cnode.findall('dos/partial/array/set') #print ' len(partialSetNodes): %d' % (len(partialSetNodes),) #if len(partialSetNodes) != 1: throwerr('dos/partial not found') #partialSet = partialSetNodes[0] #ionNodes = partialSet.findall('set') #print ' len(ionNodes): %d' % (len(ionNodes),) ## xxx should be nrow = num atoms #for ii in range(len(ionNodes)): # dosPartialMat = getRawArray( # ionNodes[ii], 'set[@comment=\'spin 1\']/r', 0, 0, float) # print '' # print 'dosPartialMat %d:' % (ii,) # print 'type(dosPartialMat): ', type(dosPartialMat) # print 'dosPartialMat.shape: ', dosPartialMat.shape # print '' # print 'dosPartialMat:\n%s' % (dosPartialMat,) # dosPartialMats.append( dosPartialMat) #print 'len(dosPartialMats): %d' % (len(dosPartialMats),) #print '\nbasisMats: len: %d' % (len(basisMats),) #for mat in basisMats: print '%s' % (mat,) #print '\nrecipBasisMats: len: %d' % (len(recipBasisMats),) #for mat in recipBasisMats: print '%s' % (mat,) #print '\nposMats: len: %d' % (len(posMats),) #for mat in posMats: print '%s' % (mat,) #print '\nforceMats: len: %d' % (len(forceMats),) #for mat in forceMats: print '%s' % (mat,) #print '\nstressMats: len: %d' % (len(stressMats),) #for mat in stressMats: print '%s' % (mat,) #basisDeltas = calcMatDeltas( basisMats) #recipBasisDeltas = calcMatDeltas( recipBasisMats) #posDeltas = calcMatDeltas( posMats) #forceDeltas = calcMatDeltas( forceMats) #stressDeltas = calcMatDeltas( stressMats) #print 'basisDeltas: %s' % ( basisDeltas,) #print 'recipBasisDeltas: %s' % ( recipBasisDeltas,) #print 'posDeltas: %s' % ( posDeltas,) #print 'forceDeltas: %s' % ( forceDeltas,) #print 'stressDeltas: %s' % ( stressDeltas,) #import matplotlib #matplotlib.use('tkagg') #import matplotlib.pyplot as plt #fig, axes = plt.subplots( 1, 1) ###ax00 = axes[0,0] #ax00 = axes #ax00.plot( dosTotalMat[:,0], dosTotalMat[:,1], color='r', linestyle='-', # marker=None) #ax00.set_xlabel('Energy, eV') #ax00.set_ylabel('Number of states per unit cell') #ax00.set_title('Density of states') #ax00.xaxis.grid(color='lightblue', linestyle='solid') #ax00.yaxis.grid(color='lightblue', linestyle='solid') ##plt.show() ##fig, ax = plt.subplots() ## ##ax.plot( scmat[:,0], scmat[:,1], 'b-') ##ax.plot( calcmat[:,0], calcmat[:,1], 'bo') ##ax.set_ylim( calcmat[-1,1] - 5, calcmat[-1,1] + 5) ##ax.xaxis.grid(color='lightblue', linestyle='solid') ##ax.yaxis.grid(color='lightblue', linestyle='solid') ## ##savefig('tempa.png', dpi=100, orientation='landscape', papertype='letter') ## ##plt.show() #tnodes = root.findall('calculation[last()]') #printNode( tnodes[0], 0, 1) #tnodes = root.findall('calculation[last()]/eigenvalues') #printNode( tnodes[0], 0, 1) #tnodes = root.findall('calculation[last()]/eigenvalues/array') #printNode( tnodes[0], 0, 1) #res = getArrayByPath( # bugLev, root, 'calculation[last()]/eigenvalues/array') #print '\ncalculation[last()]/eigenvalues:\n%s' % (res,) #print '\n' #====================================================================
[docs]def printNode( node, curLev, maxLev): ''' Recursively prints an XML tree, given an xml.etree.cElementTree node. **Parameters**: * node (xml.etree.ElementTree.Element): The root of the XML tree. * curLev (int): The current recursion level. Starts at 0 and is incremented for each recursive call. * maxLev (int): The max number of levels to print **Returns**: * None ''' if curLev <= maxLev: if node.tail == None: tail = 'None' else: tail = '"%s"' % (node.tail.strip(),) if node.text == None: text = 'None' else: text = '"%s"' % (node.text.strip(),) print '%stag: %s attrib: %s tail: %s text: %s' \ % (curLev * ' ', node.tag, node.attrib, tail, text,) for kid in node: printNode( kid, curLev + 1, maxLev) #====================================================================
[docs]def parseText( path, nmin, nmax, dtype, text): ''' Splits ``text`` into tokens, and converts each token to ``dtype``. Called by getVec, getRawArray. **Parameters**: * path (str): the XML tree path to the current node, for error msgs. * nmin (int): the minimum num tokens. If fewer are found, throwerr. * nmax (int): the maximum num tokens. If more are found, throwerr. * dtype (python type): Either int, float, or str: the tokens are converted to dtype. * text (str): the text string to be split. **Returns**: * list of tokens each having type = dtype. ''' toks = text.split() ntok = len( toks) if ntok < nmin: throwerr('ntok < nmin for path: "%s" text: "%s"' % (path, text,)) if nmax > 0 and ntok > nmax: throwerr('ntok > nmax for path: "%s" text: "%s"' % (path, text,)) vals = ntok * [None] for ii in range(ntok): tok = toks[ii] if dtype == bool: tok = tok.lower() if tok in ['f', 'false', '.false.']: val = False elif tok in ['t', 'true', '.true.']: val = True else: throwerr('invalid bool in path: "%s" text: "%s"' % (path, text,)) elif dtype == int: try: val = int( tok) except ValueError, exc: throwerr('invalid int in path: "%s" text: "%s"' % (path, text,)) elif dtype == float: try: val = float( tok) except ValueError, exc: throwerr('invalid float in path: "%s" text: "%s"' % (path, text,)) elif dtype == str: val = tok else: throwerr('unknown dtype for path: "%s"' % (path,)) vals[ii] = val return vals #====================================================================
[docs]def getVec( root, path, nmin, nmax, dtype): ''' Gets text at the specified XML path, splits, and converts tokens ``dtype``. **Parameters**: * root (xml.etree.ElementTree.Element): The current XML node. * path (str): the XML path from the current node. * nmin (int): the minimum num tokens. If fewer are found, throwerr. * nmax (int): the maximum num tokens. If more are found, throwerr. * dtype (python type): Either int, float, or str: the tokens are converted to dtype. **Returns**: * list of tokens each having type = dtype. ''' text = getString( root, path) vals = parseText( path, nmin, nmax, dtype, text) return vals #==================================================================== # Return stripped string
[docs]def getString( root, path): ''' Gets text at the specified XML path, insures there's just 1, and returns it. **Parameters**: * root (xml.etree.ElementTree.Element): The current XML node. * path (str): the XML path from the current node. **Returns**: * stripped string. ''' lst = root.findall( path) if len(lst) == 0: throwerr('path not found: "%s"' % (path,)) if len(lst) > 1: throwerr('multiple matches for path: "%s"' % (path,)) ele = lst[0] text = ele.text return text.strip() #====================================================================
[docs]def getScalar( root, path, dtype): ''' Gets text at the specified XML path, and converts it to ``dtype``. **Parameters**: * root (xml.etree.ElementTree.Element): The current XML node. * path (str): the XML path from the current node. * dtype (python type): Either int, float, or str: the token is converted to dtype. **Returns**: * item having type = dtype. ''' lst = getVec( root, path, 1, 1, dtype) return lst[0] #====================================================================
[docs]def getRawArray( root, path, nrow, ncol, dtype): ''' Gets text at the specified XML path, and converts to a 2D numpy array of ``dtype``. The text must be organized as one text element per row. **Parameters**: * root (xml.etree.ElementTree.Element): The current XML node. * path (str): the XML path from the current node. * nrow (int): the number of rows. If 0, allow any number. * ncol (int): the number of columns. If 0, allow any number. * dtype (python type): Either int, float, or str: the tokens are converted to dtype. **Returns**: * A regular 2-dimensional numpy array of dtype. ''' lst = root.findall( path) nlst = len( lst) if nlst == 0: throwerr('path not found: "%s"' % (path,)) if nrow > 0 and nlst != nrow: throwerr('nrow mismatch for path: "%s". expected: %d found: %d' \ % (path, nrow, nlst,)) rows = [] for ii in range(nlst): ele = lst[ii] text = ele.text vals = parseText( path, 0, 0, dtype, text) if len(rows) == 0: ncolActual = len( vals) if len(vals) != ncolActual: throwerr('irregular array for path: "%s"' % (path,)) if ncol > 0 and ncolActual != ncol: throwerr('ncol mismatch path: "%s"' % (path,)) rows.append( vals) if dtype == int: amat = np.array( rows, dtype=int) elif dtype == float: amat = np.array( rows, dtype=float) else: throwerr('unknown dtype for path: "%s"' % (path,)) return amat #====================================================================
[docs]def getArrayByPath( bugLev, baseNode, path): ''' Converts an XML ``<array>`` element in vasprun.xml to a map with an array. See :func:`getArrayByNode` for details. **Parameters**: * bugLev (int): Debug level. Normally 0. * baseNode (xml.etree.ElementTree.Element): current XML node * path (str): XML path from baseNode for the ``<array>`` element. **Returns**: * A Python array ''' arrNodes = baseNode.findall( path) if len(arrNodes) != 1: throwerr('path not found: "%s"' % (path,)) res = getArrayByNode( bugLev, arrNodes[0]) return res #==================================================================== # Returns Mrr == map containing array, like: # atomMrr: # _dimLens: [6] # _dimNames: ['ion'] # _fieldNames: ['element', 'atomtype'] # _fieldTypes: ['s', 'i'] # element: ['Mo' 'Mo' 'S' 'S' 'S' 'S'] # atomtype: [1 1 2 2 2 2]
[docs]def getArrayByNode( bugLev, arrNode): ''' Converts an XML ``<array>`` element in vasprun.xml to a map with an array. Calls getArraySub to extract each field. The output Python map has the following structure: ============= ======================================================== key value ============= ======================================================== _dimLens numpy vec of dimension lengths. len( dimLens) == n == numDimensions. _dimNames numpy vec of dimension names. len( dimLens) == n == numDimensions. _fieldNames numpy vec of field names in the parallel arrays. len( fieldNames) == numVariables. _fieldTypes numpy vec of field types in the parallel arrays. len( fieldTypes) == numVariables. The types are: 'i': int, 'f': float, 's': str <fieldName> numpy n-dimensional array of the field <fieldName> <fieldName> numpy n-dimensional array of the field <fieldName> <fieldName> numpy n-dimensional array of the field <fieldName> ... ============= ======================================================== Example XML for a 1-dimensional array with 2 fields: :: <array name="atoms" > <dimension dim="1">ion</dimension> <field type="string">element</field> <field type="int">atomtype</field> <set> <rc><c>C </c><c> 1</c></rc> <rc><c>Fe</c><c> 2</c></rc> <rc><c>Fe</c><c> 2</c></rc> <rc><c>Fe</c><c> 2</c></rc> <rc><c>Fe</c><c> 2</c></rc> </set> </array> Example resulting map: :: _dimLens: [5] _dimNames: ['ion'] _fieldNames: ['element' 'atomtype'] _fieldTypes: ['s' 'i'] element: ['C' 'Fe' 'Fe' 'Fe' 'Fe'] atomtype: [1 2 2 2 2] Multiple dimension arrays also are supported. The vasprun.xml handling of dimensions is unusual. What they claim is ``dim="1"`` actually is the least significant dimension and varies fastest, both in the XML data and in our resulting Python array. So the XML ``<dimension dim="1">band</dimension>`` becomes the last dimension in the resulting Python array. Example XML for a 3 dimensional array with 2 fields: :: <array> <dimension dim="1">band</dimension> <dimension dim="2">kpoint</dimension> <dimension dim="3">spin</dimension> <field>eigene</field> <field>occ</field> <set> <set comment="spin 1"> <set comment="kpoint 1"> <r> -6.5058 1.0000 </r> <r> 0.2537 1.0000 </r> <r> 0.7101 1.0000 </r> ... <r> 8.1390 0.0000 </r> </set> <set comment="kpoint 2"> <r> -6.3718 1.0000 </r> <r> -0.0841 1.0000 </r> <r> 0.7508 1.0000 </r> ... </set> <set comment="kpoint 101"> <r> -5.8567 1.0000 </r> <r> -0.0854 1.0000 </r> <r> 0.9602 1.0000 </r> <r> 7.7174 0.0000 </r> <r> 7.8556 0.0000 </r> </set> </set> </set> </array> Example resulting map: :: _dimLens: [ 1 101 22] _dimNames: ['spin' 'kpoint' 'band'] _fieldNames: ['eigene' 'occ'] _fieldTypes: ['f' 'f'] eigene: [[[-6.5058 0.2537 0.7101 ..., 7.6096 7.8817 8.139 ] [-6.3718 -0.0841 0.7508 ..., 7.481 7.8491 7.9595] [-6.1332 -0.611 1.0672 ..., 7.0857 7.8655 7.9314] ..., [-5.8462 0.3687 0.9498 ..., 7.1721 7.4739 7.6631] [-5.8016 0.5503 0.5886 ..., 7.4113 7.5794 7.7332] [-5.8567 -0.0854 0.9602 ..., 7.2729 7.7174 7.8556]]] occ: [[[ 1. 1. 1. ..., 0. 0. 0. ] [ 1. 1. 1. ..., 0. 0. 0. ] [ 1. 1. 1. ..., 1. 0. 0. ] ..., [ 1. 1. 1. ..., 1. 0. 0. ] [ 1. 1. 1. ..., 0. 0. 0. ] [ 1. 1. 1. ..., 0.9751 0. 0. ]]] **Parameters**: * bugLev (int): Debug level. Normally 0. * node (xml.etree.ElementTree.Element): The XML node for the ``<array>`` element. **Returns**: * A Python array ''' dimNodes = arrNode.findall('dimension') ndim = len( dimNodes) if ndim == 0: throwerr('no dimensions found') dimNames = [nd.text for nd in dimNodes] dimNames.reverse() # dimNames are in reverse order in XML dimNames = np.array( dimNames, dtype=str) dimLens = np.zeros( [ndim], dtype=int) fieldNodes = arrNode.findall('field') nfield = len( fieldNodes) if nfield == 0: throwerr('no fields found') fieldNames = [nd.text for nd in fieldNodes] fieldNames = np.array( fieldNames, dtype=str) # We set fieldTypes[ifield] to max( all found types for ifield) # Types are: 0:int, 1:float, 2:string fieldTypes = nfield * [0] setNodes = arrNode.findall('set') if len(setNodes) != 1: throwerr('wrong len for primary set') setNode = setNodes[0] resList = nfield * [None] for ifield in range( nfield): amat = getArraySub( bugLev, setNode, ifield, fieldTypes, 0, # idim dimLens) # Convert all elements of each field ifield to fieldTypes[ifield]. if fieldTypes[ifield] == 0: amat = np.array( amat, dtype=int) elif fieldTypes[ifield] == 1: amat = np.array( amat, dtype=float) elif fieldTypes[ifield] == 2: amat = np.array( amat, dtype=str) else: throwerr('unknown fieldType') resList[ifield] = amat # Convert fieldTypes from 0,1,2 to 'i', 'f', 's' fldMap = { 0:'i', 1:'f', 2:'s'} fieldTypeStgs = map( lambda x: fldMap[x], fieldTypes) fieldTypeStgs = np.array( fieldTypeStgs, dtype=str) resMap = { '_dimNames': dimNames, '_dimLens': dimLens, '_fieldNames': fieldNames, '_fieldTypes': fieldTypeStgs, } for ii in range(len(fieldNames)): ar = resList[ii] if not all(ar.shape == np.array(dimLens)): throwerr('dimLens mismatch') resMap[fieldNames[ii]] = ar return resMap #====================================================================
[docs]def getArraySub( bugLev, setNode, ifield, fieldTypes, idim, dimLens): ''' Decodes the XML for one field (one variable) for an ``<array>``. Called by getArrayByNode. See :func:`getArrayByNode` for details. **Parameters**: * bugLev (int): Debug level. Normally 0. * setNode (xml.etree.ElementTree.Element): the element for ``<set>``. * ifield (int): the index number of the field. * fieldTypes (int[]): the numeric field types so far. The numeric types are: 0: int, 1: float, 2: str. We take the max of the field types. * tp (Python type): The desired type. * idim (int): dimension number == recursion level == array nest level. 0 on the first call, 1 for the next level array, etc. * dimLens (int[]): list of dimension lengths. Updated. **Returns**: * A Python array with elements of type str. The caller converts them to the correct type. ''' nfield = len(fieldTypes) ndim = len(dimLens) # If we're at the last dimension, decode the element values. if idim == ndim - 1: # Try long form: # <set> # <rc> # <c>2</c> # <c>Mo</c> rcNodes = setNode.findall('rc') # long form: <rc> <c> # Try short form: # <set comment='spin 1'> # <set comment='kpoint 1'> # <r>-30.3711 1.0000</r> # <r>-30.3709 1.0000</r> rNodes = setNode.findall('r') # short form: <r> nval = max( len( rcNodes), len( rNodes)) if dimLens[idim] == 0: dimLens[idim] = nval if nval != dimLens[idim]: throwerr('irregular array') resVec = nval * [None] if len(rcNodes) > 0: # long form: <rc> <c> for ival in range( nval): cNodes = rcNodes[ival].findall('c') if len(cNodes) != nfield: throwerr('wrong num fields') stg = cNodes[ifield].text resVec[ival] = stg elif len(rNodes) > 0: # short form: <r> for ival in range( nval): txt = rNodes[ival].text toks = txt.split() if len(toks) != nfield: throwerr('wrong num fields') resVec[ival] = toks[ifield] else: throwerr('unknown array structure') # Strip all strings. # Set fieldTypes[ifield] to max( current type, all found types) # Types are: 0:int, 1:float, 2:string for ival in range( nval): resVec[ival] = resVec[ival].strip() stg = resVec[ival] ftype = 2 # assume worst case: string try: float( stg) ftype = 1 except ValueError: pass try: int( stg) ftype = 0 except ValueError: pass fieldTypes[ifield] = max( fieldTypes[ifield], ftype) else: # else idim < ndim - 1. Recursion. setNodes = setNode.findall('set') nset = len( setNodes) if dimLens[idim] == 0: dimLens[idim] = nset if nset != dimLens[idim]: throwerr('irregular array') resVec = nset * [None] for iset in range(nset): resVec[iset] = getArraySub( # recursion bugLev, setNodes[iset], ifield, fieldTypes, idim + 1, dimLens) return resVec #==================================================================== # Not used
def convertTypesUnused( tp, vec): ''' Recursively converts the elements of an array ``vec`` from str to the specified type. **Parameters**: * tp (Python type): The desired type. * vec (str[] or str[][] or ...): the array to be converted. **Returns**: * A Python array with elements of type ``tp``. ''' if isinstance( vec[0], str): for ii in range(len(vec)): vec[ii] = tp( vec[ii]) elif isinstance( vec[0], list): for subVec in vec: convertTypes( tp, subVec) # recursion else: throwerr('unknown array structure') #====================================================================
[docs]def maxAbsDiff( mata, matb): ''' Returns the max abs diff between two 2D numpy matrices. **Parameters**: * mata (numpy 2D array): Array to be compared. * matb (numpy 2D array): Array to be compared. **Returns**: * float scalar: max_i( max_j( abs( mata[i][j] - matb[i][j])) ''' (nrow,ncol) = mata.shape if matb.shape != mata.shape: throwerr('maxAbsDiff: shape mismatch') diffMat = abs( matb - mata) res = max( map( max, diffMat)) return res #====================================================================
[docs]def calcMatDeltas( mats): ''' Returns the max abs diffs between adjacent pairs of a list of 2D numpy matrices. **Parameters**: * mats (list of 2D numpy matrices) **Returns**: * deltas (float[]): deltas[k] = maxAbsDiff( mats[k-1], mats[k]) ''' nmat = len( mats) deltas = [] for ii in range( 1, nmat): delta = maxAbsDiff( mats[ii-1], mats[ii]) deltas.append( delta) return deltas #====================================================================
[docs]def printMrr( vmap): ''' Prints the Mrr map returned by getArrayByPath or getArrayByNode. **Parameters**: * vmap (map): the MRR map **Returns**: * None ''' keys = vmap.keys() keys.sort() for key in keys: if key.startswith('_'): val = vmap[key] print ' %s: %s' % (key, val,) for key in vmap['_fieldNames']: val = vmap[key] print ' %s: %s' % (key, val,) print '' #====================================================================
[docs]def throwerr( msg): ''' Prints an error message and raises Exception. **Parameters**: * msg (str): Error message. **Returns** * (Never returns) **Raises** * Exception ''' print msg print >> sys.stderr, msg raise Exception( msg) #====================================================================
if __name__ == '__main__': main() #====================================================================