#!/usr/bin/env python
# Copyright 2013 National Renewable Energy Laboratory, Golden CO, USA
# This file is part of NREL MatDB.
#
# NREL MatDB is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# NREL MatDB is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with NREL MatDB. If not, see <http://www.gnu.org/licenses/>.
import datetime, re, sys, traceback, os.path
import xml.etree.cElementTree as etree
import numpy as np
np.seterr( all='raise', under='ignore')
import readVasp
#====================================================================
def badparms( msg):
print '\nError: %s' % (msg,)
print 'Parms:'
print ' -bugLev <int> debug level'
print ' -inFile <string> input file'
print ' -maxLev <int> max xml print level'
print ''
sys.exit(1)
#====================================================================
[docs]def main():
'''
Test driver: Extracts info from a VASP vasprun.xml file.
Command line parameters:
================ ========= ==============================================
Parameter Type Description
================ ========= ==============================================
**-bugLev** integer Debug level. Normally 0.
**-inFile** string Input file
**-maxLev** int max xml print level
================ ========= ==============================================
'''
bugLev = 0
inFile = None
maxLev = 0
if len(sys.argv) % 2 != 1:
badparms('Parms must be key/value pairs')
for iarg in range( 1, len(sys.argv), 2):
key = sys.argv[iarg]
val = sys.argv[iarg+1]
if key == '-bugLev': bugLev = int( val)
elif key == '-inFile': inFile = val
elif key == '-maxLev': maxLev = int( val)
else: badparms('unknown key: "%s"' % (key,))
if bugLev == None: badparms('parm not specified: -bugLev')
if inFile == None: badparms('parm not specified: -inFile')
if maxLev == None: badparms('parm not specified: -maxLev')
resObj = ResClass()
parseXml( bugLev, inFile, maxLev, resObj)
#====================================================================
#====================================================================
[docs]class ResClass:
'''
An empty class used as a data container for results.
'''
def __str__(self):
keys = self.__dict__.keys()
keys.sort()
msg = ''
for key in keys:
val = self.__dict__[key]
stg = str( val)
if stg.find('\n') >= 0: sep = '\n'
else: sep = ' '
msg += ' %s: type: %s val:%s%s\n' % (key, type(val), sep, val,)
return msg
#====================================================================
#====================================================================
# Fills resObj.
[docs]def parseXml( bugLev, inFile, maxLev, resObj):
'''
Extracts info from the vasprun.xml file from a VASP run,
using the Python xml.etree.cElementTree API.
**Parameters**:
* bugLev (int): Debug level. Normally 0.
* inFile (str): Path of the input vasprun.xml file.
* resObj (class ResClass): data object: we set attributes here.
**Returns**:
* None
'''
try:
tree = etree.parse( inFile)
except Exception, exc:
throwerr(('parseXml: invalid xml in file: "%s"\n'
+ ' Msg: %s\n') % (inFile, repr(exc),))
root = tree.getroot()
if bugLev >= 1: printNode( root, 0, maxLev) # node, curLev, maxLev
if bugLev >= 5: print '\n===== program, version, date etc =====\n'
# xxx program, version, subversion, etc
# PyLada: vasp/extract/base.py: datetime()
# OUTCAR: use the 1 occurance of:
# executed on LinuxIFC date 2013.03.11 09:32:24
dtStg = getString( root, 'generator/i[@name=\'date\']')
tmStg = getString( root, 'generator/i[@name=\'time\']')
dateFmtIn = '%Y %m %d %H:%M:%S'
dateFmtOut = '%Y-%m-%d %H:%M:%S'
resObj.runDate = datetime.datetime.strptime(
'%s %s' % (dtStg, tmStg), dateFmtIn)
if bugLev >= 5: print 'runDate: %s' % (resObj.runDate.strftime( dateFmtOut),)
# iterTimes
# Each node is has cpuTime, wallTime:
# <time name='totalsc'>22.49 24.43</time>
nodes = root.findall('calculation/time[@name=\'totalsc\']')
iterCpuTimes = []
iterRealTimes = []
for node in nodes:
txt = node.text
toks = txt.split()
if len(toks) == 1:
# Kluge: sometimes the two time fields run together:
# <time name="totalsc">18560.1718566.89</time>
# should be:
# <time name="totalsc">18560.17 18566.89</time>
# In this case, try to split it,
# or just use the first time for both values.
tok = toks[0]
ix = tok.find('.')
if ix < 0: throwerr('invalid times: %s' % (etree.tostring(node),))
iy = tok.find('.', ix + 1)
if iy < 0 or iy != len(tok) - 3:
throwerr('invalid times: %s' % (etree.tostring(node),))
tmStga = tok[:ix+3]
tmStgb = tok[ix+3:]
elif len(toks) == 2:
tmStga = toks[0]
tmStgb = toks[1]
else: throwerr('invalid times: %s' % (etree.tostring(node),))
iterCpuTimes.append( float( tmStga))
iterRealTimes.append( float( tmStgb))
resObj.iterCpuTimes = iterCpuTimes
resObj.iterRealTimes = iterRealTimes
if bugLev >= 5:
print 'iterCpuTimes: %s' % (resObj.iterCpuTimes,)
print 'iterRealTimes: %s' % (resObj.iterRealTimes,)
if bugLev >= 5: print '\n===== incar parameters =====\n'
# algo
# PyLada: vasp/extract/base.py: algo()
# OUTCAR: use the 1 occurance of:
# ALGO = Fast
algoPath = 'incar/i[@name=\'ALGO\']'
if len( root.findall( algoPath)) == 0:
resObj.algo = 'n/a'
else:
resObj.algo = getString( root, algoPath)
if bugLev >= 5: print 'algo: "%s"' % (resObj.algo,)
# encut
# PyLada: vasp/extract/base.py: encut()
# OUTCAR: use the first occurance of:
# ENCUT = 252.0 eV 18.52 Ry 4.30 a.u. 4.08 4.08 15.92*2*pi/ulx,y,z
# ENCUT = 252.0
resObj.encut_ev = getScalar( root, 'incar/i[@name=\'ENCUT\']', float)
if bugLev >= 5: print 'encut_ev: %g' % (resObj.encut_ev,)
# ldauType
# PyLada: vasp/extract/base.py: LDAUType()
# OUTCAR: use the first occurance of:
# LDA+U is selected, type is set to LDAUTYPE = 2
# LDAUTYPE = 2
#rawLdauType = getScalar( root, 'incar/v[@name=\'LDAUTYPE\']', int)
#if rawLdauType == 1: resObj.ldauType = 'liechtenstein'
#elif rawLdauType == 2: resObj.ldauType = 'dudarev'
#else: throwerr('unknown rawLdauType: %d' % (rawLdauType,))
#if bugLev >= 5:
# print 'rawLdauType: %d ldauType: %s' % (rawLdauType, resObj.ldauType,)
tstg = getString( root, 'incar/i[@name=\'SYSTEM\']')
resObj.systemName = tstg.strip('\'"') # get rid of extraneous quotes
if bugLev >= 5: print 'systemName: "%s"' % (resObj.systemName,)
if bugLev >= 5: print '\n===== general parameters =====\n'
resObj.generalName = getString(
root, 'parameters/separator[@name=\'general\']/i[@name=\'SYSTEM\']')
if bugLev >= 5: print 'generalName: "%s"' % (resObj.generalName,)
if bugLev >= 5: print '\n===== electronic parameters =====\n'
lst = root.findall('parameters/separator[@name=\'electronic\']')
if len(lst) != 1: throwerr('electronic parameters not found')
elecNode = lst[0]
# ialgo
# PyLada: use the 1 occurance of:
# Electronic relaxation 2 (details)
# IALGO = 68 algorithm
resObj.ialgo = getScalar( elecNode, 'i[@name=\'IALGO\']', int)
resObj.ediff = getScalar( elecNode, 'i[@name=\'EDIFF\']', float)
if bugLev >= 5:
print 'ialgo: %d' % (resObj.ialgo,)
print 'ediff: %g' % (resObj.ediff,)
# numBand = nbands
# Caution: in some cases NBANDS != eigenMrr['eigene'].shape[2]
# So we use the eigene dimension instead.
# See further below.
prmNumBand = getScalar( elecNode, 'i[@name=\'NBANDS\']', int)
if bugLev >= 5: print 'prmNumBand: %d' % (prmNumBand,)
# numElectron = nelect
# PyLada: vasp/extract/base.py: nelect()
# OUTCAR: use the 1 occurance of:
# NELECT = 48.0000 total number of electrons
resObj.numElectron = getScalar( elecNode, 'i[@name=\'NELECT\']', float)
if bugLev >= 5: print 'numElectron: %d' % (resObj.numElectron,)
# icharg
resObj.icharg = getScalar(
elecNode,
'separator[@name=\'electronic startup\']/i[@name=\'ICHARG\']',
int)
if bugLev >= 5: print 'icharg: %g' % (resObj.icharg,)
# numSpin == ispin
resObj.numSpin = getScalar(
elecNode, 'separator[@name=\'electronic spin\']/i[@name=\'ISPIN\']', int)
if bugLev >= 5: print 'numSpin: %g' % (resObj.numSpin,)
if bugLev >= 5: print '\n===== ionic parameters =====\n'
# Some parameters like IBRION are also found in INCAR, sometimes.
# But apparently they are always in the ionic parameters section.
lst = root.findall('parameters/separator[@name=\'ionic\']')
if len(lst) != 1: throwerr('ionic parameters not found')
ionNode = lst[0]
resObj.ibrion = getScalar( ionNode, 'i[@name=\'IBRION\']', int)
resObj.isif = getScalar( ionNode, 'i[@name=\'ISIF\']', int)
resObj.posScaleFactor = getScalar( ionNode, 'i[@name=\'SCALEE\']', float)
if bugLev >= 5:
print 'ibrion: %d' % (resObj.ibrion,)
print 'isif: %d' % (resObj.isif,)
print 'posScaleFactor: %g' % (resObj.posScaleFactor,)
if bugLev >= 5: print '\n===== symmetry parameters =====\n'
# Some parameters like ISYM are also found in INCAR, sometimes.
# But apparently they are always in the symmetry parameters section.
lst = root.findall('parameters/separator[@name=\'symmetry\']')
if len(lst) != 1: throwerr('symmetry parameters not found')
symNode = lst[0]
resObj.isym = getScalar( symNode, 'i[@name=\'ISYM\']', int)
if bugLev >= 5:
print 'isym: %d' % (resObj.isym,)
if bugLev >= 5: print '\n===== writing parameters =====\n'
# Some parameters like LOPTICS are also found in INCAR, sometimes.
# But apparently they are always in the writing parameters section.
lst = root.findall('parameters/separator[@name=\'writing\']')
if len(lst) != 1: throwerr('writing parameters not found')
writeNode = lst[0]
resObj.loptics = getScalar( writeNode, 'i[@name=\'LOPTICS\']', bool)
resObj.lwave = getScalar( writeNode, 'i[@name=\'LWAVE\']', bool)
if bugLev >= 5:
print 'loptics: %s' % (resObj.loptics,)
print 'lwave: %s' % (resObj.lwave,)
if bugLev >= 5: print '\n===== atom info =====\n'
# atomTypeMrr = map containing array. Example (some whitespace omitted):
# _dimLens: [2]
# _dimNames: ['type']
# _fieldNames: ['atomspertype' 'element' 'mass' 'valence' 'pseudopotential']
# _fieldTypes: ['i' 's' 'f' 'f' 's']
# atomspertype: [1 4]
# element: ['C ' 'Fe']
# mass: [ 12.011 55.847]
# valence: [ 4. 8.]
# pseudopotential: [' PAW_PBE C_s 06Sep2000 ' ' PAW_PBE Fe 06Sep2000 ']
atomTypeMrr = getArrayByPath(
bugLev, root, 'atominfo/array[@name=\'atomtypes\']')
resObj.typeNames = atomTypeMrr['element']
resObj.typeNums = atomTypeMrr['atomspertype']
resObj.typeMasses_amu = atomTypeMrr['mass']
resObj.typeValences = atomTypeMrr['valence']
resObj.typePseudos = atomTypeMrr['pseudopotential']
resObj.numAtom = sum( resObj.typeNums)
if bugLev >= 5:
print '\natomTypeMrr:'
printMrr( atomTypeMrr)
print '\natomTypes:'
print 'typeNames: %s' % ( resObj.typeNames,)
print 'typeNums: %s' % ( resObj.typeNums,)
print 'typeMasses_amu: %s' % ( resObj.typeMasses_amu,)
print 'typeValences: %s' % ( resObj.typeValences,)
print 'typePseudos: %s' % ( resObj.typePseudos,)
print 'numAtom: %d' % ( resObj.numAtom,)
numType = len( resObj.typeNames)
if len( resObj.typeNums) != numType:
self.throwerr('len( typeNums) mismatch')
if len( resObj.typeMasses_amu) != numType:
self.throwerr('len( typeMasses_amu) mismatch')
if len( resObj.typeValences) != numType:
self.throwerr('len( typeValences) mismatch')
if len( resObj.typePseudos) != numType:
self.throwerr('len( typePseudos) mismatch')
## Sort parallel arrays typeNames, typeNums, etc,
## by typeNames alphabetic order.
## Use an index sort with typeIxs.
## In rare cases like icsd_024360.cif/hs-ferro
## and icsd_060845_GW, the names are out of order.
## Sort to set typeIxs[newIx] == oldIx.
#typeIxs = range( numType)
#def sortFunc( ia, ib):
# return cmp( resObj.typeNames[ia], resObj.typeNames[ib])
#typeIxs.sort( sortFunc)
#if bugLev >= 5:
# print 'typeIxs: %s' % (typeIxs,)
#resObj.typeIxs = typeIxs
#resObj.typeNames = [resObj.typeNames[ix] for ix in typeIxs]
#resObj.typeNums = [resObj.typeNums[ix] for ix in typeIxs]
#resObj.typeMasses_amu = [resObj.typeMasses_amu[ix] for ix in typeIxs]
#resObj.typeValences = [resObj.typeValences[ix] for ix in typeIxs]
#resObj.typePseudos = [resObj.typePseudos[ix] for ix in typeIxs]
#if bugLev >= 5:
# print '\nsorted atomTypes:'
# print 'typeIxs: %s' % ( resObj.typeIxs,)
# print 'typeNames: %s' % ( resObj.typeNames,)
# print 'typeNums: %s' % ( resObj.typeNums,)
# print 'typeMasses_amu: %s' % ( resObj.typeMasses_amu,)
# print 'typeValences: %s' % ( resObj.typeValences,)
# print 'typePseudos: %s' % ( resObj.typePseudos,)
## Old way to get atomNames, atomMasses, atomValences, atomPseudos.
## Retained in case some day we need it.
## But the real values are set in "New way", below.
## atomMrr = map containing array. Example:
## _dimLens: [5]
## _dimNames: ['ion']
## _fieldNames: ['element' 'atomtype']
## _fieldTypes: ['s' 'i']
## element: ['C ' 'Fe' 'Fe' 'Fe' 'Fe']
## atomtype: [1 2 2 2 2]
#atomMrr = getArrayByPath(
# bugLev, root, 'atominfo/array[@name=\'atoms\']')
#oatomNames = atomMrr['element']
## oatomTypes are indices into typeNames, typeNums, typeMasses,
## typeValences, typePseudos
#oatomTypes = [ix - 1 for ix in atomMrr['atomtype']] # change to origin 0
#natom = len( oatomTypes)
#if bugLev >= 5:
# print '\natomMrr:'
# printMrr( atomMrr)
# print '\nunsorted atoms:'
# print 'oatomNames: %s' % ( oatomNames,)
# print 'oatomTypes: %s' % ( oatomTypes,)
## The permutation array typeIxs maps typeIxs[newIx] = oldIx.
## Invert it to get typeIxInvs[oldIx] = newIx.
#typeIxInvs = numType * [0]
#for ii in range( numType):
# typeIxInvs[ typeIxs[ii]] = ii
#if bugLev >= 5:
# print 'typeIxInvs: %s' % (typeIxInvs,)
## Sort oatomNames, oatomTypes by typeIxInvs[atomtype] so they
## are in the same order as typenames, typenums, etc, above.
## Currently oatomTypes[i] = old index num into type*.
## We want to sort by new index num into type*.
## Sort to set oatomIxs[newIx] == oldIx.
#oatomIxs = range( natom)
#def sortFunc( ia, ib):
# return cmp( typeIxInvs[ oatomTypes[ ia]], typeIxInvs[ oatomTypes[ ib]])
#oatomIxs.sort( sortFunc)
#oatomNames = [oatomNames[ix] for ix in oatomIxs]
#oatomTypes = [typeIxInvs[ oatomTypes[ix]] for ix in oatomIxs]
#if bugLev >= 5:
# print '\noatomIxs: %s' % (oatomIxs,)
# print '\nsorted atoms:'
# print 'oatomNames: %s' % ( oatomNames,)
# print 'oatomTypes: %s' % ( oatomTypes,)
# # indices into typeNames, typeNums
#oatomMasses_amu = natom * [None]
#oatomValences = natom * [None]
#oatomPseudos = natom * [None]
#for ii in range( natom):
# ix = oatomTypes[ii]
# if oatomNames[ii] != resObj.typeNames[ix]:
# throwerr('name mismatch')
# oatomMasses_amu[ii] = resObj.typeMasses_amu[ix]
# oatomValences[ii] = resObj.typeValences[ix]
# oatomPseudos[ii] = resObj.typePseudos[ix]
#if bugLev >= 5:
# print 'oatomNames: %s' % ( oatomNames,)
# print 'oatomTypes: %s' % ( oatomTypes,)
# print 'oatomMasses_amu: %s' % ( oatomMasses_amu,)
# print 'oatomValences: %s' % ( oatomValences,)
# print 'oatomPseudos: %s' % ( oatomPseudos,)
## Make sure typenames are in alphabetic order
#for ii in range(len(resObj.typeNames) - 1):
# if resObj.typeNames[ii] > resObj.typeNames[ii+1]:
# throwerr('typeNames not in order')
## Make sure atomnames are in alphabetic order
#for ii in range(len(resObj.atomNames) - 1):
# if resObj.atomNames[ii] > resObj.atomNames[ii+1]:
# throwerr('atomNames not in order')
# New way to get atomNames, atomMasses, atomValences, atomPseudos
resObj.atomNames = []
resObj.atomMasses_amu = []
resObj.atomValences = []
resObj.atomPseudos = []
for ii in range( numType):
nn = resObj.typeNums[ ii]
resObj.atomNames += nn * [resObj.typeNames[ii]]
resObj.atomMasses_amu += nn * [resObj.typeMasses_amu[ii]]
resObj.atomValences += nn * [resObj.typeValences[ii]]
resObj.atomPseudos += nn * [resObj.typePseudos[ii]]
numAtom = len( resObj.atomNames)
if bugLev >= 5: print '\n===== initial structure =====\n'
# Initial structure
# PyLada: vasp/extract/base.py: initial_structure()
# OUTCAR: uses the appended INITIAL STRUCTURE section.
lst = root.findall(
'structure[@name=\'initialpos\']/crystal/varray[@name=\'basis\']/v')
if bugLev >= 5: print 'len(lst) a:', len(lst)
# initial_structure
# POSCAR specifies each basis vector as one row.
# So does vasprun.xml.
# But PyLada's structure.cell is the transpose: each basis vec is a column.
resObj.initialBasisMat = getRawArray(
root,
'structure[@name=\'initialpos\']/crystal/varray[@name=\'basis\']/v',
3, 3, float)
resObj.initialRecipBasisMat = getRawArray(
root,
'structure[@name=\'initialpos\']/crystal/varray[@name=\'rec_basis\']/v',
3, 3, float)
# Get initialFracPosMat
resObj.initialFracPosMat = getRawArray(
root,
'structure[@name=\'initialpos\']/varray[@name=\'positions\']/v',
0, 3, float)
# xxx Check: nrow should be numAtom
resObj.initialCartPosMat = np.dot(
resObj.initialFracPosMat, resObj.initialBasisMat)
# xxx mult by scale factor?
if bugLev >= 5:
print 'initialBasisMat:\n%s' % (repr(resObj.initialBasisMat),)
print 'initialRecipBasisMat:\n%s' % (repr(resObj.initialRecipBasisMat),)
print 'initialFracPosMat:\n%s' % (repr(resObj.initialFracPosMat),)
print 'initialCartPosMat:\n%s' % (repr(resObj.initialCartPosMat),)
if bugLev >= 5: print '\n===== final structure =====\n'
# structure == final pos
# POSCAR and OUTCAR specify each basis vector as one row.
# So does vasprun.xml.
# But PyLada's structure.cell is the transpose: each basis vec is a column.
#
# In vasprun.xml and OUTCAR, the basis vectors are rows.
finalPath = 'structure[@name=\'finalpos\']/crystal/varray[@name=\'basis\']/v'
nodes = root.findall( finalPath)
if len(nodes) == 0:
resObj.finalBasisMat = None
resObj.finalRecipBasisMat = None
resObj.finalFracPosMat = None
resObj.finalCartPosMat = None
else:
resObj.finalBasisMat = getRawArray(
root,
finalPath,
3, 3, float)
resObj.finalRecipBasisMat = getRawArray(
root,
'structure[@name=\'finalpos\']/crystal/varray[@name=\'rec_basis\']/v',
3, 3, float)
# Get finalFracPosMat
resObj.finalFracPosMat = getRawArray(
root,
'structure[@name=\'finalpos\']/varray[@name=\'positions\']/v',
0, 3, float) # xxx nrow should be numAtom
# xxx Check: nrow should be numAtom
resObj.finalCartPosMat = np.dot(
resObj.finalFracPosMat, resObj.finalBasisMat)
# xxx mult by scale factor?
if bugLev >= 5:
print 'finalBasisMat:\n%s' % (repr(resObj.finalBasisMat),)
print 'finalRecipBasisMat:\n%s' % (repr(resObj.finalRecipBasisMat),)
print 'finalFracPosMat:\n%s' % (repr(resObj.finalFracPosMat),)
print 'finalCartPosMat:\n%s' % (repr(resObj.finalCartPosMat),)
if bugLev >= 5: print '\n===== magmom =====\n'
# Apparently the magnetization values are only in OUTCAR
# See ScanOutcar.py.
resObj.magmoms = None
if bugLev >= 5: print 'magmoms: %s' % (resObj.magmoms,)
if bugLev >= 5: print '\n===== kpoints =====\n'
# kpoint coordinates.
# In gw the kpoints get updated, so we use the last.
# Not in PyLada?
lst = root.findall( 'kpoints/varray[@name=\'kpointlist\']')
if len(lst) == 0: throwerr('kpoints varray not found')
kpointsNode = lst[-1]
resObj.kpointFracMat = getRawArray( kpointsNode, 'v', 0, 3, float)
resObj.numKpoint = resObj.kpointFracMat.shape[0]
resObj.kpointCartMat = resObj.posScaleFactor * np.dot(
resObj.kpointFracMat, resObj.initialRecipBasisMat)
if bugLev >= 5:
print 'numKpoint: %g' % (resObj.numKpoint,)
print 'kpointFracMat:\n%s' % (repr(resObj.kpointFracMat),)
print 'kpointCartMat:\n%s' % (repr(resObj.kpointCartMat),)
# This is what PyLada calls multiplicity.
# The only diff is the scaling.
# sum( Pylada multiplicity) = numKpoint
# sum( our kpointWeights) = 1.0
lst = root.findall( 'kpoints/varray[@name=\'weights\']')
if len(lst) == 0: throwerr('weights varray not found')
weightsNode = lst[-1]
resObj.kpointWeights = getRawArray( weightsNode, 'v', 0, 1, float)
resObj.kpointWeights = resObj.kpointWeights[:,0] # Only 1 col in 2d array
if resObj.kpointWeights.shape[0] != resObj.numKpoint:
throwerr('numKpoint mismatch')
if bugLev >= 5:
print 'kpointWeights:\n%s' % (repr(resObj.kpointWeights),)
print 'kpointWeights sum: %g' % (sum(resObj.kpointWeights),)
if bugLev >= 5: print '\n===== final volume =====\n'
volPath = 'structure[@name=\'finalpos\']/crystal/i[@name=\'volume\']'
nodes = root.findall( volPath)
if len(nodes) == 0:
resObj.finalVolume_ang3 = None
else:
resObj.finalVolume_ang3 = getScalar( root, volPath, float)
if bugLev >= 5:
print 'finalVolume_ang3: %s' % (resObj.finalVolume_ang3,)
if bugLev >= 5: print '\n===== last calc forces =====\n'
forcePath = 'calculation[last()]/varray[@name=\'forces\']/v'
nodes = root.findall( forcePath)
if len(nodes) == 0:
resObj.finalForceMat_ev_ang = None
resObj.finalStressMat_kbar = None
else:
# Get forceMat
resObj.finalForceMat_ev_ang = getRawArray(
root, forcePath,
0, 3, float)
# Sometimes forceMat can have a few NaN elements,
# which postgresql calls invalid.
if not all( np.isfinite( resObj.finalForceMat_ev_ang.flat)):
resObj.finalForceMat_ev_ang = None
resObj.finalStressMat_kbar = None
else:
# Get stressMat
resObj.finalStressMat_kbar = getRawArray(
root, 'calculation[last()]/varray[@name=\'stress\']/v',
3, 3, float)
if bugLev >= 5:
print 'finalForceMat_ev_ang:\n%s' % (repr(resObj.finalForceMat_ev_ang),)
print 'finalStressMat_kbar:\n%s' % (repr(resObj.finalStressMat_kbar),)
if bugLev >= 5: print '\n===== eigenvalues and occupancies =====\n'
# PyLada: eigenvalues
eigenPath = 'calculation[last()]/eigenvalues/array'
nodes = root.findall( eigenPath)
if len(nodes) == 0:
resObj.numBand = None
resObj.eigenMat = None
resObj.occupMat = None
else:
eigenMrr = getArrayByPath( bugLev, root, eigenPath)
if bugLev >= 5:
print '\neigenMrr beg =====:'
printMrr( eigenMrr)
print '\neigenMrr end =====:'
for isp in range( resObj.numSpin):
print '\neigenMrr: eigene[isp=%d][0]\n%s' \
% (isp, repr(eigenMrr['eigene'][isp][0]),)
print '\neigenMrr: occ[isp=%d][0]\n%s' \
% (isp, repr(eigenMrr['occ'][isp][0]),)
shp = eigenMrr['_dimLens']
if bugLev >= 5: print 'eigenMrr shape: %s' % (shp,)
if shp[0] != resObj.numSpin: throwerr('numSpin mismatch')
if shp[1] != resObj.numKpoint: throwerr('numKpoint mismatch')
if shp[2] != prmNumBand: # see caution at prmNumBand, above
print('numBand mismatch: prm: %d shape: %d inFile: %s' \
% (prmNumBand, shp[2], inFile,))
resObj.numBand = shp[2]
resObj.eigenMat = eigenMrr['eigene']
# Caution: for non-magnetic (numSpin==1),
# OUTCAR has occupMat values = 2, while vasprun.xml has values = 1.
# For magnetic (numSpin==2), both OUTCAR and vasprun.xml have 1.
resObj.occupMat = eigenMrr['occ']
if resObj.numSpin == 1: resObj.occupMat *= 2
if bugLev >= 5:
print 'resObj.eigenMat.shape: ', resObj.eigenMat.shape
print 'resObj.occupMat.shape: ', resObj.occupMat.shape
# Compare projected and standard eigenvalues
getProjected = False
if getProjected:
for isp in range( resObj.numSpin):
projEigenMrr = getArrayByPath(
bugLev, root, 'calculation[last()]/projected/eigenvalues/array')
# eigs and projected eigs are identical
eigs = resObj.eigenMrr['eigene'][isp]
peigs = projEigenMrr['eigene'][isp]
if bugLev >= 5:
print 'Compare iegs, peigs for isp: %d' % (isp,)
print ' eigs.shape: ', eigs.shape
print ' peigs.shape: ', peigs.shape
print ' eigs[0,:]: ', eigs[0,:]
print ' peigs[0,:]: ', peigs[0,:]
print ' Diff projeigs - eigs: max maxabs: %g' \
% (max( map( max, abs(peigs - eigs))),)
# occs and projected occs are identical
occs = resObj.eigenMrr['occ'][isp]
poccs = projEigenMrr['occ'][isp]
if bugLev >= 5:
print 'Compare occs, poccs for isp: %d' % (isp,)
print ' occs.shape: ', occs.shape
print ' poccs.shape: ', poccs.shape
print ' occs[0,:]: ', occs[0,:]
print ' poccs[0,:]: ', poccs[0,:]
print ' Diff projoccs - occs: max maxabs: %g' \
% (max( map( max, abs(poccs - occs))),)
if bugLev >= 5: print '\n===== dielectric function =====\n'
imagPath = 'calculation[last()]/dielectricfunction/imag/array'
nodes = root.findall( imagPath)
if len(nodes) == 0:
resObj.dielectricImag = None
resObj.dielectricReal = None
else:
dielImagMrr = getArrayByPath( bugLev, root, imagPath)
dielRealMrr = getArrayByPath(
bugLev, root, 'calculation[last()]/dielectricfunction/real/array')
ndielectric = dielImagMrr['energy'].shape[0]
dielectricImag = np.zeros((ndielectric, 7)) # E,xx,yy,zz,xy,yz,zx
dielectricReal = np.zeros((ndielectric, 7)) # E,xx,yy,zz,xy,yz,zx
dietags = ['energy', 'xx', 'yy', 'zz', 'xy', 'yz', 'zx']
for jj in range( len( dietags)):
tag = dietags[jj]
immat = dielImagMrr[tag]
remat = dielRealMrr[tag]
for ii in range( ndielectric):
dielectricImag[ii,jj] = immat[ii]
dielectricReal[ii,jj] = remat[ii]
resObj.dielectricImag = dielectricImag
resObj.dielectricReal = dielectricReal
if bugLev >= 5:
print '\ndielImagMrr:'
printMrr( dielImagMrr)
print '\ndielRealMrr:'
printMrr( dielRealMrr)
print '\ndielectricImag:'
for ii in range( ndielectric):
print ' [%d]: %s' % (ii, resObj.dielectricImag[ii,:],)
print ''
print '\ndielectricReal:'
for ii in range( ndielectric):
print ' [%d]: %s' % (ii, resObj.dielectricReal[ii,:],)
print ''
if bugLev >= 5: print '\n===== misc junk =====\n'
# functional: comes from appended FUNCTIONAL.
# success: look for final section
# General timing and accounting informations for this job:
# xxx skip: Hubbard / NLEP
if bugLev >= 5: print '\n===== energy, efermi0 =====\n'
energyPath = 'calculation/energy/i[@name=\'e_wo_entrp\']'
nodes = root.findall( energyPath)
if len(nodes) == 0:
resObj.energyNoEntrps = None
else:
resObj.energyNoEntrps = []
for node in nodes:
txt = node.text.strip()
resObj.energyNoEntrps.append( float( txt))
if bugLev >= 5: print 'energyNoEntrps: %s' % (resObj.energyNoEntrps,)
# efermi0
# PyLada uses an algorithm to compare the sum of occupancies
# to the valence.
# We get it from the xml file here.
# PyLada: 5.8574
# XML: 5.93253
efermiPath = 'calculation[last()]/dos/i[@name=\'efermi\']'
nodes = root.findall( efermiPath)
if len(nodes) == 0:
resObj.efermi0 = None
else:
resObj.efermi0 = getScalar( root, efermiPath, float)
if bugLev >= 5: print 'efermi0: %s' % (resObj.efermi0,)
return
########################### End of parseXml ###############################
# The following code was used for initial testing,
# and who knows, someday may be useful again.
#print '\n'
#print '\ntesta:'
#lst = root.findall('kpoints/generation/v[@name=\'genvec2\']')
#amat = []
#for ele in lst:
# text = ele.text
# print ' ele.text: %s' % (text,)
# toks = text.split()
# vals = map( float, toks)
# amat.append( vals)
#print 'amat: %s' % (amat,)
#amat = np.array( amat, dtype=float)
#print 'amat:\n%s' % (amat,)
#vec = getVec( root, 'kpoints/generation/v[@name=\'genvec2\']', 0, 0, float)
#print 'vec:\n%s' % (vec,)
#amat = getRawArray( root, 'kpoints/generation/v', 0, 0, float)
#print 'amat:\n%s' % (amat,)
#calcNodes = root.findall('calculation')
#print '\nlen(calcNodes): %d' % (len(calcNodes,))
## pairs: (itot, en_wo_entrp) for the energy of each scstep
#scstep_withouts = []
## pairs: (itot, en_wo_entrp) for the last energy of each calculation step
#calcstep_withouts = []
#basisMats = []
#recipBasisMats = []
#posMats = []
#forceMats = []
#stressMats = []
#itot = 0 # index all scsteps, across calculations
#ncalc = len( calcNodes)
#for icalc in range( ncalc):
# cnode = calcNodes[icalc]
# forceMat = getRawArray( cnode, 'varray[@name=\'forces\']/v', 0, 0, float)
# print '\nforceMat for calcNodes[%d]:\n%s' % (icalc, forceMat,)
# scNodes = cnode.findall('scstep')
# print ' len(scNodes): %d' % (len(scNodes,))
# for isc in range(len(scNodes)):
# snode = scNodes[isc]
# sc_e_fr = getScalar( snode, 'energy/i[@name=\'e_fr_energy\']', float)
# sc_e_wo = getScalar( snode, 'energy/i[@name=\'e_wo_entrp\']', float)
# sc_e_0 = getScalar( snode, 'energy/i[@name=\'e_0_energy\']', float)
# print ' scNodes[%d]: sc_e_fr: %g sc_e_wo: %g sc_e_0: %g' \
# % (isc, sc_e_fr, sc_e_wo, sc_e_0,)
# scstep_withouts.append( (itot, sc_e_wo,))
# itot += 1
# # Structure for this calculation step
# strucNodes = cnode.findall('structure')
# if len(strucNodes) != 1: throwerr('calc structure not found')
# snode = strucNodes[0]
# basisMat = getRawArray(
# snode, 'crystal/varray[@name=\'basis\']/v', 3, 3, float)
# recipBasisMat = getRawArray(
# snode, 'crystal/varray[@name=\'rec_basis\']/v', 3, 3, float)
# # xxx should be nrow = num atoms
# posMat = getRawArray(
# snode, 'varray[@name=\'positions\']/v', 0, 3, float)
# print ' Calc final: basisMat:\n%s' % (basisMat,)
# print ' Calc final: recipBasisMat:\n%s' % (recipBasisMat,)
# print ' Calc final: posMat:\n%s' % (posMat,)
# basisMats.append( basisMat)
# recipBasisMats.append( recipBasisMat)
# posMats.append( posMat)
# # Forces for this calculation step
# forceNodes = cnode.findall('varray[@name=\'forces\']')
# if len(forceNodes) != 1: throwerr('calc forces not found')
# forceMat = getRawArray( forceNodes[0], 'v', 0, 3, float)
# print ' Calc final: forceMat:\n%s' % (forceMat,)
# forceMats.append( forceMat)
# # Stress for this calculation step
# stressNodes = cnode.findall('varray[@name=\'stress\']')
# if len(stressNodes) != 1: throwerr('calc stresses not found')
# stressMat = getRawArray( stressNodes[0], 'v', 3, 3, float)
# print ' Calc final: stressMat:\n%s' % (stressMat,)
# stressMats.append( stressMat)
# # Final energy for this calculation step
# enNodes = cnode.findall('energy')
# if len(enNodes) != 1: throwerr('calc energy not found')
# enode = enNodes[0]
# c_e_fr = getScalar( enode, 'i[@name=\'e_fr_energy\']', float)
# c_e_wo = getScalar( enode, 'i[@name=\'e_wo_entrp\']', float)
# c_e_0 = getScalar( enode, 'i[@name=\'e_0_energy\']', float)
# print ' Calc final: c_e_fr: %g c_e_wo: %g c_e_0: %g' \
# % (c_e_fr, c_e_wo, c_e_0,)
# calcstep_withouts.append( (itot - 1, c_e_wo,))
#print ''
#print 'scstep_withouts: %s' % (scstep_withouts,)
#print ''
#print 'calcstep_withouts: %s' % (calcstep_withouts,)
#scmat = np.array( scstep_withouts, dtype=float)
#print ''
#print 'scmat:\n%s' % (scmat,)
#calcmat = np.array( calcstep_withouts, dtype=float)
#print ''
#print 'calcmat:\n%s' % (calcmat,)
#print ''
#print 'Investigate DOS'
#icals = len(calcNodes) - 1
#cnode = calcNodes[icalc]
#setNodes = cnode.findall('dos/total/array/set/set[@comment=\'spin 1\']')
#print ' len(total setNodes): %d' % (len(setNodes),)
#print ' setNodes[0]: %s' % (setNodes[0],)
#if len(setNodes) != 1: throwerr('dos/total not found')
#dosTotalMat = getRawArray( setNodes[0], 'r', 0, 0, float)
#print ''
#print 'type(dosTotalMat): ', type(dosTotalMat)
#print 'dosTotalMat.shape: ', dosTotalMat.shape
#print ''
#print 'dosTotalMat:\n%s' % (dosTotalMat,)
#dosPartialMats = []
#partialSetNodes = cnode.findall('dos/partial/array/set')
#print ' len(partialSetNodes): %d' % (len(partialSetNodes),)
#if len(partialSetNodes) != 1: throwerr('dos/partial not found')
#partialSet = partialSetNodes[0]
#ionNodes = partialSet.findall('set')
#print ' len(ionNodes): %d' % (len(ionNodes),)
## xxx should be nrow = num atoms
#for ii in range(len(ionNodes)):
# dosPartialMat = getRawArray(
# ionNodes[ii], 'set[@comment=\'spin 1\']/r', 0, 0, float)
# print ''
# print 'dosPartialMat %d:' % (ii,)
# print 'type(dosPartialMat): ', type(dosPartialMat)
# print 'dosPartialMat.shape: ', dosPartialMat.shape
# print ''
# print 'dosPartialMat:\n%s' % (dosPartialMat,)
# dosPartialMats.append( dosPartialMat)
#print 'len(dosPartialMats): %d' % (len(dosPartialMats),)
#print '\nbasisMats: len: %d' % (len(basisMats),)
#for mat in basisMats: print '%s' % (mat,)
#print '\nrecipBasisMats: len: %d' % (len(recipBasisMats),)
#for mat in recipBasisMats: print '%s' % (mat,)
#print '\nposMats: len: %d' % (len(posMats),)
#for mat in posMats: print '%s' % (mat,)
#print '\nforceMats: len: %d' % (len(forceMats),)
#for mat in forceMats: print '%s' % (mat,)
#print '\nstressMats: len: %d' % (len(stressMats),)
#for mat in stressMats: print '%s' % (mat,)
#basisDeltas = calcMatDeltas( basisMats)
#recipBasisDeltas = calcMatDeltas( recipBasisMats)
#posDeltas = calcMatDeltas( posMats)
#forceDeltas = calcMatDeltas( forceMats)
#stressDeltas = calcMatDeltas( stressMats)
#print 'basisDeltas: %s' % ( basisDeltas,)
#print 'recipBasisDeltas: %s' % ( recipBasisDeltas,)
#print 'posDeltas: %s' % ( posDeltas,)
#print 'forceDeltas: %s' % ( forceDeltas,)
#print 'stressDeltas: %s' % ( stressDeltas,)
#import matplotlib
#matplotlib.use('tkagg')
#import matplotlib.pyplot as plt
#fig, axes = plt.subplots( 1, 1)
###ax00 = axes[0,0]
#ax00 = axes
#ax00.plot( dosTotalMat[:,0], dosTotalMat[:,1], color='r', linestyle='-',
# marker=None)
#ax00.set_xlabel('Energy, eV')
#ax00.set_ylabel('Number of states per unit cell')
#ax00.set_title('Density of states')
#ax00.xaxis.grid(color='lightblue', linestyle='solid')
#ax00.yaxis.grid(color='lightblue', linestyle='solid')
##plt.show()
##fig, ax = plt.subplots()
##
##ax.plot( scmat[:,0], scmat[:,1], 'b-')
##ax.plot( calcmat[:,0], calcmat[:,1], 'bo')
##ax.set_ylim( calcmat[-1,1] - 5, calcmat[-1,1] + 5)
##ax.xaxis.grid(color='lightblue', linestyle='solid')
##ax.yaxis.grid(color='lightblue', linestyle='solid')
##
##savefig('tempa.png', dpi=100, orientation='landscape', papertype='letter')
##
##plt.show()
#tnodes = root.findall('calculation[last()]')
#printNode( tnodes[0], 0, 1)
#tnodes = root.findall('calculation[last()]/eigenvalues')
#printNode( tnodes[0], 0, 1)
#tnodes = root.findall('calculation[last()]/eigenvalues/array')
#printNode( tnodes[0], 0, 1)
#res = getArrayByPath(
# bugLev, root, 'calculation[last()]/eigenvalues/array')
#print '\ncalculation[last()]/eigenvalues:\n%s' % (res,)
#print '\n'
#====================================================================
[docs]def printNode( node, curLev, maxLev):
'''
Recursively prints an XML tree, given an xml.etree.cElementTree node.
**Parameters**:
* node (xml.etree.ElementTree.Element): The root of the XML tree.
* curLev (int): The current recursion level. Starts at 0 and
is incremented for each recursive call.
* maxLev (int): The max number of levels to print
**Returns**:
* None
'''
if curLev <= maxLev:
if node.tail == None: tail = 'None'
else: tail = '"%s"' % (node.tail.strip(),)
if node.text == None: text = 'None'
else: text = '"%s"' % (node.text.strip(),)
print '%stag: %s attrib: %s tail: %s text: %s' \
% (curLev * ' ', node.tag, node.attrib, tail, text,)
for kid in node:
printNode( kid, curLev + 1, maxLev)
#====================================================================
[docs]def parseText( path, nmin, nmax, dtype, text):
'''
Splits ``text`` into tokens, and converts each token to ``dtype``.
Called by getVec, getRawArray.
**Parameters**:
* path (str): the XML tree path to the current node, for error msgs.
* nmin (int): the minimum num tokens. If fewer are found, throwerr.
* nmax (int): the maximum num tokens. If more are found, throwerr.
* dtype (python type): Either int, float, or str: the tokens
are converted to dtype.
* text (str): the text string to be split.
**Returns**:
* list of tokens each having type = dtype.
'''
toks = text.split()
ntok = len( toks)
if ntok < nmin:
throwerr('ntok < nmin for path: "%s" text: "%s"' % (path, text,))
if nmax > 0 and ntok > nmax:
throwerr('ntok > nmax for path: "%s" text: "%s"' % (path, text,))
vals = ntok * [None]
for ii in range(ntok):
tok = toks[ii]
if dtype == bool:
tok = tok.lower()
if tok in ['f', 'false', '.false.']: val = False
elif tok in ['t', 'true', '.true.']: val = True
else: throwerr('invalid bool in path: "%s" text: "%s"' % (path, text,))
elif dtype == int:
try: val = int( tok)
except ValueError, exc:
throwerr('invalid int in path: "%s" text: "%s"' % (path, text,))
elif dtype == float:
try: val = float( tok)
except ValueError, exc:
throwerr('invalid float in path: "%s" text: "%s"' % (path, text,))
elif dtype == str: val = tok
else: throwerr('unknown dtype for path: "%s"' % (path,))
vals[ii] = val
return vals
#====================================================================
[docs]def getVec( root, path, nmin, nmax, dtype):
'''
Gets text at the specified XML path, splits, and converts tokens ``dtype``.
**Parameters**:
* root (xml.etree.ElementTree.Element): The current XML node.
* path (str): the XML path from the current node.
* nmin (int): the minimum num tokens. If fewer are found, throwerr.
* nmax (int): the maximum num tokens. If more are found, throwerr.
* dtype (python type): Either int, float, or str: the tokens
are converted to dtype.
**Returns**:
* list of tokens each having type = dtype.
'''
text = getString( root, path)
vals = parseText( path, nmin, nmax, dtype, text)
return vals
#====================================================================
# Return stripped string
[docs]def getString( root, path):
'''
Gets text at the specified XML path, insures there's just 1, and returns it.
**Parameters**:
* root (xml.etree.ElementTree.Element): The current XML node.
* path (str): the XML path from the current node.
**Returns**:
* stripped string.
'''
lst = root.findall( path)
if len(lst) == 0:
throwerr('path not found: "%s"' % (path,))
if len(lst) > 1:
throwerr('multiple matches for path: "%s"' % (path,))
ele = lst[0]
text = ele.text
return text.strip()
#====================================================================
[docs]def getScalar( root, path, dtype):
'''
Gets text at the specified XML path, and converts it to ``dtype``.
**Parameters**:
* root (xml.etree.ElementTree.Element): The current XML node.
* path (str): the XML path from the current node.
* dtype (python type): Either int, float, or str: the token
is converted to dtype.
**Returns**:
* item having type = dtype.
'''
lst = getVec( root, path, 1, 1, dtype)
return lst[0]
#====================================================================
[docs]def getRawArray( root, path, nrow, ncol, dtype):
'''
Gets text at the specified XML path, and converts to a
2D numpy array of ``dtype``.
The text must be organized as one text element per row.
**Parameters**:
* root (xml.etree.ElementTree.Element): The current XML node.
* path (str): the XML path from the current node.
* nrow (int): the number of rows. If 0, allow any number.
* ncol (int): the number of columns. If 0, allow any number.
* dtype (python type): Either int, float, or str: the tokens
are converted to dtype.
**Returns**:
* A regular 2-dimensional numpy array of dtype.
'''
lst = root.findall( path)
nlst = len( lst)
if nlst == 0:
throwerr('path not found: "%s"' % (path,))
if nrow > 0 and nlst != nrow:
throwerr('nrow mismatch for path: "%s". expected: %d found: %d' \
% (path, nrow, nlst,))
rows = []
for ii in range(nlst):
ele = lst[ii]
text = ele.text
vals = parseText( path, 0, 0, dtype, text)
if len(rows) == 0: ncolActual = len( vals)
if len(vals) != ncolActual:
throwerr('irregular array for path: "%s"' % (path,))
if ncol > 0 and ncolActual != ncol:
throwerr('ncol mismatch path: "%s"' % (path,))
rows.append( vals)
if dtype == int:
amat = np.array( rows, dtype=int)
elif dtype == float:
amat = np.array( rows, dtype=float)
else: throwerr('unknown dtype for path: "%s"' % (path,))
return amat
#====================================================================
[docs]def getArrayByPath( bugLev, baseNode, path):
'''
Converts an XML ``<array>`` element in vasprun.xml
to a map with an array.
See :func:`getArrayByNode` for details.
**Parameters**:
* bugLev (int): Debug level. Normally 0.
* baseNode (xml.etree.ElementTree.Element): current XML node
* path (str): XML path from baseNode for the ``<array>`` element.
**Returns**:
* A Python array
'''
arrNodes = baseNode.findall( path)
if len(arrNodes) != 1:
throwerr('path not found: "%s"' % (path,))
res = getArrayByNode( bugLev, arrNodes[0])
return res
#====================================================================
# Returns Mrr == map containing array, like:
# atomMrr:
# _dimLens: [6]
# _dimNames: ['ion']
# _fieldNames: ['element', 'atomtype']
# _fieldTypes: ['s', 'i']
# element: ['Mo' 'Mo' 'S' 'S' 'S' 'S']
# atomtype: [1 1 2 2 2 2]
[docs]def getArrayByNode( bugLev, arrNode):
'''
Converts an XML ``<array>`` element in vasprun.xml
to a map with an array.
Calls getArraySub to extract each field.
The output Python map has the following structure:
============= ========================================================
key value
============= ========================================================
_dimLens numpy vec of dimension lengths.
len( dimLens) == n == numDimensions.
_dimNames numpy vec of dimension names.
len( dimLens) == n == numDimensions.
_fieldNames numpy vec of field names in the parallel arrays.
len( fieldNames) == numVariables.
_fieldTypes numpy vec of field types in the parallel arrays.
len( fieldTypes) == numVariables.
The types are: 'i': int, 'f': float, 's': str
<fieldName> numpy n-dimensional array of the field <fieldName>
<fieldName> numpy n-dimensional array of the field <fieldName>
<fieldName> numpy n-dimensional array of the field <fieldName>
...
============= ========================================================
Example XML for a 1-dimensional array with 2 fields: ::
<array name="atoms" >
<dimension dim="1">ion</dimension>
<field type="string">element</field>
<field type="int">atomtype</field>
<set>
<rc><c>C </c><c> 1</c></rc>
<rc><c>Fe</c><c> 2</c></rc>
<rc><c>Fe</c><c> 2</c></rc>
<rc><c>Fe</c><c> 2</c></rc>
<rc><c>Fe</c><c> 2</c></rc>
</set>
</array>
Example resulting map: ::
_dimLens: [5]
_dimNames: ['ion']
_fieldNames: ['element' 'atomtype']
_fieldTypes: ['s' 'i']
element: ['C' 'Fe' 'Fe' 'Fe' 'Fe']
atomtype: [1 2 2 2 2]
Multiple dimension arrays also are supported.
The vasprun.xml handling of dimensions is unusual.
What they claim is ``dim="1"`` actually is the least
significant dimension and varies fastest, both
in the XML data and in our resulting Python array.
So the XML ``<dimension dim="1">band</dimension>``
becomes the last dimension in the resulting Python array.
Example XML for a 3 dimensional array with 2 fields: ::
<array>
<dimension dim="1">band</dimension>
<dimension dim="2">kpoint</dimension>
<dimension dim="3">spin</dimension>
<field>eigene</field>
<field>occ</field>
<set>
<set comment="spin 1">
<set comment="kpoint 1">
<r> -6.5058 1.0000 </r>
<r> 0.2537 1.0000 </r>
<r> 0.7101 1.0000 </r>
...
<r> 8.1390 0.0000 </r>
</set>
<set comment="kpoint 2">
<r> -6.3718 1.0000 </r>
<r> -0.0841 1.0000 </r>
<r> 0.7508 1.0000 </r>
...
</set>
<set comment="kpoint 101">
<r> -5.8567 1.0000 </r>
<r> -0.0854 1.0000 </r>
<r> 0.9602 1.0000 </r>
<r> 7.7174 0.0000 </r>
<r> 7.8556 0.0000 </r>
</set>
</set>
</set>
</array>
Example resulting map: ::
_dimLens: [ 1 101 22]
_dimNames: ['spin' 'kpoint' 'band']
_fieldNames: ['eigene' 'occ']
_fieldTypes: ['f' 'f']
eigene: [[[-6.5058 0.2537 0.7101 ..., 7.6096 7.8817 8.139 ]
[-6.3718 -0.0841 0.7508 ..., 7.481 7.8491 7.9595]
[-6.1332 -0.611 1.0672 ..., 7.0857 7.8655 7.9314]
...,
[-5.8462 0.3687 0.9498 ..., 7.1721 7.4739 7.6631]
[-5.8016 0.5503 0.5886 ..., 7.4113 7.5794 7.7332]
[-5.8567 -0.0854 0.9602 ..., 7.2729 7.7174 7.8556]]]
occ: [[[ 1. 1. 1. ..., 0. 0. 0. ]
[ 1. 1. 1. ..., 0. 0. 0. ]
[ 1. 1. 1. ..., 1. 0. 0. ]
...,
[ 1. 1. 1. ..., 1. 0. 0. ]
[ 1. 1. 1. ..., 0. 0. 0. ]
[ 1. 1. 1. ..., 0.9751 0. 0. ]]]
**Parameters**:
* bugLev (int): Debug level. Normally 0.
* node (xml.etree.ElementTree.Element):
The XML node for the ``<array>`` element.
**Returns**:
* A Python array
'''
dimNodes = arrNode.findall('dimension')
ndim = len( dimNodes)
if ndim == 0: throwerr('no dimensions found')
dimNames = [nd.text for nd in dimNodes]
dimNames.reverse() # dimNames are in reverse order in XML
dimNames = np.array( dimNames, dtype=str)
dimLens = np.zeros( [ndim], dtype=int)
fieldNodes = arrNode.findall('field')
nfield = len( fieldNodes)
if nfield == 0: throwerr('no fields found')
fieldNames = [nd.text for nd in fieldNodes]
fieldNames = np.array( fieldNames, dtype=str)
# We set fieldTypes[ifield] to max( all found types for ifield)
# Types are: 0:int, 1:float, 2:string
fieldTypes = nfield * [0]
setNodes = arrNode.findall('set')
if len(setNodes) != 1: throwerr('wrong len for primary set')
setNode = setNodes[0]
resList = nfield * [None]
for ifield in range( nfield):
amat = getArraySub(
bugLev,
setNode,
ifield,
fieldTypes,
0, # idim
dimLens)
# Convert all elements of each field ifield to fieldTypes[ifield].
if fieldTypes[ifield] == 0: amat = np.array( amat, dtype=int)
elif fieldTypes[ifield] == 1: amat = np.array( amat, dtype=float)
elif fieldTypes[ifield] == 2: amat = np.array( amat, dtype=str)
else: throwerr('unknown fieldType')
resList[ifield] = amat
# Convert fieldTypes from 0,1,2 to 'i', 'f', 's'
fldMap = { 0:'i', 1:'f', 2:'s'}
fieldTypeStgs = map( lambda x: fldMap[x], fieldTypes)
fieldTypeStgs = np.array( fieldTypeStgs, dtype=str)
resMap = {
'_dimNames': dimNames,
'_dimLens': dimLens,
'_fieldNames': fieldNames,
'_fieldTypes': fieldTypeStgs,
}
for ii in range(len(fieldNames)):
ar = resList[ii]
if not all(ar.shape == np.array(dimLens)): throwerr('dimLens mismatch')
resMap[fieldNames[ii]] = ar
return resMap
#====================================================================
[docs]def getArraySub(
bugLev,
setNode,
ifield,
fieldTypes,
idim,
dimLens):
'''
Decodes the XML for one field (one variable) for an
``<array>``.
Called by getArrayByNode. See :func:`getArrayByNode` for details.
**Parameters**:
* bugLev (int): Debug level. Normally 0.
* setNode (xml.etree.ElementTree.Element): the element for ``<set>``.
* ifield (int): the index number of the field.
* fieldTypes (int[]): the numeric field types so far.
The numeric types are: 0: int, 1: float, 2: str.
We take the max of the field types.
* tp (Python type): The desired type.
* idim (int): dimension number == recursion level == array nest level.
0 on the first call, 1 for the next level array, etc.
* dimLens (int[]): list of dimension lengths. Updated.
**Returns**:
* A Python array with elements of type str.
The caller converts them to the correct type.
'''
nfield = len(fieldTypes)
ndim = len(dimLens)
# If we're at the last dimension, decode the element values.
if idim == ndim - 1:
# Try long form:
# <set>
# <rc>
# <c>2</c>
# <c>Mo</c>
rcNodes = setNode.findall('rc') # long form: <rc> <c>
# Try short form:
# <set comment='spin 1'>
# <set comment='kpoint 1'>
# <r>-30.3711 1.0000</r>
# <r>-30.3709 1.0000</r>
rNodes = setNode.findall('r') # short form: <r>
nval = max( len( rcNodes), len( rNodes))
if dimLens[idim] == 0: dimLens[idim] = nval
if nval != dimLens[idim]: throwerr('irregular array')
resVec = nval * [None]
if len(rcNodes) > 0: # long form: <rc> <c>
for ival in range( nval):
cNodes = rcNodes[ival].findall('c')
if len(cNodes) != nfield: throwerr('wrong num fields')
stg = cNodes[ifield].text
resVec[ival] = stg
elif len(rNodes) > 0: # short form: <r>
for ival in range( nval):
txt = rNodes[ival].text
toks = txt.split()
if len(toks) != nfield: throwerr('wrong num fields')
resVec[ival] = toks[ifield]
else: throwerr('unknown array structure')
# Strip all strings.
# Set fieldTypes[ifield] to max( current type, all found types)
# Types are: 0:int, 1:float, 2:string
for ival in range( nval):
resVec[ival] = resVec[ival].strip()
stg = resVec[ival]
ftype = 2 # assume worst case: string
try:
float( stg)
ftype = 1
except ValueError: pass
try:
int( stg)
ftype = 0
except ValueError: pass
fieldTypes[ifield] = max( fieldTypes[ifield], ftype)
else: # else idim < ndim - 1. Recursion.
setNodes = setNode.findall('set')
nset = len( setNodes)
if dimLens[idim] == 0: dimLens[idim] = nset
if nset != dimLens[idim]: throwerr('irregular array')
resVec = nset * [None]
for iset in range(nset):
resVec[iset] = getArraySub( # recursion
bugLev,
setNodes[iset],
ifield,
fieldTypes,
idim + 1,
dimLens)
return resVec
#====================================================================
# Not used
def convertTypesUnused( tp, vec):
'''
Recursively converts the elements of an array ``vec``
from str to the specified type.
**Parameters**:
* tp (Python type): The desired type.
* vec (str[] or str[][] or ...): the array to be converted.
**Returns**:
* A Python array with elements of type ``tp``.
'''
if isinstance( vec[0], str):
for ii in range(len(vec)):
vec[ii] = tp( vec[ii])
elif isinstance( vec[0], list):
for subVec in vec:
convertTypes( tp, subVec) # recursion
else: throwerr('unknown array structure')
#====================================================================
[docs]def maxAbsDiff( mata, matb):
'''
Returns the max abs diff between two 2D numpy matrices.
**Parameters**:
* mata (numpy 2D array): Array to be compared.
* matb (numpy 2D array): Array to be compared.
**Returns**:
* float scalar: max_i( max_j( abs( mata[i][j] - matb[i][j]))
'''
(nrow,ncol) = mata.shape
if matb.shape != mata.shape: throwerr('maxAbsDiff: shape mismatch')
diffMat = abs( matb - mata)
res = max( map( max, diffMat))
return res
#====================================================================
[docs]def calcMatDeltas( mats):
'''
Returns the max abs diffs between adjacent pairs of a
list of 2D numpy matrices.
**Parameters**:
* mats (list of 2D numpy matrices)
**Returns**:
* deltas (float[]): deltas[k] = maxAbsDiff( mats[k-1], mats[k])
'''
nmat = len( mats)
deltas = []
for ii in range( 1, nmat):
delta = maxAbsDiff( mats[ii-1], mats[ii])
deltas.append( delta)
return deltas
#====================================================================
[docs]def printMrr( vmap):
'''
Prints the Mrr map returned by getArrayByPath or getArrayByNode.
**Parameters**:
* vmap (map): the MRR map
**Returns**:
* None
'''
keys = vmap.keys()
keys.sort()
for key in keys:
if key.startswith('_'):
val = vmap[key]
print ' %s: %s' % (key, val,)
for key in vmap['_fieldNames']:
val = vmap[key]
print ' %s: %s' % (key, val,)
print ''
#====================================================================
[docs]def throwerr( msg):
'''
Prints an error message and raises Exception.
**Parameters**:
* msg (str): Error message.
**Returns**
* (Never returns)
**Raises**
* Exception
'''
print msg
print >> sys.stderr, msg
raise Exception( msg)
#====================================================================
if __name__ == '__main__': main()
#====================================================================