Source code for nrelmat.queryMatDb

#!/usr/bin/env python
# Copyright 2013 National Renewable Energy Laboratory, Golden CO, USA
# This file is part of NREL MatDB.
#
# NREL MatDB is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# NREL MatDB is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with NREL MatDB.  If not, see <http://www.gnu.org/licenses/>.


import datetime, fractions, json, os, re, sys
import psycopg2
import numpy as np

# In order to run this you will need the psycopg2 package.
#
# On Redhat systems:             yum install python-psycopg2
# On Ubuntu and Debian systems:  apt-get install python-psycopg2
# Or if you have pip installed:  pip install psycopg2
#
# See badparms() below for parameters and examples.

#====================================================================


def badparms(msg):
    '''
    Parameters:

      -p   <string>   password

      -q   <string>   SQL query string

    The columns are:

      * **mident**  MatDB ID
      * **wrapid**  Identifier for this upload
      * **abspath**  absolute file path
      * **relpath**  relative file path
      * **icsdnum**  ICSD id
      * **magtype**  mag moment type:
        hsf=hs-ferro, hsaf=hs-antiferro, nm=non-mag
      * **magnum**  mag moment num for hs, ls antiferro
      * **relaxtype**  Type of run:
        std=standard, rc=relax_cellshape, ri=relax_ions
      * **relaxnum**  Folder num for rc or ri
      * **excmsg**  exception msg from digestVasp.py
      * **exctrace**  exception trace from digestVasp.py
      * **rundate**  run date
      * **itertotaltime**  run time in seconds
      * **systemname**  system name
      * **encut_ev**  INCAR encut parameter, eV
      * **ibrion**  INCAR ibrion parameter
      * **isif**  INCAR isif parameter
      * **numatom**  num atoms in unit cell
      * **typenames**  atom types
      * **typenums**  num of each type of atom
      * **typemasses**  atom mass for each type
      * **typepseudos**  atom pseudopotential name for each type
      * **typevalences**  atom valences for each type
      * **atomnames**  atom names
      * **atommasses_amu**  atom masses
      * **atompseudos**  atom pseudopotential names
      * **atomvalences**  atom valences in cell
      * **initialbasismat**  initial basis matrix, rows are basis vecs
      * **initialrecipbasismat**  initial reciprocal basis matrix,
        rows are basis vecs
      * **initialcartposmat**  initial cartesian position matrix
      * **initialfracposmat**  initial fractional (direct) position matrix
      * **finalbasismat**  final basis matrix, rows are basis vecs
      * **finalrecipbasismat**  final reciprocal basis matrix,
        rows are basis vecs
      * **finalcartposmat**  final cartesian position matrix
        (row per atom, = direct * basis)
      * **finalfracposmat**  final fractional (direct) position matrix
      * **finalvolume_ang3**  final cell volume, Angstrom3
      * **finaldensity_g_cm3**  final cell density, g/cm3
      * **finalforcemat_ev_ang**  final force matrix, eV/Ang, row per atom
      * **finalstressmat_kbar**  final stress matrix, kbar, (3x3)
      * **finalpressure_kbar**  final pressure, Kbar
      * **eigenmat**  final eigenvalue matrix ([spin][kpoint][band])
      * **dielectricImag**  dielectric fcn, imag part:
        E(ev),x,y,z,xy,yz,zx
      * **dielectricReal**  dielectric fcn, real part:
        E(ev),x,y,z,xy,yz,zx
      * **dosMat**  density of states: E(ev),DOS,integDOS
      * **energynoentrp**  final total energy without entropy
      * **energyperatom**  final total energy without entropy,
        per atom in cell
      * **efermi0**  fermi energy at 0K
      * **cbmin**  conduction band minimum energy
      * **vbmax**  valence band maximum energy
      * **bandgapindirect**  indirect bandgap
      * **bandgapdirect**  direct bandgap
      * **netcharge**  net charge
      * **formula**  chemical formula
      * **chemtext**  chemical formula with spaces
      * **minenergyid**  id having min energy for this formula
      * **enthalpy**  final enthalpy, eV/atom
      * **initialspacegroupname**  initial space group name
      * **initialspacegroupnum**  initial space group number
      * **finalspacegroupname**  final space group name
      * **finalspacegroupnum**  final space group number
      * **hashstring**  sha512 sum of vasprun.xml file
      * **meta_parents**  parent entries
      * **meta_firstname**  First name of the researcher.
      * **meta_lastname**  Last name of the researcher.
      * **meta_publications**  DOIs of publications
      * **meta_standards**  standards
      * **meta_keywords**  keywords
      * **meta_notes**  notes

    Example queries::

      # Find structures with Ge and Na.
      # Note that the elements in the formulas are always in alphabetic order,
      # so searching for 'Na.*Ge' won't work.
      # Below the "~" operator is a regular expression match.
      queryMatDb.py -p someSecret -q "select mident, icsdnum, relaxtype, \
        formula, energyperatom, bandgapindirect from model where formula ~ 'Ge.*Na'"

      # Search for Ge structures having 0.5 <= bandgapindirect <= 0.7
      queryMatDb.py -p someSecret -q "select mident, icsdnum, relaxtype, \
        formula, energyperatom, bandgapindirect from model where formula ~ 'Ge' \
        and bandgapindirect >= 0.5 and bandgapindirect <= 0.7"
    '''
    # The docstring above doubles as the command-line help text, so it
    # documents the program's parameters rather than this function.
    #
    # This function reports a usage error: it prints the error message
    # ``msg`` (a string), then the help text, and terminates the process
    # with exit status 1.  It never returns.
    #
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('\nError: %s' % (msg,))
    print(badparms.__doc__)
    sys.exit(1)


#====================================================================
def doQuery(password, query, dbhost='kestrel.hpc.nrel.gov', dbport=5432,
            dbuser='cidread', dbname='cidlada', dbschema='satom'):
    '''
    Perform one query of the database.

    Opens a new connection, runs the query through testQuery, and closes
    the cursor and connection again, even if the query raises.

    Parameters:

    * password (string): password for database.
    * query (string): postgresql query to execute.  It is executed as
      given, so it must come from a trusted source.
    * dbhost (string): database server host name.
    * dbport (int): database server port.
    * dbuser (string): database user name.
    * dbname (string): database name.
    * dbschema (string): schema placed on the search path before the
      query runs.

    Returns

    * rowMaps (list): list of python dictionaries (one for each row
      retrieved) whose keys are retrieved column names and values
      are corresponding field values.

    Any exception raised by psycopg2 while connecting or querying
    propagates to the caller.
    '''
    bugLev = 1    # debug level

    conn = psycopg2.connect(
        host=dbhost,
        port=dbport,
        user=dbuser,
        password=password,
        database=dbname)
    # Nested try/finally blocks: the connection is closed even when
    # closing the cursor itself fails.
    try:
        if bugLev >= 1:
            print('main: got conn. dbhost: %s dbport: %d' % (dbhost, dbport,))
        cursor = conn.cursor()
        try:
            rowMaps = testQuery(dbschema, cursor, query)
        finally:
            cursor.close()
    finally:
        conn.close()
    return rowMaps


def main():
    '''
    Demo query of the NRELMatDB SQL database.

    Simply issues the specified query and prints the resulting rows.
    See badparms for more documentation.

    The command line (sys.argv) must consist of key/value pairs:
    ``-p <password>`` and ``-q <query>``.  Both are required.  Any
    problem with the parameters is reported through badparms, which
    prints the help text and exits.
    '''
    password = None
    query = None

    # After the program name, parameters must come in key/value pairs,
    # so the total length of sys.argv has to be odd.
    if len(sys.argv) % 2 != 1:
        badparms('Parms must be key/value pairs')
    args = sys.argv[1:]
    for key, val in zip(args[0::2], args[1::2]):
        if key == '-p':
            password = val
        elif key == '-q':
            query = val
        else:
            badparms('unknown parameter: "%s"' % (key,))

    if password is None:
        badparms('parm not specified: -p')
    if query is None:
        badparms('parm not specified: -q')

    rowMaps = doQuery(password, query)

    # Print every row, with the columns in alphabetical order.
    # sorted() works on both Python 2 and 3, unlike keys().sort().
    for irow, rowMap in enumerate(rowMaps):
        print('\nrow %d:' % (irow,))
        for key in sorted(rowMap):
            print(' key: %-20s value: %s' % (key, rowMap[key],))

    # Print some values as numpy arrays
    print("NOW PRINTING MORE STUFF??")
    # The numpy demo below was switched off in the original source;
    # set showArrays to True to enable it.
    showArrays = False
    if showArrays:
        matrixKeys = ['initialbasismat', 'initialfracposmat',
                      'initialcartposmat']
        for irow, rowMap in enumerate(rowMaps):
            for key in matrixKeys:
                if key in rowMap:
                    print('row %d %s:' % (irow, key,))
                    print(np.array(rowMap[key]))


#====================================================================
def testQuery(dbschema, cursor, query):
    '''
    Issue the specified query and return the resulting rows.

    Prints the number of columns and rows retrieved; the rows themselves
    are returned, not printed.

    Parameters:

    * dbschema (string): schema name placed on the search path before
      the query is run.
    * cursor: open database cursor providing execute(), fetchall()
      and description (as a psycopg2 cursor does).
    * query (string): SQL query to execute.  It is executed as given,
      so it must come from a trusted source.

    Returns

    * rowMaps (list): one dictionary per retrieved row, mapping each
      column name to the corresponding field value.
    '''
    # The schema name is passed as a bound parameter rather than being
    # formatted into the SQL text.
    cursor.execute('set search_path to %s', (dbschema,))
    cursor.execute(query)

    db_rows = cursor.fetchall()
    # The first element of each description entry is the column name.
    db_cols = [desc[0] for desc in cursor.description]
    print('number of columns: %d' % (len(db_cols),))
    print('number of rows: %d' % (len(db_rows),))

    return [dict(zip(db_cols, row)) for row in db_rows]


# Not a unit test despite its name: tell pytest not to collect it when
# this module's names are imported into a test module.
testQuery.__test__ = False


#====================================================================
def throwerr(msg):
    '''
    Prints an error message and raises Exception.

    The message is written to both stdout and stderr before the
    exception is raised.

    Parameters:

    * msg (string): the error message.

    Raises

    * Exception: always, carrying msg.
    '''
    # print(...) and stream.write(...) behave the same on Python 2 and 3,
    # unlike the "print >> stream" statement.
    print(msg)
    sys.stderr.write('%s\n' % (msg,))
    raise Exception(msg)


#====================================================================
# Run the demo query only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()