""" Classes to manipulate output from job folders. """
__docformat__ = "restructuredtext en"
__all__ = ['AbstractMassExtract']
from abc import ABCMeta, abstractmethod
class AbstractMassExtract(object):
    """ Collects extraction methods from different job-folders.

        Wraps around a root job folder and provides means to access it (or
        something related to it). In practice, a derived class will hold a list
        of *somethings* which does something good for a particular folder. This
        is a base class, concerned mostly with providing a rich mapping and
        attribute access interface. It allows the user to focus on a small set
        of executable folders `via` the mapping (``[]``) methods, e.g. a view
        of the folders. The attributes of the wrapped *somethings* of the
        current view are retrieved into a :py:class:`forwarding dict
        <pylada.jobfolder.forwardingdict.ForwardingDict>`.

        The :py:meth:`__iter_alljobs__` method should be implemented within
        derived classes. It should yield for each executable folder a tuple
        consisting of the name of that folder and the relevant *something*.
    """
    __metaclass__ = ABCMeta

    def __init__(self, path=None, view=None, excludes=None, dynamic=False,
                 ordered=True, naked_end=None, unix_re=True):
        """ Initializes extraction object.

            :param str path:
                Root directory for which to investigate all subdirectories.
                If None, uses current working directory.
            :param str view:
                Pattern which the job names must match to be included in the
                extraction. Ignored if None.
            :param excludes:
                List of patterns which the job names must *not* match to be
                included in the extraction. Ignored if None.
            :param bool dynamic:
                If true, chooses a slower but more dynamic caching method. Only
                useful for ipython shell.
            :param bool ordered:
                If true, uses OrderedDict rather than conventional dict.
            :param bool naked_end:
                True if should return value rather than dict when only one item.
            :param bool unix_re:
                Converts regex patterns from unix-like expression.
        """
        # BUGFIX: do not import ``unix_re`` from the package here. The import
        # rebound the local name and silently discarded the caller's
        # ``unix_re`` argument.
        from .. import jobparams_naked_end
        from ..misc import RelativePath
        from .ordered_dict import OrderedDict
        super(AbstractMassExtract, self).__init__()
        # Seed the instance dictionary directly: this fools the derived
        # classes' __setattr__, which may forward unknown attributes elsewhere.
        self.__dict__.update({'dicttype': dict, 'view': '/',
                              'naked_end': naked_end, 'unix_re': unix_re,
                              '_excludes': excludes,
                              '_cached_extractors': None, 'dynamic': dynamic})
        self.naked_end = jobparams_naked_end if naked_end is None else naked_end
        """ If True and dict to return contains only one item, returns value itself. """
        self.unix_re = unix_re
        """ If True, then all regex matching is done using unix-command-line patterns. """
        self.excludes = excludes
        """ Patterns to exclude. """
        self._cached_extractors = None
        """ List of extraction objects. """
        self.dynamic = dynamic
        """ If True chooses a slower but more dynamic caching method. """
        self.dicttype = OrderedDict if ordered else dict
        """ Type of dictionary to use. """
        # NOTE(review): ``self.uncache`` is not defined in this class;
        # presumably supplied by derived classes -- confirm before relying on it.
        if path is None:
            self.__dict__['_rootpath'] = None
        else:
            self.__dict__['_rootpath'] = RelativePath(path, hook=self.uncache)

    @property
    def rootpath(self):
        """ Root of the directory-tree to trawl for OUTCARs. """
        return self._rootpath.path if self._rootpath is not None else None

    @rootpath.setter
    def rootpath(self, value):
        from ..misc import RelativePath
        if self._rootpath is None:
            self._rootpath = RelativePath(path=value, hook=self.uncache)
        else:
            self._rootpath.path = value

    @property
    def excludes(self):
        """ Pattern or list of patterns to ignore, or None.

            :py:attr:`unix_re` determines whether these are unix-command-line
            like patterns or true python regex.
        """
        try:
            return self._excludes
        except AttributeError:
            return None

    @excludes.setter
    def excludes(self, value):
        # A lone string is wrapped into a one-element list so downstream code
        # can always iterate over the patterns.
        if isinstance(value, str):
            self._excludes = [value]
        else:
            self._excludes = value

    def avoid(self, excludes):
        """ Returns a new object with further exclusions.

            :param excludes: Pattern or patterns to exclude from output.
            :type excludes: str or list of str or None

            The goal of this function is to work as an *anti* operator [],
            i.e. by excluding from the output anything that matches the
            patterns, rather than including only those which match the pattern.

            This is strictly equivalent to:

            >>> other = massextract.copy(excludes=excludes)
            >>> other.excludes.extend(massextract.excludes)

            and then doing calculations with ``other``. The advantage is that
            it can all be done on one line.

            If the ``excludes`` argument is None or an empty list, then the
            returned object will not exclude anything.
        """
        if excludes is None or len(excludes) == 0:
            return self.shallow_copy(excludes=None)
        result = self.shallow_copy(excludes=excludes)
        # The setter above normalizes ``excludes`` to a list, so extending
        # with this object's own patterns is always safe.
        if self.excludes is not None:
            result.excludes.extend(self.excludes)
        return result

    def iteritems(self):
        """ Iterates through all extraction objects and names. """
        for name, job in self._regex_extractors():
            yield name, job

    def items(self):
        """ Returns a list of all (name, extraction object) pairs. """
        return [(name, job) for name, job in self.iteritems()]

    def itervalues(self):
        """ Iterates through all extraction objects. """
        for name, job in self._regex_extractors():
            yield job

    def values(self):
        """ Returns a list of all extraction objects. """
        return [job for job in self.itervalues()]

    def iterkeys(self):
        """ Iterates through all job names. """
        for name, job in self._regex_extractors():
            yield name

    def keys(self):
        """ Returns a list of all job names. """
        return [name for name in self.iterkeys()]

    def __iter__(self):
        """ Iterates through all job names. """
        for name, job in self.iteritems():
            yield name

    def __len__(self):
        """ Returns number of jobs in the current view. """
        return len(self.keys())

    def __contains__(self, key):
        """ Returns True if key is valid and not empty. """
        from re import compile
        rekey = compile(key)
        # BUGFIX: loop variable renamed; it previously shadowed ``key`` and
        # clobbered the parameter on every iteration.
        for name in self.iterkeys():
            if rekey.match(name):
                return True
        return False

    def _regex_pattern(self, pattern, flags=0):
        """ Returns a compiled regular expression for job-name matching. """
        from re import compile
        if self.unix_re:
            # Import only when needed, so pure-regex use works standalone.
            from ..misc import translate_to_regex
            pattern = translate_to_regex(pattern)
        if len(pattern) == 0:
            return compile("", flags)
        # A pattern already anchored at a folder boundary is used as-is.
        # BUGFIX: ``endswith`` is required here -- the previous test
        # ``pattern[-1] in ('/', '\Z', '$')`` compared a single character
        # against the two-character suffix ``\Z`` and could never match it.
        if pattern.endswith(('/', r'\Z', '$')):
            return compile(pattern, flags)
        # BUGFIX: global flags must appear at the start of the expression
        # (an error in python >= 3.11; semantically identical before that).
        return compile(r"(?ms)" + pattern + r"(?=/|\Z)", flags)

    @abstractmethod
    def __iter_alljobs__(self):
        """ Generator to go through all relevant jobs.

            :return: (name, extractor), where name is the name of the job, and
                extractor an extraction object.
        """
        pass

    @property
    def _extractors(self):
        """ Goes through all jobs and collects Extract if available. """
        if self.dynamic:
            # Dynamic mode: re-walk the folders on every access, but reuse
            # previously created extractors so their caches survive.
            if self._cached_extractors is None:
                self._cached_extractors = self.dicttype()
            result = self.dicttype()
            for name, extract in self.__iter_alljobs__():
                if name not in self._cached_extractors:
                    self._cached_extractors[name] = extract
                result[name] = self._cached_extractors[name]
            return result
        else:
            # Static mode: walk the folders once and cache the whole mapping.
            if self._cached_extractors is not None:
                return self._cached_extractors
            result = self.dicttype()
            for name, extract in self.__iter_alljobs__():
                result[name] = extract
            self._cached_extractors = result
            return result

    def _regex_extractors(self):
        """ Loops through jobs in this view, skipping excluded names. """
        excludes = [] if self.excludes is None \
                   else [self._regex_pattern(u) for u in self.excludes]
        # The root view matches everything; skip compiling a view pattern.
        if self.view == "/":
            for key, value in self._extractors.items():
                if any(u.match(key) is not None for u in excludes):
                    continue
                yield key, value
            return
        regex = self._regex_pattern(self.view)
        for key, value in self._extractors.items():
            if regex.match(key) is None:
                continue
            if any(u.match(key) is not None for u in excludes):
                continue
            yield key, value

    @property
    def _attributes(self):
        """ Returns __dir__ special to the extraction itself. """
        results = set([])
        # Union of the public attributes of every extractor in the view.
        for key, value in self.iteritems():
            results |= set([u for u in dir(value) if u[0] != '_'])
        return results

    def __dir__(self):
        from itertools import chain
        results = chain([u for u in self.__dict__ if u[0] != '_'],
                        [u for u in dir(self.__class__) if u[0] != '_'],
                        self._attributes)
        return list(set(results))

    def __getattr__(self, name):
        """ Returns extracted values, one per extractor in the view. """
        from .forwarding_dict import ForwardingDict
        # BUGFIX: raise AttributeError rather than assert -- AssertionError
        # broke hasattr()/getattr() defaults and vanished under ``-O``.
        if name not in self._attributes:
            raise AttributeError("Unknown attribute {0}.".format(name))
        result = self.dicttype()
        for key, value in self.iteritems():
            # Best effort: folders lacking this attribute are dropped from
            # the result rather than aborting the whole view.
            try:
                result[key] = getattr(value, name)
            except Exception:
                result.pop(key, None)
        if self.naked_end and len(result) == 1:
            return result[list(result.keys())[0]]
        return ForwardingDict(dictionary=result, naked_end=self.naked_end)

    def __getitem__(self, name):
        """ Returns a view of the current job-dictionary.

            .. note:: normpath_ returns a valid path when descending below
               root, e.g. ``normpath('/../../other') == '/other'``, so there
               won't be any errors on that account.

            .. _normpath: http://docs.python.org/library/os.path.html#os.path.normpath
        """
        from os.path import normpath, join
        # Relative names are resolved against the current view.
        if name[0] != '/':
            name = join(self.view, name)
        if self.unix_re:
            name = normpath(name)
        return self.shallow_copy(view=name)

    def __delitem__(self, name):
        """ Removes items from the collection path.

            This basically adds to the excludes attribute.
        """
        if self.excludes is None:
            self._excludes = [name]
        elif name not in self.excludes:
            self.excludes.append(name)

    def shallow_copy(self, **kwargs):
        """ Returns a shallow copy.

            :param kwargs: Any keyword attribute will modify the corresponding
                attribute of the copy.
        """
        from copy import copy
        result = copy(self)
        for key, value in kwargs.items():
            setattr(result, key, value)
        return result

    def iterfiles(self, **kwargs):
        """ Iterates over output/input files.

            This is rerouted to all extraction objects.
        """
        for job in self.itervalues():
            if hasattr(job, 'iterfiles'):
                for file in job.iterfiles(**kwargs):
                    yield file

    # BUGFIX: an earlier duplicate __getstate__/__setstate__ pair (which did
    # not detach/re-attach the RelativePath hook) was dead code, silently
    # shadowed by the definitions below; it has been removed.
    def __getstate__(self):
        d = self.__dict__.copy()
        # The hook is a bound method and must not be pickled with the path.
        if d["_rootpath"] is not None:
            d["_rootpath"].hook = None
        return d

    def __setstate__(self, arg):
        self.__dict__.update(arg)
        # Re-attach the uncache hook dropped during pickling.
        if self._rootpath is not None:
            self._rootpath.hook = self.uncache