'''
Module providing functions for reading the input and other related files
'''
from __future__ import print_function, absolute_import, division
import argparse
import io
import os
import re
import six
import sys
from collections import defaultdict, OrderedDict
import numpy as np
import pandas as pd
from ase.io.vasp import read_vasp
from ase.io.trajectory import Trajectory
if sys.version_info.major == 3:
import configparser as cp
else:
import ConfigParser as cp
[docs]def parse_arguments():
'''
Parse the input/config file name from the command line, parse the config
and return the parameters.
'''
parser = argparse.ArgumentParser()
parser.add_argument('command',
choices=('harmonic', 'anharmonic'),
help='choose what to do')
parser.add_argument('config',
help='file with the configuration parameters for thermo')
args = parser.parse_args()
if not os.path.exists(args.config):
raise ValueError('Specified file <{}> does not exist'.format(args.config))
defaults = {
'Tinitial' : '303.15',
'Tfinal' : '303.15',
'Tstep' : '0.0',
'pressure' : '0.1',
'translations': 'true',
'rotations' : 'true',
'pointgroup' : 'C1',
'phase' : 'gas',
'code' : None,
}
config = cp.ConfigParser(defaults=defaults, allow_no_value=True)
config.read(args.config)
conditions = {}
conditions['Tinitial'] = config.getfloat('conditions', 'Tinitial')
conditions['Tfinal'] = config.getfloat('conditions', 'Tfinal')
conditions['Tstep'] = config.getfloat('conditions', 'Tstep')
conditions['pressure'] = config.getfloat('conditions', 'pressure')
job = {}
job['proj_translations'] = config.getboolean('job', 'translations')
job['proj_rotations'] = config.getboolean('job', 'rotations')
job['code'] = config.get('job', 'code')
system = {}
system['phase'] = config.get('system', 'phase')
system['pointgroup'] = config.get('system', 'pointgroup')
system['symmetrynumber'] = get_symmetry_number(system['pointgroup'])
return args, conditions, job, system
[docs]def get_symmetry_number(pointgroup):
'''
Return the symmetry number for a given point group
.. seealso::
C. J. Cramer, `Essentials of Computational Chemistry, Theories
and Models`, 2nd Edition, p. 363
Parameters
----------
pointgroup : str
Symbol of the point group
'''
symmetrynumbers = {'Ci': 1, 'Cs': 1, 'Coov': 1, 'Dooh': 2,
'T' : 12, 'Td': 12, 'Oh' : 24, 'Ih' : 60}
cpatt = re.compile(r'C(\d+)[vh]?')
dpatt = re.compile(r'D(\d+)[dh]?')
spatt = re.compile(r'S(\d+)')
if pointgroup in symmetrynumbers.keys():
return symmetrynumbers[pointgroup]
else:
mc = cpatt.match(pointgroup)
md = dpatt.match(pointgroup)
ms = spatt.match(pointgroup)
if mc:
return int(mc.group(1))
elif md:
return 2 * int(md.group(1))
elif ms:
return int(ms.group(1)) // 2
else:
raise ValueError('Point group label "{}" unknown, cannot assign '
'a rotational symmetry number'.format(pointgroup))
[docs]def read_vasp_hessian(outcar='OUTCAR', symmetrize=True, convert_to_au=True,
dof_labels=False):
'''
Parse the hessian from the VASP ``OUTCAR`` file into a numpy array
Parameters
----------
outcar : str
Name of the VASP output, default is ``OUTCAR``
symmetrize : bool
If ``True`` the hessian will be symmetrized
convert_to_au : bool
If ``True`` convert the hessian to atomic units, in the other
case hessian is returned in [eV/Angstrom**2]
dof_labels : bool, default is False
If ``True`` a list of labels corresponding to the degrees of
freedom will also be returned
Returns
-------
hessian : numpy.array
Hessian matrix
Notes
-----
.. note::
By default VASP prints negative hessian so the elements should be
multiplied by -1 to restore the original hessian, this is done by
default, hessian in the XML file is **NOT** symmetrized by default
'''
from ase.units import Hartree, Bohr
with open(outcar, 'r') as foutcar:
for line in foutcar:
if 'Degrees of freedom DOF' in line:
dof = int(line.split()[-1])
hessian = np.zeros((dof, dof), dtype=float)
labels = []
break
for line in foutcar:
if 'SECOND DERIVATIVES ' in line:
for _ in range(2):
line = next(foutcar)
for i in range(dof):
line = next(foutcar)
linesplit = line.split()
labels.append(linesplit[0])
hessian[i] = [float(x) for x in linesplit[1:]]
break
else:
raise ValueError('No hessian found in file: {}'.format(outcar))
if symmetrize:
hessian = (hessian + hessian.T) * 0.5
if convert_to_au:
hessian = hessian * Bohr**2 / Hartree
if dof_labels:
pat = re.compile(r'(\d+)([XYZ])')
return -1.0 * hessian, [pat.match(x).groups() for x in labels]
else:
return -1.0 * hessian
[docs]def read_vasp_hessian_xml(xml='vasprun.xml', convert_to_au=True,
stripmass=True):
'''
Parse the hessian from the VASP ``vasprun.xml`` file into a numpy array
Parameters
----------
xml : str
Name of the VASP output, default is ``vasprun.xml``
convert_to_au : bool
If ``True`` convert the hessian to atomic units, in the other
case hessian is returned in [eV/Angstrom**2]
dof_labels : bool, default is False
If ``True`` a list of labels corresponding to the degrees of
freedom will also be returned
stripmass : bool
If ``True`` use VASP default masses to transform hessian to
non-mass-weighted form
Returns
-------
hessian : numpy.array
Hessian matrix
Notes
-----
.. note::
By default VASP prints negative hessian so the elements should be
multiplied by -1 to restore the original hessian, this is done by
default, hessian in the XML file is symmetrized by default
'''
from lxml.etree import parse
from ase.units import Hartree, Bohr
doc = parse(xml)
root = doc.getroot()
species = []
for entry in root.find("atominfo/array[@name='atoms']/set"):
species.append(entry[0].text.strip())
natoms = int(root.find('atominfo/atoms').text)
dof = 3 * natoms
constblock = root.find(
'structure[@name="initialpos"]/varray[@name="selective"]')
if constblock is not None:
# partial hessian calculation
selective = np.ones((natoms, 3), dtype=bool)
for i, v in enumerate(constblock):
for j, fixed in enumerate(v.text.split()):
selective[i, j] = (fixed == 'T')
index = np.flatnonzero(selective.ravel())
hess_size = index.size
else:
# complete hessian calculation
index = np.arange(dof, dtype=int)
hess_size = dof
hessian = np.zeros((hess_size, hess_size), dtype=float)
for i, v in enumerate(root.find(
'calculation/dynmat/varray[@name="hessian"]')):
hessian[i] = -np.array([float(val) for val in v.text.split()])
if stripmass:
# get the masses that VASP uses by default
vasp_mass = {}
for element in root.find("atominfo/array[@name='atomtypes']/set"):
vasp_mass[element[1].text.strip()] = float(element[2].text)
vasp_massvec = np.zeros(hess_size, dtype=float)
for i, j in enumerate(np.floor_divide(index, 3)):
vasp_massvec[i] = vasp_mass[species[j]]
hessian *= np.sqrt(np.outer(vasp_massvec, vasp_massvec))
if convert_to_au:
hessian = hessian * Bohr**2 / Hartree
return hessian
[docs]def print_modeinfo(mi, output=None):
'''
Print the vibrational population data
Parameters
----------
mi : pandas.DataFrame
output : str
Name of the file to store the printout, if ``None`` stdout will be used
'''
if output is not None:
fobj = open(output, 'w')
else:
fobj = None
fmts = {
'HOfreq': '{:12.6f}'.format,
'effective_mass': '{:12.6f}'.format,
'displacement': '{:12.6f}'.format,
'P_stretch': '{:>8.2%}'.format,
'P_bend': '{:>8.2%}'.format,
'P_torsion': '{:>8.2%}'.format,
'P_longrange': '{:>8.2%}'.format,
}
print(mi[mi['vibration']].to_string(formatters=fmts), file=fobj)
if output is not None:
fobj.close()
[docs]def print_mode_thermo(df, info=False):
'''
After calculating all the anharmonic modes print the per mode themochemical
functions
'''
fmts = {
'freq': '{:12.6f}'.format,
'zpve': '{:12.6f}'.format,
'qvib': '{:14.6e}'.format,
'U' : '{:12.6f}'.format,
'S' : '{:14.6e}'.format,
}
# header with the units
header = ' {0:>12s} {1:>12s} {2:>14s} {3:>12s} {4:>14s}'
print(header.format('[cm^-1]', '[kJ/mol]', ' ', '[kJ/mol]', '[kJ/mol*K]'))
print(df.to_string(formatters=fmts))
if info:
print('INFO codes')
print('-' * 10)
print('OK : Succesfully converged the anharmonic eigenproblem')
print('AGTH : Anharmonic frequency greater than the harmonic')
print('MAXITER : Maximum number of iterations exhausted')
# parsers and write methods for legacy EIGEN_HESS files
[docs]def read_bmatdat():
'''
Read the ``bmat.dat`` file with internal coordiantes and the B matrix
produced by the original ``writeBmat`` code
Returns
-------
internals, Bmatrix : tuple
Internal coordiantes and B matrix
'''
with open('bmat.dat', 'r') as fdat:
lines = fdat.readlines()
nint, ndof = (int(x) for x in lines[1].split())
coordlno = next(i for i, line in enumerate(lines) if 'Coordinates (au):' in line)
bmatlno = next(i for i, line in enumerate(lines) if 'Bmatrix(ij):' in line)
internals = np.array([tuple(row.split()[:2]) for row in lines[coordlno + 1:coordlno + nint + 1]],
dtype=[('type', 'S4'), ('value', np.float32)])
Bmatrix = np.zeros((nint, ndof), dtype=float)
for line in lines[bmatlno + 1:]:
i, j, val = line.split()
Bmatrix[int(i) - 1, int(j) - 1] = float(val)
return internals, Bmatrix
[docs]def read_em_freq(fname):
'''
Read the file ``fname`` with the frequencies, reduced masses and fitted
fitted coefficients for the potential into a pandas DataFrame.
Parameters
----------
fname : str
Name of the file with PES
'''
cols = ['type', 'freq', 'mass', 'a0', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6']
data = pd.read_csv(fname, sep=r'\s+', engine='python', names=cols)
data.set_index(np.arange(1, data.shape[0] + 1), inplace=True)
for col in cols[1:]:
data[col] = data[col].astype(float)
return data
[docs]def read_pes(fname):
'''
Parse the file with the potential energy surface (PES) into a dict of
numpy arrays with mode numbers as keys
Parameters
----------
fname : str
Name of the file with PES
'''
with open(fname, 'r') as fobj:
data = fobj.read()
pat = re.compile(' Scan along mode # =\s*(\d+)')
parsed = [x for x in pat.split(data) if x != '']
it = iter(parsed)
parsed = {int(mode): np.loadtxt(io.StringIO(six.text_type(pes))) for mode, pes in zip(it, it)}
return parsed
[docs]def read_poscars(filename):
'''
Read POSCARs file with the displaced structures and return
an OrderedDict with the Atoms objects
'''
pat = re.compile(r'Mode\s*=\s*(\d+)\s*point\s*=\s*(-?\d+)')
if os.path.exists(filename):
with open(filename, 'r') as fdata:
poscars = fdata.read()
else:
raise OSError('File "{}" does not exist'.format(filename))
parsed = [x for x in pat.split(poscars) if x != ' ']
images = OrderedDict()
it = iter(parsed)
for mode, point, geometry in zip(it, it, it):
images[tuple([int(mode) -1, int(point)])] = read_vasp(io.StringIO(six.text_type(geometry)))
return images
[docs]def write_modes(filename='POSCARs'):
'''
Convert a file with multiple geometries representing vibrational modes
in ``POSCAR``/``CONTCAR`` format into trajectory files with modes.
'''
pat = re.compile(r'Mode\s*=\s*(\d+)\s*point\s*=\s*(-?\d+)')
if os.path.exists(filename):
with open(filename, 'r') as fdata:
poscars = fdata.read()
else:
raise OSError('File "{}" does not exist'.format(filename))
parsed = [x for x in pat.split(poscars) if x != ' ']
it = iter(parsed)
dd = defaultdict(list)
for i, j, item in zip(it, it, it):
dd[i].append(item)
for mode, geometries in dd.items():
traj = Trajectory('mode.{}.traj'.format(mode), 'w')
for geometry in geometries:
atoms = read_vasp(io.StringIO(six.text_type(geometry)))
traj.write(atoms)
traj.close()