Source code for pyscf.gto.basis.parse_molpro

#!/usr/bin/env python
# Copyright 2014-2020 The PySCF Developers. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: Qiming Sun <osirpt.sun@gmail.com>
#

'''
Parses for basis set in the Molpro format
'''

__all__ = ['parse', 'load']

import re
import numpy

try:
    from pyscf.gto.basis.parse_nwchem import optimize_contraction
    from pyscf.gto.basis.parse_nwchem import remove_zero
except ImportError:
    optimize_contraction = lambda basis: basis
    remove_zero = lambda basis: basis

from pyscf import __config__
DISABLE_EVAL = getattr(__config__, 'DISABLE_EVAL', False)

MAXL = 12
SPDF = 'SPDFGHIKLMNO'
MAPSPDF = {key: l for l, key in enumerate(SPDF)}
COMMENT_KEYWORDS = '!*#'

# parse the basis text which is in Molpro format, return an internal basis
# format which can be assigned to gto.mole.basis
[docs] def parse(string, optimize=True): raw_basis = [] for x in string.splitlines(): x = x.strip() if x and x[0] not in COMMENT_KEYWORDS: raw_basis.append(x) return _parse(raw_basis, optimize)
[docs] def load(basisfile, symb, optimize=True): raw_basis = search_seg(basisfile, symb) #if not raw_basis: # raise BasisNotFoundError('Basis not found for %s in %s' % (symb, basisfile)) return _parse(raw_basis, optimize)
def search_seg(basisfile, symb): raw_basis = [] with open(basisfile, 'r') as fin: dat = fin.readline() while dat: if dat[0] in COMMENT_KEYWORDS: dat = fin.readline() continue elif dat[0].isalpha(): if dat.startswith(symb+' '): raw_basis.append(dat.splitlines()[0]) elif raw_basis: return raw_basis fin.readline() # line for references elif raw_basis: raw_basis.append(dat.splitlines()[0]) dat = fin.readline() return raw_basis def _parse(raw_basis, optimize=True): # pass 1 basis_add = [] for dat in raw_basis: dat = dat.upper() if dat[0].isalpha(): if ' ' not in dat: # Skip the line of comments continue status = dat val = [] basis_add.append([status, val]) else: val.append(dat) raw_basis = [[k, ' '.join(v)] for k,v in basis_add] # pass 2 basis_add = [] for status, valstring in raw_basis: tmp = status.split(':') key = tmp[0].split() l = MAPSPDF[key[1].upper()] #TODO if key[-1] == 'SV' val = tmp[1].split() np = int(val[0]) nc = int(val[1]) rawd = [float(x) for x in valstring.replace('D','e').split()] if nc == 0: for e in rawd: basis_add.append([l, [e, 1.]]) else: exps = numpy.array(rawd[:np]) coeff = numpy.zeros((np,nc)) p1 = np for i in range(nc): start, end = val[2+i].split('.') start, end = int(start), int(end) nd = end - start + 1 p0, p1 = p1, p1 + nd coeff[start-1:end,i] = rawd[p0:p1] bval = numpy.hstack((exps[:,None], coeff)) basis_add.append([l] + bval.tolist()) basis_sorted = [] for l in range(MAXL): basis_sorted.extend([b for b in basis_add if b[0] == l]) if optimize: basis_sorted = optimize_contraction(basis_sorted) basis_sorted = remove_zero(basis_sorted) return basis_sorted def parse_ecp(string): ecptxt = [] for x in string.splitlines(): x = x.strip() if x and x[0] not in COMMENT_KEYWORDS: ecptxt.append(x) return _parse_ecp(ecptxt) def _parse_ecp(raw_ecp): symb, nelec, nshell, nso = re.split(',|;', raw_ecp[0])[1:5] nelec = int(nelec) nshell = int(nshell) nso = int(nso) assert len(raw_ecp) == (nshell + nso + 2), "ecp info doesn't match with data" def parse_terms(terms): r_orders = [[] for i in range(7)] # up to r^6 for term in terms: line = term.split(',') order = int(line[0]) try: coef = [float(x) for x in line[1:]] except ValueError: raise ValueError('Failed to parse ecp %s' % line) r_orders[order].append(coef) return r_orders ecp_add = {} ul = [x.strip() for x in raw_ecp[1].replace('D', 'e').split(';') if x.strip()] assert int(ul[0]) + 1 == len(ul), "UL doesn't match data" ecp_add[-1] = parse_terms(ul[1:]) for i, sf_terms in enumerate(raw_ecp[2:2+nshell]): terms = [x.strip() for x in sf_terms.replace('D', 'e').split(';') if x.strip()] assert int(terms[0]) + 1 == len(terms), \ "ECP %s Shell doesn't match data" % SPDF[i] ecp_add[i] = parse_terms(terms[1:]) if nso > 0: for i, so_terms in enumerate(raw_ecp[2+nshell:]): terms = [x.strip() for x in so_terms.replace('D', 'e').split(';') if x.strip()] assert int(terms[0]) + 1 == len(terms), \ "ECP-SOC Shell %s doesn't match data" % SPDF[i+1] soc_data = parse_terms(terms[1:]) for order, coefs in enumerate(soc_data): if not coefs: continue sf_coefs = ecp_add[i+1][order] assert ([x[0] for x in sf_coefs] == [x[0] for x in coefs]), \ "In ECP Shell %s order %d, SF and SOC do not match" % (SPDF[i+1], order) for j, c in enumerate(coefs): sf_coefs[j].append(c[1]) bsort = [] for l in range(-1, MAXL): if l in ecp_add: bsort.append([l, ecp_add[l]]) return [nelec, bsort] if __name__ == '__main__': #print(search_seg('minao.libmol', 'C')) print(load('cc_pvdz.libmol', 'C'))