Source code for pyscf.gto.basis.parse_cp2k

#!/usr/bin/env python
# Copyright 2014-2023 The PySCF Developers. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: Timothy Berkelbach <tim.berkelbach@gmail.com>
#

'''
parse CP2K format
'''

import re
from pyscf.lib.exceptions import BasisNotFoundError
from pyscf.gto.basis import parse_nwchem
from pyscf.gto.basis.parse_nwchem import _search_basis_block
from pyscf import __config__

DISABLE_EVAL = getattr(__config__, 'DISABLE_EVAL', False)

MAXL = 8


[docs]
def parse(string, symb=None, optimize=False):
    '''Parse the basis text which is in CP2K format, return an internal
    basis format which can be assigned to :attr:`Mole.basis`
    Lines started with # are ignored.

    Examples:

    >>> cell = gto.Cell()
    >>> cell.basis = {'C': pyscf.gto.basis.parse_cp2k.parse("""
    ... C DZVP-GTH
    ...   2
    ...   2  0  1  4  2  2
    ...         4.3362376436   0.1490797872   0.0000000000  -0.0878123619   0.0000000000
    ...         1.2881838513  -0.0292640031   0.0000000000  -0.2775560300   0.0000000000
    ...         0.4037767149  -0.6882040510   0.0000000000  -0.4712295093   0.0000000000
    ...         0.1187877657  -0.3964426906   1.0000000000  -0.4058039291   1.0000000000
    ...   3  2  2  1  1
    ...         0.5500000000   1.0000000000
    ... #
    ... """)}
    '''
    if symb is not None:
        raw_data = list(filter(None, re.split(BASIS_SET_DELIMITER, string)))
        string = _search_basis_block(raw_data, symb)
        if not string:
            raise BasisNotFoundError(f'Basis not found for {symb}')

    bastxt = []
    for dat in string.splitlines():
        x = dat.split('#')[0].strip()
        if (x and not x.startswith('END') and not x.startswith('BASIS')):
            bastxt.append(x)
    return _parse(bastxt, optimize)



[docs]
def load(basisfile, symb, optimize=False):
    return _parse(search_seg(basisfile, symb), optimize)


def _parse(blines, optimize=False):
    blines_iter = iter(blines)
    try:
        header_ln = next(blines_iter)  # noqa: F841
        nsets = int(next(blines_iter))
    except Exception:
        raise BasisNotFoundError('Not basis data')

    basis = []
    try:
        for n in range(nsets):
            comp = [int(p) for p in next(blines_iter).split()]
            lmin, lmax, nexps, ncontractions = comp[1], comp[2], comp[3], comp[4:]
            basis_n = [[l] for l in range(lmin,lmax+1)]
            for nexp in range(nexps):
                line = next(blines_iter)
                dat = line.split()
                try:
                    bfun = [float(x) for x in dat]
                except ValueError:
                    if DISABLE_EVAL:
                        raise ValueError('Failed to parse %s' % line)
                    else:
                        bfun = eval(','.join(dat))

                if len(bfun) != sum(ncontractions) + 1:
                    raise ValueError('Basis data incomplete')

                bfun_iter = iter(bfun)
                exp = next(bfun_iter)
                for i,l in enumerate(range(lmin,lmax+1)):
                    cl = [exp]
                    for c in range(ncontractions[i]):
                        cl.append(next(bfun_iter))
                    basis_n[i].append(cl)
            basis.extend(basis_n)
    except StopIteration:
        raise ValueError('Basis data incomplete')

    basis_sorted = []
    for l in range(MAXL):
        basis_sorted.extend([b for b in basis if b[0] == l])
    if not basis_sorted:
        raise BasisNotFoundError('Basis data not found')

    if optimize:
        basis_sorted = parse_nwchem.optimize_contraction(basis_sorted)

    basis_sorted = parse_nwchem.remove_zero(basis_sorted)
    return basis_sorted

BASIS_SET_DELIMITER = re.compile('# *BASIS SET.*\n')

[docs]
def search_seg(basisfile, symb):
    with open(basisfile, 'r') as fin:
        fdata = re.split(BASIS_SET_DELIMITER, fin.read())
    raw_basis = _search_basis_block(fdata[1:], symb)
    if not raw_basis:
        raise BasisNotFoundError(f'Basis for {symb} not found in {basisfile}')
    return [x.strip() for x in raw_basis.splitlines()
            if x.strip() and 'END' not in x]