Source code for pyscf.gto.basis.parse_cp2k

#!/usr/bin/env python
# Copyright 2014-2023 The PySCF Developers. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: Timothy Berkelbach <tim.berkelbach@gmail.com>
#

'''
parse CP2K format
'''

import re
from pyscf.lib.exceptions import BasisNotFoundError
from pyscf.gto.basis import parse_nwchem
from pyscf.gto.basis.parse_nwchem import _search_basis_block
from pyscf import __config__

DISABLE_EVAL = getattr(__config__, 'DISABLE_EVAL', False)

MAXL = 8


[docs]
def parse(string, symb=None, optimize=False):
    '''Parse the basis text which is in CP2K format, return an internal
    basis format which can be assigned to :attr:`Mole.basis`
    Lines started with # are ignored.

    Examples:

    >>> cell = gto.Cell()
    >>> cell.basis = {'C': pyscf.gto.basis.parse_cp2k.parse("""
    ... C DZVP-GTH
    ...   2
    ...   2  0  1  4  2  2
    ...         4.3362376436   0.1490797872   0.0000000000  -0.0878123619   0.0000000000
    ...         1.2881838513  -0.0292640031   0.0000000000  -0.2775560300   0.0000000000
    ...         0.4037767149  -0.6882040510   0.0000000000  -0.4712295093   0.0000000000
    ...         0.1187877657  -0.3964426906   1.0000000000  -0.4058039291   1.0000000000
    ...   3  2  2  1  1
    ...         0.5500000000   1.0000000000
    ... #
    ... """)}
    '''
    if symb is not None:
        raw_data = list(filter(None, re.split(BASIS_SET_DELIMITER, string)))
        line_data = _search_basis_block(raw_data, symb)
        if not line_data:
            raise BasisNotFoundError(f'Basis not found for {symb}')
    else:
        line_data = string.splitlines()

    bastxt = []
    for dat in line_data:
        x = dat.split('#')[0].strip()
        if (x and not x.startswith('END') and not x.startswith('BASIS')):
            bastxt.append(x)
    return _parse(bastxt, optimize)



[docs]
def load(basisfile, symb, optimize=False):
    return _parse(search_seg(basisfile, symb), optimize)


def _to_MOLOPT_name(basis_name):
    '''Convert the alias name (such as gthszvmoloptsr) to the standard MOLOPT
    basis set name
    '''
    if '-' in basis_name:
        # If a full CP2K basis set name is specified
        return basis_name

    basis_name = basis_name.upper()
    if 'MOLOPT' not in basis_name:
        raise BasisNotFoundError(f'{basis_name} is not a valid MOLOPT basis set')
    if basis_name[:3] == 'GTH':
        basis_name = basis_name[3:] # handle GTHSZVMOLOPTSR
    parts = basis_name.split('MOLOPT')
    assert len(parts) >= 2
    parts = [x for x in parts if x]
    parts.insert(1, 'MOLOPT')
    parts.append('GTH')
    basis_name = '-'.join(parts)
    return basis_name

def _load_MOLOPT(basis_name, symb, basis_dir, optimize=False):
    if 'MOLOPT-PBE-GTH' in basis_name or 'MOLOPT-GGA-GTH' in basis_name:
        basisfiles = ('BASIS_MOLOPT_UZH',)
    elif 'MOLOPT-SCAN-GTH' in basis_name or 'MOLOPT-MGGA-GTH' in basis_name:
        basisfiles = ('BASIS_MOLOPT_UZH',)
    elif 'MOLOPT-PBE0-GTH' in basis_name or 'MOLOPT-HYB-GTH' in basis_name:
        basisfiles = ('BASIS_MOLOPT_UZH',)
    elif 'MOLOPT-SR-GTH' in basis_name:
        basisfiles = ('BASIS_MOLOPT', 'BASIS_MOLOPT_UCL')
    elif 'MOLOPT-GTH' in basis_name:
        basisfiles = ('BASIS_MOLOPT',)
    elif '-GTH' in basis_name:
        basisfiles = ('GTH_BASIS_SETS',)
    else:
        raise BasisNotFoundError(
            f'MOLOPT basis set {basis_name} is not available in the built-in database.')

    data = []
    for basisfile in basisfiles:
        with open(f'{basis_dir}/{basisfile}', 'r') as searchfile:
            in_block = False
            for line in searchfile:
                if not line or line[0] == '#':
                    continue
                if not in_block:
                    line = line.lstrip()
                    if line.startswith(symb+' ') and basis_name in line:
                        data.append(line)
                        in_block = True
                    continue

                data.append(line.strip())
                if 'GTH' in line or 'END' in line: # next block
                    break
        if data:
            break
    else:
        raise BasisNotFoundError(
            f'{basis_name} for {symb} not found in files {", ".join(basisfiles)}.')

    return _parse(data, optimize)

def _parse(blines, optimize=False):
    blines_iter = iter(blines)
    try:
        header_ln = next(blines_iter)  # noqa: F841
        nsets = int(next(blines_iter))
    except Exception:
        raise BasisNotFoundError('Not basis data')

    basis = []
    try:
        for n in range(nsets):
            line = next(blines_iter)
            comp = [int(p) for p in line.split()]
            lmin, lmax, nexps, ncontractions = comp[1], comp[2], comp[3], comp[4:]
            basis_n = [[l] for l in range(lmin,lmax+1)]
            for nexp in range(nexps):
                line = next(blines_iter)
                dat = line.split()
                try:
                    bfun = [float(x) for x in dat]
                except ValueError:
                    if DISABLE_EVAL:
                        raise ValueError('Failed to parse %s' % line)
                    else:
                        bfun = eval(','.join(dat))

                if len(bfun) != sum(ncontractions) + 1:
                    raise ValueError('Basis data incomplete')

                bfun_iter = iter(bfun)
                exp = next(bfun_iter)
                for i,l in enumerate(range(lmin,lmax+1)):
                    cl = [exp]
                    for c in range(ncontractions[i]):
                        cl.append(next(bfun_iter))
                    basis_n[i].append(cl)
            basis.extend(basis_n)
    except StopIteration:
        raise ValueError('Basis data incomplete')

    basis_sorted = []
    for l in range(MAXL):
        basis_sorted.extend([b for b in basis if b[0] == l])
    if not basis_sorted:
        raise BasisNotFoundError('Basis data not found')

    if optimize:
        basis_sorted = parse_nwchem.optimize_contraction(basis_sorted)

    basis_sorted = parse_nwchem.remove_zero(basis_sorted)
    return basis_sorted

BASIS_SET_DELIMITER = re.compile('# *BASIS SET.*\n')

[docs]
def search_seg(basisfile, symb):
    with open(basisfile, 'r') as fin:
        fdata = re.split(BASIS_SET_DELIMITER, fin.read())
    line_data = _search_basis_block(fdata[1:], symb)
    if not line_data:
        raise BasisNotFoundError(f'Basis for {symb} not found in {basisfile}')
    return [x for x in line_data if x and 'END' not in x]