Source code for pyscf.pbc.df.df

#!/usr/bin/env python
# Copyright 2014-2019,2021 The PySCF Developers. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: Qiming Sun <osirpt.sun@gmail.com>
#

'''
Density fitting

Divide the 3-center Coulomb integrals to two parts.  Compute the local
part in real space, long range part in reciprocal space.

Note when diffuse functions are used in fitting basis, it is easy to cause
linear dependence (non-positive definite) issue under PBC.

Ref:
J. Chem. Phys. 147, 164119 (2017)
'''

import os
import ctypes
import warnings
import tempfile
import contextlib
import itertools
import numpy
import h5py
import scipy.linalg
from pyscf import lib
from pyscf import gto
from pyscf.lib import logger
from pyscf.df import addons
from pyscf.df.outcore import _guess_shell_ranges
from pyscf.pbc.gto.cell import _estimate_rcut
from pyscf.pbc import tools
from pyscf.pbc.df import incore
from pyscf.pbc.df import outcore
from pyscf.pbc.df import ft_ao
from pyscf.pbc.df import aft
from pyscf.pbc.df import df_jk
from pyscf.pbc.df import df_ao2mo
from pyscf.pbc.df.aft import estimate_eta, _check_kpts
from pyscf.pbc.df.df_jk import zdotCN
from pyscf.pbc.lib.kpts import KPoints
from pyscf.pbc.lib.kpts_helper import (is_zero, gamma_point, member, unique,
                                       KPT_DIFF_TOL)
from pyscf.pbc.df.gdf_builder import libpbc, _CCGDFBuilder, _CCNucBuilder
from pyscf.pbc.df.rsdf_builder import _RSGDFBuilder, _RSNucBuilder
from pyscf import __config__

LINEAR_DEP_THR = getattr(__config__, 'pbc_df_df_DF_lindep', 1e-9)
LONGRANGE_AFT_TURNOVER_THRESHOLD = 2.5


[docs] def make_modrho_basis(cell, auxbasis=None, drop_eta=None): r'''Generate a cell object using the density fitting auxbasis as the basis set. The normalization coefficients of the auxiliary cell are different to the regular (square-norm) convention. To simplify the compensated charge algorithm, they are normalized against \int (r^l e^{-ar^2} r^2 dr ''' auxcell = incore.make_auxcell(cell, auxbasis) # Note libcint library will multiply the norm of the integration over spheric # part sqrt(4pi) to the basis. half_sph_norm = numpy.sqrt(.25/numpy.pi) steep_shls = [] ndrop = 0 rcut = [] _env = auxcell._env.copy() for ib in range(len(auxcell._bas)): l = auxcell.bas_angular(ib) np = auxcell.bas_nprim(ib) nc = auxcell.bas_nctr(ib) es = auxcell.bas_exp(ib) ptr = auxcell._bas[ib,gto.PTR_COEFF] cs = auxcell._env[ptr:ptr+np*nc].reshape(nc,np).T if drop_eta is not None and numpy.any(es < drop_eta): cs = cs[es>=drop_eta] es = es[es>=drop_eta] np, ndrop = len(es), ndrop+np-len(es) if np > 0: pe = auxcell._bas[ib,gto.PTR_EXP] auxcell._bas[ib,gto.NPRIM_OF] = np _env[pe:pe+np] = es # int1 is the multipole value. l*2+2 is due to the radial part integral # \int (r^l e^{-ar^2} * Y_{lm}) (r^l Y_{lm}) r^2 dr d\Omega int1 = gto.gaussian_int(l*2+2, es) s = numpy.einsum('pi,p->i', cs, int1) # The auxiliary basis normalization factor is not a must for density expansion. # half_sph_norm here to normalize the monopole (charge). This convention can # simplify the formulism of \int \bar{\rho}, see function auxbar. cs = numpy.einsum('pi,i->pi', cs, half_sph_norm/s) _env[ptr:ptr+np*nc] = cs.T.ravel() steep_shls.append(ib) r = _estimate_rcut(es, l, abs(cs).max(axis=1), cell.precision) rcut.append(r.max()) auxcell._env = _env auxcell.rcut = max(rcut) auxcell._bas = numpy.asarray(auxcell._bas[steep_shls], order='C') logger.info(cell, 'Drop %d primitive fitting functions', ndrop) logger.info(cell, 'make aux basis, num shells = %d, num cGTOs = %d', auxcell.nbas, auxcell.nao_nr()) logger.info(cell, 'auxcell.rcut %s', auxcell.rcut) return auxcell
make_auxcell = make_modrho_basis
[docs] class GDF(lib.StreamObject, aft.AFTDFMixin): '''Gaussian density fitting ''' blockdim = getattr(__config__, 'pbc_df_df_DF_blockdim', 240) _dataname = 'j3c' # Call _CCGDFBuilder if applicable. _CCGDFBuilder is slower than # _RSGDFBuilder but numerically more close to previous versions _prefer_ccdf = False # If True, force using density matrix-based K-build force_dm_kbuild = False _keys = { 'blockdim', 'force_dm_kbuild', 'cell', 'kpts', 'kpts_band', 'eta', 'mesh', 'exp_to_discard', 'exxdiv', 'auxcell', 'linear_dep_threshold', } def __init__(self, cell, kpts=numpy.zeros((1,3))): self.cell = cell self.stdout = cell.stdout self.verbose = cell.verbose self.max_memory = cell.max_memory if isinstance(kpts, KPoints): kpts = kpts.kpts self.kpts = kpts # default is gamma point self.kpts_band = None self._auxbasis = None self.eta = None self.mesh = None # exp_to_discard to remove diffused fitting functions. The diffused # fitting functions may cause linear dependency in DF metric. Removing # the fitting functions whose exponents are smaller than exp_to_discard # can improve the linear dependency issue. However, this parameter # affects the quality of the auxiliary basis. The default value of # this parameter was set to 0.2 in v1.5.1 or older and was changed to # 0 since v1.5.2. self.exp_to_discard = cell.exp_to_discard # The following attributes are not input options. self.exxdiv = None # to mimic KRHF/KUHF object in function get_coulG self.auxcell = None self.linear_dep_threshold = LINEAR_DEP_THR self._j_only = False # If _cderi_to_save is specified, the 3C-integral tensor will be saved in this file. self._cderi_to_save = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) # If _cderi is specified, the 3C-integral tensor will be read from this file self._cderi = None self._rsh_df = {} # Range separated Coulomb DF objects __getstate__, __setstate__ = lib.generate_pickle_methods( excludes=('_cderi_to_save', '_cderi', '_rsh_df'), reset_state=True) @property def auxbasis(self): return self._auxbasis @auxbasis.setter def auxbasis(self, x): if self._auxbasis != x: self._auxbasis = x self.auxcell = None self._cderi = None
[docs] def reset(self, cell=None): if cell is not None: self.cell = cell self.auxcell = None self._cderi = None self._rsh_df = {} return self
@property def gs(self): return [n//2 for n in self.mesh] @gs.setter def gs(self, x): warnings.warn('Attribute .gs is deprecated. It is replaced by attribute .mesh.\n' 'mesh = the number of PWs (=2*gs+1) for each direction.') self.mesh = [2*n+1 for n in x]
[docs] def dump_flags(self, verbose=None): log = logger.new_logger(self, verbose) log.info('\n') log.info('******** %s ********', self.__class__) if self.auxcell is None: log.info('auxbasis = %s', self.auxbasis) else: log.info('auxbasis = %s', self.auxcell.basis) if self.eta is not None: log.info('eta = %s', self.eta) if self.mesh is not None: log.info('mesh = %s (%d PWs)', self.mesh, numpy.prod(self.mesh)) log.info('exp_to_discard = %s', self.exp_to_discard) if isinstance(self._cderi, str): log.info('_cderi = %s where DF integrals are loaded (readonly).', self._cderi) elif isinstance(self._cderi_to_save, str): log.info('_cderi_to_save = %s', self._cderi_to_save) else: log.info('_cderi_to_save = %s', self._cderi_to_save.name) log.info('len(kpts) = %d', len(self.kpts)) log.debug1(' kpts = %s', self.kpts) if self.kpts_band is not None: log.info('len(kpts_band) = %d', len(self.kpts_band)) log.debug1(' kpts_band = %s', self.kpts_band) return self
[docs] def build(self, j_only=None, with_j3c=True, kpts_band=None): if j_only is not None: self._j_only = j_only if self.kpts_band is not None: self.kpts_band = numpy.reshape(self.kpts_band, (-1,3)) if kpts_band is not None: kpts_band = numpy.reshape(kpts_band, (-1,3)) if self.kpts_band is None: self.kpts_band = kpts_band else: self.kpts_band = unique(numpy.vstack((self.kpts_band,kpts_band)))[0] self.check_sanity() self.dump_flags() self.auxcell = make_modrho_basis(self.cell, self.auxbasis, self.exp_to_discard) if with_j3c and self._cderi_to_save is not None: if isinstance(self._cderi_to_save, str): cderi = self._cderi_to_save else: cderi = self._cderi_to_save.name if isinstance(self._cderi, str): if self._cderi == cderi and os.path.isfile(cderi): logger.warn(self, 'File %s (specified by ._cderi) is ' 'overwritten by GDF initialization.', cderi) else: logger.warn(self, 'Value of ._cderi is ignored. ' 'DF integrals will be saved in file %s .', cderi) self._cderi = cderi t1 = (logger.process_clock(), logger.perf_counter()) self._make_j3c(self.cell, self.auxcell, None, cderi) t1 = logger.timer_debug1(self, 'j3c', *t1) return self
def _make_j3c(self, cell=None, auxcell=None, kptij_lst=None, cderi_file=None): if cell is None: cell = self.cell if auxcell is None: auxcell = self.auxcell if cderi_file is None: cderi_file = self._cderi_to_save # Remove duplicated k-points. Duplicated kpts may lead to a buffer # located in incore.wrap_int3c larger than necessary. Integral code # only fills necessary part of the buffer, leaving some space in the # buffer unfilled. if self.kpts_band is None: kpts_union = self.kpts else: kpts_union = unique(numpy.vstack([self.kpts, self.kpts_band]))[0] if self._prefer_ccdf or cell.omega > 0: # For long-range integrals _CCGDFBuilder is the only option dfbuilder = _CCGDFBuilder(cell, auxcell, kpts_union) dfbuilder.eta = self.eta else: dfbuilder = _RSGDFBuilder(cell, auxcell, kpts_union) dfbuilder.mesh = self.mesh dfbuilder.linear_dep_threshold = self.linear_dep_threshold j_only = self._j_only or len(kpts_union) == 1 dfbuilder.make_j3c(cderi_file, j_only=j_only, dataname=self._dataname, kptij_lst=kptij_lst)
[docs] def cderi_array(self, label=None): ''' Returns CDERIArray object which provides numpy APIs to access cderi tensor. ''' if label is None: label = self._dataname if self._cderi is None: self.build() return CDERIArray(self._cderi, label)
[docs] def has_kpts(self, kpts): if kpts is None: return True else: kpts = numpy.asarray(kpts).reshape(-1,3) if self.kpts_band is None: return all((len(member(kpt, self.kpts))>0) for kpt in kpts) else: return all((len(member(kpt, self.kpts))>0 or len(member(kpt, self.kpts_band))>0) for kpt in kpts)
[docs] def sr_loop(self, kpti_kptj=numpy.zeros((2,3)), max_memory=2000, compact=True, blksize=None, aux_slice=None): '''Short range part''' if self._cderi is None: self.build() cell = self.cell kpti, kptj = kpti_kptj unpack = is_zero(kpti-kptj) and not compact is_real = is_zero(kpti_kptj) nao = cell.nao_nr() if blksize is None: if is_real: blksize = max_memory*1e6/8/(nao**2*2) else: blksize = max_memory*1e6/16/(nao**2*2) blksize /= 2 # For prefetch blksize = max(16, min(int(blksize), self.blockdim)) logger.debug3(self, 'max_memory %d MB, blksize %d', max_memory, blksize) def load(aux_slice): b0, b1 = aux_slice naux = b1 - b0 if is_real: LpqR = numpy.asarray(j3c[b0:b1].real) if compact and LpqR.shape[1] == nao**2: LpqR = lib.pack_tril(LpqR.reshape(naux,nao,nao)) elif unpack and LpqR.shape[1] != nao**2: LpqR = lib.unpack_tril(LpqR).reshape(naux,nao**2) LpqI = numpy.zeros_like(LpqR) else: Lpq = numpy.asarray(j3c[b0:b1]) LpqR = numpy.asarray(Lpq.real, order='C') LpqI = numpy.asarray(Lpq.imag, order='C') Lpq = None if compact and LpqR.shape[1] == nao**2: LpqR = lib.pack_tril(LpqR.reshape(naux,nao,nao)) LpqI = lib.pack_tril(LpqI.reshape(naux,nao,nao)) elif unpack and LpqR.shape[1] != nao**2: LpqR = lib.unpack_tril(LpqR).reshape(naux,nao**2) LpqI = lib.unpack_tril(LpqI, lib.ANTIHERMI).reshape(naux,nao**2) return LpqR, LpqI with _load3c(self._cderi, self._dataname, kpti_kptj) as j3c: if aux_slice is None: slices = lib.prange(0, j3c.shape[0], blksize) else: slices = lib.prange(*aux_slice, blksize) for LpqR, LpqI in lib.map_with_prefetch(load, slices): yield LpqR, LpqI, 1 LpqR = LpqI = None if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum': # Truncated Coulomb operator is not positive definite. Load the # CDERI tensor of negative part. with _load3c(self._cderi, self._dataname+'-', kpti_kptj, ignore_key_error=True) as j3c: if aux_slice is None: slices = lib.prange(0, j3c.shape[0], blksize) else: slices = lib.prange(*aux_slice, blksize) for LpqR, LpqI in lib.map_with_prefetch(load, slices): yield LpqR, LpqI, -1 LpqR = LpqI = None
[docs] def get_pp(self, kpts=None): '''Get the periodic pseudopotential nuc-el AO matrix, with G=0 removed. ''' cell = self.cell kpts, is_single_kpt = _check_kpts(self, kpts) if self._prefer_ccdf or cell.omega > 0: # For long-range integrals _CCGDFBuilder is the only option dfbuilder = _CCNucBuilder(cell, kpts).build() else: dfbuilder = _RSNucBuilder(cell, kpts).build() vpp = dfbuilder.get_pp() if is_single_kpt: vpp = vpp[0] return vpp
[docs] def get_nuc(self, kpts=None): '''Get the periodic nuc-el AO matrix, with G=0 removed. ''' cell = self.cell kpts, is_single_kpt = _check_kpts(self, kpts) if self._prefer_ccdf or cell.omega > 0: # For long-range integrals _CCGDFBuilder is the only option dfbuilder = _CCNucBuilder(cell, kpts).build() else: dfbuilder = _RSNucBuilder(cell, kpts).build() nuc = dfbuilder.get_nuc() if is_single_kpt: nuc = nuc[0] return nuc
# Note: Special exxdiv by default should not be used for an arbitrary # input density matrix. When the df object was used with the molecular # post-HF code, get_jk was often called with an incomplete DM (e.g. the # core DM in CASCI). An SCF level exxdiv treatment is inadequate for # post-HF methods.
[docs] def get_jk(self, dm, hermi=1, kpts=None, kpts_band=None, with_j=True, with_k=True, omega=None, exxdiv=None): if omega is not None: # J/K for RSH functionals cell = self.cell # * AFT is computationally more efficient than GDF if the Coulomb # attenuation tends to the long-range role (i.e. small omega). # * Note: changing to AFT integrator may cause small difference to # the GDF integrator. If a very strict GDF result is desired, # we can disable this trick by setting # LONGRANGE_AFT_TURNOVER_THRESHOLD to 0. # * The sparse mesh is not appropriate for low dimensional systems # with infinity vacuum since the ERI may require large mesh to # sample density in vacuum. if (omega < LONGRANGE_AFT_TURNOVER_THRESHOLD and cell.dimension >= 2 and cell.low_dim_ft_type != 'inf_vacuum'): mydf = aft.AFTDF(cell, self.kpts) ke_cutoff = aft.estimate_ke_cutoff_for_omega(cell, omega) mydf.mesh = cell.cutoff_to_mesh(ke_cutoff) else: mydf = self with mydf.range_coulomb(omega) as rsh_df: return rsh_df.get_jk(dm, hermi, kpts, kpts_band, with_j, with_k, omega=None, exxdiv=exxdiv) kpts, is_single_kpt = _check_kpts(self, kpts) if is_single_kpt: return df_jk.get_jk(self, dm, hermi, kpts[0], kpts_band, with_j, with_k, exxdiv) vj = vk = None if with_k: vk = df_jk.get_k_kpts(self, dm, hermi, kpts, kpts_band, exxdiv) if with_j: vj = df_jk.get_j_kpts(self, dm, hermi, kpts, kpts_band) return vj, vk
get_eri = get_ao_eri = df_ao2mo.get_eri ao2mo = get_mo_eri = df_ao2mo.general ao2mo_7d = df_ao2mo.ao2mo_7d
[docs] def update_mp(self): mf = self.copy() mf.with_df = self return mf
[docs] def update_cc(self): raise NotImplementedError
[docs] def update(self): raise NotImplementedError
[docs] def prange(self, start, stop, step): '''This is a hook for MPI parallelization. DO NOT use it out of the scope of AFTDF/GDF/MDF. ''' return lib.prange(start, stop, step)
################################################################################ # With this function to mimic the molecular DF.loop function, the pbc gamma # point DF object can be used in the molecular code
[docs] def loop(self, blksize=None): cell = self.cell if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum': raise RuntimeError('ERIs of PBC-2D systems are not positive ' 'definite. Current API only supports positive ' 'definite ERIs.') if blksize is None: blksize = self.blockdim for LpqR, LpqI, sign in self.sr_loop(compact=True, blksize=blksize): # LpqI should be 0 for gamma point DF # assert (numpy.linalg.norm(LpqI) < 1e-12) yield LpqR
[docs] def get_naoaux(self): '''The dimension of auxiliary basis at gamma point''' # determine naoaux with self._cderi, because DF object may be used as CD # object when self._cderi is provided. if self._cderi is None: self.build() cell = self.cell if isinstance(self._cderi, numpy.ndarray): # self._cderi is likely offered by user. Ensure # cderi.shape = (nkpts,naux,nao_pair) nao = cell.nao if self._cderi.shape[-1] == nao: assert self._cderi.ndim == 4 naux = self._cderi.shape[1] elif self._cderi.shape[-1] in (nao**2, nao*(nao+1)//2): assert self._cderi.ndim == 3 naux = self._cderi.shape[1] else: raise RuntimeError('cderi shape') return naux # self._cderi['j3c/k_id/seg_id'] with h5py.File(self._cderi, 'r') as feri: key = next(iter(feri[self._dataname].keys())) dat = feri[f'{self._dataname}/{key}'] if isinstance(dat, h5py.Group): naux = dat['0'].shape[0] else: naux = dat.shape[0] if (cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum' and f'{self._dataname}-' in feri): key = next(iter(feri[f'{self._dataname}-'].keys())) dat = feri[f'{self._dataname}-/{key}'] if isinstance(dat, h5py.Group): naux += dat['0'].shape[0] else: naux += dat.shape[0] return naux
to_gpu = lib.to_gpu
DF = GDF
[docs] class CDERIArray: ''' Provide numpy APIs to access cderi tensor. This object can be viewed as an 5-dimension array [kpt-i, kpt-j, aux-index, ao-i, ao-j] ''' def __init__(self, data_group, label='j3c'): self._data_is_h5obj = isinstance(data_group, h5py.Group) if not self._data_is_h5obj: data_group = h5py.File(data_group, 'r') self.data_group = data_group if 'kpts' not in data_group: # TODO: Deprecate the v1 data format self._data_version = 'v1' self._cderi = data_group.file.filename self._label = label self._kptij_lst = data_group['j3c-kptij'][()] kpts = unique(self._kptij_lst[:,0])[0] self.nkpts = nkpts = len(kpts) if len(self._kptij_lst) not in (nkpts, nkpts**2, nkpts*(nkpts+1)//2): raise RuntimeError(f'Dimension error for CDERI {self._cderi}') return self._data_version = 'v2' aosym = data_group['aosym'][()] if isinstance(aosym, bytes): aosym = aosym.decode() self.aosym = aosym self.j3c = data_group[label] self.kpts = data_group['kpts'][:] self.nkpts = self.kpts.shape[0] self.naux = 0 nao_pair = 0 for dat in self.j3c.values(): nao_pair = sum(x.shape[1] for x in dat.values()) self.naux = dat['0'].shape[0] break if self.aosym == 's1': nao = int(nao_pair ** .5) assert nao ** 2 == nao_pair self.nao = nao elif self.aosym == 's2': self.nao = int((nao_pair * 2)**.5) else: raise NotImplementedError def __del__(self): if not self._data_is_h5obj: self.data_group.close() def __enter__(self): return self def __exit__(self, type, value, traceback): if not self._data_is_h5obj: self.data_group.close() def __getitem__(self, slices): if isinstance(slices, tuple): ns = len(slices) if ns < 2: # patch (:) to slices slices = slices + (range(self.nkpts),) * (2 - ns) k_slices = slices[:2] a_slices = slices[2:] if isinstance(k_slices[0], int) and isinstance(k_slices[1], int): return self._load_one(k_slices[0], k_slices[1], ()) else: k_slices = slices a_slices = () out = numpy.empty((self.nkpts, self.nkpts), dtype=object) out[k_slices] = True for ki, kj in numpy.argwhere(out): out[ki,kj] = self._load_one(ki, kj, a_slices) return out[k_slices] def _load_one(self, ki, kj, slices): if self._data_version == 'v1': with _load3c(self._cderi, self._label) as fload: if len(self._kptij_lst) == self.nkpts: # kptij_lst was generated with option j_only, leading to # only the diagonal terms kikj = ki kpti, kptj = self._kptij_lst[kikj] elif len(self._kptij_lst) == self.nkpts**2: kikj = ki * self.nkpts + kj kpti, kptj = self._kptij_lst[kikj] elif ki >= kj: kikj = ki*(ki+1)//2 + kj kpti, kptj = self._kptij_lst[kikj] else: kikj = kj*(kj+1)//2 + ki kptj, kpti = self._kptij_lst[kikj] out = fload(kpti, kptj) return out[slices] kikj = ki * self.nkpts + kj kjki = kj * self.nkpts + ki if self.aosym == 's1' or kikj == kjki: dat = self.j3c[str(kikj)] nsegs = len(dat) out = _hstack_datasets([dat[str(i)] for i in range(nsegs)], slices) elif self.aosym == 's2': dat_ij = self.j3c[str(kikj)] dat_ji = self.j3c[str(kjki)] tril = _hstack_datasets([dat_ij[str(i)] for i in range(len(dat_ij))], slices) triu = _hstack_datasets([dat_ji[str(i)] for i in range(len(dat_ji))], slices) assert tril.dtype == numpy.complex128 naux = self.naux nao = self.nao out = numpy.empty((naux, nao*nao), dtype=tril.dtype) libpbc.PBCunpack_tril_triu(out.ctypes.data_as(ctypes.c_void_p), tril.ctypes.data_as(ctypes.c_void_p), triu.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(naux), ctypes.c_int(nao)) return out
[docs] def load(self, kpti, kptj): if self._data_version == 'v1': with _load3c(self._cderi, self._label) as fload: return numpy.asarray(fload(kpti, kptj)) ki = member(kpti, self.kpts) kj = member(kptj, self.kpts) if len(ki) == 0 or len(kj) == 0: raise RuntimeError(f'CDERI for kpts ({kpti}, {kptj}) not found') return self._load_one(ki[0], kj[0], ())
def __array__(self): '''Create a numpy array''' return self[:]
class _load3c: #'''Read cderi from old version pyscf (<= 2.0)''' '''cderi file may be stored in different formats (version 1 generated from pyscf-2.0 or older, version 2 from pyscf-2.1 or newer). This function can read both data formats. ''' def __init__(self, cderi, label, kpti_kptj=None, kptij_label='j3c-kptij', ignore_key_error=False): self.cderi = cderi self.label = label self.kptij_label = kptij_label self.kpti_kptj = kpti_kptj self.ignore_key_error = ignore_key_error self.feri = None self._kptij_lst = None self._aosym = None def __enter__(self): self.feri = h5py.File(self.cderi, 'r') if self.label not in self.feri: # Return a size-0 array to skip the loop in sr_loop if self.ignore_key_error: return numpy.zeros(0) else: raise KeyError('Key "%s" not found' % self.label) if self.kpti_kptj is None: return self.getitem else: return self.getitem(*self.kpti_kptj) def __exit__(self, type, value, traceback): self.feri.close() @property def kptij_lst(self): if self._kptij_lst is None: if self.data_version == 'v2': self._kptij_lst = self.feri['kpts'][()] else: self._kptij_lst = self.feri[self.kptij_label][()] return self._kptij_lst @property def aosym(self): if self._aosym is None: self._aosym = self.feri['aosym'][()] if isinstance(self._aosym, bytes): self._aosym = self._aosym.decode() return self._aosym @property def data_version(self): '''Guess the data format version''' if 'kpts' in self.feri: return 'v2' else: return 'v1' def getitem(self, kpti, kptj): assert self.feri is not None if self.data_version == 'v2': kpts = self.kptij_lst nkpts = len(kpts) if (isinstance(kpti, (int, numpy.integer)) and isinstance(kptj, (int, numpy.integer))): ki = kpti kj = kptj else: ki = member(kpti, kpts) kj = member(kptj, kpts) if len(ki) == 0 or len(kj) == 0: raise RuntimeError(f'CDERI {self.label} for kpts ({kpti}, {kptj}) is ' 'not initialized.') ki = ki[0] kj = kj[0] key = f'{self.label}/{ki * nkpts + kj}' if key not in self.feri: if self.ignore_key_error: return numpy.zeros(0) else: raise KeyError(f'Key {key} not found') return _KPair3CLoader(self.feri[self.label], ki, kj, nkpts, self.aosym) else: # data format version 1 return _getitem(self.feri, self.label, (kpti, kptj), self.kptij_lst, self.ignore_key_error) def _getitem(h5group, label, kpti_kptj, kptij_lst, ignore_key_error=False, aosym=None): kpti_kptj = numpy.asarray(kpti_kptj) k_id = member(kpti_kptj, kptij_lst) if len(k_id) > 0: key = label + '/' + str(k_id[0]) if key not in h5group: if ignore_key_error: return numpy.zeros(0) else: raise KeyError('Key "%s" not found' % key) hermi = False else: # swap ki,kj due to the hermiticity kptji = kpti_kptj[[1,0]] k_id = member(kptji, kptij_lst) if len(k_id) == 0: raise RuntimeError('%s for kpts %s is not initialized.\n' 'You need to update the attribute .kpts then call ' '.build() to initialize %s.' % (label, kpti_kptj, label)) key = label + '/' + str(k_id[0]) if key not in h5group: if ignore_key_error: return numpy.zeros(0) else: raise KeyError('Key "%s" not found' % key) hermi = True dat = _load_and_unpack(h5group[key],hermi) return dat class _load_and_unpack: ''' This class returns an array-like object to an hdf5 file that can be sliced, to allow for lazy loading hermi : boolean Take the conjugate transpose of the slice See PR 1086 and Issue 1076 ''' def __init__(self, dat, hermi): self.dat = dat self.hermi = hermi def __getitem__(self, s): dat = self.dat if isinstance(dat, h5py.Group): v = _hstack_datasets([dat[str(i)] for i in range(len(dat))], s) else: # For mpi4pyscf, pyscf-1.5.1 or older v = numpy.asarray(dat[s]) if self.hermi: nao = int(numpy.sqrt(v.shape[-1])) v1 = lib.transpose(v.reshape(-1,nao,nao), axes=(0,2,1)).conj() return v1.reshape(v.shape) else: return v def __array__(self): '''Create a numpy array''' return self[()] @property def shape(self): dat = self.dat if isinstance(dat, h5py.Group): all_shape = [dat[str(i)].shape for i in range(len(dat))] shape = all_shape[0][:-1] + (sum(x[-1] for x in all_shape),) return shape else: # For mpi4pyscf, pyscf-1.5.1 or older return dat.shape def _hstack_datasets(data_to_stack, slices=numpy.s_[:]): """Faster version of the operation np.hstack([x[slices] for x in data_to_stack]) for h5py datasets. Parameters ---------- data_to_stack : list of h5py.Dataset or np.ndarray Datasets/arrays to be stacked along first axis. slices: tuple or list of slices, a slice, or (). The slices (or indices) to select data from each H5 dataset. Returns ------- numpy.ndarray The stacked data, equal to numpy.hstack([dset[slices] for dset in data_to_stack]) """ # Step 1. Calculate the shape of the output array, and store it # in res_shape. res_shape = list(data_to_stack[0].shape) dset_shapes = [x.shape for x in data_to_stack] if not (isinstance(slices, tuple) or isinstance(slices, list)): # If slices is not a tuple, we assume it is a single slice acting on axis 0 only. slices = (slices,) def len_of_slice(arraylen, s): start, stop, step = s.indices(arraylen) r = range(start, stop, step) # Python has a very fast builtin method to get the length of a range. return len(r) for i, cur_slice in enumerate(slices): if not isinstance(cur_slice, slice): return numpy.hstack([dset[slices] for dset in data_to_stack]) if i == 1: ax1widths_sliced = [len_of_slice(shp[1], cur_slice) for shp in dset_shapes] else: # Except along axis 1, we assume the dimensions of all datasets are the same. # If they aren't, an error gets raised later. res_shape[i] = len_of_slice(res_shape[i], cur_slice) if len(slices) <= 1: ax1widths_sliced = [shp[1] for shp in dset_shapes] # Final dim along axis 1 is the sum of the post-slice axis 1 widths. res_shape[1] = sum(ax1widths_sliced) # Step 2. Allocate the output buffer out = numpy.empty(res_shape, dtype=numpy.result_type(*[dset.dtype for dset in data_to_stack])) # Step 3. Read data into the output buffer. ax1ind = 0 for i, dset in enumerate(data_to_stack): ax1width = ax1widths_sliced[i] dest_sel = numpy.s_[:, ax1ind:ax1ind + ax1width] if hasattr(dset, 'read_direct'): # h5py has issues with zero-size selections, see # https://github.com/h5py/h5py/issues/1455, # so we check for that here. if out[dest_sel].size > 0: dset.read_direct( out, source_sel=slices, dest_sel=dest_sel ) else: # For array-like objects out[dest_sel] = dset[slices] ax1ind += ax1width return out class _KPair3CLoader: def __init__(self, dat, ki, kj, nkpts, aosym): self.dat = dat self.kikj = ki * nkpts + kj self.kjki = kj * nkpts + ki self.nkpts = nkpts self.nsegs = len(dat[str(self.kikj)]) self.aosym = aosym def __getitem__(self, s): if self.aosym == 's1' or self.kikj == self.kjki: dat = self.dat[str(self.kikj)] out = _hstack_datasets([dat[str(i)] for i in range(self.nsegs)], s) elif self.aosym == 's2': dat_ij = self.dat[str(self.kikj)] dat_ji = self.dat[str(self.kjki)] tril = _hstack_datasets([dat_ij[str(i)] for i in range(self.nsegs)], s) triu = _hstack_datasets([dat_ji[str(i)] for i in range(self.nsegs)], s) assert tril.dtype == numpy.complex128 naux, nao_pair = tril.shape nao = int((nao_pair * 2)**.5) out = numpy.empty((naux, nao*nao), dtype=tril.dtype) libpbc.PBCunpack_tril_triu(out.ctypes.data_as(ctypes.c_void_p), tril.ctypes.data_as(ctypes.c_void_p), triu.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(naux), ctypes.c_int(nao)) else: raise ValueError(f'Unknown aosym {self.aosym}') return out def __array__(self): '''Create a numpy array''' return self[()] @property def shape(self): dat = self.dat[str(self.kikj)] shapes = [dat[str(i)].shape for i in range(self.nsegs)] naux = shapes[0][0] nao_pair = sum([shape[1] for shape in shapes]) if self.aosym == 's1' or self.kikj == self.kjki: return (naux, nao_pair) else: nao = int((nao_pair * 2)**.5) return (naux, nao*nao) def _gaussian_int(cell): r'''Regular gaussian integral \int g(r) dr^3''' return ft_ao.ft_ao(cell, numpy.zeros((1,3)))[0].real