Source code for pyscf.pbc.scf.rsjk

#!/usr/bin/env python
# Copyright 2020-2021 The PySCF Developers. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Authors: Qiming Sun <osirpt.sun@gmail.com>
#

'''
Range separation JK builder

Ref:
    Q. Sun, arXiv:2012.07929
    Q. Sun, arXiv:2302.11307
'''

import ctypes
import numpy as np
import scipy.linalg
from pyscf import gto
from pyscf import lib
from pyscf.lib import logger
from pyscf.scf import _vhf
from pyscf.pbc import gto as pbcgto
from pyscf.pbc.tools import pbc as pbctools
from pyscf.pbc.tools import k2gamma
from pyscf.pbc.scf import addons
from pyscf.pbc.df import aft, rsdf_builder, aft_jk
from pyscf.pbc.df import ft_ao
from pyscf.pbc.df.df_jk import (zdotNN, zdotCN, zdotNC, _ewald_exxdiv_for_G0,
                                _format_dms, _format_kpts_band, _format_jks)
from pyscf.pbc.df.incore import libpbc, _get_cache_size, LOG_ADJUST
from pyscf.pbc.lib.kpts_helper import (is_zero, kk_adapted_iter,
                                       group_by_conj_pairs, intersection)
from pyscf import __config__

# Threshold of steep bases and local bases
RCUT_THRESHOLD = getattr(__config__, 'pbc_scf_rsjk_rcut_threshold', 1.0)
OMEGA_MIN = rsdf_builder.OMEGA_MIN
INDEX_MIN = rsdf_builder.INDEX_MIN

[docs] class RangeSeparatedJKBuilder(lib.StreamObject): _keys = { 'cell', 'mesh', 'kpts', 'purify', 'omega', 'rs_cell', 'cell_d', 'bvk_kmesh', 'supmol_sr', 'supmol_ft', 'supmol_d', 'cell0_basis_mask', 'ke_cutoff', 'direct_scf_tol', 'time_reversal_symmetry', 'exclude_dd_block', 'allow_drv_nodddd', 'approx_vk_lr_missing_mo', } def __init__(self, cell, kpts=np.zeros((1,3))): self.cell = cell self.stdout = cell.stdout self.verbose = cell.verbose self.max_memory = cell.max_memory self.mesh = None self.kpts = np.reshape(kpts, (-1, 3)) self.purify = True if cell.omega == 0: self.omega = None elif cell.omega < 0: # Initialize omega to cell.omega for HF exchange of short range # int2e in RSH functionals self.omega = -cell.omega else: raise RuntimeError('RSJK does not support LR integrals') self.rs_cell = None self.cell_d = None # Born-von Karman supercell self.bvk_kmesh = None self.supmol_sr = None self.supmol_ft = None self.supmol_d = None # which shells are located in the first primitive cell self.cell0_basis_mask = None self.ke_cutoff = None self.direct_scf_tol = None # Use the k-point conjugation symmetry between k and -k self.time_reversal_symmetry = True self.exclude_dd_block = True self.allow_drv_nodddd = True self._sr_without_dddd = None # Allow to approximate vk_lr with less mesh if mo_coeff not attached to dm. self.approx_vk_lr_missing_mo = False # TODO: incrementally build SR part self._last_vs = (0, 0) self._qindex = None __getstate__, __setstate__ = lib.generate_pickle_methods( excludes=('rs_cell', 'cell_d', 'supmol_sr', 'supmol_ft', 'supmol_d', '_sr_without_dddd', '_last_vs', '_qindex'), reset_state=True)
[docs] def has_long_range(self): '''Whether to add the long-range part computed with AFT/FFT integrals''' return self.omega is None or abs(self.cell.omega) < self.omega
[docs] def dump_flags(self, verbose=None): logger.info(self, '\n') logger.info(self, '******** %s ********', self.__class__) logger.info(self, 'mesh = %s (%d PWs)', self.mesh, np.prod(self.mesh)) logger.info(self, 'omega = %s', self.omega) logger.info(self, 'purify = %s', self.purify) logger.info(self, 'bvk_kmesh = %s', self.bvk_kmesh) logger.info(self, 'ke_cutoff = %s', self.ke_cutoff) logger.info(self, 'time_reversal_symmetry = %s', self.time_reversal_symmetry) logger.info(self, 'has_long_range = %s', self.has_long_range()) logger.info(self, 'exclude_dd_block = %s', self.exclude_dd_block) logger.info(self, 'allow_drv_nodddd = %s', self.allow_drv_nodddd) logger.debug(self, 'approx_vk_lr_missing_mo = %s', self.approx_vk_lr_missing_mo) return self
[docs] def reset(self, cell=None): if cell is not None: self.cell = cell self.rs_cell = None self.cell_d = None self.supmol_sr = None self.supmol_ft = None self.supmol_d = None self._sr_without_dddd = None self._last_vs = (0, 0) self._qindex = None return self
[docs] def build(self, omega=None, intor='int2e'): cpu0 = logger.process_clock(), logger.perf_counter() log = logger.new_logger(self) cell = self.cell kpts = self.kpts if omega is not None: self.omega = omega if self.omega is None or self.omega == 0: # Search a proper range-separation parameter omega that can balance the # computational cost between the real space integrals and moment space # integrals self.omega, self.mesh, self.ke_cutoff = _guess_omega(cell, kpts, self.mesh) else: self.ke_cutoff = estimate_ke_cutoff_for_omega(cell, self.omega) self.mesh = cell.cutoff_to_mesh(self.ke_cutoff) if cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum': # To ensure trunc-coulG converged for all basis self.mesh[2] = rsdf_builder._estimate_meshz(cell) self.check_sanity() log.info('omega = %.15g ke_cutoff = %s mesh = %s', self.omega, self.ke_cutoff, self.mesh) # AFT part exchange cost may be dominant if cell.nao * len(kpts) > 5000: log.debug1('Disable exclude_dd_block') self.exclude_dd_block = False # basis with cutoff under ~150 eV are handled by AFTDF ke_cutoff = max(6., self.ke_cutoff) rs_cell = ft_ao._RangeSeparatedCell.from_cell( cell, ke_cutoff, RCUT_THRESHOLD, in_rsjk=True, verbose=log) self.rs_cell = rs_cell self.bvk_kmesh = kmesh = k2gamma.kpts_to_kmesh(cell, kpts) log.debug('kmesh for bvk-cell = %s', kmesh) exp_min = np.hstack(cell.bas_exps()).min() lattice_sum_factor = max((2*cell.rcut)**3/cell.vol * 1/exp_min, 1) cutoff = cell.precision / lattice_sum_factor * .1 self.direct_scf_tol = cutoff log.debug('Set RangeSeparationJKBuilder.direct_scf_tol to %g', cutoff) rcut_sr = estimate_rcut(rs_cell, self.omega, exclude_dd_block=self.exclude_dd_block) supmol_sr = ft_ao.ExtendedMole.from_cell(rs_cell, kmesh, rcut_sr.max(), log) supmol_sr.omega = -self.omega self.supmol_sr = supmol_sr.strip_basis(rcut_sr) log.debug('supmol nbas = %d cGTO = %d pGTO = %d', supmol_sr.nbas, supmol_sr.nao, supmol_sr.npgto_nr()) if self.has_long_range(): rcut = rsdf_builder.estimate_ft_rcut( rs_cell, exclude_dd_block=self.exclude_dd_block) supmol_ft = rsdf_builder._ExtendedMoleFT.from_cell( rs_cell, kmesh, rcut.max(), log) supmol_ft.exclude_dd_block = self.exclude_dd_block self.supmol_ft = supmol_ft.strip_basis(rcut) log.debug('sup-mol-ft nbas = %d cGTO = %d pGTO = %d', supmol_ft.nbas, supmol_ft.nao, supmol_ft.npgto_nr()) self.cell_d = rs_cell.smooth_basis_cell() if self.cell_d.nbas > 0: rcut = ft_ao.estimate_rcut(self.cell_d) supmol_d = ft_ao.ExtendedMole.from_cell( self.cell_d, self.bvk_kmesh, rcut.max(), log) self.supmol_d = supmol_d.strip_basis(rcut) log.debug('supmol_d nbas = %d cGTO = %d', supmol_d.nbas, supmol_d.nao) log.timer_debug1('initializing supmol', *cpu0) self._cintopt = _vhf.make_cintopt(supmol_sr._atm, supmol_sr._bas, supmol_sr._env, 'int2e') nbas = supmol_sr.nbas qindex = np.empty((3,nbas,nbas), dtype=np.int16) ao_loc = supmol_sr.ao_loc with supmol_sr.with_integral_screen(self.direct_scf_tol**2): libpbc.PBCVHFnr_int2e_q_cond( libpbc.int2e_sph, self._cintopt, qindex.ctypes.data_as(ctypes.c_void_p), ao_loc.ctypes.data_as(ctypes.c_void_p), supmol_sr._atm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(supmol_sr.natm), supmol_sr._bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(supmol_sr.nbas), supmol_sr._env.ctypes.data_as(ctypes.c_void_p)) libpbc.PBCVHFnr_sindex( qindex[2:].ctypes.data_as(ctypes.c_void_p), supmol_sr._atm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(supmol_sr.natm), supmol_sr._bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(supmol_sr.nbas), supmol_sr._env.ctypes.data_as(ctypes.c_void_p)) if self.exclude_dd_block: # Remove the smooth-smooth basis block. smooth_idx = supmol_sr.bas_type_to_indices(ft_ao.SMOOTH_BASIS) qindex[0,smooth_idx[:,None], smooth_idx] = INDEX_MIN self.qindex = qindex log.timer('initializing qindex', *cpu0) return self
@property def qindex(self): if self._qindex is None or isinstance(self._qindex, np.ndarray): return self._qindex return self._qindex['qindex'][:] @qindex.setter def qindex(self, x): if x.size < self.max_memory*.2e6: self._qindex = x else: self._qindex = lib.H5TmpFile() self._qindex['qindex'] = x self._qindex.flush() def _sort_qcond_cell0(self, seg_loc, seg2sh, nbasp, qindex): '''Sort qcond for ij-pair in cell0 so that loops in PBCVHF_direct_drv can be "break" early ''' qcell0_ijij = _qcond_cell0_abstract(qindex[0], seg_loc, seg2sh, nbasp) qcell0_iijj = _qcond_cell0_abstract(qindex[1], seg_loc, seg2sh, nbasp) idx = np.asarray(qcell0_ijij.ravel().argsort(kind='stable'), np.int32)[::-1] jsh_idx, ish_idx = divmod(idx, nbasp) ish_idx = np.asarray(ish_idx, dtype=np.int32, order='C') jsh_idx = np.asarray(jsh_idx, dtype=np.int32, order='C') return qcell0_ijij, qcell0_iijj, ish_idx, jsh_idx def _get_jk_sr(self, dm_kpts, hermi=1, kpts=None, kpts_band=None, with_j=True, with_k=True, omega=None, exxdiv=None): if omega is not None: # J/K for RSH functionals raise NotImplementedError cpu0 = logger.process_clock(), logger.perf_counter() log = logger.new_logger(self) comp = 1 nkpts = kpts.shape[0] supmol = self.supmol_sr cell = self.cell rs_cell = self.rs_cell nao = cell.nao bvk_ncells, rs_nbas, nimgs = supmol.bas_mask.shape if dm_kpts.ndim != 4: dm = dm_kpts.reshape(-1, nkpts, nao, nao) else: dm = dm_kpts n_dm = dm.shape[0] cutoff = int(np.log(self.direct_scf_tol) * LOG_ADJUST) self._sr_without_dddd = ( self.allow_drv_nodddd and not self.exclude_dd_block and self.cell_d.nbas > 0 and self.has_long_range() and (cell.dimension == 3 or cell.low_dim_ft_type != 'inf_vacuum')) if self._sr_without_dddd: # To exclude (dd|dd) block, diffused shell needs to be independent # segments in PBCVHF_direct_drv1. Decontract the segments in rs-shell. # map ao indices in cell to ao indices in rs_cell log.debug1('get_jk_sr with PBCVHF_direct_drv_nodddd') drv = libpbc.PBCVHF_direct_drv_nodddd ao_map = lib.locs_to_indices(cell.ao_loc, rs_cell.bas_map) rs_nao = ao_map.size rs_dm = np.empty((n_dm,nkpts*rs_nao,rs_nao), dtype=dm.dtype) idx = np.arange(nkpts)[:,None] * nao + ao_map idx = np.asarray(idx, dtype=np.int32).ravel() for i in range(n_dm): lib.take_2d(dm[i].reshape(nkpts*nao,nao), idx, ao_map, out=rs_dm[i]) dm = rs_dm.reshape(n_dm,nkpts,rs_nao,rs_nao) nbasp = rs_cell.nbas cell0_ao_loc = rs_cell.ao_loc # uncontracted (local, diffused) basis segments in bvk-cell seg_loc = lib.locs_to_indices( supmol.seg_loc, np.arange(supmol.seg_loc.size-1)) seg_loc = np.append(seg_loc, supmol.seg_loc[-1]).astype(np.int32) else: drv = libpbc.PBCVHF_direct_drv nbasp = cell.nbas # The number of shells in the primitive cell cell0_ao_loc = cell.ao_loc seg_loc = supmol.seg_loc qindex = self.qindex qcell0_ijij, qcell0_iijj, ish_idx, jsh_idx = \ self._sort_qcond_cell0(seg_loc, supmol.seg2sh, nbasp, qindex) weight = 1. / nkpts expLk = np.exp(1j*np.dot(supmol.bvkmesh_Ls, kpts.T)) # Utilized symmetry sc_dm[R,S] = sc_dm[S-R] = sc_dm[(S-R)%N] #:phase = expLk / nkpts**.5 #:sc_dm = lib.einsum('Rk,nkuv,Sk->nRuSv', phase, sc_dm, phase.conj()) sc_dm = lib.einsum('k,Sk,nkuv->nSuv', expLk[0]*weight, expLk.conj(), dm) dm_translation = k2gamma.double_translation_indices(self.bvk_kmesh).astype(np.int32) dm_imag_max = abs(sc_dm.imag).max() is_complex_dm = dm_imag_max > 1e-6 if is_complex_dm: if dm_imag_max < 1e-2: log.warn('DM in (BvK) cell has small imaginary part. ' 'It may be a signal of symmetry broken in k-point symmetry') sc_dm = np.vstack([sc_dm.real, sc_dm.imag]) self.purify = False else: sc_dm = sc_dm.real nao1 = dm.shape[-1] sc_dm = np.asarray(sc_dm.reshape(-1, bvk_ncells, nao1, nao1), order='C') n_sc_dm = sc_dm.shape[0] # * sparse_ao_loc has dimension (Nk,nbas), corresponding to the # bvkcell with all basis sparse_ao_loc = nao1 * np.arange(bvk_ncells)[:,None] + cell0_ao_loc[:-1] sparse_ao_loc = np.append(sparse_ao_loc.ravel(), nao1 * bvk_ncells) dm_cond = [lib.condense('NP_absmax', d, sparse_ao_loc, cell0_ao_loc) for d in sc_dm.reshape(n_sc_dm, bvk_ncells*nao1, nao1)] dm_cond = np.max(dm_cond, axis=0) dm_cond[dm_cond < 1e-100] = 1e-100 dmindex = np.log(dm_cond) dmindex *= LOG_ADJUST dmindex = np.asarray(dmindex, order='C', dtype=np.int16) dmindex = dmindex.reshape(bvk_ncells, nbasp, nbasp) dm_cond = None bvk_nbas = nbasp * bvk_ncells shls_slice = (0, nbasp, 0, bvk_nbas, 0, bvk_nbas, 0, bvk_nbas) cache_size = _get_cache_size(cell, 'int2e_sph') cell0_dims = cell0_ao_loc[1:] - cell0_ao_loc[:-1] cache_size += cell0_dims.max()**4 * comp * 2 if hermi: fdot_suffix = 's2kl' else: fdot_suffix = 's1' nvs = 1 if with_j and with_k: fdot = 'PBCVHF_contract_jk_' + fdot_suffix nvs = 2 elif with_j: fdot = 'PBCVHF_contract_j_' + fdot_suffix else: # with_k fdot = 'PBCVHF_contract_k_' + fdot_suffix vs = np.zeros((nvs, n_sc_dm, nao1, bvk_ncells, nao1)) if supmol.cart: intor = 'PBCint2e_cart' else: intor = 'PBCint2e_sph' drv(getattr(libpbc, fdot), getattr(libpbc, intor), vs.ctypes.data_as(ctypes.c_void_p), sc_dm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(vs.size), ctypes.c_int(n_sc_dm), ctypes.c_int(bvk_ncells), ctypes.c_int(nimgs), ctypes.c_int(nkpts), ctypes.c_int(nbasp), ctypes.c_int(comp), seg_loc.ctypes.data_as(ctypes.c_void_p), supmol.seg2sh.ctypes.data_as(ctypes.c_void_p), cell0_ao_loc.ctypes.data_as(ctypes.c_void_p), rs_cell.bas_type.ctypes.data_as(ctypes.c_void_p), (ctypes.c_int*8)(*shls_slice), dm_translation.ctypes.data_as(ctypes.c_void_p), qindex.ctypes.data_as(ctypes.c_void_p), dmindex.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(cutoff), qcell0_ijij.ctypes.data_as(ctypes.c_void_p), qcell0_iijj.ctypes.data_as(ctypes.c_void_p), ish_idx.ctypes.data_as(ctypes.c_void_p), jsh_idx.ctypes.data_as(ctypes.c_void_p), self._cintopt, ctypes.c_int(cache_size), supmol._atm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(supmol.natm), supmol._bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(supmol.nbas), supmol._env.ctypes.data_as(ctypes.c_void_p)) if is_complex_dm: vs = vs[:,:n_dm] + vs[:,n_dm:] * 1j if self._sr_without_dddd: jdx = np.arange(bvk_ncells)[:,None] * nao + ao_map jdx = np.asarray(jdx, dtype=np.int32).ravel() rs_vs = vs.reshape(nvs*n_dm, nao1, bvk_ncells*nao1) vs = np.zeros((nvs*n_dm, nao, bvk_ncells*nao), dtype=rs_vs.dtype) for i in range(nvs*n_dm): lib.takebak_2d(vs[i], rs_vs[i], ao_map, jdx, thread_safe=False) vs = vs.reshape(nvs,n_dm,nao,bvk_ncells,nao) if kpts_band is not None: kpts_band = np.reshape(kpts_band, (-1, 3)) subset_only = intersection(kpts, kpts_band).size == len(kpts_band) if not subset_only: log.warn('Approximate J/K matrices at kpts_band ' 'with the bvk-cell derived from kpts') expLk = np.exp(1j*np.dot(supmol.bvkmesh_Ls, kpts_band.T)) vs = lib.einsum('snpRq,Rk->snkpq', vs, expLk) vs = np.asarray(vs, order='C') log.timer_debug1('short range part vj and vk', *cpu0) return vs
[docs] def get_jk(self, dm_kpts, hermi=1, kpts=None, kpts_band=None, with_j=True, with_k=True, omega=None, exxdiv=None): cell = self.cell if omega is not None: # J/K for RSH functionals if omega > 0: # Long-range part only, call AFTDF dfobj = aft.AFTDF(cell, self.kpts) ke_cutoff = estimate_ke_cutoff_for_omega(cell, omega) dfobj.mesh = cell.cutoff_to_mesh(ke_cutoff) return dfobj.get_jk(dm_kpts, hermi, kpts, kpts_band, with_j, with_k, omega, exxdiv) elif omega < 0: # Short-range part only if self.omega is not None and self.omega != omega: raise RuntimeError(f'omega = {omega}, self.omega = {self.omega}') raise NotImplementedError if self.supmol_sr is None: self.build() # Does not support to specify arbitrary kpts if kpts is not None and abs(kpts-self.kpts).max() > 1e-7: raise RuntimeError('kpts error. kpts cannot be modified in RSJK') kpts = np.asarray(self.kpts.reshape(-1, 3), order='C') mo_coeff = getattr(dm_kpts, 'mo_coeff', None) mo_occ = getattr(dm_kpts, 'mo_occ', None) dm_kpts = np.asarray(dm_kpts) dms = _format_dms(dm_kpts, kpts) # compute delta vs if dm is obtained from SCF make_rdm1 if mo_coeff is not None: last_dm, last_vs = self._last_vs vs = self._get_jk_sr(dms-last_dm, hermi, kpts, kpts_band, with_j, with_k, omega, exxdiv) vs += last_vs self._last_vs = (dms, vs.copy()) last_dm = last_vs = None # dm ~= dm_factor * dm_factor.T n_dm, nkpts, nao = dms.shape[:3] # mo_coeff, mo_occ are not a list of aligned array if # remove_lin_dep was applied to scf object if dm_kpts.ndim == 4: # KUHF nocc = max(max(np.count_nonzero(x > 0) for x in z) for z in mo_occ) dm_factor = [[x[:,:nocc] for x in mo] for mo in mo_coeff] occs = [[x[:nocc] for x in z] for z in mo_occ] else: # KRHF nocc = max(np.count_nonzero(x > 0) for x in mo_occ) dm_factor = [[mo[:,:nocc] for mo in mo_coeff]] occs = [[x[:nocc] for x in mo_occ]] dm_factor = np.array(dm_factor, dtype=np.complex128, order='C') dm_factor *= np.sqrt(np.array(occs, dtype=np.double))[:,:,None] else: vs = self._get_jk_sr(dms, hermi, kpts, kpts_band, with_j, with_k, omega, exxdiv) dm_factor = None dms = lib.tag_array(dms, dm_factor=dm_factor) if with_j and with_k: vj, vk = vs elif with_j: vj, vk = vs[0], None else: vj, vk = None, vs[0] if self.purify and kpts_band is None: phase = np.exp(1j*np.dot(self.supmol_sr.bvkmesh_Ls, kpts.T)) phase /= np.sqrt(len(kpts)) else: phase = None if with_j: if self.has_long_range(): vj += self._get_vj_lr(dms, hermi, kpts, kpts_band) if hermi: vj = (vj + vj.conj().transpose(0,1,3,2)) * .5 vj = _purify(vj, phase) vj = _format_jks(vj, dm_kpts, kpts_band, kpts) if is_zero(kpts) and dm_kpts.dtype == np.double: vj = vj.real.copy() if with_k: if self.has_long_range(): approx_vk_lr = dm_factor is None and self.approx_vk_lr_missing_mo if not approx_vk_lr: vk += self._get_vk_lr(dms, hermi, kpts, kpts_band, exxdiv) else: mesh1 = np.array(self.mesh)//3*2 + 1 logger.debug(self, 'Approximate lr_k with mesh %s', mesh1) with lib.temporary_env(self, mesh=mesh1): vk += self._get_vk_lr(dms, hermi, kpts, kpts_band, exxdiv) self.approx_vk_lr_missing_mo = False if hermi: vk = (vk + vk.conj().transpose(0,1,3,2)) * .5 vk = _purify(vk, phase) vk = _format_jks(vk, dm_kpts, kpts_band, kpts) if is_zero(kpts) and dm_kpts.dtype == np.double: vk = vk.real.copy() return vj, vk
weighted_coulG = aft.weighted_coulG
[docs] def weighted_coulG_LR(self, kpt=np.zeros(3), exx=False, mesh=None): # The long range part Coulomb kernel has to be computed as the # difference between coulG(cell.omega) - coulG(self.omega). It allows this # module to handle the SR- and regular integrals in the same framework return (self.weighted_coulG(kpt, exx, mesh) - self.weighted_coulG_SR(kpt, exx, mesh))
[docs] def weighted_coulG_SR(self, kpt=np.zeros(3), exx=False, mesh=None): return self.weighted_coulG(kpt, False, mesh, -self.omega)
def _get_vj_lr(self, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None): ''' Long-range part of J matrix ''' if kpts_band is None: return self._get_lr_j_kpts(dm_kpts, hermi, kpts) logger.warn(self, 'Approximate kpts_band for vj with k-point projection') vj = self._get_lr_j_kpts(dm_kpts, hermi, kpts) pk2k = addons._k2k_projection(kpts, kpts_band, self.supmol_ft.bvkmesh_Ls) return lib.einsum('nkpq,kh->nhpq', vj, pk2k) def _get_lr_j_kpts(self, dm_kpts, hermi=1, kpts=np.zeros((1,3))): ''' Long-range part of J matrix ''' if len(kpts) == 1 and not is_zero(kpts): raise NotImplementedError('Single k-point get-j') cpu0 = logger.process_clock(), logger.perf_counter() log = logger.new_logger(self) cell = self.cell rs_cell = self.rs_cell if self.exclude_dd_block or self._sr_without_dddd: cell_d = self.cell_d naod = cell_d.nao ngrids_d = np.prod(cell_d.mesh) else: cell_d = None naod = ngrids_d = 0 kpts = np.asarray(kpts.reshape(-1, 3), order='C') dms = dm_kpts n_dm, nkpts, nao = dms.shape[:3] mesh = self.mesh ngrids = np.prod(mesh) is_real = is_zero(kpts) and dms.dtype == np.double if is_real: vj_kpts = np.zeros((n_dm,nkpts,nao,nao)) else: vj_kpts = np.zeros((n_dm,nkpts,nao,nao), dtype=np.complex128) # TODO: aosym == 's2' aosym = 's1' ft_kern = self.supmol_ft.gen_ft_kernel( aosym, return_complex=False, kpts=kpts, verbose=log) Gv, Gvbase, kws = cell.get_Gv_weights(mesh) gxyz = lib.cartesian_prod([np.arange(len(x)) for x in Gvbase]) kpt_allow = np.zeros(3) coulG = self.weighted_coulG(kpt_allow, False, mesh) if (cell.dimension == 3 or (cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum')): G0_idx = 0 # due to np.fft.fftfreq convention # G=0 associated to 2e integrals in real-space coulG_SR_at_G0 = np.pi/self.omega**2 # For cell.dimension = 2, coulG is computed with truncated Coulomb # interactions. The 3d coulG_SR below is to remove the analytical # SR from get_jk_sr (which was computed with full Coulomb) then to # add truncated Coulomb for AFT part. with lib.temporary_env(cell, dimension=3): coulG_SR = self.weighted_coulG_SR(kpt_allow, False, mesh) coulG_SR[G0_idx] += coulG_SR_at_G0 * kws else: coulG_SR = self.weighted_coulG_SR(kpt_allow, False, mesh) coulG_SR_at_G0 = None if naod > 0: smooth_bas_mask = rs_cell.bas_type == ft_ao.SMOOTH_BASIS smooth_bas_idx = rs_cell.bas_map[smooth_bas_mask] ao_d_idx = rs_cell.get_ao_indices(smooth_bas_idx, cell.ao_loc) mem_now = lib.current_memory()[0] max_memory = self.max_memory - mem_now log.debug1('max_memory = %d MB (%d in use)', max_memory+mem_now, mem_now) Gblksize = max(24, int(max_memory*.8e6/16/(nao**2+naod**2)/(nkpts+1))//8*8) Gblksize = min(Gblksize, ngrids) log.debug1('Gblksize = %d', Gblksize) if not self.exclude_dd_block or naod == 0: log.debug1('get_lr_j_kpts with aft_aopair') # Long-range part is calculated as the difference # coulG(cell.omega) - coulG(self.omega) . It can support both regular # integrals and LR integrals. coulG_LR = coulG - coulG_SR buf = np.empty(nkpts*Gblksize*nao**2*2) for p0, p1 in lib.prange(0, ngrids, Gblksize): Gpq = ft_kern(Gv[p0:p1], gxyz[p0:p1], Gvbase, kpt_allow, out=buf) aft_jk._update_vj_(vj_kpts, Gpq, dms, coulG_LR[p0:p1]) Gpq = buf = None if self._sr_without_dddd and naod > 0: log.debug1('get_lr_j_kpts dd block with mesh %s', cell_d.mesh) if cell.dimension < 2: raise NotImplementedError if cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum': raise NotImplementedError aoR_ks, aoI_ks = rsdf_builder._eval_gto(cell_d, cell_d.mesh, kpts) # rho = einsum('nkji,kig,kjg->ng', dm, ao.conj(), ao) rho = np.zeros((n_dm, ngrids_d)) tmpR = np.empty((naod, ngrids_d)) tmpI = np.empty((naod, ngrids_d)) dm_dd = dms[:,:,ao_d_idx[:,None],ao_d_idx] dmR_dd = np.asarray(dm_dd.real, order='C') dmI_dd = np.asarray(dm_dd.imag, order='C') # vG = einsum('ij,gji->g', dm_dd[k], aoao[k]) * coulG for i in range(n_dm): for k in range(nkpts): zdotNN(dmR_dd[i,k].T, dmI_dd[i,k].T, aoR_ks[k], aoI_ks[k], 1, tmpR, tmpI) rho[i] += np.einsum('ig,ig->g', aoR_ks[k], tmpR) rho[i] += np.einsum('ig,ig->g', aoI_ks[k], tmpI) if coulG_SR_at_G0 is not None: with lib.temporary_env(cell_d, dimension=3): coulG_SR = pbctools.get_coulG(cell_d, omega=-self.omega) coulG_SR[G0_idx] += coulG_SR_at_G0 else: coulG_SR = pbctools.get_coulG(cell_d, omega=-self.omega) coulG_SR *= cell.vol / ngrids_d vG = pbctools.fft(rho, cell_d.mesh) * coulG_SR vR = pbctools.ifft(vG, cell_d.mesh).real nband = nkpts vjR_dd = np.empty((naod,naod)) vjI_dd = np.empty((naod,naod)) for i in range(n_dm): for k in range(nband): aowR = np.einsum('xi,x->xi', aoR_ks[k].T, vR[i]) aowI = np.einsum('xi,x->xi', aoI_ks[k].T, vR[i]) zdotCN(aoR_ks[k], aoI_ks[k], aowR, aowI, 1, vjR_dd, vjI_dd) if is_real: vj = vjR_dd else: vj = vjR_dd + vjI_dd*1j lib.takebak_2d(vj_kpts[i,k], vj, ao_d_idx, ao_d_idx, thread_safe=False) elif naod > 0 and ngrids < ngrids_d: # Prefer AFTDF for everything otherwise cell_d.mesh have to be used # for AFTDF log.debug1('get_lr_j_kpts dd block cached aft_aopair_dd') ft_kern_dd = self.supmol_d.gen_ft_kernel( aosym, return_complex=False, kpts=kpts, verbose=log) def merge_dd(Gpq, p0, p1): '''Merge diffused basis block into ao-pair tensor inplace''' GpqR, GpqI = Gpq pqG_ddR, pqG_ddI = ft_kern_dd(Gv[p0:p1], gxyz[p0:p1], Gvbase, kpt_allow, kpts) # Gpq should be an array of (nkpts,ni,nj,ngrids) in C order if not GpqR[0].flags.c_contiguous: assert GpqR[0].strides[0] == 8 # stride for grids for k in range(nkpts): libpbc.PBC_ft_fuse_dd_s1( GpqR[k].ctypes.data_as(ctypes.c_void_p), GpqI[k].ctypes.data_as(ctypes.c_void_p), pqG_ddR[k].ctypes.data_as(ctypes.c_void_p), pqG_ddI[k].ctypes.data_as(ctypes.c_void_p), ao_d_idx.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(nao), ctypes.c_int(naod), ctypes.c_int(p1-p0)) return (GpqR, GpqI) buf = np.empty(nkpts*Gblksize*nao**2*2) for p0, p1 in lib.prange(0, ngrids, Gblksize): Gpq = ft_kern(Gv[p0:p1], gxyz[p0:p1], Gvbase, kpt_allow, out=buf) aft_jk._update_vj_(vj_kpts, Gpq, dms, -coulG_SR[p0:p1]) Gpq = merge_dd(Gpq, p0, p1) aft_jk._update_vj_(vj_kpts, Gpq, dms, coulG[p0:p1]) Gpq = buf = None elif naod > 0: log.debug1('get_lr_j_kpts dd block cached fft_aopair_dd') aoR_ks, aoI_ks = rsdf_builder._eval_gto(cell_d, mesh, kpts) # rho = einsum('nkji,kig,kjg->ng', dm, ao.conj(), ao) rho = np.zeros((n_dm, ngrids)) tmpR = np.empty((naod, ngrids)) tmpI = np.empty((naod, ngrids)) dm_dd = dms[:,:,ao_d_idx[:,None],ao_d_idx] dmR_dd = np.asarray(dm_dd.real, order='C') dmI_dd = np.asarray(dm_dd.imag, order='C') # vG = einsum('ij,gji->g', dm_dd[k], aoao[k]) * coulG for i in range(n_dm): for k in range(nkpts): zdotNN(dmR_dd[i,k].T, dmI_dd[i,k].T, aoR_ks[k], aoI_ks[k], 1, tmpR, tmpI) rho[i] += np.einsum('ig,ig->g', aoR_ks[k], tmpR) rho[i] += np.einsum('ig,ig->g', aoI_ks[k], tmpI) vG_dd = pbctools.ifft(rho, mesh) * cell.vol * coulG tmpR = tmpI = dmR_dd = dmI_dd = None cpu1 = log.timer_debug1('get_lr_j_kpts dd block', *cpu0) mem_now = lib.current_memory()[0] max_memory = self.max_memory - mem_now log.debug1('max_memory = %d MB (%d in use)', max_memory+mem_now, mem_now) Gblksize = min(Gblksize, int(max_memory*.8e6/16/(nao**2+naod**2)/(nkpts+1))//8*8) log.debug1('Gblksize = %d', Gblksize) buf = np.empty(nkpts*Gblksize*nao**2*2) for p0, p1 in lib.prange(0, ngrids, Gblksize): Gpq = ft_kern(Gv[p0:p1], gxyz[p0:p1], Gvbase, kpt_allow, out=buf) #: aft_jk._update_vj_(vj_kpts, aoaoks, dms, coulG[p0:p1], 1) #: aft_jk._update_vj_(vj_kpts, aoaoks, dms, coulG_SR[p0:p1], -1) GpqR, GpqI = Gpq for i in range(n_dm): rhoR = np.einsum('kij,kgij->g', dms[i].real, GpqR) rhoI = -np.einsum('kij,kgij->g', dms[i].real, GpqI) if not is_real: rhoR += np.einsum('kij,kgij->g', dms[i].imag, GpqI) rhoI += np.einsum('kij,kgij->g', dms[i].imag, GpqR) rho = rhoR + rhoI * 1j vG = vG_dd[i,p0:p1] # Update vG_dd inplace to include rho contributions vG += coulG[p0:p1] * rho vG_SR = coulG_SR[p0:p1] * rho # vG_LR contains full vG of dd-block and vG_LR of rest blocks vG_LR = vG - vG_SR vj_kpts[i].real += np.einsum('g,kgij->kij', vG_LR.real, GpqR) vj_kpts[i].real -= np.einsum('g,kgij->kij', vG_LR.imag, GpqI) if not is_real: vj_kpts[i].imag += np.einsum('g,kgij->kij', vG_LR.real, GpqI) vj_kpts[i].imag += np.einsum('g,kgij->kij', vG_LR.imag, GpqR) Gpq = None log.timer_debug1('get_lr_j_kpts ft_aopair', *cpu1) vR = pbctools.fft(vG_dd, mesh).real * (cell.vol/ngrids) vjR_dd = np.empty((naod, naod)) vjI_dd = np.empty((naod, naod)) for i in range(n_dm): for k in range(nkpts): tmpR = aoR_ks[k] * vR[i] tmpI = aoI_ks[k] * vR[i] zdotCN(aoR_ks[k], aoI_ks[k], tmpR.T, tmpI.T, 1, vjR_dd, vjI_dd) if is_real: vj_dd = vjR_dd else: vj_dd = vjR_dd + vjI_dd * 1j lib.takebak_2d(vj_kpts[i,k], vj_dd, ao_d_idx, ao_d_idx, thread_safe=False) else: raise RuntimeError(f'exclude_dd_block={self.exclude_dd_block} ' f'sr_without_dddd={self._sr_without_dddd} naod={naod}') if nkpts > 1: vj_kpts *= 1./nkpts log.timer_debug1('get_lr_j_kpts', *cpu0) return vj_kpts def _get_vk_lr(self, dm_kpts, hermi=1, kpts=np.zeros((1,3)), kpts_band=None, exxdiv=None): ''' Long-range part of K matrix ''' if kpts_band is None: return self._get_lr_k_kpts(dm_kpts, hermi, kpts, exxdiv) # Note: Errors in k2k-projection for vj is relatively small. # k2k-projection for vk has significant finite-size errors. logger.warn(self, 'Approximate kpts_band for vk with k-point projection') vk = self._get_lr_k_kpts(dm_kpts, hermi, kpts, exxdiv=None) pk2k = addons._k2k_projection(kpts, kpts_band, self.supmol_ft.bvkmesh_Ls) vk = lib.einsum('nkpq,kh->nhpq', vk, pk2k) if exxdiv == 'ewald': _ewald_exxdiv_for_G0(self.cell, kpts, dm_kpts, vk, kpts_band) return vk def _get_lr_k_kpts(self, dm_kpts, hermi=1, kpts=np.zeros((1,3)), exxdiv=None): ''' Long-range part of K matrix ''' cpu0 = cpu1 = logger.process_clock(), logger.perf_counter() log = logger.new_logger(self) cell = self.cell rs_cell = self.rs_cell if self.exclude_dd_block or self._sr_without_dddd: cell_d = self.cell_d naod = cell_d.nao ngrids_d = np.prod(cell_d.mesh) else: cell_d = None naod = ngrids_d = 0 mesh = self.mesh ngrids = np.prod(mesh) dms = dm_kpts n_dm, nkpts, nao = dms.shape[:3] vkR = np.zeros((n_dm,nkpts,nao,nao)) vkI = np.zeros((n_dm,nkpts,nao,nao)) vk = [vkR, vkI] weight = 1. / nkpts # Test if vk[k] == vk[k_conj].conj() t_rev_pairs = group_by_conj_pairs(cell, kpts, return_kpts_pairs=False) try: t_rev_pairs = np.asarray(t_rev_pairs, dtype=np.int32, order='F') except TypeError: t_rev_pairs = [[k, k] if k_conj is None else [k, k_conj] for k, k_conj in t_rev_pairs] t_rev_pairs = np.asarray(t_rev_pairs, dtype=np.int32, order='F') log.debug1('Num kpts conj_pairs %d', len(t_rev_pairs)) time_reversal_symmetry = self.time_reversal_symmetry if time_reversal_symmetry: for k, k_conj in t_rev_pairs: if k != k_conj and abs(dms[:,k_conj] - dms[:,k].conj()).max() > 1e-6: time_reversal_symmetry = False log.debug2('Disable time_reversal_symmetry') break # TODO: aosym == 's2' aosym = 's1' if time_reversal_symmetry: k_to_compute = np.zeros(nkpts, dtype=np.int8) k_to_compute[t_rev_pairs[:,0]] = 1 else: k_to_compute = np.ones(nkpts, dtype=np.int8) t_rev_pairs = None dm_factor = getattr(dm_kpts, 'dm_factor', None) contract_mo_early = False if dm_factor is None: dmsR = np.asarray(dms.real, order='C') dmsI = np.asarray(dms.imag, order='C') dm = [dmsR, dmsI] dm_factor = None if np.count_nonzero(k_to_compute) >= 2 * lib.num_threads(): update_vk = aft_jk._update_vk1_ else: update_vk = aft_jk._update_vk_ else: # dm ~= dm_factor * dm_factor.T nocc = dm_factor.shape[-1] if nocc == 0: return vkR bvk_ncells, rs_nbas, nimgs = self.supmol_ft.bas_mask.shape s_nao = self.supmol_ft.nao contract_mo_early = (time_reversal_symmetry and naod == 0 and bvk_ncells*nao*6 > s_nao*nocc*n_dm) log.debug2('time_reversal_symmetry = %s bvk_ncells = %d ' 's_nao = %d nocc = %d n_dm = %d', time_reversal_symmetry, bvk_ncells, s_nao, nocc, n_dm) log.debug2('Use algorithm contract_mo_early = %s', contract_mo_early) if contract_mo_early: bvk_kmesh = self.bvk_kmesh rcut = ft_ao.estimate_rcut(cell) supmol = ft_ao.ExtendedMole.from_cell(cell, bvk_kmesh, rcut.max()) supmol = supmol.strip_basis(rcut) s_nao = supmol.nao moR, moI = aft_jk._mo_k2gamma(supmol, dm_factor, kpts, t_rev_pairs) if abs(moI).max() < 1e-5: dm = [moR, None] ft_kern = aft_jk._gen_ft_kernel_fake_gamma(cell, supmol, aosym) update_vk = aft_jk._update_vk_fake_gamma else: contract_mo_early = False moR = moI = None if not contract_mo_early: dm = [np.asarray(dm_factor.real, order='C'), np.asarray(dm_factor.imag, order='C')] if np.count_nonzero(k_to_compute) >= 2 * lib.num_threads(): update_vk = aft_jk._update_vk1_dmf else: update_vk = aft_jk._update_vk_dmf log.debug2('set update_vk to %s', update_vk) if not contract_mo_early: ft_kern = self.supmol_ft.gen_ft_kernel( aosym, return_complex=False, kpts=kpts, verbose=log) Gv, Gvbase, kws = cell.get_Gv_weights(mesh) Gv = np.asarray(Gv, order='F') gxyz = lib.cartesian_prod([np.arange(len(x)) for x in Gvbase]) G0_idx = 0 if (cell.dimension == 3 or (cell.dimension == 2 and cell.low_dim_ft_type != 'inf_vacuum')): coulG_SR_at_G0 = np.pi/self.omega**2 * kws else: coulG_SR_at_G0 = None if naod > 0: smooth_bas_mask = rs_cell.bas_type == ft_ao.SMOOTH_BASIS smooth_bas_idx = rs_cell.bas_map[smooth_bas_mask] ao_d_idx = rs_cell.get_ao_indices(smooth_bas_idx, cell.ao_loc) mem_now = lib.current_memory()[0] max_memory = max(2000, (self.max_memory - mem_now)) log.debug1('max_memory = %d MB (%d in use)', max_memory+mem_now, mem_now) if self.exclude_dd_block and not self._sr_without_dddd and naod > 0: cache_size = (naod*(naod+1)*ngrids*(nkpts+1))*16e-6 log.debug1('naod = %d cache_size = %d', naod, cache_size) # fft_aopair_dd seems less efficient than aft_aopair_dd if 0 and max_memory * .5 > cache_size and cell.dimension >= 2: from pyscf.pbc.dft.multigrid import _take_5d mesh_d = cell_d.mesh log.debug1('merge_dd with cached fft_aopair_dd') aoR_ks, aoI_ks = rsdf_builder._eval_gto(cell_d, mesh_d, kpts) coords = cell_d.get_uniform_grids(mesh_d) max_memory -= cache_size gx = np.fft.fftfreq(mesh[0], 1./mesh[0]).astype(np.int32) gy = np.fft.fftfreq(mesh[1], 1./mesh[1]).astype(np.int32) gz = np.fft.fftfreq(mesh[2], 1./mesh[2]).astype(np.int32) def fft_aopair_dd(ki, kj, expmikr): # einsum('g,ig,jg->ijg', expmikr, ao_ki.conj(), ao_kj) pqG_ddR = np.empty((naod**2, ngrids_d)) pqG_ddI = np.empty((naod**2, ngrids_d)) expmikrR, expmikrI = expmikr libpbc.PBC_zjoin_fCN_s1( pqG_ddR.ctypes.data_as(ctypes.c_void_p), pqG_ddI.ctypes.data_as(ctypes.c_void_p), expmikrR.ctypes.data_as(ctypes.c_void_p), expmikrI.ctypes.data_as(ctypes.c_void_p), aoR_ks[ki].ctypes.data_as(ctypes.c_void_p), aoI_ks[ki].ctypes.data_as(ctypes.c_void_p), aoR_ks[kj].ctypes.data_as(ctypes.c_void_p), aoI_ks[kj].ctypes.data_as(ctypes.c_void_p), ctypes.c_int(naod), ctypes.c_int(naod), ctypes.c_int(ngrids_d)) pqG_dd = np.empty((naod**2, ngrids_d), dtype=np.complex128) pqG_dd.real = pqG_ddR pqG_dd.imag = pqG_ddI pqG_ddR = pqG_ddI = None pqG_dd *= cell.vol / ngrids_d pqG_dd = pbctools.fft(pqG_dd, mesh_d).reshape(naod, naod, *mesh_d) pqG_dd = _take_5d(pqG_dd, (None, None, gx, gy, gz)) return np.asarray(pqG_dd.reshape(naod, naod, ngrids), order='C') cache = {} def merge_dd(Gpq, p0, p1, ki_lst, kj_lst): '''Merge diffused basis block into ao-pair tensor inplace''' expmikr = np.exp(-1j * np.dot(coords, kpts[kj_lst[0]]-kpts[ki_lst[0]])) expmikrR = np.asarray(expmikr.real, order='C') expmikrI = np.asarray(expmikr.imag, order='C') GpqR, GpqI = Gpq # Gpq should be an array of (nkpts,ni,nj,ngrids) in C order if not GpqR[0].flags.c_contiguous: assert GpqR[0].strides[0] == 8 # stride for grids cpu0 = logger.process_clock(), logger.perf_counter() for k, (ki, kj) in enumerate(zip(ki_lst, kj_lst)): if (ki, kj) not in cache: log.debug3('cache dd block (%d, %d)', ki, kj) cache[ki, kj] = fft_aopair_dd(ki, kj, (expmikrR, expmikrI)) pqG_dd = cache[ki, kj] libpbc.PBC_ft_zfuse_dd_s1( GpqR[kj].ctypes.data_as(ctypes.c_void_p), GpqI[kj].ctypes.data_as(ctypes.c_void_p), pqG_dd.ctypes.data_as(ctypes.c_void_p), ao_d_idx.ctypes.data_as(ctypes.c_void_p), (ctypes.c_int*2)(p0, p1), ctypes.c_int(nao), ctypes.c_int(naod), ctypes.c_int(ngrids)) log.timer_debug1('merge_dd', *cpu0) return (GpqR, GpqI) cpu1 = log.timer_debug1('get_lr_k_kpts initializing dd block', *cpu1) else: log.debug1('merge_dd with aft_aopair_dd') ft_kern_dd = self.supmol_d.gen_ft_kernel( aosym, return_complex=False, kpts=kpts, verbose=log) buf1 = None def merge_dd(Gpq, p0, p1, ki_lst, kj_lst): '''Merge diffused basis block into ao-pair tensor inplace''' kpt = kpts[kj_lst[0]] - kpts[ki_lst[0]] GpqR, GpqI = Gpq pqG_ddR, pqG_ddI = ft_kern_dd(Gv[p0:p1], gxyz[p0:p1], Gvbase, kpt, out=buf1) # Gpq should be an array of (nkpts,ni,nj,ngrids) in C order if not GpqR[0].flags.c_contiguous: assert GpqR[0].strides[0] == 8 # stride for grids for k in range(nkpts): libpbc.PBC_ft_fuse_dd_s1( GpqR[k].ctypes.data_as(ctypes.c_void_p), GpqI[k].ctypes.data_as(ctypes.c_void_p), pqG_ddR[k].ctypes.data_as(ctypes.c_void_p), pqG_ddI[k].ctypes.data_as(ctypes.c_void_p), ao_d_idx.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(nao), ctypes.c_int(naod), ctypes.c_int(p1-p0)) return (GpqR, GpqI) if contract_mo_early: Gblksize = max(24, int((max_memory*1e6/16-nkpts*nao**2*3)/ (nao*s_nao+nao*nkpts*nocc))//8*8) Gblksize = min(Gblksize, ngrids, 200000) log.debug1('Gblksize = %d', Gblksize) buf = np.empty(Gblksize*s_nao*nao*2) else: Gblksize = max(24, int(max_memory*.8e6/16/(nao**2+naod**2)/(nkpts+3))//8*8) Gblksize = min(Gblksize, ngrids, 200000) log.debug1('Gblksize = %d', Gblksize) buf = np.empty(nkpts*Gblksize*nao**2*2) if naod > 0: buf1 = np.empty(nkpts*Gblksize*naod**2*2) for group_id, (kpt, ki_idx, kj_idx, self_conj) \ in enumerate(kk_adapted_iter(cell, kpts)): coulG = self.weighted_coulG(kpt, exxdiv, mesh) if coulG_SR_at_G0 is not None: # For cell.dimension = 2, coulG is computed with truncated coulomb # interactions. The 3d coulG_SR below is to remove the analytical # SR from get_jk_sr (which was computed with full Coulomb) then # add the truncated Coulomb for AFT part. with lib.temporary_env(cell, dimension=3): coulG_SR = self.weighted_coulG_SR(kpt, False, mesh) if is_zero(kpt): coulG_SR[G0_idx] += coulG_SR_at_G0 else: coulG_SR = self.weighted_coulG_SR(kpt, False, mesh) if self.exclude_dd_block and not self._sr_without_dddd and naod > 0: for p0, p1 in lib.prange(0, ngrids, Gblksize): # C ~ compact basis, D ~ diffused basis # K matrix with coulG_LR: # (CC|CC) (CC|CD) (CC|DC) (CD|CC) (CD|CD) (CD|DC) (DC|CC) (DC|CD) (DC|DC) # K matrix with full coulG: # (CC|DD) (CD|DD) (DC|DD) (DD|CC) (DD|CD) (DD|DC) (DD|DD) log.debug3('update_vk [%s:%s]', p0, p1) Gpq = ft_kern(Gv[p0:p1], gxyz[p0:p1], Gvbase, kpt, out=buf) update_vk(vk, Gpq, dm, coulG_SR[p0:p1] * -weight, ki_idx, kj_idx, not self_conj, k_to_compute, t_rev_pairs) Gpq = merge_dd(Gpq, p0, p1, ki_idx, kj_idx) update_vk(vk, Gpq, dm, coulG[p0:p1] * weight, ki_idx, kj_idx, not self_conj, k_to_compute, t_rev_pairs) Gpq = None # clear cache to release memory for merge_dd function cache = {} else: coulG_LR = coulG - coulG_SR for p0, p1 in lib.prange(0, ngrids, Gblksize): log.debug3('update_vk [%s:%s]', p0, p1) Gpq = ft_kern(Gv[p0:p1], gxyz[p0:p1], Gvbase, kpt, out=buf) update_vk(vk, Gpq, dm, coulG_LR[p0:p1] * weight, ki_idx, kj_idx, not self_conj, k_to_compute, t_rev_pairs) Gpq = None cpu1 = log.timer_debug1(f'ft_aopair group {group_id}', *cpu1) if self._sr_without_dddd and naod > 0: # (DD|DD) with full coulG, rest terms with coulG_LR log.debug1('ft_aopair dd-block for dddd-block ERI') if cell.dimension < 2: raise NotImplementedError if cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum': raise NotImplementedError vkR_dd = np.zeros((n_dm,nkpts,naod,naod)) vkI_dd = np.zeros((n_dm,nkpts,naod,naod)) vk_dd = [vkR_dd, vkI_dd] if dm_factor is None: dmsR, dmsI = dm dmR_dd = np.asarray(dmsR[:,:,ao_d_idx[:,None],ao_d_idx], order='C') dmI_dd = np.asarray(dmsI[:,:,ao_d_idx[:,None],ao_d_idx], order='C') dm_dd = [dmR_dd, dmI_dd] else: assert update_vk is not aft_jk._update_vk_fake_gamma dmfR, dmfI = dm dmfR_dd = np.asarray(dmfR[:,:,ao_d_idx], order='C') dmfI_dd = np.asarray(dmfI[:,:,ao_d_idx], order='C') dm_dd = [dmfR_dd, dmfI_dd] # AFT mesh for ERI is usually smaller than cell_d.mesh ke_cutoff = aft.estimate_ke_cutoff(cell_d) mesh_d = cell_d.cutoff_to_mesh(ke_cutoff) log.debug1('lr_k dd-block ke_cutoff = %s mesh = %s', ke_cutoff, mesh_d) ft_kern_dd = self.supmol_d.gen_ft_kernel( aosym, return_complex=False, kpts=kpts, verbose=log) Gv, Gvbase, kws = cell_d.get_Gv_weights(mesh_d) gxyz = lib.cartesian_prod([np.arange(len(x)) for x in Gvbase]) ngrids_d = len(Gv) Gblksize = max(24, int(max_memory*.8e6/16/naod**2/(nkpts+max(nkpts,3)))//8*8) Gblksize = min(Gblksize, ngrids_d) log.debug1('Gblksize = %d', Gblksize) buf = np.empty(nkpts*Gblksize*naod**2*2) for group_id, (kpt, ki_idx, kj_idx, self_conj) \ in enumerate(kk_adapted_iter(cell, kpts)): with lib.temporary_env(cell, dimension=3): coulG_SR = self.weighted_coulG_SR(kpt, False, mesh_d) if is_zero(kpt) and coulG_SR_at_G0 is not None: coulG_SR[G0_idx] += coulG_SR_at_G0 for p0, p1 in lib.prange(0, ngrids_d, Gblksize): Gpq = ft_kern_dd(Gv[p0:p1], gxyz[p0:p1], Gvbase, kpt, out=buf) update_vk(vk_dd, Gpq, dm_dd, coulG_SR[p0:p1] * weight, ki_idx, kj_idx, not self_conj, k_to_compute, t_rev_pairs) Gpq = None cpu1 = log.timer_debug1(f'ft_aopair dd-block group {group_id}', *cpu1) for i in range(n_dm): for k in range(nkpts): lib.takebak_2d(vkR[i,k], vkR_dd[i,k], ao_d_idx, ao_d_idx, thread_safe=False) lib.takebak_2d(vkI[i,k], vkI_dd[i,k], ao_d_idx, ao_d_idx, thread_safe=False) buf = buf1 = None if is_zero(kpts) and not np.iscomplexobj(dm_kpts): vk_kpts = vkR else: vk_kpts = vkR + vkI * 1j # Add ewald_exxdiv contribution because G=0 was not included in the # non-uniform grids if (exxdiv == 'ewald' and (cell.dimension < 2 or # 0D and 1D are computed with inf_vacuum (cell.dimension == 2 and cell.low_dim_ft_type == 'inf_vacuum'))): _ewald_exxdiv_for_G0(cell, kpts, dms, vk_kpts, kpts) if time_reversal_symmetry: for k, k_conj in t_rev_pairs: if k != k_conj: vk_kpts[:,k_conj] = vk_kpts[:,k].conj() log.timer_debug1('get_lr_k_kpts', *cpu0) return vk_kpts to_gpu = lib.to_gpu
RangeSeparationJKBuilder = RangeSeparatedJKBuilder def _purify(mat_kpts, phase): if phase is None: return mat_kpts #:mat_bvk = np.einsum('Rk,nkij,Sk->nRSij', phase, mat_kpts, phase.conj()) #:return np.einsum('Rk,nRSij,Sk->nkij', phase.conj(), mat_bvk.real, phase) nkpts = phase.shape[1] mat_bvk = lib.einsum('k,Sk,nkuv->nSuv', phase[0], phase.conj(), mat_kpts) return lib.einsum('S,Sk,nSuv->nkuv', nkpts*phase[:,0].conj(), phase, mat_bvk.real)
[docs] def estimate_rcut(rs_cell, omega, precision=None, exclude_dd_block=True): '''Estimate rcut for 2e SR-integrals''' if precision is None: # Adjust precision a little bit as errors are found slightly larger than cell.precision. precision = rs_cell.precision * 1e-1 rs_cell = rs_cell exps, cs = pbcgto.cell._extract_pgto_params(rs_cell, 'min') ls = rs_cell._bas[:,gto.ANG_OF] exp_min_idx = exps.argmin() cost = cs * (.5*abs(omega)*rs_cell.rcut)**ls / (2*exps)**(ls/2+.75) ai_idx = ak_idx = cost.argmax() compact_mask = rs_cell.bas_type != ft_ao.SMOOTH_BASIS compact_idx = np.where(compact_mask)[0] if exclude_dd_block and compact_idx.size > 0: ak_idx = compact_idx[cost[compact_idx].argmax()] logger.debug2(rs_cell, 'ai_idx=%d ak_idx=%d', ai_idx, ak_idx) # Case 1: l in cell0, product kl ~ dc, product ij ~ dd and dc # This includes interactions (dc|dd) ak = exps[ak_idx] lk = rs_cell._bas[ak_idx,gto.ANG_OF] ck = cs[ak_idx] aj = exps lj = ls cj = cs ai = exps[ai_idx] li = rs_cell._bas[ai_idx,gto.ANG_OF] ci = cs[ai_idx] al = exps[exp_min_idx] ll = rs_cell._bas[exp_min_idx,gto.ANG_OF] cl = cs[exp_min_idx] aij = ai + aj akl = ak + al lij = li + lj lkl = lk + ll l4 = lij + lkl norm_ang = ((2*li+1)*(2*lj+1)*(2*lk+1)*(2*ll+1)/(4*np.pi)**4)**.5 c1 = ci * cj * ck * cl * norm_ang theta = omega**2*aij*akl/(aij*akl + (aij+akl)*omega**2) sfac = omega**2*aj*al/(aj*al + (aj+al)*omega**2) / theta fl = 2 fac = 2**(li+lk)*np.pi**2.5*c1 * theta**(l4-.5) fac *= 2*np.pi/rs_cell.vol/theta fac /= aij**(li+1.5) * akl**(lk+1.5) * aj**lj * al**ll fac *= fl / precision r0 = rs_cell.rcut r0 = (np.log(fac * r0 * (sfac*r0)**(l4-1) + 1.) / (sfac*theta))**.5 r0 = (np.log(fac * r0 * (sfac*r0)**(l4-1) + 1.) / (sfac*theta))**.5 rcut = r0 if exclude_dd_block and 0 < compact_idx.size < rs_cell.nbas: # Case 2: l in cell0, product kl ~ dc, product ij ~ cd # so as to exclude interaction (dc|dd) smooth_mask = ~compact_mask ai, li, ci = ak, lk, ck aj = exps[smooth_mask] lj = ls[smooth_mask] cj = cs[smooth_mask] aij = ai + aj lij = li + lj l4 = lij + lkl norm_ang = ((2*li+1)*(2*lj+1)*(2*lk+1)*(2*ll+1)/(4*np.pi)**4)**.5 c1 = ci * cj * ck * cl * norm_ang theta = omega**2*aij*akl/(aij*akl + (aij+akl)*omega**2) sfac = omega**2*aj*al/(aj*al + (aj+al)*omega**2) / theta fl = 2 fac = 2**(li+lk)*np.pi**2.5*c1 * theta**(l4-.5) fac *= 2*np.pi/rs_cell.vol/theta fac /= aij**(li+1.5) * akl**(lk+1.5) * aj**lj * al**ll fac *= fl / precision r0 = rcut[smooth_mask] r0 = (np.log(fac * r0 * (sfac*r0)**(l4-1) + 1.) / (sfac*theta))**.5 r0 = (np.log(fac * r0 * (sfac*r0)**(l4-1) + 1.) / (sfac*theta))**.5 rcut[smooth_mask] = r0 return rcut
def _guess_omega(cell, kpts, mesh=None): a = cell.lattice_vectors() if cell.dimension == 0: if mesh is None: mesh = cell.mesh ke_cutoff = pbctools.mesh_to_cutoff(a, mesh).min() return 0, mesh, ke_cutoff precision = cell.precision nkpts = len(kpts) if mesh is None: omega_min = OMEGA_MIN ke_min = estimate_ke_cutoff_for_omega(cell, omega_min) nk = (cell.nao/25 * nkpts)**(1./3) ke_cutoff = 50 / (.7+.25*nk+.05*nk**3) ke_cutoff = max(ke_cutoff, ke_min) # avoid large omega since numerical issues were found in Rys # polynomials when computing SR integrals with nroots > 3 exps = [e for l, e in zip(cell._bas[:,gto.ANG_OF], cell.bas_exps()) if l != 0] if exps: omega_max = np.hstack(exps).min()**.5 * 2 ke_max = estimate_ke_cutoff_for_omega(cell, omega_max) ke_cutoff = min(ke_cutoff, ke_max) mesh = cell.cutoff_to_mesh(ke_cutoff) else: mesh = np.asarray(mesh) ke_cutoff = min(pbctools.mesh_to_cutoff(a, mesh)[:cell.dimension]) omega = estimate_omega_for_ke_cutoff(cell, ke_cutoff, precision) return omega, mesh, ke_cutoff
[docs] def estimate_ke_cutoff_for_omega(cell, omega, precision=None): '''Energy cutoff for FFTDF to converge attenuated Coulomb in moment space ''' if precision is None: precision = cell.precision ai = np.hstack(cell.bas_exps()).max() theta = 1./(1./ai + omega**-2) fac = 32*np.pi**2 * theta / precision Ecut = 20. Ecut = np.log(fac / (2*Ecut) + 1.) * 2*theta Ecut = np.log(fac / (2*Ecut) + 1.) * 2*theta return Ecut
[docs] def estimate_omega_for_ke_cutoff(cell, ke_cutoff, precision=None): '''The minimal omega in attenuated Coulomb given energy cutoff ''' if precision is None: precision = cell.precision # # estimation based on \int dk 4pi/k^2 exp(-k^2/4omega) sometimes is not # # enough to converge the 2-electron integrals. A penalty term here is to # # reduce the error in integrals # precision *= 1e-2 # kmax = (ke_cutoff*2)**.5 # log_rest = np.log(precision / (16*np.pi**2 * kmax**lmax)) # omega = (-.5 * ke_cutoff / log_rest)**.5 # return omega ai = np.hstack(cell.bas_exps()).max() aij = ai * 2 fac = 32*np.pi**2 / precision omega = .3 theta = 1./(1./ai + omega**-2) omega2 = 1./(np.log(fac * theta/ (2*ke_cutoff) + 1.)*2/ke_cutoff - 1./aij) if omega2 > 0: theta = 1./(1./ai + 1./omega2) omega2 = 1./(np.log(fac * theta/ (2*ke_cutoff) + 1.)*2/ke_cutoff - 1./aij) omega = max(omega2, 0)**.5 if omega < OMEGA_MIN: logger.warn(cell, 'omega=%g smaller than the required minimal value %g. ' 'Set omega to %g', omega2, OMEGA_MIN, OMEGA_MIN) omega = OMEGA_MIN return omega
def _qcond_cell0_abstract(qcond, seg_loc, seg2sh, nbasp): '''Find the max qcond for ij pair''' # The first shell in each seg_loc[i]:seg_loc[i+1] is inside cell0 cell0_prim_idx = seg2sh[np.arange(seg_loc[nbasp])] qcond_sub = qcond[:,cell0_prim_idx] sh_loc = seg2sh[seg_loc] nbas_bvk = sh_loc.size - 1 qtmp = np.empty((nbas_bvk, cell0_prim_idx.size), dtype=qcond.dtype) for i, (i0, i1) in enumerate(zip(sh_loc[:-1], sh_loc[1:])): if i0 != i1: qtmp[i] = qcond_sub[i0:i1].max(axis=0) else: qtmp[i] = INDEX_MIN qcond_cell0 = np.empty((nbas_bvk, nbasp), dtype=qcond.dtype) for j, (j0, j1) in enumerate(zip(seg_loc[:nbasp], seg_loc[1:nbasp+1])): if j0 != j1: qcond_cell0[:,j] = qtmp[:,j0:j1].max(axis=1) else: qcond_cell0[:,j] = INDEX_MIN return qcond_cell0