#!/usr/bin/env python
# Copyright 2018-2019 The PySCF Developers. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: Peng Bao <baopeng@iccas.ac.cn>
# Qiming Sun <osirpt.sun@gmail.com>
#
'''
semi-grid Coulomb and eXchange without differential density matrix
To lower the scaling of coulomb and exchange matrix construction for large system, one
coordinate is analytical and the other is grid. The traditional two electron
integrals turn to analytical one electron integrals and numerical integration
based on grid.(see Friesner, R. A. Chem. Phys. Lett. 1985, 116, 39)
Minimizing numerical errors using overlap fitting correction.(see
Izsak, R. et al. J. Chem. Phys. 2011, 135, 144105)
Grid screening for weighted AO value and DktXkg.
Two SCF steps: coarse grid then fine grid.
See the docs of pyscf.sgx.sgx for details on adjustable parameters.
'''
import ctypes
import numpy
import scipy.linalg
from pyscf import lib
from pyscf import gto
from pyscf.lib import logger
from pyscf.df.incore import aux_e2
from pyscf.gto import moleintor
from pyscf.scf import _vhf
from pyscf.dft import gen_grid
from pyscf.data.elements import _std_symbol_without_ghost, NUC
from pyscf.data.radii import BRAGG
from pyscf.dft.radi import treutler_ahlrichs
from pyscf.dft.gen_grid import LEBEDEV_ORDER, SGX_ANG_MAPPING, \
sgx_prune, becke_lko
from pyscf.dft.numint import BLKSIZE, NBINS, libdft, \
SWITCH_SIZE, _scale_ao, _dot_ao_ao_sparse
import time
# Query the grid block size used by the C SGX kernels; it must be a
# multiple of the DFT numint block size so the two blocking schemes nest.
libdft.SGXreturn_blksize.restype = int
SGX_BLKSIZE = libdft.SGXreturn_blksize()
assert SGX_BLKSIZE % BLKSIZE == 0
# number of DFT numint blocks per SGX block
DFT_PER_SGX = SGX_BLKSIZE // BLKSIZE
# Exponent used by _pow_screen when building potential-screening thresholds.
SGX_DELTA = 0.01
def _sgxdot_ao_dm(ao, dms, non0tab, shls_slice, ao_loc, out=None):
    '''return numpy.dot(ao, dms)

    Contract grid AO values with each density matrix using the sparse
    C kernel when screening data is available, otherwise dense BLAS.

    Args:
        ao: (ngrids, nao) AO values on the grid.
        dms: (ndm, nao, nao) density matrices.
        non0tab: AO screening table (may be None).
        shls_slice: (sh0, sh1) shell range (may be None).
        ao_loc: AO offsets per shell (may be None).
        out: optional preallocated output buffer.

    Returns:
        (ndm, nao, ngrids) array vms.
    '''
    ngrids, nao = ao.shape
    vms = numpy.ndarray((dms.shape[0], dms.shape[2], ngrids),
                        dtype=ao.dtype, order='C', buffer=out)
    # Dense fallback when screening information is missing. This check
    # must come before any use of ao_loc, which is allowed to be None
    # (previously len(ao_loc) was evaluated first and raised TypeError).
    if (nao < SWITCH_SIZE or non0tab is None
            or shls_slice is None or ao_loc is None):
        for i in range(len(dms)):
            lib.dot(dms[i].T, ao.T, c=vms[i])
        return vms
    nbas = len(ao_loc) - 1
    # The C kernel expects F-contiguous AO values.
    if not ao.flags.f_contiguous:
        ao = lib.transpose(ao)
    if ao.dtype == dms.dtype == numpy.double:
        fn = libdft.VXCsgx_ao_dm
    else:
        # TODO implement complex dot
        raise NotImplementedError("Complex sgxdot")
    dms = numpy.asarray(dms, order='C')
    for i in range(len(dms)):
        fn(
            vms[i].ctypes.data_as(ctypes.c_void_p),
            ao.ctypes.data_as(ctypes.c_void_p),
            dms[i].ctypes.data_as(ctypes.c_void_p),
            ctypes.c_int(nao), ctypes.c_int(dms.shape[2]),
            ctypes.c_int(ngrids), ctypes.c_int(nbas),
            non0tab.ctypes.data_as(ctypes.c_void_p),
            (ctypes.c_int*2)(*shls_slice),
            ao_loc.ctypes.data_as(ctypes.c_void_p)
        )
    return vms
def _sgxdot_ao_dm_sparse(ao, dms, mask, pair_mask, ao_loc, out=None):
    '''return numpy.dot(ao, dms)

    Like _sgxdot_ao_dm, but additionally skips density-matrix shell
    pairs masked out by ``pair_mask``.

    Args:
        ao: (ngrids, nao) AO values on the grid.
        dms: (ndm, nao, nao) density matrices.
        mask: AO screening table (may be None).
        pair_mask: shell-pair mask; None falls back to _sgxdot_ao_dm.
        ao_loc: AO offsets per shell (may be None).
        out: optional preallocated output buffer.

    Returns:
        (ndm, nao, ngrids) array vms.
    '''
    ngrids, nao = ao.shape
    if pair_mask is None:
        # No pair screening requested: use the simpler kernel. ao_loc
        # may be None, in which case _sgxdot_ao_dm takes its dense path
        # and ignores the shell slice.
        nbas = 0 if ao_loc is None else len(ao_loc) - 1
        return _sgxdot_ao_dm(ao, dms, mask, (0, nbas), ao_loc, out=out)
    vms = numpy.ndarray((dms.shape[0], dms.shape[2], ngrids),
                        dtype=ao.dtype, order='C', buffer=out)
    # Dense fallback when screening information is missing. ao_loc is
    # only dereferenced after this guard (previously len(ao_loc) was
    # evaluated first and raised TypeError for ao_loc=None).
    if (nao < SWITCH_SIZE or mask is None or ao_loc is None):
        for i in range(len(dms)):
            lib.dot(dms[i].T, ao.T, c=vms[i])
        return vms
    nbas = len(ao_loc) - 1
    if not ao.flags.f_contiguous:
        ao = lib.transpose(ao)
    if ao.dtype == dms.dtype == numpy.double:
        fn = _vhf.libcvhf.SGXdot_ao_dm_sparse
    else:
        # TODO implement complex dot
        raise NotImplementedError("Complex sgxdot")
    dms = numpy.asarray(dms, order='C')
    for i in range(len(dms)):
        fn(
            vms[i].ctypes.data_as(ctypes.c_void_p),
            ao.ctypes.data_as(ctypes.c_void_p),
            dms[i].ctypes.data_as(ctypes.c_void_p),
            ctypes.c_int(nao),
            ctypes.c_int(ngrids),
            ctypes.c_int(nbas),
            mask.ctypes.data_as(ctypes.c_void_p),
            pair_mask.ctypes.data_as(ctypes.c_void_p),
            ao_loc.ctypes.data_as(ctypes.c_void_p)
        )
    return vms
def _sgxdot_ao_gv(ao, gv, non0tab, shls_slice, ao_loc, out=None):
    '''return numpy.dot(ao.T, gv.T)

    Contract AO values with the ESP intermediates gv for each density
    matrix. When ``out`` is provided, results are ACCUMULATED into it
    (both in the dense fallback and in the screened path).

    Args:
        ao: (ngrids, nao) AO values.
        gv: (ndm, nao, ngrids) intermediates.
        non0tab: AO screening table (may be None).
        shls_slice: (sh0, sh1) shell range (may be None).
        ao_loc: AO offsets per shell (may be None).
        out: optional (ndm, nao, nao) accumulator.

    Returns:
        (ndm, nao, nao) array; equals ``out`` when it was given.
    '''
    ngrids, nao = ao.shape
    outshape = (gv.shape[0], nao, nao)
    # Dense fallback when screening information is missing; beta=1.0
    # accumulates on top of any existing contents of out.
    if (nao < SWITCH_SIZE or non0tab is None
            or shls_slice is None or ao_loc is None):
        if out is None:
            vv = numpy.zeros(outshape, dtype=ao.dtype, order='C')
        else:
            vv = out
        for i in range(len(gv)):
            lib.dot(ao.conj().T, gv[i].T, c=vv[i], beta=1.0)
        return vv
    nbas = non0tab.shape[1]
    # The C kernel expects F-contiguous ao and C-contiguous gv.
    if not ao.flags.f_contiguous:
        ao = lib.transpose(ao)
    if not gv.flags.c_contiguous:
        gv = numpy.ascontiguousarray(gv)
    if ao.dtype == gv.dtype == numpy.double:
        fn = libdft.VXCsgx_ao_ao
    else:
        # TODO implement complex dot
        raise NotImplementedError("Complex sgxdot")
    vv = numpy.empty(outshape, dtype=ao.dtype, order='C')
    for i in range(len(gv)):
        fn(
            vv[i].ctypes.data_as(ctypes.c_void_p),
            ao.ctypes.data_as(ctypes.c_void_p),
            gv[i].ctypes.data_as(ctypes.c_void_p),
            ctypes.c_int(nao), ctypes.c_int(ngrids),
            ctypes.c_int(nbas),
            non0tab.ctypes.data_as(ctypes.c_void_p),
            (ctypes.c_int*2)(*shls_slice),
            ao_loc.ctypes.data_as(ctypes.c_void_p)
        )
    # The kernel result is transposed relative to the desired (u, v)
    # layout; swap the last two axes back.
    vv = vv.transpose(0, 2, 1)
    if out is None:
        return vv
    else:
        out[:] += vv
        return out
def _sgxdot_ao_gv_sparse(ao, gv, wt, mask1, mask2, ao_loc, out=None):
    '''Weighted sparse contraction into (ndm, nao, nao) matrices.

    ao is F-order, gv is C-order.
    gu,ivg->iuv; in C-order, the operation is ug,ivg->iuv.
    Results are accumulated into ``out`` if it is provided.

    Args:
        ao: (ngrids, nao) AO values.
        gv: (ndm, nao, ngrids) intermediates.
        wt: grid weights (required).
        mask1: AO screening table (may be None).
        mask2: per-(block, shell) screening mask for gv (may be None).
        ao_loc: AO offsets per shell (may be None).
        out: optional (ndm, nao, nao) accumulator.
    '''
    ngrids, nao = ao.shape
    assert gv.shape[1:] == ao.shape[::-1]
    assert wt is not None
    if out is None:
        vv = numpy.zeros((gv.shape[0], nao, nao), dtype=ao.dtype, order='C')
    else:
        vv = out
    # Fall back to the unscreened kernel when any screening data is
    # missing. mask1 is only dereferenced after this guard (previously
    # mask1.shape was read first and raised AttributeError for None).
    if (nao < SWITCH_SIZE or mask1 is None
            or mask2 is None or ao_loc is None):
        nbas = 0 if mask1 is None else mask1.shape[1]
        # Fold the weights into the AO values for the fallback.
        return _sgxdot_ao_gv(ao * wt[:, None], gv, mask1, (0, nbas), ao_loc, out)
    nbas = mask1.shape[1]
    # The C kernel expects F-contiguous ao and C-contiguous gv.
    if not ao.flags.f_contiguous:
        ao = lib.transpose(ao)
    if not gv.flags.c_contiguous:
        gv = numpy.ascontiguousarray(gv)
    fn = _vhf.libcvhf.SGXdot_ao_ao_sparse
    for i in range(len(gv)):
        fn(
            vv[i].ctypes.data_as(ctypes.c_void_p),
            ao.ctypes.data_as(ctypes.c_void_p),
            gv[i].ctypes.data_as(ctypes.c_void_p),
            wt.ctypes.data_as(ctypes.c_void_p),
            ctypes.c_int(nao),
            ctypes.c_int(ngrids),
            ctypes.c_int(nbas),
            ctypes.c_int(0),
            mask1.ctypes.data_as(ctypes.c_void_p),
            mask2.ctypes.data_as(ctypes.c_void_p),
            ao_loc.ctypes.data_as(ctypes.c_void_p),
        )
    return vv
class _CSGXOpt(ctypes.Structure):
    """ctypes mirror of the C-side SGX options/screening-data struct.

    NOTE(review): the field order and types must match the C struct
    definition exactly -- do not reorder or retype fields here without
    updating the C code.
    """
    __slots__ = []
    _fields_ = [('mode', ctypes.c_int),
                ('nbas', ctypes.c_int),
                ('ngrids', ctypes.c_int),
                ('ao_loc', ctypes.c_void_p),
                ('etol', ctypes.c_double),
                ('vtol', ctypes.c_void_p),
                ('wt', ctypes.c_void_p),
                ('mbar_ij', ctypes.c_void_p),
                ('mbar_bi', ctypes.c_void_p),
                ('rbar_ij', ctypes.c_void_p),
                ('mmax_i', ctypes.c_void_p),
                ('msum_i', ctypes.c_void_p),
                ('buf_size_bytes', ctypes.c_size_t),
                ('shl_info_size_bytes', ctypes.c_size_t),
                ('direct_scf_tol', ctypes.c_double),
                ('fscreen_grid', ctypes.c_void_p),
                ('fscreen_shl', ctypes.c_void_p),
                ('full_f_bi', ctypes.c_void_p)]
def _arr2c(arr):
return None if arr is None else arr.ctypes.data_as(ctypes.c_void_p)
class SGXData:
    """Container for semi-numerical exchange (sgX) screening data.

    Holds the molecule, grids, integral bounds, and the screening
    thresholds that drive the C-level SGX exchange driver.
    """
    def __init__(self, mol, grids,
                 fit_ovlp=True, sym_ovlp=False,
                 max_memory=2000, hermi=0,
                 bound_algo="sample_pos",
                 sgxopt=None, etol=None, vtol=None,
                 direct_scf_tol=0.0):
        """
        bound_algo can be
        "ovlp": Screen integrals based on overlap of
        orbital pairs. Overlap serves as a rough
        approximation of the maximum ESP integral.
        "sample": Provide an approximate but accurate
        upper bound for the ESP integrals by sampling
        _nquad points for each shell pair.
        "sample_pos": Same as sample, but the ESP
        bounds are position-dependent, which gives
        a slight speed increase for large systems
        and a significant speed increase for
        short-range hybrids.
        For each bound algo, screening can be done based
        on energy, potential, or both
        """
        self.mol = mol
        self.grids = grids
        self.fit_ovlp = fit_ovlp
        self.sym_ovlp = sym_ovlp
        self.max_memory = max_memory
        self.hermi = hermi
        self._itol = direct_scf_tol
        # "auto" derives the screening tolerances from the integral
        # tolerance: etol ~ direct_scf_tol, vtol ~ sqrt(etol).
        if etol == "auto":
            etol = direct_scf_tol
        if vtol == "auto":
            if etol is None:
                vtol = direct_scf_tol**0.5
            else:
                vtol = etol**0.5
        self._etol = etol
        self._screen_energy = etol is not None
        self._vtol = vtol
        self._screen_potential = vtol is not None
        # Parameters for the sampling-based integral bounds.
        self._nquad = 200
        self._nrad = 32
        self._rcmul = 2.0
        self.bound_algo = bound_algo
        if self.bound_algo not in ["ovlp", "sample", "sample_pos"]:
            raise ValueError("Unsupported integral bound algorithm")
        self._opt = sgxopt
        # ctypes struct passed to the C driver; filled by _setup_opt.
        self._this = _CSGXOpt()
@property
def _cintopt(self):
    # Forward the integral optimizer from the wrapped SGX opt object.
    return self._opt._cintopt
@property
def use_dm_screening(self):
    """Whether any density-matrix based screening is enabled."""
    return self._screen_energy or self._screen_potential
def reset(self):
    """Clear all cached screening data and reset the overlap projector."""
    self._ovlp_proj = numpy.identity(self.mol.nao_nr())
    cleared = (
        '_stilde_ij', '_mtilde_ij', '_xni_b', '_xsgx_b',
        # attributes mirrored into the _CSGXOpt struct
        '_mbar_ij', '_etol_per_sgx_block', '_vtol_arr',
        '_mmax_i', '_msum_i', '_rbar_ij', '_mbar_bi',
    )
    for name in cleared:
        setattr(self, name, None)
@property
def mol(self):
    """The molecule object."""
    return self._mol
@mol.setter
def mol(self, mol):
    # Changing the molecule invalidates all cached screening data.
    self._mol = mol
    # The C kernels require contiguous int32 shell-to-AO offsets.
    self._ao_loc = numpy.ascontiguousarray(
        self.mol.ao_loc_nr().astype(numpy.int32)
    )
    self.reset()
@property
def grids(self):
    """The integration grids object."""
    return self._grids
@grids.setter
def grids(self, grids):
    # Changing the grids invalidates all cached screening data.
    self._grids = grids
    self.reset()
def _setup_opt(self):
    """
    Set up the C backend for the data.

    Populates the _CSGXOpt struct with pointers to the (already built)
    screening arrays and selects the C screening callbacks.
    """
    self._this.mode = 0
    self._this.nbas = self.mol.nbas
    self._this.ngrids = self.grids.weights.size
    self._this.ao_loc = _arr2c(self._ao_loc)
    if self._screen_energy:
        self._this.etol = self._etol_per_sgx_block
    else:
        # Effectively disables energy-based screening.
        self._this.etol = 1e10
    self._this.vtol = None
    self._this.wt = None  # set in each cycle
    self._this.mbar_ij = _arr2c(self._mbar_ij)
    self._this.mmax_i = _arr2c(self._mmax_i)
    self._this.msum_i = _arr2c(self._msum_i)
    if self.bound_algo == "sample_pos":
        self._this.rbar_ij = _arr2c(self._rbar_ij)
    else:
        self._this.rbar_ij = None
    # Per-block bounds are attached later by set_block.
    self._this.mbar_bi = None
    if self._screen_energy or self._screen_potential:
        # buf can contain the following:
        #   For batch screening step
        #     Nothing for no dm screening
        #     3 * nbas * double, 2 * nbas * int for sort1
        #     5 * nbas * double, 2 * nbas * int for sort2
        #   For the shell screening step
        #     Nothing for no dm screening
        #     3 * nbas * double, 3 * nbas * int
        dsize = self.mol.nbas * ctypes.sizeof(ctypes.c_double)
        isize = self.mol.nbas * ctypes.sizeof(ctypes.c_int)
        bsize = self.mol.nbas * ctypes.sizeof(ctypes.c_uint8)
        # shl_info can contain the following:
        #   shlscreen_i (nbas * uint8_t)
        #   bscreen_i (nbas * double)
        #   fbar_i (nbas * double)
        #   ffbar_i (nbas * double)
        bsb = 5 * dsize + 3 * isize
        sisb = bsize + 3 * dsize
        grid_screen = "SGXscreen_grid"
        shl_screen = "SGXscreen_shells_sorted"
    else:
        bsb = 0
        sisb = 0
        grid_screen = "SGXnoscreen_grid"
        shl_screen = "SGXscreen_shells_simple"
    self._this.fscreen_grid = _vhf._fpointer(grid_screen)
    self._this.fscreen_shl = _vhf._fpointer(shl_screen)
    self._this.direct_scf_tol = self._itol
    self._this.buf_size_bytes = bsb
    self._this.shl_info_size_bytes = sisb
    # TODO need to make this modifiable in the future
    # for more sophisticated use cases.
    if self.mol.cart:
        self._intor = "int1e_grids_cart"
    else:
        self._intor = "int1e_grids_sph"
def set_block(self, b0, weights, full_f_bi=None):
    """
    Set the grid block information in the C sgxopt
    object. This must be called right before
    SGXnr_direct_k_drv. TODO might be better to have
    a safer way of managing this data.

    Args:
        b0: Initial block idx
        weights: grid weights for this set of blocks
        full_f_bi: block-reduced estimate (upper bound)
            for the full density matrix f_gu. In direct_scf,
            f_gu passed directly to SGXnr_direct_k_drv is
            a DM difference, and we also need the "full" DM
            for energy-based screening. If None, we assume
            DM difference and full DM are the same.
    """
    # IMPORTANT: Make sure numpy arrays don't go out of
    # scope during C call, otherwise it could get freed.
    self._wt = weights
    self._this.ngrids = weights.size
    self._this.wt = _arr2c(weights)
    if self._screen_potential or self._screen_energy:
        # Slices are views into arrays held on self, so the underlying
        # buffers stay alive for the duration of the C call.
        self._this.vtol = _arr2c(self._vtol_arr[b0:])
    else:
        self._this.vtol = None
    if self.bound_algo == "sample_pos":
        self._this.mbar_bi = _arr2c(self._mbar_bi[b0:])
    else:
        self._this.mbar_bi = None
    if full_f_bi is None:
        self._this.full_f_bi = None
    else:
        self._full_f_bi = full_f_bi
        self._this.full_f_bi = _arr2c(full_f_bi)
def unset_block(self):
    """
    Reset the grid block data at the C level. Should be called after
    a call to SGXnr_direct_k_drv.
    """
    # Drop the Python-side references first, then clear the struct.
    self._wt = None
    self._full_f_bi = None
    self._this.ngrids = 0
    for field in ('wt', 'vtol', 'mbar_bi', 'full_f_bi'):
        setattr(self._this, field, None)
def get_loop_data(self, nset=1, with_pair_mask=True, grad=False):
    """
    Get all the info we need to loop over grids, i.e.
    nbins, screen_index (mask), pair_mask, ao_loc, and the block size.

    Args:
        nset: number of density matrices handled per pass (affects the
            memory-based block-size estimate).
        with_pair_mask: also build a shell-pair overlap mask.
        grad: size the buffers for gradient evaluation (AO derivatives).

    Returns:
        Tuple (nbins, screen_index, pair_mask, ao_loc, blksize).
    """
    mol = self.mol
    grids = self.grids
    nao = mol.nao_nr()
    ao_loc = mol.ao_loc_nr()
    if (grids.coords is None or grids.non0tab is None
            or grids.atm_idx is None):
        grids.build(with_non0tab=True)
    ngrids = grids.coords.shape[0]
    # NOTE(review): non0tab is only assigned when mol is grids.mol;
    # otherwise the following "is None" test raises NameError -- confirm
    # callers always use the grids' own molecule.
    if mol is grids.mol:
        non0tab = grids.non0tab
    if non0tab is None:
        non0tab = numpy.empty(((ngrids+BLKSIZE-1)//BLKSIZE,mol.nbas),
                              dtype=numpy.uint8)
        non0tab[:] = NBINS + 1  # Corresponding to AO value ~= 1
    screen_index = non0tab
    max_memory = self.max_memory - lib.current_memory()[0]
    if grad:
        # We need ~4 ao copies (ao, dx, dy, dz), plus 2 temporary arrays
        # along with gv, dgv, and fg
        data_dim = 6 + 5 * nset
    else:
        # We need to store ao, wao, fg, and gv -> 2 + nset sets of size nao
        data_dim = 2 + 2 * nset
    # Round the block size to a multiple of SGX_BLKSIZE.
    blksize = max(SGX_BLKSIZE, int(max_memory*1e6/8/(data_dim*nao)))
    blksize = min(ngrids, max(1, blksize // SGX_BLKSIZE) * SGX_BLKSIZE)
    cutoff = grids.cutoff * 1e2
    nbins = NBINS * 2 - int(NBINS * numpy.log(cutoff) / numpy.log(grids.cutoff))
    if with_pair_mask:
        pair_mask = mol.get_overlap_cond() < -numpy.log(cutoff)
    else:
        pair_mask = None
    return nbins, screen_index, pair_mask, ao_loc, blksize
def _get_diagonal_ints(self, sgxopt):
    """
    Compute the maximum of the ESP integrals within each grid block
    for the diagonal shell pairs (i.e. ish == jsh).

    Returns:
        res: (nbas, nrad) sampled diagonal values written by the C driver.
        radii: per-shell radial parameter extracted from the driver output.
        norms: diagonal of the shell-reduced overlap matrix.
    """
    mol = self.mol
    drv = _vhf.libcvhf.SGXfit_diagonal_ints
    atm_coords = numpy.ascontiguousarray(
        mol.atom_coords(unit='Bohr')
    )
    ao_loc = mol.ao_loc_nr().astype(numpy.int32)
    cintor = _vhf._fpointer(sgxopt._intor)
    atm, bas, env = mol._atm, mol._bas, mol._env
    nrad = self._nrad
    # Append workspace for 3 * nbas * nrad doubles; the C driver writes
    # its results into this tail region of env.
    env = numpy.append(env, numpy.zeros(3 * mol.nbas * nrad))
    # NOTE(review): NGRIDS is set to the number of appended doubles
    # (3 * nbas * nrad) here, while other callers in this file store the
    # number of grid points -- confirm against the C driver's expectation.
    env[gto.NGRIDS] = 3 * mol.nbas * nrad
    env[gto.PTR_GRIDS] = mol._env.size
    # TODO only need diagonals along the shells, so this can be made faster
    ovlp = mol.intor_symmetric("int1e_ovlp")
    if not ovlp.flags.c_contiguous:
        ovlp = ovlp.T
    sbar_ij = numpy.zeros((mol.nbas, mol.nbas))
    self._make_shl_mat(ovlp, sbar_ij, ao_loc, ao_loc)
    norms = numpy.ascontiguousarray(numpy.diag(sbar_ij))
    drv(
        cintor, ctypes.c_int(nrad), atm_coords.ctypes,
        ao_loc.ctypes, sgxopt._cintopt,
        atm.ctypes, ctypes.c_int(mol.natm), bas.ctypes,
        ctypes.c_int(mol.nbas), env.ctypes,
        norms.ctypes,
    )
    # Driver output layout is (nbas, 3, nrad): channel 0 holds sampled
    # values, channel 1 the radial parameter (first entry used).
    res = env[mol._env.size:].reshape(mol.nbas, 3, nrad)
    radii = numpy.ascontiguousarray(res[:, 1, 0])
    res = numpy.ascontiguousarray(res[:, 0, :])
    res[:, -2:] = 0  # avoid interpolating in crazy directions
    return res, radii, norms
def _build_integral_bounds(self):
    """
    Compute and save bounds on the ESP integrals.

    Depending on ``bound_algo``, the bounds are either overlap-based
    ("ovlp") or sampled on a coarse grid ("sample"/"sample_pos").
    Fills _mbar_ij, _mmax_i, _msum_i and, for "sample_pos", also
    _rbar_ij and the per-block bounds _mbar_bi.
    """
    mol = self.mol
    mbar_ij = numpy.zeros((mol.nbas, mol.nbas))
    if self.bound_algo == "sample_pos":
        rbar_ij = numpy.zeros((mol.nbas, mol.nbas))
    else:
        rbar_ij = None
    atm_coords = numpy.ascontiguousarray(
        mol.atom_coords(unit='Bohr')
    )
    ao_loc = mol.ao_loc_nr().astype(numpy.int32)
    if self._opt is None:
        from pyscf.sgx.sgx import _make_opt
        sgxopt = _make_opt(mol, False, self._itol)
    else:
        sgxopt = self._opt
    if self.bound_algo == "ovlp":
        # This is the simple bound estimated from the previous version
        # of the code. It is based on the overlap matrix.
        atm, bas, env = mol._atm, mol._bas, mol._env
        if self.mol.cart:
            intor = "int1e_ovlp_cart"
        else:
            intor = "int1e_ovlp_sph"
        cintor = _vhf._fpointer(intor)
        _vhf.libcvhf.SGXnr_q_cond(
            cintor, lib.c_null_ptr(),
            mbar_ij.ctypes, ao_loc.ctypes,
            atm.ctypes, ctypes.c_int(mol.natm), bas.ctypes,
            ctypes.c_int(mol.nbas), env.ctypes,
        )
    else:
        # 'Sample' the integrals on a coarse grid for each pair of
        # atoms. Assume the largest ESP integral on that grid
        # is the upper bound.
        vals, widths, norms = self._get_diagonal_ints(sgxopt)
        drv = _vhf.libcvhf.SGXsample_ints
        cintor = _vhf._fpointer(sgxopt._intor)
        atm, bas, env = mol._atm, mol._bas, mol._env
        ngmax = self._nquad + self._nrad + 2
        # Append workspace for up to ngmax sampling points.
        env = numpy.append(env, numpy.zeros(3 * ngmax))
        env[gto.NGRIDS] = ngmax
        env[gto.PTR_GRIDS] = mol._env.size
        # Per-element Bragg radii and Treutler-Ahlrichs radial grids
        # used for placing the sampling points.
        rs = {}
        radii = {}
        elements = [x for x, _ in mol._atom]
        for el in set(elements):
            a = _std_symbol_without_ghost(el)
            z = NUC[a]
            radii[el] = BRAGG[z]
            rs[el] = treutler_ahlrichs(self._nrad, z)[0]
        radii = numpy.asarray(
            [radii[el] for el in elements], order="C", dtype=numpy.float64
        )
        rs = numpy.asarray(
            [rs[el] for el in elements], order="C", dtype=numpy.float64
        )
        # Sanity-check dtypes/contiguity before handing pointers to C.
        for _arr in [mbar_ij, radii, atm_coords, rs, env]:
            self._check_arr(_arr, dtype=numpy.float64)
        for _arr in [ao_loc, atm, bas]:
            self._check_arr(_arr, dtype=numpy.int32)
        assert sgxopt._cintopt is not None
        crbar = None if rbar_ij is None else rbar_ij.ctypes
        drv(
            cintor, mbar_ij.ctypes, crbar, radii.ctypes,
            ctypes.c_int(self._nquad), ctypes.c_int(self._nrad),
            ctypes.c_double(self._rcmul), atm_coords.ctypes,
            rs.ctypes, ao_loc.ctypes, sgxopt._cintopt,
            atm.ctypes, ctypes.c_int(mol.natm), bas.ctypes,
            ctypes.c_int(mol.nbas), env.ctypes, ctypes.c_int(env.size),
            widths.ctypes, norms.ctypes, vals.ctypes,
            ctypes.c_double(self._itol)
        )
    self._mbar_ij = numpy.maximum(mbar_ij, mbar_ij.T)
    # NOTE(review): mmax/msum are taken from the UNsymmetrized mbar_ij
    # -- confirm this is intentional.
    self._mmax_i = numpy.max(mbar_ij, axis=1)
    self._msum_i = numpy.sum(mbar_ij, axis=1)
    # NOTE(review): this writes to self._opt, which may still be None if
    # sgxopt was built locally above -- confirm.
    self._opt.q_cond = mbar_ij
    if self.bound_algo == "sample_pos":
        self._rbar_ij = numpy.maximum(rbar_ij, rbar_ij.T)
        self._get_pos_dpt_ints(sgxopt, widths, norms, vals, self._nrad, atm_coords)
    else:
        self._rbar_ij = None
        self._mbar_bi = None
def _get_pos_dpt_ints(self, sgxopt, widths, norms, vals, nrad, atm_coords):
    """
    Calculate position-dependent bounds on the ESP integrals.
    Gives a slight speedup to full exchange and a larger
    speedup to short-range exchange.

    Fills self._mbar_bi with per-(SGX grid block, shell) bounds.
    """
    mol = self.mol
    grid_coords = self.grids.coords
    atm, bas, env = mol._atm, mol._bas, mol._env
    cintor = _vhf._fpointer(sgxopt._intor)
    ngrids = grid_coords.shape[0]
    nblk = (ngrids + SGX_BLKSIZE - 1) // SGX_BLKSIZE
    # Append the full integration grid to env so the C integrator can
    # see it through PTR_GRIDS.
    env = numpy.append(env, grid_coords.ravel())
    env[gto.NGRIDS] = ngrids
    env[gto.PTR_GRIDS] = mol._env.size
    m_bi = numpy.empty((nblk, mol.nbas))
    _vhf.libcvhf.SGXdiagonal_ints(
        cintor,
        m_bi.ctypes,
        self._ao_loc.ctypes,
        sgxopt._cintopt,
        atm.ctypes,
        ctypes.c_int(mol.natm),
        bas.ctypes,
        ctypes.c_int(mol.nbas),
        env.ctypes,
        widths.ctypes,
        norms.ctypes,
        vals.ctypes,
        ctypes.c_int(nrad),
        atm_coords.ctypes,
    )
    # NOTE(review): sqrt suggests the C driver accumulates squared
    # values -- confirm against SGXdiagonal_ints.
    m_bi[:] = numpy.sqrt(m_bi)
    self._mbar_bi = m_bi
def _build_ovlp_fit(self):
    """
    If fit_ovlp is True, set up a transformation matrix for the atomic
    orbitals to correct the error of the numerical integration grid.
    This typically reduces the error for a given grid size compared to
    the analytical exchange.

    Returns:
        The projection matrix (also stored on
        self._overlap_correction_matrix). Previously this method
        returned None, so ``build()`` stored None in _ovlp_proj.
    """
    mol = self.mol
    grids = self.grids
    nao = mol.nao_nr()
    # get_loop_data also builds the grids if needed (side effect kept
    # unconditional, as before).
    nbins, screen_index, pair_mask, ao_loc, blksize = self.get_loop_data()
    ngrids = grids.weights.size
    if self.fit_ovlp:
        # Numerical overlap accumulated block by block on the grid.
        sn = numpy.zeros((nao, nao))
        for i0, i1 in lib.prange(0, ngrids, blksize):
            assert i0 % SGX_BLKSIZE == 0
            coords = grids.coords[i0:i1]
            mask = screen_index[i0 // BLKSIZE:]
            ao = mol.eval_gto('GTOval', coords, non0tab=mask)
            _dot_ao_ao_sparse(ao, ao, grids.weights[i0:i1], nbins, mask,
                              pair_mask, ao_loc, hermi=self.hermi, out=sn)
        ovlp = mol.intor_symmetric('int1e_ovlp')
        # proj = S_numerical^{-1} S_analytical
        proj = scipy.linalg.solve(sn, ovlp)
        if self.sym_ovlp:
            # Symmetrized variant: average with the identity.
            proj = 0.5 * (proj + numpy.identity(nao))
    else:
        proj = numpy.identity(nao)
    self._overlap_correction_matrix = proj
    return proj
def _check_arr(self, x, order="C", dtype=None):
    """Assert that *x* has the expected dtype and memory layout."""
    if dtype is not None:
        assert x.dtype == dtype
    if order == "C":
        contiguous = x.flags.c_contiguous
    elif order == "F":
        contiguous = x.flags.f_contiguous
    else:
        raise ValueError
    assert contiguous
def _make_shl_mat(self, x_gu, x_bi, rlocs, clocs, wt=None, tr=False):
    """
    Contract x_gu to x_bi via a C-level version of
        xblk = x_gu[rlocs[b] : rlocs[b+1], clocs[i] : clocs[i+1]]
        res = sqrt(sum(xblk**2 * abs(wt[clocs[i] : clocs[i+1]]))
        if tr:
            x_bi[i, b] = res
        else:
            x_bi[b, i] = res
    If wt is not provided, w_g is 1 everywhere.

    Args:
        x_gu: Input array
        x_bi: Output array
        rlocs: aoloc-like object for the rows, indexed like rlocs[b]
        clocs: aoloc-like object for the columns, indexed like clocs[i]
        wt: Weights for the integration
        tr: Transpose the output (only supported for wt!=None)
    """
    # All arrays must be contiguous with the exact dtypes expected by
    # the C kernels.
    self._check_arr(x_gu, dtype=numpy.float64)
    self._check_arr(x_bi, dtype=numpy.float64)
    self._check_arr(rlocs, dtype=numpy.int32)
    self._check_arr(clocs, dtype=numpy.int32)
    assert x_gu.shape[-2:] == (rlocs[-1], clocs[-1])
    if tr:
        assert wt is not None
        assert x_bi.shape == (clocs.size - 1, rlocs.size - 1)
    else:
        assert x_bi.shape == (rlocs.size - 1, clocs.size - 1)
    args = [
        x_gu.ctypes,
        x_bi.ctypes,
        ctypes.c_int(rlocs.size - 1),
        ctypes.c_int(clocs.size - 1),
        rlocs.ctypes,
        clocs.ctypes,
    ]
    if wt is None:
        fn = _vhf.libcvhf.SGXmake_shl_mat
        assert x_gu.ndim == 2
    else:
        # Weighted variants support an optional leading component axis.
        if x_gu.ndim == 2:
            ncomp = 1
        else:
            assert x_gu.ndim == 3
            ncomp = x_gu.shape[0]
        self._check_arr(wt, dtype=numpy.float64)
        args.append(wt.ctypes)
        args.append(ctypes.c_int(ncomp))
        if tr:
            fn = _vhf.libcvhf.SGXmake_shl_mat_wt_tr
        else:
            fn = _vhf.libcvhf.SGXmake_shl_mat_wt
    fn(*args)
def _pow_screen(self, xbar_ij, delta):
    """
    In-place operation!
        tmp = (xbar_ij**(1-delta)).sum(0)
        xbar_ij = 1.0 / (tmp * xbar_ij**delta)

    Used to turn block-reduced AO magnitudes into screening weights.
    """
    # sum is over outer index here
    # This is done in-place
    self._check_arr(xbar_ij, dtype=numpy.float64)
    n, m = xbar_ij.shape
    _vhf.libcvhf.SGXpow_screen(
        xbar_ij.ctypes,
        ctypes.c_int(n),
        ctypes.c_int(m),
        ctypes.c_double(delta),
    )
def reduce_mask_ni2sgx(self, xni_bi):
    """
    Compute xsgx_bi (block-reduced array) in blocks of SGX_BLKSIZE,
    using xni_bi given in blocks of BLKSIZE.
    """
    nblk_ni, nbas = xni_bi.shape
    # ceil(nblk_ni / DFT_PER_SGX)
    nblk = -(-nblk_ni // DFT_PER_SGX)
    row_locs = numpy.arange(nblk + 1, dtype=numpy.int32) * DFT_PER_SGX
    row_locs[-1] = nblk_ni  # final SGX block may be short
    col_locs = numpy.arange(nbas + 1, dtype=numpy.int32)
    xsgx_bi = numpy.empty((nblk, nbas), dtype=numpy.float64)
    self._make_shl_mat(xni_bi, xsgx_bi, row_locs, col_locs)
    return xsgx_bi
def reduce_ao_to_shl_mat(self, ao, ao_loc, wt):
    """
    For F-ordered orbitals ao[g, u] = X_ug, compute the sum
    of X_ug^2 * wt_g within each grid block and AO shell.

    Returns:
        (nblk_ni, nshl) array (sqrt of the weighted sum, as computed by
        _make_shl_mat).
    """
    assert ao.flags.f_contiguous
    ngrids, nao = ao.shape
    nblk_ni_curr = (ngrids + BLKSIZE - 1) // BLKSIZE
    ni_locs = BLKSIZE * numpy.arange(
        nblk_ni_curr + 1, dtype=numpy.int32
    )
    # The last block may be shorter than BLKSIZE.
    ni_locs[-1] = ngrids
    x_bi = numpy.empty((nblk_ni_curr, ao_loc.size - 1))
    # ao.T is (nao, ngrids); tr=True yields x_bi indexed [block, shell].
    self._make_shl_mat(ao.T, x_bi, ao_loc, ni_locs, wt=wt, tr=True)
    return x_bi
def _build_dm_screen(self):
    """Precompute grid-block data for density-matrix based screening.

    Builds the block-reduced AO magnitude arrays (xni_bi per numint
    block, xsgx_bi per SGX block), the block-pair bound _sbar_ij, and
    the potential/energy screening thresholds.
    """
    mol = self.mol
    grids = self.grids
    ngrids = grids.weights.size
    nbins, screen_index, pair_mask, ao_loc, blksize = self.get_loop_data()
    nblk = (grids.weights.size + SGX_BLKSIZE - 1) // SGX_BLKSIZE
    nblk_ni = (grids.weights.size + BLKSIZE - 1) // BLKSIZE
    xni_bi = numpy.empty((nblk_ni, mol.nbas), dtype=numpy.float64)
    # Grid offsets are passed to C as int32, so guard the grid count.
    if grids.weights.size >= 2**31:
        raise ValueError("Too many grids for signed int32")
    # (Removed: a dft_locs offsets array was built here but never used.)
    ao = None
    for i0, i1 in lib.prange(0, ngrids, blksize):
        assert i0 % SGX_BLKSIZE == 0
        b0 = i0 // BLKSIZE
        b1 = b0 + (i1 - i0 + BLKSIZE - 1) // BLKSIZE
        coords = grids.coords[i0:i1]
        mask = screen_index[b0:b1]
        ao = mol.eval_gto('GTOval', coords, non0tab=mask, out=ao)
        xtmp_bi = self.reduce_ao_to_shl_mat(ao, ao_loc, grids.weights[i0:i1])
        xni_bi[b0:b1] = xtmp_bi
    xsgx_bi = self.reduce_mask_ni2sgx(xni_bi)
    assert xsgx_bi.shape == (nblk, mol.nbas)
    self._sbar_ij = lib.dot(xsgx_bi.T, xsgx_bi)
    self._smax_i = numpy.max(self._sbar_ij, axis=1)
    if self._screen_potential:
        # Convert AO magnitudes into per-block screening weights
        # (in place) and take the per-block minimum as the threshold.
        self._pow_screen(xni_bi, SGX_DELTA)
        self._pow_screen(xsgx_bi, SGX_DELTA)
        self._xni_b = numpy.min(xni_bi, axis=1)
        self._xsgx_b = numpy.min(xsgx_bi, axis=1)
        self._vtol_arr = self._vtol * self._xsgx_b
    else:
        # No potential screening: infinite thresholds.
        self._xni_b = numpy.empty(nblk_ni)
        self._xni_b[:] = numpy.inf
        self._vtol_arr = numpy.empty(nblk)
        self._vtol_arr[:] = numpy.inf
    if self._screen_energy:
        # Distribute the energy tolerance over the grid blocks.
        self._etol_per_sgx_block = self._etol / nblk
        self._etol_per_ni_block = self._etol / nblk_ni
    else:
        self._etol_per_sgx_block = 1e10
        self._etol_per_ni_block = 1e10
def build(self):
    """Build integral bounds, the overlap-fitting projector, and
    (if enabled) the DM screening data, then populate the C struct."""
    self._build_integral_bounds()
    # NOTE(review): _build_ovlp_fit stores the projector on
    # self._overlap_correction_matrix; its return value may be None.
    self._ovlp_proj = self._build_ovlp_fit()
    if self.use_dm_screening:
        self._build_dm_screen()
    self._setup_opt()
def get_dm_threshold_matrix(self, dm, full_dm_ij=None):
    """
    Build a boolean shell-pair mask marking density-matrix blocks that
    must be kept (True) under the enabled screening criteria.

    Args:
        dm: density matrix, or a stack (summed over the first axis).
        full_dm_ij: shell-reduced full DM; defaults to the reduced dm
            (relevant when dm is a DM difference in direct SCF).

    Returns:
        Boolean (nbas, nbas) mask, or None if no screening is enabled.
    """
    de, dv = self._etol, self._vtol
    mol = self.mol
    ao_loc = mol.ao_loc_nr()
    if dm.ndim == 3:
        dm = dm.sum(axis=0)
    dm_ij = numpy.zeros((mol.nbas, mol.nbas))
    self._make_shl_mat(dm, dm_ij, ao_loc, ao_loc)
    if full_dm_ij is None:
        full_dm_ij = dm_ij
    if self._screen_energy:
        # Estimate per-pair energy contributions; the cumulative sum in
        # sorted order lets small contributions be discarded while the
        # total stays within the budget de.
        tmp = lib.einsum("lk,jk->lj", self._sbar_ij, full_dm_ij)
        # 1e-200 keeps the values strictly positive for sorting.
        Z_il = lib.einsum("ij,lj->il", self._mbar_ij, tmp) + 1e-200
        PZ = dm_ij * Z_il
        SPZ = self._cumsum(PZ, self._argsort(PZ))
        SPZ = 0.5 * (SPZ + SPZ.T)
        econd = SPZ > de / mol.nbas
    else:
        econd = None
    if self._screen_potential:
        # Analogous cumulative criterion for the potential tolerance.
        PY = (dm_ij * self._mmax_i[:, None] * self._smax_i)
        SPY = self._cumsum(PY, self._argsort(PY))
        SPY = 0.5 * (SPY + SPY.T)
        vcond = SPY > dv / mol.nbas
    if self._screen_energy and self._screen_potential:
        return numpy.logical_or(econd, vcond)
    elif self._screen_energy:
        return econd
    elif self._screen_potential:
        return vcond
    else:
        return None
def _argsort(self, f):
    """
    Perform an argsort along index 1 while parallelizing over
    index 0. Uses the N log N merge sort algorithm.
    TODO might be better to replace this with bin sort for
    proper linear scaling.
    """
    assert f.ndim == 2
    self._check_arr(f, dtype=numpy.float64)
    # The C kernel writes int32 ordering indices row by row.
    _inds = numpy.empty_like(f, dtype=numpy.int32)
    _vhf.libcvhf.SGXargsort_lists(
        _inds.ctypes,
        f.ctypes,
        ctypes.c_int(f.shape[0]),
        ctypes.c_int(f.shape[1]),
    )
    return _inds
def _cumsum(self, f, inds):
    """
    Perform a cumulative sum of f along index 1, with the ordering
    given by inds. Parallelize over index 0.

    Args:
        f: (n, m) float64 array.
        inds: (n, m) int32 ordering (as produced by _argsort).
    """
    assert f.ndim == 2
    self._check_arr(f, dtype=numpy.float64)
    out = numpy.empty_like(f)
    _vhf.libcvhf.SGXcumsum_lists(
        out.ctypes,
        f.ctypes,
        inds.ctypes,
        ctypes.c_int(f.shape[0]),
        ctypes.c_int(f.shape[1]),
    )
    return out
def get_g_threshold(self, f_ug, g_ug, i0, wt, full_f_bi=None):
    """
    Build the (grid block, shell) screening mask for the second AO
    contraction, based on the energy and/or potential tolerances.

    Args:
        f_ug: (ndm, nao, ngrids) DM-contracted AO values.
        g_ug: (ndm, nao, ngrids) ESP intermediates (same shape as f_ug).
        i0: starting grid index of this batch.
        wt: grid weights for this batch.
        full_f_bi: optional block-reduced bound for the full DM.

    Returns:
        (nblk, nbas) mask array (uint8 or bool), or None if no
        screening is enabled.
    """
    de, dv = self._etol, self._vtol
    b0 = i0 // BLKSIZE
    ao_loc = self._ao_loc
    assert f_ug.flags.c_contiguous
    ndm, nao, ngrids = f_ug.shape
    assert g_ug.flags.c_contiguous
    assert g_ug.shape == f_ug.shape
    assert nao == ao_loc[-1]
    b1 = (ngrids + BLKSIZE - 1) // BLKSIZE + b0
    if self._screen_potential:
        thresh = dv * self._xni_b[b0:b1]
    else:
        # No potential screening: infinite per-block thresholds.
        thresh = numpy.empty(b1-b0, dtype=numpy.float64)
        thresh[:] = numpy.inf
    fullf_ptr = None if full_f_bi is None else full_f_bi.ctypes
    if self._screen_energy:
        cond = numpy.empty((b1 - b0, self.mol.nbas), dtype=numpy.uint8)
        _vhf.libcvhf.SGXbuild_gv_threshold(
            f_ug.ctypes,
            g_ug.ctypes,
            ao_loc.ctypes,
            thresh.ctypes,
            ctypes.c_double(de / self._xni_b.size),
            wt.ctypes,
            ctypes.c_int(ndm),
            ctypes.c_int(self.mol.nbas),
            ctypes.c_int(ngrids),
            cond.ctypes,
            fullf_ptr,
        )
    elif self._screen_potential:
        # TODO test
        # Reduce g_ug to per-(block, shell) magnitudes and compare
        # against the per-block thresholds.
        g_bi = numpy.empty((b1 - b0, self.mol.nbas))
        cloc = numpy.arange(b1 - b0 + 1, dtype=numpy.int32) * BLKSIZE
        cloc[-1] = ngrids
        self._make_shl_mat(g_ug, g_bi, ao_loc, cloc, wt=wt, tr=True)
        cond = g_bi > thresh[:, None]
        assert cond.flags.c_contiguous
    else:
        cond = None
    return cond
def get_jk_favork(sgx, dm, hermi=1, with_j=True, with_k=True,
                  direct_scf_tol=1e-13):
    """Compute J and K with the semi-numerical sgX scheme using the
    variant that favors exchange-matrix construction.

    Args:
        sgx: SGX object holding mol, grids, and options.
        dm: density matrix or stack of density matrices.
        hermi: when 1, vk is symmetrized on return.
        with_j / with_k: which matrices to build.
        direct_scf_tol: screening tolerance for the direct J/K driver.

    Returns:
        (vj, vk) with the same shape as dm.
    """
    t0 = logger.process_clock(), logger.perf_counter()
    mol = sgx.mol
    grids = sgx.grids
    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1,nao,nao)
    nset = dms.shape[0]
    if sgx.debug:
        batch_nuc = _gen_batch_nuc(mol)
    else:
        batch_jk = _gen_jk_direct(mol, 's2', with_j, with_k, direct_scf_tol,
                                  sgx._opt)
    t1 = logger.timer_debug1(mol, "sgX initialization", *t0)
    sn = numpy.zeros((nao,nao))
    vj = numpy.zeros_like(dms)
    vk = numpy.zeros_like(dms)
    ngrids = grids.coords.shape[0]
    max_memory = sgx.max_memory - lib.current_memory()[0]
    if sgx.debug:
        sblk = sgx.blockdim
        blksize = min(ngrids, max(4, int(min(sblk, max_memory*1e6/8/nao**2))))
        blksize = max(1, blksize // SGX_BLKSIZE) * SGX_BLKSIZE
    else:
        # We need to store ao, wao, fg, and gv -> 2 + 2*nset sets of
        # size nao per grid point. BUGFIX: the divisor previously
        # omitted the nao factor, badly overestimating the block size.
        blksize = min(ngrids, max(SGX_BLKSIZE,
                                  int(max_memory*1e6/8/((2 + 2 * nset) * nao))))
    tnuc = 0, 0
    for i0, i1 in lib.prange(0, ngrids, blksize):
        coords = grids.coords[i0:i1]
        weights = grids.weights[i0:i1,None]
        ao = mol.eval_gto('GTOval', coords)
        wao = ao * grids.weights[i0:i1,None]
        # Accumulate the numerical overlap for the fitting correction.
        sn += lib.dot(ao.T, wao)
        fg = lib.einsum('xij,ig->xjg', dms, wao.T)
        if sgx.debug:
            tnuc = tnuc[0] - logger.process_clock(), tnuc[1] - logger.perf_counter()
            gbn = batch_nuc(mol, coords)
            tnuc = tnuc[0] + logger.process_clock(), tnuc[1] + logger.perf_counter()
            if with_j:
                jg = numpy.einsum('gij,xij->xg', gbn, dms)
            if with_k:
                gv = lib.einsum('gvt,xtg->xvg', gbn, fg)
            gbn = None
        else:
            tnuc = tnuc[0] - logger.process_clock(), tnuc[1] - logger.perf_counter()
            jg, gv = batch_jk(mol, coords, dms, fg.copy(), weights)
            tnuc = tnuc[0] + logger.process_clock(), tnuc[1] + logger.perf_counter()
        if with_j:
            xj = lib.einsum('gv,xg->xgv', ao, jg)
            for i in range(nset):
                vj[i] += lib.einsum('gu,gv->uv', wao, xj[i])
        if with_k:
            for i in range(nset):
                vk[i] += lib.einsum('gu,vg->uv', ao, gv[i])
        jg = gv = None
    t2 = logger.timer_debug1(mol, "sgX J/K builder", *t1)
    tdot = t2[0] - t1[0] - tnuc[0], t2[1] - t1[1] - tnuc[1]
    logger.debug1(sgx, '(CPU, wall) time for integrals (%.2f, %.2f); '
                  'for tensor contraction (%.2f, %.2f)',
                  tnuc[0], tnuc[1], tdot[0], tdot[1])
    # Overlap-fitting correction: project with S_numerical^{-1} S_analytical.
    ovlp = mol.intor_symmetric('int1e_ovlp')
    proj = scipy.linalg.solve(sn, ovlp)
    if with_j:
        vj = lib.einsum('pi,xpj->xij', proj, vj)
        vj = (vj + vj.transpose(0,2,1))*.5
    if with_k:
        vk = lib.einsum('pi,xpj->xij', proj, vk)
        if hermi == 1:
            vk = (vk + vk.transpose(0,2,1))*.5
    logger.timer(mol, "vj and vk", *t0)
    return vj.reshape(dm_shape), vk.reshape(dm_shape)
def get_k_only(sgx, dm, hermi=1, direct_scf_tol=1e-13):
    """Compute only the exchange matrix K with the screened sgX driver.

    Uses the SGXData screening machinery (integral bounds, DM-based
    masks, overlap-fitting projector) and the sparse contraction
    kernels.

    Args:
        sgx: SGX object holding mol, grids, and options.
        dm: density matrix or stack of density matrices.
        hermi: when 1, vk is symmetrized on return.
        direct_scf_tol: screening tolerance for the direct K driver.

    Returns:
        vk with the same shape as dm.
    """
    if sgx.debug:
        raise NotImplementedError("debug mode for accelerated K matrix")
    t0 = logger.process_clock(), logger.perf_counter()
    mol = sgx.mol
    grids = sgx.grids
    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    dms = dms.reshape(-1,nao,nao)
    nset = dms.shape[0]
    t1 = logger.timer_debug1(mol, "sgX initialization", *t0)
    vk = numpy.zeros_like(dms)
    tnuc = 0, 0
    ngrids = grids.weights.size
    # Rebuild the screening data if the molecule, grids, or tolerance
    # changed since the last call.
    if (
        sgx._pjs_data is None
        or sgx._pjs_data.mol is not mol
        or sgx._pjs_data.grids is not grids
        or sgx._pjs_data._itol != direct_scf_tol
    ):
        sgx._build_pjs(direct_scf_tol)
    sgxdat = sgx._pjs_data
    loop_data = sgx._pjs_data.get_loop_data(nset=nset, with_pair_mask=True)
    nbins, screen_index, pair_mask, ao_loc, blksize = loop_data
    # Overlap-fitted density matrices.
    proj = sgx._pjs_data._overlap_correction_matrix
    proj_dm = lib.einsum('ki,xij->xkj', proj, dms)
    if sgxdat.use_dm_screening:
        # Shell-reduce the full DM (if available) so that energy-based
        # screening works with DM-difference iterations.
        if sgx._full_dm is None:
            full_dm_ij = None
        else:
            full_dm = sgx._full_dm
            if full_dm.ndim == 3:
                full_dm = full_dm.sum(axis=0)
            full_dm_ij = numpy.zeros((mol.nbas, mol.nbas))
            sgx._pjs_data._make_shl_mat(
                full_dm, full_dm_ij, ao_loc, ao_loc
            )
        dm_mask = sgx._pjs_data.get_dm_threshold_matrix(
            dms, full_dm_ij=full_dm_ij
        )
    else:
        dm_mask = None
        full_dm_ij = None
    batch_k = _gen_k_direct(mol, 's2', direct_scf_tol, sgx._pjs_data)
    ao = None
    fg = None
    for i0, i1 in lib.prange(0, ngrids, blksize):
        assert i0 % SGX_BLKSIZE == 0
        coords = grids.coords[i0:i1]
        weights = grids.weights[i0:i1]
        mask = screen_index[i0 // BLKSIZE :]
        ao = mol.eval_gto('GTOval', coords, non0tab=mask, out=ao)
        if full_dm_ij is None:
            full_fni_bi = None
            full_fsgx_bi = None
        else:
            # Block-reduced upper bound for the full-DM contraction,
            # used by the energy screening inside the C driver.
            full_fni_bi = sgx._pjs_data.reduce_ao_to_shl_mat(ao, ao_loc, weights)
            full_fni_bi = lib.dot(full_fni_bi, full_dm_ij)
            assert full_fni_bi.flags.c_contiguous
            full_fsgx_bi = sgx._pjs_data.reduce_mask_ni2sgx(full_fni_bi)
        fg = _sgxdot_ao_dm_sparse(ao, proj_dm, mask, dm_mask, ao_loc, out=fg)
        tnuc = tnuc[0] - logger.process_clock(), tnuc[1] - logger.perf_counter()
        gv = batch_k(mol, coords, fg, weights, i0 // SGX_BLKSIZE, full_fsgx_bi)
        tnuc = tnuc[0] + logger.process_clock(), tnuc[1] + logger.perf_counter()
        if sgxdat.use_dm_screening:
            mask2 = sgx._pjs_data.get_g_threshold(
                fg, gv, i0, weights, full_f_bi=full_fni_bi
            )
        else:
            mask2 = None
        # Accumulate this batch's contribution into vk.
        _sgxdot_ao_gv_sparse(ao, gv, weights, mask, mask2, ao_loc, out=vk)
        gv = None
    if sgx._pjs_data.sym_ovlp:
        # Symmetric overlap fitting applies the projector on both sides.
        vk = lib.einsum('ik,xij->xkj', proj, vk)
    t2 = logger.timer_debug1(mol, "sgX J/K builder", *t1)
    tdot = t2[0] - t1[0] - tnuc[0] , t2[1] - t1[1] - tnuc[1]
    logger.debug1(sgx, '(CPU, wall) time for integrals (%.2f, %.2f); '
                  'for tensor contraction (%.2f, %.2f)',
                  tnuc[0], tnuc[1], tdot[0], tdot[1])
    if hermi == 1:
        vk = (vk + vk.transpose(0,2,1))*.5
    logger.timer(mol, "vk", *t0)
    return vk.reshape(dm_shape)
def get_jk_favorj(sgx, dm, hermi=1, with_j=True, with_k=True,
                  direct_scf_tol=1e-13):
    '''Build semi-numerical J and K matrices (the "favor J" sgX scheme).

    One electron coordinate is treated analytically and the other on the
    sgX grid: per grid batch, fg = proj_dm . (w * ao) is contracted with
    3-center integrals evaluated at the grid points.

    Args:
        sgx: SGX object carrying mol, grids, screening options and _opt.
        dm: density matrix or stack of density matrices, shape (..., nao, nao).
        hermi: if 1, vk is symmetrized before returning.
        with_j / with_k: which of the two matrices to compute.
        direct_scf_tol: integral screening threshold for the direct driver.

    Returns:
        (vj, vk), each with the same shape as the input dm.
    '''
    t0 = logger.process_clock(), logger.perf_counter()
    mol = sgx.mol
    grids = sgx.grids
    dms = numpy.asarray(dm)
    dm_shape = dms.shape
    nao = dm_shape[-1]
    # Collapse any leading dimensions into a single "set" axis.
    dms = dms.reshape(-1,nao,nao)
    nset = dms.shape[0]
    if sgx.debug:
        # Debug path: materialize the full 3-center integral tensor per batch.
        batch_nuc = _gen_batch_nuc(mol)
    else:
        # Fast path: C driver contracts integrals with dms on the fly.
        batch_jk = _gen_jk_direct(mol, 's2', with_j, with_k, direct_scf_tol,
                                  sgx._opt)
    if mol is grids.mol:
        non0tab = grids.non0tab
    # NOTE(review): non0tab is only bound when mol is grids.mol; if foreign
    # grids were attached, the next line would raise NameError. Presumably
    # sgx.grids is always built for sgx.mol -- confirm.
    if non0tab is None:
        ngrids = grids.weights.size
        non0tab = numpy.empty(((ngrids+BLKSIZE-1)//BLKSIZE,mol.nbas),
                              dtype=numpy.uint8)
        non0tab[:] = NBINS + 1  # Corresponding to AO value ~= 1
    screen_index = non0tab
    # sn accumulates the numerical overlap used for overlap fitting below.
    sn = numpy.zeros((nao,nao))
    ngrids = grids.coords.shape[0]
    max_memory = sgx.max_memory - lib.current_memory()[0]
    if sgx.debug:
        sblk = sgx.blockdim
        blksize = min(ngrids, max(4, int(min(sblk, max_memory*1e6/8/nao**2))))
        # Round down to a multiple of SGX_BLKSIZE for the C kernels.
        blksize = max(1, blksize // SGX_BLKSIZE) * SGX_BLKSIZE
    else:
        # We need to store ao, wao, and fg, gv -> 2 + 2 * nset sets of size nao
        blksize = min(ngrids, max(SGX_BLKSIZE,
                                  int(max_memory*1e6/8/((2 + 2 * nset) * nao))))
        # NOTE(review): unlike the branch above, this blksize is not rounded
        # down to a BLKSIZE multiple; the `assert i0 % BLKSIZE == 0` checks
        # in the loops below rely on the computed size being aligned -- confirm.
    if sgx.fit_ovlp:
        # Overlap fitting (Izsak et al.): compute the numerical overlap sn
        # and the projector proj = sn^{-1} S to reduce quadrature error.
        for i0, i1 in lib.prange(0, ngrids, blksize):
            assert i0 % BLKSIZE == 0
            coords = grids.coords[i0:i1]
            mask = screen_index[i0 // BLKSIZE:]
            ao = mol.eval_gto('GTOval', coords, non0tab=mask)
            wao = ao * grids.weights[i0:i1,None]
            sn += lib.dot(ao.T, wao)
        ovlp = mol.intor_symmetric('int1e_ovlp')
        proj = scipy.linalg.solve(sn, ovlp)
        proj_dm = lib.einsum('ki,xij->xkj', proj, dms)
    else:
        proj_dm = dms.copy()
    t1 = logger.timer_debug1(mol, "sgX initialization", *t0)
    vj = numpy.zeros_like(dms)
    vk = numpy.zeros_like(dms)
    # (CPU, wall) time spent inside the integral drivers, for the debug log.
    tnuc = 0, 0
    wao = None
    shls_slice = (0, mol.nbas)
    ao_loc = mol.ao_loc_nr()
    fg = None
    for i0, i1 in lib.prange(0, ngrids, blksize):
        assert i0 % BLKSIZE == 0
        coords = grids.coords[i0:i1]
        weights = grids.weights[i0:i1]
        mask = screen_index[i0 // BLKSIZE:]
        ao = mol.eval_gto('GTOval', coords, non0tab=mask)
        # wao = ao * weights[:, None]
        # fg = lib.einsum('xij,ig->xjg', proj_dm, wao.T)
        wao = _scale_ao(ao, weights, out=wao)
        fg = _sgxdot_ao_dm(wao, proj_dm, mask, shls_slice, ao_loc, out=fg)
        if with_j:
            # Density at each grid point: rho[x,g] = sum_u fg[x,u,g]*ao[g,u].
            rhog = numpy.einsum('xug,gu->xg', fg, ao)
        else:
            rhog = None
        if sgx.debug:
            tnuc = tnuc[0] - logger.process_clock(), tnuc[1] - logger.perf_counter()
            gbn = batch_nuc(mol, coords)
            tnuc = tnuc[0] + logger.process_clock(), tnuc[1] + logger.perf_counter()
            if with_j:
                jpart = numpy.einsum('guv,xg->xuv', gbn, rhog)
            if with_k:
                gv = lib.einsum('gtv,xtg->xvg', gbn, fg)
            gbn = None
        else:
            tnuc = tnuc[0] - logger.process_clock(), tnuc[1] - logger.perf_counter()
            # presumably copied to guarantee an owned contiguous buffer for
            # the ctypes driver -- confirm.
            if with_j: rhog = rhog.copy()
            jpart, gv = batch_jk(mol, coords, rhog, fg, weights)
            tnuc = tnuc[0] + logger.process_clock(), tnuc[1] + logger.perf_counter()
        if with_j:
            vj += jpart
        if with_k:
            # vk[:] += lib.einsum('gu,xvg->xuv', ao, gv)
            _sgxdot_ao_gv(ao, gv, mask, shls_slice, ao_loc, out=vk)
        jpart = gv = None
    t2 = logger.timer_debug1(mol, "sgX J/K builder", *t1)
    tdot = t2[0] - t1[0] - tnuc[0], t2[1] - t1[1] - tnuc[1]
    logger.debug1(sgx, '(CPU, wall) time for integrals (%.2f, %.2f); '
                  'for tensor contraction (%.2f, %.2f)',
                  tnuc[0], tnuc[1], tdot[0], tdot[1])
    # hermi_triu mirrors one triangle of vj onto the other (the contraction
    # above presumably fills only one triangle) -- confirm.
    for i in range(nset):
        lib.hermi_triu(vj[i], inplace=True)
    if with_k and hermi == 1:
        vk = (vk + vk.transpose(0, 2, 1))*.5
    logger.timer(mol, "vj and vk", *t0)
    return vj.reshape(dm_shape), vk.reshape(dm_shape)
def _gen_batch_nuc(mol):
    '''Return a closure that evaluates Coulomb integrals between AO pairs
    and a batch of grid points, as full (ngrids, nao, nao) matrices.'''
    # Build the integral optimizer once; every batch reuses it.
    opt = gto.moleintor.make_cintopt(mol._atm, mol._bas, mol._env, 'int3c2e')

    def batch_nuc(mol, grid_coords, out=None):
        # Represent the grid points as unit point charges on a fake molecule,
        # so the 3-center 2-electron integral code can be reused.
        fakemol = gto.fakemol_for_charges(grid_coords)
        packed = aux_e2(mol, fakemol, intor='int3c2e', aosym='s2ij',
                        cintopt=opt)
        # Expand the s2ij-packed lower triangles into square AO matrices.
        return lib.unpack_tril(packed.T, out=out)

    return batch_nuc
def _gen_jk_direct(mol, aosym, with_j, with_k, direct_scf_tol,
                   sgxopt=None, grad=False):
    '''Contraction between sgX Coulomb integrals and density matrices
    J: einsum('guv,xg->xuv', gbn, dms) if dms == rho at grid,
    or einsum('gij,xij->xg', gbn, dms) if dms are density matrices
    K: einsum('gtv,xgt->xgv', gbn, fg)
    '''
    if sgxopt is None:
        from pyscf.sgx import sgx
        sgxopt = sgx._make_opt(mol, grad, direct_scf_tol)
    sgxopt.direct_scf_tol = direct_scf_tol
    ao_loc = moleintor.make_loc(mol._bas, sgxopt._intor)
    if grad:
        ncomp = 3  # x, y, z derivative components
    else:
        ncomp = 1
    nao = mol.nao
    cintor = _vhf._fpointer(sgxopt._intor)
    drv = _vhf.libcvhf.SGXnr_direct_drv

    def jk_part(mol, grid_coords, dms, fg, weights):
        '''Evaluate the J/K contributions of one batch of grid points.'''
        atm, bas, env = mol._atm, mol._bas, mol._env
        ngrids = grid_coords.shape[0]
        # Append this batch's coordinates to env and record where they live,
        # so the C integral code can find the grid points.
        env = numpy.append(env, grid_coords.ravel())
        env[gto.NGRIDS] = ngrids
        env[gto.PTR_GRIDS] = mol._env.size
        shls_slice = (0, mol.nbas, 0, mol.nbas)
        vj = vk = None
        fjk = []     # one C contraction kernel per input matrix
        dmsptr = []  # raw input buffer pointers, parallel to fjk
        vjkptr = []  # raw output buffer pointers, parallel to fjk
        if with_j:
            if dms[0].ndim == 1:  # the value of density at each grid
                if grad:
                    vj = numpy.zeros((len(dms),ncomp,nao,nao))
                else:
                    # Allocate with the ncomp axis, then view it away (keeps
                    # the same underlying buffer the pointers refer to).
                    vj = numpy.zeros((len(dms),ncomp,nao,nao))[:,0]
                for i, dm in enumerate(dms):
                    dmsptr.append(dm.ctypes.data_as(ctypes.c_void_p))
                    vjkptr.append(vj[i].ctypes.data_as(ctypes.c_void_p))
                    fjk.append(_vhf._fpointer('SGXnr'+aosym+'_ijg_g_ij'))
            else:
                # dms are AO density matrices; output is rho on the grid.
                if grad:
                    vj = numpy.zeros((len(dms),ncomp,ngrids))
                else:
                    vj = numpy.zeros((len(dms),ncomp,ngrids))[:,0]
                for i, dm in enumerate(dms):
                    dmsptr.append(dm.ctypes.data_as(ctypes.c_void_p))
                    vjkptr.append(vj[i].ctypes.data_as(ctypes.c_void_p))
                    fjk.append(_vhf._fpointer('SGXnr'+aosym+'_ijg_ji_g'))
        if with_k:
            assert fg.flags.c_contiguous
            assert fg.shape[-2:] == (ao_loc[-1], weights.size)
            if grad:
                vk = numpy.zeros((len(fg),ncomp,nao,ngrids))
            else:
                vk = numpy.zeros((len(fg),ncomp,nao,ngrids))[:,0]
            for i, dm in enumerate(fg):
                dmsptr.append(dm.ctypes.data_as(ctypes.c_void_p))
                vjkptr.append(vk[i].ctypes.data_as(ctypes.c_void_p))
                fjk.append(_vhf._fpointer('SGXnr'+aosym+'_ijg_gj_gi'))
        n_dm = len(fjk)
        fjk = (ctypes.c_void_p*(n_dm))(*fjk)
        dmsptr = (ctypes.c_void_p*(n_dm))(*dmsptr)
        vjkptr = (ctypes.c_void_p*(n_dm))(*vjkptr)
        drv(cintor, fjk, dmsptr, vjkptr, n_dm, ncomp,
            (ctypes.c_int*4)(*shls_slice),
            ao_loc.ctypes.data_as(ctypes.c_void_p),
            sgxopt._cintopt, ctypes.byref(sgxopt._this),
            atm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(mol.natm),
            bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(mol.nbas),
            env.ctypes.data_as(ctypes.c_void_p),
            ctypes.c_int(2 if aosym == 's2' else 1))
        return vj, vk
    return jk_part
def _gen_k_direct(mol, aosym, direct_scf_tol,
                  sgxopt=None, grad=False):
    '''K-only contraction between sgX Coulomb integrals and density matrices
    K: einsum('gtv,xgt->xgv', gbn, fg)

    Args:
        mol: Mole object.
        aosym: integral permutation symmetry, 's1' or 's2'.
        direct_scf_tol: screening threshold for the direct driver.
        sgxopt: SGXData (or SGXOpt-like) screening helper; built if None.
        grad: if True, produce derivative integrals (3 components).

    Returns:
        k_part(mol, grid_coords, fg, weights, b0, full_f_bi) -> vk with
        shape (nset, nao, ngrids), or (nset, 3, nao, ngrids) when grad=True.
    '''
    if sgxopt is None:
        from pyscf.sgx import sgx
        sgxopt = sgx._make_opt(mol, grad, direct_scf_tol)
    if isinstance(sgxopt, SGXData):
        if grad:
            # SGXData carries the optimizer for the value integrals only;
            # build a temporary optimizer for the derivative integrals.
            from pyscf.sgx import sgx
            tmpopt = sgx._make_opt(mol, grad, direct_scf_tol)
            _cintopt = tmpopt._cintopt
            intor_name = tmpopt._intor
        else:
            _cintopt = sgxopt._cintopt
            intor_name = sgxopt._intor
    else:
        # BUGFIX: _cintopt was never assigned on this branch, so the returned
        # k_part raised NameError at the drv() call; take it from the
        # optimizer exactly as the SGXData branch above does.
        # NOTE(review): a non-SGXData sgxopt must also provide
        # set_block/unset_block for k_part to work -- confirm.
        _cintopt = sgxopt._cintopt
        intor_name = sgxopt._intor
    sgxopt.direct_scf_tol = direct_scf_tol
    ao_loc = moleintor.make_loc(mol._bas, sgxopt._intor)
    if grad:
        ncomp = 3  # x, y, z derivative components
    else:
        ncomp = 1
    nao = mol.nao
    cintor = _vhf._fpointer(intor_name)
    drv = _vhf.libcvhf.SGXnr_direct_k_drv

    def k_part(mol, grid_coords, fg, weights=None, b0=None,
               full_f_bi=None):
        '''Evaluate vk for one batch of grid points.

        b0 is the starting block index of this batch; full_f_bi is
        presumably the per-shell screening bound matrix passed to the C
        screening code -- confirm against SGXData.set_block.
        '''
        atm, bas, env = mol._atm, mol._bas, mol._env
        ngrids = grid_coords.shape[0]
        # Append this batch's coordinates to env and record where they live,
        # so the C integral code can find the grid points.
        env = numpy.append(env, grid_coords.ravel())
        env[gto.NGRIDS] = ngrids
        env[gto.PTR_GRIDS] = mol._env.size
        shls_slice = (0, mol.nbas, 0, mol.nbas)
        vk = None
        fjk = []     # one C contraction kernel per fg matrix
        dmsptr = []  # raw input buffer pointers
        vjkptr = []  # raw output buffer pointers
        if grad:
            vk = numpy.zeros((len(fg),ncomp,nao,ngrids))
        else:
            # Allocate with the ncomp axis, then view it away (keeps the
            # same underlying buffer the pointers refer to).
            vk = numpy.zeros((len(fg),ncomp,nao,ngrids))[:,0]
        assert fg.flags.c_contiguous
        for i, dm in enumerate(fg):
            assert fg[i].flags.c_contiguous
            assert fg[i].shape == (ao_loc[-1], ngrids)
            dmsptr.append(dm.ctypes.data_as(ctypes.c_void_p))
            vjkptr.append(vk[i].ctypes.data_as(ctypes.c_void_p))
            fjk.append(_vhf._fpointer('SGXnr'+aosym+'_ijg_gj_gi'))
        n_dm = len(fjk)
        fjk = (ctypes.c_void_p*(n_dm))(*fjk)
        dmsptr = (ctypes.c_void_p*(n_dm))(*dmsptr)
        vjkptr = (ctypes.c_void_p*(n_dm))(*vjkptr)
        assert weights is not None
        assert b0 is not None
        # Install the per-block screening data before entering the driver.
        sgxopt.set_block(b0, weights, full_f_bi)
        drv(
            cintor, fjk, dmsptr, vjkptr, n_dm, ncomp,
            (ctypes.c_int*4)(*shls_slice),
            ao_loc.ctypes.data_as(ctypes.c_void_p),
            _cintopt, ctypes.byref(sgxopt._this),
            atm.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(mol.natm),
            bas.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(mol.nbas),
            env.ctypes.data_as(ctypes.c_void_p),
            ctypes.c_int(2 if aosym == 's2' else 1),
        )
        sgxopt.unset_block()
        return vk
    return k_part
# Radial grid sizes for the sgX levels: row = accuracy level (via eps),
# column = periodic-table row of the element (1..7).
SGX_RAD_GRIDS = numpy.array([
    [int(eps * 15 - 40 + 5 * row) for row in range(1, 8)]
    for eps in (3.816, 4.020, 4.338, 4.871, 5.3, 5.8, 6.3, 9.0)
])
[docs]
def get_gridss(mol, level=1, gthrd=1e-10, use_opt_grids=False):
    '''Build the sgX quadrature grid for ``mol``.

    With ``use_opt_grids`` the sgX-specific scheme is used (becke_lko
    partitioning, sgx_prune, per-element radial/angular sizes). When
    ``gthrd > 0``, grid points whose weighted AO values are all within
    [-gthrd, gthrd] are discarded and screening masks are attached.
    '''
    t_start = (logger.process_clock(), logger.perf_counter())
    grids = gen_grid.Grids(mol)
    grids.level = level
    if use_opt_grids:
        grids.becke_scheme = becke_lko
        grids.prune = sgx_prune
        grids.atom_grid = {}
        # Upper nuclear charges of periods 1..7; (Z > edges).sum() gives the
        # zero-based period index of an element.
        period_edges = numpy.array((2, 10, 18, 36, 54, 86, 118))
        for ia in range(mol.natm):
            symb = mol.atom_symbol(ia)
            if symb not in grids.atom_grid:
                period = (gto.charge(symb) > period_edges).sum()
                grids.atom_grid[symb] = (
                    SGX_RAD_GRIDS[level, period],
                    LEBEDEV_ORDER[SGX_ANG_MAPPING[level, 3]],
                )
    grids.build(with_non0tab=True)
    if gthrd > 0:
        # Keep a point if any weighted AO value exceeds the threshold in
        # magnitude; process the grid in chunks to bound memory.
        npts = grids.weights.size
        keep_chunks = []
        for p0, p1 in lib.prange(0, npts, 10000):
            wao = mol.eval_gto('GTOval', grids.coords[p0:p1])
            wao *= grids.weights[p0:p1, None]
            keep_chunks.append(numpy.any(wao > gthrd, axis=1) |
                               numpy.any(wao < -gthrd, axis=1))
        grids._select_grids(numpy.hstack(keep_chunks))
        grids._add_padding()
        grids.non0tab = grids.make_mask(mol, grids.coords)
        grids.screen_index = grids.non0tab
    logger.debug(mol, 'threshold for grids screening %g', gthrd)
    logger.debug(mol, 'number of grids %d', grids.weights.size)
    logger.timer_debug1(mol, "Xg screening", *t_start)
    return grids
# Module-level alias: the favor-J algorithm is the default J/K builder.
get_jk = get_jk_favorj