#!/usr/bin/env python
# Copyright 2018-2020 The PySCF Developers. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: Bryan Lau <blau1270@gmail.com>
# Qiming Sun <osirpt.sun@gmail.com>
#
"""
Created on Thu May 17 11:05:22 2018
@author: Bryan Lau
A module that will do on-disk transformation of two electron integrals, and
also return specific slices of (o)ccupied and (v)irtual ones needed for post HF
Comparing to the full in-memory transformation (see incore.py) which holds all
intermediates in memory, this version uses less memory but performs slow due
to IO overhead.
"""
import ctypes
import numpy
import h5py
from pyscf import lib
from pyscf.lib import logger
from pyscf.ao2mo.incore import iden_coeffs, _conc_mos
from pyscf.ao2mo.outcore import _load_from_h5g
from pyscf.ao2mo import _ao2mo
IOBLK_SIZE = 128 # MB
[docs]
def general(eri, mo_coeffs, erifile, dataname='eri_mo',
ioblk_size=IOBLK_SIZE, compact=True, verbose=logger.NOTE):
'''For the given four sets of orbitals, transfer arbitrary spherical AO
integrals to MO integrals on disk.
Args:
eri : 8-fold reduced eri vector
mo_coeffs : 4-item list of ndarray
Four sets of orbital coefficients, corresponding to the four
indices of (ij|kl)
erifile : str or h5py File or h5py Group object
To store the transformed integrals, in HDF5 format.
Kwargs
dataname : str
The dataset name in the erifile (ref the hierarchy of HDF5 format
http://www.hdfgroup.org/HDF5/doc1.6/UG/09_Groups.html). By assigning
different dataname, the existed integral file can be reused. If
the erifile contains the dataname, the new integrals data will
overwrite the old one.
ioblk_size : float or int
The block size for IO, large block size may **not** improve performance
compact : bool
When compact is True, depending on the four orbital sets, the
returned MO integrals has (up to 4-fold) permutation symmetry.
If it's False, the function will abandon any permutation symmetry,
and return the "plain" MO integrals
Pseudocode / algorithm:
u = mu
v = nu
l = lambda
o = sigma
Assume eri's are 8-fold reduced.
nij/nkl_pair = npair or i*j/k*l if only transforming a subset
First half transform:
Initialize half_eri of size (nij_pair,npair)
For lo = 1 -> npair
Unpack row lo
Unpack row lo to matrix E_{uv}^{lo}
Transform C_ui^+*E*C_nj -> E_{ij}^{lo}
Ravel or pack E_{ij}^{lo}
Save E_{ij}^{lo} -> half_eri[:,lo]
Second half transform:
Initialize h5d_eri of size (nij_pair,nkl_pair)
For ij = 1 -> nij_pair
Load and unpack half_eri[ij,:] -> E_{lo}^{ij}
Transform C_{lk}E_{lo}^{ij}C_{ol} -> E_{kl}^{ij}
Repack E_{kl}^{ij}
Save E_{kl}^{ij} -> h5d_eri[ij,:]
Each matrix is indexed by the composite index ij x kl, where ij/kl is
either npair or ixj/kxl, if only a subset of MOs are being transformed.
Since entire rows or columns need to be read in, the arrays are chunked
such that IOBLK_SIZE = row/col x chunking col/row. For example, for the
first half transform, we would save in nij_pair x IOBLK_SIZE/nij_pair,
then load in IOBLK_SIZE/nkl_pair x npair for the second half transform.
------ kl ----->
|jxl
|
ij
|
|
v
As a first guess, the chunking size is jxl. If the super-rows/cols are
larger than IOBLK_SIZE, then the chunk rectangle jxl is trimmed
accordingly. The pathological limiting case is where the dimensions
nao_pair, nij_pair, or nkl_pair are so large that the arrays are
chunked 1x1, in which case IOBLK_SIZE needs to be increased.
'''
log = logger.new_logger(None, verbose)
log.info('******** ao2mo disk, custom eri ********')
eri_ao = numpy.asarray(eri, order='C')
nao, nmoi = mo_coeffs[0].shape
nmoj = mo_coeffs[1].shape[1]
nao_pair = nao*(nao+1)//2
ijmosym, nij_pair, moij, ijshape = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact)
klmosym, nkl_pair, mokl, klshape = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact)
ijshape = (ijshape[0], ijshape[1]-ijshape[0],
ijshape[2], ijshape[3]-ijshape[2])
dtype = numpy.result_type(eri, *mo_coeffs)
typesize = dtype.itemsize/1e6 # in MB
if nij_pair == 0:
return numpy.empty((nij_pair,nkl_pair))
ij_red = ijmosym == 's1'
kl_red = klmosym == 's1'
if isinstance(erifile, str):
if h5py.is_hdf5(erifile):
feri = lib.H5FileWrap(erifile, 'a')
if dataname in feri:
del (feri[dataname])
else:
feri = lib.H5FileWrap(erifile,'w', libver='latest')
else:
assert (isinstance(erifile, h5py.Group))
feri = erifile
h5d_eri = feri.create_dataset(dataname,(nij_pair,nkl_pair), dtype.char)
feri_swap = lib.H5TmpFile(libver='latest')
chunk_size = min(nao_pair, max(4, int(ioblk_size*1e6/8/nao_pair)))
log.debug('Memory information:')
log.debug(' IOBLK_SIZE (MB): {} chunk_size: {}'
.format(ioblk_size, chunk_size))
log.debug(' Final disk eri size (MB): {:.3g}'
.format(nij_pair*nkl_pair*typesize))
log.debug(' Half transformed eri size (MB): {:.3g}'
.format(nij_pair*nao_pair*typesize))
log.debug(' RAM buffer (MB): {:.3g}'
.format(nij_pair*IOBLK_SIZE*typesize*2))
if eri_ao.size == nao_pair**2: # 4-fold symmetry
# half_e1 first transforms the indices which are contiguous in memory
# transpose the 4-fold integrals to make ij the contiguous indices
eri_ao = lib.transpose(eri_ao)
ftrans = _ao2mo.libao2mo.AO2MOtranse1_incore_s4
elif eri_ao.size == nao_pair*(nao_pair+1)//2:
ftrans = _ao2mo.libao2mo.AO2MOtranse1_incore_s8
else:
raise NotImplementedError
if ijmosym == 's2':
fmmm = _ao2mo.libao2mo.AO2MOmmm_nr_s2_s2
elif nmoi <= nmoj:
fmmm = _ao2mo.libao2mo.AO2MOmmm_nr_s2_iltj
else:
fmmm = _ao2mo.libao2mo.AO2MOmmm_nr_s2_igtj
fdrv = getattr(_ao2mo.libao2mo, 'AO2MOnr_e1incore_drv')
def save(piece, buf):
feri_swap[str(piece)] = buf.T
# transform \mu\nu -> ij
cput0 = logger.process_clock(), logger.perf_counter()
with lib.call_in_background(save) as async_write:
for istep, (p0, p1) in enumerate(lib.prange(0, nao_pair, chunk_size)):
if dtype == numpy.double:
buf = numpy.empty((p1-p0, nij_pair))
fdrv(ftrans, fmmm,
buf.ctypes.data_as(ctypes.c_void_p),
eri_ao.ctypes.data_as(ctypes.c_void_p),
moij.ctypes.data_as(ctypes.c_void_p),
ctypes.c_int(p0), ctypes.c_int(p1-p0),
ctypes.c_int(nao),
ctypes.c_int(ijshape[0]), ctypes.c_int(ijshape[1]),
ctypes.c_int(ijshape[2]), ctypes.c_int(ijshape[3]))
else: # complex
tmp = numpy.empty((p1-p0, nao_pair))
if eri_ao.size == nao_pair**2: # 4-fold symmetry
tmp = eri_ao[p0:p1]
else: # 8-fold symmetry
for i in range(p0, p1):
tmp[i-p0] = lib.unpack_row(eri_ao, i)
tmp = lib.unpack_tril(tmp, filltriu=lib.SYMMETRIC)
buf = lib.einsum('xpq,pi,qj->xij', tmp, mo_coeffs[0].conj(), mo_coeffs[1])
if ij_red:
buf = buf.reshape(p1-p0,-1) # grabs by row
else:
buf = lib.pack_tril(buf)
async_write(istep, buf)
log.timer('(uv|lo) -> (ij|lo)', *cput0)
# transform \lambda\sigma -> kl
cput1 = logger.process_clock(), logger.perf_counter()
Cklam = mo_coeffs[2].conj()
buf_read = numpy.empty((chunk_size,nao_pair), dtype=dtype)
buf_prefetch = numpy.empty_like(buf_read)
def load(start, stop, buf):
if start < stop:
_load_from_h5g(feri_swap, start, stop, buf)
def save(start, stop, buf):
if start < stop:
h5d_eri[start:stop] = buf[:stop-start]
with lib.call_in_background(save,load) as (async_write, prefetch):
for p0, p1 in lib.prange(0, nij_pair, chunk_size):
if p0 == 0:
load(p0, p1, buf_prefetch)
buf_read, buf_prefetch = buf_prefetch, buf_read
prefetch(p1, min(p1+chunk_size, nij_pair), buf_prefetch)
lo = lib.unpack_tril(buf_read[:p1-p0], filltriu=lib.SYMMETRIC)
lo = lib.einsum('xpq,pi,qj->xij', lo, Cklam, mo_coeffs[3])
if kl_red:
kl = lo.reshape(p1-p0,-1)
else:
kl = lib.pack_tril(lo)
async_write(p0, p1, kl)
log.timer('(ij|lo) -> (ij|kl)', *cput1)
if isinstance(erifile, str):
feri.close()
return erifile
if __name__ == '__main__':
import tempfile
from pyscf import gto, scf, ao2mo
# set verbose to 7 to get detailed timing info, otherwise 0
verbose = 0
mol = gto.Mole()
mol.verbose = 0
mol.output = None
mol.atom = [
['H' , (0. , 0. , .917)],
['F' , (0. , 0. , 0.)], ]
mol.basis = '6311g'
mol.build()
mf = scf.RHF(mol)
mf.kernel()
mf.verbose = verbose
mo_coeff = mf.mo_coeff
nmo = mo_coeff.shape[0]
# compare custom outcore eri with incore eri
nocc = numpy.count_nonzero(mf.mo_occ)
nvir = nmo - nocc
print('Full incore transformation (pyscf)...')
start_time = logger.perf_counter()
eri_incore = ao2mo.incore.full(mf._eri, mo_coeff)
onnn = eri_incore[:nocc*nmo].copy()
print(' Time elapsed (s): ',logger.perf_counter() - start_time)
print('Parital incore transformation (pyscf)...')
start_time = logger.perf_counter()
orbo = mo_coeff[:,:nocc]
onnn2 = ao2mo.incore.general(mf._eri, (orbo,mo_coeff,mo_coeff,mo_coeff))
print(' Time elapsed (s): ',logger.perf_counter() - start_time)
tmpfile2 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR)
print('\n\nCustom outcore transformation ...')
orbo = mo_coeff[:,:nocc]
start_time = logger.perf_counter()
general(mf._eri, (orbo,mo_coeff,mo_coeff,mo_coeff), tmpfile2.name, 'aa',
verbose=verbose)
stop_time = logger.perf_counter() - start_time
print(' Time elapsed (s): ',stop_time)
print('\n\nPyscf outcore transformation ...')
start_time = logger.perf_counter()
ao2mo.outcore.general(mol, (orbo,mo_coeff,mo_coeff,mo_coeff), tmpfile2.name, 'ab',
verbose=verbose)
stop_time2 = logger.perf_counter() - start_time
print(' Time elapsed (s): ',stop_time2)
print('How worse is the custom implementation?',stop_time/stop_time2)
with h5py.File(tmpfile2.name, 'r') as f:
print('\n\nIncore (pyscf) vs outcore (custom)?',numpy.allclose(onnn2,f['aa']))
print('Outcore (pyscf) vs outcore (custom)?',numpy.allclose(f['ab'],f['aa']))
print('\n\nCustom full outcore transformation ...')
start_time = logger.perf_counter()
general(mf._eri, (mo_coeff,mo_coeff,mo_coeff,mo_coeff), tmpfile2.name, 'aa',
verbose=verbose)
stop_time = logger.perf_counter() - start_time
print(' Time elapsed (s): ',stop_time)
print('\n\nPyscf full outcore transformation ...')
start_time = logger.perf_counter()
ao2mo.outcore.full(mol, mo_coeff, tmpfile2.name, 'ab',verbose=verbose)
stop_time2 = logger.perf_counter() - start_time
print(' Time elapsed (s): ',stop_time2)
print(' How worse is the custom implementation?',stop_time/stop_time2)
with h5py.File(tmpfile2.name, 'r') as f:
print('\n\nIncore (pyscf) vs outcore (custom)?',numpy.allclose(eri_incore,f['aa']))
print('Outcore (pyscf) vs outcore (custom)?',numpy.allclose(f['ab'],f['aa']))
tmpfile2.close()