Source code for pyscf.ao2mo.semi_incore

#!/usr/bin/env python
# Copyright 2018-2020 The PySCF Developers. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: Bryan Lau <blau1270@gmail.com>
#         Qiming Sun <osirpt.sun@gmail.com>
#

"""
Created on Thu May 17 11:05:22 2018

@author: Bryan Lau


A module that will do on-disk transformation of two electron integrals, and
also return specific slices of (o)ccupied and (v)irtual ones needed for post HF

Comparing to the full in-memory transformation (see incore.py) which holds all
intermediates in memory, this version uses less memory but performs slow due
to IO overhead.
"""


import ctypes
import numpy
import h5py
from pyscf import lib
from pyscf.lib import logger
from pyscf.ao2mo.incore import iden_coeffs, _conc_mos
from pyscf.ao2mo.outcore import _load_from_h5g
from pyscf.ao2mo import _ao2mo

IOBLK_SIZE = 128  # MB

[docs] def general(eri, mo_coeffs, erifile, dataname='eri_mo', ioblk_size=IOBLK_SIZE, compact=True, verbose=logger.NOTE): '''For the given four sets of orbitals, transfer arbitrary spherical AO integrals to MO integrals on disk. Args: eri : 8-fold reduced eri vector mo_coeffs : 4-item list of ndarray Four sets of orbital coefficients, corresponding to the four indices of (ij|kl) erifile : str or h5py File or h5py Group object To store the transformed integrals, in HDF5 format. Kwargs dataname : str The dataset name in the erifile (ref the hierarchy of HDF5 format http://www.hdfgroup.org/HDF5/doc1.6/UG/09_Groups.html). By assigning different dataname, the existed integral file can be reused. If the erifile contains the dataname, the new integrals data will overwrite the old one. ioblk_size : float or int The block size for IO, large block size may **not** improve performance compact : bool When compact is True, depending on the four orbital sets, the returned MO integrals has (up to 4-fold) permutation symmetry. If it's False, the function will abandon any permutation symmetry, and return the "plain" MO integrals Pseudocode / algorithm: u = mu v = nu l = lambda o = sigma Assume eri's are 8-fold reduced. nij/nkl_pair = npair or i*j/k*l if only transforming a subset First half transform: Initialize half_eri of size (nij_pair,npair) For lo = 1 -> npair Unpack row lo Unpack row lo to matrix E_{uv}^{lo} Transform C_ui^+*E*C_nj -> E_{ij}^{lo} Ravel or pack E_{ij}^{lo} Save E_{ij}^{lo} -> half_eri[:,lo] Second half transform: Initialize h5d_eri of size (nij_pair,nkl_pair) For ij = 1 -> nij_pair Load and unpack half_eri[ij,:] -> E_{lo}^{ij} Transform C_{lk}E_{lo}^{ij}C_{ol} -> E_{kl}^{ij} Repack E_{kl}^{ij} Save E_{kl}^{ij} -> h5d_eri[ij,:] Each matrix is indexed by the composite index ij x kl, where ij/kl is either npair or ixj/kxl, if only a subset of MOs are being transformed. Since entire rows or columns need to be read in, the arrays are chunked such that IOBLK_SIZE = row/col x chunking col/row. For example, for the first half transform, we would save in nij_pair x IOBLK_SIZE/nij_pair, then load in IOBLK_SIZE/nkl_pair x npair for the second half transform. ------ kl -----> |jxl | ij | | v As a first guess, the chunking size is jxl. If the super-rows/cols are larger than IOBLK_SIZE, then the chunk rectangle jxl is trimmed accordingly. The pathological limiting case is where the dimensions nao_pair, nij_pair, or nkl_pair are so large that the arrays are chunked 1x1, in which case IOBLK_SIZE needs to be increased. ''' log = logger.new_logger(None, verbose) log.info('******** ao2mo disk, custom eri ********') eri_ao = numpy.asarray(eri, order='C') nao, nmoi = mo_coeffs[0].shape nmoj = mo_coeffs[1].shape[1] nao_pair = nao*(nao+1)//2 ijmosym, nij_pair, moij, ijshape = _conc_mos(mo_coeffs[0], mo_coeffs[1], compact) klmosym, nkl_pair, mokl, klshape = _conc_mos(mo_coeffs[2], mo_coeffs[3], compact) ijshape = (ijshape[0], ijshape[1]-ijshape[0], ijshape[2], ijshape[3]-ijshape[2]) dtype = numpy.result_type(eri, *mo_coeffs) typesize = dtype.itemsize/1e6 # in MB if nij_pair == 0: return numpy.empty((nij_pair,nkl_pair)) ij_red = ijmosym == 's1' kl_red = klmosym == 's1' if isinstance(erifile, str): if h5py.is_hdf5(erifile): feri = h5py.File(erifile, 'a') if dataname in feri: del (feri[dataname]) else: feri = h5py.File(erifile,'w',libver='latest') else: assert (isinstance(erifile, h5py.Group)) feri = erifile h5d_eri = feri.create_dataset(dataname,(nij_pair,nkl_pair), dtype.char) feri_swap = lib.H5TmpFile(libver='latest') chunk_size = min(nao_pair, max(4, int(ioblk_size*1e6/8/nao_pair))) log.debug('Memory information:') log.debug(' IOBLK_SIZE (MB): {} chunk_size: {}' .format(ioblk_size, chunk_size)) log.debug(' Final disk eri size (MB): {:.3g}' .format(nij_pair*nkl_pair*typesize)) log.debug(' Half transformed eri size (MB): {:.3g}' .format(nij_pair*nao_pair*typesize)) log.debug(' RAM buffer (MB): {:.3g}' .format(nij_pair*IOBLK_SIZE*typesize*2)) if eri_ao.size == nao_pair**2: # 4-fold symmetry # half_e1 first transforms the indices which are contiguous in memory # transpose the 4-fold integrals to make ij the contiguous indices eri_ao = lib.transpose(eri_ao) ftrans = _ao2mo.libao2mo.AO2MOtranse1_incore_s4 elif eri_ao.size == nao_pair*(nao_pair+1)//2: ftrans = _ao2mo.libao2mo.AO2MOtranse1_incore_s8 else: raise NotImplementedError if ijmosym == 's2': fmmm = _ao2mo.libao2mo.AO2MOmmm_nr_s2_s2 elif nmoi <= nmoj: fmmm = _ao2mo.libao2mo.AO2MOmmm_nr_s2_iltj else: fmmm = _ao2mo.libao2mo.AO2MOmmm_nr_s2_igtj fdrv = getattr(_ao2mo.libao2mo, 'AO2MOnr_e1incore_drv') def save(piece, buf): feri_swap[str(piece)] = buf.T # transform \mu\nu -> ij cput0 = logger.process_clock(), logger.perf_counter() with lib.call_in_background(save) as async_write: for istep, (p0, p1) in enumerate(lib.prange(0, nao_pair, chunk_size)): if dtype == numpy.double: buf = numpy.empty((p1-p0, nij_pair)) fdrv(ftrans, fmmm, buf.ctypes.data_as(ctypes.c_void_p), eri_ao.ctypes.data_as(ctypes.c_void_p), moij.ctypes.data_as(ctypes.c_void_p), ctypes.c_int(p0), ctypes.c_int(p1-p0), ctypes.c_int(nao), ctypes.c_int(ijshape[0]), ctypes.c_int(ijshape[1]), ctypes.c_int(ijshape[2]), ctypes.c_int(ijshape[3])) else: # complex tmp = numpy.empty((p1-p0, nao_pair)) if eri_ao.size == nao_pair**2: # 4-fold symmetry tmp = eri_ao[p0:p1] else: # 8-fold symmetry for i in range(p0, p1): tmp[i-p0] = lib.unpack_row(eri_ao, i) tmp = lib.unpack_tril(tmp, filltriu=lib.SYMMETRIC) buf = lib.einsum('xpq,pi,qj->xij', tmp, mo_coeffs[0].conj(), mo_coeffs[1]) if ij_red: buf = buf.reshape(p1-p0,-1) # grabs by row else: buf = lib.pack_tril(buf) async_write(istep, buf) log.timer('(uv|lo) -> (ij|lo)', *cput0) # transform \lambda\sigma -> kl cput1 = logger.process_clock(), logger.perf_counter() Cklam = mo_coeffs[2].conj() buf_read = numpy.empty((chunk_size,nao_pair), dtype=dtype) buf_prefetch = numpy.empty_like(buf_read) def load(start, stop, buf): if start < stop: _load_from_h5g(feri_swap, start, stop, buf) def save(start, stop, buf): if start < stop: h5d_eri[start:stop] = buf[:stop-start] with lib.call_in_background(save,load) as (async_write, prefetch): for p0, p1 in lib.prange(0, nij_pair, chunk_size): if p0 == 0: load(p0, p1, buf_prefetch) buf_read, buf_prefetch = buf_prefetch, buf_read prefetch(p1, min(p1+chunk_size, nij_pair), buf_prefetch) lo = lib.unpack_tril(buf_read[:p1-p0], filltriu=lib.SYMMETRIC) lo = lib.einsum('xpq,pi,qj->xij', lo, Cklam, mo_coeffs[3]) if kl_red: kl = lo.reshape(p1-p0,-1) else: kl = lib.pack_tril(lo) async_write(p0, p1, kl) log.timer('(ij|lo) -> (ij|kl)', *cput1) if isinstance(erifile, str): feri.close() return erifile
if __name__ == '__main__': import tempfile from pyscf import gto, scf, ao2mo # set verbose to 7 to get detailed timing info, otherwise 0 verbose = 0 mol = gto.Mole() mol.verbose = 0 mol.output = None mol.atom = [ ['H' , (0. , 0. , .917)], ['F' , (0. , 0. , 0.)], ] mol.basis = '6311g' mol.build() mf = scf.RHF(mol) mf.kernel() mf.verbose = verbose mo_coeff = mf.mo_coeff nmo = mo_coeff.shape[0] # compare custom outcore eri with incore eri nocc = numpy.count_nonzero(mf.mo_occ) nvir = nmo - nocc print('Full incore transformation (pyscf)...') start_time = logger.perf_counter() eri_incore = ao2mo.incore.full(mf._eri, mo_coeff) onnn = eri_incore[:nocc*nmo].copy() print(' Time elapsed (s): ',logger.perf_counter() - start_time) print('Parital incore transformation (pyscf)...') start_time = logger.perf_counter() orbo = mo_coeff[:,:nocc] onnn2 = ao2mo.incore.general(mf._eri, (orbo,mo_coeff,mo_coeff,mo_coeff)) print(' Time elapsed (s): ',logger.perf_counter() - start_time) tmpfile2 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) print('\n\nCustom outcore transformation ...') orbo = mo_coeff[:,:nocc] start_time = logger.perf_counter() general(mf._eri, (orbo,mo_coeff,mo_coeff,mo_coeff), tmpfile2.name, 'aa', verbose=verbose) stop_time = logger.perf_counter() - start_time print(' Time elapsed (s): ',stop_time) print('\n\nPyscf outcore transformation ...') start_time = logger.perf_counter() ao2mo.outcore.general(mol, (orbo,mo_coeff,mo_coeff,mo_coeff), tmpfile2.name, 'ab', verbose=verbose) stop_time2 = logger.perf_counter() - start_time print(' Time elapsed (s): ',stop_time2) print('How worse is the custom implementation?',stop_time/stop_time2) with h5py.File(tmpfile2.name, 'r') as f: print('\n\nIncore (pyscf) vs outcore (custom)?',numpy.allclose(onnn2,f['aa'])) print('Outcore (pyscf) vs outcore (custom)?',numpy.allclose(f['ab'],f['aa'])) print('\n\nCustom full outcore transformation ...') start_time = logger.perf_counter() general(mf._eri, (mo_coeff,mo_coeff,mo_coeff,mo_coeff), tmpfile2.name, 'aa', verbose=verbose) stop_time = logger.perf_counter() - start_time print(' Time elapsed (s): ',stop_time) print('\n\nPyscf full outcore transformation ...') start_time = logger.perf_counter() ao2mo.outcore.full(mol, mo_coeff, tmpfile2.name, 'ab',verbose=verbose) stop_time2 = logger.perf_counter() - start_time print(' Time elapsed (s): ',stop_time2) print(' How worse is the custom implementation?',stop_time/stop_time2) with h5py.File(tmpfile2.name, 'r') as f: print('\n\nIncore (pyscf) vs outcore (custom)?',numpy.allclose(eri_incore,f['aa'])) print('Outcore (pyscf) vs outcore (custom)?',numpy.allclose(f['ab'],f['aa'])) tmpfile2.close()