Source code for pyscf.mcscf.umc_ao2mo

#!/usr/bin/env python
# Copyright 2014-2021 The PySCF Developers. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
MO integrals for UCASSCF methods
'''

import ctypes

import numpy
from pyscf import lib
from pyscf.lib import logger
from pyscf import ao2mo
from pyscf.ao2mo import _ao2mo

libmcscf = lib.load_library('libmcscf')

# least memory requirements:
#       ncore**2*(nmo-ncore)**2 + ncas**2*nmo**2*2 + nmo**3   words
# nmo  ncore  ncas  outcore  incore
# 200  40     16    2.4GB    5.3 GB (_eri 1.6GB )
# 250  50     16    4.9GB   12.0 GB (_eri 3.9GB )
# 300  60     16    9.0GB   23.7 GB (_eri 8.1GB )
# 400  80     16   24.6GB
# 500  100    16   54.8GB
# 600  120    16   107 GB


[docs] def trans_e1_incore(eri_ao, mo, ncore, ncas): nmo = mo[0].shape[1] nocc = (ncore[0] + ncas, ncore[1] + ncas) erib = ao2mo.incore.half_e1(eri_ao, (mo[1][:,:nocc[1]],mo[1]), compact=False) load_buf = lambda bufid: erib[bufid*nmo:bufid*nmo+nmo].copy() AAPP, AApp, APPA, tmp, IAPCV, APcv = \ _trans_aapp_((mo[1],mo[0]), (ncore[1],ncore[0]), ncas, load_buf) jC_PP, jC_pp, kC_PP, ICVCV = \ _trans_cvcv_((mo[1],mo[0]), (ncore[1],ncore[0]), ncas, load_buf)[:4] erib = None eria = ao2mo.incore.half_e1(eri_ao, (mo[0][:,:nocc[0]],mo[0]), compact=False) load_buf = lambda bufid: eria[bufid*nmo:bufid*nmo+nmo].copy() aapp, aaPP, appa, apPA, Iapcv, apCV = \ _trans_aapp_(mo, ncore, ncas, load_buf) jc_pp, jc_PP, kc_pp, Icvcv, cvCV = \ _trans_cvcv_(mo, ncore, ncas, load_buf) jkcpp = jc_pp - kc_pp jkcPP = jC_PP - kC_PP return jkcpp, jkcPP, jC_pp, jc_PP, \ aapp, aaPP, AApp, AAPP, appa, apPA, APPA, \ Iapcv, IAPCV, apCV, APcv, Icvcv, ICVCV, cvCV
[docs] def trans_e1_outcore(mol, mo, ncore, ncas, max_memory=None, ioblk_size=512, verbose=logger.WARN): time0 = (logger.process_clock(), logger.perf_counter()) log = logger.new_logger(mol, verbose) nao, nmo = mo[0].shape nao_pair = nao*(nao+1)//2 nocc = (ncore[0] + ncas, ncore[1] + ncas) fswap = lib.H5TmpFile() ao2mo.outcore.half_e1(mol, (mo[1][:,:nocc[1]],mo[1]), fswap, verbose=log, compact=False) klaoblks = len(fswap['0']) def load_bufa(bfn_id): if log.verbose >= logger.DEBUG1: time1[:] = log.timer('between load_buf', *tuple(time1)) buf = numpy.empty((nmo,nao_pair)) col0 = 0 for ic in range(klaoblks): dat = fswap['0/%d'%ic] col1 = col0 + dat.shape[1] buf[:nmo,col0:col1] = dat[bfn_id*nmo:(bfn_id+1)*nmo] col0 = col1 if log.verbose >= logger.DEBUG1: time1[:] = log.timer('load_buf', *tuple(time1)) return buf time0 = log.timer('halfe1-beta', *time0) time1 = [logger.process_clock(), logger.perf_counter()] ao_loc = numpy.array(mol.ao_loc_nr(), dtype=numpy.int32) AAPP, AApp, APPA, tmp, IAPCV, APcv = \ _trans_aapp_((mo[1],mo[0]), (ncore[1],ncore[0]), ncas, load_bufa, ao_loc) time0 = log.timer('trans_AAPP', *time0) jC_PP, jC_pp, kC_PP, ICVCV = \ _trans_cvcv_((mo[1],mo[0]), (ncore[1],ncore[0]), ncas, load_bufa, ao_loc)[:4] time0 = log.timer('trans_CVCV', *time0) ########################### fswap = lib.H5TmpFile() ao2mo.outcore.half_e1(mol, (mo[0][:,:nocc[0]],mo[0]), fswap, verbose=log, compact=False) klaoblks = len(fswap['0']) def load_bufb(bfn_id): if log.verbose >= logger.DEBUG1: time1[:] = log.timer('between load_buf', *tuple(time1)) buf = numpy.empty((nmo,nao_pair)) col0 = 0 for ic in range(klaoblks): dat = fswap['0/%d'%ic] col1 = col0 + dat.shape[1] buf[:nmo,col0:col1] = dat[bfn_id*nmo:(bfn_id+1)*nmo] col0 = col1 if log.verbose >= logger.DEBUG1: time1[:] = log.timer('load_buf', *tuple(time1)) return buf time0 = log.timer('halfe1-alpha', *time0) time1 = [logger.process_clock(), logger.perf_counter()] aapp, aaPP, appa, apPA, Iapcv, apCV = \ _trans_aapp_(mo, ncore, ncas, load_bufb, ao_loc) time0 = log.timer('trans_aapp', *time0) jc_pp, jc_PP, kc_pp, Icvcv, cvCV = \ _trans_cvcv_(mo, ncore, ncas, load_bufb, ao_loc) time0 = log.timer('trans_cvcv', *time0) jkcpp = jc_pp - kc_pp jkcPP = jC_PP - kC_PP return jkcpp, jkcPP, jC_pp, jc_PP, \ aapp, aaPP, AApp, AAPP, appa, apPA, APPA, \ Iapcv, IAPCV, apCV, APcv, Icvcv, ICVCV, cvCV
def _trans_aapp_(mo, ncore, ncas, fload, ao_loc=None): nmo = mo[0].shape[1] nocc = (ncore[0] + ncas, ncore[1] + ncas) klshape = (0, nmo, 0, nmo) japcv = numpy.empty((ncas,nmo,ncore[0],nmo-ncore[0])) aapp = numpy.empty((ncas,ncas,nmo,nmo)) aaPP = numpy.empty((ncas,ncas,nmo,nmo)) appa = numpy.empty((ncas,nmo,nmo,ncas)) apPA = numpy.empty((ncas,nmo,nmo,ncas)) apCV = numpy.empty((ncas,nmo,ncore[1],nmo-ncore[1])) ppp = numpy.empty((nmo,nmo,nmo)) for i in range(ncas): buf = _ao2mo.nr_e2(fload(ncore[0]+i), mo[0], klshape, aosym='s4', mosym='s2', ao_loc=ao_loc) lib.unpack_tril(buf, out=ppp) aapp[i] = ppp[ncore[0]:nocc[0]] appa[i] = ppp[:,:,ncore[0]:nocc[0]] #japcp = avcp * 2 - acpv.transpose(0,2,1,3) - avcp.transpose(0,3,2,1) japcv[i] = ppp[:,:ncore[0],ncore[0]:] * 2 \ - ppp[:ncore[0],:,ncore[0]:].transpose(1,0,2) \ - ppp[ncore[0]:,:ncore[0],:].transpose(2,1,0) buf = _ao2mo.nr_e2(fload(ncore[0]+i), mo[1], klshape, aosym='s4', mosym='s2', ao_loc=ao_loc) lib.unpack_tril(buf, out=ppp) aaPP[i] = ppp[ncore[0]:nocc[0]] apPA[i] = ppp[:,:,ncore[1]:nocc[1]] apCV[i] = ppp[:,:ncore[1],ncore[1]:] return aapp, aaPP, appa, apPA, japcv, apCV def _trans_cvcv_(mo, ncore, ncas, fload, ao_loc=None): nmo = mo[0].shape[1] jc_pp = numpy.empty((ncore[0],nmo,nmo)) jc_PP = numpy.zeros((nmo,nmo)) kc_pp = numpy.empty((ncore[0],nmo,nmo)) jcvcv = numpy.zeros((ncore[0],nmo-ncore[0],ncore[0],nmo-ncore[0])) cvCV = numpy.empty((ncore[0],nmo-ncore[0],ncore[1],nmo-ncore[1])) vcp = numpy.empty((nmo-ncore[0],ncore[0],nmo)) cpp = numpy.empty((ncore[0],nmo,nmo)) for i in range(ncore[0]): buf = fload(i) klshape = (0, ncore[1], ncore[1], nmo) _ao2mo.nr_e2(buf[ncore[0]:nmo], mo[1], klshape, aosym='s4', mosym='s1', out=cvCV[i], ao_loc=ao_loc) klshape = (0, nmo, 0, nmo) tmp = _ao2mo.nr_e2(buf[i:i+1], mo[1], klshape, aosym='s4', mosym='s1', ao_loc=ao_loc) jc_PP += tmp.reshape(nmo,nmo) klshape = (0, ncore[0], 0, nmo) _ao2mo.nr_e2(buf[ncore[0]:nmo], mo[0], klshape, aosym='s4', mosym='s1', out=vcp, ao_loc=ao_loc) kc_pp[i,ncore[0]:] = vcp[:,i] klshape = (0, nmo, 0, nmo) _ao2mo.nr_e2(buf[:ncore[0]], mo[0], klshape, aosym='s4', mosym='s2', out=buf[:ncore[0]], ao_loc=ao_loc) lib.unpack_tril(buf[:ncore[0]], out=cpp) jc_pp[i] = cpp[i] kc_pp[i,:ncore[0]] = cpp[:,i] #jcvcv = cvcv * 2 - cvcv.transpose(2,1,0,3) - ccvv.transpose(0,2,1,3) jcvcv[i] = vcp[:,:,ncore[0]:] * 2 \ - vcp[:,:,ncore[0]:].transpose(2,1,0) \ - cpp[:,ncore[0]:,ncore[0]:].transpose(1,0,2) return jc_pp, jc_PP, kc_pp, jcvcv, cvCV class _ERIS: def __init__(self, casscf, mo, method='incore'): mol = casscf.mol ncore = self.ncore = casscf.ncore ncas = self.ncas = casscf.ncas nmo = mo[0].shape[1] mem_incore, mem_outcore, mem_basic = _mem_usage(ncore, ncas, nmo) mem_now = lib.current_memory()[0] eri = casscf._scf._eri if (method == 'incore' and eri is not None and ((mem_incore+mem_now) < casscf.max_memory*.9) or mol.incore_anyway): if eri is None: eri = mol.intor('int2e', aosym='s8') (self.jkcpp, self.jkcPP, self.jC_pp, self.jc_PP, self.aapp, self.aaPP, self.AApp, self.AAPP, self.appa, self.apPA, self.APPA, self.Iapcv, self.IAPCV, self.apCV, self.APcv, self.Icvcv, self.ICVCV, self.cvCV) = \ trans_e1_incore(eri, mo, ncore, ncas) self.vhf_c = (numpy.einsum('ipq->pq', self.jkcpp) + self.jC_pp, numpy.einsum('ipq->pq', self.jkcPP) + self.jc_PP) else: log = logger.Logger(casscf.stdout, casscf.verbose) max_memory = max(2000, casscf.max_memory*.9-mem_now) if ((mem_outcore+mem_now) < casscf.max_memory*.9): if max_memory < mem_basic: log.warn('Calculation needs %d MB memory, over CASSCF.max_memory (%d MB) limit', (mem_outcore+mem_now)/.9, casscf.max_memory) (self.jkcpp, self.jkcPP, self.jC_pp, self.jc_PP, self.aapp, self.aaPP, self.AApp, self.AAPP, self.appa, self.apPA, self.APPA, self.Iapcv, self.IAPCV, self.apCV, self.APcv, self.Icvcv, self.ICVCV, self.cvCV) = \ trans_e1_outcore(mol, mo, ncore, ncas, max_memory=max_memory, verbose=log) self.vhf_c = (numpy.einsum('ipq->pq', self.jkcpp) + self.jC_pp, numpy.einsum('ipq->pq', self.jkcPP) + self.jc_PP) else: raise RuntimeError('.max_memory not enough') def _mem_usage(ncore, ncas, nmo): ncore = (ncore[0] + ncore[1]) * .5 nvir = nmo - ncore basic = (ncas**2*nmo**2*7 + nmo**3*2) * 8/1e6 outcore = basic + (ncore**2*nvir**2*3 + ncas*nmo*ncore*nvir*4 + ncore*nmo**2*3) * 8/1e6 incore = outcore + nmo**4/1e6 + ncore*nmo**3*4/1e6 return incore, outcore, basic if __name__ == '__main__': from pyscf import scf from pyscf import gto from pyscf.mcscf import umc1step mol = gto.Mole() mol.verbose = 0 mol.output = None#"out_h2o" mol.atom = [ ['O', ( 0., 0. , 0. )], ['H', ( 0., -0.757, 0.587)], ['H', ( 0., 0.757 , 0.587)],] mol.basis = {'H': 'sto-3g', 'O': 'sto-3g',} mol.charge = 1 mol.spin = 1 mol.build() m = scf.UHF(mol) ehf = m.scf() mc = umc1step.UCASSCF(m, 4, 4) mc.verbose = 4 mo = m.mo_coeff eris0 = _ERIS(mc, mo, 'incore') eris1 = _ERIS(mc, mo, 'outcore') print('jkcpp', numpy.allclose(eris1.jkcpp, eris0.jkcpp)) print('jkcPP', numpy.allclose(eris1.jkcPP, eris0.jkcPP)) print('jC_pp', numpy.allclose(eris1.jC_pp, eris0.jC_pp)) print('jc_PP', numpy.allclose(eris1.jc_PP, eris0.jc_PP)) print('aapp ', numpy.allclose(eris1.aapp , eris0.aapp )) print('aaPP ', numpy.allclose(eris1.aaPP , eris0.aaPP )) print('AApp ', numpy.allclose(eris1.AApp , eris0.AApp )) print('AAPP ', numpy.allclose(eris1.AAPP , eris0.AAPP )) print('appa ', numpy.allclose(eris1.appa , eris0.appa )) print('apPA ', numpy.allclose(eris1.apPA , eris0.apPA )) print('APPA ', numpy.allclose(eris1.APPA , eris0.APPA )) print('cvCV ', numpy.allclose(eris1.cvCV , eris0.cvCV )) print('Icvcv', numpy.allclose(eris1.Icvcv, eris0.Icvcv)) print('ICVCV', numpy.allclose(eris1.ICVCV, eris0.ICVCV)) print('Iapcv', numpy.allclose(eris1.Iapcv, eris0.Iapcv)) print('IAPCV', numpy.allclose(eris1.IAPCV, eris0.IAPCV)) print('apCV ', numpy.allclose(eris1.apCV , eris0.apCV )) print('APcv ', numpy.allclose(eris1.APcv , eris0.APcv )) nmo = mo[0].shape[1] ncore = mc.ncore ncas = mc.ncas nocc = (ncas + ncore[0], ncas + ncore[1]) eriaa = ao2mo.incore.full(mc._scf._eri, mo[0]) eriab = ao2mo.incore.general(mc._scf._eri, (mo[0],mo[0],mo[1],mo[1])) eribb = ao2mo.incore.full(mc._scf._eri, mo[1]) eriaa = ao2mo.restore(1, eriaa, nmo) eriab = ao2mo.restore(1, eriab, nmo) eribb = ao2mo.restore(1, eribb, nmo) jkcpp = numpy.einsum('iipq->ipq', eriaa[:ncore[0],:ncore[0],:,:]) \ - numpy.einsum('ipqi->ipq', eriaa[:ncore[0],:,:,:ncore[0]]) jkcPP = numpy.einsum('iipq->ipq', eribb[:ncore[1],:ncore[1],:,:]) \ - numpy.einsum('ipqi->ipq', eribb[:ncore[1],:,:,:ncore[1]]) jC_pp = numpy.einsum('pqii->pq', eriab[:,:,:ncore[1],:ncore[1]]) jc_PP = numpy.einsum('iipq->pq', eriab[:ncore[0],:ncore[0],:,:]) aapp = numpy.copy(eriaa[ncore[0]:nocc[0],ncore[0]:nocc[0],:,:]) aaPP = numpy.copy(eriab[ncore[0]:nocc[0],ncore[0]:nocc[0],:,:]) AApp = numpy.copy(eriab[:,:,ncore[1]:nocc[1],ncore[1]:nocc[1]].transpose(2,3,0,1)) AAPP = numpy.copy(eribb[ncore[1]:nocc[1],ncore[1]:nocc[1],:,:]) appa = numpy.copy(eriaa[ncore[0]:nocc[0],:,:,ncore[0]:nocc[0]]) apPA = numpy.copy(eriab[ncore[0]:nocc[0],:,:,ncore[1]:nocc[1]]) APPA = numpy.copy(eribb[ncore[1]:nocc[1],:,:,ncore[1]:nocc[1]]) cvCV = numpy.copy(eriab[:ncore[0],ncore[0]:,:ncore[1],ncore[1]:]) Icvcv = eriaa[:ncore[0],ncore[0]:,:ncore[0],ncore[0]:] * 2\ - eriaa[:ncore[0],:ncore[0],ncore[0]:,ncore[0]:].transpose(0,3,1,2) \ - eriaa[:ncore[0],ncore[0]:,:ncore[0],ncore[0]:].transpose(0,3,2,1) ICVCV = eribb[:ncore[1],ncore[1]:,:ncore[1],ncore[1]:] * 2\ - eribb[:ncore[1],:ncore[1],ncore[1]:,ncore[1]:].transpose(0,3,1,2) \ - eribb[:ncore[1],ncore[1]:,:ncore[1],ncore[1]:].transpose(0,3,2,1) Iapcv = eriaa[ncore[0]:nocc[0],:,:ncore[0],ncore[0]:] * 2 \ - eriaa[:,ncore[0]:,:ncore[0],ncore[0]:nocc[0]].transpose(3,0,2,1) \ - eriaa[:,:ncore[0],ncore[0]:,ncore[0]:nocc[0]].transpose(3,0,1,2) IAPCV = eribb[ncore[1]:nocc[1],:,:ncore[1],ncore[1]:] * 2 \ - eribb[:,ncore[1]:,:ncore[1],ncore[1]:nocc[1]].transpose(3,0,2,1) \ - eribb[:,:ncore[1],ncore[1]:,ncore[1]:nocc[1]].transpose(3,0,1,2) apCV = numpy.copy(eriab[ncore[0]:nocc[0],:,:ncore[1],ncore[1]:]) APcv = numpy.copy(eriab[:ncore[0],ncore[0]:,ncore[1]:nocc[1],:].transpose(2,3,0,1)) print('jkcpp', numpy.allclose(jkcpp, eris0.jkcpp)) print('jkcPP', numpy.allclose(jkcPP, eris0.jkcPP)) print('jC_pp', numpy.allclose(jC_pp, eris0.jC_pp)) print('jc_PP', numpy.allclose(jc_PP, eris0.jc_PP)) print('aapp ', numpy.allclose(aapp , eris0.aapp )) print('aaPP ', numpy.allclose(aaPP , eris0.aaPP )) print('AApp ', numpy.allclose(AApp , eris0.AApp )) print('AAPP ', numpy.allclose(AAPP , eris0.AAPP )) print('appa ', numpy.allclose(appa , eris0.appa )) print('apPA ', numpy.allclose(apPA , eris0.apPA )) print('APPA ', numpy.allclose(APPA , eris0.APPA )) print('cvCV ', numpy.allclose(cvCV , eris0.cvCV )) print('Icvcv', numpy.allclose(Icvcv, eris0.Icvcv)) print('ICVCV', numpy.allclose(ICVCV, eris0.ICVCV)) print('Iapcv', numpy.allclose(Iapcv, eris0.Iapcv)) print('IAPCV', numpy.allclose(IAPCV, eris0.IAPCV)) print('apCV ', numpy.allclose(apCV , eris0.apCV )) print('APcv ', numpy.allclose(APcv , eris0.APcv ))