# Source code for pyscf.mcscf.apc

# Author: Daniel S. King

'''
APC Ranked-Orbital Active Space Selection
If you find this module useful for your work, please consider citing the following:

A Ranked-Orbital Approach to Select Active Spaces for High-Throughput Multireference Computation
https://doi.org/10.1021/acs.jctc.1c00037

Large-Scale Benchmarking of Multireference Vertical-Excitation Calculations via Automated Active-Space Selection
https://doi.org/10.1021/acs.jctc.2c00630
'''

from pyscf.lib import logger
from pyscf import scf, lib
import numpy as np

class Chooser():
    """
    Chooser Class

    Implements the ranked-orbital selection scheme outlined in
    https://doi.org/10.1021/acs.jctc.1c00037

    Given a set of entropies, will select all orbitals for the active space
    and then drop the lowest-entropy orbitals until the size constraint
    max_size is met.

    Args:
        orbs: 2D Numpy Array
            Orbitals to choose from, spanning the entire basis
            (must be square matrix of coefficients)
        occ: 1D Numpy Array
            Orbital occupations for orbs (2,1,0); nactel will be set to the
            number of electrons in the selected orbitals
        entropies: 1D Numpy Array
            Importance measurement used to rank the orbitals
        max_size: Int or Tuple
            Active space size constraint.
            If tuple (a list or 1D array of two entries is treated the same
            way), interpreted as (nelecas,ncas): the limit is the number of
            CSFs of that active space.
            If int, interpreted as max # of orbitals

    Returns:
        active-space-size, #-active-electrons, orbital-initial-guess,
        chosen-active-orbital-indices

    Example:

    #Randomly ranked orbitals
    >>> import numpy as np
    >>> from pyscf import gto, scf, mcscf
    >>> from pyscf.mcscf import apc
    >>> mol = gto.M(atom='H 0 0 0; H 0 0 1', basis='ccpvtz')
    >>> mf = scf.RHF(mol).run()
    >>> entropies = np.random.choice(np.arange(len(mf.mo_occ)),len(mf.mo_occ),replace=False)
    >>> chooser = apc.Chooser(mf.mo_coeff,mf.mo_occ,entropies,max_size=(2,2))
    >>> ncas, nelecas, casorbs, active_idx = chooser.kernel()
    >>> mc = mcscf.CASSCF(mf, ncas, nelecas).run(casorbs)
    """

    def __init__(self, orbs, occ, entropies, max_size=(8, 8), verbose=4):
        # Check that we have a full set of orbitals, with exactly one
        # occupation number and one entropy per orbital:
        assert(orbs.shape[0] == orbs.shape[1])
        assert(len(occ) == len(entropies))
        assert(len(entropies) == orbs.shape[1])

        self.log = logger.new_logger(lib.StreamObject, verbose)
        self.orbs = orbs
        self.occ = np.array(occ)
        self.entropies = np.asarray(entropies)
        self.max_size = max_size
        self.verbose = verbose

    def _ncsf(self, nactel, norbs):
        """
        Returns number of CSFs in a (nactel,norbs) active space

        Assumes minimum number Sz = alpha - beta
        (0 for even nactel, 1 for odd nactel)
        """
        from scipy.special import comb
        alpha = int(nactel//2 + nactel % 2)
        beta = int(nactel//2)
        # Determinants with this Sz minus those with the next-higher Sz.
        term1 = comb(norbs, alpha)*comb(norbs, beta)
        term2 = comb(norbs, alpha+1)*comb(norbs, beta-1)
        return term1 - term2

    def _calc_ncsf(self, active_idx):
        """
        Returns the number of CSFs given the active index using the info in
        self.occ

        Passes this info to self._ncsf to calculate the size of the active
        space
        """
        nactel = np.sum(self.occ[active_idx])
        return self._ncsf(nactel, len(active_idx))

    def _as_size(self, active_idx, use_ncsf):
        """
        Size of the active space given by active_idx, in the same unit as
        the size constraint: number of CSFs if use_ncsf, else number of
        orbitals.
        """
        if use_ncsf:
            return self._calc_ncsf(active_idx)
        return len(active_idx)

    def _as_is_reasonable(self, active_idx):
        """
        Checks active space reasonability: it must contain at least one
        electron, must not be completely filled, and must have at least as
        many orbitals as there are singly occupied orbitals in self.occ.
        """
        occ = self.occ
        nactel = np.sum(occ[active_idx])
        num_os = len(np.where(occ == 1)[0])
        nactorbs = len(active_idx)

        condition1 = (nactel > 0)
        condition2 = (nactel < 2*nactorbs)
        condition3 = (nactorbs >= num_os)

        if condition1 and condition2 and condition3:
            return True

        self.log.debug("Active space is not reasonable!")
        self.log.debug(f"Nactel: {nactel}, Nactorbs: {nactorbs}, Num OS: {num_os}")
        # Only the first failing condition is reported.
        if not condition1:
            self.log.debug("Condition 1 not met")
        elif not condition2:
            self.log.debug("Condition 2 not met")
        elif not condition3:
            self.log.debug("Condition 3 not met")
        return False

    def kernel(self):
        """
        Runs the ranked-orbital selection.

        Returns:
            nactorbs: number of selected active orbitals
            nactel: (alpha,beta) tuple of active electrons
            casorbs: orbitals reordered as [inactive, active, secondary]
            active_idx: list of indices (into the input orbitals) of the
                selected active orbitals

        Raises:
            RuntimeError: if no orbital can be dropped without making the
                active space unreasonable while it is still too large
        """
        log = self.log
        # Work on a float copy: with integer-valued rankings the +0.01
        # offset below would otherwise be truncated away on assignment.
        entropies = self.entropies.astype(float)
        occ = self.occ.copy()

        #Change singly occupied orbitals to have larger entropies so they are selected:
        os_idx = np.where(occ == 1)[0]
        entropies[os_idx] = np.max(entropies) + 0.01
        if len(os_idx) > 0:
            log.info("Singly occupied orbitals found, setting them to have entropy of max + 0.01...")

        #Start with all orbitals in active space:
        active_idx = list(range(len(entropies)))
        inactive_idx = []
        secondary_idx = []

        #Size constraint:
        # Decide ONCE how the constraint is measured so that the initial size
        # and every size recomputed in the loop use the same unit.
        use_ncsf = isinstance(self.max_size, (tuple, list, np.ndarray))
        if use_ncsf:
            max_nactel, max_norbs = self.max_size
            max_size = self._ncsf(max_nactel, max_norbs)
        else:
            max_size = self.max_size
        as_size = self._as_size(active_idx, use_ncsf)

        nactel = int(np.sum(occ[active_idx]))
        nactorbs = len(active_idx)
        log.debug(f"Initial active space of ({nactel},{nactorbs}) has size {as_size}")
        log.debug(f"Maximum active space size set to {max_size}")

        #Drop orbitals until size constraint is satisfied:
        while as_size > max_size:
            nactel = int(np.sum(occ[active_idx]))
            nactorbs = len(active_idx)
            log.debug(f"Active space of ({nactel},{nactorbs}) has size {as_size} larger than {max_size}")
            log.debug("Dropping lowest entropy orbital...")

            #Rank active orbitals from lowest to highest entropy
            order = np.argsort(entropies[active_idx])
            ranked_active_idx = [active_idx[i] for i in order]

            #Drop lowest orbital in succession, checking for reasonability:
            for dropped_idx in ranked_active_idx:
                dropped_idx_entropy = np.round(entropies[dropped_idx], 4)
                dropped_idx_occ = int(occ[dropped_idx])
                new_active_idx = [i for i in active_idx if i != dropped_idx]

                log.debug(f"Attempting to drop orbital {dropped_idx} "
                          f"(occ={dropped_idx_occ}, S={dropped_idx_entropy})...")
                if self._as_is_reasonable(new_active_idx):
                    log.debug("Orbital has been dropped")
                    break
                log.debug("Active space becomes unreasonable if this orbital is dropped, trying next option...")
            else:
                # Every remaining candidate was rejected.
                log.error("Not enough orbitals to choose a reasonable active space!")
                raise RuntimeError("Not enough orbitals to choose a reasonable active space!")

            active_idx = new_active_idx
            # Occupied orbitals go to the inactive (core) space, empty ones
            # to the secondary (virtual) space.
            if dropped_idx_occ > 0:
                inactive_idx.append(dropped_idx)
            else:
                secondary_idx.append(dropped_idx)

            #Calculate new size:
            as_size = self._as_size(active_idx, use_ncsf)

        #Final checks:
        assert(len(active_idx) <= len(entropies))
        assert(as_size <= max_size)

        orbs = self.orbs
        inactive_orbs = orbs[:, inactive_idx]
        active_orbs = orbs[:, active_idx]
        secondary_orbs = orbs[:, secondary_idx]
        casorbs = np.hstack([inactive_orbs, active_orbs, secondary_orbs])
        nactorbs = active_orbs.shape[1]

        # Doubly occupied orbitals contribute one alpha and one beta
        # electron; singly occupied orbitals are counted as alpha.
        active_occ = occ[active_idx]
        nboth = int(np.count_nonzero(active_occ == 2))
        nalpha = int(np.count_nonzero(active_occ == 1))
        nactel = (nboth + nalpha, nboth)

        log.info(f"Final selected active space: ({nactel},{nactorbs})")

        return nactorbs, nactel, casorbs, active_idx
[docs] class APC(): """ APC Class Implements APC orbital entropy estimation from https://doi.org/10.1021/acs.jctc.1c00037 APC-N implemented from https://doi.org/10.1021/acs.jctc.2c00630 .kernel() combines this with the ranked-orbital scheme implemented in Chooser() to select an active space of size max_size from the orbitals in mf.mo_coeff with occupancy mf.mo_occ Args: mf: an :class:`SCF` object Must expose mf.mo_coeff, mf.mo_occ, mf.get_fock(), and mf.get_k() max_size: Int or Tuple Active space size constraint. If tuple, interpreted as (nelecas,ncas) If int interpreted as max # of orbitals n: Int Number of times to remove highest-entropy virtual orbitals in entropy calculation. A higher value will tend to select active spaces with less doubly occupied orbitals. Kwargs: eps: Float Small offset added to singly occupied and removed virtual orbital entropies (can generally be ignored) Returns: active-space-size, #-active-electrons, orbital-initial-guess (following AVAS convention) Example: >>> import numpy as np >>> from pyscf import gto, scf, mcscf >>> from pyscf.mcscf import apc >>> mol = gto.M(atom='H 0 0 0; H 0 0 1', basis='ccpvtz') >>> mf = scf.RHF(mol).run() >>> myapc = apc.APC(mf,max_size=2) >>> ncas,nelecas,casorbs = myapc.kernel() >>> mc = mcscf.CASSCF(mf, ncas, nelecas).run(casorbs) """ def __init__(self,mf,max_size=(8,8),n=2,eps=1e-3,verbose=4): self.log = logger.new_logger(lib.StreamObject,verbose) self.mf = mf self.n = n self.eps = eps assert(eps > 0) #Check that eps > 0 self.max_size = max_size self.verbose = verbose def _apc(self,orbs,occ,f_mo,k_mo): """ Calculates APC entropies for given orbitals, occupations, and F and K matrix elements Singly occupied orbitals are set to max value of other orbitals + self.eps Args: orbs: 2D Numpy Array A nbasis x nmo array of candidate AS orbitals occ: 1D Numpy Array Orbital occupations for orbs (2,1,0) f_mo: 2D Numpy Array Fock operator in the basis of the orbs (nmo x nmo) k_mo: 2D Numpy Array Exchange operator in 
the basis of the orbs (nmo x nmo) """ eps = self.eps docc_idx = np.where(occ == 2)[0] os_idx = np.where(occ == 1)[0] virt_idx = np.where(occ == 0)[0] #Calculate APCs apcs = np.zeros([len(docc_idx),len(virt_idx)]) for i,d in enumerate(docc_idx): for j,v in enumerate(virt_idx): k12 = 0.5*k_mo[v,v] delta = f_mo[v,v] - f_mo[d,d] c = -k12/(delta + np.sqrt(k12**2 + delta**2)) apcs[i,j] = c #Calculate entropies apc_entropies = np.zeros(orbs.shape[1]) for o in range(orbs.shape[1]): #Collect APCs for this orbital: if o in os_idx: continue #Fill in later with max value elif o in docc_idx: idx = np.where(docc_idx == o)[0][0] apcs_o = apcs[idx,:] elif o in virt_idx: idx = np.where(virt_idx == o)[0][0] apcs_o = apcs[:,idx] #Normalize APCs: cis = apcs_o cis2 = cis**2 sumci2 = np.sum(cis2) norm = np.sqrt((sumci2 + 1)) cisnorm = cis/norm #Square Normalized APCs to calculate entropies: cisnorm2 = cisnorm**2 assert((cisnorm2 < 1).any().all()) sumcisnorm2 = np.sum(cisnorm2) assert(np.allclose((sumcisnorm2 + (1/norm)**2),1,atol=1e-6)) exent = -sumcisnorm2 * np.log(sumcisnorm2) gsent = -(1/norm)**2 * np.log((1/norm)**2) ent = exent + gsent apc_entropies[o] = ent #Assign max value to singly occupied orbitals plus some small value: apc_entropies[os_idx] = np.max(apc_entropies) + eps return apc_entropies def _calc_apc_entropies(self,mf): """ Implements the "APC-N" approach in which high-entropy virtual orbitals are repeatedly set to singly occupied Then sets the singly occupied orbitals and previously removed orbitals to high values Reads the value of n from self.n Args: mf: an :class:`SCF` object Must expose mf.mo_coeff, mf.mo_occ, mf.get_fock(), and mf.get_k() """ log = self.log n = self.n eps = self.eps log.info(f"Calculating APC entropies (N={n})...") f_ao = mf.get_fock() k_ao = mf.get_k() if isinstance(mf, scf.uhf.UHF): log.note('UHF object found. 
APC uses averaged F, summed K, summed occupation, and alpha orbitals.') orbs = mf.mo_coeff[0] occ = mf.mo_occ.sum(axis=0) #summed occupation f_ao = np.sum(f_ao,axis=0)/2 #averaged fock k_ao = np.sum(k_ao,axis=0) #summed exchange elif isinstance(mf, scf.rohf.ROHF): log.note('ROHF object found. APC uses summed K') orbs = mf.mo_coeff occ = mf.mo_occ.copy() k_ao = np.sum(k_ao,axis=0) #summed exchange else: orbs = mf.mo_coeff occ = mf.mo_occ.copy() #Calculate f and k in mo basis log.info("Transforming F and K to MO basis...") f_mo = np.linalg.multi_dot([orbs.T,f_ao,orbs]) k_mo = np.linalg.multi_dot([orbs.T,k_ao,orbs]) #Calculate entropies removed_idx = [] original_os = np.where(occ == 1)[0] log.info("Calculating initial APC entropies...") apc_entropies = self._apc(orbs,occ,f_mo,k_mo) for loop_n in range(n): if loop_n > 0: log.info(f"Calculating APC entropies (Round {loop_n})...") apc_entropies = self._apc(orbs,occ,f_mo,k_mo) maxS = np.round(np.max(apc_entropies),5) log.info(f"Maximum entropy: {maxS}") #Remove highest virtual and set occ to 1 virt_idx = np.where(occ == 0)[0] to_remove = virt_idx[np.argmax(apc_entropies[virt_idx])] removed_idx += [to_remove] log.info(f"Setting maximum virtual orbitals {removed_idx} to occupation 1...") occ[removed_idx] = 1 log.info("Calculating final APC entropies...") apc_entropies = self._apc(orbs,occ,f_mo,k_mo) maxS = np.round(np.max(apc_entropies),5) log.info(f"Maximum entropy: {maxS}") #Iterate over os and removed virtuals and set to max in order: maxs = np.max(apc_entropies) for i,o in enumerate(original_os): apc_entropies[o] = maxs + 2*eps - i*eps*1e-2 for i,o in enumerate(removed_idx): apc_entropies[o] = maxs + eps - i*eps*1e-2 return apc_entropies
[docs] def kernel(self): log = self.log log.info('\n** APC Active Space Selection **') entropies = self._calc_apc_entropies(self.mf) self.entropies = entropies if isinstance(self.mf, scf.uhf.UHF): orbs = self.mf.mo_coeff[0] #alpha orbitals occ = self.mf.mo_occ.sum(axis=0) #summed occupation else: orbs = self.mf.mo_coeff occ = self.mf.mo_occ max_size = self.max_size log.info(f"Choosing active space with ranked orbital approach (max_size = {max_size})...") chooser = Chooser(orbs,occ,entropies,max_size,verbose=self.verbose) nactorbs, nactel, casorbs, active_idx = chooser.kernel() self.active_idx = active_idx return nactorbs, nactel, casorbs