GauXC in C++

GauXC in C++#

In this guide we will cover how to use the GauXC library in C++. We will cover

setting up a CMake project including GauXC as dependency
initializing the GauXC runtime environment
reading molecule, basis set, and density matrix from an HDF5 input file
setting up the integration grid, load balancer, and exchange-correlation integrator
performing the exchange-correlation evaluation and outputting the results

Tip

For building GauXC and installing in the conda environment checkout Installing GauXC.

Setting up CMake#

GauXC can be most conveniently used via CMake, therefore we will setup a minimal CMake project for a command line driver to use GauXC. Next to GauXC we will use other dependencies as needed.

The directory structure for the project will be

├── CMakeLists.txt
├── app
│   └── main.cxx
└── cmake
    ├── skala-cli11.cmake
    ├── skala-dep-versions.cmake
    ├── skala-eigen3.cmake
    └── skala-gauxc.cmake

First we create the main CMakeLists.txt to define our project, include our dependencies, and declare our executable.

CMakeLists.txt#

cmake_minimum_required(VERSION 3.20 FATAL_ERROR)

project("Skala" LANGUAGES CXX)

include(GNUInstallDirs)
include(FetchContent)
set(FETCHCONTENT_UPDATES_DISCONNECTED ON CACHE BOOL "Disable FC Updates")

list(PREPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)

option(Skala_GauXC_ENABLE_OPENMP "Enable OpenMP support in GauXC" ON)
option(Skala_GauXC_ENABLE_MPI "Enable MPI support in GauXC" OFF)
option(Skala_GauXC_ENABLE_CUDA "Enable CUDA support in GauXC" OFF)

include(skala-dep-versions)
include(skala-gauxc)
include(skala-cli11)
include(skala-eigen3)

add_executable(
  "${PROJECT_NAME}"
  "app/main.cxx"
)
target_link_libraries(
  "${PROJECT_NAME}"
  PRIVATE
  gauxc::gauxc
  CLI11::CLI11
  Eigen3::Eigen
)
install(
  TARGETS
  "${PROJECT_NAME}"
  DESTINATION
  "${CMAKE_INSTALL_BINDIR}"
)

list(REMOVE_AT CMAKE_MODULE_PATH 0)

For handling the dependencies, we create a separate file in the cmake/ subdirectory to include the path and checksums for all our dependencies.

cmake/skala-dep-versions.cmake#

set(Skala_GauXC_URL "https://github.com/microsoft/skala/releases/download/v1.1.1/gauxc-skala-r1.tar.gz")
set(Skala_GauXC_SHA256 "01f8856502e136281b0837f5d19973f02e32e2f0c97561a5bb42962344e0f944")

set(Skala_CLI11_URL "https://github.com/CLIUtils/CLI11/archive/refs/tags/v2.6.1.tar.gz")
set(Skala_CLI11_SHA256 "377691f3fac2b340f12a2f79f523c780564578ba3d6eaf5238e9f35895d5ba95")

set(Skala_Eigen3_URL "https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz")
set(Skala_Eigen3_SHA256 "8586084f71f9bde545ee7fa6d00288b264a2b7ac3607b974e54d13e7162c1c72")

For each dependency we will create a separate CMake include file for finding and making it available. First, we define how we will include GauXC our main dependency. For this we can rely in most cases to discover the GauXC config file, however we also provide a fallback to download and build GauXC in case it is not available in the environment. The options we defined in the main CMake file will be passed through to GauXC to ensure the library provides the requested features. Furthermore, after having GauXC available, we double check whether our requirements for GauXC are satisfied, this is especially necessary for the Skala implementation, which requires the GAUXC_HAS_ONEDFT feature flag.

cmake/skala-gauxc.cmake#

if(NOT DEFINED Skala_GauXC_URL)
  include(skala-dep-versions)
endif()
find_package(gauxc QUIET CONFIG)
if(NOT gauxc_FOUND)
  include(FetchContent)

  message(STATUS "Could not find GauXC... Building GauXC from source")
  message(STATUS "GAUXC URL: ${Skala_GauXC_URL}")

  set(GAUXC_ENABLE_ONEDFT ON CACHE BOOL "" FORCE)
  set(GAUXC_ENABLE_TESTS OFF CACHE BOOL "" FORCE)
  set(GAUXC_ENABLE_HDF5 ON CACHE BOOL "" FORCE)
  set(GAUXC_ENABLE_OPENMP ${Skala_GauXC_ENABLE_OPENMP} CACHE BOOL "" FORCE)
  set(GAUXC_ENABLE_MPI ${Skala_GauXC_ENABLE_MPI} CACHE BOOL "" FORCE)
  set(GAUXC_ENABLE_CUDA ${Skala_GauXC_ENABLE_CUDA} CACHE BOOL "" FORCE)

  FetchContent_Declare(
    gauxc
    URL ${Skala_GauXC_URL}
    URL_HASH SHA256=${Skala_GauXC_SHA256}
    DOWNLOAD_EXTRACT_TIMESTAMP ON
  )
  FetchContent_MakeAvailable(gauxc)

endif()
if(NOT GAUXC_HAS_ONEDFT)
  message(FATAL_ERROR "GauXC found but without Skala support enabled")
endif()
if(NOT GAUXC_HAS_HDF5)
  message(FATAL_ERROR "GauXC found but without HDF5 support enabled")
endif()
if(Skala_GauXC_ENABLE_OPENMP AND NOT GAUXC_HAS_OPENMP)
  message(FATAL_ERROR "GauXC found without OpenMP support but Skala_GauXC_ENABLE_OPENMP is ON")
endif()
if(Skala_GauXC_ENABLE_MPI AND NOT GAUXC_HAS_MPI)
  message(FATAL_ERROR "GauXC found without MPI support but Skala_GauXC_ENABLE_MPI is ON")
endif()
if(Skala_GauXC_ENABLE_CUDA AND NOT GAUXC_HAS_CUDA)
  message(FATAL_ERROR "GauXC found without CUDA support but Skala_GauXC_ENABLE_CUDA is ON")
endif()

While GauXC provides the implementation to evaluate the exchange-correlation functional, it is independent to the library used for storing matrices. For our example here we will be using Eigen3. Similar to GauXC we will attempt to find Eigen3 via its config file and fallback to downloading it. Since Eigen3 is a header-only library, we just need to reexport the include directory of the project.

cmake/skala-eigen3.cmake#

if(NOT DEFINED Skala_Eigen3_URL)
  include(skala-dep-versions)
endif()
find_package(Eigen3 CONFIG HINTS ${EIGEN3_ROOT_DIR})
if(NOT Eigen3_FOUND)
  message(STATUS "Could Not Find Eigen3... Building Eigen3 from source")
  message(STATUS "EIGEN3 REPO = ${Skala_Eigen3_URL}")

  FetchContent_Declare(
    eigen3
    URL ${Skala_Eigen3_URL}
    URL_HASH SHA256=${Skala_Eigen3_SHA256}
    DOWNLOAD_EXTRACT_TIMESTAMP ON
  )

  FetchContent_GetProperties(eigen3)
  if(NOT eigen3_POPULATED)
    FetchContent_Populate(eigen3)
  endif()

  add_library(Eigen3::Eigen INTERFACE IMPORTED)
  set_target_properties(
    Eigen3::Eigen
    PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES ${eigen3_SOURCE_DIR}
  )
endif()

For our command line driver, we will be using CLI11 to create the command line interface. Similar to Eigen3, CLI11 is a header-only library and we will use the same approach for including its headers if the dependency can not be found in the environment.

cmake/skala-cli11.cmake#

if(NOT DEFINED Skala_CLI11_URL)
  include(skala-dep-versions)
endif()
find_package(CLI11 QUIET CONFIG)
if(NOT CLI11_FOUND)
  message(STATUS "Could not find CLI11... Building CLI11 from source")
  message(STATUS "CLI11 URL: ${Skala_CLI11_URL}")

  FetchContent_Declare(
    cli11
    URL ${Skala_CLI11_URL}
    URL_HASH SHA256=${Skala_CLI11_SHA256}
    DOWNLOAD_EXTRACT_TIMESTAMP ON
  )

  FetchContent_GetProperties(cli11)
  if(NOT cli11_POPULATED)
    FetchContent_Populate(cli11)
  endif()

  add_library( CLI11::CLI11 INTERFACE IMPORTED )
  set_target_properties( CLI11::CLI11 PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES ${cli11_SOURCE_DIR}/include
  )
endif()

With this we have the full CMake setup we need for creating our command line driver.

Initializing GauXC#

For the main program we start with including the relevant headers for GauXC. In our case those come from GauXC for the main functionality of the library, HighFive for access to HDF5 files, Eigen3 for matrix types, and CLI11 for creating the command line interface.

app/main.cxx (headers)#

// For GauXC core functionality
#include <gauxc/exceptions.hpp>
#include <gauxc/molecular_weights.hpp>
#include <gauxc/molgrid/defaults.hpp>
#include <gauxc/runtime_environment.hpp>
#include <gauxc/xc_integrator.hpp>
#include <gauxc/xc_integrator/integrator_factory.hpp>

// For loading data from HDF5 files
#include <gauxc/external/hdf5.hpp>
#include <highfive/H5File.hpp>

// For providing matrix implementation
#define EIGEN_DONT_VECTORIZE
#define EIGEN_NO_CUDA
#include <Eigen/Core>
using matrix = Eigen::MatrixXd;

// For command line interface
#include <CLI/CLI.hpp>

We start defining our main program with a set of default variables. In this tutorial we will be using

input_file: an HDF5 input file to provide the molecule, basis and density matrix
model: the model checkpoint we want to evaluate
grid_spec: the grid size specification which defines the number of angular and radial integration points
rad_quad_spec: the radial quadrature scheme which defines the spacing of radial integration points
prune_spec: the pruning scheme for combining the atomic grids to a molecular one

Furthermore, we have the variables which define where GauXC is executing (host or device) and batch_size and basis_tol for the numerical settings of the evaluation.

app/main.cxx (initialization)#

int
main(int argc, char** argv)
{
#ifdef GAUXC_HAS_MPI
  MPI_Init(NULL, NULL);
#endif
  {
    std::string input_file;
    std::string model;
    std::string grid_spec = "fine";
    std::string rad_quad_spec = "muraknowles";
    std::string prune_spec = "robust";
    std::string lb_exec_space_str = "host";
    std::string int_exec_space_str = "host";
    int batch_size = 512;
    double basis_tol = 1e-10;

Note

The settings for the molecular grid (grid_spec, rad_quad_spec, prune_spec) are defined in more detail in Molecular grid settings reference.

Command line interface#

Next we will create a command line interface based on the CLI11 library. Each of the default options we specified will be included there.

app/main.cxx (command-line)#

    {
      auto string_to_lower = CLI::Validator(
        [](auto& str){
          std::transform(str.begin(), str.end(), str.begin(), ::tolower);
          return "";
        }, std::string(""), std::string("argument is case-insensitive"));
      CLI::App app{"Skala GauXC driver"};
      app.option_defaults()->always_capture_default();
      app.add_option("input", input_file, "Input file in HDF5 format")->required()->check(CLI::ExistingFile);
      app.add_option("--model", model, "Model checkpoint to evaluate")->required();
      app.add_option("--grid-size", grid_spec, "Grid specification (fine|ultrafine|superfine|gm3|gm5)")->transform(string_to_lower);
      app.add_option("--radial-quad", rad_quad_spec, "Radial quadrature specification (becke|muraknowles|treutlerahlrichs|murrayhandylaming)")->transform(string_to_lower);
      app.add_option("--prune-scheme", prune_spec, "Pruning scheme (unpruned|robust|treutler)")->transform(string_to_lower);
      app.add_option("--lb-exec-space", lb_exec_space_str, "Load balancing execution space")->transform(string_to_lower);
      app.add_option("--int-exec-space", int_exec_space_str, "Integration execution space")->transform(string_to_lower);
      app.add_option("--batch-size", batch_size, "");
      app.add_option("--basis-tol", basis_tol, "");
      CLI11_PARSE(app, argc, argv);
    }

Before adding any further implementation, we can add the finalization to our main and create a first build of the project.

app/main.cxx (finalization)#

  }
#ifdef GAUXC_HAS_MPI
  MPI_Finalize();
#endif
}

To configure the CMake build run

cmake -B build -G Ninja -S .
cmake --build build

After we build our project successfully, we can run our driver directly from the build directory with

./build/Skala --help

As output you should see the help page generated by our command-line interface

Skala GauXC driver


./build/Skala [OPTIONS] input


POSITIONALS:
  input TEXT:FILE REQUIRED    Input file in HDF5 format

OPTIONS:
  -h,     --help              Print this help message and exit
          --model TEXT        Model checkpoint to evaluate
          --grid-size TEXT [fine]
                              Grid specification (fine|ultrafine|superfine|gm3|gm5)
          --radial-quad TEXT [muraknowles]
                              Radial quadrature specification
                              (becke|muraknowles|treutlerahlrichs|murrayhandylaming)
          --prune-scheme TEXT [robust]
                              Pruning scheme (unpruned|robust|treutler)
          --lb-exec-space TEXT [host]
                              Load balancing execution space
          --int-exec-space TEXT [host]
                              Integration execution space
          --batch-size INT [512]
          --basis-tol FLOAT [1e-10]

Now that we are able to change our program variables conveniently from the command-line, we will initialize the GauXC runtime. For this we are defining a new function to handle different cases, like having access to a device (GPU) or running MPI parallel. GauXC provides preprocessor guards like GAUXC_HAS_DEVICE and GAUXC_HAS_MPI or convenience macros like GAUXC_MPI_CODE for defining conditional code paths. When creating the runtime environment, we will provide it with the MPI world communicator if available and if we have a device preallocate memory on the device.

app/main.cxx (get_runtime)#

GauXC::RuntimeEnvironment
get_runtime()
{
#ifdef GAUXC_HAS_DEVICE
  auto rt = GauXC::DeviceRuntimeEnvironment( GAUXC_MPI_CODE(MPI_COMM_WORLD,) 0.9 );
  // Calculate GauXC Device buffer size
  size_t available_mem, total_mem;
  cudaMemGetInfo(&available_mem, &total_mem);
  int device_id;
  cudaGetDevice(&device_id);
  size_t sz = 0.9 * available_mem;  
  void* p;
  cudaMallocAsync(&p, sz, 0);
  cudaStreamSynchronize(0);
  rt.set_buffer(p, sz);
#else
  auto rt = GauXC::RuntimeEnvironment(GAUXC_MPI_CODE(MPI_COMM_WORLD));
#endif
  return rt;
}

We return the runtime environment in the main program.

app/main.cxx (runtime setup)#

    // Create runtime
    auto rt = get_runtime();
    auto world_rank = rt.comm_rank();
    auto world_size = rt.comm_size();

The world_size and world_rank variables describe our MPI environment and contain dummy values if we do not use MPI.

Before we continue with setting up GauXC, we include an output of our program variables. Here we can use world_rank provided by the runtime environment to ensure only the root rank outputs the information.

app/main.cxx (inputs)#

    if (!world_rank) {
      std::cout << std::boolalpha;
      std::cout << "Configuration" << std::endl
                << "-> Input file        : " << input_file << std::endl
                << "-> Model             : " << model << std::endl
                << "-> Grid              : " << grid_spec << std::endl
                << "-> Radial quadrature : " << rad_quad_spec << std::endl
                << "-> Pruning scheme    : " << prune_spec << std::endl
                << std::endl;
    }

Molecule data#

For reading the molecule we will use GauXC’s built-in functionality to read from an HDF5 dataset.

Note

The GauXC::Molecule stores the information about the atomic numbers and their coordinates as an array of structs.

struct GauXC::Atom {
  int64_t Z; ///< Atomic number
  double x;  ///< X coordinate (bohr)
  double y;  ///< Y coordinate (bohr)
  double z;  ///< Z coordinate (bohr)
};
class GauXC::Molecule : public std::vector<GauXC::Atom> {
  ...
};

This allows to directly map the object’s representation to an HDF5 dataset.

We use void GauXC::read_hdf5_record(Molecule&,string,string)() function which implements the reading of the molecule data.

app/main.cxx (read_molecule)#

// Load molecule from HDF5 dataset
GauXC::Molecule
read_molecule(std::string ref_file)
{
  GauXC::Molecule mol;
  GauXC::read_hdf5_record(mol, ref_file, "/MOLECULE");
  return mol;
}

In the main program we will just use our small wrapper function to obtain the molecule.

app/main.cxx (molecule)#

    // Get molecule (atomic numbers and cartesian coordinates)
    auto mol = read_molecule(input_file);

Basis set data#

For the basis set we will use the same approach as for the molecule and use GauXC’s built-in HDF5 reading functionality.

Note

Similar to the molecule the GauXC::BasisSet object is built as an array of GauXC::Shell objects. The GauXC::Shell object contains the information about the primitives, contraction coefficients, angular momentum, and center of the shell.

template <typename F>
class alignas(256) GauXC::Shell {
  std::array<F, 32> alpha; ///< exponents of primitives
  std::array<F, 32> coeff; ///< contraction coefficients
  std::array<double, 3> O; ///< origin of the shell
  int32_t nprim; ///< number of primitives
  int32_t l; ///< angular moment of the shell
  int32_t pure; ///< pure=1: spherical Gaussianss; pure=0: cartesian Gaussianss
};

template <typename F>
struct GauXC::BasisSet : public std::vector<GauXC::Shell<F>> {
  ...
};

Again, this allows to directly map the object’s representation to an HDF5 dataset.

With GauXC’s void GauXC::read_hdf5_record(BasisSet&,string,string)() function we can read the basis set data conveniently from the HDF5 file. Additionally, we are setting the basis set tolerance on the loaded basis set data, which will be taken from our input variables, by default we use a tolerance of 1e-10. The basis set tolerance will be used for screening small contributions during the evaluation of the density on the grid points.

app/main.cxx (read_basis)#

// Load basis from HDF5 dataset
GauXC::BasisSet<double>
read_basis(std::string ref_file, double basis_tol)
{
  GauXC::BasisSet<double> basis;
  GauXC::read_hdf5_record(basis, ref_file, "/BASIS");
  for(auto& shell : basis){
    shell.set_shell_tolerance(basis_tol);
  }
  return basis;
}

In the main program we can use our wrapper function to load the basis set.

app/main.cxx (basis)#

    // Get basis set
    auto basis = read_basis(input_file, basis_tol);

Integration grid#

To setup the integration grid, which is the part of the input to GauXC for computing the exchange-correlation functional, we create a molecular grid. We have three main input parameters here, the grid size which defines the density of angular and radial points, the radial quadrature scheme which defines the spacing of the radial points, and the pruning scheme which defines how atomic grids are combined to a molecular grid. In GauXC these are defined as enumerators and we add a number of helper functions for turning the input strings from the command-line to the respective enumerator values.

app/main.cxx (grid setting)#

GauXC::AtomicGridSizeDefault
read_atomic_grid_size(std::string spec)
{
    std::map<std::string, GauXC::AtomicGridSizeDefault> mg_map = {
      {"fine",      GauXC::AtomicGridSizeDefault::FineGrid},
      {"ultrafine", GauXC::AtomicGridSizeDefault::UltraFineGrid},
      {"superfine", GauXC::AtomicGridSizeDefault::SuperFineGrid},
      {"gm3",       GauXC::AtomicGridSizeDefault::GM3},
      {"gm5",       GauXC::AtomicGridSizeDefault::GM5}
    };
    return mg_map.at(spec);
}

GauXC::PruningScheme
read_pruning_scheme(std::string spec)
{
    std::map<std::string, GauXC::PruningScheme> prune_map = {
      {"unpruned", GauXC::PruningScheme::Unpruned},
      {"robust",   GauXC::PruningScheme::Robust},
      {"treutler", GauXC::PruningScheme::Treutler}
    };
    return prune_map.at(spec);
}

GauXC::RadialQuad
read_radial_quad(std::string spec)
{
    std::map<std::string, GauXC::RadialQuad> rad_quad_map = {
      {"becke",             GauXC::RadialQuad::Becke},
      {"muraknowles",       GauXC::RadialQuad::MuraKnowles},
      {"treutlerahlrichs",  GauXC::RadialQuad::TreutlerAhlrichs},
      {"murrayhandylaming", GauXC::RadialQuad::MurrayHandyLaming},
    };
    return rad_quad_map.at(spec);
}

For the main program we can now create the molecular grid based on our input parameters. We also have to define the batch size for the grid, the default is 512 points per batch, however larger values up around 10000 are recommended for better performance.

app/main.cxx (grid)#

    // Define molecular grid from grid size, radial quadrature and pruning scheme
    auto grid = GauXC::MolGridFactory::create_default_molgrid(
      mol,
      read_pruning_scheme(prune_spec),
      GauXC::BatchSize(batch_size),
      read_radial_quad(rad_quad_spec),
      read_atomic_grid_size(grid_spec));

Exchange-correlation integrator#

To distribute the work of evaluating the exchange-correlation functional on the grid points, we create a load balancer. The load balancer will take care of distributing the grid points to the available resources, either host or device, based on the execuation space we provide. Note that the load balancer will provide access to the molecule, basis and grid data for all further usage in GauXC.

app/main.cxx (load balancer)#

    // Choose whether we run on host or device
  #ifdef GAUXC_HAS_DEVICE
    std::map<std::string, GauXC::ExecutionSpace> exec_space_map = {
      { "host",   GauXC::ExecutionSpace::Host },
      { "device", GauXC::ExecutionSpace::Device }
    };

    auto lb_exec_space = exec_space_map.at(lb_exec_space_str);
    auto int_exec_space = exec_space_map.at(int_exec_space_str);
  #else
    auto lb_exec_space  = GauXC::ExecutionSpace::Host;
    auto int_exec_space = GauXC::ExecutionSpace::Host;
  #endif

    // Setup load balancer based on molecule, grid and basis set
    GauXC::LoadBalancerFactory lb_factory(lb_exec_space, "Replicated");
    auto lb = lb_factory.get_shared_instance(rt, mol, grid, basis);

    // Apply partitioning weights to the molecule grid
    GauXC::MolecularWeightsFactory mw_factory(int_exec_space, "Default", 
      GauXC::MolecularWeightsSettings{} );
    auto mw = mw_factory.get_instance();
    mw.modify_weights(*lb);

Finally, we can create the main GauXC integrator, for this we setup the exchange-correlation integrator factory for producing an instance of the integrator. To configure the integrator we create an additional settings object which holds the model checkpoint we want to evaluate.

app/main.cxx (integrator)#

    // Setup exchange-correlation integrator
    GauXC::functional_type func;
    GauXC::XCIntegratorFactory<matrix> integrator_factory(int_exec_space, "Replicated", "Default", "Default", "Default");
    auto integrator = integrator_factory.get_instance(func, lb);

    // Configure model checkpoint
    GauXC::OneDFTSettings onedft_settings;
    onedft_settings.model = model;

Density matrix#

The final input we need to provide to GauXC is the density matrix. Similar to the molecule and basis set we will read it from our HDF5 input file using the HighFive library directly.

app/main.cxx (load_density_matrix)#

std::pair<matrix, matrix>
read_density_matrix(std::string ref_file)
{
  HighFive::File h5file(ref_file, HighFive::File::ReadOnly);

  auto dset = h5file.getDataSet("/DENSITY_SCALAR");
  auto dims = dset.getDimensions();
  auto P_s = matrix(dims[0], dims[1]);
  auto P_z = matrix(dims[0], dims[1]);

  dset.read(P_s.data());

  dset = h5file.getDataSet("/DENSITY_Z");
  dset.read(P_z.data());

  return std::make_pair(P_s, P_z);
}

For the model checkpoint we always use two spin channels and therefore have the scalar density matrix (alpha + beta spin channel) and the polarization density matrix (alpha - beta spin channel).

app/main.cxx (density matrix)#

    // Load density matrix from input
    matrix P_s, P_z;
    std::tie(P_s, P_z) = read_density_matrix(input_file);

Exchange-correlation evaluation#

With all inputs provided we can now perform the exchange-correlation evaluation.

app/main.cxx (exchange-correlation)#

    // Integrate exchange correlation energy
    double EXC;
    matrix VXC_s, VXC_z;
    std::tie(EXC, VXC_s, VXC_z) = integrator.eval_exc_vxc_onedft(P_s, P_z, onedft_settings);

Tip

For timing the execution we can add an optional timer around the evaluation and synchronize the MPI processes before and after the evaluation to get accurate timings.

app/main.cxx (timer)#

#ifdef GAUXC_HAS_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
    auto xc_int_start = std::chrono::high_resolution_clock::now();

    // Integrate exchange correlation energy
    double EXC;
    matrix VXC_s, VXC_z;
    std::tie(EXC, VXC_s, VXC_z) = integrator.eval_exc_vxc_onedft(P_s, P_z, onedft_settings);

#ifdef GAUXC_HAS_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
    auto xc_int_end = std::chrono::high_resolution_clock::now();
    double xc_int_dur = std::chrono::duration<double>(xc_int_end - xc_int_start).count();

After the evaluation we can output the computed exchange-correlation energy and potential.

app/main.cxx (output)#

    std::cout << std::scientific << std::setprecision(12);
    if(!world_rank) {
      std::cout << "EXC          = " << EXC << " Eh" << std::endl
                << "|VXC(a+b)|_F = " << VXC_s.norm() << std::endl
                << "|VXC(a-b)|_F = " << VXC_z.norm() << std::endl
                << "Runtime XC   = " << xc_int_dur << " s" << std::endl
                << std::endl;
    }

Now we can rebuild our project with

cmake --build build

After we build our project successfully, we run the driver again from the build directory

./build/Skala He_def2-svp.h5 --model PBE

Note

The He_def2-svp.h5 input file can be created with the skala package.

from pyscf import gto

from skala.gauxc.export import write_gauxc_h5_from_pyscf
from skala.pyscf import SkalaRKS

mol = gto.M(atom="He 0 0 0", basis="def2-svp", unit="Bohr", spin=0)
ks = SkalaRKS(mol, xc="pbe")
ks.kernel()

dm = ks.make_rdm1()
exc = ks.scf_summary["exc"]
_, _, vxc = ks._numint.nr_rks(ks.mol, ks.grids, ks.xc, dm)

write_gauxc_h5_from_pyscf("He_def2-svp.h5", mol, dm=dm, exc=exc, vxc=vxc)

As output we can see the results for the PBE functional

Configuration
-> Input file        : He_def2-svp.h5
-> Model             : PBE
-> Grid              : fine
-> Radial quadrature : muraknowles
-> Pruning scheme    : robust

EXC          = -1.054031868349e+00 Eh
|VXC(a+b)|_F = 1.455982966065e+00
|VXC(a-b)|_F = 0.000000000000e+00
Runtime XC   = 4.382018760000e-01 s

Download checkpoint from HuggingFace#

To evaluate Skala we first need to download the model checkpoint from HuggingFace. For this we can use the hf command line tool from the huggingface_hub Python package. After downloading the model checkpoint we can run our driver again with the new model

hf download microsoft/skala skala-1.0.fun --local-dir .
./build/Skala He_def2-svp.h5 --model ./skala-1.0.fun

In the output we can see the results for the Skala functional

Configuration
-> Input file        : He_def2-svp.h5
-> Model             : ./skala-1.0.fun
-> Grid              : fine
-> Radial quadrature : muraknowles
-> Pruning scheme    : robust

EXC          = -1.071256087389e+00 Eh
|VXC(a+b)|_F = 1.500299750739e+00
|VXC(a-b)|_F = 0.000000000000e+00
Runtime XC   = 1.792662281000e+00 s

Full source code#

Full source code of the main driver

app/main.cxx#

// For GauXC core functionality
#include <gauxc/exceptions.hpp>
#include <gauxc/molecular_weights.hpp>
#include <gauxc/molgrid/defaults.hpp>
#include <gauxc/runtime_environment.hpp>
#include <gauxc/xc_integrator.hpp>
#include <gauxc/xc_integrator/integrator_factory.hpp>

// For loading data from HDF5 files
#include <gauxc/external/hdf5.hpp>
#include <highfive/H5File.hpp>

// For providing matrix implementation
#define EIGEN_DONT_VECTORIZE
#define EIGEN_NO_CUDA
#include <Eigen/Core>
using matrix = Eigen::MatrixXd;

// For command line interface
#include <CLI/CLI.hpp>

GauXC::RuntimeEnvironment
get_runtime()
{
#ifdef GAUXC_HAS_DEVICE
  auto rt = GauXC::DeviceRuntimeEnvironment( GAUXC_MPI_CODE(MPI_COMM_WORLD,) 0.9 );
  // Calculate GauXC Device buffer size
  size_t available_mem, total_mem;
  cudaMemGetInfo(&available_mem, &total_mem);
  int device_id;
  cudaGetDevice(&device_id);
  size_t sz = 0.9 * available_mem;  
  void* p;
  cudaMallocAsync(&p, sz, 0);
  cudaStreamSynchronize(0);
  rt.set_buffer(p, sz);
#else
  auto rt = GauXC::RuntimeEnvironment(GAUXC_MPI_CODE(MPI_COMM_WORLD));
#endif
  return rt;
}

// Load molecule from HDF5 dataset
GauXC::Molecule
read_molecule(std::string ref_file)
{
  GauXC::Molecule mol;
  GauXC::read_hdf5_record(mol, ref_file, "/MOLECULE");
  return mol;
}

// Load basis from HDF5 dataset
GauXC::BasisSet<double>
read_basis(std::string ref_file, double basis_tol)
{
  GauXC::BasisSet<double> basis;
  GauXC::read_hdf5_record(basis, ref_file, "/BASIS");
  for(auto& shell : basis){
    shell.set_shell_tolerance(basis_tol);
  }
  return basis;
}

GauXC::AtomicGridSizeDefault
read_atomic_grid_size(std::string spec)
{
    std::map<std::string, GauXC::AtomicGridSizeDefault> mg_map = {
      {"fine",      GauXC::AtomicGridSizeDefault::FineGrid},
      {"ultrafine", GauXC::AtomicGridSizeDefault::UltraFineGrid},
      {"superfine", GauXC::AtomicGridSizeDefault::SuperFineGrid},
      {"gm3",       GauXC::AtomicGridSizeDefault::GM3},
      {"gm5",       GauXC::AtomicGridSizeDefault::GM5}
    };
    return mg_map.at(spec);
}

GauXC::PruningScheme
read_pruning_scheme(std::string spec)
{
    std::map<std::string, GauXC::PruningScheme> prune_map = {
      {"unpruned", GauXC::PruningScheme::Unpruned},
      {"robust",   GauXC::PruningScheme::Robust},
      {"treutler", GauXC::PruningScheme::Treutler}
    };
    return prune_map.at(spec);
}

GauXC::RadialQuad
read_radial_quad(std::string spec)
{
    std::map<std::string, GauXC::RadialQuad> rad_quad_map = {
      {"becke",             GauXC::RadialQuad::Becke},
      {"muraknowles",       GauXC::RadialQuad::MuraKnowles},
      {"treutlerahlrichs",  GauXC::RadialQuad::TreutlerAhlrichs},
      {"murrayhandylaming", GauXC::RadialQuad::MurrayHandyLaming},
    };
    return rad_quad_map.at(spec);
}

std::pair<matrix, matrix>
read_density_matrix(std::string ref_file)
{
  HighFive::File h5file(ref_file, HighFive::File::ReadOnly);

  auto dset = h5file.getDataSet("/DENSITY_SCALAR");
  auto dims = dset.getDimensions();
  auto P_s = matrix(dims[0], dims[1]);
  auto P_z = matrix(dims[0], dims[1]);

  dset.read(P_s.data());

  dset = h5file.getDataSet("/DENSITY_Z");
  dset.read(P_z.data());

  return std::make_pair(P_s, P_z);
}

int
main(int argc, char** argv)
{
#ifdef GAUXC_HAS_MPI
  MPI_Init(NULL, NULL);
#endif
  {
    std::string input_file;
    std::string model;
    std::string grid_spec = "fine";
    std::string rad_quad_spec = "muraknowles";
    std::string prune_spec = "robust";
    std::string lb_exec_space_str = "host";
    std::string int_exec_space_str = "host";
    int batch_size = 512;
    double basis_tol = 1e-10;
    {
      auto string_to_lower = CLI::Validator(
        [](auto& str){
          std::transform(str.begin(), str.end(), str.begin(), ::tolower);
          return "";
        }, std::string(""), std::string("argument is case-insensitive"));
      CLI::App app{"Skala GauXC driver"};
      app.option_defaults()->always_capture_default();
      app.add_option("input", input_file, "Input file in HDF5 format")->required()->check(CLI::ExistingFile);
      app.add_option("--model", model, "Model checkpoint to evaluate")->required();
      app.add_option("--grid-size", grid_spec, "Grid specification (fine|ultrafine|superfine|gm3|gm5)")->transform(string_to_lower);
      app.add_option("--radial-quad", rad_quad_spec, "Radial quadrature specification (becke|muraknowles|treutlerahlrichs|murrayhandylaming)")->transform(string_to_lower);
      app.add_option("--prune-scheme", prune_spec, "Pruning scheme (unpruned|robust|treutler)")->transform(string_to_lower);
      app.add_option("--lb-exec-space", lb_exec_space_str, "Load balancing execution space")->transform(string_to_lower);
      app.add_option("--int-exec-space", int_exec_space_str, "Integration execution space")->transform(string_to_lower);
      app.add_option("--batch-size", batch_size, "");
      app.add_option("--basis-tol", basis_tol, "");
      CLI11_PARSE(app, argc, argv);
    }
    // Create runtime
    auto rt = get_runtime();
    auto world_rank = rt.comm_rank();
    auto world_size = rt.comm_size();

    if (!world_rank) {
      std::cout << std::boolalpha;
      std::cout << "Configuration" << std::endl
                << "-> Input file        : " << input_file << std::endl
                << "-> Model             : " << model << std::endl
                << "-> Grid              : " << grid_spec << std::endl
                << "-> Radial quadrature : " << rad_quad_spec << std::endl
                << "-> Pruning scheme    : " << prune_spec << std::endl
                << std::endl;
    }

    // Get molecule (atomic numbers and cartesian coordinates)
    auto mol = read_molecule(input_file);

    // Get basis set
    auto basis = read_basis(input_file, basis_tol);

    // Define molecular grid from grid size, radial quadrature and pruning scheme
    auto grid = GauXC::MolGridFactory::create_default_molgrid(
      mol,
      read_pruning_scheme(prune_spec),
      GauXC::BatchSize(batch_size),
      read_radial_quad(rad_quad_spec),
      read_atomic_grid_size(grid_spec));

    // Choose whether we run on host or device
  #ifdef GAUXC_HAS_DEVICE
    std::map<std::string, GauXC::ExecutionSpace> exec_space_map = {
      { "host",   GauXC::ExecutionSpace::Host },
      { "device", GauXC::ExecutionSpace::Device }
    };

    auto lb_exec_space = exec_space_map.at(lb_exec_space_str);
    auto int_exec_space = exec_space_map.at(int_exec_space_str);
  #else
    auto lb_exec_space  = GauXC::ExecutionSpace::Host;
    auto int_exec_space = GauXC::ExecutionSpace::Host;
  #endif

    // Setup load balancer based on molecule, grid and basis set
    GauXC::LoadBalancerFactory lb_factory(lb_exec_space, "Replicated");
    auto lb = lb_factory.get_shared_instance(rt, mol, grid, basis);

    // Apply partitioning weights to the molecule grid
    GauXC::MolecularWeightsFactory mw_factory(int_exec_space, "Default", 
      GauXC::MolecularWeightsSettings{} );
    auto mw = mw_factory.get_instance();
    mw.modify_weights(*lb);

    // Setup exchange-correlation integrator
    GauXC::functional_type func;
    GauXC::XCIntegratorFactory<matrix> integrator_factory(int_exec_space, "Replicated", "Default", "Default", "Default");
    auto integrator = integrator_factory.get_instance(func, lb);

    // Configure model checkpoint
    GauXC::OneDFTSettings onedft_settings;
    onedft_settings.model = model;

    // Load density matrix from input
    matrix P_s, P_z;
    std::tie(P_s, P_z) = read_density_matrix(input_file);

#ifdef GAUXC_HAS_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
    auto xc_int_start = std::chrono::high_resolution_clock::now();

    // Integrate exchange correlation energy
    double EXC;
    matrix VXC_s, VXC_z;
    std::tie(EXC, VXC_s, VXC_z) = integrator.eval_exc_vxc_onedft(P_s, P_z, onedft_settings);

#ifdef GAUXC_HAS_MPI
    MPI_Barrier(MPI_COMM_WORLD);
#endif
    auto xc_int_end = std::chrono::high_resolution_clock::now();
    double xc_int_dur = std::chrono::duration<double>(xc_int_end - xc_int_start).count();

    std::cout << std::scientific << std::setprecision(12);
    if(!world_rank) {
      std::cout << "EXC          = " << EXC << " Eh" << std::endl
                << "|VXC(a+b)|_F = " << VXC_s.norm() << std::endl
                << "|VXC(a-b)|_F = " << VXC_z.norm() << std::endl
                << "Runtime XC   = " << xc_int_dur << " s" << std::endl
                << std::endl;
    }
  }
#ifdef GAUXC_HAS_MPI
  MPI_Finalize();
#endif
}

Summary#

In this guide we covered how to use the GauXC library in C++. We created a minimal CMake project to setup the build environment and included GauXC. We created a command line driver which reads the molecule, basis set, and density matrix from an HDF5 input file and sets up the integration grid, load balancer, and exchange-correlation integrator. Finally, we performed the exchange-correlation evaluation and output the results.

Troubleshooting#

The link interface of target “gauxc::gauxc” contains “gau2grid::gg” but the target was not found

Explicitly find the gau2grid package together with GauXC by adding the following lines at the end of the GauXC CMake include file

cmake/skala-gauxc.cmake (append)#

if(GAUXC_HAS_GAU2GRID AND NOT TARGET gau2grid::gg)
  find_package(gau2grid CONFIG REQUIRED)
endif()

OpenMP not found with Apple Clang

On macOS OpenMP support is not provided by default with Apple Clang. Either disable OpenMP using the CMake option -DSkala_GauXC_ENABLE_OPENMP=OFF or install a version of Clang with OpenMP support, for example from conda-forge, and set the CXX environment variable to point to the new compiler.

libtorch not found

Ensure that the libtorch library is installed and in the CMAKE_PREFIX_PATH. The libtorch library is part of the pytorch package on conda-forge and can be installed with

conda install -c conda-forge pytorch

GauXC in C++

Contents

GauXC in C++#

Setting up CMake#

Initializing GauXC#

Command line interface#

Molecule data#

Basis set data#

Integration grid#

Exchange-correlation integrator#

Density matrix#

Exchange-correlation evaluation#

Download checkpoint from HuggingFace#

Full source code#

Summary#

Troubleshooting#