GauXC in Fortran#

This guide demonstrates how to use the GauXC library from Fortran to evaluate exchange-correlation functionals, including the Skala neural network functional. By the end of this tutorial, you will:

  • Set up a CMake project with GauXC as a dependency

  • Initialize the GauXC runtime environment

  • Read molecule, basis set, and density matrix from an HDF5 input file

  • Configure the integration grid, load balancer, and XC integrator

  • Evaluate the exchange-correlation energy and potential

Setting up CMake#

GauXC integrates most conveniently via CMake. We will set up a minimal CMake project for a command-line driver that uses GauXC. In addition to GauXC, we will include other dependencies as needed.

The directory structure for the project will be

├── CMakeLists.txt
├── app
│   └── main.F90
└── cmake
    ├── skala-flap.cmake
    ├── skala-dep-versions.cmake
    └── skala-gauxc.cmake

First, we create the main CMakeLists.txt to define our project, include dependencies, and declare the executable.

CMakeLists.txt#
cmake_minimum_required(VERSION 3.20 FATAL_ERROR)

project("Skala" LANGUAGES Fortran C CXX)

include(GNUInstallDirs)
include(FetchContent)
set(FETCHCONTENT_UPDATES_DISCONNECTED ON CACHE BOOL "Disable FC Updates")

list(PREPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)

option(Skala_GauXC_ENABLE_OPENMP "Enable OpenMP support in GauXC" ON)
option(Skala_GauXC_ENABLE_MPI "Enable MPI support in GauXC" OFF)
option(Skala_GauXC_ENABLE_CUDA "Enable CUDA support in GauXC" OFF)

include(skala-dep-versions)
include(skala-gauxc)
include(skala-flap)

add_executable(
  "${PROJECT_NAME}"
  "app/main.F90"
)
target_link_libraries(
  "${PROJECT_NAME}"
  PRIVATE
  gauxc::gauxc
  FLAP::FLAP
)
install(
  TARGETS
  "${PROJECT_NAME}"
  DESTINATION
  "${CMAKE_INSTALL_BINDIR}"
)

list(REMOVE_AT CMAKE_MODULE_PATH 0)

To manage dependencies cleanly, we create a separate file in the cmake/ subdirectory that stores URLs and checksums for all external packages.

cmake/skala-dep-versions.cmake#
set(Skala_GauXC_URL "https://github.com/microsoft/skala/releases/download/v1.1.1/gauxc-skala.tar.gz")
set(Skala_GauXC_SHA256 "93011cc5a7e9e1d3d065a4fd14deb2914274c93283a29676439e885003ac4436")

set(Skala_FLAP_URL "https://github.com/szaghi/FLAP/releases/download/v1.2.5/FLAP.tar.gz")
set(Skala_FLAP_SHA256 "f2a388898a1ee49a8559822280647eb112d884484cb6169710011daab6dbb501")

Each dependency gets its own CMake include file. First, we define how to include GauXC, our main dependency. CMake will first attempt to discover an installed GauXC via its config file; if that fails, it will download and build GauXC from source. The options defined in the main CMake file are passed through to GauXC to ensure the library provides the requested features. After GauXC is available, we verify that our requirements are satisfied—this is especially important for the Skala implementation, which requires the GAUXC_HAS_ONEDFT feature flag.

cmake/skala-gauxc.cmake#
if(NOT DEFINED Skala_GauXC_URL)
  include(skala-dep-versions)
endif()
find_package(gauxc QUIET CONFIG)
if(NOT gauxc_FOUND)
  include(FetchContent)

  message(STATUS "Could not find GauXC... Building GauXC from source")
  message(STATUS "GAUXC URL: ${Skala_GauXC_URL}")

  set(GAUXC_ENABLE_ONEDFT ON CACHE BOOL "" FORCE)
  set(GAUXC_ENABLE_C ON CACHE BOOL "" FORCE)
  set(GAUXC_ENABLE_FORTRAN ON CACHE BOOL "" FORCE)
  set(GAUXC_ENABLE_TESTS OFF CACHE BOOL "" FORCE)
  set(GAUXC_ENABLE_OPENMP ${Skala_GauXC_ENABLE_OPENMP} CACHE BOOL "" FORCE)
  set(GAUXC_ENABLE_MPI ${Skala_GauXC_ENABLE_MPI} CACHE BOOL "" FORCE)
  set(GAUXC_ENABLE_CUDA ${Skala_GauXC_ENABLE_CUDA} CACHE BOOL "" FORCE)

  FetchContent_Declare(
    gauxc
    URL ${Skala_GauXC_URL}
    # URL_HASH SHA256=${Skala_GauXC_SHA256}
    DOWNLOAD_EXTRACT_TIMESTAMP ON
  )
  FetchContent_MakeAvailable(gauxc)

else()
  if(NOT GAUXC_HAS_ONEDFT)
    message(FATAL_ERROR "GauXC found but without Skala support enabled")
  endif()
  if(NOT GAUXC_HAS_C)
    message(FATAL_ERROR "GauXC found but without C API support enabled")
  endif()
  if(NOT GAUXC_HAS_FORTRAN)
    message(FATAL_ERROR "GauXC found but without Fortran API support enabled")
  endif()
  if(Skala_GauXC_ENABLE_OPENMP AND NOT GAUXC_HAS_OPENMP)
    message(FATAL_ERROR "GauXC found without OpenMP support but Skala_GauXC_ENABLE_OPENMP is ON")
  endif()
  if(Skala_GauXC_ENABLE_MPI AND NOT GAUXC_HAS_MPI)
    message(FATAL_ERROR "GauXC found without MPI support but Skala_GauXC_ENABLE_MPI is ON")
  endif()
  if(Skala_GauXC_ENABLE_CUDA AND NOT GAUXC_HAS_CUDA)
    message(FATAL_ERROR "GauXC found without CUDA support but Skala_GauXC_ENABLE_CUDA is ON")
  endif()
endif()

For the command-line interface, we use the FLAP library (Fortran command Line Arguments Parser).

cmake/skala-flap.cmake#
if(NOT DEFINED Skala_FLAP_URL)
  include(skala-dep-versions)
endif()
find_package(FLAP QUIET CONFIG)
if(NOT FLAP_FOUND)
  include(FetchContent)

  message(STATUS "Could not find FLAP... Building FLAP from source")
  message(STATUS "FLAP URL: ${Skala_FLAP_URL}")

  FetchContent_Declare(
    flap
    URL ${Skala_FLAP_URL}
    URL_HASH SHA256=${Skala_FLAP_SHA256}
    DOWNLOAD_EXTRACT_TIMESTAMP ON
  )
  FetchContent_MakeAvailable(flap)
endif()

if(NOT TARGET FLAP::FLAP)
  add_library(FLAP::FLAP INTERFACE IMPORTED)
  target_link_libraries(FLAP::FLAP INTERFACE FLAP)
endif()

This completes the CMake setup required for our command-line driver.

Module imports#

The main driver program imports the relevant GauXC modules. We also use GauXC’s HDF5 I/O module and the FLAP module for command-line parsing.

app/main.F90#
#include "gauxc/gauxc_config.f"

program skala
  use iso_c_binding, only : c_int, c_int64_t, c_double, c_bool
  use flap, only : command_line_interface
  use gauxc_status, only : gauxc_status_type, gauxc_status_message
  use gauxc_enums, only : gauxc_executionspace, gauxc_radialquad, &
    & gauxc_atomicgridsizedefault, gauxc_pruningscheme
  use gauxc_runtime_environment, only : gauxc_runtime_environment_type, &
    & gauxc_runtime_environment_comm_rank, gauxc_runtime_environment_comm_size, &
    & gauxc_runtime_environment_new, gauxc_delete
  use gauxc_molecule, only : gauxc_molecule_type, gauxc_molecule_new, gauxc_delete
  use gauxc_basisset, only : gauxc_basisset_type, gauxc_basisset_new, gauxc_delete
  use gauxc_molgrid, only : gauxc_molgrid_type, gauxc_molgrid_new_default, &
    & gauxc_delete
  use gauxc_load_balancer, only : gauxc_load_balancer_type, gauxc_load_balancer_factory_type, &
    & gauxc_load_balancer_factory_new, gauxc_get_shared_instance, gauxc_delete
  use gauxc_molecular_weights, only : gauxc_molecular_weights_type, &
    & gauxc_molecular_weights_factory_type, gauxc_molecular_weights_factory_new, &
    & gauxc_molecular_weights_modify_weights, gauxc_get_instance, gauxc_delete, &
    & gauxc_molecular_weights_settings
  use gauxc_xc_functional, only : gauxc_functional_type, gauxc_functional_from_string, gauxc_delete
  use gauxc_integrator, only : gauxc_integrator_type, gauxc_integrator_factory_type, &
    & gauxc_integrator_factory_new, gauxc_get_instance, gauxc_eval_exc_vxc, gauxc_delete
  use gauxc_matrix, only : gauxc_matrix_type, gauxc_matrix_empty, gauxc_matrix_data, gauxc_delete
  use gauxc_external_hdf5_read, only : gauxc_read_hdf5_record
#ifdef GAUXC_HAS_MPI
  use mpi
#endif

  implicit none(type, external)

Each GauXC component has a corresponding Fortran module:

gauxc_status

Handles GauXC status codes and error messages.

gauxc_molecule

Manages molecular data structures (atomic numbers and Cartesian coordinates).

gauxc_basisset

Manages basis set data (shell definitions, exponents, contraction coefficients).

gauxc_molgrid

Sets up and manages the numerical integration grid.

gauxc_runtime_environment

Interfaces with MPI and device (GPU) runtime environments.

gauxc_load_balancer

Distributes grid points across available compute resources.

gauxc_molecular_weights

Computes Becke-style partitioning weights for the molecular grid.

gauxc_xc_functional

Handles exchange-correlation functional definitions.

gauxc_integrator

Performs the numerical integration of the exchange-correlation energy and potential.

gauxc_matrix

Manages matrix data structures (density matrices, potentials).

Next, we declare variables for the GauXC-specific types:

app/main.F90#
  type(gauxc_status_type) :: status
  type(gauxc_runtime_environment_type) :: rt
  type(gauxc_molecule_type) :: mol
  type(gauxc_basisset_type) :: basis
  type(gauxc_molgrid_type) :: grid
  type(gauxc_load_balancer_factory_type) :: lbf
  type(gauxc_load_balancer_type) :: lb
  type(gauxc_molecular_weights_factory_type) :: mwf
  type(gauxc_molecular_weights_type) :: mw
  type(gauxc_functional_type) :: func
  type(gauxc_integrator_factory_type) :: intf
  type(gauxc_integrator_type) :: integrator
  type(gauxc_matrix_type) :: p_s, p_z, vxc_s, vxc_z

We also declare variables for input parameters and intermediate values:

app/main.F90#
  character(len=:), allocatable :: input_file, model, grid_spec, rad_quad_spec, prune_spec, &
    & lb_exec_space_str, int_exec_space_str
  integer(c_int) :: grid_type, radial_quad, pruning_scheme
  integer(c_int) :: lb_exec_space, int_exec_space
  integer(c_int) :: world_rank, world_size
  integer(c_int64_t) :: batch_size
  real(c_double) :: exc, t_exc, t_start, t_end
  type(command_line_interface) :: cli
  integer :: error

When compiled with MPI support, we initialize MPI at program startup. The gauxc/gauxc_config.f header provides the GAUXC_HAS_MPI preprocessor macro for guarding MPI-specific calls.

app/main.F90#
#ifdef GAUXC_HAS_MPI
  call MPI_Init(error)
#endif

Similarly, at the end of the program we finalize MPI:

app/main.F90#
  call gauxc_delete(status, p_z)
  call gauxc_delete(status, vxc_s)
  call gauxc_delete(status, vxc_z)

Command line interface#

This guide uses an HDF5 input file to provide molecular data. (GauXC also supports constructing these objects programmatically, which is more convenient when embedding GauXC in a library.) We also expose parameters for configuring the integration grid and parallelization strategy. The main command-line arguments are:

input_file

Path to an HDF5 file containing the molecule, basis set, and density matrix.

model

The model checkpoint to evaluate (e.g., PBE for a traditional functional or a path to a Skala checkpoint).

grid_spec

The grid size specification, which controls the number of angular and radial integration points.

rad_quad_spec

The radial quadrature scheme, which determines the spacing of radial integration points.

prune_spec

The pruning scheme, which controls how atomic grids are combined into a molecular grid.

We use the command_line_interface type from the FLAP library to define the CLI. First, we initialize variables with their default values:

app/main.F90#
  grid_spec = "fine"
  rad_quad_spec = "muraknowles"
  prune_spec = "robust"
  lb_exec_space_str = "host"
  int_exec_space_str = "host"
  batch_size = 512_c_int64_t

Next, we initialize the CLI and define the available arguments. A named block statement provides convenient error handling:

app/main.F90#
  input: block
  character(len=512) :: dummy
  call cli%init(description="Driver for using Skala")
  call cli%add(position=1, required=.true., act="store", error=error, positional=.true., &
    & help="Input HDF5 file containing molecule, basis set and density matrix")
  if (error /= 0) exit input
  call cli%add(switch="--model", required=.true., act="store", error=error, &
    & help="Model to use for the calculation")
  if (error /= 0) exit input
  call cli%add(switch="--grid", act="store", error=error, def=grid_spec, required=.false., &
    & help="Molecular grid specification", choices="fine,ultrafine,superfine,gm3,gm5")
  if (error /= 0) exit input
  call cli%add(switch="--radial-quadrature", act="store", error=error, def=rad_quad_spec, &
    & help="Radial quadrature to use", choices="becke,muraknowles,treutlerahlrichs,murrayhandylaming")
  if (error /= 0) exit input
  call cli%add(switch="--pruning-scheme", act="store", error=error, def=prune_spec, &
    & help="Pruning scheme to use", choices="unpruned,robust,treutler")
  if (error /= 0) exit input
  call cli%add(switch="--lb-exec-space", act="store", error=error, def=lb_exec_space_str, &
    & help="Execution space for load balancer", choices="host,device")
  if (error /= 0) exit input
  call cli%add(switch="--int-exec-space", act="store", error=error, def=int_exec_space_str, &
    & help="Execution space for integrator", choices="host,device")
  if (error /= 0) exit input
  call cli%add(switch="--batch-size", act="store", error=error, def="512", &
    & help="Batch size for grid point processing")
  if (error /= 0) exit input
  end block input
  if (error /= 0) then
    print '(a)', cli%error_message
#ifdef GAUXC_HAS_MPI
    call MPI_Abort(MPI_COMM_WORLD, 1, error)
#else
    stop 1
#endif
  end if

Note

The molecular grid settings (grid_spec, rad_quad_spec, prune_spec) are documented in detail in the Molecular grid reference.

After defining the CLI, we parse it within the input block and retrieve the values:

app/main.F90#
  call cli%parse(error=error)
  if (error /= 0) exit input

  call cli%get(position=1, val=dummy, error=error)
  input_file = trim(dummy)
  if (error /= 0) exit input
  call cli%get(switch="--model", val=dummy, error=error)
  model = trim(dummy)
  if (error /= 0) exit input
  if (cli%is_passed(switch="--grid")) then
    call cli%get(switch="--grid", val=dummy, error=error)
    if (error /= 0) exit input
    grid_spec = trim(dummy)
  end if
  if (cli%is_passed(switch="--radial-quadrature")) then
    call cli%get(switch="--radial-quadrature", val=dummy, error=error)
    if (error /= 0) exit input
    rad_quad_spec = trim(dummy)
  end if
  if (cli%is_passed(switch="--pruning-scheme")) then
    call cli%get(switch="--pruning-scheme", val=dummy, error=error)
    if (error /= 0) exit input
    prune_spec = trim(dummy)
  end if
  if (cli%is_passed(switch="--lb-exec-space")) then
    call cli%get(switch="--lb-exec-space", val=dummy, error=error)
    if (error /= 0) exit input
    lb_exec_space_str = trim(dummy)
  end if
  if (cli%is_passed(switch="--int-exec-space")) then
    call cli%get(switch="--int-exec-space", val=dummy, error=error)
    if (error /= 0) exit input
    int_exec_space_str = trim(dummy)
  end if
  if (cli%is_passed(switch="--batch-size")) then
    call cli%get(switch="--batch-size", val=batch_size, error=error)
    if (error /= 0) exit input
  end if

Before adding further implementation, let’s build and test the project. Configure and compile with:

cmake -B build -G Ninja -S .
cmake --build build

Once built, run the driver with --help to verify it works:

./build/Skala --help

You should see the help page generated by FLAP:

usage: ./build/Skala  value --model value [--grid value] [--radial-quadrature value] [--pruning-scheme value] [--lb-exec-space value] [--int-exec-space value] [--batch-size value] [--help] [--markdown] [--version]

Driver for using Skala


Required switches:
  value
    1-th argument
    Input HDF5 file containing molecule, basis set and density matrix
   --model value
    Model to use for the calculation

Optional switches:
   --grid value, value in: `fine,ultrafine,superfine,gm3,gm5`
    default value fine
    Molecular grid specification
   --radial-quadrature value, value in: `becke,muraknowles,treutlerahlrichs,murrayhandylaming`
    default value muraknowles
    Radial quadrature to use
   --pruning-scheme value, value in: `unpruned,robust,treutler`
    default value robust
    Pruning scheme to use
   --lb-exec-space value, value in: `host,device`
    default value host
    Execution space for load balancer
   --int-exec-space value, value in: `host,device`
    default value host
    Execution space for integrator
   --batch-size value
    default value 512
    Batch size for grid point processing
   --help, -h
    Print this help message
   --markdown, -md
    Save this help message in a Markdown file
   --version, -v
    Print version

The CLI is now fully functional and allows flexible configuration from the command line.

Initializing GauXC#

We begin by initializing the GauXC runtime environment. All GauXC-related calls are placed inside a named block for streamlined error handling:

app/main.F90#
  main: block
  ! Create runtime
#ifdef GAUXC_HAS_MPI
  rt = gauxc_runtime_environment_new(MPI_COMM_WORLD, status)
#else
  rt = gauxc_runtime_environment_new(status)
#endif
  if (status%code /= 0) exit main
  world_rank = gauxc_runtime_environment_comm_rank(status, rt)
  if (status%code /= 0) exit main
  world_size = gauxc_runtime_environment_comm_size(status, rt)
  if (status%code /= 0) exit main

At the end of the block, we check the status and clean up the runtime environment:

app/main.F90#
  end block main
  if (world_rank == 0 .and. status%code /= 0) then
    print '(a,1x,i0)', "GauXC returned with status code", status%code
    print '(a)', gauxc_status_message(status)
  end if

  call gauxc_delete(status, rt)

The runtime environment provides the MPI world rank and size (for both MPI and non-MPI builds). We print the configuration obtained from the command line:

app/main.F90#
  if (world_rank == 0) then
    print '(a)', &
      & "Configuration", &
      & "-> Input file        : "//input_file, &
      & "-> Model             : "//model, &
      & "-> Grid              : "//grid_spec, &
      & "-> Radial quadrature : "//rad_quad_spec, &
      & "-> Pruning scheme    : "//prune_spec, &
      & ""
  end if

From here on, we use world_rank to ensure only rank 0 produces output.

Molecule data#

We use GauXC’s built-in HDF5 reader to load the molecule data.

Note

GauXC stores the information about the atomic numbers and their coordinates as an array of structs, defined in C++ as

struct GauXC::Atom {
  int64_t Z; ///< Atomic number
  double x;  ///< X coordinate (bohr)
  double y;  ///< Y coordinate (bohr)
  double z;  ///< Z coordinate (bohr)
};
class GauXC::Molecule : public std::vector<GauXC::Atom> {
  ...
};

The HDF5 wrapper maps this struct representation directly to the HDF5 dataset.

The gauxc_read_hdf5_record subroutine loads the molecule data:

app/main.F90 (read molecule)#
  ! Get molecule (atomic numbers and cartesian coordinates)
  mol = gauxc_molecule_new(status)
  if (status%code /= 0) exit main
  ! Load molecule from HDF5 dataset
  call gauxc_read_hdf5_record(status, mol, input_file, "/MOLECULE")
  if (status%code /= 0) exit main

For proper memory management, we free the molecule object at program end:

app/main.F90 (free molecule)#
  call gauxc_delete(status, mol)

Basis set data#

We load the basis set using the same HDF5 approach as for the molecule.

Note

The basis set is represented as an array of shell objects, each containing primitive exponents, contraction coefficients, angular momentum, and shell center:

template <typename F>
class alignas(256) GauXC::Shell {
  std::array<F, 32> alpha; ///< exponents of primitives
  std::array<F, 32> coeff; ///< contraction coefficients
  std::array<double, 3> O; ///< origin of the shell
  int32_t nprim; ///< number of primitives
  int32_t l; ///< angular moment of the shell
  int32_t pure; ///< pure=1: spherical Gaussians; pure=0: cartesian Gaussians
};

template <typename F>
struct GauXC::BasisSet : public std::vector<GauXC::Shell<F>> {
  ...
};

Again, this allows the object’s representation to map directly to an HDF5 dataset.

We read the basis set using gauxc_read_hdf5_record:

app/main.F90 (read basisset)#
  ! Get basis set
  basis = gauxc_basisset_new(status)
  if (status%code /= 0) exit main
  ! Load basis set from HDF5 dataset
  call gauxc_read_hdf5_record(status, basis, input_file, "/BASIS")
  if (status%code /= 0) exit main

We free the basis set at program end:

app/main.F90 (free basisset)#
  call gauxc_delete(status, basis)

Integration grid#

The integration grid defines the spatial points for evaluating the exchange-correlation functional. Three parameters control grid construction:

  • Grid size: Density of angular and radial points (e.g., fine, ultrafine)

  • Radial quadrature: Spacing scheme for radial points (e.g., muraknowles)

  • Pruning scheme: How atomic grids combine into the molecular grid (e.g., robust)

GauXC uses enumerators for these settings. We define helper functions to convert CLI strings:

app/main.F90 (enumerator conversion functions)#
contains
  pure function read_atomic_grid_size(spec) result(val)
    character(len=*), intent(in) :: spec
    integer(c_int) :: val
    select case(spec)
    case("fine")
      val = gauxc_atomicgridsizedefault%finegrid
    case("ultrafine")
      val = gauxc_atomicgridsizedefault%ultrafinegrid
    case("superfine")
      val = gauxc_atomicgridsizedefault%superfinegrid
    case("gm3")
      val = gauxc_atomicgridsizedefault%gm3
    case("gm5")
      val = gauxc_atomicgridsizedefault%gm5
    end select
  end function read_atomic_grid_size
  
  pure function read_radial_quad(spec) result(val)
    character(len=*), intent(in) :: spec
    integer(c_int) :: val
    select case(spec)
    case("becke")
      val = gauxc_radialquad%becke
    case("muraknowles")
      val = gauxc_radialquad%mura_knowles
    case("treutlerahlrichs")
      val = gauxc_radialquad%treutler_ahlrichs
    case("murrayhandylaming")
      val = gauxc_radialquad%murray_handy_laming
    end select
  end function read_radial_quad
  
  pure function read_pruning_scheme(spec) result(val)
    character(len=*), intent(in) :: spec
    integer(c_int) :: val
    select case(spec)
    case("unpruned")
      val = gauxc_pruningscheme%unpruned
    case("robust")
      val = gauxc_pruningscheme%robust
    case("treutler")
      val = gauxc_pruningscheme%treutler
    end select
  end function read_pruning_scheme

We create the molecular grid from our input parameters. The batch size controls how many grid points are processed together; larger values (up to ~10000) improve performance, though the default is 512:

app/main.F90 (grid setup)#
  ! Define molecular grid from grid size, radial quadrature and pruning scheme
  grid_type = read_atomic_grid_size(grid_spec)
  radial_quad = read_radial_quad(rad_quad_spec)
  pruning_scheme = read_pruning_scheme(prune_spec)
  grid = gauxc_molgrid_new_default(status, mol, pruning_scheme, batch_size, radial_quad, grid_type)
  if (status%code /= 0) exit main

  ! Choose whether we run on host or device

We free the grid at program end:

app/main.F90 (free grid)#
  call gauxc_delete(status, grid)

Exchange-correlation integrator#

The load balancer distributes XC functional evaluation across available resources (host or device). A helper function converts the execution-space CLI string to an enumerator:

app/main.F90 (execution space enumerator)#
contains

  pure function read_execution_space(spec) result(val)
    character(len=*), intent(in) :: spec
    integer(c_int) :: val
    select case(spec)
    case("host")
      val = gauxc_executionspace%host
    case("device")
      val = gauxc_executionspace%device
    end select
  end function read_execution_space

The load balancer manages access to the molecule, basis, and grid data for all subsequent GauXC operations. We create it from our input parameters:

app/main.F90 (load balancer setup)#
  ! Choose whether we run on host or device
  lb_exec_space = read_execution_space(lb_exec_space_str)
  int_exec_space = read_execution_space(int_exec_space_str)

  ! Setup load balancer based on molecule, grid and basis set
  lbf = gauxc_load_balancer_factory_new(status, lb_exec_space, "Replicated")
  if (status%code /= 0) exit main
  lb = gauxc_get_shared_instance(status, lbf, rt, mol, grid, basis)
  if (status%code /= 0) exit main

  ! Apply partitioning weights to the molecule grid
  mwf = gauxc_molecular_weights_factory_new(status, int_exec_space, "Default", &
    & gauxc_molecular_weights_settings())
  if (status%code /= 0) exit main
  mw = gauxc_get_instance(status, mwf)
  if (status%code /= 0) exit main
  call gauxc_molecular_weights_modify_weights(status, mw, lb)
  if (status%code /= 0) exit main

Finally, we create the XC integrator using a factory pattern. A settings object specifies the model checkpoint to evaluate:

app/main.F90 (integrator setup)#
  ! Setup exchange-correlation integrator
  func = gauxc_functional_from_string(status, "PBE", .true._c_bool)
  intf = gauxc_integrator_factory_new(status, int_exec_space, "Replicated", &
    & "Default", "Default", "Default")
  if (status%code /= 0) exit main
  integrator = gauxc_get_instance(status, intf, func, lb)
  if (status%code /= 0) exit main

We free the integrator and associated objects at program end:

app/main.F90 (free integrator)#
  call gauxc_delete(status, lbf)
  call gauxc_delete(status, lb)
  call gauxc_delete(status, mwf)
  call gauxc_delete(status, mw)
  call gauxc_delete(status, func)
  call gauxc_delete(status, intf)
  call gauxc_delete(status, integrator)

Density matrix#

The density matrix is the final input for GauXC. We read it from the HDF5 file:

app/main.F90 (read density matrix)#
  ! Load density matrix from input
  p_s = gauxc_matrix_empty(status)
  p_z = gauxc_matrix_empty(status)
  call gauxc_read_hdf5_record(status, p_s, input_file, "/DENSITY_SCALAR")
  if (status%code /= 0) exit main
  call gauxc_read_hdf5_record(status, p_z, input_file, "/DENSITY_Z")
  if (status%code /= 0) exit main

We free the density matrix at program end:

app/main.F90 (free density matrix)#
  call gauxc_delete(status, p_s)
  call gauxc_delete(status, p_z)

Exchange-correlation evaluation#

With all inputs ready, we perform the XC evaluation:

app/main.F90 (exchange-correlation evaluation)#
  ! Integrate exchange-correlation energy
  vxc_s = gauxc_matrix_empty(status)
  vxc_z = gauxc_matrix_empty(status)
  call gauxc_eval_exc_vxc(status, integrator, p_s, p_z, model, exc, vxc_s, vxc_z)
  if (status%code /= 0) exit main

Tip

To measure evaluation time, define a helper function:

app/main.F90 (time helper function)#
  function timing() result(time)
    real(c_double) :: time
    integer(c_int64_t) :: time_count, time_rate, time_max
    call system_clock(time_count, time_rate, time_max)
    time = real(time_count, c_double) / real(time_rate, c_double)
  end function timing

Use it to wrap the evaluation and print elapsed time:

app/main.F90 (timed exchange-correlation evaluation)#
#ifdef GAUXC_HAS_MPI
  call MPI_Barrier(MPI_COMM_WORLD, error)
#endif
  t_start = timing()

  ! Integrate exchange-correlation energy
  vxc_s = gauxc_matrix_empty(status)
  vxc_z = gauxc_matrix_empty(status)
  call gauxc_eval_exc_vxc(status, integrator, p_s, p_z, model, exc, vxc_s, vxc_z)
  if (status%code /= 0) exit main

#ifdef GAUXC_HAS_MPI
  call MPI_Barrier(MPI_COMM_WORLD, error)
#endif
  t_end = timing()
  t_exc = t_end - t_start

We output the computed XC energy:

app/main.F90 (exchange-correlation output)#
  if (world_rank == 0) then
    associate(vxc_s_ => gauxc_matrix_data(status, vxc_s), vxc_z_ => gauxc_matrix_data(status, vxc_z))
    print '(a)', "Results"
    print '(a,1x,es17.10,:1x,a)', "Exc          =", exc, "Eh"
    print '(a,1x,es17.10,:1x,a)', "|VXC(a+b)|_F =", sqrt(sum(vxc_s_**2))
    print '(a,1x,es17.10,:1x,a)', "|VXC(a-b)|_F =", sqrt(sum(vxc_z_**2))
    print '(a,1x,es17.10,:1x,a)', "Runtime XC   =", t_exc
    end associate
  end if

We free the allocated XC potential matrices at program end:

app/main.F90 (free exchange-correlation potential)#
  call gauxc_delete(status, vxc_s)
  call gauxc_delete(status, vxc_z)

Rebuild the project:

cmake --build build

Run the driver from the build directory:

./build/Skala He_def2-svp.h5 --model PBE

Note

Create the He_def2-svp.h5 input file using the skala package:

from pyscf import gto

from skala.gauxc.export import write_gauxc_h5_from_pyscf
from skala.pyscf import SkalaRKS

mol = gto.M(atom="He 0 0 0", basis="def2-svp", unit="Bohr", spin=0)
ks = SkalaRKS(mol, xc="pbe")
ks.kernel()

dm = ks.make_rdm1()
exc = ks.scf_summary["exc"]
_, _, vxc = ks._numint.nr_rks(ks.mol, ks.grids, ks.xc, dm)

write_gauxc_h5_from_pyscf("He_def2-svp.h5", mol, dm=dm, exc=exc, vxc=vxc)

The output shows results for the PBE functional:

Configuration
-> Input file        : He_def2-svp.h5
-> Model             : PBE
-> Grid              : fine
-> Radial quadrature : muraknowles
-> Pruning scheme    : robust

Results
Exc          = -1.0540318683E+00 Eh
|VXC(a+b)|_F =  1.4559829661E+00
|VXC(a-b)|_F =  0.0000000000E+00
Runtime XC   =  2.5566819200E-01

Download checkpoint from HuggingFace#

To evaluate Skala, download the model checkpoint from HuggingFace using the hf CLI from the huggingface_hub package:

hf download microsoft/skala skala-1.0.fun --local-dir .
./build/Skala He_def2-svp.h5 --model ./skala-1.0.fun

The output shows results for the Skala functional:

Configuration
-> Input file        : He_def2-svp.h5
-> Model             : ./skala-1.0.fun
-> Grid              : fine
-> Radial quadrature : muraknowles
-> Pruning scheme    : robust

Results
Exc          = -1.0712560874E+00 Eh
|VXC(a+b)|_F =  1.5002997546E+00
|VXC(a-b)|_F =  0.0000000000E+00
Runtime XC   =  1.5986489670E+00

Full source code#

Full source code of the main driver
app/main.F90#
#include "gauxc/gauxc_config.f"

program skala
  use iso_c_binding, only : c_int, c_int64_t, c_double, c_bool
  use flap, only : command_line_interface
  use gauxc_status, only : gauxc_status_type, gauxc_status_message
  use gauxc_enums, only : gauxc_executionspace, gauxc_radialquad, &
    & gauxc_atomicgridsizedefault, gauxc_pruningscheme
  use gauxc_runtime_environment, only : gauxc_runtime_environment_type, &
    & gauxc_runtime_environment_comm_rank, gauxc_runtime_environment_comm_size, &
    & gauxc_runtime_environment_new, gauxc_delete
  use gauxc_molecule, only : gauxc_molecule_type, gauxc_molecule_new, gauxc_delete
  use gauxc_basisset, only : gauxc_basisset_type, gauxc_basisset_new, gauxc_delete
  use gauxc_molgrid, only : gauxc_molgrid_type, gauxc_molgrid_new_default, &
    & gauxc_delete
  use gauxc_load_balancer, only : gauxc_load_balancer_type, gauxc_load_balancer_factory_type, &
    & gauxc_load_balancer_factory_new, gauxc_get_shared_instance, gauxc_delete
  use gauxc_molecular_weights, only : gauxc_molecular_weights_type, &
    & gauxc_molecular_weights_factory_type, gauxc_molecular_weights_factory_new, &
    & gauxc_molecular_weights_modify_weights, gauxc_get_instance, gauxc_delete, &
    & gauxc_molecular_weights_settings
  use gauxc_xc_functional, only : gauxc_functional_type, gauxc_functional_from_string, gauxc_delete
  use gauxc_integrator, only : gauxc_integrator_type, gauxc_integrator_factory_type, &
    & gauxc_integrator_factory_new, gauxc_get_instance, gauxc_eval_exc_vxc, gauxc_delete
  use gauxc_matrix, only : gauxc_matrix_type, gauxc_matrix_empty, gauxc_matrix_data, gauxc_delete
  use gauxc_external_hdf5_read, only : gauxc_read_hdf5_record
#ifdef GAUXC_HAS_MPI
  use mpi
#endif

  implicit none(type, external)

  type(gauxc_status_type) :: status
  type(gauxc_runtime_environment_type) :: rt
  type(gauxc_molecule_type) :: mol
  type(gauxc_basisset_type) :: basis
  type(gauxc_molgrid_type) :: grid
  type(gauxc_load_balancer_factory_type) :: lbf
  type(gauxc_load_balancer_type) :: lb
  type(gauxc_molecular_weights_factory_type) :: mwf
  type(gauxc_molecular_weights_type) :: mw
  type(gauxc_functional_type) :: func
  type(gauxc_integrator_factory_type) :: intf
  type(gauxc_integrator_type) :: integrator
  type(gauxc_matrix_type) :: p_s, p_z, vxc_s, vxc_z

  character(len=:), allocatable :: input_file, model, grid_spec, rad_quad_spec, prune_spec, &
    & lb_exec_space_str, int_exec_space_str
  integer(c_int) :: grid_type, radial_quad, pruning_scheme
  integer(c_int) :: lb_exec_space, int_exec_space
  integer(c_int) :: world_rank, world_size
  integer(c_int64_t) :: batch_size
  real(c_double) :: exc, t_exc, t_start, t_end
  type(command_line_interface) :: cli
  integer :: error

#ifdef GAUXC_HAS_MPI
  call MPI_Init(error)
#endif

  grid_spec = "fine"
  rad_quad_spec = "muraknowles"
  prune_spec = "robust"
  lb_exec_space_str = "host"
  int_exec_space_str = "host"
  batch_size = 512_c_int64_t

  input: block
  character(len=512) :: dummy
  call cli%init(description="Driver for using Skala")
  call cli%add(position=1, required=.true., act="store", error=error, positional=.true., &
    & help="Input HDF5 file containing molecule, basis set and density matrix")
  if (error /= 0) exit input
  call cli%add(switch="--model", required=.true., act="store", error=error, &
    & help="Model to use for the calculation")
  if (error /= 0) exit input
  call cli%add(switch="--grid", act="store", error=error, def=grid_spec, required=.false., &
    & help="Molecular grid specification", choices="fine,ultrafine,superfine,gm3,gm5")
  if (error /= 0) exit input
  call cli%add(switch="--radial-quadrature", act="store", error=error, def=rad_quad_spec, &
    & help="Radial quadrature to use", choices="becke,muraknowles,treutlerahlrichs,murrayhandylaming")
  if (error /= 0) exit input
  call cli%add(switch="--pruning-scheme", act="store", error=error, def=prune_spec, &
    & help="Pruning scheme to use", choices="unpruned,robust,treutler")
  if (error /= 0) exit input
  call cli%add(switch="--lb-exec-space", act="store", error=error, def=lb_exec_space_str, &
    & help="Execution space for load balancer", choices="host,device")
  if (error /= 0) exit input
  call cli%add(switch="--int-exec-space", act="store", error=error, def=int_exec_space_str, &
    & help="Execution space for integrator", choices="host,device")
  if (error /= 0) exit input
  call cli%add(switch="--batch-size", act="store", error=error, def="512", &
    & help="Batch size for grid point processing")
  if (error /= 0) exit input
    
  call cli%parse(error=error)
  if (error /= 0) exit input

  call cli%get(position=1, val=dummy, error=error)
  input_file = trim(dummy)
  if (error /= 0) exit input
  call cli%get(switch="--model", val=dummy, error=error)
  model = trim(dummy)
  if (error /= 0) exit input
  if (cli%is_passed(switch="--grid")) then
    call cli%get(switch="--grid", val=dummy, error=error)
    if (error /= 0) exit input
    grid_spec = trim(dummy)
  end if
  if (cli%is_passed(switch="--radial-quadrature")) then
    call cli%get(switch="--radial-quadrature", val=dummy, error=error)
    if (error /= 0) exit input
    rad_quad_spec = trim(dummy)
  end if
  if (cli%is_passed(switch="--pruning-scheme")) then
    call cli%get(switch="--pruning-scheme", val=dummy, error=error)
    if (error /= 0) exit input
    prune_spec = trim(dummy)
  end if
  if (cli%is_passed(switch="--lb-exec-space")) then
    call cli%get(switch="--lb-exec-space", val=dummy, error=error)
    if (error /= 0) exit input
    lb_exec_space_str = trim(dummy)
  end if
  if (cli%is_passed(switch="--int-exec-space")) then
    call cli%get(switch="--int-exec-space", val=dummy, error=error)
    if (error /= 0) exit input
    int_exec_space_str = trim(dummy)
  end if
  if (cli%is_passed(switch="--batch-size")) then
    call cli%get(switch="--batch-size", val=batch_size, error=error)
    if (error /= 0) exit input
  end if
  end block input
  if (error /= 0) then
    print '(a)', cli%error_message
#ifdef GAUXC_HAS_MPI
    call MPI_Abort(MPI_COMM_WORLD, 1, error)
#else
    stop 1
#endif
  end if

#ifdef GAUXC_HAS_MPI
  call MPI_Barrier(MPI_COMM_WORLD, error)
#endif

  main: block
  ! Create runtime
#ifdef GAUXC_HAS_MPI
  rt = gauxc_runtime_environment_new(MPI_COMM_WORLD, status)
#else
  rt = gauxc_runtime_environment_new(status)
#endif
  if (status%code /= 0) exit main
  world_rank = gauxc_runtime_environment_comm_rank(status, rt)
  if (status%code /= 0) exit main
  world_size = gauxc_runtime_environment_comm_size(status, rt)
  if (status%code /= 0) exit main

  if (world_rank == 0) then
    print '(a)', &
      & "Configuration", &
      & "-> Input file        : "//input_file, &
      & "-> Model             : "//model, &
      & "-> Grid              : "//grid_spec, &
      & "-> Radial quadrature : "//rad_quad_spec, &
      & "-> Pruning scheme    : "//prune_spec, &
      & ""
  end if

  ! Get molecule (atomic numbers and cartesian coordinates)
  mol = gauxc_molecule_new(status)
  if (status%code /= 0) exit main
  ! Load molecule from HDF5 dataset
  call gauxc_read_hdf5_record(status, mol, input_file, "/MOLECULE")
  if (status%code /= 0) exit main

  ! Get basis set
  basis = gauxc_basisset_new(status)
  if (status%code /= 0) exit main
  ! Load basis set from HDF5 dataset
  call gauxc_read_hdf5_record(status, basis, input_file, "/BASIS")
  if (status%code /= 0) exit main

  ! Define molecular grid from grid size, radial quadrature and pruning scheme
  grid_type = read_atomic_grid_size(grid_spec)
  radial_quad = read_radial_quad(rad_quad_spec)
  pruning_scheme = read_pruning_scheme(prune_spec)
  grid = gauxc_molgrid_new_default(status, mol, pruning_scheme, batch_size, radial_quad, grid_type)
  if (status%code /= 0) exit main

  ! Choose whether we run on host or device
  lb_exec_space = read_execution_space(lb_exec_space_str)
  int_exec_space = read_execution_space(int_exec_space_str)

  ! Setup load balancer based on molecule, grid and basis set
  lbf = gauxc_load_balancer_factory_new(status, lb_exec_space, "Replicated")
  if (status%code /= 0) exit main
  lb = gauxc_get_shared_instance(status, lbf, rt, mol, grid, basis)
  if (status%code /= 0) exit main

  ! Apply partitioning weights to the molecule grid
  mwf = gauxc_molecular_weights_factory_new(status, int_exec_space, "Default", &
    & gauxc_molecular_weights_settings())
  if (status%code /= 0) exit main
  mw = gauxc_get_instance(status, mwf)
  if (status%code /= 0) exit main
  call gauxc_molecular_weights_modify_weights(status, mw, lb)
  if (status%code /= 0) exit main

  ! Setup exchange-correlation integrator
  func = gauxc_functional_from_string(status, "PBE", .true._c_bool)
  intf = gauxc_integrator_factory_new(status, int_exec_space, "Replicated", &
    & "Default", "Default", "Default")
  if (status%code /= 0) exit main
  integrator = gauxc_get_instance(status, intf, func, lb)
  if (status%code /= 0) exit main

  ! Load density matrix from input
  p_s = gauxc_matrix_empty(status)
  p_z = gauxc_matrix_empty(status)
  call gauxc_read_hdf5_record(status, p_s, input_file, "/DENSITY_SCALAR")
  if (status%code /= 0) exit main
  call gauxc_read_hdf5_record(status, p_z, input_file, "/DENSITY_Z")
  if (status%code /= 0) exit main

#ifdef GAUXC_HAS_MPI
  call MPI_Barrier(MPI_COMM_WORLD, error)
#endif
  t_start = timing()

  ! Integrate exchange-correlation energy
  vxc_s = gauxc_matrix_empty(status)
  vxc_z = gauxc_matrix_empty(status)
  call gauxc_eval_exc_vxc(status, integrator, p_s, p_z, model, exc, vxc_s, vxc_z)
  if (status%code /= 0) exit main

#ifdef GAUXC_HAS_MPI
  call MPI_Barrier(MPI_COMM_WORLD, error)
#endif
  t_end = timing()
  t_exc = t_end - t_start

  if (world_rank == 0) then
    associate(vxc_s_ => gauxc_matrix_data(status, vxc_s), vxc_z_ => gauxc_matrix_data(status, vxc_z))
    print '(a)', "Results"
    print '(a,1x,es17.10,:1x,a)', "Exc          =", exc, "Eh"
    print '(a,1x,es17.10,:1x,a)', "|VXC(a+b)|_F =", sqrt(sum(vxc_s_**2))
    print '(a,1x,es17.10,:1x,a)', "|VXC(a-b)|_F =", sqrt(sum(vxc_z_**2))
    print '(a,1x,es17.10,:1x,a)', "Runtime XC   =", t_exc
    end associate
  end if

  end block main
  if (world_rank == 0 .and. status%code /= 0) then
    print '(a,1x,i0)', "GauXC returned with status code", status%code
    print '(a)', gauxc_status_message(status)
  end if

  call gauxc_delete(status, rt)
  call gauxc_delete(status, mol)
  call gauxc_delete(status, basis)
  call gauxc_delete(status, grid)
  call gauxc_delete(status, lbf)
  call gauxc_delete(status, lb)
  call gauxc_delete(status, mwf)
  call gauxc_delete(status, mw)
  call gauxc_delete(status, func)
  call gauxc_delete(status, intf)
  call gauxc_delete(status, integrator)
  call gauxc_delete(status, p_s)
  call gauxc_delete(status, p_z)
  call gauxc_delete(status, vxc_s)
  call gauxc_delete(status, vxc_z)

#ifdef GAUXC_HAS_MPI
  call MPI_Finalize(error)
#endif

contains

  pure function read_execution_space(spec) result(val)
    character(len=*), intent(in) :: spec
    integer(c_int) :: val
    select case(spec)
    case("host")
      val = gauxc_executionspace%host
    case("device")
      val = gauxc_executionspace%device
    end select
  end function read_execution_space

  pure function read_atomic_grid_size(spec) result(val)
    character(len=*), intent(in) :: spec
    integer(c_int) :: val
    select case(spec)
    case("fine")
      val = gauxc_atomicgridsizedefault%finegrid
    case("ultrafine")
      val = gauxc_atomicgridsizedefault%ultrafinegrid
    case("superfine")
      val = gauxc_atomicgridsizedefault%superfinegrid
    case("gm3")
      val = gauxc_atomicgridsizedefault%gm3
    case("gm5")
      val = gauxc_atomicgridsizedefault%gm5
    end select
  end function read_atomic_grid_size
  
  pure function read_radial_quad(spec) result(val)
    character(len=*), intent(in) :: spec
    integer(c_int) :: val
    select case(spec)
    case("becke")
      val = gauxc_radialquad%becke
    case("muraknowles")
      val = gauxc_radialquad%mura_knowles
    case("treutlerahlrichs")
      val = gauxc_radialquad%treutler_ahlrichs
    case("murrayhandylaming")
      val = gauxc_radialquad%murray_handy_laming
    end select
  end function read_radial_quad
  
  pure function read_pruning_scheme(spec) result(val)
    character(len=*), intent(in) :: spec
    integer(c_int) :: val
    select case(spec)
    case("unpruned")
      val = gauxc_pruningscheme%unpruned
    case("robust")
      val = gauxc_pruningscheme%robust
    case("treutler")
      val = gauxc_pruningscheme%treutler
    end select
  end function read_pruning_scheme

  function timing() result(time)
    real(c_double) :: time
    integer(c_int64_t) :: time_count, time_rate, time_max
    call system_clock(time_count, time_rate, time_max)
    time = real(time_count, c_double) / real(time_rate, c_double)
  end function timing
end program skala

Summary#

This guide demonstrated GauXC usage in Fortran applications. We:

  1. Created a minimal CMake project with GauXC as a dependency

  2. Built a CLI driver that reads molecule, basis set, and density matrix from HDF5

  3. Configured the integration grid, load balancer, and XC integrator

  4. Evaluated the exchange-correlation energy and potential