# This file is part of the Open Data Cube, see https://opendatacube.org for more information
#
# Copyright (c) 2015-2025 ODC Contributors
# SPDX-License-Identifier: Apache-2.0
"""
Datacube configuration
"""
import os
import warnings
from os import PathLike
from threading import Lock
from typing import Any, TypeAlias, Union, cast
from ..migration import ODC2DeprecationWarning
from .cfg import find_config, parse_text
from .exceptions import ConfigException
from .opt import (
AliasOptionHandler,
BoolOptionHandler,
IndexDriverOptionHandler,
IntOptionHandler,
ODCOptionHandler,
)
from .utils import ConfigDict, check_valid_env_name
# Type aliases for more concise type hints.
# (typing.Union is used wherever a string forward reference is involved, because the
# ``|`` operator cannot combine a plain string with a type at runtime.)

# A single path (str or PathLike), or a list of such paths.
GeneralisedPath: TypeAlias = str | PathLike | list[str | PathLike]
# A configuration, given either as an ODCConfig object or as configuration path(s).
GeneralisedCfg: TypeAlias = Union["ODCConfig", GeneralisedPath]
# An environment, given either as an ODCEnvironment object or as a string.
GeneralisedEnv: TypeAlias = Union["ODCEnvironment", str]
# Raw configuration, given either as a string or as an already-parsed ConfigDict.
GeneralisedRawCfg: TypeAlias = str | ConfigDict
class ODCConfig:
    """
    Locates, reads and parses ODC configuration, and hands out environments.

    **Attributes**

    .. py:attribute:: allow_envvar_overrides
        :type: bool
        :value: True

        If True, environment variables can override the values explicitly specified in the supplied
        configuration. Note that environments not explicitly specified in the supplied configuration
        (dynamic environments) can still be read from environment variables, even if this attribute
        is False.

    .. py:attribute:: raw_text
        :type: str | None

        The raw configuration text being used, as read from the configuration
        file or supplied directly by the user. May be None if the user
        directly supplied configuration as a dictionary. May be in ini or yaml
        format. Does not include dynamic environments or values overridden by
        environment variables.

    .. py:attribute:: raw_config
        :type: dict[str, dict[str, Any]]

        The raw dictionary form of the configuration, as supplied directly
        by the user, or as parsed from raw_text. Does not include dynamic
        environments or values overridden by environment variables.

    .. py:attribute:: known_environments
        :type: dict[str, "ODCEnvironment"]

        A dictionary containing all environments defined in raw_config,
        plus any dynamic environments read so far.
        Environments themselves are not validated until read from.

    .. py:attribute:: canonical_names
        :type: dict[str, list[str]]

        A dictionary mapping canonical environment names to all aliases for
        that environment.
    """

    # Class-level defaults; every instance rebinds these to its own objects in __init__.
    allow_envvar_overrides: bool = True
    raw_text: str | None = None
    raw_config: ConfigDict = {}
    known_environments: dict[str, "ODCEnvironment"] = {}
    canonical_names: dict[str, list[str]] = {}
    # Set to True (via the _set_default callback handed to find_config) when the
    # fallback default configuration is in use.
    is_default = False

    def __init__(
        self,
        paths: GeneralisedPath | None = None,
        raw_dict: ConfigDict | None = None,
        text: str | None = None,
    ) -> None:
        """
        When called with no arguments, the first config file found in the config path list is used.

        The config path list is taken from:

        1) Environment variable $ODC_CONFIG_PATH (as a UNIX path style colon-separated path list)
        2) Environment variable $DATACUBE_CONFIG_PATH (as a UNIX path style colon-separated path list)
           This is a deprecated legacy environment variable, and please note that its behaviour has
           changed slightly from datacube 1.8.x.
        3) The default config search path (i.e. .cfg._DEFAULT_CONFIG_SEARCH_PATH)

        If no config file is found at any of the paths in the active path list, the default
        configuration (.cfg._DEFAULT_CONF) is used. Configuration files may be in ini or yaml format.
        Environment variable overrides ARE applied.

        If neither ``raw_dict`` nor ``text`` is supplied and the environment variable $ODC_CONFIG is
        set to a non-empty value, its content is used as the configuration text and no configuration
        file is read (even if ``paths`` was supplied).

        Otherwise, the user may supply one (and only one) of the following:

        :param paths: The path of the configuration file, or a list of paths of candidate configuration
                      files (the first in the list that can be read is used). If none of the supplied
                      paths can be read, an error is raised. (Unlike calling with no arguments, the
                      fallback default config is NOT used.) Configuration file may be in ini or yaml
                      format. Environment variable overrides ARE applied.
        :param raw_dict: A raw dictionary containing configuration data.
                         Used as is - environment variable overrides are NOT applied.
        :param text: A string containing configuration data in ini or yaml format.
                     Used as is - environment variable overrides are NOT applied.
        :raises ConfigException: If more than one of the three arguments is supplied.
        """
        # The three sources are mutually exclusive. Only truthy arguments count as supplied.
        supplied = [arg for arg in (paths, raw_dict, text) if arg]
        if len(supplied) > 1:
            raise ConfigException(
                "Can only supply one of configuration path(s), raw dictionary, "
                "and explicit configuration text."
            )

        # Explicitly supplied text or dictionary suppresses environment variable overrides.
        explicit_config = bool(raw_dict) or bool(text)
        self.allow_envvar_overrides = not explicit_config

        if not explicit_config:
            # Nothing explicit was passed in: prefer $ODC_CONFIG, else go looking for a file.
            env_text = os.environ.get("ODC_CONFIG")
            if env_text:
                text = env_text
            else:
                text = find_config(paths, default_cb=self._set_default)

        self.raw_text = text
        # NOTE(review): an empty raw_dict counts as "not supplied" in the checks above, yet is
        # still selected here because the test is `is None` - confirm this is intended.
        self.raw_config = parse_text(cast(str, text)) if raw_dict is None else raw_dict

        # Filled in by ODCEnvironment.__init__ (through _add_alias) while the sections are built.
        self._aliases: dict[str, str] = {}
        environments: dict[str, ODCEnvironment] = {}
        for section in self.raw_config:
            environments[section] = ODCEnvironment(
                self, section, self.raw_config[section], self.allow_envvar_overrides
            )
        self.known_environments: dict[str, ODCEnvironment] = environments

        # Point every alias at its canonical environment and record the reverse mapping.
        self.canonical_names: dict[str, list[str]] = {}
        for alias, canonical in self._aliases.items():
            self.known_environments[alias] = self[canonical]
            self.canonical_names.setdefault(canonical, [canonical]).append(alias)

    @classmethod
    def get_environment(
        cls,
        env: GeneralisedEnv | None = None,
        config: GeneralisedCfg | None = None,
        raw_config: GeneralisedRawCfg | None = None,
    ) -> "ODCEnvironment":
        """
        Obtain an ODCEnvironment object from the most general possible arguments.

        It is an error to supply both config and raw_config, otherwise everything is optional and
        honours system defaults.

        :param env: An ODCEnvironment object or a string.
        :param config: An ODCConfig object or a config path.
        :param raw_config: A raw config string or ConfigDict.
        :return: ``env`` itself if it is already an ODCEnvironment, otherwise the environment
                 looked up by ``env`` in the resolved configuration.
        :raises ConfigException: If both config and raw_config are supplied.
        """
        if config is not None and raw_config is not None:
            raise ConfigException("Cannot specify both config and raw_config")

        if isinstance(env, ODCEnvironment):
            return env
        if isinstance(config, ODCConfig):
            return config[env]
        if isinstance(raw_config, str):
            return ODCConfig(paths=config, text=raw_config)[env]
        return ODCConfig(paths=config, raw_dict=raw_config)[env]

    def _add_alias(self, alias: str, canonical: str) -> None:
        """
        Register an environment alias during ODCConfig class construction.

        Used internally by the Configuration library during class initialisation. Has no effect
        after initialisation.

        :param alias: The alias for the environment
        :param canonical: The canonical environment name the alias refers to
        """
        self._aliases[alias] = canonical

    def get_aliases(self, canonical_name: str) -> list[str]:
        """
        Get list of all possible names for a given canonical name.

        :param canonical_name: The canonical name of the target environment
        :return: A list of all known names for the target environment, including the canonical name
                 itself and any aliases.
        """
        return self.canonical_names.get(canonical_name, [canonical_name])

    def _set_default(self) -> None:
        """Record that the default configuration is in use (callback passed to find_config)."""
        self.is_default = True

    def _default_environment_name(self) -> str:
        """
        Work out which environment name to use when the caller did not name one.

        :return: The first applicable name, in the order described in ``__getitem__``.
        """
        preferred = os.environ.get("ODC_ENVIRONMENT")
        if preferred:
            return preferred

        legacy = os.environ.get("DATACUBE_ENVIRONMENT")
        if legacy:
            warnings.warn(
                "Setting the default environment with $DATACUBE_ENVIRONMENT is deprecated. "
                "Please use $ODC_ENVIRONMENT instead.",
                ODC2DeprecationWarning,
            )
            return legacy

        if "default" in self.known_environments:
            return "default"

        if "datacube" in self.known_environments:
            warnings.warn(
                "Defaulting to the 'datacube' environment - "
                "this fallback behaviour is deprecated and may change in a future release.",
                ODC2DeprecationWarning,
            )
            return "datacube"

        # No explicitly defined (known) environments - assume default and hope there's config
        # available in environment variables.
        return "default"

    def __getitem__(self, item: str | None) -> "ODCEnvironment":
        """
        Environments can be accessed by name (canonical or aliases) with the getitem dunder method.

        E.g. cfg["env"]

        Passing in None returns the default environment, which is the first applicable one of:

        1. The environment named by the $ODC_ENVIRONMENT variable
        2. The environment named by the $DATACUBE_ENVIRONMENT variable
           (legacy environment variable name - now deprecated)
        3. The environment called `default`, if it is a known environment
        4. The environment called `datacube`, if it is a known environment (deprecated fallback)
        5. Otherwise `default`, created as a dynamic environment.

        If the resulting name is not a known environment, dynamic environment lookup is attempted:
        a new ODCEnvironment with no raw configuration and environment variable overrides enabled is
        created and remembered. The name is validated by ODCEnvironment, but the environment
        returned may still be unable to connect to a database.

        :param item: A canonical environment name, an environment alias, or None.
        :return: A ODCEnvironment object
        """
        name = self._default_environment_name() if item is None else item
        environment = self.known_environments.get(name)
        if environment is None:
            # Unknown name: create a dynamic environment and remember it.
            environment = ODCEnvironment(self, name, {}, True)
            self.known_environments[name] = environment
        return environment
class ODCEnvironment:
    """
    Configuration reader for an individual ODC environment.

    Only configuration options with a registered option handler are able to be read. Configuration
    options may be read either as attributes on the ODCEnvironment objects or via the getitem dunder
    method.

    E.g. env.index_driver or env["index_driver"]

    Attempting to access an unhandled or invalid option will raise a KeyError or AttributeError, as
    appropriate for the access method.

    ODCEnvironment objects should only be instantiated by and acquired from an ODCConfig object.
    """

    def __init__(
        self,
        cfg: "ODCConfig",
        name: str,
        raw: dict[str, Any],
        allow_env_overrides: bool = True,
    ) -> None:
        """
        :param cfg: The ODCConfig object this environment belongs to.
        :param name: The name of the environment (validated with check_valid_env_name).
        :param raw: The raw, unvalidated options of this environment.
        :param allow_env_overrides: Stored as ``_allow_envvar_overrides`` for the option handlers.
        :raises ConfigException: If the section holds an ``alias`` option together with any other option.
        """
        self._cfg: "ODCConfig" = cfg
        check_valid_env_name(name)
        self._name: str = name
        self._raw: dict[str, Any] = raw
        self._allow_envvar_overrides: bool = allow_env_overrides
        # Guards the one-off processing of the raw options in __getitem__.
        self._lock = Lock()
        # Validated option values; stays empty until the environment is first read from.
        self._normalised: dict[str, Any] = {}

        if name == "user" and "default_environment" in raw:
            warnings.warn(
                "The 'default_environment' setting in the 'user' section is no longer supported - "
                "please refer to the documentation for more information"
            )

        self._env_overrides_applied = False

        # Aliases are handled here, the alias OptionHandler is a place-holder.
        if "alias" in self._raw:
            alias = self._raw["alias"]
            check_valid_env_name(alias)
            # This section's own name is the alias; the option value is the canonical name.
            self._cfg._add_alias(self._name, alias)
            for opt in self._raw:
                if opt != "alias":
                    raise ConfigException(
                        f"Alias environments should only contain an alias option. Extra option {opt} found."
                    )

        # Assigned last: __getattr__ treats its presence as "instance fully constructed".
        self._option_handlers: list["ODCOptionHandler"] = [
            AliasOptionHandler("alias", self),
            IndexDriverOptionHandler("index_driver", self, default="default"),
            BoolOptionHandler("skip_broken_datasets", self, default=False),
            IntOptionHandler("dc_load_limit", self, minval=0),
        ]

    def get_all_aliases(self) -> list[str]:
        """Return every name the owning ODCConfig knows for this environment's name."""
        return self._cfg.get_aliases(self._name)

    def __getitem__(self, key: str) -> Any:
        """
        Return the normalised value of a configuration option.

        The first access processes every registered option handler; later accesses only read
        the values stored then.

        :param key: The option name.
        :return: The normalised option value.
        :raises KeyError: If no handler produced a value under that name.
        """
        with self._lock:
            if not self._normalised:
                # First access of environment - process config
                # Loop through content handlers.
                # Note that handlers may add more handlers to the end of the list while we are
                # iterating over it.
                for handler in self._option_handlers:
                    self._handle_option(handler)
                if self._cfg.is_default and not self._env_overrides_applied:
                    warnings.warn(
                        "No configuration file found - using default configuration and environment variables"
                    )

            # Config already processed - read from the normalised values.
            if key in self._normalised:
                return self._normalised[key]

        # No config, no default.
        raise KeyError(key)

    def __getattr__(self, item: str) -> Any:
        """
        Attribute-style access to configuration options (``env.index_driver``).

        Only invoked when normal attribute lookup fails.

        :param item: The option name.
        :return: The normalised option value.
        :raises AttributeError: If the option is unknown, or the instance is not yet initialised.
        """
        # self[item] needs self._lock and friends. If __init__ has not run (e.g. the object was
        # made with __new__ by copy/pickle machinery), looking those up would land back here and
        # recurse without end, so fail the lookup cleanly instead.
        if "_option_handlers" not in self.__dict__:
            raise AttributeError(item)
        try:
            return self[item]
        except KeyError:
            raise AttributeError(item) from None

    def _handle_option(self, handler: "ODCOptionHandler") -> None:
        """
        Resolve, validate and store the value of a single option.

        A truthy value reported by the handler from the environment takes precedence over the
        raw configuration value.

        :param handler: The option handler to process.
        """
        val = handler.get_val_from_environment()
        if val:
            self._env_overrides_applied = True
        else:
            val = self._raw.get(handler.name)
        val = handler.validate_and_normalise(val)
        self._normalised[handler.name] = val
        handler.handle_dependent_options(val)