import collections.abc
import operator
from abc import ABC, abstractmethod
from functools import reduce
from typing import AbstractSet, Iterable, Optional, Sequence, Union, cast
from typing_extensions import TypeAlias, TypeGuard
import dagster._check as check
from dagster._annotations import deprecated, experimental, experimental_param, public
from dagster._core.definitions.asset_check_spec import AssetCheckKey
from dagster._core.definitions.asset_graph import AssetGraph
from dagster._core.definitions.asset_key import (
AssetKey,
CoercibleToAssetKey,
CoercibleToAssetKeyPrefix,
asset_keys_from_defs_and_coercibles,
key_prefix_from_coercible,
)
from dagster._core.definitions.assets import AssetsDefinition
from dagster._core.definitions.base_asset_graph import BaseAssetGraph, BaseAssetNode
from dagster._core.definitions.resolved_asset_deps import resolve_similar_asset_names
from dagster._core.definitions.source_asset import SourceAsset
from dagster._core.errors import DagsterInvalidSubsetError
from dagster._core.selector.subset_selector import (
fetch_connected,
fetch_sinks,
fetch_sources,
parse_clause,
)
from dagster._record import copy, record
from dagster._serdes.serdes import whitelist_for_serdes
CoercibleToAssetSelection: TypeAlias = Union[
str,
Sequence[str],
Sequence[AssetKey],
Sequence[Union["AssetsDefinition", "SourceAsset"]],
"AssetSelection",
]
def is_coercible_to_asset_selection(
obj: object,
) -> TypeGuard[CoercibleToAssetSelection]:
# can coerce to (but is not already) an AssetSelection
return isinstance(obj, str) or (
isinstance(obj, Sequence)
and all(isinstance(x, (str, AssetKey, AssetsDefinition, SourceAsset)) for x in obj)
)
[docs]
class AssetSelection(ABC):
"""An AssetSelection defines a query over a set of assets and asset checks, normally all that are defined in a code location.
You can use the "|", "&", and "-" operators to create unions, intersections, and differences of selections, respectively.
AssetSelections are typically used with :py:func:`define_asset_job`.
By default, selecting assets will also select all of the asset checks that target those assets.
Examples:
.. code-block:: python
# Select all assets in group "marketing":
AssetSelection.groups("marketing")
# Select all assets in group "marketing", as well as the asset with key "promotion":
AssetSelection.groups("marketing") | AssetSelection.assets("promotion")
# Select all assets in group "marketing" that are downstream of asset "leads":
AssetSelection.groups("marketing") & AssetSelection.assets("leads").downstream()
# Select a list of assets:
AssetSelection.assets(*my_assets_list)
# Select all assets except for those in group "marketing"
AssetSelection.all() - AssetSelection.groups("marketing")
# Select all assets which are materialized by the same op as "projections":
AssetSelection.assets("projections").required_multi_asset_neighbors()
# Select all assets in group "marketing" and exclude their asset checks:
AssetSelection.groups("marketing") - AssetSelection.all_asset_checks()
# Select all asset checks that target a list of assets:
AssetSelection.checks_for_assets(*my_assets_list)
# Select a specific asset check:
AssetSelection.checks(my_asset_check)
"""
[docs]
@public
@experimental_param(param="include_sources")
@staticmethod
def all(include_sources: bool = False) -> "AllSelection":
"""Returns a selection that includes all assets and their asset checks.
Args:
include_sources (bool): If True, then include all source assets.
"""
return AllSelection(include_sources=include_sources)
[docs]
@public
@staticmethod
def all_asset_checks() -> "AllAssetCheckSelection":
"""Returns a selection that includes all asset checks."""
return AllAssetCheckSelection()
[docs]
@public
@staticmethod
def assets(
*assets_defs: Union[AssetsDefinition, CoercibleToAssetKey],
) -> "KeysAssetSelection":
"""Returns a selection that includes all of the provided assets and asset checks that target
them.
Args:
*assets_defs (Union[AssetsDefinition, str, Sequence[str], AssetKey]): The assets to
select.
Examples:
.. code-block:: python
AssetSelection.assets(AssetKey(["a"]))
AssetSelection.assets("a")
AssetSelection.assets(AssetKey(["a"]), AssetKey(["b"]))
AssetSelection.assets("a", "b")
@asset
def asset1():
...
AssetSelection.assets(asset1)
asset_key_list = [AssetKey(["a"]), AssetKey(["b"])]
AssetSelection.assets(*asset_key_list)
"""
return KeysAssetSelection(selected_keys=asset_keys_from_defs_and_coercibles(assets_defs))
[docs]
@public
@staticmethod
@deprecated(
breaking_version="2.0",
additional_warn_text="Use AssetSelection.assets instead.",
)
def keys(*asset_keys: CoercibleToAssetKey) -> "KeysAssetSelection":
"""Returns a selection that includes assets with any of the provided keys and all asset
checks that target them.
Deprecated: use AssetSelection.assets instead.
Examples:
.. code-block:: python
AssetSelection.keys(AssetKey(["a"]))
AssetSelection.keys("a")
AssetSelection.keys(AssetKey(["a"]), AssetKey(["b"]))
AssetSelection.keys("a", "b")
asset_key_list = [AssetKey(["a"]), AssetKey(["b"])]
AssetSelection.keys(*asset_key_list)
"""
_asset_keys = [
AssetKey.from_user_string(key) if isinstance(key, str) else AssetKey.from_coercible(key)
for key in asset_keys
]
return KeysAssetSelection(selected_keys=_asset_keys)
[docs]
@public
@staticmethod
def key_prefixes(
*key_prefixes: CoercibleToAssetKeyPrefix, include_sources: bool = False
) -> "KeyPrefixesAssetSelection":
"""Returns a selection that includes assets that match any of the provided key prefixes and all the asset checks that target them.
Args:
include_sources (bool): If True, then include source assets matching the key prefix(es)
in the selection.
Examples:
.. code-block:: python
# match any asset key where the first segment is equal to "a" or "b"
# e.g. AssetKey(["a", "b", "c"]) would match, but AssetKey(["abc"]) would not.
AssetSelection.key_prefixes("a", "b")
# match any asset key where the first two segments are ["a", "b"] or ["a", "c"]
AssetSelection.key_prefixes(["a", "b"], ["a", "c"])
"""
_asset_key_prefixes = [key_prefix_from_coercible(key_prefix) for key_prefix in key_prefixes]
return KeyPrefixesAssetSelection(
selected_key_prefixes=_asset_key_prefixes, include_sources=include_sources
)
@staticmethod
def key_substring(
key_substring: str, include_sources: bool = False
) -> "KeySubstringAssetSelection":
"""Returns a selection that includes assets whose string representation contains the provided substring and all the asset checks that target it.
Args:
include_sources (bool): If True, then include source assets matching the substring
in the selection.
Examples:
.. code-block:: python
# match any asset key containing "bc"
# e.g. AssetKey(["a", "bcd"]) would match, but not AssetKey(["ab", "cd"]).
AssetSelection.key_substring("bc")
# match any asset key containing "b/c"
# e.g. AssetKey(["ab", "cd"]) would match.
AssetSelection.key_substring("b/c")
"""
return KeySubstringAssetSelection(
selected_key_substring=key_substring, include_sources=include_sources
)
[docs]
@public
@staticmethod
def groups(*group_strs, include_sources: bool = False) -> "GroupsAssetSelection":
"""Returns a selection that includes materializable assets that belong to any of the
provided groups and all the asset checks that target them.
Args:
include_sources (bool): If True, then include source assets matching the group in the
selection.
"""
check.tuple_param(group_strs, "group_strs", of_type=str)
return GroupsAssetSelection(selected_groups=group_strs, include_sources=include_sources)
[docs]
@public
@staticmethod
@experimental
def tag(key: str, value: str, include_sources: bool = False) -> "AssetSelection":
"""Returns a selection that includes materializable assets that have the provided tag, and
all the asset checks that target them.
Args:
include_sources (bool): If True, then include source assets matching the group in the
selection.
"""
return TagAssetSelection(key=key, value=value, include_sources=include_sources)
@staticmethod
def tag_string(string: str, include_sources: bool = False) -> "AssetSelection":
"""Returns a selection that includes materializable assets that have the provided tag, and
all the asset checks that target them.
Args:
include_sources (bool): If True, then include source assets matching the group in the
selection.
"""
split_by_equals_segments = string.split("=")
if len(split_by_equals_segments) == 1:
return TagAssetSelection(key=string, value="", include_sources=include_sources)
elif len(split_by_equals_segments) == 2:
key, value = split_by_equals_segments
return TagAssetSelection(key=key, value=value, include_sources=include_sources)
else:
check.failed(f"Invalid tag selection string: {string}. Must have no more than one '='.")
@staticmethod
def owner(owner: str) -> "AssetSelection":
"""Returns a selection that includes assets that have the provided owner, and all the
asset checks that target them.
Args:
owner (str): The owner to select.
"""
return OwnerAssetSelection(selected_owner=owner)
[docs]
@public
@staticmethod
def checks_for_assets(
*assets_defs: Union[AssetsDefinition, CoercibleToAssetKey],
) -> "AssetChecksForAssetKeysSelection":
"""Returns a selection with the asset checks that target the provided assets.
Args:
*assets_defs (Union[AssetsDefinition, str, Sequence[str], AssetKey]): The assets to
select checks for.
"""
return AssetChecksForAssetKeysSelection(
selected_asset_keys=asset_keys_from_defs_and_coercibles(assets_defs)
)
[docs]
@public
@staticmethod
def checks(
*assets_defs_or_check_keys: Union[AssetsDefinition, AssetCheckKey],
) -> "AssetCheckKeysSelection":
"""Returns a selection that includes all of the provided asset checks or check keys."""
assets_defs = [ad for ad in assets_defs_or_check_keys if isinstance(ad, AssetsDefinition)]
check_keys = [key for key in assets_defs_or_check_keys if isinstance(key, AssetCheckKey)]
return AssetCheckKeysSelection(
selected_asset_check_keys=[
*(key for ad in assets_defs for key in ad.check_keys),
*check_keys,
]
)
[docs]
@public
def downstream(
self, depth: Optional[int] = None, include_self: bool = True
) -> "DownstreamAssetSelection":
"""Returns a selection that includes all assets that are downstream of any of the assets in
this selection, selecting the assets in this selection by default. Includes the asset checks targeting the returned assets. Iterates through each
asset in this selection and returns the union of all downstream assets.
depth (Optional[int]): If provided, then only include assets to the given depth. A depth
of 2 means all assets that are children or grandchildren of the assets in this
selection.
include_self (bool): If True, then include the assets in this selection in the result.
If the include_self flag is False, return each downstream asset that is not part of the
original selection. By default, set to True.
"""
check.opt_int_param(depth, "depth")
check.opt_bool_param(include_self, "include_self")
return DownstreamAssetSelection(child=self, depth=depth, include_self=include_self)
[docs]
@public
def upstream(
self, depth: Optional[int] = None, include_self: bool = True
) -> "UpstreamAssetSelection":
"""Returns a selection that includes all materializable assets that are upstream of any of
the assets in this selection, selecting the assets in this selection by default. Includes
the asset checks targeting the returned assets. Iterates through each asset in this
selection and returns the union of all upstream assets.
Because mixed selections of source and materializable assets are currently not supported,
keys corresponding to `SourceAssets` will not be included as upstream of regular assets.
Args:
depth (Optional[int]): If provided, then only include assets to the given depth. A depth
of 2 means all assets that are parents or grandparents of the assets in this
selection.
include_self (bool): If True, then include the assets in this selection in the result.
If the include_self flag is False, return each upstream asset that is not part of the
original selection. By default, set to True.
"""
check.opt_int_param(depth, "depth")
check.opt_bool_param(include_self, "include_self")
return UpstreamAssetSelection(child=self, depth=depth, include_self=include_self)
[docs]
@public
def sinks(self) -> "SinksAssetSelection":
"""Given an asset selection, returns a new asset selection that contains all of the sink
assets within the original asset selection. Includes the asset checks targeting the returned assets.
A sink asset is an asset that has no downstream dependencies within the asset selection.
The sink asset can have downstream dependencies outside of the asset selection.
"""
return SinksAssetSelection(child=self)
[docs]
@public
def required_multi_asset_neighbors(self) -> "RequiredNeighborsAssetSelection":
"""Given an asset selection in which some assets are output from a multi-asset compute op
which cannot be subset, returns a new asset selection that contains all of the assets
required to execute the original asset selection. Includes the asset checks targeting the returned assets.
"""
return RequiredNeighborsAssetSelection(child=self)
[docs]
@public
def roots(self) -> "RootsAssetSelection":
"""Given an asset selection, returns a new asset selection that contains all of the root
assets within the original asset selection. Includes the asset checks targeting the returned assets.
A root asset is an asset that has no upstream dependencies within the asset selection.
The root asset can have downstream dependencies outside of the asset selection.
Because mixed selections of source and materializable assets are currently not supported,
keys corresponding to `SourceAssets` will not be included as roots. To select source assets,
use the `upstream_source_assets` method.
"""
return RootsAssetSelection(child=self)
[docs]
@public
def materializable(self) -> "MaterializableAssetSelection":
"""Given an asset selection, returns a new asset selection that contains all of the assets
that are materializable. Removes any assets which are not materializable.
"""
return MaterializableAssetSelection(child=self)
[docs]
@public
@deprecated(breaking_version="2.0", additional_warn_text="Use AssetSelection.roots instead.")
def sources(self) -> "RootsAssetSelection":
"""Given an asset selection, returns a new asset selection that contains all of the root
assets within the original asset selection. Includes the asset checks targeting the returned assets.
A root asset is a materializable asset that has no upstream dependencies within the asset
selection. The root asset can have downstream dependencies outside of the asset selection.
Because mixed selections of source and materializable assets are currently not supported,
keys corresponding to `SourceAssets` will not be included as roots. To select source assets,
use the `upstream_source_assets` method.
"""
return self.roots()
[docs]
@public
def upstream_source_assets(self) -> "ParentSourcesAssetSelection":
"""Given an asset selection, returns a new asset selection that contains all of the source
assets that are parents of assets in the original selection. Includes the asset checks
targeting the returned assets.
"""
return ParentSourcesAssetSelection(child=self)
[docs]
@public
def without_checks(self) -> "AssetSelection":
"""Removes all asset checks in the selection."""
return self - AssetSelection.all_asset_checks()
def __or__(self, other: "AssetSelection") -> "OrAssetSelection":
check.inst_param(other, "other", AssetSelection)
operands = []
for selection in (self, other):
if isinstance(selection, OrAssetSelection):
operands.extend(selection.operands)
else:
operands.append(selection)
return OrAssetSelection(operands=operands)
def __and__(self, other: "AssetSelection") -> "AndAssetSelection":
check.inst_param(other, "other", AssetSelection)
operands = []
for selection in (self, other):
if isinstance(selection, AndAssetSelection):
operands.extend(selection.operands)
else:
operands.append(selection)
return AndAssetSelection(operands=operands)
def __bool__(self):
# Ensure that even if a subclass is a NamedTuple with no fields, it is still truthy
return True
def __sub__(self, other: "AssetSelection") -> "SubtractAssetSelection":
check.inst_param(other, "other", AssetSelection)
return SubtractAssetSelection(left=self, right=other)
def resolve(
self,
all_assets: Union[Iterable[Union[AssetsDefinition, SourceAsset]], BaseAssetGraph],
allow_missing: bool = False,
) -> AbstractSet[AssetKey]:
"""Returns the set of asset keys in all_assets that match this selection.
Args:
allow_missing (bool): If False, will raise an error if any of the leaf selections in the
asset selection target entities that don't exist in the set of provided assets.
"""
if isinstance(all_assets, BaseAssetGraph):
asset_graph = all_assets
else:
check.iterable_param(all_assets, "all_assets", (AssetsDefinition, SourceAsset))
asset_graph = AssetGraph.from_assets(all_assets)
return self.resolve_inner(asset_graph, allow_missing=allow_missing)
@abstractmethod
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
raise NotImplementedError()
def resolve_checks(
self, asset_graph: BaseAssetGraph, allow_missing: bool = False
) -> AbstractSet[AssetCheckKey]:
"""We don't need this method currently, but it makes things consistent with resolve_inner. Currently
we don't store checks in the RemoteAssetGraph, so we only support AssetGraph.
"""
return self.resolve_checks_inner(asset_graph, allow_missing=allow_missing)
def resolve_checks_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetCheckKey]:
"""By default, resolve to checks that target the selected assets. This is overriden for particular selections."""
asset_keys = self.resolve(asset_graph)
return {handle for handle in asset_graph.asset_check_keys if handle.asset_key in asset_keys}
@classmethod
def from_string(cls, string: str, include_sources=False) -> "AssetSelection":
from dagster._core.definitions.antlr_asset_selection.antlr_asset_selection import (
AntlrAssetSelectionParser,
)
try:
return AntlrAssetSelectionParser(string, include_sources).asset_selection
except:
pass
if string == "*":
return cls.all()
parts = parse_clause(string)
if parts is not None:
key_selection = cls.assets(parts.item_name)
if parts.up_depth and parts.down_depth:
selection = key_selection.upstream(parts.up_depth) | key_selection.downstream(
parts.down_depth
)
elif parts.up_depth:
selection = key_selection.upstream(parts.up_depth)
elif parts.down_depth:
selection = key_selection.downstream(parts.down_depth)
else:
selection = key_selection
return selection
elif string.startswith("tag:"):
tag_str = string[len("tag:") :]
return cls.tag_string(tag_str)
check.failed(f"Invalid selection string: {string}")
@classmethod
def from_coercible(cls, selection: CoercibleToAssetSelection) -> "AssetSelection":
if isinstance(selection, str):
return cls.from_string(selection)
elif isinstance(selection, AssetSelection):
return selection
elif isinstance(selection, collections.abc.Sequence) and all(
isinstance(el, str) for el in selection
):
return reduce(operator.or_, [cls.from_string(cast(str, s)) for s in selection])
elif isinstance(selection, collections.abc.Sequence) and all(
isinstance(el, (AssetsDefinition, SourceAsset)) for el in selection
):
return AssetSelection.assets(
*(
key
for el in selection
for key in (
el.keys if isinstance(el, AssetsDefinition) else [cast(SourceAsset, el).key]
)
)
)
elif isinstance(selection, collections.abc.Sequence) and all(
isinstance(el, AssetKey) for el in selection
):
return cls.assets(*cast(Sequence[AssetKey], selection))
else:
check.failed(
"selection argument must be one of str, Sequence[str], Sequence[AssetKey],"
" Sequence[AssetsDefinition], Sequence[SourceAsset], AssetSelection. Was"
f" {type(selection)}."
)
def to_serializable_asset_selection(self, asset_graph: BaseAssetGraph) -> "AssetSelection":
return KeysAssetSelection(selected_keys=list(self.resolve(asset_graph)))
def needs_parentheses_when_operand(self) -> bool:
"""When generating a string representation of an asset selection and this asset selection
is an operand in a larger expression, whether it needs to be surrounded by parentheses.
"""
return False
def operand_to_selection_str(self) -> str:
"""Returns a string representation of the selection when it is a child of a boolean expression,
for example, in an `AndAssetSelection` or `OrAssetSelection`. The main difference from `to_selection_str`
is that this method may include additional parentheses around the selection to ensure that the
expression is parsed correctly.
"""
return (
f"({self.to_selection_str()})"
if self.needs_parentheses_when_operand()
else self.to_selection_str()
)
def to_selection_str(self) -> str:
"""Returns an Antlr string representation of the selection that can be parsed by `from_string`."""
raise NotImplementedError(
f"{self.__class__.__name__} does not support conversion to a string."
)
def __str__(self) -> str:
# Attempt to use the to-Antlr-selection-string method if it's implemented,
# otherwise fall back to the default Python string representation
try:
return self.to_selection_str()
except NotImplementedError:
return super().__str__()
@whitelist_for_serdes
@record
class AllSelection(AssetSelection):
include_sources: Optional[bool] = None
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
return (
asset_graph.get_all_asset_keys()
if self.include_sources
else asset_graph.materializable_asset_keys
)
def to_serializable_asset_selection(self, asset_graph: BaseAssetGraph) -> "AssetSelection":
return self
def to_selection_str(self) -> str:
return "*"
@whitelist_for_serdes
@record
class AllAssetCheckSelection(AssetSelection):
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
return set()
def resolve_checks_inner(
self, asset_graph: AssetGraph, allow_missing: bool
) -> AbstractSet[AssetCheckKey]:
return asset_graph.asset_check_keys
def to_serializable_asset_selection(self, asset_graph: BaseAssetGraph) -> "AssetSelection":
return self
@whitelist_for_serdes
@record
class AssetChecksForAssetKeysSelection(AssetSelection):
selected_asset_keys: Sequence[AssetKey]
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
return set()
def resolve_checks_inner(
self, asset_graph: AssetGraph, allow_missing: bool
) -> AbstractSet[AssetCheckKey]:
return {
handle
for handle in asset_graph.asset_check_keys
if handle.asset_key in self.selected_asset_keys
}
def to_serializable_asset_selection(self, asset_graph: BaseAssetGraph) -> "AssetSelection":
return self
@whitelist_for_serdes
@record
class AssetCheckKeysSelection(AssetSelection):
selected_asset_check_keys: Sequence[AssetCheckKey]
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
return set()
def resolve_checks_inner(
self, asset_graph: AssetGraph, allow_missing: bool
) -> AbstractSet[AssetCheckKey]:
specified_keys = set(self.selected_asset_check_keys)
missing_keys = {key for key in specified_keys if key not in asset_graph.asset_check_keys}
if not allow_missing and missing_keys:
raise DagsterInvalidSubsetError(
f"AssetCheckKey(s) {[k.to_user_string() for k in missing_keys]} were selected, but "
"no definitions supply these keys. Make sure all keys are spelled "
"correctly, and all definitions are correctly added to the "
f"`Definitions`."
)
return specified_keys & asset_graph.asset_check_keys
def to_serializable_asset_selection(self, asset_graph: BaseAssetGraph) -> "AssetSelection":
return self
@record
class OperandListAssetSelection(AssetSelection):
"""Superclass for classes like `AndAssetSelection` and `OrAssetSelection` that operate on
a list of sub-AssetSelections.
"""
operands: Sequence[AssetSelection]
def to_serializable_asset_selection(self, asset_graph: BaseAssetGraph) -> "AssetSelection":
return copy(
self,
operands=[
operand.to_serializable_asset_selection(asset_graph) for operand in self.operands
],
)
def __eq__(self, other):
if not isinstance(other, OperandListAssetSelection):
return False
num_operands = len(self.operands)
return len(other.operands) == num_operands and all(
self.operands[i] == other.operands[i] for i in range(num_operands)
)
def needs_parentheses_when_operand(self) -> bool:
return True
@whitelist_for_serdes
class AndAssetSelection(OperandListAssetSelection):
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
return reduce(
operator.and_,
(
selection.resolve_inner(asset_graph, allow_missing=allow_missing)
for selection in self.operands
),
)
def resolve_checks_inner(
self, asset_graph: AssetGraph, allow_missing: bool
) -> AbstractSet[AssetCheckKey]:
return reduce(
operator.and_,
(
selection.resolve_checks_inner(asset_graph, allow_missing=allow_missing)
for selection in self.operands
),
)
def to_selection_str(self) -> str:
return " and ".join(f"{operand.operand_to_selection_str()}" for operand in self.operands)
@whitelist_for_serdes
class OrAssetSelection(OperandListAssetSelection):
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
return reduce(
operator.or_,
(
selection.resolve_inner(asset_graph, allow_missing=allow_missing)
for selection in self.operands
),
)
def resolve_checks_inner(
self, asset_graph: AssetGraph, allow_missing: bool
) -> AbstractSet[AssetCheckKey]:
return reduce(
operator.or_,
(
selection.resolve_checks_inner(asset_graph, allow_missing=allow_missing)
for selection in self.operands
),
)
def to_selection_str(self) -> str:
return " or ".join(f"{operand.operand_to_selection_str()}" for operand in self.operands)
@whitelist_for_serdes
@record
class SubtractAssetSelection(AssetSelection):
left: AssetSelection
right: AssetSelection
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
return self.left.resolve_inner(
asset_graph, allow_missing=allow_missing
) - self.right.resolve_inner(asset_graph, allow_missing=allow_missing)
def resolve_checks_inner(
self, asset_graph: AssetGraph, allow_missing: bool
) -> AbstractSet[AssetCheckKey]:
return self.left.resolve_checks_inner(
asset_graph, allow_missing=allow_missing
) - self.right.resolve_checks_inner(asset_graph, allow_missing=allow_missing)
def to_serializable_asset_selection(self, asset_graph: BaseAssetGraph) -> "AssetSelection":
return copy(
self,
left=self.left.to_serializable_asset_selection(asset_graph),
right=self.right.to_serializable_asset_selection(asset_graph),
)
def needs_parentheses_when_operand(self) -> bool:
return True
def to_selection_str(self) -> str:
if isinstance(self.left, AllSelection):
return f"not {self.right.to_selection_str()}"
return f"{self.left.operand_to_selection_str()} and not {self.right.operand_to_selection_str()}"
@record
class ChainedAssetSelection(AssetSelection):
"""Superclass for AssetSelection classes that contain a single child AssetSelection and are
resolved by applying some operation to the result of resolving the child selection.
"""
child: AssetSelection
def to_serializable_asset_selection(self, asset_graph: BaseAssetGraph) -> "AssetSelection":
return copy(self, child=self.child.to_serializable_asset_selection(asset_graph))
@whitelist_for_serdes
class SinksAssetSelection(ChainedAssetSelection):
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
selection = self.child.resolve_inner(asset_graph, allow_missing=allow_missing)
return fetch_sinks(asset_graph.asset_dep_graph, selection)
def to_selection_str(self) -> str:
return f"sinks({self.child.to_selection_str()})"
@whitelist_for_serdes
class RequiredNeighborsAssetSelection(ChainedAssetSelection):
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
selection = self.child.resolve_inner(asset_graph, allow_missing=allow_missing)
output = set(selection)
for asset_key in selection:
output.update(asset_graph.get(asset_key).execution_set_asset_keys)
return output
@whitelist_for_serdes
class RootsAssetSelection(ChainedAssetSelection):
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
selection = self.child.resolve_inner(asset_graph, allow_missing=allow_missing)
return fetch_sources(asset_graph, selection)
def to_selection_str(self) -> str:
return f"roots({self.child.to_selection_str()})"
@whitelist_for_serdes
class MaterializableAssetSelection(ChainedAssetSelection):
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
return {
asset_key
for asset_key in self.child.resolve_inner(asset_graph, allow_missing=allow_missing)
if cast(BaseAssetNode, asset_graph.get(asset_key)).is_materializable
}
@whitelist_for_serdes
@record
class DownstreamAssetSelection(ChainedAssetSelection):
depth: Optional[int]
include_self: bool
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
selection = self.child.resolve_inner(asset_graph, allow_missing=allow_missing)
return operator.sub(
reduce(
operator.or_,
[
{asset_key}
| fetch_connected(
item=asset_key,
graph=asset_graph.asset_dep_graph,
direction="downstream",
depth=self.depth,
)
for asset_key in selection
],
),
selection if not self.include_self else set(),
)
def to_selection_str(self) -> str:
if self.depth is None:
base = f"{self.child.operand_to_selection_str()}*"
elif self.depth == 0:
base = self.child.operand_to_selection_str()
else:
base = f"{self.child.operand_to_selection_str()}{'+' * self.depth}"
if self.include_self:
return base
else:
return f"{base} and not {self.child.operand_to_selection_str()}"
@whitelist_for_serdes
@record
class GroupsAssetSelection(AssetSelection):
selected_groups: Sequence[str]
include_sources: bool
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
base_set = (
asset_graph.get_all_asset_keys()
if self.include_sources
else asset_graph.materializable_asset_keys
)
return {
key
for group in self.selected_groups
for key in asset_graph.asset_keys_for_group(group)
if key in base_set
}
def to_serializable_asset_selection(self, asset_graph: BaseAssetGraph) -> "AssetSelection":
return self
def needs_parentheses_when_operand(self) -> bool:
return len(self.selected_groups) > 1
def to_selection_str(self) -> str:
if len(self.selected_groups) == 1:
return f'group:"{self.selected_groups[0]}"'
else:
return " or ".join(f'group:"{group}"' for group in self.selected_groups)
@whitelist_for_serdes
@record
class TagAssetSelection(AssetSelection):
key: str
value: str
include_sources: bool
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
base_set = (
asset_graph.get_all_asset_keys()
if self.include_sources
else asset_graph.materializable_asset_keys
)
return {key for key in base_set if asset_graph.get(key).tags.get(self.key) == self.value}
def to_selection_str(self) -> str:
return f'tag:"{self.key}"="{self.value}"'
@whitelist_for_serdes
@record
class OwnerAssetSelection(AssetSelection):
selected_owner: str
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
return {
key
for key in asset_graph.get_all_asset_keys()
if self.selected_owner in asset_graph.get(key).owners
}
def to_selection_str(self) -> str:
return f'owner:"{self.selected_owner}"'
@whitelist_for_serdes
@record
class CodeLocationAssetSelection(AssetSelection):
"""Used to represent a UI asset selection by code location. This should not be resolved against
an in-process asset graph.
"""
selected_code_location: str
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
"""This should not be invoked in user code."""
raise NotImplementedError
def to_selection_str(self) -> str:
return f'code_location:"{self.selected_code_location}"'
@whitelist_for_serdes
@record
class KeysAssetSelection(AssetSelection):
selected_keys: Sequence[AssetKey]
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
specified_keys = set(self.selected_keys)
missing_keys = {key for key in specified_keys if not asset_graph.has(key)}
if not allow_missing:
# Arbitrary limit to avoid huge error messages
keys_to_suggest = list(missing_keys)[:4]
suggestions = ""
for invalid_key in keys_to_suggest:
similar_names = resolve_similar_asset_names(
invalid_key, asset_graph.get_all_asset_keys()
)
if similar_names:
# Arbitrarily limit to 10 similar names to avoid a huge error message
subset_similar_names = similar_names[:10]
similar_to_string = ", ".join(
(similar.to_string() for similar in subset_similar_names)
)
suggestions += (
f"\n\nFor selected asset {invalid_key.to_string()}, did you mean one of "
f"the following?\n\t{similar_to_string}"
)
if missing_keys:
raise DagsterInvalidSubsetError(
f"AssetKey(s) {[k.to_user_string() for k in missing_keys]} were selected, but "
"no AssetsDefinition objects supply these keys. Make sure all keys are spelled "
"correctly, and all AssetsDefinitions are correctly added to the "
f"`Definitions`.{suggestions}"
)
return specified_keys - missing_keys
def to_serializable_asset_selection(self, asset_graph: BaseAssetGraph) -> "AssetSelection":
return self
def needs_parentheses_when_operand(self) -> bool:
return len(self.selected_keys) > 1
def to_selection_str(self) -> str:
return " or ".join(f'key:"{x.to_user_string()}"' for x in self.selected_keys)
@whitelist_for_serdes
@record
class KeyPrefixesAssetSelection(AssetSelection):
selected_key_prefixes: Sequence[Sequence[str]]
include_sources: bool
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
base_set = (
asset_graph.get_all_asset_keys()
if self.include_sources
else asset_graph.materializable_asset_keys
)
return {
key
for key in base_set
if any(key.has_prefix(prefix) for prefix in self.selected_key_prefixes)
}
def to_serializable_asset_selection(self, asset_graph: BaseAssetGraph) -> "AssetSelection":
return self
@whitelist_for_serdes
@record
class KeySubstringAssetSelection(AssetSelection):
selected_key_substring: str
include_sources: bool
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
base_set = (
asset_graph.get_all_asset_keys()
if self.include_sources
else asset_graph.materializable_asset_keys
)
return {key for key in base_set if self.selected_key_substring in key.to_user_string()}
def to_serializable_asset_selection(self, asset_graph: BaseAssetGraph) -> "AssetSelection":
return self
def to_selection_str(self) -> str:
return f'key_substring:"{self.selected_key_substring}"'
def _fetch_all_upstream(
selection: AbstractSet[AssetKey],
asset_graph: BaseAssetGraph,
depth: Optional[int] = None,
include_self: bool = True,
) -> AbstractSet[AssetKey]:
return operator.sub(
reduce(
operator.or_,
[
{asset_key}
| fetch_connected(
item=asset_key,
graph=asset_graph.asset_dep_graph,
direction="upstream",
depth=depth,
)
for asset_key in selection
],
set(),
),
selection if not include_self else set(),
)
@whitelist_for_serdes
@record
class UpstreamAssetSelection(ChainedAssetSelection):
depth: Optional[int]
include_self: bool
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
selection = self.child.resolve_inner(asset_graph, allow_missing=allow_missing)
if len(selection) == 0:
return selection
all_upstream = _fetch_all_upstream(selection, asset_graph, self.depth, self.include_self)
return {key for key in all_upstream if key in asset_graph.materializable_asset_keys}
def to_selection_str(self) -> str:
if self.depth is None:
base = f"*{self.child.operand_to_selection_str()}"
elif self.depth == 0:
base = self.child.operand_to_selection_str()
else:
base = f"{'+' * self.depth}{self.child.operand_to_selection_str()}"
if self.include_self:
return base
else:
return f"{base} and not {self.child.operand_to_selection_str()}"
@whitelist_for_serdes
class ParentSourcesAssetSelection(ChainedAssetSelection):
def resolve_inner(
self, asset_graph: BaseAssetGraph, allow_missing: bool
) -> AbstractSet[AssetKey]:
selection = self.child.resolve_inner(asset_graph, allow_missing=allow_missing)
if len(selection) == 0:
return selection
all_upstream = _fetch_all_upstream(selection, asset_graph)
return {key for key in all_upstream if key in asset_graph.external_asset_keys}