-
Notifications
You must be signed in to change notification settings - Fork 2.2k
[dagster-graphql] Tolerate deleted partitions in historical automation evaluations #33939
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,11 +17,16 @@ | |
| HistoricalAllPartitionsSubsetSentinel, | ||
| ) | ||
| from dagster._core.definitions.partitions.definition import ( | ||
| DynamicPartitionsDefinition, | ||
| PartitionsDefinition, | ||
| StaticPartitionsDefinition, | ||
| ) | ||
| from dagster._core.definitions.partitions.partition_key_range import PartitionKeyRange | ||
| from dagster._core.definitions.partitions.snap import PartitionsSnap | ||
| from dagster._core.definitions.partitions.subset.key_ranges import KeyRangesPartitionsSubset | ||
| from dagster._core.definitions.run_request import InstigatorType | ||
| from dagster._core.definitions.sensor_definition import SensorType | ||
| from dagster._core.errors import DagsterInvalidInvocationError | ||
| from dagster._core.instance import DagsterInstance | ||
| from dagster._core.remote_origin import RemoteInstigatorOrigin | ||
| from dagster._core.scheduler.instigation import ( | ||
|
|
@@ -527,6 +532,34 @@ def _get_condition_evaluation( | |
| subsets_with_metadata=[], | ||
| ) | ||
|
|
||
| def _get_key_range_condition_evaluation( | ||
| self, | ||
| asset_key: AssetKey, | ||
| description: str, | ||
| partition_key_ranges: Sequence[PartitionKeyRange], | ||
| ) -> AutomationConditionEvaluation: | ||
| partitions_snap = PartitionsSnap.from_def(DynamicPartitionsDefinition(name="dynamic")) | ||
| subset = SerializableEntitySubset( | ||
| key=asset_key, | ||
| value=KeyRangesPartitionsSubset( | ||
| partitions_snap=partitions_snap, | ||
| key_ranges=partition_key_ranges, | ||
| ), | ||
| ) | ||
| return AutomationConditionEvaluation( | ||
| condition_snapshot=AutomationConditionNodeSnapshot( | ||
| class_name="...", | ||
| description=description, | ||
| unique_id=str(random.randint(0, 100000000)), | ||
| ), | ||
| true_subset=subset, | ||
| candidate_subset=HistoricalAllPartitionsSubsetSentinel(), | ||
| start_timestamp=123, | ||
| end_timestamp=456, | ||
| child_evaluations=[], | ||
| subsets_with_metadata=[], | ||
| ) | ||
|
|
||
| def test_get_evaluations_with_partitions(self, graphql_context: WorkspaceRequestContext): | ||
| asset_key = AssetKey("upstream_static_partitioned_asset") | ||
| partitions_def = StaticPartitionsDefinition(["a", "b", "c", "d", "e", "f"]) | ||
|
|
@@ -822,6 +855,86 @@ def _get_node(id): | |
| "d", | ||
| } | ||
|
|
||
| def test_get_evaluations_tolerates_deleted_historical_partition_size( | ||
| self, graphql_context: WorkspaceRequestContext | ||
| ): | ||
| asset_key = AssetKey("dynamic_partitioned_asset") | ||
| graphql_context.instance.add_dynamic_partitions("dynamic", ["a"]) | ||
| evaluation = self._get_key_range_condition_evaluation( | ||
| asset_key, | ||
| "stale historical subset", | ||
| [PartitionKeyRange("a", "a")], | ||
| ) | ||
|
|
||
| check.not_none( | ||
| graphql_context.instance.schedule_storage | ||
| ).add_auto_materialize_asset_evaluations( | ||
| evaluation_id=201, | ||
| asset_evaluations=[ | ||
| AutomationConditionEvaluationWithRunIds(evaluation=evaluation, run_ids=frozenset()) | ||
| ], | ||
| ) | ||
|
|
||
| with patch.object( | ||
| KeyRangesPartitionsSubset, | ||
| "__len__", | ||
| side_effect=DagsterInvalidInvocationError("Partition key was deleted."), | ||
| ): | ||
| results = execute_dagster_graphql( | ||
| graphql_context, | ||
| QUERY, | ||
| variables={ | ||
| "assetKey": {"path": ["dynamic_partitioned_asset"]}, | ||
| "limit": 10, | ||
| "cursor": None, | ||
| }, | ||
| ) | ||
|
|
||
| assert not results.errors | ||
| records = results.data["assetConditionEvaluationRecordsOrError"]["records"] | ||
| assert len(records) == 1 | ||
| assert records[0]["numRequested"] == 0 | ||
| assert records[0]["evaluationNodes"][0]["numTrue"] == 0 | ||
|
|
||
| def test_get_partition_evaluation_tolerates_deleted_historical_partition_membership( | ||
| self, graphql_context: WorkspaceRequestContext | ||
| ): | ||
| asset_key = AssetKey("dynamic_partitioned_asset") | ||
| graphql_context.instance.add_dynamic_partitions("dynamic", ["a"]) | ||
| evaluation = self._get_key_range_condition_evaluation( | ||
| asset_key, | ||
| "stale historical subset", | ||
| [PartitionKeyRange("a", "a")], | ||
| ) | ||
|
|
||
| check.not_none( | ||
| graphql_context.instance.schedule_storage | ||
| ).add_auto_materialize_asset_evaluations( | ||
| evaluation_id=202, | ||
| asset_evaluations=[ | ||
| AutomationConditionEvaluationWithRunIds(evaluation=evaluation, run_ids=frozenset()) | ||
| ], | ||
| ) | ||
|
|
||
| with patch.object( | ||
| KeyRangesPartitionsSubset, | ||
| "__contains__", | ||
| side_effect=DagsterInvalidInvocationError("Partition key was deleted."), | ||
| ): | ||
| results = execute_dagster_graphql( | ||
| graphql_context, | ||
| LEGACY_QUERY_FOR_SPECIFIC_PARTITION, | ||
| variables={ | ||
| "assetKey": {"path": ["dynamic_partitioned_asset"]}, | ||
| "partition": "a", | ||
| "evaluationId": 202, | ||
| }, | ||
| ) | ||
|
|
||
| assert not results.errors | ||
| evaluation = results.data["assetConditionEvaluationForPartition"] | ||
| assert evaluation["evaluationNodes"][0]["status"] == "FALSE" | ||
|
Comment on lines
+934
to
+936
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time! |
||
|
|
||
| def test_since_metadata_field(self, graphql_context: WorkspaceRequestContext): | ||
| """Test that the sinceMetadata field is correctly populated for SinceCondition evaluations.""" | ||
| asset_key = AssetKey("test_asset") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
KeyRangesPartitionsSubset.__len__and__contains__are both decorated with@require_full_partition_loading_context, which callscheck.invariant(...)and raisesDagsterInvariantViolationError— notDagsterInvalidInvocationError— when no partition loading context is active. If a GraphQL code path ever reaches these helpers without a context (e.g. in a test that bypasses the normal request setup), theDagsterInvariantViolationErrorwould propagate unhandled and re-expose the same 500 crash. In normal production paths the context is always set, so this is low risk today, but worth documenting or catching the broaderDagsterErrorbase class if hardening is desired.