# File: superset2/superset/commands/tasks/submit.py
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Submit task command for GTF."""
import logging
import uuid
from functools import partial
from typing import Any, TYPE_CHECKING
from flask import current_app
from marshmallow import ValidationError
from superset_core.tasks.types import TaskScope
from superset.commands.base import BaseCommand
from superset.commands.tasks.exceptions import (
TaskCreateFailedError,
TaskInvalidError,
)
from superset.daos.exceptions import DAOCreateFailedError
from superset.stats_logger import BaseStatsLogger
from superset.tasks.locks import task_lock
from superset.tasks.utils import get_active_dedup_key
from superset.utils.core import get_user_id
from superset.utils.decorators import on_error, transaction
if TYPE_CHECKING:
from superset.models.tasks import Task
logger = logging.getLogger(__name__)
class SubmitTaskCommand(BaseCommand):
    """
    Submit a task: create a new one, or join an existing equivalent one.

    The create-vs-join decision is the business logic owned by this command.
    A distributed lock keyed on the task's dedup_key is held while we look
    for an existing task, so concurrent submissions cannot race. Under the
    lock we either:

    - create a brand-new task (no active task shares the dedup_key), or
    - join the existing task by subscribing the current user to it.
    """

    def __init__(self, data: dict[str, Any]):
        # Work on a copy so the caller's dict is never mutated (validate()
        # writes the normalized scope back into the properties).
        self._properties = data.copy()

    @transaction(on_error=partial(on_error, reraise=TaskCreateFailedError))
    def run(self) -> "Task":
        """
        Execute the command with distributed locking.

        Acquires lock based on dedup_key, then checks for existing task
        and either creates new or joins existing (adding subscriber).

        :returns: Task model (either newly created or existing)
        """
        return self.run_with_info()[0]

    @transaction(on_error=partial(on_error, reraise=TaskCreateFailedError))
    def run_with_info(self) -> tuple["Task", bool]:
        """
        Execute the command and report whether a new task was created.

        Unlike :meth:`run`, this variant lets callers tell a freshly created
        task apart from a joined pre-existing one — useful for synchronous
        execution, where joining means waiting on the existing task to
        complete rather than executing again.

        :returns: ``(task, is_new)`` — ``is_new`` is True if the task was
            created by this call
        """
        # Local import, consistent with the original, to avoid import cycles.
        from superset.daos.tasks import TaskDAO

        self.validate()

        # Normalized submission parameters (task_key defaults to a fresh UUID).
        kind = self._properties["task_type"]
        key = self._properties.get("task_key") or str(uuid.uuid4())
        task_scope = self._properties.get("scope", TaskScope.PRIVATE.value)
        current_user_id = get_user_id()

        # The dedup_key doubles as the distributed-lock key.
        lock_key = get_active_dedup_key(
            scope=task_scope,
            task_type=kind,
            task_key=key,
            user_id=current_user_id,
        )
        metrics: BaseStatsLogger = current_app.config["STATS_LOGGER"]

        # Hold the lock so the find/create (or find/join) pair is atomic.
        with task_lock(lock_key):
            found = TaskDAO.find_by_task_key(kind, key, task_scope, current_user_id)
            if found is not None:
                if current_user_id and not found.has_subscriber(current_user_id):
                    # Join: subscribe this user to the already-active task.
                    TaskDAO.add_subscriber(found.id, current_user_id)
                    metrics.incr("gtf.task.subscribe")
                    logger.info(
                        "User %s joined existing task: %s",
                        current_user_id,
                        key,
                    )
                else:
                    # Same user re-submitted the same task — deduplication hit.
                    metrics.incr("gtf.task.dedupe")
                    logger.debug(
                        "Deduplication hit for task: %s (user_id=%s)",
                        key,
                        current_user_id,
                    )
                return found, False  # is_new=False: joined existing task

            # No active task with this dedup_key — create one. The DAO is a
            # pure data operation; business decisions were made above.
            try:
                created = TaskDAO.create_task(
                    task_type=kind,
                    task_key=key,
                    scope=task_scope,
                    task_name=self._properties.get("task_name"),
                    user_id=current_user_id,
                    payload=self._properties.get("payload", {}),
                    properties=self._properties.get("properties", {}),
                )
            except DAOCreateFailedError as ex:
                raise TaskCreateFailedError() from ex
            metrics.incr("gtf.task.create")
            return created, True  # is_new=True: created new task

    def validate(self) -> None:
        """Validate the submitted parameters and normalize the scope value."""
        errors: list[ValidationError] = []

        # task_type is the only strictly required field.
        if not self._properties.get("task_type"):
            errors.append(
                ValidationError("task_type is required", field_name="task_type")
            )

        # Accept either a TaskScope enum member or its raw string value.
        raw_scope = self._properties.get("scope", TaskScope.PRIVATE.value)
        normalized = raw_scope.value if isinstance(raw_scope, TaskScope) else raw_scope
        allowed = [member.value for member in TaskScope]
        if normalized not in allowed:
            errors.append(
                ValidationError(
                    f"scope must be one of {allowed}",
                    field_name="scope",
                )
            )
        # run_with_info() reads the normalized (string) scope back out.
        self._properties["scope"] = normalized

        if errors:
            raise TaskInvalidError(exceptions=errors)