From 1c2b9db4f0d5728f9bcf35ebdef2df5c41d35590 Mon Sep 17 00:00:00 2001
From: Luiz Otavio <45200344+luizotavio32@users.noreply.github.com>
Date: Mon, 8 Sep 2025 11:48:23 -0300
Subject: [PATCH 01/26] fix: Upload CSV as Dataset (#34763)

---
 superset/commands/database/uploaders/base.py  |   3 +-
 .../commands/database/uploaders/csv_reader.py | 222 ++++++-
 superset/models/core.py                       |   4 +-
 .../commands/databases/csv_reader_test.py     | 555 ++++++++++++++++++
 4 files changed, 776 insertions(+), 8 deletions(-)

diff --git a/superset/commands/database/uploaders/base.py b/superset/commands/database/uploaders/base.py
index 84805d0626c..18b4f8024f4 100644
--- a/superset/commands/database/uploaders/base.py
+++ b/superset/commands/database/uploaders/base.py
@@ -133,7 +133,8 @@ class BaseDataReader:
                 )
             ) from ex
         except Exception as ex:
-            raise DatabaseUploadFailed(exception=ex) from ex
+            message = ex.message if hasattr(ex, "message") and ex.message else str(ex)
+            raise DatabaseUploadFailed(message=message, exception=ex) from ex


 class UploadCommand(BaseCommand):
diff --git a/superset/commands/database/uploaders/csv_reader.py b/superset/commands/database/uploaders/csv_reader.py
index c8c43a25f1c..146de7175c3 100644
--- a/superset/commands/database/uploaders/csv_reader.py
+++ b/superset/commands/database/uploaders/csv_reader.py
@@ -33,6 +33,10 @@ from superset.commands.database.uploaders.base import (

 logger = logging.getLogger(__name__)

+# Fixed error limit to avoid huge payloads and poor UX given that a file
+# might contain thousands of errors.
+MAX_DISPLAYED_ERRORS = 5
+
 ROWS_TO_READ_METADATA = 100
 DEFAULT_ENCODING = "utf-8"
 ENCODING_FALLBACKS = ["utf-8", "latin-1", "cp1252", "iso-8859-1"]
@@ -123,6 +127,205 @@ class CSVReader(BaseDataReader):
             )
         return "c"

+    @staticmethod
+    def _find_invalid_values_numeric(df: pd.DataFrame, column: str) -> pd.Series:
+        """
+        Find invalid values for numeric type conversion.
+
+        Identifies rows where values cannot be converted to numeric types using
+        pandas to_numeric with error coercing. Returns a boolean mask indicating
+        which values are invalid (non-null but unconvertible).
+
+        :param df: DataFrame containing the data
+        :param column: Name of the column to check for invalid values
+
+        :return: Boolean Series indicating which rows have invalid
+        values for numeric conversion
+        """
+        converted = pd.to_numeric(df[column], errors="coerce")
+        return converted.isna() & df[column].notna()
+
+    @staticmethod
+    def _find_invalid_values_non_numeric(
+        df: pd.DataFrame, column: str, dtype: str
+    ) -> pd.Series:
+        """
+        Find invalid values for non-numeric type conversion.
+
+        Identifies rows where values cannot be converted to the specified non-numeric
+        data type by attempting conversion and catching exceptions. This is used for
+        string, categorical, or other non-numeric type conversions.
+
+        :param df: DataFrame containing the data
+        :param column: Name of the column to check for invalid values
+        :param dtype: Target data type for conversion (e.g., 'string', 'category')
+
+        :return: Boolean Series indicating which rows have
+        invalid values for the target type
+        """
+        invalid_mask = pd.Series([False] * len(df), index=df.index)
+        for idx, value in df[column].items():
+            if pd.notna(value):
+                try:
+                    pd.Series([value]).astype(dtype)
+                except (ValueError, TypeError):
+                    invalid_mask[idx] = True
+        return invalid_mask
+
+    @staticmethod
+    def _get_error_details(
+        df: pd.DataFrame,
+        column: str,
+        dtype: str,
+        invalid_mask: pd.Series,
+        kwargs: dict[str, Any],
+    ) -> tuple[list[str], int]:
+        """
+        Get detailed error information for invalid values in type conversion.
+
+        Extracts detailed information about conversion errors, including specific
+        invalid values and their line numbers. Limits the number of detailed errors
+        shown to avoid overwhelming output while providing total error count.
+
+        :param df: DataFrame containing the data
+        :param column: Name of the column with conversion errors
+        :param dtype: Target data type that failed conversion
+        :param invalid_mask: Boolean mask indicating which rows have invalid values
+        :param kwargs: Additional parameters including header row information
+
+        :return: Tuple containing:
+            - List of formatted error detail strings (limited by MAX_DISPLAYED_ERRORS)
+            - Total count of errors found
+        """
+        if not invalid_mask.any():
+            return [], 0
+
+        invalid_indices = invalid_mask[invalid_mask].index.tolist()
+        total_errors = len(invalid_indices)
+
+        error_details = []
+        for idx in invalid_indices[:MAX_DISPLAYED_ERRORS]:
+            invalid_value = df.loc[idx, column]
+            line_number = idx + kwargs.get("header", 0) + 2
+            error_details.append(
+                f"  • Line {line_number}: '{invalid_value}' cannot be converted to "
+                f"{dtype}"
+            )
+
+        return error_details, total_errors
+
+    @staticmethod
+    def _create_error_message(
+        df: pd.DataFrame,
+        column: str,
+        dtype: str,
+        invalid_mask: pd.Series,
+        kwargs: dict[str, Any],
+        original_error: Exception,
+    ) -> str:
+        """
+        Create detailed error message for type conversion failure.
+
+        Constructs a comprehensive error message that includes:
+        - Column name and target type
+        - Total count of errors found
+        - Detailed list of first few errors with line numbers and values
+        - Summary of remaining errors if exceeding display limit
+
+        :param df: DataFrame containing the data
+        :param column: Name of the column that failed conversion
+        :param dtype: Target data type that failed
+        :param invalid_mask: Boolean mask indicating which rows have invalid values
+        :param kwargs: Additional parameters including header information
+        :param original_error: Original exception that triggered the error handling
+
+        :return: Formatted error message string ready for display to user
+        """
+        error_details, total_errors = CSVReader._get_error_details(
+            df, column, dtype, invalid_mask, kwargs
+        )
+
+        if error_details:
+            base_msg = (
+                f"Cannot convert column '{column}' to {dtype}. "
+                f"Found {total_errors} error(s):"
+            )
+            detailed_errors = "\n".join(error_details)
+
+            if total_errors > MAX_DISPLAYED_ERRORS:
+                remaining = total_errors - MAX_DISPLAYED_ERRORS
+                additional_msg = f"\n  ... and {remaining} more error(s)"
+                return f"{base_msg}\n{detailed_errors}{additional_msg}"
+            else:
+                return f"{base_msg}\n{detailed_errors}"
+        else:
+            return f"Cannot convert column '{column}' to {dtype}. {str(original_error)}"
+
+    @staticmethod
+    def _cast_single_column(
+        df: pd.DataFrame, column: str, dtype: str, kwargs: dict[str, Any]
+    ) -> None:
+        """
+        Cast a single DataFrame column to the specified data type.
+
+        Attempts to convert a column to the target data type with enhanced error
+        handling. For numeric types, uses pandas to_numeric for better performance
+        and error detection. If conversion fails, provides detailed
+        error messages including specific invalid values and their line numbers.
+
+        :param df: DataFrame to modify (modified in-place)
+        :param column: Name of the column to cast
+        :param dtype: Target data type (e.g., 'int64', 'float64', 'string')
+        :param kwargs: Additional parameters including header row information
+
+        :raises DatabaseUploadFailed: If type conversion fails,
+        with detailed error message
+        """
+        numeric_types = {"int64", "int32", "float64", "float32"}
+
+        try:
+            if dtype in numeric_types:
+                df[column] = pd.to_numeric(df[column], errors="raise")
+                df[column] = df[column].astype(dtype)
+            else:
+                df[column] = df[column].astype(dtype)
+        except (ValueError, TypeError) as ex:
+            try:
+                if dtype in numeric_types:
+                    invalid_mask = CSVReader._find_invalid_values_numeric(df, column)
+                else:
+                    invalid_mask = CSVReader._find_invalid_values_non_numeric(
+                        df, column, dtype
+                    )
+
+                error_msg = CSVReader._create_error_message(
+                    df, column, dtype, invalid_mask, kwargs, ex
+                )
+            except Exception:
+                error_msg = f"Cannot convert column '{column}' to {dtype}. {str(ex)}"
+
+            raise DatabaseUploadFailed(message=error_msg) from ex
+
+    @staticmethod
+    def _cast_column_types(
+        df: pd.DataFrame, types: dict[str, str], kwargs: dict[str, Any]
+    ) -> pd.DataFrame:
+        """
+        Cast DataFrame columns to specified types with detailed
+        error reporting.
+
+        :param df: DataFrame to cast
+        :param types: Dictionary mapping column names to target types
+        :param kwargs: Original read_csv kwargs for line number calculation
+        :return: DataFrame with cast columns
+        :raises DatabaseUploadFailed: If type conversion fails with detailed error info
+        """
+        for column, dtype in types.items():
+            if column not in df.columns:
+                continue
+            CSVReader._cast_single_column(df, column, dtype, kwargs)
+        return df
+
     @staticmethod
     def _read_csv(  # noqa: C901
         file: FileStorage,
@@ -154,6 +357,7 @@ class CSVReader(BaseDataReader):
             kwargs["low_memory"] = False

         try:
+            types = kwargs.pop("dtype", None)
             if "chunksize" in kwargs:
                 chunks = []
                 total_rows = 0
@@ -188,13 +392,19 @@ class CSVReader(BaseDataReader):
                 index_col = kwargs.get("index_col")
                 if isinstance(index_col, str):
                     result.index.name = index_col
-                return result
-            return pd.DataFrame()
+                df = result
+            else:
+                df = pd.read_csv(
+                    filepath_or_buffer=file.stream,
+                    **kwargs,
+                )

-            return pd.read_csv(
-                filepath_or_buffer=file.stream,
-                **kwargs,
-            )
+            if types:
+                df = CSVReader._cast_column_types(df, types, kwargs)
+
+            return df
+        except DatabaseUploadFailed:
+            raise
         except UnicodeDecodeError as ex:
             if encoding != DEFAULT_ENCODING:
                 raise DatabaseUploadFailed(
diff --git a/superset/models/core.py b/superset/models/core.py
index 3daabc9dfab..f6643d18ff2 100755
--- a/superset/models/core.py
+++ b/superset/models/core.py
@@ -1205,7 +1205,9 @@ class Database(Model, AuditMixinNullable, ImportExportMixin):  # pylint: disable
     def has_table(self, table: Table) -> bool:
         with self.get_sqla_engine(catalog=table.catalog, schema=table.schema) as engine:
             # do not pass "" as an empty schema; force null
-            return engine.has_table(table.table, table.schema or None)
+            if engine.has_table(table.table, table.schema or None):
+                return True
+            return engine.has_table(table.table.lower(), table.schema or None)

     def has_view(self, table: Table) -> bool:
         with self.get_sqla_engine(catalog=table.catalog, schema=table.schema) as engine:
diff --git a/tests/unit_tests/commands/databases/csv_reader_test.py b/tests/unit_tests/commands/databases/csv_reader_test.py
index effbad56398..6d7ee868e91 100644
--- a/tests/unit_tests/commands/databases/csv_reader_test.py
+++ b/tests/unit_tests/commands/databases/csv_reader_test.py
@@ -421,6 +421,561 @@ def test_csv_reader_file_metadata_invalid_file():
     )


+def test_csv_reader_integer_in_float_column():
+    csv_data = [
+        ["Name", "Score", "City"],
+        ["name1", 25.5, "city1"],
+        ["name2", 25, "city2"],
+    ]
+
+    csv_reader = CSVReader(
+        options=CSVReaderOptions(column_data_types={"Score": "float"})
+    )
+
+    df = csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    assert df.shape == (2, 3)
+    assert df["Score"].dtype == "float64"
+
+
+def test_csv_reader_object_type_auto_inferring():
+    # this case below won't raise an error
+    csv_data = [
+        ["Name", "id", "City"],
+        ["name1", 25.5, "city1"],
+        ["name2", 15, "city2"],
+        ["name3", 123456789086, "city3"],
+        ["name4", "abc", "city4"],
+        ["name5", 4.75, "city5"],
+    ]
+
+    csv_reader = CSVReader()
+
+    df = csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    assert df.shape == (5, 3)
+    # pandas automatically infers the type if column_data_types is not provided;
+    # if there's only one string in the column it converts the whole column to object
+    assert df["id"].dtype == "object"
+
+
+def test_csv_reader_float_type_auto_inferring():
+    csv_data = [
+        ["Name", "id", "City"],
+        ["name1", "25", "city1"],
+        ["name2", "15", "city2"],
+        ["name3", "123456789086", "city3"],
+        ["name5", "4.75", "city5"],
+    ]
+
+    csv_reader = CSVReader()
+
+    df = csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    assert df.shape == (4, 3)
+    # The type here is automatically inferred as float due to the 4.75 value
+    assert df["id"].dtype == "float64"
+
+
+def test_csv_reader_int_type_auto_inferring():
+    csv_data = [
+        ["Name", "id", "City"],
+        ["name1", "0", "city1"],
+        ["name2", "15", "city2"],
+        ["name3", "123456789086", "city3"],
+        ["name5", "45", "city5"],
+    ]
+
+    csv_reader = CSVReader()
+
+    df = csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    assert df.shape == (4, 3)
+    assert df["id"].dtype == "int64"
+
+
+def test_csv_reader_bigint_type_auto_inferring():
+    csv_data = [
+        ["Name", "id", "City"],
+        ["name1", "9223372036854775807", "city1"],
+        ["name2", "9223372036854775806", "city2"],
+        ["name3", "1234567890123456789", "city3"],
+        ["name4", "0", "city4"],
+        ["name5", "-9223372036854775808", "city5"],
+    ]
+
+    csv_reader = CSVReader()
+
+    df = csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    assert df.shape == (5, 3)
+    assert df["id"].dtype == "int64"
+    assert df.iloc[0]["id"] == 9223372036854775807
+    assert df.iloc[4]["id"] == -9223372036854775808
+
+
+def test_csv_reader_int_typing():
+    csv_data = [
+        ["Name", "id", "City"],
+        ["name1", "0", "city1"],
+        ["name2", "15", "city2"],
+        ["name3", "123456789086", "city3"],
+        ["name5", "45", "city5"],
+    ]
+
+    csv_reader = CSVReader(options=CSVReaderOptions(column_data_types={"id": "int"}))
+
+    df = csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    assert df.shape == (4, 3)
+    assert df["id"].dtype == "int64"
+
+
+def test_csv_reader_float_typing():
+    csv_data = [
+        ["Name", "score", "City"],
+        ["name1", "0", "city1"],
+        ["name2", "15.3", "city2"],
+        ["name3", "45", "city3"],
+        ["name5", "23.1342", "city5"],
+    ]
+
+    csv_reader = CSVReader(
+        options=CSVReaderOptions(column_data_types={"score": "float"})
+    )
+
+    df = csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    assert df.shape == (4, 3)
+    assert df["score"].dtype == "float64"
+
+
+def test_csv_reader_multiple_errors_display():
+    """Test that multiple errors are displayed with proper formatting."""
+    csv_data = [
+        ["Name", "Age", "Score"],
+        ["Alice", "25", "95.5"],
+        ["Bob", "invalid1", "87.2"],
+        ["Charlie", "invalid2", "92.1"],
+        ["Diana", "invalid3", "88.5"],
+        ["Eve", "invalid4", "90.0"],
+        ["Frank", "30", "85.5"],
+    ]
+
+    csv_reader = CSVReader(options=CSVReaderOptions(column_data_types={"Age": "int64"}))
+
+    with pytest.raises(DatabaseUploadFailed) as ex:
+        csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    error_msg = str(ex.value)
+    assert "Cannot convert column 'Age' to int64" in error_msg
+    assert "Found 4 error(s):" in error_msg
+    assert "Line 3: 'invalid1' cannot be converted to int64" in error_msg
+    assert "Line 4: 'invalid2' cannot be converted to int64" in error_msg
+    assert "Line 5: 'invalid3' cannot be converted to int64" in error_msg
+    assert "Line 6: 'invalid4' cannot be converted to int64" in error_msg
+    # With MAX_DISPLAYED_ERRORS = 5, all 4 errors should be shown without truncation
+    assert "and" not in error_msg or "more error(s)" not in error_msg
+
+
+def test_csv_reader_non_numeric_in_integer_column():
+    csv_data = [
+        ["Name", "Age", "City"],
+        ["name1", "abc", "city1"],
+        ["name2", "25", "city2"],
+    ]
+
+    csv_reader = CSVReader(options=CSVReaderOptions(column_data_types={"Age": "int64"}))
+
+    with pytest.raises(DatabaseUploadFailed) as ex:
+        csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    error_msg = str(ex.value)
+    assert "Cannot convert column 'Age' to int64" in error_msg
+    assert "Found 1 error(s):" in error_msg
+    assert "Line 2: 'abc' cannot be converted to int64" in error_msg
+
+
+def test_csv_reader_non_numeric_in_float_column():
+    csv_data = [
+        ["Name", "Score", "City"],
+        ["name1", "5.3", "city1"],
+        ["name2", "25.5", "city2"],
+        ["name3", "24.5", "city3"],
+        ["name4", "1.0", "city4"],
+        ["name5", "one point five", "city5"],
+    ]
+
+    csv_reader = CSVReader(
+        options=CSVReaderOptions(column_data_types={"Score": "float64"})
+    )
+
+    with pytest.raises(DatabaseUploadFailed) as ex:
+        csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    error_msg = str(ex.value)
+    assert "Cannot convert column 'Score' to float64" in error_msg
+    assert "Found 1 error(s):" in error_msg
+    assert "Line 6: 'one point five' cannot be converted to float64" in error_msg
+
+
+def test_csv_reader_improved_error_detection_int32():
+    """Test improved error detection for int32 type casting."""
+    csv_data = [
+        ["Name", "ID", "City"],
+        ["name1", "123", "city1"],
+        ["name2", "456", "city2"],
+        ["name3", "not_a_number", "city3"],
+        ["name4", "789", "city4"],
+    ]
+
+    csv_reader = CSVReader(options=CSVReaderOptions(column_data_types={"ID": "int32"}))
+
+    with pytest.raises(DatabaseUploadFailed) as ex:
+        csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    error_msg = str(ex.value)
+    assert "Cannot convert column 'ID' to int32" in error_msg
+    assert "Found 1 error(s):" in error_msg
+    assert "Line 4: 'not_a_number' cannot be converted to int32" in error_msg
+
+
+def test_csv_reader_improved_error_detection_float32():
+    """Test improved error detection for float32 type casting."""
+    csv_data = [
+        ["Name", "Score", "City"],
+        ["name1", "1.5", "city1"],
+        ["name2", "2.7", "city2"],
+        ["name3", "invalid_float", "city3"],
+    ]
+
+    csv_reader = CSVReader(
+        options=CSVReaderOptions(column_data_types={"Score": "float32"})
+    )
+
+    with pytest.raises(DatabaseUploadFailed) as ex:
+        csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    error_msg = str(ex.value)
+    assert "Cannot convert column 'Score' to float32" in error_msg
+    assert "Found 1 error(s):" in error_msg
+    assert "Line 4: 'invalid_float' cannot be converted to float32" in error_msg
+
+
+def test_csv_reader_error_detection_with_header_row():
+    """Test that line numbers are correctly calculated with custom header row."""
+    csv_data = [
+        ["skip_this_row", "skip", "skip"],
+        ["Name", "Age", "City"],
+        ["name1", "25", "city1"],
+        ["name2", "invalid_age", "city2"],
+    ]
+
+    csv_reader = CSVReader(
+        options=CSVReaderOptions(header_row=1, column_data_types={"Age": "int"})
+    )
+
+    with pytest.raises(DatabaseUploadFailed) as ex:
+        csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    error_msg = str(ex.value)
+    assert "Cannot convert column 'Age' to int" in error_msg
+    assert "Found 1 error(s):" in error_msg
+    assert "Line 4: 'invalid_age' cannot be converted to int" in error_msg
+
+
+def test_csv_reader_error_detection_first_row_error():
+    """Test error detection when the first data row has the error."""
+
+    csv_data = [
+        ["Name", "Age", "City"],
+        ["name1", "not_a_number", "city1"],
+        ["name2", "25", "city2"],
+    ]
+
+    csv_reader = CSVReader(options=CSVReaderOptions(column_data_types={"Age": "int64"}))
+
+    with pytest.raises(DatabaseUploadFailed) as ex:
+        csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    error_msg = str(ex.value)
+    assert "Cannot convert column 'Age' to int64" in error_msg
+    assert "Found 1 error(s):" in error_msg
+    assert "Line 2: 'not_a_number' cannot be converted to int64" in error_msg
+
+
+def test_csv_reader_error_detection_missing_column():
+    """Test that missing columns are handled gracefully."""
+    csv_data = [
+        ["Name", "City"],
+        ["name1", "city1"],
+        ["name2", "city2"],
+    ]
+
+    # Try to cast a column that doesn't exist
+    csv_reader = CSVReader(
+        options=CSVReaderOptions(column_data_types={"NonExistent": "int64"})
+    )
+
+    # Should not raise an error for missing columns
+    df = csv_reader.file_to_dataframe(create_csv_file(csv_data))
+    assert df.shape == (2, 2)
+    assert df.columns.tolist() == ["Name", "City"]
+
+
+def test_csv_reader_error_detection_mixed_valid_invalid():
+    csv_data = [
+        ["Name", "Score", "City"],
+        ["name1", "95.5", "city1"],
+        ["name2", "87.2", "city2"],
+        ["name3", "92.1", "city3"],
+        ["name4", "eighty-five", "city4"],
+        ["name5", "78.9", "city5"],
+    ]
+
+    csv_reader = CSVReader(
+        options=CSVReaderOptions(column_data_types={"Score": "float64"})
+    )
+
+    with pytest.raises(DatabaseUploadFailed) as ex:
+        csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    error_msg = str(ex.value)
+    assert "Cannot convert column 'Score' to float64" in error_msg
+    assert "Found 1 error(s):" in error_msg
+    assert "Line 5: 'eighty-five' cannot be converted to float64" in error_msg
+
+
+def test_csv_reader_error_detection_multiple_invalid_values():
+    """Test error detection with multiple invalid values showing first 5 + count."""
+    csv_data = [
+        ["Name", "Score", "City"],
+        ["name1", "95.5", "city1"],
+        ["name2", "87.2", "city2"],
+        ["name3", "92.1", "city3"],
+        ["name4", "eighty-five", "city4"],
+        ["name4", "eighty-one", "city4"],
+        ["name4", "eighty", "city4"],
+        ["name4", "one", "city4"],
+        ["name4", "two", "city4"],
+        ["name4", "three", "city4"],
+        ["name5", "78.9", "city5"],
+    ]
+
+    csv_reader = CSVReader(
+        options=CSVReaderOptions(column_data_types={"Score": "float64"})
+    )
+
+    with pytest.raises(DatabaseUploadFailed) as ex:
+        csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    error_msg = str(ex.value)
+    assert "Cannot convert column 'Score' to float64" in error_msg
+    assert "Found 6 error(s):" in error_msg
+    assert "Line 5: 'eighty-five' cannot be converted to float64" in error_msg
+    assert "Line 6: 'eighty-one' cannot be converted to float64" in error_msg
+    assert "Line 7: 'eighty' cannot be converted to float64" in error_msg
+    assert "Line 8: 'one' cannot be converted to float64" in error_msg
+    assert "Line 9: 'two' cannot be converted to float64" in error_msg
+    assert "and 1 more error(s)" in error_msg
+
+
+def test_csv_reader_error_detection_non_numeric_types():
+    """Test error detection for non-numeric type casting."""
+    csv_data = [
+        ["Name", "Status", "City"],
+        ["name1", "active", "city1"],
+        ["name2", "inactive", "city2"],
+        ["name3", 123, "city3"],  # This should cause an error when casting to string
+    ]
+
+    csv_reader = CSVReader(
+        options=CSVReaderOptions(column_data_types={"Status": "string"})
+    )
+
+    # For non-numeric types, the error detection should still work
+    # but might have different behavior depending on pandas version
+    try:
+        df = csv_reader.file_to_dataframe(create_csv_file(csv_data))
+        # If no error is raised, the conversion succeeded
+        assert df["Status"].dtype == "string"
+    except DatabaseUploadFailed as ex:
+        # If an error is raised, it should have proper formatting
+        error_msg = str(ex)
+        assert "Cannot convert" in error_msg
+        assert "Status" in error_msg
+
+
+def test_csv_reader_error_detection_with_null_values():
+    csv_data = [
+        ["Name", "Age", "City"],
+        ["name1", "25", "city1"],
+        ["name2", "", "city2"],
+        ["name3", "invalid_age", "city3"],
+    ]
+
+    csv_reader = CSVReader(options=CSVReaderOptions(column_data_types={"Age": "int64"}))
+
+    with pytest.raises(DatabaseUploadFailed) as ex:
+        csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    error_msg = str(ex.value)
+    assert "Cannot convert column 'Age' to int64" in error_msg
+    assert "Found 1 error(s):" in error_msg
+    assert "Line 4: 'invalid_age' cannot be converted to int64" in error_msg
+
+
+def test_csv_reader_successful_numeric_conversion():
+    csv_data = [
+        ["Name", "Age", "Score", "ID"],
+        ["name1", "25", "95.5", "1001"],
+        ["name2", "30", "87.2", "1002"],
+        ["name3", "35", "92.1", "1003"],
+    ]
+
+    csv_reader = CSVReader(
+        options=CSVReaderOptions(
+            column_data_types={
+                "Age": "int64",
+                "Score": "float64",
+                "ID": "int32",
+            }
+        )
+    )
+
+    df = csv_reader.file_to_dataframe(create_csv_file(csv_data))
+
+    assert df.shape == (3, 4)
+    assert df["Age"].dtype == "int64"
+    assert df["Score"].dtype == "float64"
+    assert df["ID"].dtype == "int32"
+    assert df.iloc[0]["Age"] == 25
+    assert df.iloc[0]["Score"] == 95.5
+    assert df.iloc[0]["ID"] == 1001
+
+
+def test_csv_reader_error_detection_improvements_summary():
+    csv_data_with_custom_header = [
+        ["metadata_row", "skip", "this"],
+        ["Name", "Age", "Score"],
+        ["Alice", "25", "95.5"],
+        ["Bob", "invalid_age", "87.2"],
+        ["Charlie", "30", "92.1"],
+    ]
+
+    csv_reader = CSVReader(
+        options=CSVReaderOptions(
+            header_row=1, column_data_types={"Age": "int64", "Score": "float64"}
+        )
+    )
+
+    with pytest.raises(DatabaseUploadFailed) as ex:
+        csv_reader.file_to_dataframe(create_csv_file(csv_data_with_custom_header))
+
+    error_msg = str(ex.value)
+    assert "Cannot convert column 'Age' to int64" in error_msg
+    assert "Found 1 error(s):" in error_msg
+    assert "Line 4: 'invalid_age' cannot be converted to int64" in error_msg
+
+    # Test case 2: Multiple type errors - Age comes first alphabetically
+    csv_data_multiple_errors = [
+        ["Name", "Age", "Score"],
+        ["Alice", "25", "95.5"],
+        ["Bob", "invalid_age", "invalid_score"],  # Error in both columns (line 3)
+        ["Charlie", "30", "92.1"],
+    ]
+
+    csv_reader = CSVReader(
+        options=CSVReaderOptions(column_data_types={"Age": "int64", "Score": "float64"})
+    )
+
+    with pytest.raises(DatabaseUploadFailed) as ex:
+        csv_reader.file_to_dataframe(create_csv_file(csv_data_multiple_errors))
+
+    error_msg = str(ex.value)
+    # Should catch the Age error first (Age comes before Score alphabetically)
+    assert "Cannot convert column 'Age' to int64" in error_msg
+    assert "Found 1 error(s):" in error_msg
+    assert "Line 3: 'invalid_age' cannot be converted to int64" in error_msg
+
+
+def test_csv_reader_cast_column_types_function():
+    """Test the _cast_column_types function directly for better isolation."""
+    # Create test DataFrame
+    test_data = {
+        "name": ["Alice", "Bob", "Charlie"],
+        "age": ["25", "30", "invalid_age"],
+        "score": ["95.5", "87.2", "92.1"],
+    }
+    df = pd.DataFrame(test_data)
+
+    # Test successful casting
+    types_success = {"age": "int64", "score": "float64"}
+    kwargs = {"header": 0}
+
+    # Casting would fail on the third row, so only test the first two
+    df_subset = df.iloc[:2].copy()
+    result_df = CSVReader._cast_column_types(df_subset, types_success, kwargs)
+
+    assert result_df["age"].dtype == "int64"
+    assert result_df["score"].dtype == "float64"
+    assert result_df.iloc[0]["age"] == 25
+    assert result_df.iloc[0]["score"] == 95.5
+
+    # Test error case
+    with pytest.raises(DatabaseUploadFailed) as ex:
+        CSVReader._cast_column_types(df, types_success, kwargs)
+
+    error_msg = str(ex.value)
+    assert "Cannot convert column 'age' to int64" in error_msg
+    assert "Found 1 error(s):" in error_msg
+    assert "Line 4: 'invalid_age' cannot be converted to int64" in error_msg
+
+
+def test_csv_reader_cast_column_types_missing_column():
+    """Test _cast_column_types with missing columns."""
+    test_data = {
+        "name": ["Alice", "Bob"],
+        "age": ["25", "30"],
+    }
+    df = pd.DataFrame(test_data)
+
+    # Try to cast a column that doesn't exist
+    types = {"age": "int64", "nonexistent": "float64"}
+    kwargs = {"header": 0}
+
+    # Should not raise an error for missing columns
+    result_df = CSVReader._cast_column_types(df, types, kwargs)
+    assert result_df["age"].dtype == "int64"
+    assert "nonexistent" not in result_df.columns
+
+
+def test_csv_reader_cast_column_types_different_numeric_types():
+    """Test _cast_column_types with various numeric types."""
+    test_data = {
+        "int32_col": ["1", "2", "3"],
+        "int64_col": ["100", "200", "300"],
+        "float32_col": ["1.5", "2.5", "3.5"],
+        "float64_col": ["10.1", "20.2", "30.3"],
+    }
+    df = pd.DataFrame(test_data)
+
+    types = {
+        "int32_col": "int32",
+        "int64_col": "int64",
+        "float32_col": "float32",
+        "float64_col": "float64",
+    }
+    kwargs = {"header": 0}
+
+    result_df = CSVReader._cast_column_types(df, types, kwargs)
+
+    assert result_df["int32_col"].dtype == "int32"
+    assert result_df["int64_col"].dtype == "int64"
+    assert result_df["float32_col"].dtype == "float32"
+    assert result_df["float64_col"].dtype == "float64"
+
+
 def test_csv_reader_chunking_large_file():
     """Test that chunking is used for large files."""
     # Create a large CSV with more than 100k rows

From da7f6efea897efeb54208a8bdb0dd80d72aa4bef Mon Sep 17 00:00:00 2001
From: Ganesh-Babu-GB <42582114+ganeshbabugb@users.noreply.github.com>
Date: Mon, 8 Sep 2025 23:42:55 +0530
Subject: [PATCH 02/26] docs(typos): correct spelling mistakes (#35043)

Co-authored-by: ganesh
---
 docs/docs/contributing/development.mdx                         | 4 ++--
 docs/docs/contributing/howtos.mdx                              | 4 ++--
 docs/docs/installation/docker-compose.mdx                      | 2 +-
 .../versioned_docs/version-6.0.0/contributing/development.mdx  | 4 ++--
 docs/versioned_docs/version-6.0.0/contributing/howtos.mdx      | 4 ++--
 .../version-6.0.0/installation/docker-compose.mdx              | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/docs/contributing/development.mdx b/docs/docs/contributing/development.mdx
index c811177624b..6e22055d0d4 100644
--- a/docs/docs/contributing/development.mdx
+++ b/docs/docs/contributing/development.mdx
@@ -620,10 +620,10 @@ See [how tos](/docs/contributing/howtos#linting)

 :::tip
 `act` compatibility of Superset's GHAs is not fully tested. Running `act` locally may or may not
-work for different actions, and may require fine tunning and local secret-handling.
+work for different actions, and may require fine tuning and local secret-handling.
 For those more intricate GHAs that are tricky to run locally, we recommend iterating directly on
 GHA's infrastructure, by pushing directly on a branch and monitoring GHA logs.
-For more targetted iteration, see the `gh workflow run --ref {BRANCH}` subcommand of the GitHub CLI.
+For more targeted iteration, see the `gh workflow run --ref {BRANCH}` subcommand of the GitHub CLI.
 :::

 For automation and CI/CD, Superset makes extensive use of GitHub Actions (GHA). You
diff --git a/docs/docs/contributing/howtos.mdx b/docs/docs/contributing/howtos.mdx
index 9a23cbc075a..e243a1fcf5c 100644
--- a/docs/docs/contributing/howtos.mdx
+++ b/docs/docs/contributing/howtos.mdx
@@ -232,7 +232,7 @@ CYPRESS_CONFIG=true docker compose up --build
 ```
 `docker compose` will get to work and expose a Cypress-ready Superset app. This app uses a different database schema (`superset_cypress`) to keep it isolated from
-your other dev environmen(s)t, a specific set of examples, and a set of configurations that
+your other dev environment(s), a specific set of examples, and a set of configurations that
 aligns with the expectations within the end-to-end tests. Also note that it's served on a
 different port than the default port for the backend (`8088`).

@@ -627,7 +627,7 @@ feature flag to `true`, you can add the following line to the PR body/descriptio
 FEATURE_TAGGING_SYSTEM=true
 ```

-Simarly, it's possible to disable feature flags with:
+Similarly, it's possible to disable feature flags with:

 ```
 FEATURE_TAGGING_SYSTEM=false
diff --git a/docs/docs/installation/docker-compose.mdx b/docs/docs/installation/docker-compose.mdx
index 74145ff0b5b..9727b97eaeb 100644
--- a/docs/docs/installation/docker-compose.mdx
+++ b/docs/docs/installation/docker-compose.mdx
@@ -282,5 +282,5 @@ address.
 When running `docker compose up`, docker will build what is required behind the scene, but
 may use the docker cache if assets already exist. Running `docker compose build` prior to
 `docker compose up` or the equivalent shortcut `docker compose up --build` ensures that your
-docker images matche the definition in the repository. This should only apply to the main
+docker images match the definition in the repository. This should only apply to the main
 docker-compose.yml file (default) and not to the alternative methods defined above.
diff --git a/docs/versioned_docs/version-6.0.0/contributing/development.mdx b/docs/versioned_docs/version-6.0.0/contributing/development.mdx
index ee38ca76349..4f58c29ae61 100644
--- a/docs/versioned_docs/version-6.0.0/contributing/development.mdx
+++ b/docs/versioned_docs/version-6.0.0/contributing/development.mdx
@@ -620,10 +620,10 @@ See [how tos](/docs/contributing/howtos#linting)

 :::tip
 `act` compatibility of Superset's GHAs is not fully tested. Running `act` locally may or may not
-work for different actions, and may require fine tunning and local secret-handling.
+work for different actions, and may require fine tuning and local secret-handling.
 For those more intricate GHAs that are tricky to run locally, we recommend iterating directly on
 GHA's infrastructure, by pushing directly on a branch and monitoring GHA logs.
-For more targetted iteration, see the `gh workflow run --ref {BRANCH}` subcommand of the GitHub CLI.
+For more targeted iteration, see the `gh workflow run --ref {BRANCH}` subcommand of the GitHub CLI.
 :::

 For automation and CI/CD, Superset makes extensive use of GitHub Actions (GHA). You
diff --git a/docs/versioned_docs/version-6.0.0/contributing/howtos.mdx b/docs/versioned_docs/version-6.0.0/contributing/howtos.mdx
index 9a23cbc075a..e243a1fcf5c 100644
--- a/docs/versioned_docs/version-6.0.0/contributing/howtos.mdx
+++ b/docs/versioned_docs/version-6.0.0/contributing/howtos.mdx
@@ -232,7 +232,7 @@ CYPRESS_CONFIG=true docker compose up --build
 ```
 `docker compose` will get to work and expose a Cypress-ready Superset app.
This app uses a different database schema (`superset_cypress`) to keep it isolated from -your other dev environmen(s)t, a specific set of examples, and a set of configurations that +your other dev environment(s), a specific set of examples, and a set of configurations that aligns with the expectations within the end-to-end tests. Also note that it's served on a different port than the default port for the backend (`8088`). @@ -627,7 +627,7 @@ feature flag to `true`, you can add the following line to the PR body/descriptio FEATURE_TAGGING_SYSTEM=true ``` -Simarly, it's possible to disable feature flags with: +Similarly, it's possible to disable feature flags with: ``` FEATURE_TAGGING_SYSTEM=false diff --git a/docs/versioned_docs/version-6.0.0/installation/docker-compose.mdx b/docs/versioned_docs/version-6.0.0/installation/docker-compose.mdx index 74145ff0b5b..9727b97eaeb 100644 --- a/docs/versioned_docs/version-6.0.0/installation/docker-compose.mdx +++ b/docs/versioned_docs/version-6.0.0/installation/docker-compose.mdx @@ -282,5 +282,5 @@ address. When running `docker compose up`, docker will build what is required behind the scene, but may use the docker cache if assets already exist. Running `docker compose build` prior to `docker compose up` or the equivalent shortcut `docker compose up --build` ensures that your -docker images matche the definition in the repository. This should only apply to the main +docker images match the definition in the repository. This should only apply to the main docker-compose.yml file (default) and not to the alternative methods defined above. From b05def1a8a3cbcb7eb48df6baa11cf21d4196db8 Mon Sep 17 00:00:00 2001 From: Avindra Goolcharan Date: Mon, 8 Sep 2025 20:47:31 -0400 Subject: [PATCH 03/26] fix(superset-embedded-sdk/release-if-necessary): remove axios (#35038) --- superset-embedded-sdk/package-lock.json | 426 ------------------ superset-embedded-sdk/package.json | 1 - superset-embedded-sdk/release-if-necessary.js | 5 +- 3 files changed, 1 insertion(+), 431 deletions(-) diff --git a/superset-embedded-sdk/package-lock.json b/superset-embedded-sdk/package-lock.json index 500c1263f8a..590d6ca1faf 100644 --- a/superset-embedded-sdk/package-lock.json +++ b/superset-embedded-sdk/package-lock.json @@ -19,7 +19,6 @@ "@babel/preset-typescript": "^7.24.7", "@types/jest": "^29.5.12", "@types/node": "^22.5.4", - "axios": "^1.7.7", "babel-loader": "^9.1.3", "jest": "^29.7.0", "tscw-config": "^1.1.2", @@ -3233,24 +3232,6 @@ "sprintf-js": "~1.0.2" } }, - "node_modules/asynckit": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", - "integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k=", - "dev": true - }, - "node_modules/axios": { - "version": "1.8.2", - "resolved": "https://registry.npmjs.org/axios/-/axios-1.8.2.tgz", - "integrity": "sha512-ls4GYBm5aig9vWx8AWDSGLpnpDQRtWAfrjU+EuytuODrFBkqesN2RkOQCBzrA1RQNHw1SmRMSDDDSwzNAYQ6Rg==", - "dev": true, - "license": "MIT", - "dependencies": { - "follow-redirects": "^1.15.6", - "form-data": "^4.0.0", - "proxy-from-env": "^1.1.0" - } - }, "node_modules/babel-jest": { "version": "29.7.0", "resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-29.7.0.tgz", @@ -3644,19 +3625,6 @@ "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", "dev": true }, - "node_modules/call-bind-apply-helpers": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", - 
"integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", - "dev": true, - "dependencies": { - "es-errors": "^1.3.0", - "function-bind": "^1.1.2" - }, - "engines": { - "node": ">= 0.4" - } - }, "node_modules/callsites": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", @@ -3843,18 +3811,6 @@ "integrity": "sha512-hUewv7oMjCp+wkBv5Rm0v87eJhq4woh5rSR+42YSQJKecCqgIqNkZ6lAlQms/BwHPJA5NKMRlpxPRv0n8HQW6g==", "dev": true }, - "node_modules/combined-stream": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", - "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", - "dev": true, - "dependencies": { - "delayed-stream": "~1.0.0" - }, - "engines": { - "node": ">= 0.8" - } - }, "node_modules/commander": { "version": "6.2.1", "resolved": "https://registry.npmjs.org/commander/-/commander-6.2.1.tgz", @@ -4045,15 +4001,6 @@ "node": ">=0.10.0" } }, - "node_modules/delayed-stream": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", - "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=", - "dev": true, - "engines": { - "node": ">=0.4.0" - } - }, "node_modules/detect-newline": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/detect-newline/-/detect-newline-3.1.0.tgz", @@ -4063,20 +4010,6 @@ "node": ">=8" } }, - "node_modules/dunder-proto": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", - "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", - "dev": true, - "dependencies": { - "call-bind-apply-helpers": "^1.0.1", - "es-errors": "^1.3.0", - "gopd": "^1.2.0" - }, - "engines": { - "node": ">= 0.4" - } - }, "node_modules/electron-to-chromium": { "version": "1.5.19", "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.19.tgz", @@ -4137,57 +4070,12 @@ "is-arrayish": "^0.2.1" } }, - "node_modules/es-define-property": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", - "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", - "dev": true, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-errors": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", - "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", - "dev": true, - "engines": { - "node": ">= 0.4" - } - }, "node_modules/es-module-lexer": { "version": "1.5.4", "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.5.4.tgz", "integrity": "sha512-MVNK56NiMrOwitFB7cqDwq0CQutbw+0BvLshJSse0MUNU+y1FC3bUS/AQg7oUng+/wKrrki7JfmwtVHkVfPLlw==", "dev": true }, - "node_modules/es-object-atoms": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", - "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", - "dev": true, - "dependencies": { - "es-errors": "^1.3.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-set-tostringtag": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", - "integrity": 
"sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", - "dev": true, - "dependencies": { - "es-errors": "^1.3.0", - "get-intrinsic": "^1.2.6", - "has-tostringtag": "^1.0.2", - "hasown": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - } - }, "node_modules/escalade": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", @@ -4628,42 +4516,6 @@ "node": ">=8" } }, - "node_modules/follow-redirects": { - "version": "1.15.6", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", - "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", - "dev": true, - "funding": [ - { - "type": "individual", - "url": "https://github.com/sponsors/RubenVerborgh" - } - ], - "engines": { - "node": ">=4.0" - }, - "peerDependenciesMeta": { - "debug": { - "optional": true - } - } - }, - "node_modules/form-data": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz", - "integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==", - "dev": true, - "dependencies": { - "asynckit": "^0.4.0", - "combined-stream": "^1.0.8", - "es-set-tostringtag": "^2.1.0", - "hasown": "^2.0.2", - "mime-types": "^2.1.12" - }, - "engines": { - "node": ">= 6" - } - }, "node_modules/fs-readdir-recursive": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/fs-readdir-recursive/-/fs-readdir-recursive-1.1.0.tgz", @@ -4717,30 +4569,6 @@ "node": "6.* || 8.* || >= 10.*" } }, - "node_modules/get-intrinsic": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", - "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", - "dev": true, - "dependencies": { - "call-bind-apply-helpers": "^1.0.2", - "es-define-property": "^1.0.1", - "es-errors": "^1.3.0", - "es-object-atoms": "^1.1.1", - "function-bind": "^1.1.2", - "get-proto": "^1.0.1", - "gopd": "^1.2.0", - "has-symbols": "^1.1.0", - "hasown": "^2.0.2", - "math-intrinsics": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, "node_modules/get-package-type": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/get-package-type/-/get-package-type-0.1.0.tgz", @@ -4750,19 +4578,6 @@ "node": ">=8.0.0" } }, - "node_modules/get-proto": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", - "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", - "dev": true, - "dependencies": { - "dunder-proto": "^1.0.1", - "es-object-atoms": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - } - }, "node_modules/get-stream": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz", @@ -4825,18 +4640,6 @@ "node": ">=4" } }, - "node_modules/gopd": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", - "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", - "dev": true, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, "node_modules/graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", @@ -4865,45 +4668,6 @@ 
"node": ">=4" } }, - "node_modules/has-symbols": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", - "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", - "dev": true, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/has-tostringtag": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", - "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", - "dev": true, - "dependencies": { - "has-symbols": "^1.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/hasown": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", - "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", - "dev": true, - "dependencies": { - "function-bind": "^1.1.2" - }, - "engines": { - "node": ">= 0.4" - } - }, "node_modules/html-escaper": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz", @@ -7092,15 +6856,6 @@ "tmpl": "1.0.5" } }, - "node_modules/math-intrinsics": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", - "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", - "dev": true, - "engines": { - "node": ">= 0.4" - } - }, "node_modules/merge-stream": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", @@ -7414,12 +7169,6 @@ "node": ">= 6" } }, - "node_modules/proxy-from-env": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", - "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", - "dev": true - }, "node_modules/punycode": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", @@ -10724,23 +10473,6 @@ "sprintf-js": "~1.0.2" } }, - "asynckit": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", - "integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k=", - "dev": true - }, - "axios": { - "version": "1.8.2", - "resolved": "https://registry.npmjs.org/axios/-/axios-1.8.2.tgz", - "integrity": "sha512-ls4GYBm5aig9vWx8AWDSGLpnpDQRtWAfrjU+EuytuODrFBkqesN2RkOQCBzrA1RQNHw1SmRMSDDDSwzNAYQ6Rg==", - "dev": true, - "requires": { - "follow-redirects": "^1.15.6", - "form-data": "^4.0.0", - "proxy-from-env": "^1.1.0" - } - }, "babel-jest": { "version": "29.7.0", "resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-29.7.0.tgz", @@ -11023,16 +10755,6 @@ "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", "dev": true }, - "call-bind-apply-helpers": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", - "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", - "dev": true, - "requires": { - "es-errors": "^1.3.0", - "function-bind": "^1.1.2" - } - }, "callsites": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", @@ 
-11158,15 +10880,6 @@ "integrity": "sha512-hUewv7oMjCp+wkBv5Rm0v87eJhq4woh5rSR+42YSQJKecCqgIqNkZ6lAlQms/BwHPJA5NKMRlpxPRv0n8HQW6g==", "dev": true }, - "combined-stream": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", - "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", - "dev": true, - "requires": { - "delayed-stream": "~1.0.0" - } - }, "commander": { "version": "6.2.1", "resolved": "https://registry.npmjs.org/commander/-/commander-6.2.1.tgz", @@ -11299,29 +11012,12 @@ "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==", "dev": true }, - "delayed-stream": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", - "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=", - "dev": true - }, "detect-newline": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/detect-newline/-/detect-newline-3.1.0.tgz", "integrity": "sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==", "dev": true }, - "dunder-proto": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", - "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", - "dev": true, - "requires": { - "call-bind-apply-helpers": "^1.0.1", - "es-errors": "^1.3.0", - "gopd": "^1.2.0" - } - }, "electron-to-chromium": { "version": "1.5.19", "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.19.tgz", @@ -11365,45 +11061,12 @@ "is-arrayish": "^0.2.1" } }, - "es-define-property": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", - "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", - "dev": true - }, - "es-errors": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", - "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", - "dev": true - }, "es-module-lexer": { "version": "1.5.4", "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.5.4.tgz", "integrity": "sha512-MVNK56NiMrOwitFB7cqDwq0CQutbw+0BvLshJSse0MUNU+y1FC3bUS/AQg7oUng+/wKrrki7JfmwtVHkVfPLlw==", "dev": true }, - "es-object-atoms": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", - "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", - "dev": true, - "requires": { - "es-errors": "^1.3.0" - } - }, - "es-set-tostringtag": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", - "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", - "dev": true, - "requires": { - "es-errors": "^1.3.0", - "get-intrinsic": "^1.2.6", - "has-tostringtag": "^1.0.2", - "hasown": "^2.0.2" - } - }, "escalade": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", @@ -11712,25 +11375,6 @@ "path-exists": "^4.0.0" } }, - "follow-redirects": { - "version": "1.15.6", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", - "integrity": 
"sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", - "dev": true - }, - "form-data": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz", - "integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==", - "dev": true, - "requires": { - "asynckit": "^0.4.0", - "combined-stream": "^1.0.8", - "es-set-tostringtag": "^2.1.0", - "hasown": "^2.0.2", - "mime-types": "^2.1.12" - } - }, "fs-readdir-recursive": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/fs-readdir-recursive/-/fs-readdir-recursive-1.1.0.tgz", @@ -11768,40 +11412,12 @@ "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", "dev": true }, - "get-intrinsic": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", - "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", - "dev": true, - "requires": { - "call-bind-apply-helpers": "^1.0.2", - "es-define-property": "^1.0.1", - "es-errors": "^1.3.0", - "es-object-atoms": "^1.1.1", - "function-bind": "^1.1.2", - "get-proto": "^1.0.1", - "gopd": "^1.2.0", - "has-symbols": "^1.1.0", - "hasown": "^2.0.2", - "math-intrinsics": "^1.1.0" - } - }, "get-package-type": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/get-package-type/-/get-package-type-0.1.0.tgz", "integrity": "sha512-pjzuKtY64GYfWizNAJ0fr9VqttZkNiK2iS430LtIHzjBEr6bX8Am2zm4sW4Ro5wjWW5cAlRL1qAMTcXbjNAO2Q==", "dev": true }, - "get-proto": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", - "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", - "dev": true, - "requires": { - "dunder-proto": "^1.0.1", - "es-object-atoms": "^1.0.0" - } - }, "get-stream": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz", @@ -11844,12 +11460,6 @@ "integrity": "sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==", "dev": true }, - "gopd": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", - "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", - "dev": true - }, "graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", @@ -11871,30 +11481,6 @@ "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", "dev": true }, - "has-symbols": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", - "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", - "dev": true - }, - "has-tostringtag": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", - "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", - "dev": true, - "requires": { - "has-symbols": "^1.0.3" - } - }, - "hasown": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", - "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", - "dev": true, - "requires": { - 
"function-bind": "^1.1.2" - } - }, "html-escaper": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz", @@ -13505,12 +13091,6 @@ "tmpl": "1.0.5" } }, - "math-intrinsics": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", - "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", - "dev": true - }, "merge-stream": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", @@ -13742,12 +13322,6 @@ "sisteransi": "^1.0.5" } }, - "proxy-from-env": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", - "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", - "dev": true - }, "punycode": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", diff --git a/superset-embedded-sdk/package.json b/superset-embedded-sdk/package.json index 336b0b926da..884e3595332 100644 --- a/superset-embedded-sdk/package.json +++ b/superset-embedded-sdk/package.json @@ -43,7 +43,6 @@ "@babel/preset-typescript": "^7.24.7", "@types/jest": "^29.5.12", "@types/node": "^22.5.4", - "axios": "^1.7.7", "babel-loader": "^9.1.3", "jest": "^29.7.0", "tscw-config": "^1.1.2", diff --git a/superset-embedded-sdk/release-if-necessary.js b/superset-embedded-sdk/release-if-necessary.js index 632f8cd4b87..83196ad821d 100644 --- a/superset-embedded-sdk/release-if-necessary.js +++ b/superset-embedded-sdk/release-if-necessary.js @@ -18,7 +18,6 @@ */ const { execSync } = require('child_process'); -const axios = require('axios'); const { name, version } = require('./package.json'); function log(...args) { @@ -36,9 +35,7 @@ function logError(...args) { // npm commands output a bunch of garbage in the edge cases, // and require sending semi-validated strings to the command line, // so let's just use good old http. 
-  const { status } = await axios.get(packageUrl, {
-    validateStatus: (status) => true // we literally just want the status so any status is valid
-  });
+  const { status } = await fetch(packageUrl);

   if (status === 200) {
     log('version already exists on npm, exiting');

From c5f220a9ff52b7bc7517bb49071498f11ea7913c Mon Sep 17 00:00:00 2001
From: SBIN2010 <132096459+SBIN2010@users.noreply.github.com>
Date: Tue, 9 Sep 2025 03:55:39 +0300
Subject: [PATCH 04/26] fix(table): table search input placeholder (#35064)

---
 superset-frontend/plugins/plugin-chart-table/src/TableChart.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/superset-frontend/plugins/plugin-chart-table/src/TableChart.tsx b/superset-frontend/plugins/plugin-chart-table/src/TableChart.tsx
index f73a4f29d95..18d9af2b606 100644
--- a/superset-frontend/plugins/plugin-chart-table/src/TableChart.tsx
+++ b/superset-frontend/plugins/plugin-chart-table/src/TableChart.tsx
@@ -198,7 +198,7 @@ function SearchInput({
       {t('Search')}
Date: Mon, 8 Sep 2025 21:17:33 -0400
Subject: [PATCH 05/26] fix(utils): Suppress pandas date parsing warnings in
 normalize_dttm_col (#35042)

---
 superset/utils/core.py                      |  92 ++++---
 superset/utils/pandas.py                    |  69 ++++++
 tests/unit_tests/utils/test_date_parsing.py | 256 ++++++++++++++++++++
 3 files changed, 385 insertions(+), 32 deletions(-)
 create mode 100644 superset/utils/pandas.py
 create mode 100644 tests/unit_tests/utils/test_date_parsing.py

diff --git a/superset/utils/core.py b/superset/utils/core.py
index 766dbcb7f1b..bb315f3694d 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -34,6 +34,7 @@ import tempfile
 import threading
 import traceback
 import uuid
+import warnings
 import zlib
 from collections.abc import Iterable, Iterator, Sequence
 from contextlib import closing, contextmanager
@@ -110,6 +111,7 @@ from superset.utils.backports import StrEnum
 from superset.utils.database import get_example_database
 from superset.utils.date_parser import parse_human_timedelta
 from superset.utils.hashing import md5_sha_from_dict, md5_sha_from_str
+from superset.utils.pandas import detect_datetime_format

 if TYPE_CHECKING:
     from superset.connectors.sqla.models import BaseDatasource, TableColumn
@@ -1858,6 +1860,62 @@ class DateColumn:
     )


+def _process_datetime_column(
+    df: pd.DataFrame,
+    col: DateColumn,
+) -> None:
+    """Process a single datetime column with format detection."""
+    if col.timestamp_format in ("epoch_s", "epoch_ms"):
+        dttm_series = df[col.col_label]
+        if is_numeric_dtype(dttm_series):
+            # Column is formatted as a numeric value
+            unit = col.timestamp_format.replace("epoch_", "")
+            df[col.col_label] = pd.to_datetime(
+                dttm_series,
+                utc=False,
+                unit=unit,
+                origin="unix",
+                errors="coerce",
+                exact=False,
+            )
+        else:
+            # Column has already been formatted as a timestamp.
+            try:
+                df[col.col_label] = dttm_series.apply(
+                    lambda x: pd.Timestamp(x) if pd.notna(x) else pd.NaT
+                )
+            except ValueError:
+                logger.warning(
+                    "Unable to convert column %s to datetime, ignoring",
+                    col.col_label,
+                )
+    else:
+        # Try to detect format if not specified
+        format_to_use = col.timestamp_format or detect_datetime_format(
+            df[col.col_label]
+        )
+
+        # Parse with or without format (suppress warning if no format)
+        if format_to_use:
+            df[col.col_label] = pd.to_datetime(
+                df[col.col_label],
+                utc=False,
+                format=format_to_use,
+                errors="coerce",
+                exact=False,
+            )
+        else:
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore", message=".*Could not infer format.*")
+                df[col.col_label] = pd.to_datetime(
+                    df[col.col_label],
+                    utc=False,
+                    format=None,
+                    errors="coerce",
+                    exact=False,
+                )
+
+
 def normalize_dttm_col(
     df: pd.DataFrame,
     dttm_cols: tuple[DateColumn, ...] = tuple(),  # noqa: C408
@@ -1866,38 +1924,8 @@
         if _col.col_label not in df.columns:
             continue

-        if _col.timestamp_format in ("epoch_s", "epoch_ms"):
-            dttm_series = df[_col.col_label]
-            if is_numeric_dtype(dttm_series):
-                # Column is formatted as a numeric value
-                unit = _col.timestamp_format.replace("epoch_", "")
-                df[_col.col_label] = pd.to_datetime(
-                    dttm_series,
-                    utc=False,
-                    unit=unit,
-                    origin="unix",
-                    errors="coerce",
-                    exact=False,
-                )
-            else:
-                # Column has already been formatted as a timestamp.
-                try:
-                    df[_col.col_label] = dttm_series.apply(
-                        lambda x: pd.Timestamp(x) if pd.notna(x) else pd.NaT
-                    )
-                except ValueError:
-                    logger.warning(
-                        "Unable to convert column %s to datetime, ignoring",
-                        _col.col_label,
-                    )
-        else:
-            df[_col.col_label] = pd.to_datetime(
-                df[_col.col_label],
-                utc=False,
-                format=_col.timestamp_format,
-                errors="coerce",
-                exact=False,
-            )
+        _process_datetime_column(df, _col)
+
         if _col.offset:
             df[_col.col_label] += timedelta(hours=_col.offset)
         if _col.time_shift is not None:
diff --git a/superset/utils/pandas.py b/superset/utils/pandas.py
new file mode 100644
index 00000000000..48ece608821
--- /dev/null
+++ b/superset/utils/pandas.py
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Pandas utilities for data processing."""
+
+import pandas as pd
+
+
+def detect_datetime_format(series: pd.Series, sample_size: int = 100) -> str | None:
+    """
+    Detect the datetime format from a sample of the series.
+ + :param series: The pandas Series to analyze + :param sample_size: Number of rows to sample for format detection + :return: Detected format string or None if no consistent format found + """ + # Most common formats first for performance + common_formats = [ + "%Y-%m-%d %H:%M:%S", + "%Y-%m-%d", + "%Y-%m-%dT%H:%M:%S", + "%Y-%m-%dT%H:%M:%SZ", + "%Y-%m-%dT%H:%M:%S.%f", + "%Y-%m-%dT%H:%M:%S.%fZ", + "%m/%d/%Y", + "%d/%m/%Y", + "%Y/%m/%d", + "%m/%d/%Y %H:%M:%S", + "%d/%m/%Y %H:%M:%S", + "%m-%d-%Y", + "%d-%m-%Y", + "%Y%m%d", + ] + + # Get non-null sample + sample = series.dropna().head(sample_size) + if sample.empty: + return None + + # Convert to string if not already + if not pd.api.types.is_string_dtype(sample): + sample = sample.astype(str) + + # Try each format + for fmt in common_formats: + try: + # Test on small sample first + test_sample = sample.head(10) + pd.to_datetime(test_sample, format=fmt, errors="raise") + # If successful, verify on larger sample + pd.to_datetime(sample, format=fmt, errors="raise") + return fmt + except (ValueError, TypeError): + continue + + return None diff --git a/tests/unit_tests/utils/test_date_parsing.py b/tests/unit_tests/utils/test_date_parsing.py new file mode 100644 index 00000000000..824ff15c429 --- /dev/null +++ b/tests/unit_tests/utils/test_date_parsing.py @@ -0,0 +1,256 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
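
# A minimal sketch, outside the patch, of the pandas behavior these tests pin
# down: on pandas 2.x, parsing a mixed-format series without an explicit
# format emits a UserWarning before falling back to per-element parsing.
import warnings

import pandas as pd

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    pd.to_datetime(pd.Series(["2023-01-01", "01/02/2023"]), errors="coerce")
# caught now holds a warning whose message contains "Could not infer format,
# so each element will be parsed individually" -- exactly the noise that
# normalize_dttm_col suppresses when no consistent format is detected.
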
+"""Tests for datetime format detection and warning suppression.""" + +import warnings + +import pandas as pd +import pytest + +from superset.utils.core import DateColumn, normalize_dttm_col +from superset.utils.pandas import detect_datetime_format + + +def capture_warnings(func, *args, **kwargs): + """Execute function and return any format inference warnings.""" + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + result = func(*args, **kwargs) + format_warnings = [ + str(warning.message) + for warning in w + if "Could not infer format" in str(warning.message) + ] + return result, format_warnings + + +def test_detect_datetime_format(): + """Test format detection for common datetime patterns.""" + test_cases = [ + (["2023-01-01", "2023-01-02"], "%Y-%m-%d"), + (["2023-01-01 12:00:00", "2023-01-02 13:00:00"], "%Y-%m-%d %H:%M:%S"), + (["01/15/2023", "02/20/2023"], "%m/%d/%Y"), + (["2023-01-01", "01/02/2023"], None), # Mixed formats + ([], None), # Empty + ([None, None], None), # All nulls + ] + + for data, expected in test_cases: + assert detect_datetime_format(pd.Series(data)) == expected + + +def test_no_warnings_with_consistent_formats(): + """Verify no warnings are produced for consistent date formats.""" + df = pd.DataFrame( + { + "date": ["2023-01-01", "2023-01-02", "2023-01-03"], + "datetime": [ + "2023-01-01 12:00:00", + "2023-01-02 13:00:00", + "2023-01-03 14:00:00", + ], + } + ) + + date_cols = ( + DateColumn(col_label="date"), + DateColumn(col_label="datetime"), + ) + + _, warnings_list = capture_warnings(normalize_dttm_col, df, date_cols) + assert len(warnings_list) == 0 + + # Verify parsing worked + assert pd.api.types.is_datetime64_any_dtype(df["date"]) + assert pd.api.types.is_datetime64_any_dtype(df["datetime"]) + assert df["date"].iloc[0] == pd.Timestamp("2023-01-01") + + +def test_explicit_format_respected(): + """Verify explicit formats are still used when provided.""" + df = pd.DataFrame({"date": ["01/15/2023", "02/20/2023"]}) + date_cols = (DateColumn(col_label="date", timestamp_format="%m/%d/%Y"),) + + normalize_dttm_col(df, date_cols) + + assert pd.api.types.is_datetime64_any_dtype(df["date"]) + assert df["date"].iloc[0] == pd.Timestamp("2023-01-15") + + +def test_mixed_formats_suppressed(): + """Verify warnings are suppressed for mixed format data.""" + df = pd.DataFrame( + { + "mixed": ["2023-01-01", "01/02/2023", "2023-03-01 12:00:00"], + } + ) + + date_cols = (DateColumn(col_label="mixed"),) + _, warnings_list = capture_warnings(normalize_dttm_col, df, date_cols) + + assert len(warnings_list) == 0 + assert pd.api.types.is_datetime64_any_dtype(df["mixed"]) + + +def test_epoch_format(): + """Verify epoch timestamp handling works correctly.""" + df = pd.DataFrame({"epoch": [1672531200, 1672617600]}) # 2023-01-01, 2023-01-02 + date_cols = (DateColumn(col_label="epoch", timestamp_format="epoch_s"),) + + normalize_dttm_col(df, date_cols) + + assert pd.api.types.is_datetime64_any_dtype(df["epoch"]) + assert df["epoch"].iloc[0] == pd.Timestamp("2023-01-01") + + +def test_epoch_format_invalid_values(caplog): + """Test epoch format with invalid values triggers warning.""" + # Test with non-numeric values that can't be converted to epoch + df = pd.DataFrame({"epoch": ["not_a_number", "invalid", "abc"]}) + date_cols = (DateColumn(col_label="epoch", timestamp_format="epoch_s"),) + + # Clear any existing log records + caplog.clear() + + # Run the function - should log a warning + with caplog.at_level("WARNING"): + normalize_dttm_col(df, date_cols) 
+ + # Verify warning was logged + assert "Unable to convert column epoch to datetime, ignoring" in caplog.text + + # The column should remain unchanged when conversion fails + assert df["epoch"].dtype == object + assert df["epoch"].iloc[0] == "not_a_number" + + +@pytest.mark.parametrize( + "data,expected_format", + [ + (["2023-01-01", "2023-01-02"], "%Y-%m-%d"), + (["01/15/2023", "02/20/2023"], "%m/%d/%Y"), + (["2023-01-01T12:00:00Z", "2023-01-02T13:00:00Z"], "%Y-%m-%dT%H:%M:%SZ"), + ( + ["2023-01-01T12:00:00.123Z", "2023-01-02T13:00:00.456Z"], + "%Y-%m-%dT%H:%M:%S.%fZ", + ), + ], +) +def test_format_detection_patterns(data: list[str], expected_format: str): + """Test detection of various datetime formats.""" + assert detect_datetime_format(pd.Series(data)) == expected_format + + +def test_edge_cases(): + """Test handling of edge cases.""" + edge_cases = [ + pd.DataFrame({"date": []}), # Empty + pd.DataFrame({"date": [None, None]}), # All nulls + pd.DataFrame({"date": ["2023-01-01"]}), # Single value + pd.DataFrame({"date": pd.to_datetime(["2023-01-01"])}), # Already datetime + ] + + for df in edge_cases: + df_copy = df.copy() + date_cols = (DateColumn(col_label="date"),) + # Should not raise + normalize_dttm_col(df_copy, date_cols) + + +def test_detect_datetime_format_empty_series(): + """Test detect_datetime_format returns None for empty series after dropping NaN.""" + # Test with all None values - covers lines 50-51 in pandas.py + series_all_none = pd.Series([None, None, None]) + assert detect_datetime_format(series_all_none) is None + + # Test with all NaN values + series_all_nan = pd.Series([pd.NaT, pd.NaT, pd.NaT]) + assert detect_datetime_format(series_all_nan) is None + + # Test with empty series + series_empty = pd.Series([], dtype=object) + assert detect_datetime_format(series_empty) is None + + +def test_datetime_conversion_value_error(caplog, monkeypatch): + """Test ValueError during datetime conversion logs a warning. + + Covers core.py lines 1887-88. 
+ """ + # Create a DataFrame with string values representing dates that are + # already datetime-like but when epoch_s format is specified and the + # values are NOT numeric, it tries to convert them using pd.Timestamp + # which can fail + + # Create a mock type that raises ValueError when pd.Timestamp is called on it + class BadTimestampValue: + def __init__(self, value): + self.value = value + + def __repr__(self): + return f"BadTimestamp({self.value})" + + def __bool__(self): + return True + + # Create DataFrame with values that will fail pd.Timestamp conversion + df = pd.DataFrame( + { + "date": [ + BadTimestampValue("2023-01-01"), + BadTimestampValue("2023-01-02"), + BadTimestampValue("2023-01-03"), + ] + } + ) + + # Store original Timestamp + original_timestamp = pd.Timestamp + + def failing_timestamp(value): + if isinstance(value, BadTimestampValue): + raise ValueError(f"Cannot convert {value} to Timestamp") + return original_timestamp(value) + + # Set to epoch format with non-numeric data to trigger the else branch + # (lines 1881-1891 in core.py) + date_cols = (DateColumn(col_label="date", timestamp_format="epoch_s"),) + + # Clear any existing log records + caplog.clear() + + # Run the function with our patched Timestamp - should log a warning + with caplog.at_level("WARNING"): + # Use monkeypatch for cleaner patching + monkeypatch.setattr(pd, "Timestamp", failing_timestamp) + normalize_dttm_col(df, date_cols) + + # Verify warning was logged (covers lines 1887-88 in core.py) + assert "Unable to convert column date to datetime, ignoring" in caplog.text + + +def test_warning_suppression(): + """Verify our implementation suppresses warnings for mixed formats.""" + df = pd.DataFrame({"date": ["2023-01-01", "01/02/2023", "March 3, 2023"]}) + + # Our approach should suppress warnings + _, warnings_list = capture_warnings( + normalize_dttm_col, df, (DateColumn(col_label="date"),) + ) + + assert len(warnings_list) == 0 # Should suppress all format inference warnings + assert pd.api.types.is_datetime64_any_dtype(df["date"]) # Should still parse dates From 4c2b27e7f09513d09306d2297791387cbb88275b Mon Sep 17 00:00:00 2001 From: "Michael S. 
Molina" <70410625+michael-s-molina@users.noreply.github.com> Date: Tue, 9 Sep 2025 11:50:49 -0300 Subject: [PATCH 06/26] fix: Change database event in core (#35071) --- superset-frontend/src/core/sqlLab.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/superset-frontend/src/core/sqlLab.ts b/superset-frontend/src/core/sqlLab.ts index 8f374dea1be..719d984fc49 100644 --- a/superset-frontend/src/core/sqlLab.ts +++ b/superset-frontend/src/core/sqlLab.ts @@ -102,8 +102,11 @@ export const onDidChangeEditorDatabase: typeof sqlLabType.onDidChangeEditorDatab createActionListener( predicate(QUERY_EDITOR_SETDB), listener, - (action: { type: string; queryEditor: { dbId: number } }) => - action.queryEditor.dbId, + (action: { + type: string; + dbId?: number; + queryEditor: { dbId: number }; + }) => action.dbId || action.queryEditor.dbId, thisArgs, ); From 65376c7baf7e04b7657f46e5ef730aa3c900b100 Mon Sep 17 00:00:00 2001 From: Nicolas <48596976+nicob3y@users.noreply.github.com> Date: Tue, 9 Sep 2025 19:22:54 +0200 Subject: [PATCH 07/26] fix(Table Chart): render null dates properly (#34558) --- .../plugins/plugin-chart-table/src/TableChart.tsx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/superset-frontend/plugins/plugin-chart-table/src/TableChart.tsx b/superset-frontend/plugins/plugin-chart-table/src/TableChart.tsx index 18d9af2b606..e2329ac906e 100644 --- a/superset-frontend/plugins/plugin-chart-table/src/TableChart.tsx +++ b/superset-frontend/plugins/plugin-chart-table/src/TableChart.tsx @@ -89,6 +89,7 @@ import { formatColumnValue } from './utils/formatValue'; import { PAGE_SIZE_OPTIONS, SERVER_PAGE_SIZE_OPTIONS } from './consts'; import { updateTableOwnState } from './DataTable/utils/externalAPIs'; import getScrollBarSize from './DataTable/utils/getScrollBarSize'; +import DateWithFormatter from './utils/DateWithFormatter'; type ValueRange = [number, number]; @@ -934,7 +935,10 @@ export default function TableChart( }, className: [ className, - value == null ? 'dt-is-null' : '', + value == null || + (value instanceof DateWithFormatter && value.input == null) + ? 'dt-is-null' + : '', isActiveFilterValue(key, value) ? 
' dt-is-active-filter' : '', ].join(' '), tabIndex: 0, From 2f6434318603c1177185537e0263aaf755ddcd0a Mon Sep 17 00:00:00 2001 From: Gabriel Torres Ruiz Date: Tue, 9 Sep 2025 14:23:16 -0300 Subject: [PATCH 08/26] feat(dataset): create usage tab for dataset (#34707) --- .../ChangeDatasourceModal.test.jsx | 2 +- .../index.tsx} | 2 +- .../DatasourceModal.test.jsx | 2 +- .../index.tsx} | 6 +- .../CollectionTable}/CollectionTable.test.tsx | 2 +- .../CollectionTable/index.tsx} | 6 +- .../DatasourceEditor}/DatasourceEditor.jsx | 129 +++++- .../DashboardLinksExternal.test.tsx | 132 ++++++ .../DashboardLinksExternal/index.tsx | 78 ++++ .../DatasetUsageTab/DatasetUsageTab.test.tsx | 214 +++++++++ .../components/DatasetUsageTab/index.tsx | 293 ++++++++++++ .../DatasourceEditor/components/index.ts | 20 + .../components/DatasourceEditor/index.ts | 19 + .../tests}/DatasourceEditor.test.jsx | 4 +- .../tests}/DatasourceEditorCurrency.test.jsx | 2 +- .../tests}/DatasourceEditorRTL.test.jsx | 0 .../{ => components/Field}/Field.test.tsx | 2 +- .../{Field.tsx => components/Field/index.tsx} | 0 .../Fieldset/index.tsx} | 4 +- .../components/Datasource/components/index.ts | 22 + .../Datasource/{index.tsx => index.ts} | 0 .../Datasource/{utils.js => utils/index.js} | 0 .../Datasource/{ => utils}/utils.test.tsx | 2 +- .../src/components/ListView/CrossLinks.tsx | 10 +- .../ListView/DashboardCrossLinks.tsx | 10 +- .../UploadDataModel/UploadDataModal.test.tsx | 4 +- .../EditDataset/UsageTab/UsageTab.test.tsx | 416 ------------------ .../AddDataset/EditDataset/UsageTab/index.tsx | 265 ----------- .../datasets/AddDataset/EditDataset/index.tsx | 3 +- 29 files changed, 934 insertions(+), 715 deletions(-) rename superset-frontend/src/components/Datasource/{ => ChangeDatasourceModal}/ChangeDatasourceModal.test.jsx (98%) rename superset-frontend/src/components/Datasource/{ChangeDatasourceModal.tsx => ChangeDatasourceModal/index.tsx} (99%) rename superset-frontend/src/components/Datasource/{ => DatasourceModal}/DatasourceModal.test.jsx (99%) rename superset-frontend/src/components/Datasource/{DatasourceModal.tsx => DatasourceModal/index.tsx} (98%) rename superset-frontend/src/components/Datasource/{ => components/CollectionTable}/CollectionTable.test.tsx (96%) rename superset-frontend/src/components/Datasource/{CollectionTable.tsx => components/CollectionTable/index.tsx} (99%) rename superset-frontend/src/components/Datasource/{ => components/DatasourceEditor}/DatasourceEditor.jsx (94%) create mode 100644 superset-frontend/src/components/Datasource/components/DatasourceEditor/components/DashboardLinksExternal/DashboardLinksExternal.test.tsx create mode 100644 superset-frontend/src/components/Datasource/components/DatasourceEditor/components/DashboardLinksExternal/index.tsx create mode 100644 superset-frontend/src/components/Datasource/components/DatasourceEditor/components/DatasetUsageTab/DatasetUsageTab.test.tsx create mode 100644 superset-frontend/src/components/Datasource/components/DatasourceEditor/components/DatasetUsageTab/index.tsx create mode 100644 superset-frontend/src/components/Datasource/components/DatasourceEditor/components/index.ts create mode 100644 superset-frontend/src/components/Datasource/components/DatasourceEditor/index.ts rename superset-frontend/src/components/Datasource/{ => components/DatasourceEditor/tests}/DatasourceEditor.test.jsx (99%) rename superset-frontend/src/components/Datasource/{ => components/DatasourceEditor/tests}/DatasourceEditorCurrency.test.jsx (99%) rename 
superset-frontend/src/components/Datasource/{ => components/DatasourceEditor/tests}/DatasourceEditorRTL.test.jsx (100%) rename superset-frontend/src/components/Datasource/{ => components/Field}/Field.test.tsx (98%) rename superset-frontend/src/components/Datasource/{Field.tsx => components/Field/index.tsx} (100%) rename superset-frontend/src/components/Datasource/{Fieldset.tsx => components/Fieldset/index.tsx} (96%) create mode 100644 superset-frontend/src/components/Datasource/components/index.ts rename superset-frontend/src/components/Datasource/{index.tsx => index.ts} (100%) rename superset-frontend/src/components/Datasource/{utils.js => utils/index.js} (100%) rename superset-frontend/src/components/Datasource/{ => utils}/utils.test.tsx (99%) delete mode 100644 superset-frontend/src/features/datasets/AddDataset/EditDataset/UsageTab/UsageTab.test.tsx delete mode 100644 superset-frontend/src/features/datasets/AddDataset/EditDataset/UsageTab/index.tsx diff --git a/superset-frontend/src/components/Datasource/ChangeDatasourceModal.test.jsx b/superset-frontend/src/components/Datasource/ChangeDatasourceModal/ChangeDatasourceModal.test.jsx similarity index 98% rename from superset-frontend/src/components/Datasource/ChangeDatasourceModal.test.jsx rename to superset-frontend/src/components/Datasource/ChangeDatasourceModal/ChangeDatasourceModal.test.jsx index 060f6149a12..20fbc37be92 100644 --- a/superset-frontend/src/components/Datasource/ChangeDatasourceModal.test.jsx +++ b/superset-frontend/src/components/Datasource/ChangeDatasourceModal/ChangeDatasourceModal.test.jsx @@ -22,7 +22,7 @@ import fetchMock from 'fetch-mock'; import thunk from 'redux-thunk'; import sinon from 'sinon'; import mockDatasource from 'spec/fixtures/mockDatasource'; -import { ChangeDatasourceModal } from '.'; +import ChangeDatasourceModal from '.'; const mockStore = configureStore([thunk]); const store = mockStore({}); diff --git a/superset-frontend/src/components/Datasource/ChangeDatasourceModal.tsx b/superset-frontend/src/components/Datasource/ChangeDatasourceModal/index.tsx similarity index 99% rename from superset-frontend/src/components/Datasource/ChangeDatasourceModal.tsx rename to superset-frontend/src/components/Datasource/ChangeDatasourceModal/index.tsx index 5acd1b4d3d4..41c70a6f4ca 100644 --- a/superset-frontend/src/components/Datasource/ChangeDatasourceModal.tsx +++ b/superset-frontend/src/components/Datasource/ChangeDatasourceModal/index.tsx @@ -55,7 +55,7 @@ import { } from 'src/features/datasets/constants'; import withToasts from 'src/components/MessageToasts/withToasts'; import { InputRef } from 'antd'; -import type { Datasource, ChangeDatasourceModalProps } from './types'; +import type { Datasource, ChangeDatasourceModalProps } from '../types'; const CONFIRM_WARNING_MESSAGE = t( 'Warning! 
Changing the dataset may break the chart if the metadata does not exist.', diff --git a/superset-frontend/src/components/Datasource/DatasourceModal.test.jsx b/superset-frontend/src/components/Datasource/DatasourceModal/DatasourceModal.test.jsx similarity index 99% rename from superset-frontend/src/components/Datasource/DatasourceModal.test.jsx rename to superset-frontend/src/components/Datasource/DatasourceModal/DatasourceModal.test.jsx index b614ff87db9..d4045a3de6f 100644 --- a/superset-frontend/src/components/Datasource/DatasourceModal.test.jsx +++ b/superset-frontend/src/components/Datasource/DatasourceModal/DatasourceModal.test.jsx @@ -29,7 +29,7 @@ import fetchMock from 'fetch-mock'; import sinon from 'sinon'; import { SupersetClient } from '@superset-ui/core'; import mockDatasource from 'spec/fixtures/mockDatasource'; -import { DatasourceModal } from '.'; +import DatasourceModal from '.'; // Define your constants here const SAVE_ENDPOINT = 'glob:*/api/v1/dataset/7'; diff --git a/superset-frontend/src/components/Datasource/DatasourceModal.tsx b/superset-frontend/src/components/Datasource/DatasourceModal/index.tsx similarity index 98% rename from superset-frontend/src/components/Datasource/DatasourceModal.tsx rename to superset-frontend/src/components/Datasource/DatasourceModal/index.tsx index 6e551a9a156..ecbd8144ddf 100644 --- a/superset-frontend/src/components/Datasource/DatasourceModal.tsx +++ b/superset-frontend/src/components/Datasource/DatasourceModal/index.tsx @@ -45,9 +45,11 @@ import { import withToasts from 'src/components/MessageToasts/withToasts'; import { ErrorMessageWithStackTrace } from 'src/components'; import type { DatasetObject } from 'src/features/datasets/types'; -import type { DatasourceModalProps } from './types'; +import type { DatasourceModalProps } from '../types'; -const DatasourceEditor = AsyncEsmComponent(() => import('./DatasourceEditor')); +const DatasourceEditor = AsyncEsmComponent( + () => import('../components/DatasourceEditor'), +); const StyledDatasourceModal = styled(Modal)` .modal-content { diff --git a/superset-frontend/src/components/Datasource/CollectionTable.test.tsx b/superset-frontend/src/components/Datasource/components/CollectionTable/CollectionTable.test.tsx similarity index 96% rename from superset-frontend/src/components/Datasource/CollectionTable.test.tsx rename to superset-frontend/src/components/Datasource/components/CollectionTable/CollectionTable.test.tsx index 8736a7e5569..205e9882e02 100644 --- a/superset-frontend/src/components/Datasource/CollectionTable.test.tsx +++ b/superset-frontend/src/components/Datasource/components/CollectionTable/CollectionTable.test.tsx @@ -19,7 +19,7 @@ import { render } from 'spec/helpers/testing-library'; import mockDatasource from 'spec/fixtures/mockDatasource'; -import CollectionTable from './CollectionTable'; +import CollectionTable from '.'; const props = { collection: mockDatasource['7__table'].columns, diff --git a/superset-frontend/src/components/Datasource/CollectionTable.tsx b/superset-frontend/src/components/Datasource/components/CollectionTable/index.tsx similarity index 99% rename from superset-frontend/src/components/Datasource/CollectionTable.tsx rename to superset-frontend/src/components/Datasource/components/CollectionTable/index.tsx index 59f3d07fb46..50169092f48 100644 --- a/superset-frontend/src/components/Datasource/CollectionTable.tsx +++ b/superset-frontend/src/components/Datasource/components/CollectionTable/index.tsx @@ -28,13 +28,13 @@ import Table, { type 
TablePaginationConfig, TableSize, } from '@superset-ui/core/components/Table'; -import Fieldset from './Fieldset'; -import { recurseReactClone } from './utils'; +import Fieldset from '../Fieldset'; +import { recurseReactClone } from '../../utils'; import { type CRUDCollectionProps, type CRUDCollectionState, type Sort, -} from './types'; +} from '../../types'; const CrudButtonWrapper = styled.div` text-align: right; diff --git a/superset-frontend/src/components/Datasource/DatasourceEditor.jsx b/superset-frontend/src/components/Datasource/components/DatasourceEditor/DatasourceEditor.jsx similarity index 94% rename from superset-frontend/src/components/Datasource/DatasourceEditor.jsx rename to superset-frontend/src/components/Datasource/components/DatasourceEditor/DatasourceEditor.jsx index b55a511bb36..30b499bed5c 100644 --- a/superset-frontend/src/components/Datasource/DatasourceEditor.jsx +++ b/superset-frontend/src/components/Datasource/components/DatasourceEditor/DatasourceEditor.jsx @@ -69,11 +69,12 @@ import { resetDatabaseState, } from 'src/database/actions'; import Mousetrap from 'mousetrap'; -import { DatabaseSelector } from '../DatabaseSelector'; -import CollectionTable from './CollectionTable'; -import Fieldset from './Fieldset'; -import Field from './Field'; -import { fetchSyncedColumns, updateColumns } from './utils'; +import { DatabaseSelector } from '../../../DatabaseSelector'; +import CollectionTable from '../CollectionTable'; +import Fieldset from '../Fieldset'; +import Field from '../Field'; +import { fetchSyncedColumns, updateColumns } from '../../utils'; +import DatasetUsageTab from './components/DatasetUsageTab'; const extensionsRegistry = getExtensionsRegistry(); @@ -142,7 +143,7 @@ const StyledLabelWrapper = styled.div` } `; -const StyledColumnsTabWrapper = styled.div` +const StyledTableTabWrapper = styled.div` .table > tbody > tr > td { vertical-align: middle; } @@ -177,6 +178,7 @@ const TABS_KEYS = { METRICS: 'METRICS', COLUMNS: 'COLUMNS', CALCULATED_COLUMNS: 'CALCULATED_COLUMNS', + USAGE: 'USAGE', SETTINGS: 'SETTINGS', SPATIAL: 'SPATIAL', }; @@ -658,6 +660,8 @@ class DatasourceEditor extends PureComponent { datasourceType: props.datasource.sql ? 
DATASOURCE_TYPES.virtual.key : DATASOURCE_TYPES.physical.key, + usageCharts: [], + usageChartsCount: 0, }; this.onChange = this.onChange.bind(this); @@ -671,6 +675,7 @@ class DatasourceEditor extends PureComponent { this.validateAndChange = this.validateAndChange.bind(this); this.handleTabSelect = this.handleTabSelect.bind(this); this.formatSql = this.formatSql.bind(this); + this.fetchUsageData = this.fetchUsageData.bind(this); this.currencies = ensureIsArray(props.currencies).map(currencyCode => ({ value: currencyCode, label: `${getCurrencySymbol({ @@ -844,6 +849,89 @@ class DatasourceEditor extends PureComponent { } } + async fetchUsageData( + page = 1, + pageSize = 25, + sortColumn = 'changed_on_delta_humanized', + sortDirection = 'desc', + ) { + const { datasource } = this.state; + try { + const queryParams = rison.encode({ + columns: [ + 'slice_name', + 'url', + 'certified_by', + 'certification_details', + 'description', + 'owners.first_name', + 'owners.last_name', + 'owners.id', + 'changed_on_delta_humanized', + 'changed_on', + 'changed_by.first_name', + 'changed_by.last_name', + 'changed_by.id', + 'dashboards.id', + 'dashboards.dashboard_title', + 'dashboards.url', + ], + filters: [ + { + col: 'datasource_id', + opr: 'eq', + value: datasource.id, + }, + ], + order_column: sortColumn, + order_direction: sortDirection, + page: page - 1, + page_size: pageSize, + }); + + const { json = {} } = await SupersetClient.get({ + endpoint: `/api/v1/chart/?q=${queryParams}`, + }); + + const charts = json?.result || []; + const ids = json?.ids || []; + + // Map chart IDs to chart objects + const chartsWithIds = charts.map((chart, index) => ({ + ...chart, + id: ids[index], + })); + + this.setState({ + usageCharts: chartsWithIds, + usageChartsCount: json?.count || 0, + }); + + return { + charts: chartsWithIds, + count: json?.count || 0, + ids, + }; + } catch (error) { + const { error: clientError, statusText } = + await getClientErrorObject(error); + this.props.addDangerToast( + clientError || + statusText || + t('An error occurred while fetching usage data'), + ); + this.setState({ + usageCharts: [], + usageChartsCount: 0, + }); + return { + charts: [], + count: 0, + ids: [], + }; + } + } + findDuplicates(arr, accessor) { const seen = {}; const dups = []; @@ -1684,7 +1772,7 @@ class DatasourceEditor extends PureComponent { /> ), children: ( - +
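
# A minimal sketch, outside the patch, of the rison payload that
# fetchUsageData sends to /api/v1/chart/, reproduced in Python with the
# prison package Superset already uses in its test suite; the column list and
# datasource id below are illustrative only.
import prison

query = prison.dumps(
    {
        "columns": ["slice_name", "url", "changed_on_delta_humanized"],
        "filters": [{"col": "datasource_id", "opr": "eq", "value": 42}],
        "order_column": "changed_on_delta_humanized",
        "order_direction": "desc",
        "page": 0,  # the UI's 1-based page minus one, as in fetchUsageData
        "page_size": 25,
    }
)
endpoint = f"/api/v1/chart/?q={query}"
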