Compare commits

...

2 Commits

Author SHA1 Message Date
Joe Li
7c88f2d61d fix(mysql): normalize string/bytes/Decimal values before boolean conversion
- Fix critical bug where bool('0') and bool(b'0') returned True instead of False
- Add proper type normalization for strings, bytes, and Decimal values
- Convert string/bytes/Decimal to int before applying bool() for accurate MySQL boolean conversion
- Maintains existing behavior for integer values while fixing edge cases
- Addresses feedback on conversion logic in mysql.py:323-330

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-25 11:42:26 -07:00
Joe Li
66f6a6ce94 fix(mysql): render TINYINT(1) columns as boolean in SQL Lab and charts
Fixes MySQL TINYINT(1)/BOOLEAN columns displaying as numeric icons with 0/1 values instead of boolean representation with True/False in SQL Lab and Explore views.

## Changes

### Schema Mapping (mysql.py)
- Add precise column_type_mappings for TINYINT(1), BOOLEAN, and BOOL patterns
- Map to GenericDataType.BOOLEAN for proper metadata inspection
- Ensures dataset schemas show boolean icons for actual boolean types only

### Runtime Conversion (mysql.py)
- Implement fetch_data override with ultra-precise boolean detection
- Convert 0/1 integers to True/False for TINYINT(1) columns
- Use multiple reliable markers: FIELD_TYPE.TINY + display_size=1 OR SQLAlchemy type string
- Extract _is_boolean_column helper method for clean detection logic
- Enables pandas boolean dtype inference via extract_dataframe_dtypes

### Testing (test_mysql.py)
- Add boolean type test cases to existing parametrized tests
- Test TINYINT(1), BOOLEAN, BOOL → boolean mapping
- Test TINYINT, TINYINT(2+) → numeric mapping (preserved behavior)

## Technical Details

MySQL stores BOOLEAN as TINYINT(1) but returns 0/1 integers instead of Python booleans. This two-layer solution:
1. Maps TINYINT(1) metadata to GenericDataType.BOOLEAN for schema inspection
2. Converts query result values 0/1 → True/False for proper pandas inference

The detection uses explicit width 1 or positive SQLAlchemy type string markers to avoid mis-converting broader TINYINT columns.

Fixes: https://github.com/apache/superset/issues/35166

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-24 15:20:06 -07:00
2 changed files with 233 additions and 7 deletions

View File

@@ -77,6 +77,18 @@ class MySQLEngineSpec(BasicParametersMixin, BaseEngineSpec):
supports_multivalues_insert = True
column_type_mappings = (
# Boolean types - MySQL uses TINYINT(1) for BOOLEAN
(
re.compile(r"^tinyint\(1\)", re.IGNORECASE),
TINYINT(),
GenericDataType.BOOLEAN,
),
(
re.compile(r"^bool(ean)?", re.IGNORECASE),
TINYINT(),
GenericDataType.BOOLEAN,
),
# Numeric types
(
re.compile(r"^int.*", re.IGNORECASE),
INTEGER(),
@@ -260,6 +272,67 @@ class MySQLEngineSpec(BasicParametersMixin, BaseEngineSpec):
def epoch_to_dttm(cls) -> str:
return "from_unixtime({col})"
@classmethod
def _is_boolean_column(cls, col_desc: tuple[Any, ...]) -> bool:
"""Check if a cursor column description represents a boolean column."""
type_code = col_desc[1] if len(col_desc) > 1 else None
display_size = col_desc[2] if len(col_desc) > 2 else None
# Only process FIELD_TYPE.TINY (type_code 1)
if type_code != 1:
return False
# Explicit width 1 indicates TINYINT(1)/BOOLEAN
if display_size == 1:
return True
# Check SQLAlchemy type string (some drivers provide it at index 4)
if len(col_desc) > 4 and isinstance(col_desc[4], str):
sqla_type_str = col_desc[4].lower()
return any(marker in sqla_type_str for marker in ["bool", "tinyint(1)"])
return False
@classmethod
def fetch_data(cls, cursor: Any, limit: int | None = None) -> list[tuple[Any, ...]]:
"""
Fetch data from cursor, converting MySQL TINYINT(1) values to Python booleans.
MySQL stores BOOLEAN as TINYINT(1), but returns 0/1 integers instead of
True/False. This method detects TINYINT(1) columns using multiple reliable
markers and converts their values to proper Python booleans.
"""
data = super().fetch_data(cursor, limit)
if not cursor.description:
return data
# Find TINYINT(1) columns
bool_column_indices = [
i
for i, col_desc in enumerate(cursor.description)
if cls._is_boolean_column(col_desc)
]
# Convert 0/1 to True/False for boolean columns
if bool_column_indices:
converted_data = []
for row in data:
new_row = list(row)
for col_idx in bool_column_indices:
if new_row[col_idx] is not None:
# Normalize different value types before boolean conversion
# bool("0") returns True, but we need False for MySQL boolean
value = new_row[col_idx]
if isinstance(value, (str, bytes)):
value = int(value)
elif isinstance(value, Decimal):
value = int(value)
new_row[col_idx] = bool(value)
converted_data.append(tuple(new_row))
return converted_data
return data
@classmethod
def _extract_error_message(cls, ex: Exception) -> str:
"""Extract error message for queries"""

View File

@@ -47,8 +47,16 @@ from tests.unit_tests.fixtures.common import dttm # noqa: F401
@pytest.mark.parametrize(
"native_type,sqla_type,attrs,generic_type,is_dttm",
[
# Numeric
# Boolean types - MySQL uses TINYINT(1) for BOOLEAN
("TINYINT(1)", TINYINT, None, GenericDataType.BOOLEAN, False),
("tinyint(1)", TINYINT, None, GenericDataType.BOOLEAN, False),
("BOOLEAN", TINYINT, None, GenericDataType.BOOLEAN, False),
("BOOL", TINYINT, None, GenericDataType.BOOLEAN, False),
# Numeric (ensure TINYINT without (1) remains numeric)
("TINYINT", TINYINT, None, GenericDataType.NUMERIC, False),
("TINYINT(2)", TINYINT, None, GenericDataType.NUMERIC, False),
("TINYINT(4)", TINYINT, None, GenericDataType.NUMERIC, False),
("TINYINT UNSIGNED", TINYINT, None, GenericDataType.NUMERIC, False),
("SMALLINT", types.SmallInteger, None, GenericDataType.NUMERIC, False),
("MEDIUMINT", MEDIUMINT, None, GenericDataType.NUMERIC, False),
("INT", INTEGER, None, GenericDataType.NUMERIC, False),
@@ -77,9 +85,11 @@ def test_get_column_spec(
generic_type: GenericDataType,
is_dttm: bool,
) -> None:
from superset.db_engine_specs.mysql import MySQLEngineSpec as spec # noqa: N813
from superset.db_engine_specs.mysql import MySQLEngineSpec
assert_column_spec(spec, native_type, sqla_type, attrs, generic_type, is_dttm)
assert_column_spec(
MySQLEngineSpec, native_type, sqla_type, attrs, generic_type, is_dttm
)
@pytest.mark.parametrize(
@@ -98,9 +108,9 @@ def test_convert_dttm(
expected_result: Optional[str],
dttm: datetime, # noqa: F811
) -> None:
from superset.db_engine_specs.mysql import MySQLEngineSpec as spec # noqa: N813
from superset.db_engine_specs.mysql import MySQLEngineSpec
assert_convert_dttm(spec, target_type, expected_result, dttm)
assert_convert_dttm(MySQLEngineSpec, target_type, expected_result, dttm)
@pytest.mark.parametrize(
@@ -255,10 +265,153 @@ def test_column_type_mutator(
description: list[Any],
expected_result: list[tuple[Any, ...]],
):
from superset.db_engine_specs.mysql import MySQLEngineSpec as spec # noqa: N813
from superset.db_engine_specs.mysql import MySQLEngineSpec
mock_cursor = Mock()
mock_cursor.fetchall.return_value = data
mock_cursor.description = description
assert spec.fetch_data(mock_cursor) == expected_result
assert MySQLEngineSpec.fetch_data(mock_cursor) == expected_result
def test_fetch_data_boolean_integers() -> None:
"""Test fetch_data converts integer 0/1 to boolean False/True."""
from superset.db_engine_specs.mysql import MySQLEngineSpec
mock_cursor = Mock()
mock_cursor.fetchall.return_value = [(1, "admin"), (0, "user")]
# TINYINT(1) column: type_code=1 (FIELD_TYPE.TINY), display_size=1
mock_cursor.description = [
("is_active", 1, 1, 4, 3, 0, False), # TINYINT(1) - should convert
("role", 254, 255, 0, 0, 0, False), # VARCHAR - should not convert
]
result = MySQLEngineSpec.fetch_data(mock_cursor)
expected = [(True, "admin"), (False, "user")]
assert result == expected
def test_fetch_data_boolean_strings() -> None:
"""Test fetch_data converts string "0"/"1" to boolean False/True."""
from superset.db_engine_specs.mysql import MySQLEngineSpec
mock_cursor = Mock()
mock_cursor.fetchall.return_value = [("1", "admin"), ("0", "user")]
mock_cursor.description = [
("is_active", 1, 1, 4, 3, 0, False), # TINYINT(1) - should convert
("role", 254, 255, 0, 0, 0, False), # VARCHAR - should not convert
]
result = MySQLEngineSpec.fetch_data(mock_cursor)
expected = [(True, "admin"), (False, "user")]
assert result == expected
def test_fetch_data_boolean_bytes() -> None:
"""Test fetch_data converts bytes b"0"/b"1" to boolean False/True."""
from superset.db_engine_specs.mysql import MySQLEngineSpec
mock_cursor = Mock()
mock_cursor.fetchall.return_value = [(b"1", "admin"), (b"0", "user")]
mock_cursor.description = [
("is_active", 1, 1, 4, 3, 0, False), # TINYINT(1) - should convert
("role", 254, 255, 0, 0, 0, False), # VARCHAR - should not convert
]
result = MySQLEngineSpec.fetch_data(mock_cursor)
expected = [(True, "admin"), (False, "user")]
assert result == expected
def test_fetch_data_boolean_decimals() -> None:
"""Test fetch_data converts Decimal 0/1 to boolean False/True."""
from superset.db_engine_specs.mysql import MySQLEngineSpec
mock_cursor = Mock()
mock_cursor.fetchall.return_value = [
(Decimal("1"), "admin"),
(Decimal("0"), "user"),
]
mock_cursor.description = [
("is_active", 1, 1, 4, 3, 0, False), # TINYINT(1) - should convert
("role", 254, 255, 0, 0, 0, False), # VARCHAR - should not convert
]
result = MySQLEngineSpec.fetch_data(mock_cursor)
expected = [(True, "admin"), (False, "user")]
assert result == expected
def test_fetch_data_boolean_with_nulls() -> None:
"""Test fetch_data handles NULL values correctly in boolean columns."""
from superset.db_engine_specs.mysql import MySQLEngineSpec
mock_cursor = Mock()
mock_cursor.fetchall.return_value = [(1, "admin"), (None, "user"), (0, "guest")]
mock_cursor.description = [
("is_active", 1, 1, 4, 3, 0, True), # TINYINT(1) with nulls - should convert
("role", 254, 255, 0, 0, 0, False), # VARCHAR - should not convert
]
result = MySQLEngineSpec.fetch_data(mock_cursor)
expected = [(True, "admin"), (None, "user"), (False, "guest")]
assert result == expected
def test_fetch_data_boolean_mixed_columns() -> None:
"""Test fetch_data with boolean and non-boolean columns together."""
from superset.db_engine_specs.mysql import MySQLEngineSpec
mock_cursor = Mock()
mock_cursor.fetchall.return_value = [(1, 50, 0), (0, 100, 1)]
mock_cursor.description = [
("is_admin", 1, 1, 4, 3, 0, False), # TINYINT(1) - should convert
("count", 3, 11, 4, 10, 0, False), # INT - should not convert
("is_active", 1, 1, 4, 3, 0, False), # TINYINT(1) - should convert
]
result = MySQLEngineSpec.fetch_data(mock_cursor)
expected = [(True, 50, False), (False, 100, True)]
assert result == expected
def test_fetch_data_no_boolean_columns() -> None:
"""Test fetch_data passes through data when no boolean columns present."""
from superset.db_engine_specs.mysql import MySQLEngineSpec
mock_cursor = Mock()
mock_cursor.fetchall.return_value = [(100, "test"), (200, "data")]
mock_cursor.description = [
("count", 3, 11, 4, 10, 0, False), # INT - should not convert
("name", 254, 255, 0, 0, 0, False), # VARCHAR - should not convert
]
result = MySQLEngineSpec.fetch_data(mock_cursor)
expected = [(100, "test"), (200, "data")]
assert result == expected
def test_fetch_data_boolean_mixed_driver_types() -> None:
"""Test fetch_data with different driver return types in same dataset."""
from superset.db_engine_specs.mysql import MySQLEngineSpec
mock_cursor = Mock()
# Mix of integers, strings, bytes, decimals for boolean values
mock_cursor.fetchall.return_value = [
(1, "0", b"1"), # True, False, True
(0, "1", b"0"), # False, True, False
(Decimal(1), None, 0), # True, None, False
]
mock_cursor.description = [
("bool_int", 1, 1, 4, 3, 0, False), # TINYINT(1) - integers
("bool_str", 1, 1, 4, 3, 0, False), # TINYINT(1) - strings
("bool_bytes", 1, 1, 4, 3, 0, False), # TINYINT(1) - bytes
]
result = MySQLEngineSpec.fetch_data(mock_cursor)
expected = [
(True, False, True),
(False, True, False),
(True, None, False),
]
assert result == expected