# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import pytest

from superset.mcp_service.utils.sanitization import (
    _check_dangerous_patterns,
    _check_sql_patterns,
    _remove_dangerous_unicode,
    _strip_html_tags,
    sanitize_filter_value,
    sanitize_user_input,
)

# --- _strip_html_tags tests ---
#
# NOTE(review): this section was recovered from a copy in which every
# "<...>" span and every HTML entity inside string literals had been stripped
# or decoded. Literals carrying a "reconstructed" note below were rebuilt from
# the test names and docstrings -- confirm them against the upstream file.


def test_strip_html_tags_plain_text():
    """Text without any markup is returned unchanged."""
    assert _strip_html_tags("hello world") == "hello world"


def test_strip_html_tags_preserves_ampersand():
    """A bare ampersand must come back as-is."""
    assert _strip_html_tags("A & B") == "A & B"


def test_strip_html_tags_preserves_multiple_ampersands():
    """Several bare ampersands must all come back as-is."""
    assert _strip_html_tags("A & B & C") == "A & B & C"


def test_strip_html_tags_strips_bold_tags():
    """Inline formatting tags are removed and their text content is kept."""
    # reconstructed: the damaged copy passed plain "hello", which strips nothing.
    assert _strip_html_tags("<b>hello</b>") == "hello"


def test_strip_html_tags_strips_script_tags():
    """A literal script tag must not appear in the output."""
    # reconstructed: the damaged copy asserted `"" not in result`, which can
    # never hold because the empty string is a substring of every str.
    result = _strip_html_tags("<script>alert(1)</script>")
    assert "<script>" not in result


def test_strip_html_tags_strips_entity_encoded_script():
    """Entity-encoded tags must be decoded and stripped, not passed through."""
    # reconstructed: the input is entity-encoded so that the decode path is
    # actually exercised (the damaged copy passed the raw tag).
    result = _strip_html_tags("&lt;script&gt;alert(1)&lt;/script&gt;")
    assert "<script>" not in result

    # Bury the payload under ten layers of entity encoding.
    # NOTE(review): in the damaged copy the `def` line and the seed value of
    # this case were lost, and the replace() targets had collapsed into no-ops
    # ("&" -> "&"). It is kept inside this test rather than given an invented
    # name; split it back out once the upstream name is confirmed.
    value = "<script>alert(1)</script>"
    for _ in range(10):
        value = value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
    result = _strip_html_tags(value)
    assert "<script>" not in result

    # NOTE(review): at least one more case followed here; only the fragment
    # `assert "" not in result` survived, so it is not reconstructed.


def test_strip_html_tags_img_onerror_entity_bypass():
    """Entity-encoded img/onerror should not survive sanitization."""
    # reconstructed: the input is entity-encoded to match the docstring.
    result = _strip_html_tags("&lt;img src=x onerror=alert(1)&gt;")
    # NOTE(review): the original assertion was cut off after `assert "`;
    # this check is inferred from the docstring -- confirm against upstream.
    assert "<img" not in result