refactor(example_data): replace the way the birth_names data is loaded to DB (#18060)

* refactor: replace the way the birth_names data is loaded to DB

* fix failed unit test

* fix failed unit test

* fix failed tests

* fix wrong flag passed for datetime type support

* remove unused fixture
This commit is contained in:
ofekisr
2022-01-18 23:21:04 +02:00
committed by GitHub
parent 88db2cc0ab
commit 4675ca31c5
29 changed files with 781 additions and 137 deletions

View File

@@ -0,0 +1,25 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

View File

@@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from abc import ABC, abstractmethod
from typing import Any, Dict, Iterable
class ExampleDataGenerator(ABC):
    """Abstract interface for example-data generators.

    Concrete subclasses must implement :meth:`generate`.
    """

    @abstractmethod
    def generate(self) -> Iterable[Dict[Any, Any]]:
        """Return an iterable of dictionaries holding the generated data."""

View File

@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

View File

@@ -0,0 +1,81 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
from datetime import datetime
from random import choice, randint
from typing import Any, Dict, Iterable, TYPE_CHECKING
from tests.consts.birth_names import (
BOY,
DS,
GENDER,
GIRL,
NAME,
NUM,
NUM_BOYS,
NUM_GIRLS,
STATE,
)
from tests.consts.us_states import US_STATES
from tests.example_data.data_generator.base_generator import ExampleDataGenerator
if TYPE_CHECKING:
from tests.example_data.data_generator.string_generator import StringGenerator
class BirthNamesGenerator(ExampleDataGenerator):
    """Generate random ``birth_names`` rows, a fixed number for each year.

    Rows are produced for every year in
    ``[start_year, start_year + years_amount)``; each row is stamped with
    January 1st (midnight) of its year.
    """

    _names_generator: StringGenerator
    _start_year: int
    _until_not_include_year: int
    _rows_per_year: int

    def __init__(
        self,
        names_generator: StringGenerator,
        start_year: int,
        years_amount: int,
        rows_per_year: int,
    ) -> None:
        """Store the generation settings after validating the year range.

        :param names_generator: source of the random ``NAME`` values
        :param start_year: first year to generate rows for
        :param years_amount: number of consecutive years to cover (positive)
        :param rows_per_year: number of rows yielded for every year
        :raises AssertionError: if the requested years cannot be represented
            by ``datetime`` or ``years_amount`` is not positive
        """
        # ``datetime`` only supports years between datetime.min.year and
        # datetime.max.year; fail here instead of midway through generate().
        assert (
            start_year >= datetime.min.year
        ), f"start_year must be at least {datetime.min.year}"
        assert years_amount > 0, "years_amount must be positive"
        until_not_include_year = start_year + years_amount
        assert (
            until_not_include_year - 1 <= datetime.max.year
        ), f"last generated year must not exceed {datetime.max.year}"
        self._names_generator = names_generator
        self._start_year = start_year
        self._until_not_include_year = until_not_include_year
        self._rows_per_year = rows_per_year

    def generate(self) -> Iterable[Dict[Any, Any]]:
        """Lazily yield ``rows_per_year`` rows for each configured year."""
        for year in range(self._start_year, self._until_not_include_year):
            ds = self._make_year(year)
            for _ in range(self._rows_per_year):
                yield self.generate_row(ds)

    def _make_year(self, year: int) -> datetime:
        """Return midnight of January 1st of ``year``."""
        return datetime(year, 1, 1, 0, 0, 0)

    def generate_row(self, dt: datetime) -> Dict[Any, Any]:
        """Build one random row whose ``DS`` value is ``dt``.

        ``NUM`` is a random integer in ``[1, 100000]``; it is mirrored into
        ``NUM_BOYS`` or ``NUM_GIRLS`` according to the randomly chosen
        gender, and the other of the two is set to ``0``.
        """
        gender = choice([BOY, GIRL])
        num = randint(1, 100000)
        return {
            DS: dt,
            GENDER: gender,
            NAME: self._names_generator.generate(),
            NUM: num,
            STATE: choice(US_STATES),
            NUM_BOYS: num if gender == BOY else 0,
            NUM_GIRLS: num if gender == GIRL else 0,
        }

View File

@@ -0,0 +1,65 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
from abc import ABC, abstractmethod
from tests.example_data.data_generator.birth_names.birth_names_generator import (
BirthNamesGenerator,
)
from tests.example_data.data_generator.string_generator_factory import (
StringGeneratorFactory,
)
class BirthNamesGeneratorFactory(ABC):
    """Process-wide access point for building ``BirthNamesGenerator`` objects.

    A single factory instance is kept on this base class. It can be replaced
    through :meth:`set_instance`; when none has been set, a
    ``BirthNamesGeneratorFactoryImpl`` is created on first use.
    """

    __factory: BirthNamesGeneratorFactory

    @abstractmethod
    def _make(self) -> BirthNamesGenerator:
        """Build and return a new generator (implemented by subclasses)."""

    @classmethod
    def make(cls) -> BirthNamesGenerator:
        """Return a generator built by the currently registered factory."""
        return cls._get_instance()._make()

    @classmethod
    def set_instance(cls, factory: BirthNamesGeneratorFactory) -> None:
        """Register ``factory`` as the instance used by :meth:`make`."""
        # Always store on the base class: writing through ``cls`` would put
        # the instance on whichever subclass the call went through, and
        # ``make()`` invoked on a different class would not see it.
        BirthNamesGeneratorFactory.__factory = factory

    @classmethod
    def _get_instance(cls) -> BirthNamesGeneratorFactory:
        """Return the registered factory, creating the default one if needed."""
        # ``__factory`` is name-mangled inside this class body, hence the
        # mangled attribute name in the ``hasattr`` check.
        if not hasattr(
            BirthNamesGeneratorFactory, "_BirthNamesGeneratorFactory__factory"
        ):
            BirthNamesGeneratorFactory.__factory = BirthNamesGeneratorFactoryImpl()
        return BirthNamesGeneratorFactory.__factory
# Defaults consumed by BirthNamesGeneratorFactoryImpl._make.
# Passed to StringGeneratorFactory.make_lowercase_based as min_length and
# max_length for the generated names.
MIN_NAME_LEN = 3
MAX_NAME_SIZE = 10
# Passed to BirthNamesGenerator as start_year, years_amount and rows_per_year:
# with these values rows are generated for 1960 through 2019, 20 per year.
START_YEAR = 1960
YEARS_AMOUNT = 60
ROW_PER_YEAR = 20
class BirthNamesGeneratorFactoryImpl(BirthNamesGeneratorFactory):
    """Default factory wired with the module-level name and year settings."""

    def _make(self) -> BirthNamesGenerator:
        """Create a generator that draws names from lowercase ASCII letters."""
        names_source = StringGeneratorFactory.make_lowercase_based(
            MIN_NAME_LEN, MAX_NAME_SIZE
        )
        generator = BirthNamesGenerator(
            names_source, START_YEAR, YEARS_AMOUNT, ROW_PER_YEAR
        )
        return generator

View File

@@ -0,0 +1,69 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Candidate values for the STATE column of generated birth_names rows
# (BirthNamesGenerator.generate_row picks one at random): the 50 two-letter
# US state abbreviations plus the literal "other".
US_STATES = [
    "AL",
    "AK",
    "AZ",
    "AR",
    "CA",
    "CO",
    "CT",
    "DE",
    "FL",
    "GA",
    "HI",
    "ID",
    "IL",
    "IN",
    "IA",
    "KS",
    "KY",
    "LA",
    "ME",
    "MD",
    "MA",
    "MI",
    "MN",
    "MS",
    "MO",
    "MT",
    "NE",
    "NV",
    "NH",
    "NJ",
    "NM",
    "NY",
    "NC",
    "ND",
    "OH",
    "OK",
    "OR",
    "PA",
    "RI",
    "SC",
    "SD",
    "TN",
    "TX",
    "UT",
    "VT",
    "VA",
    "WA",
    "WV",
    "WI",
    "WY",
    "other",
]

View File

@@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from random import choices, randint
class StringGenerator:
    """Produce random strings drawn from a fixed set of letters.

    The length of every generated string is picked at random between
    ``min_length`` and ``max_length`` (both inclusive).
    """

    _seed_letters: str
    _min_length: int
    _max_length: int

    def __init__(self, seed_letters: str, min_length: int, max_length: int):
        """Remember the alphabet and the inclusive length bounds."""
        self._seed_letters, self._min_length, self._max_length = (
            seed_letters,
            min_length,
            max_length,
        )

    def generate(self) -> str:
        """Return a new random string built from the seed letters."""
        # The length is drawn first, then that many letters are sampled with
        # replacement and concatenated.
        length = randint(self._min_length, self._max_length)
        return "".join(choices(self._seed_letters, k=length))

View File

@@ -0,0 +1,46 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import string
from tests.example_data.data_generator.string_generator import StringGenerator
class StringGeneratorFactory:
    """Validated constructors for ``StringGenerator`` instances."""

    @staticmethod
    def __validate_arguments(
        seed_letters: str, min_length: int, max_length: int
    ) -> None:
        """Assert that the alphabet is non-empty and the bounds are sane."""
        assert seed_letters, "seed_letters is empty"
        assert min_length > -1, "min_length is negative"
        assert max_length > min_length, "max_length is not bigger then min_length"

    @classmethod
    def make(
        cls, seed_letters: str, min_length: int, max_length: int
    ) -> StringGenerator:
        """Check the arguments, then build a generator over ``seed_letters``.

        :raises AssertionError: if any argument fails validation
        """
        cls.__validate_arguments(seed_letters, min_length, max_length)
        generator = StringGenerator(seed_letters, min_length, max_length)
        return generator

    @classmethod
    def make_lowercase_based(cls, min_length: int, max_length: int) -> StringGenerator:
        """Build a generator whose alphabet is ``string.ascii_lowercase``."""
        alphabet = string.ascii_lowercase
        return cls.make(alphabet, min_length, max_length)

    @classmethod
    def make_ascii_letters_based(
        cls, min_length: int, max_length: int
    ) -> StringGenerator:
        """Build a generator whose alphabet is ``string.ascii_letters``."""
        alphabet = string.ascii_letters
        return cls.make(alphabet, min_length, max_length)

View File

@@ -0,0 +1,25 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

View File

@@ -0,0 +1,35 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from unittest.mock import Mock, patch
from tests.example_data.data_generator.string_generator import StringGenerator
@patch("tests.example_data.data_generator.string_generator.choices")
@patch("tests.example_data.data_generator.string_generator.randint")
def test_string_generator(randint_mock: Mock, choices_mock: Mock) -> None:
    """Check that ``generate`` joins the letters returned by ``choices``.

    ``randint`` and ``choices`` are patched in the ``string_generator``
    module so the outcome is deterministic; the test also verifies the
    arguments each of them receives.
    """
    letters = "abcdets"
    min_len = 3
    max_len = 5
    randomized_string_len = 4
    string_generator = StringGenerator(letters, min_len, max_len)
    randint_mock.return_value = randomized_string_len
    choices_mock.return_value = ["t", "e", "s", "t"]

    assert string_generator.generate() == "test"

    randint_mock.assert_called_once_with(min_len, max_len)
    # assert_called_with only inspects the most recent call; the "once"
    # variant also fails if choices is invoked more than one time.
    choices_mock.assert_called_once_with(letters, k=randomized_string_len)