refactor: examples data loading for tests (#17893)

* refactor: replace the way the birth_names data is generated

* refactor: replace the way the birth_names data is generated

* refactor structure
add tests for common
This commit is contained in:
ofekisr
2022-01-11 14:16:09 +02:00
committed by GitHub
parent 3a58424e62
commit 7fc6a2f36c
12 changed files with 507 additions and 89 deletions

View File

@@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

View File

@@ -0,0 +1,119 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
from datetime import datetime
from random import choice, randint
from typing import Any, Dict, Iterable
from tests.common.example_data_generator.base_generator import ExampleDataGenerator
from tests.common.example_data_generator.consts import US_STATES
from tests.common.example_data_generator.string_generator import StringGenerator
from collections import OrderedDict

# Column names of a generated birth_names row, listed in column order.
DS = "ds"
GENDER = "gender"
NAME = "name"
NUM = "num"
STATE = "state"
NUM_BOYS = "num_boys"
NUM_GIRLS = "num_girls"

# The two values the GENDER column can take.
BOY = "boy"
GIRL = "girl"

# Ordered mapping of column name -> Python type of the values in that column.
BIRTH_NAMES_COLUMNS = OrderedDict(
    (
        (DS, datetime),
        (GENDER, str),
        (NAME, str),
        (NUM, int),
        (STATE, str),
        (NUM_BOYS, int),
        (NUM_GIRLS, int),
    )
)
class BirthNamesGenerator(ExampleDataGenerator):
    """Generate random rows for the ``birth_names`` example data.

    For every year in ``[start_year, start_year + years_amount)`` the
    generator yields ``rows_per_year`` rows, each stamped with midnight of
    January 1st of that year.
    """

    _names_generator: StringGenerator
    _start_year: int
    # Exclusive upper bound of the generated year range.
    _until_not_include_year: int
    _rows_per_year: int

    def __init__(
        self,
        names_generator: StringGenerator,
        start_year: int,
        years_amount: int,
        rows_per_year: int,
    ) -> None:
        """Store the generation parameters after validating the year range.

        :param names_generator: object whose ``generate()`` result is used as
            the NAME value of each row
        :param start_year: first year to generate rows for; must be a year
            that ``datetime`` can represent (>= ``datetime.min.year``)
        :param years_amount: number of consecutive years to cover; must be > 0
        :param rows_per_year: number of rows yielded for each year
        :raises AssertionError: if ``start_year`` or ``years_amount`` is out
            of range
        """
        # Raised explicitly instead of via ``assert`` so the checks survive
        # ``python -O``; AssertionError is kept so existing callers that
        # expect it keep working.
        if start_year < datetime.min.year:
            # Year 0 used to pass validation and then fail later inside
            # generate(), because datetime() rejects it.
            raise AssertionError(
                f"start_year must be >= {datetime.min.year}, got {start_year}"
            )
        if years_amount <= 0:
            raise AssertionError(f"years_amount must be > 0, got {years_amount}")
        self._names_generator = names_generator
        self._start_year = start_year
        self._until_not_include_year = start_year + years_amount
        self._rows_per_year = rows_per_year

    def generate(self) -> Iterable[Dict[Any, Any]]:
        """Lazily yield ``rows_per_year`` rows for each year in the range."""
        for year in range(self._start_year, self._until_not_include_year):
            ds = self._make_year(year)
            for _ in range(self._rows_per_year):
                yield self.generate_row(ds)

    def _make_year(self, year: int) -> datetime:
        """Return midnight of January 1st of ``year``."""
        return datetime(year, 1, 1, 0, 0, 0)

    def generate_row(self, dt: datetime) -> Dict[Any, Any]:
        """Build one random row whose DS value is ``dt``.

        GENDER is picked at random between BOY and GIRL, NUM is a random
        integer in ``[1, 100000]`` and STATE is a random entry of
        ``US_STATES``. NUM is repeated in the column matching the gender
        (NUM_BOYS or NUM_GIRLS); the other column is 0.
        """
        gender = choice([BOY, GIRL])
        num = randint(1, 100000)
        return {
            DS: dt,
            GENDER: gender,
            NAME: self._names_generator.generate(),
            NUM: num,
            STATE: choice(US_STATES),
            NUM_BOYS: num if gender == BOY else 0,
            NUM_GIRLS: num if gender == GIRL else 0,
        }

View File

@@ -0,0 +1,74 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
from abc import ABC, abstractmethod
from tests.common.example_data_generator.birth_names.birth_names_generator import (
BirthNamesGenerator,
)
from tests.common.example_data_generator.string_generator_factory import (
StringGeneratorFactory,
)
class BirthNamesGeneratorFactory(ABC):
    """Abstract factory for ``BirthNamesGenerator`` objects.

    One active factory instance is kept on this base class. ``make()``
    delegates to it, ``set_instance()`` replaces it, and a
    ``BirthNamesGeneratorFactoryImpl`` is created on first use when none
    has been set.
    """

    # Active factory, shared by this class and all subclasses. It is always
    # read and written on the base class so that calling the classmethods
    # through a subclass cannot create a second, subclass-local instance.
    __factory: BirthNamesGeneratorFactory | None = None

    @abstractmethod
    def _make(self) -> BirthNamesGenerator:
        """Build and return a generator; implemented by concrete factories."""
        ...

    @classmethod
    def make(cls) -> BirthNamesGenerator:
        """Return a generator built by the active factory."""
        return cls._get_instance()._make()

    @classmethod
    def set_instance(cls, factory: BirthNamesGeneratorFactory) -> None:
        """Make ``factory`` the active factory used by ``make()``.

        The factory is stored on ``BirthNamesGeneratorFactory`` itself, no
        matter which subclass this method is called through.
        """
        BirthNamesGeneratorFactory.__factory = factory

    @classmethod
    def _get_instance(cls) -> BirthNamesGeneratorFactory:
        """Return the active factory, creating the default one if unset."""
        if BirthNamesGeneratorFactory.__factory is None:
            BirthNamesGeneratorFactory.__factory = BirthNamesGeneratorFactoryImpl()
        return BirthNamesGeneratorFactory.__factory
# Arguments handed to StringGeneratorFactory.make_lowercase_based();
# presumably the minimum and maximum length of a generated name (verify
# against the string generator).
MIN_NAME_LEN = 3
MAX_NAME_SIZE = 10

# Year range and row count used for the generator built below.
START_YEAR = 1960
YEARS_AMOUNT = 60
ROW_PER_YEAR = 20


class BirthNamesGeneratorFactoryImpl(BirthNamesGeneratorFactory):
    """Concrete factory wiring ``BirthNamesGenerator`` to the constants above."""

    def _make(self) -> BirthNamesGenerator:
        """Return a generator fed by a lowercase-based string generator."""
        return BirthNamesGenerator(
            names_generator=StringGeneratorFactory.make_lowercase_based(
                MIN_NAME_LEN, MAX_NAME_SIZE
            ),
            start_year=START_YEAR,
            years_amount=YEARS_AMOUNT,
            rows_per_year=ROW_PER_YEAR,
        )