From b77bdc167c436d3e4807a294b6e6257a695d6ff8 Mon Sep 17 00:00:00 2001
From: Robert Goldmann <deadlocker@gmx.de>
Date: Sun, 22 Aug 2021 14:54:12 +0200
Subject: [PATCH] #9 - first implementation

---
 src/logic/database/Crud.py            | 13 ++++++
 src/logic/database/DatabaseCleaner.py | 66 ++++++++++++++++++++++++++-
 src/logic/routers/GeneralRouter.py    |  4 +-
 src/test/DatabaseCleanerTest.py       | 63 +++++++++++++++++++++++++
 src/test/__init__.py                  |  0
 5 files changed, 144 insertions(+), 2 deletions(-)
 create mode 100644 src/test/DatabaseCleanerTest.py
 create mode 100644 src/test/__init__.py

diff --git a/src/logic/database/Crud.py b/src/logic/database/Crud.py
index 9fde026..4c6c1ce 100644
--- a/src/logic/database/Crud.py
+++ b/src/logic/database/Crud.py
@@ -168,6 +168,19 @@ def delete_measurement(db: Session, measurement: Schemas.Measurement):
     db.commit()
 
 
+def delete_multiple_measurements(db: Session, measurementIds: List[int]):
+    """
+    Deletes all measurements with one of the given ids via a single bulk statement and commits.
+
+    :param db: database session to operate on
+    :param measurementIds: ids of the measurements to delete
+    """
+    # synchronize_session=False: skip evaluating the criteria in Python against the objects in the
+    # session - older SQLAlchemy versions can not evaluate IN expressions and raise an error instead
+    db.query(Models.Measurement).filter(Models.Measurement.id.in_(measurementIds)).delete(synchronize_session=False)
+    db.commit()
+
+
 def get_total_number_of_measurements(db: Session) -> List[int]:
     return db.query(Models.Measurement).count()
 
diff --git a/src/logic/database/DatabaseCleaner.py b/src/logic/database/DatabaseCleaner.py
index 1ea9076..241688a 100644
--- a/src/logic/database/DatabaseCleaner.py
+++ b/src/logic/database/DatabaseCleaner.py
@@ -1,10 +1,12 @@
 import logging
 from dataclasses import dataclass
+from datetime import datetime, timedelta
 from typing import List
 
 from sqlalchemy.orm import Session
 
 from logic import Constants
+from logic.database import Crud
 
 LOGGER = logging.getLogger(Constants.APP_NAME)
 
@@ -16,13 +18,75 @@ class RetentionPolicy:
 
 
 class DatabaseCleaner:
+    # lower bound for queries that should match every measurement older than a policy's start
+    MIN_DATETIME = datetime(year=1970, month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
+
+    # TODO DEBUG:
+    # MIN_DATETIME = datetime.now() - timedelta(days=31)
+
     def __init__(self, retentionPolicies: List[RetentionPolicy]):
         self._policies = retentionPolicies
 
-    def clean(self, db: Session):
+    def clean(self, db: Session, currentDateTime: datetime):
+        """
+        Enforces every retention policy on the measurements that are older than the policy's age.
+
+        :param db: database session used to fetch and delete measurements
+        :param currentDateTime: point in time the age of each policy is calculated from
+        """
         LOGGER.info('Performing database cleanup...')
 
         for policy in self._policies:
             LOGGER.debug(f'Enforcing retention policy: {policy}')
 
+            policyStart = currentDateTime - timedelta(days=policy.ageInDays)
+
+            # NOTE(review): the result is not filtered by sensor here - confirm that measurements of
+            # different sensors are not supposed to be thinned out independently of each other
+            affectedMeasurements = Crud.get_measurements(db=db,
+                                                         startDateTime=self.MIN_DATETIME.strftime(Crud.DATE_FORMAT),
+                                                         endDateTime=policyStart.strftime(Crud.DATE_FORMAT))
+            LOGGER.debug(f'Found {len(affectedMeasurements)} measurements older than {policyStart}')
+            if not affectedMeasurements:
+                continue
+
+            # thinning out starts at the newest measurement
+            # (presumably Crud.get_measurements returns them oldest first - TODO confirm)
+            affectedMeasurements.reverse()
+
+            self.__delete_old_measurements(affectedMeasurements, db, policy)
+
         LOGGER.info('Database cleanup done')
+
+        # TODO: force backup?
+
+    def __delete_old_measurements(self, affectedMeasurements, db: Session, policy: RetentionPolicy):
+        """
+        Thins out the given measurements: starting at the first one, a measurement is only kept if it is
+        at least policy.resolutionInMinutes older than the previously kept one. All others are deleted.
+
+        :param affectedMeasurements: non-empty list of measurements, ordered from newest to oldest
+        :param db: database session used to delete the measurements
+        :param policy: retention policy providing the resolution to enforce
+        """
+        resolution = timedelta(minutes=policy.resolutionInMinutes)
+
+        # the first measurement is always kept and serves as the initial reference point
+        lastTimestamp = datetime.strptime(affectedMeasurements[0].timestamp, Crud.DATE_FORMAT)
+        nextAllowedTimestamp = lastTimestamp - resolution
+
+        measurementsIdsToDelete = []
+
+        for measurement in affectedMeasurements[1:]:
+            timestamp = datetime.strptime(measurement.timestamp, Crud.DATE_FORMAT)
+            if timestamp > nextAllowedTimestamp:
+                # too close to the previously kept measurement
+                measurementsIdsToDelete.append(measurement.id)
+            else:
+                # far enough away: keep it and use it as the new reference point
+                nextAllowedTimestamp = timestamp - resolution
+
+        LOGGER.debug(f'Scheduled {len(measurementsIdsToDelete)} measurements for deletion (keeping {len(affectedMeasurements) - len(measurementsIdsToDelete)})')
+        if measurementsIdsToDelete:
+            # avoid a pointless DELETE statement and commit if every measurement is kept
+            Crud.delete_multiple_measurements(db, measurementsIdsToDelete)
diff --git a/src/logic/routers/GeneralRouter.py b/src/logic/routers/GeneralRouter.py
index 3987764..a23fcb8 100644
--- a/src/logic/routers/GeneralRouter.py
+++ b/src/logic/routers/GeneralRouter.py
@@ -1,3 +1,5 @@
+from datetime import datetime
+
 from fastapi import APIRouter, Depends
 from sqlalchemy.orm import Session
 
@@ -37,7 +39,7 @@ async def databaseCleanup(db: Session = Depends(get_database)):
     for item in retentionPolicies:
         policies.append(RetentionPolicy(resolutionInMinutes=item['resolutionInMinutes'], ageInDays=item['ageInDays']))
 
-    DatabaseCleaner(policies).clean(db)
+    DatabaseCleaner(policies).clean(db, datetime.now())
 
     infoAfter = DatabaseInfoProvider.get_database_info(db)
 
diff --git a/src/test/DatabaseCleanerTest.py b/src/test/DatabaseCleanerTest.py
new file mode 100644
index 0000000..2b3696a
--- /dev/null
+++ b/src/test/DatabaseCleanerTest.py
@@ -0,0 +1,63 @@
+import unittest
+from datetime import datetime, timedelta
+from unittest.mock import Mock, patch
+
+from logic.database import Schemas
+
+
+class TestDatabaseCleaner(unittest.TestCase):
+    CURRENT_DATE_TIME = datetime(year=2021, month=8, day=18, hour=22, minute=0, second=0)
+
+    def test_noRetentionPolicies_doNothing(self):
+        mockedCrud = Mock()
+        with patch.dict('sys.modules', **{'logic.database.Crud': mockedCrud}):
+            mockedCrud.get_measurements.return_value = []
+
+            database = Mock()
+            from logic.database.DatabaseCleaner import DatabaseCleaner
+            DatabaseCleaner([]).clean(database, self.CURRENT_DATE_TIME)
+
+            mockedCrud.get_measurements.assert_not_called()
+
+    def test_onePolicy_fetchMeasurementsOlderThanPolicyStart(self):
+        mockedCrud = Mock()
+        with patch.dict('sys.modules', **{'logic.database.Crud': mockedCrud}):
+            mockedCrud.get_measurements.return_value = []
+            mockedCrud.DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
+
+            database = Mock()
+            from logic.database.DatabaseCleaner import DatabaseCleaner
+            from logic.database.DatabaseCleaner import RetentionPolicy
+
+            policy = RetentionPolicy(resolutionInMinutes=10, ageInDays=1)
+            DatabaseCleaner([policy]).clean(database, self.CURRENT_DATE_TIME)
+
+            expectedEndTime = self.CURRENT_DATE_TIME - timedelta(days=policy.ageInDays)
+            mockedCrud.get_measurements.assert_called_once_with(db=database,
+                                                                startDateTime=DatabaseCleaner.MIN_DATETIME.strftime(mockedCrud.DATE_FORMAT),
+                                                                endDateTime=expectedEndTime.strftime(mockedCrud.DATE_FORMAT))
+
+    def test_onePolicy_deleteMeasurements(self):
+        mockedCrud = Mock()
+        with patch.dict('sys.modules', **{'logic.database.Crud': mockedCrud}):
+
+            measurementToBeDeleted1 = Schemas.Measurement(id=1, value='5', timestamp='2021-08-17 20:05:00', sensor_id=2)
+            measurementToKeep = Schemas.Measurement(id=2, value='5', timestamp='2021-08-17 20:09:12', sensor_id=2)
+            measurementToBeDeleted2 = Schemas.Measurement(id=3, value='5', timestamp='2021-08-17 21:07:12', sensor_id=2)
+            measurementToBeDeleted3 = Schemas.Measurement(id=4, value='5', timestamp='2021-08-17 21:09:12', sensor_id=2)
+            newestMeasurementToKeep = Schemas.Measurement(id=5, value='5', timestamp='2021-08-17 21:10:12', sensor_id=2)
+
+            mockedCrud.get_measurements.return_value = [measurementToBeDeleted1, measurementToKeep, measurementToBeDeleted2, measurementToBeDeleted3, newestMeasurementToKeep]
+            mockedCrud.DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
+
+            database = Mock()
+            from logic.database.DatabaseCleaner import DatabaseCleaner
+            from logic.database.DatabaseCleaner import RetentionPolicy
+
+            policy = RetentionPolicy(resolutionInMinutes=10, ageInDays=1)
+            DatabaseCleaner([policy]).clean(database, self.CURRENT_DATE_TIME)
+
+            # ids are collected from newest to oldest; the newest measurement (id 5) and id 2 are kept
+            mockedCrud.delete_multiple_measurements.assert_called_once_with(database, [4, 3, 1])
+
+    # TODO: test: if cleanup is performed on the next day: prevent deletion of already low resolution measurements
diff --git a/src/test/__init__.py b/src/test/__init__.py
new file mode 100644
index 0000000..e69de29
-- 
GitLab