# workdock-platform/backend/workflows/backup_ops.py
from __future__ import annotations
import hashlib
import json
import os
import shutil
import subprocess
import tarfile
import tempfile
from pathlib import Path
from django.conf import settings
from django.utils import timezone
from django.utils.dateparse import parse_datetime
from django.utils.translation import gettext as _
from .models import WorkflowConfig
from .services import delete_from_nextcloud, upload_to_nextcloud
# Backup bundles are local-first. Remote copy is a secondary delivery path and
# must never replace the ability to verify/restore the local bundle directly.
def _backup_root() -> Path:
    root = Path(settings.BACKUP_OUTPUT_DIR)
    root.mkdir(parents=True, exist_ok=True)
    return root


def _metadata_path(backup_dir: Path) -> Path:
    return backup_dir / 'backup_meta.json'


def _checksums_path(backup_dir: Path) -> Path:
    return backup_dir / 'SHA256SUMS'


def _db_env() -> dict[str, str]:
    db = settings.DATABASES['default']
    env = os.environ.copy()
    env['PGPASSWORD'] = str(db['PASSWORD'])
    return env


def _db_base_args() -> list[str]:
    db = settings.DATABASES['default']
    return [
        '-h', str(db['HOST']),
        '-p', str(db['PORT']),
        '-U', str(db['USER']),
    ]


def _sha256(path: Path) -> str:
    digest = hashlib.sha256()
    with path.open('rb') as handle:
        for chunk in iter(lambda: handle.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()


def _write_checksums(backup_dir: Path, db_dump_path: Path, media_archive_path: Path) -> None:
    # Two spaces between digest and filename keep the file compatible with
    # `sha256sum -c SHA256SUMS`.
    _checksums_path(backup_dir).write_text(
        f'{_sha256(db_dump_path)}  {db_dump_path.name}\n'
        f'{_sha256(media_archive_path)}  {media_archive_path.name}\n',
        encoding='utf-8',
    )
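

# Hedged sketch (not part of the original module): a helper that re-checks the
# SHA256SUMS file written above, e.g. before trusting a bundle copied back from
# remote storage. The name `_verify_checksums` is an assumption, not an
# existing API of this project.
def _verify_checksums(backup_dir: Path) -> bool:
    path = _checksums_path(backup_dir)
    if not path.exists():
        return False
    for line in path.read_text(encoding='utf-8').splitlines():
        if not line.strip():
            continue
        # Each line is '<hex digest>  <filename>'.
        expected, _sep, name = line.partition('  ')
        target = backup_dir / name.strip()
        if not target.exists() or _sha256(target) != expected.strip():
            return False
    return True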


def _ignorable_pg_restore(stderr: str) -> bool:
    # pg_restore exits non-zero when a dump taken with a newer pg_dump sets
    # `transaction_timeout` (PostgreSQL 17+) and the target server does not
    # know that parameter. That single warning is harmless for verification,
    # so it is the only failure we tolerate.
    text = (stderr or '').strip()
    if not text:
        return False
    normalized = ' '.join(line.strip() for line in text.splitlines())
    return (
        'unrecognized configuration parameter "transaction_timeout"' in normalized
        and 'errors ignored on restore: 1' in normalized
    )


def _load_metadata(backup_dir: Path) -> dict:
    path = _metadata_path(backup_dir)
    if not path.exists():
        return {}
    try:
        return json.loads(path.read_text(encoding='utf-8'))
    except json.JSONDecodeError:
        return {}


def _save_metadata(backup_dir: Path, payload: dict) -> None:
    _metadata_path(backup_dir).write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding='utf-8')


def list_backup_bundles() -> list[dict]:
    rows: list[dict] = []
    for entry in sorted(_backup_root().glob('backup_*'), reverse=True):
        if not entry.is_dir():
            continue
        meta = _load_metadata(entry)
        rows.append(
            {
                'name': entry.name,
                'path': str(entry),
                'created_at': meta.get('created_at') or '',
                'verified_at': meta.get('verified_at') or '',
                'verify_status': meta.get('verify_status') or '',
                'summary': meta.get('verify_summary') or '',
                'remote_status': meta.get('remote_status') or '',
                'remote_summary': meta.get('remote_summary') or '',
                'remote_target_type': meta.get('remote_target_type') or '',
                'remote_path': meta.get('remote_path') or '',
                'db_dump_exists': (entry / 'db.dump').exists(),
                'media_archive_exists': (entry / 'media.tar.gz').exists(),
            }
        )
    return rows


def latest_backup_health_snapshot(stale_after_hours: int = 48) -> dict:
    # A single snapshot keeps the UI, scheduled command, and future monitoring
    # on the same health contract.
    rows = list_backup_bundles()
    if not rows:
        return {
            'status': 'missing',
            'label': str(_('Kein Backup vorhanden')),
            'summary': str(_('Es wurde noch kein Backup-Bundle erstellt.')),
            'bundle_name': '',
            'is_stale': True,
        }
    latest = rows[0]
    verified_at_raw = latest.get('verified_at') or ''
    verified_at = parse_datetime(verified_at_raw) if verified_at_raw else None
    if verified_at and timezone.is_naive(verified_at):
        verified_at = timezone.make_aware(verified_at, timezone.get_current_timezone())
    if latest.get('verify_status') != 'verified' or not verified_at:
        return {
            'status': 'unverified',
            'label': str(_('Nicht verifiziert')),
            'summary': str(_('Das neueste Backup-Bundle wurde noch nicht erfolgreich verifiziert.')),
            'bundle_name': latest['name'],
            'verified_at': verified_at_raw,
            'is_stale': True,
        }
    age = timezone.now() - verified_at
    is_stale = age.total_seconds() > stale_after_hours * 3600
    if is_stale:
        return {
            'status': 'stale',
            'label': str(_('Verifikation veraltet')),
            'summary': _('Die letzte erfolgreiche Backup-Verifikation ist älter als %(hours)s Stunden.') % {'hours': stale_after_hours},
            'bundle_name': latest['name'],
            'verified_at': verified_at_raw,
            'is_stale': True,
        }
    return {
        'status': 'healthy',
        'label': str(_('Verifikation aktuell')),
        'summary': str(_('Das neueste Backup-Bundle wurde erfolgreich und rechtzeitig verifiziert.')),
        'bundle_name': latest['name'],
        'verified_at': verified_at_raw,
        'is_stale': False,
    }
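

# Hedged sketch (not part of the original module): how a scheduled management
# command might consume the snapshot and fail loudly on a stale or unverified
# bundle. `CommandError` is Django's standard way to exit a management command
# with a non-zero status; the command class itself is an assumption and would
# live under management/commands/, not in this module.
#
# from django.core.management.base import BaseCommand, CommandError
#
# class Command(BaseCommand):
#     help = 'Fail when the latest backup bundle is unhealthy.'
#
#     def handle(self, *args, **options):
#         snapshot = latest_backup_health_snapshot()
#         if snapshot['is_stale']:
#             raise CommandError(f"{snapshot['status']}: {snapshot['summary']}")
#         self.stdout.write(snapshot['summary'])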


def _remote_backup_config() -> dict:
    config = WorkflowConfig.objects.filter(name='Default').order_by('-id').first() or WorkflowConfig.objects.order_by('id').first()
    if not config:
        # Same key set as the populated case, so callers see one contract.
        return {
            'enabled': False,
            'target_type': '',
            'nextcloud_directory': '',
            's3_bucket': '',
            'nfs_path': '',
        }
    return {
        'enabled': bool(config.remote_backup_enabled),
        'target_type': (config.remote_backup_target_type or '').strip(),
        'nextcloud_directory': (config.remote_backup_nextcloud_directory or '').strip().strip('/'),
        's3_bucket': (config.remote_backup_s3_bucket or '').strip(),
        'nfs_path': (config.remote_backup_nfs_path or '').strip(),
    }


def _upload_bundle_remote(backup_dir: Path, payload: dict) -> dict:
    remote = _remote_backup_config()
    payload.update(
        {
            'remote_status': '',
            'remote_summary': '',
            'remote_target_type': remote['target_type'],
            'remote_path': '',
        }
    )
    if not remote['enabled']:
        payload['remote_status'] = 'disabled'
        payload['remote_summary'] = str(_('Remote Backup ist deaktiviert.'))
        return payload
    if remote['target_type'] != 'nextcloud':
        payload['remote_status'] = 'not_implemented'
        payload['remote_summary'] = _('Zieltyp %(target)s ist vorbereitet, aber noch nicht implementiert.') % {'target': remote['target_type'] or '-'}
        return payload
    remote_directory = remote['nextcloud_directory']
    if not remote_directory:
        payload['remote_status'] = 'failed'
        payload['remote_summary'] = str(_('Nextcloud Backup-Verzeichnis fehlt.'))
        return payload
    remote_bundle_dir = f'{remote_directory}/{backup_dir.name}'
    files_to_upload = ['db.dump', 'media.tar.gz', 'SHA256SUMS']
    uploaded_files: list[str] = []
    for filename in files_to_upload:
        local_path = backup_dir / filename
        if not local_path.exists():
            continue
        ok = upload_to_nextcloud(
            local_path,
            filename,
            directory_override=remote_bundle_dir,
            require_enabled=False,
        )
        if not ok:
            payload['remote_status'] = 'failed'
            payload['remote_summary'] = _('Upload nach Nextcloud fehlgeschlagen bei %(file)s.') % {'file': filename}
            payload['remote_path'] = remote_bundle_dir
            return payload
        uploaded_files.append(filename)
    payload['remote_status'] = 'uploaded'
    payload['remote_summary'] = _('Nach Nextcloud hochgeladen: %(count)s Datei(en).') % {'count': len(uploaded_files)}
    payload['remote_uploaded_at'] = timezone.now().isoformat()
    payload['remote_target_type'] = 'nextcloud'
    payload['remote_path'] = remote_bundle_dir
    payload['remote_files'] = uploaded_files
    return payload
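

# Hedged sketch (not part of the original module): what the prepared-but-not-
# implemented 's3' target branch above could look like with boto3. The helper
# name and the assumption that credentials come from the environment are
# illustrative, not an existing API of this project.
#
# import boto3
#
# def _upload_bundle_s3(backup_dir: Path, bucket: str) -> bool:
#     client = boto3.client('s3')
#     for filename in ('db.dump', 'media.tar.gz', 'SHA256SUMS'):
#         local_path = backup_dir / filename
#         if local_path.exists():
#             client.upload_file(str(local_path), bucket, f'{backup_dir.name}/{filename}')
#     return True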


def create_backup_bundle() -> dict:
    timestamp = timezone.localtime().strftime('%Y%m%d_%H%M%S')
    backup_dir = _backup_root() / f'backup_{timestamp}'
    backup_dir.mkdir(parents=True, exist_ok=False)
    db_dump_path = backup_dir / 'db.dump'
    media_archive_path = backup_dir / 'media.tar.gz'
    db = settings.DATABASES['default']
    # Custom-format dump (-Fc) so pg_restore can replay it into the throwaway
    # verification database below.
    subprocess.run(
        [
            'pg_dump',
            *_db_base_args(),
            '-d', str(db['NAME']),
            '-Fc',
            '--no-owner',
            '--no-privileges',
            '-f', str(db_dump_path),
        ],
        check=True,
        env=_db_env(),
    )
    with tarfile.open(media_archive_path, 'w:gz') as archive:
        archive.add(settings.MEDIA_ROOT, arcname='media')
    payload = {
        'created_at': timezone.now().isoformat(),
        'postgres_db': str(db['NAME']),
        'postgres_user': str(db['USER']),
        'db_dump_file': db_dump_path.name,
        'media_archive_file': media_archive_path.name,
        'verify_status': '',
        'verified_at': '',
        'verify_summary': '',
    }
    _save_metadata(backup_dir, payload)
    _write_checksums(backup_dir, db_dump_path, media_archive_path)
    payload = _upload_bundle_remote(backup_dir, payload)
    _save_metadata(backup_dir, payload)
    if payload.get('remote_status') == 'uploaded' and payload.get('remote_path'):
        upload_to_nextcloud(
            _metadata_path(backup_dir),
            _metadata_path(backup_dir).name,
            directory_override=payload['remote_path'],
            require_enabled=False,
        )
    return {'name': backup_dir.name, 'path': str(backup_dir)}
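

# Hedged sketch (not part of the original module): a nightly job would
# typically chain the two public entry points so a bundle is never reported
# healthy without having survived a restore test. Scheduling (cron, Celery
# beat, a management command) is left open; only the call order is shown.
#
# def run_nightly_backup() -> dict:
#     bundle = create_backup_bundle()
#     return verify_backup_bundle(bundle['name'])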


def verify_backup_bundle(backup_name: str) -> dict:
    backup_dir = _backup_root() / backup_name
    db_dump_path = backup_dir / 'db.dump'
    media_archive_path = backup_dir / 'media.tar.gz'
    if not backup_dir.exists() or not db_dump_path.exists() or not media_archive_path.exists():
        raise FileNotFoundError(_('Backup-Dateien nicht gefunden.'))
    # Restore into a throwaway database so verification proves the dump is
    # actually restorable, not merely readable.
    verify_db = f'{settings.DATABASES["default"]["NAME"]}_verify_{int(timezone.now().timestamp())}'
    env = _db_env()
    args = _db_base_args()
    meta = _load_metadata(backup_dir)
    try:
        subprocess.run(
            ['psql', *args, '-d', 'postgres', '-v', 'ON_ERROR_STOP=1', '-c', f'CREATE DATABASE "{verify_db}";'],
            check=True,
            env=env,
            capture_output=True,
            text=True,
        )
        restore = subprocess.run(
            ['pg_restore', *args, '-d', verify_db, '--no-owner', '--no-privileges', str(db_dump_path)],
            env=env,
            capture_output=True,
            text=True,
        )
        if restore.returncode != 0 and not _ignorable_pg_restore(restore.stderr):
            raise subprocess.CalledProcessError(
                restore.returncode,
                restore.args,
                output=restore.stdout,
                stderr=restore.stderr,
            )
        table_count = subprocess.check_output(
            ['psql', *args, '-d', verify_db, '-t', '-A', '-c', "SELECT COUNT(*) FROM pg_tables WHERE schemaname='public';"],
            env=env,
            text=True,
        ).strip()
        onboarding_count = subprocess.check_output(
            ['psql', *args, '-d', verify_db, '-t', '-A', '-c', 'SELECT COUNT(*) FROM workflows_onboardingrequest;'],
            env=env,
            text=True,
        ).strip()
        offboarding_count = subprocess.check_output(
            ['psql', *args, '-d', verify_db, '-t', '-A', '-c', 'SELECT COUNT(*) FROM workflows_offboardingrequest;'],
            env=env,
            text=True,
        ).strip()
        with tempfile.TemporaryDirectory(prefix='workdock_backup_verify_media_') as tmpdir:
            # filter='data' rejects absolute paths and path traversal inside
            # the archive (Python 3.12+, backported to recent 3.x releases).
            with tarfile.open(media_archive_path, 'r:gz') as archive:
                archive.extractall(tmpdir, filter='data')
            media_dir = Path(tmpdir) / 'media'
            if not media_dir.exists():
                raise RuntimeError(_('Media-Archiv enthält kein media/-Verzeichnis.'))
            media_file_count = sum(1 for path in media_dir.rglob('*') if path.is_file())
        summary = _('%(tables)s Tabellen, %(onboarding)s Onboarding, %(offboarding)s Offboarding, %(media)s Mediendateien geprüft.') % {
            'tables': table_count,
            'onboarding': onboarding_count,
            'offboarding': offboarding_count,
            'media': media_file_count,
        }
        meta['verified_at'] = timezone.now().isoformat()
        meta['verify_status'] = 'verified'
        meta['verify_summary'] = summary
        _save_metadata(backup_dir, meta)
        return {'name': backup_name, 'summary': summary}
    finally:
        subprocess.run(
            ['psql', *args, '-d', 'postgres', '-v', 'ON_ERROR_STOP=1', '-c', f'DROP DATABASE IF EXISTS "{verify_db}";'],
            check=False,
            env=env,
            capture_output=True,
            text=True,
        )


def delete_backup_bundle(backup_name: str) -> dict:
    backup_dir = (_backup_root() / backup_name).resolve()
    backup_root = _backup_root().resolve()
    # Guard against path traversal: the resolved bundle directory must sit
    # inside the backup root.
    if backup_root not in backup_dir.parents:
        raise ValueError(_('Ungültiger Backup-Pfad.'))
    if not backup_dir.exists() or not backup_dir.is_dir():
        raise FileNotFoundError(_('Backup-Dateien nicht gefunden.'))
    meta = _load_metadata(backup_dir)
    if meta.get('remote_status') == 'uploaded' and meta.get('remote_target_type') == 'nextcloud' and meta.get('remote_path'):
        ok = delete_from_nextcloud(meta['remote_path'], directory_override='')
        if not ok:
            raise RuntimeError(_('Remote Backup in Nextcloud konnte nicht gelöscht werden.'))
    shutil.rmtree(backup_dir)
    return {'name': backup_name}
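

# Hedged sketch (not part of the original module): a retention policy built on
# the public helpers above. `keep` and the newest-first pruning order are
# assumptions, not project policy.
def _prune_old_bundles(keep: int = 7) -> list[str]:
    # list_backup_bundles() returns bundles newest-first, so everything past
    # `keep` is a deletion candidate.
    deleted: list[str] = []
    for row in list_backup_bundles()[keep:]:
        delete_backup_bundle(row['name'])
        deleted.append(row['name'])
    return deleted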