snapshot: preserve backup UX, remote target setup, and docs updates

This commit is contained in:
Md Bayazid Bostame
2026-03-26 01:53:44 +01:00
parent 2a372fdb15
commit 438334bd92
26 changed files with 1737 additions and 383 deletions

View File

@@ -0,0 +1,325 @@
from __future__ import annotations
import hashlib
import json
import os
import shutil
import subprocess
import tarfile
import tempfile
from datetime import datetime
from pathlib import Path
from django.conf import settings
from django.utils import timezone
from django.utils.translation import gettext as _
from .models import WorkflowConfig
from .services import delete_from_nextcloud, upload_to_nextcloud
def _backup_root() -> Path:
    """Return the configured backup output directory, creating it on demand."""
    backup_dir = Path(settings.BACKUP_OUTPUT_DIR)
    backup_dir.mkdir(parents=True, exist_ok=True)
    return backup_dir
def _metadata_path(backup_dir: Path) -> Path:
    """Location of the JSON metadata file inside a backup bundle directory."""
    return backup_dir.joinpath('backup_meta.json')
def _checksums_path(backup_dir: Path) -> Path:
    """Location of the SHA256SUMS file inside a backup bundle directory."""
    return backup_dir.joinpath('SHA256SUMS')
def _db_env() -> dict[str, str]:
    """Copy of the current environment with PGPASSWORD set for PostgreSQL CLI tools."""
    environment = dict(os.environ)
    environment['PGPASSWORD'] = str(settings.DATABASES['default']['PASSWORD'])
    return environment
def _db_base_args() -> list[str]:
    """Common -h/-p/-U arguments for psql, pg_dump and pg_restore."""
    default_db = settings.DATABASES['default']
    args: list[str] = []
    for flag, key in (('-h', 'HOST'), ('-p', 'PORT'), ('-U', 'USER')):
        args.extend((flag, str(default_db[key])))
    return args
def _sha256(path: Path) -> str:
    """Hex SHA-256 digest of a file, streamed in 64 KiB chunks."""
    hasher = hashlib.sha256()
    with path.open('rb') as stream:
        while chunk := stream.read(65536):
            hasher.update(chunk)
    return hasher.hexdigest()
def _write_checksums(backup_dir: Path, db_dump_path: Path, media_archive_path: Path) -> None:
    """Write a SHA256SUMS file covering the DB dump and the media archive."""
    lines = [
        f'{_sha256(db_dump_path)} {db_dump_path.name}',
        f'{_sha256(media_archive_path)} {media_archive_path.name}',
    ]
    # Trailing newline keeps the file compatible with `sha256sum -c`-style tools.
    _checksums_path(backup_dir).write_text('\n'.join(lines) + '\n', encoding='utf-8')
def _ignorable_pg_restore(stderr: str) -> bool:
    """True when pg_restore stderr matches the known-harmless case.

    The harmless case is the "transaction_timeout" parameter warning
    together with exactly one ignored error; anything else is a real failure.
    """
    text = (stderr or '').strip()
    if not text:
        return False
    flattened = ' '.join(part.strip() for part in text.splitlines())
    has_param_warning = 'unrecognized configuration parameter "transaction_timeout"' in flattened
    has_single_ignored_error = 'errors ignored on restore: 1' in flattened
    return has_param_warning and has_single_ignored_error
def _load_metadata(backup_dir: Path) -> dict:
    """Parse backup_meta.json; return an empty dict when missing or corrupt."""
    meta_file = _metadata_path(backup_dir)
    if meta_file.exists():
        try:
            return json.loads(meta_file.read_text(encoding='utf-8'))
        except json.JSONDecodeError:
            # Corrupt metadata is treated the same as absent metadata.
            pass
    return {}
def _save_metadata(backup_dir: Path, payload: dict) -> None:
    """Serialize payload as pretty-printed UTF-8 JSON into backup_meta.json."""
    serialized = json.dumps(payload, indent=2, ensure_ascii=False)
    _metadata_path(backup_dir).write_text(serialized, encoding='utf-8')
def list_backup_bundles() -> list[dict]:
    """Return newest-first summary rows for every local backup_* bundle.

    Each row merges directory facts (name, path, which artifacts exist on
    disk) with whatever verification / remote-upload state is recorded in
    the bundle's metadata file; missing metadata fields default to ''.
    """
    bundles: list[dict] = []
    # Reverse lexicographic order on the timestamped names == newest first.
    for bundle_dir in sorted(_backup_root().glob('backup_*'), reverse=True):
        if not bundle_dir.is_dir():
            continue
        meta = _load_metadata(bundle_dir)
        row = {
            'name': bundle_dir.name,
            'path': str(bundle_dir),
            'db_dump_exists': (bundle_dir / 'db.dump').exists(),
            'media_archive_exists': (bundle_dir / 'media.tar.gz').exists(),
        }
        for row_key, meta_key in (
            ('created_at', 'created_at'),
            ('verified_at', 'verified_at'),
            ('verify_status', 'verify_status'),
            ('summary', 'verify_summary'),
            ('remote_status', 'remote_status'),
            ('remote_summary', 'remote_summary'),
            ('remote_target_type', 'remote_target_type'),
            ('remote_path', 'remote_path'),
        ):
            row[row_key] = meta.get(meta_key) or ''
        bundles.append(row)
    return bundles
def _remote_backup_config() -> dict:
    """Read remote-backup settings from the active WorkflowConfig row.

    Prefers the newest row named 'Default', falling back to the oldest row
    of any name. Always returns the same key set ('enabled', 'target_type',
    'nextcloud_directory', 's3_bucket', 'nfs_path') so callers can index
    without guarding.
    """
    config = (
        WorkflowConfig.objects.filter(name='Default').order_by('-id').first()
        or WorkflowConfig.objects.order_by('id').first()
    )
    if not config:
        # BUG FIX: this branch previously omitted 's3_bucket' and 'nfs_path',
        # so any caller reading those keys would raise KeyError when no
        # WorkflowConfig row exists yet.
        return {
            'enabled': False,
            'target_type': '',
            'nextcloud_directory': '',
            's3_bucket': '',
            'nfs_path': '',
        }
    return {
        'enabled': bool(config.remote_backup_enabled),
        'target_type': (config.remote_backup_target_type or '').strip(),
        # Stored without surrounding slashes so paths can be joined safely.
        'nextcloud_directory': (config.remote_backup_nextcloud_directory or '').strip().strip('/'),
        's3_bucket': (config.remote_backup_s3_bucket or '').strip(),
        'nfs_path': (config.remote_backup_nfs_path or '').strip(),
    }
def _upload_bundle_remote(backup_dir: Path, payload: dict) -> dict:
    """Mirror a backup bundle to the configured remote target.

    Mutates and returns *payload*, filling the remote_* fields with the
    outcome. Only the 'nextcloud' target type is implemented; other
    configured types are reported as not_implemented. Missing local files
    are skipped rather than treated as failures.
    """
    remote = _remote_backup_config()
    payload['remote_status'] = ''
    payload['remote_summary'] = ''
    payload['remote_target_type'] = remote['target_type']
    payload['remote_path'] = ''
    if not remote['enabled']:
        payload['remote_status'] = 'disabled'
        payload['remote_summary'] = str(_('Remote Backup ist deaktiviert.'))
        return payload
    if remote['target_type'] != 'nextcloud':
        payload['remote_status'] = 'not_implemented'
        payload['remote_summary'] = _('Zieltyp %(target)s ist vorbereitet, aber noch nicht implementiert.') % {'target': remote['target_type'] or '-'}
        return payload
    target_directory = remote['nextcloud_directory']
    if not target_directory:
        payload['remote_status'] = 'failed'
        payload['remote_summary'] = str(_('Nextcloud Backup-Verzeichnis fehlt.'))
        return payload
    bundle_remote_dir = f'{target_directory}/{backup_dir.name}'
    uploaded: list[str] = []
    for filename in ('db.dump', 'media.tar.gz', 'SHA256SUMS'):
        source = backup_dir / filename
        if not source.exists():
            continue
        transfer_ok = upload_to_nextcloud(
            source,
            filename,
            directory_override=bundle_remote_dir,
            require_enabled=False,
        )
        if not transfer_ok:
            # Stop at the first failed file and record which one failed.
            payload['remote_status'] = 'failed'
            payload['remote_summary'] = _('Upload nach Nextcloud fehlgeschlagen bei %(file)s.') % {'file': filename}
            payload['remote_path'] = bundle_remote_dir
            return payload
        uploaded.append(filename)
    payload['remote_status'] = 'uploaded'
    payload['remote_summary'] = _('Nach Nextcloud hochgeladen: %(count)s Datei(en).') % {'count': len(uploaded)}
    payload['remote_uploaded_at'] = timezone.now().isoformat()
    payload['remote_target_type'] = 'nextcloud'
    payload['remote_path'] = bundle_remote_dir
    payload['remote_files'] = uploaded
    return payload
def create_backup_bundle() -> dict:
    """Create a new timestamped backup bundle on local disk.

    Dumps the default PostgreSQL database (custom format) and archives
    MEDIA_ROOT into the bundle directory, writes checksums and metadata,
    then attempts the configured remote upload. Returns a dict with the
    bundle 'name' and local 'path'.

    Raises:
        subprocess.CalledProcessError: when pg_dump fails.
        FileExistsError: when a bundle with the same timestamp already exists.
    """
    timestamp = timezone.localtime().strftime('%Y%m%d_%H%M%S')
    backup_dir = _backup_root() / f'backup_{timestamp}'
    # exist_ok=False: colliding with an existing bundle must fail loudly
    # instead of silently mixing two backups into one directory.
    backup_dir.mkdir(parents=True, exist_ok=False)
    db_dump_path = backup_dir / 'db.dump'
    media_archive_path = backup_dir / 'media.tar.gz'
    db = settings.DATABASES['default']
    # Custom-format dump (-Fc) without ownership/privilege statements so it
    # can be restored into a differently-owned database later.
    subprocess.run(
        [
            'pg_dump',
            *_db_base_args(),
            '-d', str(db['NAME']),
            '-Fc',
            '--no-owner',
            '--no-privileges',
            '-f', str(db_dump_path),
        ],
        check=True,
        env=_db_env(),
    )
    # Archive all uploaded media under a fixed 'media' top-level name.
    with tarfile.open(media_archive_path, 'w:gz') as archive:
        archive.add(settings.MEDIA_ROOT, arcname='media')
    payload = {
        'created_at': timezone.now().isoformat(),
        'postgres_db': str(db['NAME']),
        'postgres_user': str(db['USER']),
        'db_dump_file': db_dump_path.name,
        'media_archive_file': media_archive_path.name,
        'verify_status': '',
        'verified_at': '',
        'verify_summary': '',
    }
    # Persist metadata before the remote step so a complete local bundle
    # exists even if the upload fails; saved again below with the outcome.
    _save_metadata(backup_dir, payload)
    _write_checksums(backup_dir, db_dump_path, media_archive_path)
    payload = _upload_bundle_remote(backup_dir, payload)
    _save_metadata(backup_dir, payload)
    if payload.get('remote_status') == 'uploaded' and payload.get('remote_path'):
        # Mirror the final metadata file next to the uploaded artifacts;
        # best-effort — the return value is deliberately ignored here.
        upload_to_nextcloud(
            _metadata_path(backup_dir),
            _metadata_path(backup_dir).name,
            directory_override=payload['remote_path'],
            require_enabled=False,
        )
    return {'name': backup_dir.name, 'path': str(backup_dir)}
def verify_backup_bundle(backup_name: str) -> dict:
    """Restore a bundle into a scratch database and inspect the media archive.

    Creates a temporary PostgreSQL database, restores the dump into it,
    counts public tables plus key workflow rows, and unpacks the media
    archive into a temp dir to count files. On success the bundle metadata
    is updated with verify_status='verified' and a human-readable summary,
    which is also returned. The scratch database is always dropped.

    Raises:
        FileNotFoundError: when bundle files are missing.
        subprocess.CalledProcessError: when createdb/restore/count queries
            fail (restore stderr matching the known-harmless
            transaction_timeout warning is tolerated).
        RuntimeError: when the archive lacks a top-level media/ directory.
    """
    backup_dir = _backup_root() / backup_name
    db_dump_path = backup_dir / 'db.dump'
    media_archive_path = backup_dir / 'media.tar.gz'
    if not backup_dir.exists() or not db_dump_path.exists() or not media_archive_path.exists():
        raise FileNotFoundError(_('Backup-Dateien nicht gefunden.'))
    # Unique scratch database name per verification run.
    verify_db = f'{settings.DATABASES["default"]["NAME"]}_verify_{int(timezone.now().timestamp())}'
    env = _db_env()
    args = _db_base_args()
    meta = _load_metadata(backup_dir)

    def _scalar_query(sql: str) -> str:
        # psql -t -A prints just the bare value; strip the trailing newline.
        return subprocess.check_output(
            ['psql', *args, '-d', verify_db, '-t', '-A', '-c', sql],
            env=env,
            text=True,
        ).strip()

    try:
        subprocess.run(
            ['psql', *args, '-d', 'postgres', '-v', 'ON_ERROR_STOP=1', '-c', f'CREATE DATABASE "{verify_db}";'],
            check=True,
            env=env,
            capture_output=True,
            text=True,
        )
        restore = subprocess.run(
            ['pg_restore', *args, '-d', verify_db, '--no-owner', '--no-privileges', str(db_dump_path)],
            env=env,
            capture_output=True,
            text=True,
        )
        if restore.returncode != 0 and not _ignorable_pg_restore(restore.stderr):
            raise subprocess.CalledProcessError(
                restore.returncode,
                restore.args,
                output=restore.stdout,
                stderr=restore.stderr,
            )
        # BUG FIX: removed a leftover `with connection.cursor() as cursor: pass`
        # block here — `connection` was never imported in this module, so it
        # raised NameError on every verification run (and did no useful work).
        table_count = _scalar_query("SELECT COUNT(*) FROM pg_tables WHERE schemaname='public';")
        onboarding_count = _scalar_query('SELECT COUNT(*) FROM workflows_onboardingrequest;')
        offboarding_count = _scalar_query('SELECT COUNT(*) FROM workflows_offboardingrequest;')
        with tempfile.TemporaryDirectory(prefix='tubco_backup_verify_media_') as tmpdir:
            with tarfile.open(media_archive_path, 'r:gz') as archive:
                # filter='data' rejects absolute paths / traversal members.
                archive.extractall(tmpdir, filter='data')
            media_dir = Path(tmpdir) / 'media'
            if not media_dir.exists():
                raise RuntimeError(_('Media-Archiv enthält kein media/-Verzeichnis.'))
            media_file_count = sum(1 for path in media_dir.rglob('*') if path.is_file())
        summary = _('%(tables)s Tabellen, %(onboarding)s Onboarding, %(offboarding)s Offboarding, %(media)s Mediendateien geprüft.') % {
            'tables': table_count,
            'onboarding': onboarding_count,
            'offboarding': offboarding_count,
            'media': media_file_count,
        }
        meta['verified_at'] = timezone.now().isoformat()
        meta['verify_status'] = 'verified'
        meta['verify_summary'] = summary
        _save_metadata(backup_dir, meta)
        return {'name': backup_name, 'summary': summary}
    finally:
        # Always drop the scratch database, even when verification failed.
        subprocess.run(
            ['psql', *args, '-d', 'postgres', '-v', 'ON_ERROR_STOP=1', '-c', f'DROP DATABASE IF EXISTS "{verify_db}";'],
            check=False,
            env=env,
            capture_output=True,
            text=True,
        )
def delete_backup_bundle(backup_name: str) -> dict:
    """Delete a local backup bundle and, when present, its Nextcloud mirror.

    Raises:
        ValueError: when the resolved path escapes the backup root
            (path-traversal guard).
        FileNotFoundError: when the bundle directory does not exist.
        RuntimeError: when the Nextcloud copy cannot be removed — the
            local bundle is kept in that case.
    """
    root = _backup_root().resolve()
    target = (root / backup_name).resolve()
    # The resolved target must live strictly below the backup root.
    if root not in target.parents:
        raise ValueError(_('Ungültiger Backup-Pfad.'))
    if not (target.exists() and target.is_dir()):
        raise FileNotFoundError(_('Backup-Dateien nicht gefunden.'))
    meta = _load_metadata(target)
    has_nextcloud_copy = (
        meta.get('remote_status') == 'uploaded'
        and meta.get('remote_target_type') == 'nextcloud'
        and bool(meta.get('remote_path'))
    )
    if has_nextcloud_copy:
        if not delete_from_nextcloud(meta['remote_path'], directory_override=''):
            raise RuntimeError(_('Remote Backup in Nextcloud konnte nicht gelöscht werden.'))
    shutil.rmtree(target)
    return {'name': backup_name}