- container_manager.py: Pre-Check vor containers.run() hinzufügen - Prüfe, ob Container bereits existiert (z.B. nach Fehler) - Wenn running: Wiederverwenden statt neu zu erstellen - Wenn stopped: Versuchen zu starten oder zu löschen - Verhindert Docker-409-Conflict-Fehler - api.py: Container-Spawn-Fehlerbehandlung verbessern - Container-Spawn ist optional beim Signup - User wird trotzdem erstellt, wenn Spawn fehlschlägt - JWT wird immer zurückgegeben (Status 200) - docs/BUGFIX_CONTAINER_SPAWN.md: Dokumentation hinzufügen - Erklärt die Probleme, Fixes und Testing
259 lines
11 KiB
Python
259 lines
11 KiB
Python
import requests_unixsocket
|
|
import docker
|
|
from config import Config
|
|
|
|
class ContainerManager:
|
|
def __init__(self):
|
|
self.client = None
|
|
|
|
def _get_client(self):
|
|
"""Lazy initialization of Docker client"""
|
|
if self.client is None:
|
|
try:
|
|
# Nutze from_env() - DOCKER_HOST aus Umgebungsvariable
|
|
self.client = docker.from_env()
|
|
|
|
except Exception as e:
|
|
raise Exception(f"Docker connection failed: {str(e)}")
|
|
return self.client
|
|
|
|
def spawn_container(self, user_id, slug):
|
|
"""Spawnt einen neuen Container für den User"""
|
|
try:
|
|
existing = self._get_user_container(slug)
|
|
if existing and existing.status == 'running':
|
|
return existing.id, self._get_container_port(existing)
|
|
|
|
# Pfad-basiertes Routing: User unter coder.domain.org/<slug>
|
|
base_host = f"{Config.SPAWNER_SUBDOMAIN}.{Config.BASE_DOMAIN}"
|
|
|
|
# Labels vorbereiten
|
|
labels = {
|
|
'traefik.enable': 'true',
|
|
'traefik.docker.network': Config.TRAEFIK_NETWORK,
|
|
|
|
# HTTPS Router mit PathPrefix
|
|
f'traefik.http.routers.user{user_id}.rule':
|
|
f'Host(`{base_host}`) && PathPrefix(`/{slug}`)',
|
|
f'traefik.http.routers.user{user_id}.entrypoints': Config.TRAEFIK_ENTRYPOINT,
|
|
f'traefik.http.routers.user{user_id}.priority': '100',
|
|
# StripPrefix Middleware - entfernt /{slug} bevor Container Request erhält
|
|
f'traefik.http.routers.user{user_id}.middlewares': f'user{user_id}-strip',
|
|
f'traefik.http.middlewares.user{user_id}-strip.stripprefix.prefixes': f'/{slug}',
|
|
# TLS für HTTPS
|
|
f'traefik.http.routers.user{user_id}.tls': 'true',
|
|
f'traefik.http.routers.user{user_id}.tls.certresolver': Config.TRAEFIK_CERTRESOLVER,
|
|
|
|
# Service
|
|
f'traefik.http.services.user{user_id}.loadbalancer.server.port': '8080',
|
|
|
|
# Metadata
|
|
'spawner.user_id': str(user_id),
|
|
'spawner.slug': slug,
|
|
'spawner.managed': 'true'
|
|
}
|
|
|
|
# Logging: Traefik-Labels ausgeben
|
|
print(f"[SPAWNER] Creating container user-{slug}-{user_id}")
|
|
print(f"[SPAWNER] Traefik Labels:")
|
|
for key, value in labels.items():
|
|
if 'traefik' in key:
|
|
print(f"[SPAWNER] {key}: {value}")
|
|
|
|
container = self._get_client().containers.run(
|
|
Config.USER_TEMPLATE_IMAGE,
|
|
name=f"user-{slug}-{user_id}",
|
|
detach=True,
|
|
network=Config.TRAEFIK_NETWORK,
|
|
labels=labels,
|
|
environment={
|
|
'USER_ID': str(user_id),
|
|
'USER_SLUG': slug
|
|
},
|
|
restart_policy={'Name': 'unless-stopped'},
|
|
mem_limit=Config.DEFAULT_MEMORY_LIMIT,
|
|
cpu_quota=Config.DEFAULT_CPU_QUOTA
|
|
)
|
|
|
|
print(f"[SPAWNER] Container created: {container.id[:12]}")
|
|
print(f"[SPAWNER] URL: https://{base_host}/{slug}")
|
|
return container.id, 8080
|
|
|
|
except docker.errors.ImageNotFound as e:
|
|
error_msg = f"Template-Image '{Config.USER_TEMPLATE_IMAGE}' nicht gefunden"
|
|
print(f"[SPAWNER] ERROR: {error_msg}")
|
|
raise Exception(error_msg)
|
|
except docker.errors.APIError as e:
|
|
error_msg = f"Docker API Fehler: {str(e)}"
|
|
print(f"[SPAWNER] ERROR: {error_msg}")
|
|
raise Exception(error_msg)
|
|
except Exception as e:
|
|
print(f"[SPAWNER] ERROR: {str(e)}")
|
|
raise
|
|
|
|
def start_container(self, container_id):
|
|
"""Startet einen gestoppten User-Container"""
|
|
try:
|
|
container = self._get_client().containers.get(container_id)
|
|
if container.status != 'running':
|
|
container.start()
|
|
print(f"[SPAWNER] Container {container_id[:12]} gestartet")
|
|
return True
|
|
except docker.errors.NotFound:
|
|
return False
|
|
|
|
def stop_container(self, container_id):
|
|
"""Stoppt einen User-Container"""
|
|
try:
|
|
container = self._get_client().containers.get(container_id)
|
|
container.stop(timeout=10)
|
|
return True
|
|
except docker.errors.NotFound:
|
|
return False
|
|
|
|
def remove_container(self, container_id):
|
|
"""Entfernt einen User-Container komplett"""
|
|
try:
|
|
container = self._get_client().containers.get(container_id)
|
|
container.remove(force=True)
|
|
return True
|
|
except docker.errors.NotFound:
|
|
return False
|
|
|
|
def get_container_status(self, container_id):
|
|
"""Gibt Status eines Containers zurück"""
|
|
try:
|
|
container = self._get_client().containers.get(container_id)
|
|
return container.status
|
|
except docker.errors.NotFound:
|
|
return 'not_found'
|
|
|
|
def _get_user_container(self, slug):
|
|
"""Findet existierenden Container für User"""
|
|
filters = {'label': f'spawner.slug={slug}'}
|
|
containers = self._get_client().containers.list(all=True, filters=filters)
|
|
return containers[0] if containers else None
|
|
|
|
def _get_container_port(self, container):
|
|
"""Extrahiert Port aus Container-Config"""
|
|
return 8080
|
|
|
|
def spawn_multi_container(self, user_id: int, slug: str, container_type: str) -> tuple:
    """
    Spawn a container of a given type for a user, reusing an existing one.

    The container is named ``user-<slug>-<container_type>-<user_id>`` and is
    routed by Traefik under the path ``/<slug>-<container_type>``. If a
    container with that name already exists (e.g. after an error or a
    failed deletion) it is returned when running, otherwise started; if it
    cannot be started it is force-removed and a new one is created.

    Args:
        user_id: User ID
        slug: User slug (for the URL)
        container_type: Key into ``Config.CONTAINER_TEMPLATES``
            (the original documentation names 'dev' and 'prod')

    Returns:
        (container_id, container_port)

    Raises:
        ValueError: If no template is configured for ``container_type``
            (logged, then re-raised).
        Exception: If the template image is not found or the Docker API
            reports an error; the Docker error is attached as ``__cause__``.
    """
    try:
        # Fetch the template configuration
        template = Config.CONTAINER_TEMPLATES.get(container_type)
        if not template:
            raise ValueError(f"Ungültiger Container-Typ: {container_type}")

        image = template['image']
        container_name = f"user-{slug}-{container_type}-{user_id}"

        # Traefik labels carry the container type as suffix
        slug_with_suffix = f"{slug}-{container_type}"
        base_host = f"{Config.SPAWNER_SUBDOMAIN}.{Config.BASE_DOMAIN}"

        # Shared name fragments, built once instead of in every label key
        route_id = f'user{user_id}-{container_type}'
        router = f'traefik.http.routers.{route_id}'
        strip_middleware = f'{route_id}-strip'

        labels = {
            'traefik.enable': 'true',
            'traefik.docker.network': Config.TRAEFIK_NETWORK,

            # HTTPS router with PathPrefix
            f'{router}.rule': f'Host(`{base_host}`) && PathPrefix(`/{slug_with_suffix}`)',
            f'{router}.entrypoints': Config.TRAEFIK_ENTRYPOINT,
            f'{router}.priority': '100',
            # StripPrefix middleware - removes /{slug_with_suffix} before
            # the request reaches the container
            f'{router}.middlewares': strip_middleware,
            f'traefik.http.middlewares.{strip_middleware}.stripprefix.prefixes': f'/{slug_with_suffix}',
            # TLS for HTTPS
            f'{router}.tls': 'true',
            f'{router}.tls.certresolver': Config.TRAEFIK_CERTRESOLVER,

            # Service
            f'traefik.http.services.{route_id}.loadbalancer.server.port': '8080',

            # Metadata
            'spawner.user_id': str(user_id),
            'spawner.slug': slug,
            'spawner.container_type': container_type,
            'spawner.managed': 'true',
        }

        # Check whether the container already exists (e.g. after an error
        # or a failed deletion); reusing the name would cause a 409 Conflict.
        try:
            existing_container = self._get_client().containers.get(container_name)
        except docker.errors.NotFound:
            # Container does not exist - the normal case, carry on.
            existing_container = None

        if existing_container is not None:
            print(f"[SPAWNER] Container {container_name} existiert bereits (Status: {existing_container.status})")

            if existing_container.status == 'running':
                # Container is already running
                return existing_container.id, 8080

            # Container is stopped - try to start it
            try:
                existing_container.start()
                print(f"[SPAWNER] Existierender Container {container_name} neu gestartet")
                return existing_container.id, 8080
            except Exception as e:
                # Deliberately broad (best effort): whatever went wrong,
                # delete the container and create a new one below.
                print(f"[SPAWNER] Kann Container nicht starten, lösche: {str(e)}")
                try:
                    existing_container.remove(force=True)
                    print(f"[SPAWNER] Alten Container {container_name} gelöscht - erstelle neuen")
                except Exception as remove_err:
                    # Continue anyway and try to create the new container.
                    print(f"[SPAWNER] WARNUNG: Kann alten Container nicht löschen: {str(remove_err)}")

        # Logging: print the Traefik labels
        print(f"[SPAWNER] Creating {container_type} container {container_name}")
        print(f"[SPAWNER] Image: {image}")
        print("[SPAWNER] Traefik Labels:")
        for key, value in labels.items():
            if 'traefik' in key:
                print(f"[SPAWNER] {key}: {value}")

        container = self._get_client().containers.run(
            image=image,
            name=container_name,
            detach=True,
            network=Config.TRAEFIK_NETWORK,
            labels=labels,
            environment={
                'USER_ID': str(user_id),
                'USER_SLUG': slug,
                'CONTAINER_TYPE': container_type,
            },
            restart_policy={'Name': 'unless-stopped'},
            mem_limit=Config.DEFAULT_MEMORY_LIMIT,
            cpu_quota=Config.DEFAULT_CPU_QUOTA,
        )

        print(f"[SPAWNER] {container_type.upper()} container created: {container.id[:12]}")
        print(f"[SPAWNER] URL: {Config.PREFERRED_URL_SCHEME}://{base_host}/{slug_with_suffix}")
        return container.id, 8080

    except docker.errors.ImageNotFound as e:
        # Only containers.run() can raise this, so `image` is bound here.
        error_msg = f"Template-Image '{image}' für Typ '{container_type}' nicht gefunden"
        print(f"[SPAWNER] ERROR: {error_msg}")
        raise Exception(error_msg) from e
    except docker.errors.APIError as e:
        error_msg = f"Docker API Fehler: {str(e)}"
        print(f"[SPAWNER] ERROR: {error_msg}")
        raise Exception(error_msg) from e
    except Exception as e:
        print(f"[SPAWNER] ERROR: {str(e)}")
        raise
|