1
0

commited changes I forgot to commit

This commit is contained in:
Benedikt Galbavy 2025-09-29 18:37:38 +02:00
parent a14b7dc0df
commit 81adab4dd8
2 changed files with 278 additions and 89 deletions

View File

@ -219,7 +219,7 @@
@misc{hashicorp_vagrant, @misc{hashicorp_vagrant,
author = {HashiCorp}, author = {HashiCorp},
title = {vagrant up - Command-Line Interface | Vagrant | HashiCorp Developer}, title = {vagrant up - Command-Line Interface},
url = {https://developer.hashicorp.com/vagrant/docs/cli/up}, url = {https://developer.hashicorp.com/vagrant/docs/cli/up},
urldate = {2025-04-11} urldate = {2025-04-11}
} }
@ -243,4 +243,195 @@
urldate = {2025-05-02}, urldate = {2025-05-02},
year = {2019}, year = {2019},
organization = {Unit42} organization = {Unit42}
}
@online{Configure2025,
  title        = {Configure logging drivers},
  organization = {Docker},
  year         = {2025},
  url          = {https://docs.docker.com/engine/logging/configure},
  urldate      = {2025-05-20},
}
@online{Networking2025,
  title        = {Networking with standalone containers},
  organization = {Docker},
  year         = {2025},
  url          = {https://docs.docker.com/engine/network/tutorials/standalone/#use-the-default-bridge-network},
  urldate      = {2025-05-20},
}
@online{Filtering2025,
  title        = {Packet filtering and firewalls},
  organization = {Docker},
  year         = {2025},
  url          = {https://docs.docker.com/engine/network/packet-filtering-firewalls/},
  urldate      = {2025-05-20},
}
@online{PostgreSQL,
  title        = {PostgreSQL: Security Information},
  organization = {PostgreSQL Global Development Group},
  url          = {https://www.postgresql.org/support/security},
  urldate      = {2025-05-20},
}
@online{GiteaDocs,
  title        = {Installation with Docker},
  organization = {Gitea},
  url          = {https://docs.gitea.com/installation/install-with-docker},
  urldate      = {2025-05-20},
}
@online{VaultwardenHardening,
  title        = {Hardening Guide},
  organization = {GitHub, dani-garcia/vaultwarden},
  date         = {2024-08-03},
  url          = {https://github.com/dani-garcia/vaultwarden/wiki/Hardening-Guide},
  urldate      = {2025-05-20},
}
@online{GiteaHardening,
  title        = {Configuration Cheat Sheet - Security},
  organization = {Gitea},
  url          = {https://docs.gitea.com/administration/config-cheat-sheet#security-security},
  urldate      = {2025-05-20},
}
@report{Kent2006,
  title       = {Guide to Computer Security Log Management},
  author      = {Kent, Karen and Souppaya, Murugiah},
  type        = {Special Publication},
  number      = {800-92},
  institution = {National Institute of Standards and Technology},
  year        = {2006},
  doi         = {10.6028/NIST.SP.800-92},
}
@report{Scarfone2023,
  title       = {Cybersecurity Log Management Planning Guide},
  author      = {Scarfone, Karen and Souppaya, Murugiah},
  type        = {Special Publication},
  institution = {National Institute of Standards and Technology},
  date        = {2023-10},
  doi         = {10.6028/NIST.SP.800-92r1.ipd},
  note        = {Public Draft of Rev. 1 of NIST SP 800-92},
}
@online{OWASP,
  title        = {Principles of security - OWASP Developer Guide},
  author       = {{OWASP Developer Guide team}},
  organization = {OWASP},
  url          = {https://devguide.owasp.org/en/02-foundations/03-security-principles/#security-by-default},
  urldate      = {2025-05-20},
}
@online{C5,
  title        = {C5: Secure By Default Configurations - OWASP Top 10 Proactive Controls},
  organization = {OWASP},
  url          = {https://top10proactive.owasp.org/the-top-10/c5-secure-by-default},
  urldate      = {2025-05-20},
}
@online{CVE_2019_9193,
  title   = {{CVE}-2019-9193},
  date    = {2019-01-04},
  url     = {https://www.cve.org/CVERecord?id=CVE-2019-9193},
  urldate = {2025-05-20},
}
@online{WhatIsFalco,
  title        = {What is Falco?},
  organization = {Falco},
  url          = {https://falco.org/about},
  urldate      = {2025-05-20},
}
@article{iot5030026,
  author   = {Ajith, Vishnu and Cyriac, Tom and Chavda, Chetan and Kiyani, Anum Tanveer and Chennareddy, Vijay and Ali, Kamran},
  title    = {Analyzing Docker Vulnerabilities through Static and Dynamic Methods and Enhancing IoT Security with AWS IoT Core, CloudWatch, and GuardDuty},
  journal  = {IoT},
  volume   = {5},
  number   = {3},
  pages    = {592--607},
  year     = {2024},
  issn     = {2624-831X},
  doi      = {10.3390/iot5030026},
  url      = {https://www.mdpi.com/2624-831X/5/3/26},
  abstract = {In the age of fast digital transformation, Docker containers have become one of the central technologies for flexible and scalable application deployment. However, this has opened a new dimension of challenges in security, which are skyrocketing with increased technology adoption. This paper discerns these challenges through a manifold approach: first, comprehensive static analysis by Trivy, and second, real-time dynamic analysis by Falco in order to uncover vulnerabilities in Docker environments pre-deployment and during runtime. One can also find similar challenges in security within the Internet of Things (IoT) sector, due to the huge number of devices connected to WiFi networks, from simple data breaches such as brute force attacks and unauthorized access to large-scale cyber attacks against critical infrastructure, which represent only a portion of the problems. In connection with this, this paper is calling for the execution of robust AWS cloud security solutions: IoT Core, CloudWatch, and GuardDuty. IoT Core provides a secure channel of communication for IoT devices, and CloudWatch offers detailed monitoring and logging. Additional security is provided by GuardDutys automatized threat detection system, which continuously seeks out potential threats across network traffic. Armed with these technologies, we try to build a more resilient and privacy-oriented IoT while ensuring the security of our digital existence. The result is, therefore, an all-inclusive work on security in both Docker and IoT domains, which might be considered one of the most important efforts so far to strengthen the digital infrastructure against fast-evolving cyber threats, combining state-of-the-art methods of static and dynamic analyses for Docker security with advanced, cloud-based protection for IoT devices.}
}
@online{RPiWebServer,
  title   = {Raspberry PI Web Server},
  date    = {2019-07-14},
  url     = {https://forums.raspberrypi.com/viewtopic.php?t=245729},
  urldate = {2025-05-20},
}
@online{Martin,
  title        = {How I run my blog on a Raspberry Pi},
  author       = {Anderson-Clutz, Martin},
  organization = {Opensource.com},
  date         = {2022-03-11},
  url          = {https://opensource.com/article/22/3/run-drupal-raspberry-pi},
  urldate      = {2025-05-20},
}
@report{106028nistsp800123,
  title       = {Guide to general server security},
  author      = {Scarfone, Karen and Jansen, Wayne and Tracy, Miles},
  type        = {Special Publication},
  number      = {800-123},
  institution = {National Institute of Standards and Technology},
  date        = {2008-07},
  doi         = {10.6028/NIST.SP.800-123},
}
@online{CISBench,
  title        = {{CIS} Benchmarks},
  organization = {CIS},
  url          = {https://www.cisecurity.org/cis-benchmarks},
  urldate      = {2025-05-20},
}
@article{gessert2017nosql,
  title     = {{NoSQL} database systems: a survey and decision guidance},
  author    = {Gessert, Felix and Wingerath, Wolfram and Friedrich, Steffen and Ritter, Norbert},
  journal   = {Computer Science-Research and Development},
  volume    = {32},
  pages     = {353--365},
  year      = {2017},
  publisher = {Springer},
}
@inproceedings{pokorny2011nosql,
  title     = {{NoSQL} databases: a step to database scalability in web environment},
  author    = {Pokorny, Jaroslav},
  booktitle = {Proceedings of the 13th International Conference on Information Integration and Web-based Applications and Services},
  pages     = {278--283},
  year      = {2011},
}
@online{rHomelabLocalUsage,
  title         = {Raspberry PI Web Server},
  date          = {2023-10-28},
  url           = {https://www.reddit.com/r/homelab/comments/17i9xrg},
  urldate       = {2025-05-20},
  internal-note = {NOTE(review): title is identical to entry RPiWebServer but the URL is a Reddit thread -- verify the actual thread title},
}

View File

@ -59,11 +59,11 @@
%\secondsupervisor{Titel Vorname Name, Titel} %\secondsupervisor{Titel Vorname Name, Titel}
%\secondsupervisor[Begutachter]{Titel Vorname Name, Titel} %\secondsupervisor[Begutachter]{Titel Vorname Name, Titel}
%\secondsupervisor[Begutachterinnen]{Titel Vorname Name, Titel} %\secondsupervisor[Begutachterinnen]{Titel Vorname Name, Titel}
\place{Wien} \place{Vienna}
%\kurzfassung{\blindtext} \kurzfassung{Die Containerisierung hat sich als De-Facto Standard zur Bereitstellung von Anwendungen etabliert. In der Praxis werden jedoch Dienste samt ihrer Abhängigkeiten dupliziert, was unnötig Ressourcen bindet. Diese Arbeit untersucht, ob sich durch Ersetzen duplizierter Dienste---wie z.B. Datenbanken---mit einer gemeinsamen Instanz auf dem Host Ressourcen einsparen lässt, ohne dabei die Sicherheit zu schwächen. Dazu wurde ein reproduzierbares Labor auf Basis von Vagrant, VirtualBox, Ansible und Docker aufgebaut, in dem ein vollständig containerisierter Stack einem ``hybriden'' Ansatz gegenübergestellt wurde. In beiden Szenarien wurden Sicherheitstests der Phasen \textit{Reconnaissance}, \textit{Exploitation} und \textit{Post-Exploitation} durchgeführt. Die Tests offenbarten keine neuen inhärenten Schwachstellen im hybriden Modell; allerdings erhöht das Konzept gemeinsam genutzter Dienste die Zahl der Komponenten, die von einem Sicherheitsvorfall betroffen sein könnten, und steigert die Komplexität der Konfiguration. Automatisiert erhobene Leistungsdaten zeigten im Leerlauf geringe, aber dennoch statistisch signifikante Einsparungen beim verbrauchten Arbeitsspeicher ($\approx3,6\%$), jedoch keine Vorteile bei der CPU-Auslastung. Insgesamt kann eine hybride Container-Host-Infrastruktur ebenso sicher und etwas ressourcenschonender als ein reiner Container-Stack sein, erfordert jedoch sorgfältige Konfiguration und die konsequente Einhaltung von Sicherheits-Best-Practices. Die Ergebnisse basieren auf einer klein angelegten Leerlaufmessung; weitere Studien sollten klären, wie sich der Ansatz in größeren Produktionsumgebungen bewährt.}
%\schlagworte{Schlagwort1, Schlagwort2, Schlagwort3, Schlagwort4} \schlagworte{Containerisierung, Hybride Containerarchitektur, Docker, Service-Deduplikation, Containersicherheit}
\outline{\blindtext} \outline{Containerization is widely used for deploying applications, but often duplicates services and their dependencies across containers, leading to resource inefficiencies. This thesis examines whether moving common services---such as databases---out of Docker containers and running them once on the host, instead of running a separate instance for each service, can save resources without weakening security. A reproducible lab built with Vagrant, VirtualBox, Ansible and Docker was used to compare a fully containerized setup with a ``hybrid'' one. Within this environment, security tests covering reconnaissance, exploitation and post-exploitation were conducted to assess the viability of hybrid configurations. The tests revealed no new inherent vulnerabilities in the hybrid model, but the shared-service design increases the number of components affected by a potential breach and makes configuration more complex. Performance metrics were also collected using an automated script, and revealed only minor resource gains at idle ($\approx3.6\%$ less RAM usage and no CPU benefit). Overall, hybrid container-host infrastructures can be as secure as pure container stacks and slightly leaner, but they demand meticulous configuration and adherence to security best practices. These findings are based on a small-scale idle-state evaluation; further work is recommended to determine suitability for larger, production-scale environments.}
\keywords{Docker, Containerization, Hybrid, Security} \keywords{Containerization, Hybrid container architecture, Docker, Service deduplication, Container security}
%\acknowledgements{\blindtext} %\acknowledgements{\blindtext}
\begin{document} \begin{document}
@ -75,66 +75,67 @@
% %
\chapter{Containerization} \chapter{Containerization}
Containerization is more relevant today than ever. Modern software design patterns like microsservices rely heavily on using multiple isolated components---something that would not be possible with traditional hardware-based systems, or at least unfeasibly expensive; this is especially relevant for development and testing environments, which require reproducible and lightweight setups. DevOps pipelines often use containerization to run tasks on demand, ranging from unit tests to artifact building, and are prevalent in many software control systems---for example in the form of GitHub Actions or GitLab CI. Compared to earlier approaches of virtual machines, containers allow running similarly independent systems with less overhead and less start-up delay, primarily due to not simulating the hardware and kernel, but only isolating user space. These points are only amplified by the wide-spread adoption through major tech companies and cloud platforms, which spent time and money on improving the software and surrounding tooling \cite{finley_2014_amazon}\cite{tozzi_2018_5}, as well as expanding on the original idea through concepts like orchestration of larger, distributed systems \cite{kubernetes_2023_overview}. Containerization is more relevant today than ever. Modern software design patterns like microservices rely heavily on using multiple isolated components---something that would not be possible with traditional hardware-based systems, or at least unfeasibly expensive; this is especially relevant for development and testing environments, which require reproducible and lightweight setups. DevOps pipelines often use containerization to run tasks on demand, ranging from unit tests to artifact building, and are prevalent in many software control systems---for example, in the form of GitHub Actions or GitLab CI. 
Compared to earlier approaches of virtual machines, containers allow running similarly independent systems with less overhead and less start-up delay, primarily due to not simulating the hardware and kernel, but only isolating user space. These points are only amplified by the widespread adoption through major tech companies and cloud platforms, which spent time and money on improving the software and surrounding tooling \cite{finley_2014_amazon,tozzi_2018_5}, as well as expanding on the original idea through concepts like orchestration of larger, distributed systems \cite{kubernetes_2023_overview}.
\section{A Solution to Dependencies} \section{A Solution to Dependencies}
Especially in development, dependencies are a well known problem in unstandardized environments; different software requires different versions of the same libraries, often resulting in conflicts. This can be caused by installing the latest version of a given dependency at some point, but not updating it---commonly known as the ``it works on my machine'' problem \cite{pardo_2023_but}\cite{wang_2025_common}. One such scenario of a dependency conflict is python, which often removes functions even in minor versions \cite{a2024_whats}, thus leading to incompatibilities---which are expressed as run-time errors in the case of python specifically, making them harder to detect. Containerization solves that problem by virtualizing encapsulated, standardized environments; Docker is one of the most widely adopted containerization tools \cite{a2025_leading}, and will thus be used as a stand-in for containerization for the purposes of this thesis. There is some functionality specific to docker, which have been highlighted in \autoref{cha:discussion}; overall these differences are very minor, however, and for most purposes alternative tools like Podman fulfill the role equally well. Especially in development, dependencies are a well-known problem in unstandardized environments; different software requires different versions of the same libraries, often resulting in conflicts. This can be caused by installing the latest version of a given dependency at some point, but not updating it---commonly known as the ``it works on my machine'' problem \cite{pardo_2023_but,wang_2025_common}. One such scenario of a dependency conflict is Python, which often removes functions even in minor versions \cite{a2024_whats}, thus leading to incompatibilities---which are expressed as run-time errors in the case of Python specifically, making them harder to detect. 
Containerization solves that problem by virtualizing encapsulated, standardized environments; Docker is one of the most widely adopted containerization tools \cite{a2025_leading}, and will thus be used as a stand-in for containerization for the purposes of this thesis. There is some functionality specific to Docker, which has been highlighted in \autoref{cha:discussion}; overall these differences are very minor, however, and for most purposes alternative tools like Podman fulfill the role equally well.
\section{A Duplication of Dependencies} \section{A Duplication of Dependencies}
As each container is a separate system, this introduces redundancy, particularly in background services and system-level dependencies rather than application libraries. While Docker does not duplicate the kernel and the hardware similar to how a full virtual machine does \cite{docker_2023_what}, each container still contains a complete operating system---although it usually does not feature a full desktop environment, but only a minimal user space---especially with minimal distributions like Alpine Linux. Furthermore there is commonly a larger dependency overlap in most modern software\todo{quote?}: Static elements like glibc and language interpreters mostly contribute to storage redundancy, but have a minimal performance impact. In contrast, service dependencies---including logging daemons, databases, and schedulers---consume CPU time, memory, and I/O resources. Considering similar services are often run on the same machine, such as two web services as shown in \autoref{sec:use_case_small_scale_web_services}, these dependencies are often similar or identical. One notable example is that of databases---which are commonly provided as standalone containers \cite{zhao_2024_simplifying}, instead of being shipped as part of an image. While this separation can be convenient, this can lead to multiple instances of the same container on one host. As each container is a separate system, this introduces redundancy, particularly in background services and system-level dependencies rather than application libraries. While Docker does not duplicate the kernel and the hardware similar to how a full virtual machine does \cite{docker_2023_what}, each container still contains a complete operating system---although it usually does not feature a full desktop environment, but only a minimal user space---especially with minimal distributions like Alpine Linux. 
Furthermore, there is commonly a larger dependency overlap in most modern software: Taking Docker as an example again, Skourtis et al. state ``many layers only differ in a
small number of files but would otherwise be identical''\cite{skourtis_2019_carving}. Static elements like glibc and language interpreters mostly contribute to storage redundancy, but have a minimal performance impact. In contrast, service dependencies---including logging daemons, databases, and schedulers---consume CPU time, memory, and I/O resources. Considering similar services are often run on the same machine, such as two web services as shown in \autoref{sec:use_case_small_scale_web_services}, these dependencies are often similar or identical. One notable example is that of databases---which are commonly provided as standalone containers \cite{zhao_2024_simplifying} instead of being shipped as part of an image. While this separation can be convenient, this can lead to multiple instances of the same container on one host.
\section{A Solution to Duplication}\label{sec:a_solution_to_duplication} \section{A Solution to Duplication}\label{sec:a_solution_to_duplication}
While much research has already explored reducing storage inefficiencies in docker images \cite{skourtis_2019_carving}, a less explored area is the duplication of service dependencies across containers. Many systems run multiple instances of the same service---message queues, databases, caching systems, authentication and redirection proxies, and logging systems are all common examples, despite their potential for centralization. While not all of these services can be reused, identifying the ones that are practical to share offers opportunities for less overhead and tighter integration with the host system. Logging in particular is a promising candidate, as it is not only essential for most systems, but docker already captures container output streams. Instead of routing them to a new container or service, they can be processed centrally on the host, reducing duplication and simplifying management. While much research has already explored reducing storage inefficiencies in Docker images \cite{skourtis_2019_carving}, a less explored area is the duplication of service dependencies across containers. Many systems run multiple instances of the same service---message queues, databases, caching systems, authentication and redirection proxies, and logging systems are all common examples, despite their potential for centralization. While not all of these services can be reused, identifying the ones that are practical to share offers opportunities for less overhead and tighter integration with the host system. Logging in particular is a promising candidate, as it is not only essential for most systems, but Docker already captures container output streams. Instead of routing them to a new container or service, they can be processed centrally on the host, reducing duplication and simplifying management.
\section{Sharing Resources---To the Wrong Audience} \section{Sharing Resources---To the Wrong Audience}
One of the core premises of containerization is isolation---each container acting independently with limited access to other containers or the host system. The previous sections focused on breaking this isolation for the purposes of resource efficiency. This brings the obvious trade-off of increasing the attack surface. There has been a great deal of research on attacks such as container escapes \cite{putta_2023_enhancing}\cite{yasrab_2018_mitigating}\cite{yasrab_2018_mitigating}, and consequences of such an attack increase dramatically when resources are shared between the containers. While docker networks secure inter-container communication \cite{a2024_networking}\cite{docker}, this does not secure interaction between the container and host system, and does not prevent containers from accessing host resources, especially if they have been deliberately exposed. A prominent example of such an attack is the Log4Shell exploit \cite{zhaojun_2021_cve202144228}---which allowed remote code execution through logging---assuming similar exploits could exist in other logging software, just a single crafted log message could be enough to turn a single compromised container into a wider breach. One of the core premises of containerization is isolation---each container acting independently with limited access to other containers or the host system. The previous sections focused on breaking this isolation for the purposes of resource efficiency. This brings the obvious trade-off of increasing the attack surface. There has been a great deal of research on attacks such as container escapes \cite{putta_2023_enhancing,yasrab_2018_mitigating,bui_2015_analysis}, and consequences of such an attack increase dramatically when resources are shared between the containers. 
While Docker networks secure inter-container communication \cite{a2024_networking,docker}, this does not secure interaction between the container and host system and does not prevent containers from accessing host resources, especially if they have been deliberately exposed. A prominent example of such an attack is the Log4Shell exploit \cite{zhaojun_2021_cve202144228}---which allowed remote code execution through logging---assuming similar exploits could exist in other logging software, just a single crafted log message could be enough to turn a single compromised container into a wider breach.
These risks highlight the importance of considering security implications when services are reused for a hybrid environment---a challenge addressed in the following chapters. These risks highlight the importance of considering security implications when services are reused for a hybrid environment---a challenge addressed in the following chapters.
\clearpage \clearpage
\chapter{Constructing a Real World Scenario} \chapter{Constructing a Real-World Scenario}
Since the focus of this thesis is on identifying improvements and analyzing trade-offs between isolation and efficiency of real world applications, a realistic scenario is required. The constructed environment requires interfaces---HTTP(S), SSH, persistent volumes, background services---similar to those found in a production environment, while still keeping it minimal to ensure accuracy of any implementations. There are countless scenarios that fulfill these criteria, but one of the most common are web services. Since the focus of this thesis is on identifying improvements and analyzing trade-offs between isolation and efficiency of real-world applications, a realistic scenario is required. The constructed environment requires interfaces---HTTP(S), SSH, persistent volumes, background services, etc.---similar to those found in a production environment, while still keeping it minimal to ensure implementations function as intended. There are countless scenarios that fulfill these criteria, but one of the most common is web services.
\section{Use-Case: Small Scale Web Services}\label{sec:use_case_small_scale_web_services} \section{Use-Case: Small Scale Web Services}\label{sec:use_case_small_scale_web_services}
In an enterprise context, redundancies can often be planned for and eliminated in advance, but in smaller scale scenarios, be it smaller companies, or even just personal projects, environments often grow organically by adding additional services reactively to changing requirements rather than planning long term---leading to unplanned duplication. Such systems also often use docker compose \cite{kamath_2021_containerize} or similar orchestration tools with convenience focused configurations---containing less moving parts and are easier to update---instead of favoring optimization. However these scenarios also have the highest incentive to stay cost effective. In an enterprise context, redundancies can often be planned for and eliminated in advance, but in smaller-scale scenarios, such as small companies or even just personal projects, environments often grow organically by adding additional services reactively to changing requirements rather than planning long term---leading to unplanned duplication. Such systems also often use Docker Compose \cite{kamath_2021_containerize} or similar orchestration tools with convenience-focused configurations---containing fewer moving parts and being easier to update---instead of favoring optimization. In addition, use cases as described intrinsically have a higher motivation to stay cost-effective.
\subsection{The pieces of the puzzle} \subsection{The pieces of the puzzle}
\begin{figure}[!htbp] \begin{figure}[!htbp]
\centering \centering
\includegraphics[width=0.7\linewidth]{webservice-use_case.png} \includegraphics[width=0.7\linewidth]{webservice-use_case.png}
\caption{Network diagram of a docker setup with Gitea and Bitwarden}\label{fig:webservice} \caption{Network diagram of a Docker setup with Gitea and Bitwarden}\label{fig:webservice}
\end{figure} \end{figure}
\subsubsection{Gitea---A Wide Attack Surface} \subsubsection{Gitea---A Wide Attack Surface}
Gitea serves as an example for any git server. It will be used due to its lightweight design, but is otherwise representative of enterprise grade systems like GitLab. A git server provides a wide attack surface \cite{gitea}\cite{gitlab} due to the mix of different user content through file storage, comments and issues, and CI pipelines, and a mix of interfaces with HTTP(S) access and SSH access---commonly via a user git. SSH access in particular requires careful configuration \cite{gasser_2014_a}, as the host needs to access an SSH server inside the container, something rarely done otherwise. Gitea requires a database for storing user submissions---which can be an integrated SQLite3 instance, but more commonly is a postgres database container, and it requires a file volume, commonly mounted directly to the host; it furthermore has optional dependencies in code runners for pipelines, and an email server. Gitea serves as an example for Git servers in a broader context. It will be used due to its lightweight design, but is otherwise representative of enterprise-grade systems like GitLab. A Git server provides a wide attack surface \cite{gitea,gitlab} due to the mix of different user content through file storage, comments and issues, and CI pipelines, and a mix of interfaces with HTTP(S) access and SSH access---commonly via a user \texttt{git}. SSH access in particular requires careful configuration \cite{gasser_2014_a}, as the host needs to access an SSH server inside the container, something rarely done otherwise. Gitea requires a database for storing user submissions---which can be provided by an integrated SQLite3 instance, but more commonly a postgres database container is used, and it requires a file volume, commonly mounted directly to the host; it furthermore has optional dependencies in code runners for pipelines and an email server.
\subsubsection{Bitwarden---Increasing the Stakes} \subsubsection{Bitwarden---Increasing the Stakes}
Bitwarden, or more accurately vaultwarden---an open source implementation of a bitwarden server---serves a less complex component, but stores highly sensitive data. Unlike Gitea, it only requires a database and optionally an email server, but this raises the question of how big of a security risk is introduced by sharing a core service like a database between two services differing greatly in risk. Vaultwarden in particular provides great value to such an analysis due to its thorough documentation \cite{danigarcia_2025}, allowing purposeful misconfiguration to test different attack vectors. Bitwarden, or more accurately vaultwarden---an open-source implementation of a Bitwarden server---fulfills the role of a component that is less complex, but stores highly sensitive data. Unlike Gitea, it only requires a database and, optionally, an email server, but this raises the question of how big of a security risk is introduced by sharing a core service like a database between two services differing greatly in risk. Vaultwarden in particular provides great value to such an analysis due to its thorough documentation \cite{danigarcia_2025}, allowing purposeful misconfiguration to test different attack vectors.
\subsubsection{NGinX---The Entrypoint} \subsubsection{Nginx---The Entrypoint}
Reverse proxies like NGinX or Traefik are commonly used as an entry point to route incoming requests \cite{wahanani_2021_implementation} to the correct services based on hostname. Compared to Traefik, NGinX also provides a significant amount of additional functionality, however an extended feature set can also introduce unnecessary complexity, increasing the risk of misconfigurations---this has to be analysed in the following chapters. It is also important to point out that oftentimes a firewall is even before the proxy, or integrated into it; examples include iptables rules on the host, or fail2ban integrated with the NGinX logs. Reverse proxies like Nginx or Traefik are commonly used as an entry point to route incoming requests \cite{wahanani_2021_implementation} to the correct services based on hostname. Compared to Traefik, Nginx also provides a significant amount of additional functionality; however, an extended feature set can also introduce unnecessary complexity, increasing the risk of misconfigurations---this has to be analyzed in the following chapters. It is also important to point out that oftentimes a firewall is another step in front of the proxy, or integrated into it; examples include iptables rules on the host or fail2ban integrated with the Nginx logs.
\subsubsection{Background Service Redundancy}\label{ssub:background_service_redundancy} \subsubsection{Background Service Redundancy}\label{ssub:background_service_redundancy}
Additionally to the primary services, there are some background services required, which affect both Gitea and Bitwarden: In addition to the primary services, there are some background services required, which affect both Gitea and Bitwarden:
For TLS/SSL, certificates are required---which are commonly handled by a Lets Encrypt ``certbot''. The service can either run on the host directly, or in its own container; either way the certificates will need to be exported to the NGinX container---in most cases the certificate service will write to a shared volume. However due to rate limits on issuing certificates, and the rapid testing required for this thesis, automatic certificate management has been eliminated as part of the tested setup. For TLS/SSL, certificates are required---which are commonly handled by a Let's Encrypt ``certbot''. The service can either run on the host directly or in its own container; either way, the certificates will need to be exported to the Nginx container---in most cases, the certificate service will write to a shared volume. However, due to rate limits on issuing certificates and the rapid iteration over different configurations required for this thesis, automatic certificate management has been eliminated as part of the tested setup.
As previously touched upon, most scenarios include some form of log management and processing. Docker already provides prerequisites, and allows configuring log collection to a file, to journald, or to log drivers of common log management systems\todo{quote docker page of log drivers}. However since this is already integrated into docker itself, and does not need additional configurations of the docker services, analysing it would be beyond the scope of this thesis. As previously hinted at, most scenarios include some form of log management and processing. Docker already provides prerequisites and allows configuring log collection to a file, to journald, or to log drivers of common log management systems \cite{Configure2025}. However, since this is already integrated into Docker itself, and does not need additional configurations of the Docker services, analyzing it would be beyond the scope of this thesis.
It should be noted, that the applications themselves are not the focus of the analysis, but just serve as means to an end; the focus is in the analysis of the dependencies. It should be noted that the applications themselves are not the focus of the analysis, but just serve as means to an end; the focus is on the analysis of the dependencies.
\subsection{Reducing redundancy} \subsection{Reducing redundancy}\label{sub:reducing_redundancy}
\begin{figure}[!htbp] \begin{figure}[!htbp]
\centering \centering
@ -142,21 +143,21 @@ It should be noted, that the applications themselves are not the focus of the an
\caption{Network diagram of \autoref{fig:webservice} with a shared Postgres service}\label{fig:webservice-hybrid} \caption{Network diagram of \autoref{fig:webservice} with a shared Postgres service}\label{fig:webservice-hybrid}
\end{figure} \end{figure}
\autoref{fig:webservice-hybrid} illustrates two services running on a shared host system, rather than deploying a separate instance for each service. Initially the logging system was considered as a candidate for demonstrating a hybrid setup. However, since docker already manages logging---including support for external logging systems, as discussed in \autoref{ssub:background_service_redundancy}---this approach would neither be novel nor yield any measureable difference to the baseline. Disabling Docker's built-in logging systems would introduce an arbitrary change not reflective of real-world scenarios. As a resuilt the focus now lies on replacing the two database containers with a shared service on the host. This change is expected to produce more meaningful differences in a practical scenario. \autoref{fig:webservice-hybrid} illustrates two services running on a shared host system, rather than deploying a separate instance for each service. Initially the logging system was considered as a candidate for demonstrating a hybrid setup. However, since Docker already manages logging---including support for external logging systems, as discussed in \autoref{ssub:background_service_redundancy}---this approach would neither be novel nor yield any measurable difference to the baseline. Disabling Docker's built-in logging systems would introduce an arbitrary change not reflective of real-world scenarios. As a result the focus now lies on replacing the two database containers with a shared service on the host. This change is expected to produce more significant differences in a practical scenario.
\subsection{The caveats of the setup} \subsection{The caveats of the setup}
Even in a comparatively simple scenario such as the one described in this chapter, conflicts and may arise between the services---and even Docker itself. By default, all Docker containers are connected to the \texttt{docker0} network interface, which uses the subnet \texttt{172.17.0.0/16}\todo{cite: docker documentation}. An exception to this rule is Docker Compose, which creates a separate network for each Compose file. To alleviate this issue, a network must be defined in the Docker Compose file, and the corresponding subnet must be allowed in the PostgreSQL configuration\todo{cite: docker compose docs}. Even in a comparatively simple scenario such as the one described in this chapter, conflicts may arise between the services---and even Docker itself. By default, all Docker containers are connected to the \texttt{docker0} network interface, which uses the subnet \texttt{172.17.0.0/16} \cite{Networking2025}. An exception to this rule is Docker Compose, which creates a separate default network independent of \texttt{docker0} for each Compose file; however, Docker Compose always uses the first unused \texttt{/16} subnet in its pool, making the exact subnet mask unknown, as other services running on the same system could interfere. To alleviate this issue, a network must be defined in the Docker Compose file, and the corresponding subnet must be allowed in the PostgreSQL configuration \cite{Networking2025}.
\clearpage \clearpage
\chapter{Reproducibility} \chapter{Reproducibility}
Since the docker host system will also be tested, it also needs to be reproducible---to achieve that it will be instantiated as a virtual machine. Since the term host often has different meanings, especially in a context of containerization, this section will clarify the terms used for the rest of the thesis: Since the Docker host system will be tested as well, it also needs to be reproducible---to achieve that, it will be instantiated as a virtual machine. Since the term host often has different meanings, especially in a context of containerization, this section will clarify the terms used for the rest of the thesis:
The device on which the VM is hosted will henceforth be called VM-host; the host of the Docker containers---the described VM---will be called docker host, docker VM, or just host. To allow reliable reproduction of attacks, these will also be made from a VM, which will be called the client-vm, or just client. If any further services are required, which would normally be external ``on the internet'', a third vm will be used, the ``external-vm''. The VM-host will only ever be used for configuring the VMs, never to test anything. The base configuration can be found in \autoref{appendix_base_config}. The device on which the VM is hosted will henceforth be called VM host; the host of the Docker containers---the described VM---will be called Docker host, Docker VM, or just host. To allow reliable reproduction of attacks, these will also be made from a VM, which will be called the client-vm, or just client. If any further services are required, which would normally be external ``on the internet'', a third VM will be used, the ``external-vm''. The VM host will only ever be used for configuring the VMs, never to test anything. The base configuration can be found in \autoref{appendix_base_config}.
\section{The Host of the Host} \section{The Host of the Host}
Tools have been selected based on reproducibility and compatibility, but not performance. The resulting stack---Vagrant, VirtualBox, Ansible, Ubuntu---is widely adopted \cite{a2024_vagrant} and well supported, allowing better reuse of existing research, as well as sharing of the test setup. Tools have been selected based on reproducibility and compatibility, but not performance. The resulting stack---Vagrant, VirtualBox, Ansible, and Ubuntu---is widely adopted \cite{a2024_vagrant} and well supported, allowing better reuse of existing research, as well as sharing of the test setup.
\begin{figure}[!htbp] \begin{figure}[!htbp]
\centering \centering
@ -164,26 +165,22 @@ Tools have been selected based on reproducibility and compatibility, but not per
\caption{Relation of the components in the lab setup using a simplified representation of the tested services}\label{fig:lab} \caption{Relation of the components in the lab setup using a simplified representation of the tested services}\label{fig:lab}
\end{figure} \end{figure}
\section{Tooling for the VM-Host}
\todo{possible vagrant explanation, if needed}
\section{Preparing for Attack} \section{Preparing for Attack}
To evaluate the effectiveness of configuration and the implemented measures, first a preliminary scan is run on the docker-host; afterwards a series of controlled attacks are performed from the client VM against the running services in the docker host. The scan is conducted using the CIS Docker Benchmark, which serves as a foundational tool for establishing a security baseline for all environments. The attack process afterwards is split into three phases, mirroring real world scenarios: To evaluate the effectiveness of the configuration and the implemented measures, first a preliminary scan is run on the Docker host; afterwards, a series of controlled attacks are performed from the client VM against the services running on the Docker host. The scan is conducted using the CIS Docker Benchmark, which serves as a foundational tool for establishing a security baseline for all environments. The attack process afterwards is split into three phases, mirroring real-world scenarios:
\begin{itemize} \begin{itemize}
\item Reconnaissance: Tools like nmap, netcat and curl are used to discover any open ports, services, and misconfigurations. \item Reconnaissance: Tools like nmap, netcat, and curl are used to discover any open ports, services, and misconfigurations.
\item Exploitation: Metasploit and custom scripts are used to test the effectiveness of known exploits on a specific configuration. Due to the reproducibility of the environment, effectiveness can be measured and compared as a simple pass/fail rate. \item Exploitation: Metasploit and custom scripts are used to test the effectiveness of known exploits on a specific configuration. Due to the reproducibility of the environment, effectiveness can be measured and compared as a simple pass/fail rate, but it is expected that qualitative assessments will need to be used to capture the nuances of different exploits.
\item Post-Exploitation: After gaining access, tools like linpeas and manual inspecting are used to determine access to shared resources. \item Post-Exploitation: After gaining access, tools like linpeas and manual inspection are used to determine access to shared resources.
\end{itemize} \end{itemize}
For the client VM, first Ubuntu Desktop was considered as the OS, however as the client is not the focus of this thesis and thus does not need to be representative of the real world to the same degree as the docker VM, Kali Linux was determined to be a better option due to the suite of preinstalled tooling for the simulated attacks. For the client VM, first Ubuntu Desktop was considered as the OS; however, as the client is not the focus of this thesis and thus does not need to be representative of the real-world to the same degree as the Docker VM, Kali Linux was determined to be a better option due to the suite of preinstalled tooling for the simulated attacks.
The goal in these tests is not to discover novel exploits, but to simulate real world attack paths and analyse the additional risk introduced by the hybrid architecture. It should also be noted, that some tested measures only protect against a specific step, or assumes certain prerequisites---some steps will thus be skipped where applicable. The goal of these tests is not to discover novel exploits, but to simulate real-world attack paths and analyze the additional risk introduced by the hybrid architecture. It should also be noted that some tested measures only protect against a specific step or assume certain prerequisites---some steps will thus be skipped where applicable.
\section{Entrypoints}\label{sec:entrypoints} \section{Entrypoints}\label{sec:entrypoints}
While for most attacks the entry point will be the same as for regular usage---in most cases via the exposed HTTP(S) port---such attacks are limited to surface weaknesses. It is however realistic to expect attackers to gain access in some form, through misconfigurations, issues introduced in the further up the software supply chain, or in extrem cases even through zero-day exploits; thus it is prudent to adopt ``assume breach'' mindset for setups as described in this thesis \cite{souppaya_2017_application} \cite{avrahami_2019_breaking}---for the purposes of testing the configurations, an assumed breach will be provided via a docker container\todo{How will access be simulated?}. While for most attacks the entry point will be the same as for regular usage---commonly via the exposed HTTP(S) port---such attacks are limited to surface weaknesses. It is, however, realistic to expect attackers to gain access in some form, through misconfigurations, issues introduced further up the software supply chain, or, in extreme cases, even through zero-day exploits; thus, it is prudent to adopt an ``assume breach'' mindset for setups as described in this thesis \cite{souppaya_2017_application, avrahami_2019_breaking}. For the purposes of testing the configurations, an assumed breach will be provided via a Docker container described in \autoref{code:sandbox:vuln_docker}.
\clearpage \clearpage
\chapter{The Holes in the Wall} \chapter{The Holes in the Wall}
@ -192,47 +189,47 @@ This chapter describes the tests against the architecture. Each test starts with
\section{Security analysis---Use-Case: Web Services} \section{Security analysis---Use-Case: Web Services}
\subsection[Base Configuration]{Base Configuration\footnote{\autoref{fig:webservice}}} \subsection[Base Configuration]{Base Configuration}
The base configuration is minimal, relying on default values wherever possible. As a result the Docker Benchmark results\footnote{\autoref{docker_bench:base}} are not directly relevant to this section, but serve as a baseline with later comparisons. The base configuration (\autoref{fig:webservice}) is minimal, relying on default values wherever possible. As a result, the Docker Benchmark results (\autoref{docker_bench:base}) are not directly relevant to this section, but serve as a baseline for later comparisons.
\subsubsection*{Reconnaissance}\label{ssub:base:recon} \subsubsection*{Reconnaissance}\label{ssub:base:recon}
\paragraph*{NMap Scan} \paragraph*{NMap Scan}
As shown in \autoref{log:base:nmap_sS}, although no unexpected ports are open\footnote{Port 2222 is used for the setup via Vagrant, and thus only an artifact of the specific environment used for this thesis. An identical setup in a production environment would not expose port 2222.}, the scan does reveal that the setup redirects to Gitea by default, instead of Bitwarden or a blank page. This behavior is expected, as no alternative default has been configured. As shown in \autoref{log:base:nmap_sS}, although no unexpected ports are open\footnote{Port 2222 is used for the setup via Vagrant and thus only an artifact of the specific environment used for this thesis. An identical setup in a production environment would not expose port 2222.}, the scan does reveal that the setup redirects to Gitea, by default, instead of Bitwarden or a blank page. This behavior is expected, as no alternative default has been configured.
\paragraph*{Known services analysis} \paragraph*{Known services analysis}
The HTTP headers of the NGinX entrypoint (\autoref{log:base:curl_I}) show a redirect and reveal the NGinX version; following the redirect of the Gitea service (\autoref{log:base:curl_IL_gitea}) does not bring any new information. The body of this request (\autoref{log:base:curl_L_gitea}) forms the landing page of Gitea, and does not directly expose any critical data, though it does reveal the installation's version number, which paired with known security vulnerabilities \cite{gitea} could pose a security risk. It also reveals the address \texttt{http://localhost:3000/} in a \texttt{<meta>} tag, though it is unclear if this reflects an active configuration or a cosmetic misconfiguration. However, base64 encoded manifest includes the same address, implying the address is indeed used internally. The HTTP headers of the Nginx entrypoint (\autoref{log:base:curl_I}) show a redirect and reveal the Nginx version; following the redirect of the Gitea service (\autoref{log:base:curl_IL_gitea}) does not bring any new information. The body of this request (\autoref{log:base:curl_L_gitea}) forms the landing page of Gitea, and does not directly expose any critical data, though it does reveal the installation's version number, which, paired with known security vulnerabilities \cite{gitea}, could pose a security risk. It also reveals the address \texttt{http://localhost:3000/} in a \texttt{<meta>} tag, though it is unclear if this reflects an active configuration or a cosmetic misconfiguration. However, the base64-encoded manifest includes the same address, implying the address is indeed used internally.
Vaultwarden presents a similar issue (\autoref{log:base:curl_L_bitwarden}) with regard to its version after allowing the page to execute JavaScript, albeit with a more complex set of HTTP headers (\autoref{log:base:curl_IL_bitwarden}). Vaultwarden presents a similar issue (\autoref{log:base:curl_L_bitwarden}) with regard to its version after allowing the page to execute JavaScript, albeit with a more complex set of HTTP headers (\autoref{log:base:curl_IL_bitwarden}).
\subsubsection*{Exploitation} \subsubsection*{Exploitation}
As the goal of this exercise is not to find novel exploits, and preliminary scans do not reveal any known vulnerabilities. This suggests the base configuration is moderately secure as is. For multilayered security it is essential to test more components than just the external interface\todo{cite paper about this approach}. To simulate internal access, the container described in \autoref{sec:entrypoints} is used, as demonstrated in \autoref{log:base:metasploit:ssh_login}. Similar \texttt{nmap} scans as described before confirm the presence of an open port 3000, as shown in \autoref{log:base:vuln:nmap}, but do not reveal any additional services. As preliminary scans do not reveal any known vulnerabilities, and the goal is not to find unknown vulnerabilities in the services, the base configuration will be assumed to be secure on a surface level. For multilayered security, it is essential to test more components than just the external interface \cite{souppaya_2017_application}. To simulate internal access, the container described in \autoref{sec:entrypoints} is used, as demonstrated in \autoref{log:base:metasploit:ssh_login}. Similar \texttt{nmap} scans as described before confirm the presence of an open port 3000, as shown in \autoref{log:base:vuln:nmap}, but do not reveal any additional services.
\subsubsection*{Post-Exploitation} \subsubsection*{Post-Exploitation}
In a typical Docker Compose setup, Docker networks already provide strong encapsulation\todo{cite the paper about Docker network security}. As such the database for either service could not be accessed. The only successful container access was establishing direct communication with other public-facing services, effectively bypassing any potential firewall. However, this can again be alleviated by using a separate bridge network between each service and the Nginx container. In a typical Docker Compose setup, Docker networks already provide strong encapsulation \cite{Filtering2025}. As such, the database for either service could not be accessed. The only successful container access was establishing direct communication with other public-facing services, effectively bypassing any potential firewall. However, this can again be alleviated by using a separate bridge network between each service and the Nginx container.
\subsection[Hybrid Configuration]{Hybrid Configuration\footnote{\autoref{fig:webservice-hybrid}}} \subsection[Hybrid Configuration]{Hybrid Configuration}
\subsubsection*{Reconnaissance} \subsubsection*{Reconnaissance}
On a surface level, the hybrid configuration does not differ from the base configuration in any way\footnote{The same tests have been performed as described in \autoref{ssub:base:recon}, results have been omitted for brevity, as they are identical}. This is expected, as the only major changes lies in the database, which is not public facing in either configuration. However, after simulating access again, an \texttt{nmap} scan reveals an additional open port 5432 (\autoref{log:hybrid:vuln:nmap}). On a surface level, the hybrid configuration (\autoref{fig:webservice-hybrid}) does not differ from the base configuration in any way\footnote{The same tests have been performed as described in \autoref{ssub:base:recon}, results have been omitted for brevity, as they are identical}. This is expected, as the only major changes lie in the database, which is not public-facing in either configuration. However, after simulating access again, an \texttt{nmap} scan reveals an additional open port 5432 (\autoref{log:hybrid:vuln:nmap}).
\subsubsection*{Exploitation} \subsubsection*{Exploitation}
After establishing a port forward to the client VM (\autoref{log:hybrid:meterpreter:fwd}), basic attacks can be run against the PostgreSQL database---the same instance in use by the two services. This poses a risk of DoS attacks, or even data breaches, if the instance is sufficiently misconfigured. Unfortunately for the purposes of this test, an up-to-date PostgreSQL database is secure against common attacks (\autoref{log:hybrid:portfwd:postgres_version}, \autoref{log:hybrid:portfwd:postgres_bruteforce})\todo{cite pentest report postgres}. After establishing a port forward to the client VM (\autoref{log:hybrid:meterpreter:fwd}), basic attacks can be run against the PostgreSQL database---the same instance in use by the two services. This poses a risk of DoS attacks, or even data breaches, if the instance is sufficiently misconfigured. Unfortunately for the purposes of this test, an up-to-date PostgreSQL database is secure against common attacks (\autoref{log:hybrid:portfwd:postgres_version}, \autoref{log:hybrid:portfwd:postgres_bruteforce}); this is supported by PostgreSQL's publicly documented security-response process \cite{PostgreSQL}.
\subsection{Outdated Versions of Services} \subsection{Outdated Versions of Services}\label{sub:outdated_versions_of_services}
As the hybrid configuration---contrary to expectations---did not introduce any major security flaws, this configuration is intended to demonstrate how potential risks mentioned in the previous sections could become actual risks. As it is expected for this configuration to perform badly in terms of security, the CIS Docker Benchmark has been omitted. Due to the reverse proxy interfering with one of the exploits this configuration intends to demonstrate, additionally port 3000 is directly forwarded from the gitea service. It can be argued that such a misconfiguration is realistic, as the official gitea documentation suggests forwarding port 3000\todo{cite https://docs.gitea.com/installation/install-with-docker}. For the exploit to work, additionally the configuration \texttt{ALLOW\_LOCALNETWORKS} in the migrations section needs to be enabled. While in production scenarios this is rarely a required options, especially in home-lab environments, it is conceivable that migrations within a local network happen, for example if a different git server was previously used. As the hybrid configuration---contrary to expectations---did not introduce any major security flaws, this configuration is intended to demonstrate how potential risks mentioned in the previous sections could become actual risks. As it is expected for this configuration to perform poorly in terms of security, the CIS Docker Benchmark has been omitted. Due to the reverse proxy interfering with one of the exploits this configuration intends to demonstrate, additionally port 3000 is directly forwarded from the gitea service. It can be argued that such a misconfiguration is realistic, as the official gitea documentation suggests forwarding port 3000 \cite{GiteaDocs}. Additionally, for the exploit to work, the \texttt{ALLOW\_LOCALNETWORKS} configuration of the migrations section needs to be enabled. 
While in production scenarios this is rarely a required option, especially in home-lab environments, it is conceivable that migrations within a local network happen, for example, if a different git server was previously used. While this scenario is artificially constructed, anecdotal evidence does suggest local-only connections are commonplace in home-lab environments \cite{rHomelabLocalUsage}.
\paragraph*{Reconnaissance} \paragraph*{Reconnaissance}
An \texttt{nmap} scan on this system confirms the presence of an open port 3000 (\autoref{log:insecure:nmap}), as expected; and accesing the webservice---either via the hostname \texttt{gitea.vm.local} or the now exposed port \texttt{192.168.56.10:3000}---confirms the outdated version \texttt{1.16.6}. Otherwise surface scans of this configuration produce identical results to the hybrid configuration. An \texttt{nmap} scan on this system confirms the presence of an open port 3000 (\autoref{log:insecure:nmap}), as expected; accessing the web service---either via the hostname \texttt{gitea.vm.local} or the now exposed port \texttt{192.168.56.10:3000}---confirms the outdated version \texttt{1.16.6}. Otherwise surface scans of this configuration produce identical results to the hybrid configuration.
\paragraph*{Exploitation} \paragraph*{Exploitation}
@ -244,11 +241,11 @@ After gaining shell access to the Gitea container, it can be explored freely; th
\subsection{Hardened configuration} \subsection{Hardened configuration}
The hardened configuration is intended to improve upon the hybrid setup, however as the hybrid configuration already proved secure, this test is primarily a configuration exercise than a penetration test, and as such will not follow to usual pattern. Comparing the CIS Docker Bench results in \autoref{docker_bench:hardened} with the results from the hybrid configuration (\autoref{docker_bench:hybrid}), it can be concluded that even with relatively simple changes, security can be significantly improved. The hardened configuration is intended to improve upon the hybrid setup; however, as the hybrid configuration already proved secure, this test is primarily a configuration exercise rather than a penetration test, and as such will not follow the usual pattern. Comparing the CIS Docker Bench results in \autoref{docker_bench:hardened} with the results from the hybrid configuration in \autoref{docker_bench:hybrid}, it can be concluded that even with relatively simple changes, security can be significantly improved.
\section{Performance}\label{sec:performance} \section{Performance}\label{sec:performance}
Going back to the orignal idea of \autoref{sec:a_solution_to_duplication}, while security is one of the central considerations, the other is performance. This poses the question, does a hybrid system improve performance? As the goal is not to perform an extensive benchmark, a simple approach is to measure available processing power and used memory at idle. For this purpose a shell script (\autoref{code:idle_measure}) was used, measuring 60 samples each across 5 runs per setup\footnote{The raw data is available in electronic form upon request}. Going back to the original idea of \autoref{sec:a_solution_to_duplication}, while security is one of the central considerations, the other is performance. This poses the question, does a hybrid system improve performance? As the goal is not to perform an extensive benchmark, a simple approach is to measure available processing power and used memory at idle. For this purpose a shell script (\autoref{code:idle_measure}) was used, measuring 60 samples each across 5 runs per setup\footnote{The raw data is available in electronic form upon request}.
\begin{table}[!htbp] \begin{table}[!htbp]
\centering \centering
@ -261,57 +258,73 @@ CPU Idle (\%) & 99.57 $\pm$ 0.10 & 99.64 $\pm$ 0.10 & +0.06\%\\\hline
\end{tabular} \end{tabular}
\end{table} \end{table}
\autoref{tab:system_comparison} indicates significantly less memory used at idle, with a 3.58\% reduction and high statistical confidence ($p < 0.05$). CPU idle percenteages, however, are statistically equivalent between the two measured configurations. The measurements also show a narrow margin of error, with $\pm1.00 MB$ for RAM usage, and $\pm0.10\%$ for CPU idle. \autoref{tab:system_comparison} indicates significantly less memory used at idle, with a 3.58\% reduction and high statistical confidence ($p < 0.05$). CPU idle percentages, however, are statistically equivalent between the two measured configurations. The measurements also show a narrow margin of error, with $\pm1.00 MB$ for RAM usage, and $\pm0.10\%$ for CPU idle.
\clearpage \clearpage
\chapter{Discussion}\label{cha:discussion} \chapter{Discussion}\label{cha:discussion}
\section{Evaluating Success} \section{Evaluating Success}\label{sec:evaluating_success}
This thesis poses a central problem of too much redundancy in services, especially with containerized systems like docker; the proposed solution was to share services between containers or the host where feasable. As discussed in \autoref{sec:performance}, a minor improvement in used memory, but no reduction in CPU usage could be measured at idle. The primary goal of this thesis was to assess whether reducing service redundancy in containerized systems without degrading security can be achieved through a hybrid approach. For this purpose, a reproducible and testable environment was constructed, which allowed for rapid iteration over different configurations and testing on a known state. With the help of this environment, security benchmarks (\autoref{appendix:docker_bench}) and controlled tests found that no major security vulnerabilities were introduced by a hybrid approach, but the increased number of configurations required may increase the likelihood of human error when doing so. Additionally a performance improvement could be measured in the form of a 30 MB reduction of used memory at idle---however, no CPU usage improvement was detected---as discussed in \autoref{sec:performance} (see also \autoref{tab:system_comparison}).
Additionally the concern was raised, that such sharing of dependencies could introduce new security vulnerabilities. The conducted tests found no critical vulnerabilities introduced by this hybridization, however an increased danger of misconfigurations was observed. It has to be noted that, while these findings are promising, they are not yet generalizable: The study deliberately focused on the general feasibility of a hybrid approach, and as such was limited to a minimal configuration. To determine further viability for different applications, larger-scale setups should be tested. Additionally, the performance tests only measured at idle, and as such no definitive conclusions can be drawn about the effectiveness in a production environment; this does not invalidate the tests, however, as this isolates the overhead of running a system from actual loads.
\section{Untested Configurations} \section{Untested Configurations}
Due to the wide array of possible configurations for any Docker setup, is it virtually impossible to cover all in detail. Nonetheless this section will try to highlight some more common configurations, which were left out, and reason on why they were not tested. Furthermore this section also highlights oppertunities for expanding the concept of hybrid systems. It is important to note that this list is by no means a complete list in any form. Due to the wide array of possible configurations for any Docker setup, it is virtually impossible to cover all in detail. Nonetheless this section will try to highlight some more common configurations that were left out, and reason on why they were not tested. Furthermore this section also highlights opportunities for expanding the concept of hybrid systems. It is important to note that this list is by no means a complete list.
\subsection{Alternatives to Docker Networks} \subsection{Alternatives to Docker Networks}
While it is common to expose specific ports for services---such as 3000 for NODE.js and thus Gitea, or variations on 8080 (8081, 8090, \textellipsis) for HTTP services---this approach is prone to cause port collisions. To avoid this, it is common to use a Docker network \cite{a2024_networking} instead, especially as Docker compose already defines the name of each service as its hostname. As Docker networks are also a common security measure \cite{yasrab_2018_mitigating}, using hostnames not only improves convenience---both, in terms of setup and usage---but also security---thus testing configurations without using Docker networks would not provide any meaningful results. While it is common to expose specific ports for services---such as 3000 for Node.js and thus Gitea, or variations on 8080 (8081, 8090, \textellipsis) for HTTP services---this approach is prone to cause port collisions. To avoid this, it is common to use a Docker network \cite{a2024_networking} instead, especially as Docker Compose already defines the name of each service as its hostname. As Docker networks are also a common security measure \cite{yasrab_2018_mitigating}, using hostnames not only improves convenience---both in setup and day-to-day use---but also security---thus testing configurations without using Docker networks would not provide any meaningful results.
\subsection{Hardening of Services} \subsection{Hardening of Services}
The security of both services in the tested setup can be further improved by implementing the suggested hardening measures according to their documentations\todo{cite hardening page gitea and bitwarden}---some of which are implemented for other tests anyway---extensively testing security of the services in itself would however go past the scope of this thesis, as the selected services are merely a representation of a possible scenario. The security of both services in the tested setup can be further improved by implementing the suggested hardening measures according to their documentation \cite{VaultwardenHardening,GiteaHardening}---some of which are implemented for other tests anyway---extensive security testing of the services itself would, however, go past the scope of this thesis, as the selected services are merely a representation of a possible scenario.
\subsection{Centralized Logging}
In \autoref{sub:reducing_redundancy} the subject of logging was briefly touched upon, however, it is worth reiterating: While logging is an essential component of almost any system, its integration into Docker makes it difficult to draw any meaningful conclusions from testing it, as any such tests would be a configuration exercise of built-in systems. Furthermore, logging and log management are often treated as a dedicated topic, distinct from application and network security itself \cite{Kent2006,Scarfone2023}---thus researching it would exceed the scope of this thesis.
\subsection{Stress Testing} \subsection{Stress Testing}
Due to the high variability and complex testing procedure of stress test, it has been decided performing stress tests on the configuration would be beyond the scope of the thesis. To evaluate further uses of hybrid systems, as will be discussed in \autoref{sec:scaling_issues}, it would be beneficial to perform more rigorous performance testing of different configurations. Due to the high variability and complex testing procedure of stress tests, it has been decided performing stress tests on the configuration would be beyond the scope of the thesis. To evaluate further uses of hybrid systems, as will be discussed in \autoref{sec:scaling_issues}, it would be beneficial to perform more rigorous performance testing of different configurations. Because the focus of this thesis was---as mentioned in \autoref{sec:evaluating_success}---evaluating general feasibility of hybrid systems, rigorous stress tests would go beyond the scope.
\section{Security Trade-offs} \section{Security Trade-offs}
While there are no direct vulnerabilities introduce by hybridization, this is only the case due to modern day secure-by-default configurations\todo{source?}. For example, after exploiting a vulnerability in the Gitea service, it still was not possible to access any data of the Vaultwarden database. Would PostgreSQL configure a default admin account, however, the whole database would have been compromised as a result. Another trade off is the increased complexity in ensuring network segregation: In a traditional setup, each service and its dependencies can be on a shared network to segragate them fully, however in a hybrid network, multiple networks need access to the same dependencies while ideally not being allowed to communcate with eachother. But there are also some benefits for security: With the example of a shared PostgreSQL service, a single instance also means a single instance to keep up-to-date, and a single instance to monitor. A single instance also reduces redundant configurations. The use of hybridisation changes two main areas of system security: It amplifies the `blast radius' of a breach and increases configuration complexity.
Overall, modern day environment should hardly have any negative impact in terms of security from a hybrid system, especially considering the suite of tools available, from easy-to-configure Docker networks, over drop-in firewalls, to automated monitoring tools. \subsection{Potential Risks}
While \autoref{sub:outdated_versions_of_services} showed potential vulnerabilities introduced through the hybrid approach---it demonstrated access to a database also used by another service after exploiting an RCE vulnerability in Gitea---it failed to exploit said access in a way that would not be possible in a conventional system. It did, however, demonstrate the dangers of unpatched systems, which implies that, had the vulnerability been in a shared service, data of the Vaultwarden instance would also have been compromised.
Another trade-off is the increased complexity in ensuring network segregation: In a traditional setup, for each service and its dependencies a separate network can be defined to fully segregate it from other services, however, in a hybrid network, multiple networks need access to the same dependencies while ideally not being allowed to communicate with each other. However, this issue is remedied by the ease of use of Docker networks, and as such only requires minimal additional configuration.
\subsection{Security Defaults vs. Manual Configuration}
As already highlighted in previous sections, hybrid systems sometimes require particular configuration to function properly. This strays from a secure-by-default approach \cite{OWASP,C5}, increasing the potential for human error. Such issues are only amplified by the severity of misconfigurations, with the potential to escalate to a system-wide breach. Taking the previous example, the vulnerability in Gitea can be escalated to a system-wide breach through an unsecured superuser account, by utilizing vulnerabilities such as CVE-2019-9193\footnote{CVE-2019-9193 centers on the \texttt{COPY TO/FROM PROGRAM} function, which could allow a superuser or otherwise permitted user to execute arbitrary OS commands on the host system as the service's system user. The CVE is marked as `DISPUTED' because the PostgreSQL project views this capability as expected for superusers; nonetheless, the consequences of an attacker gaining access to an unsecured superuser remain critical.} \cite{CVE_2019_9193} to gain shell access. Due to PostgreSQL running as a service on the host, exploiting the mentioned vulnerability would bypass container isolation entirely and gain direct access to the host system---resulting in a cross-service breach.
Another disadvantage of hybrid systems is their underexplored nature; as such, there is intrinsically less documentation and user experience available than for conventional systems. On a positive note, shared services only need to be configured once, reducing the chance of forgetting about a service or similar.
\subsection{Real-World Implications}
Overall, modern-day environments should incur minimal additional risk from hybridisation, especially considering the suite of tools available, from easy-to-configure Docker networks, over drop-in firewalls, to automated monitoring tools \cite{iot5030026,WhatIsFalco}. Additionally, a centralized instance of a dependency simplifies patch management: an administrator is only required to update one service, reducing the window of exposure for known vulnerabilities. These factors suggest hybrid systems can be viable, assuming conventional wisdom regarding system administration is followed \cite{106028nistsp800123,CISBench}.
\section{Scaling Issues}\label{sec:scaling_issues} \section{Scaling Issues}\label{sec:scaling_issues}
While the reduction of services seems lucrative at first, it can be assumed such improvements do not scale linearly with server size. The tested system was purposefully designed to be minimal, active production systems fluctuate in the range of gigabytes, however, and as such the overhead of another PostgreSQL service is quite miniscule in comparison. In fact, distributed systems often spin up new instances of non-relational databases on demand\todo{insert quote}. However in small scale environments, that often sit idle, such differences can matter. It is not uncommon for a Raspberry Pi to be used as a home-server\todo{insert source}, and on this scale 30 MB can be up to 6\% of the total memory, depending on model\footnote{The Raspberry Pi 3B---considered one of the most sold models---has 1 GB of RAM}. In home-lab environments it is also more common to have mutliple services on the same system, further increasing the likelyhood of redundant services. While the reduction of services seems lucrative at first, it can be assumed such improvements do not scale linearly with server size. The tested system was purposefully designed to be minimal, active production systems fluctuate in the range of gigabytes, however, and as such the overhead of another PostgreSQL service is quite minuscule in comparison. In fact, distributed systems often spin up new instances of non-relational databases on demand \cite{gessert2017nosql,pokorny2011nosql}. However, in small-scale environments that often sit idle, such differences can matter. Anecdotal evidence suggests it is not uncommon for a Raspberry Pi to be used as a home-server \cite{RPiWebServer,Martin}, and on this scale 30 MB can be up to 6\% of the total memory, depending on model\footnote{The Raspberry Pi 3B---considered one of the most sold models---has 1 GB of RAM}. In home-lab environments it is also more common to have multiple services on the same system, further increasing the likelihood of redundant services.
\section{Dangers of Misconfiguration}
As already highlighted in previous sections, hybrid systems sometimes require particular configuration to function properly. This strives away from a secure-by-default approach, increasing the potential for human error. Such issues are only amplified by the severity of misconfigurations, with the potential to escalate to a system wide breach. Furthermore due to the underexplored nature of hybrid systems, there is intrinsicly less documentation and user experience availble than for conventional system. On a positive note configurations on dependency services only need to be made once, reducing the chance of forgetting about a service or similar.
\section{Reflextion on Reproducibility and Methodology} \section{Reflection on Reproducibility and Methodology}
The usage of Vagrant to allow seamless recreation of any system could be one of the best advantages of the chosen methodology; while Vagrant itself did cause some issues---there has been a Ruby update during the writing of this thesis, temporarly breaking dependencies---a VM management system in general allowed for repeated runs of performance tests, or rapit iteration over different configuration options. Furthermore the usage of tools like Metasploit and Docker Bench automated large parts of the tests, while still completing the task of analysing the systems for common issues. The usage of Vagrant to allow seamless recreation of any system could be one of the best advantages of the chosen methodology; while Vagrant itself did cause some issues---there has been a Ruby update during the writing of this thesis, temporarily breaking dependencies---a VM management system in general allowed for repeated runs of performance tests, or rapid iteration over different configuration options. Furthermore the usage of tools like Metasploit and Docker Bench automated large parts of the tests, while still completing the task of analysing the systems for common issues.
\section{Conclusion} \section{Conclusion}
\todo{write} The results demonstrate that hybrid configurations can be secure and efficient, but only when managed with deliberate configuration and a clear understanding of the associated trade-offs. The question is not whether to share resources, but how to do so safely---especially in setups that prioritize cost-effectiveness and agility over centralized control.
% Hier können Sie Ihre KI-Tools dokumentieren. Diese werden automatisch in eine Tabelle integriert. % Hier können Sie Ihre KI-Tools dokumentieren. Diese werden automatisch in eine Tabelle integriert.
\aitoolentry{GPT-4o}{Proofreading}{``Hello, I'm writing my bachelor thesis with the title `[title]'. Can you help me proofread my work? I'd prefer you to explain the issues in the original than to simple correct them! I will send you individual sections as they are ready. Focus on sentence structure, clarity, and grammar and spelling mistakes.'' Entire Document} \aitoolentry{GPT-4o}{Proofreading}{``Hello, I'm writing my bachelor thesis with the title `<title>'. Can you help me proofread my work? I'd prefer you to explain the issues in the original than to simply correct them! I will send you individual sections as they are ready. Focus on sentence structure, clarity, and grammar and spelling mistakes.'', Entire Document}
\aitoolentry{DeepL}{Translation}{<Abstract>}
\aitoolentry{GPT-o3}{Improving coherency of translations}{``Hallo, ich habe einen Abstract aus dem Englischen übersetzt, der aber jetzt gekünstelt klingt. Kannst du mir helfen ihn zu verbessern?: <Abstract>'', Abstract}
% %
% Hier beginnen die Verzeichnisse % Hier beginnen die Verzeichnisse
@ -345,6 +358,7 @@ The usage of Vagrant to allow seamless recreation of any system could be one of
\acro{SSL}[SSL]{Secure Sockets Layer} \acro{SSL}[SSL]{Secure Sockets Layer}
\acro{CI}[CI]{continuous integration} \acro{CI}[CI]{continuous integration}
\acro{DoS}[DoS]{denial of service} \acro{DoS}[DoS]{denial of service}
\acro{RCE}[RCE]{remote code-execution}
\end{acronym} \end{acronym}
% %
@ -1328,14 +1342,6 @@ diff --color -ruN hybrid/sandbox/playbook.yml hardened/sandbox/playbook.yml
All commands shown in the following section are either bash commands, or metasploit commands\footnote{Metasploit commands are entered into the metasploit console after starting it using the command \texttt{msfconsole}}, if not indicated otherwise. As metasploit commands usually consist of multiple configuration options, the listing itself often contains further commands, which are indicated by the default greater-than symbol. All commands shown in the following section are either bash commands, or metasploit commands\footnote{Metasploit commands are entered into the metasploit console after starting it using the command \texttt{msfconsole}}, if not indicated otherwise. As metasploit commands usually consist of multiple configuration options, the listing itself often contains further commands, which are indicated by the default greater-than symbol.
\begin{code}
\captionof{listing}{\texttt{echo "Hello, World!"}}
\label{log:empty}
\begin{minted}[breaklines,fontsize=\footnotesize]{text}
Hello, World!
\end{minted}
\end{code}
\begin{code} \begin{code}
\captionof{listing}{\texttt{nmap -sS 192.168.56.10} on the base system (client)} \captionof{listing}{\texttt{nmap -sS 192.168.56.10} on the base system (client)}
\label{log:base:nmap_sS} \label{log:base:nmap_sS}
@ -2088,14 +2094,6 @@ Query Text: 'SELECT column_name, data_type FROM information_schema.columns WHERE
\end{minted} \end{minted}
\end{code} \end{code}
\begin{code}
\captionof{listing}{\texttt{metasploit}}
\label{log:base:metasploit:test}
\begin{minted}[breaklines,fontsize=\footnotesize]{text}
Hello, World!
\end{minted}
\end{code}
%\begin{code} %\begin{code}
%\captionof{listing}{\texttt{metasplou123t}\footnote{Metasploit commands are entered into the metasploit console after starting it using the command \texttt{msfconsole}}} %\captionof{listing}{\texttt{metasplou123t}\footnote{Metasploit commands are entered into the metasploit console after starting it using the command \texttt{msfconsole}}}
%\label{log:base:metasploit:test2} %\label{log:base:metasploit:test2}
@ -2104,7 +2102,7 @@ Hello, World!
%\end{minted} %\end{minted}
%\end{code} %\end{code}
\section{Docker Bench Results}\label{appendix_docker_bench} \section{Docker Bench Results}\label{appendix:docker_bench}
Docker Bench for Security is a tool based in the CIS Docker benchmark\footnote{\url{https://www.cisecurity.org/benchmark/docker}}. Docker Bench generates a \texttt{.log} file and a \texttt{.log.json} file; only the raw log files have been included, as they show the same data in a more compact format. Docker Bench for Security is a tool based in the CIS Docker benchmark\footnote{\url{https://www.cisecurity.org/benchmark/docker}}. Docker Bench generates a \texttt{.log} file and a \texttt{.log.json} file; only the raw log files have been included, as they show the same data in a more compact format.