Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
a03f3c3
WIP feat: add gatewayapi support
l0wl3vel Apr 28, 2026
7f467b0
feat: add https gateway listener with self-signed cert
l0wl3vel Apr 29, 2026
c847feb
feat: enable zitadel httproute
l0wl3vel Apr 29, 2026
8ba6232
feat: move kind and sonic containerlab to dedicated network
l0wl3vel Apr 30, 2026
0605470
fix: bind zitadel only to https listener
l0wl3vel May 4, 2026
540904d
feat: expose metal-api gRPC endpoint
l0wl3vel May 4, 2026
3bf3f04
feat: expose nsq endpoint
l0wl3vel May 4, 2026
9df6b85
fix: improve naming consistency
l0wl3vel May 4, 2026
611f353
fix: use valid hosts for gateway certificates
l0wl3vel May 4, 2026
4fd0126
fix: move gateway configuration into their respective sections
l0wl3vel May 7, 2026
baaae61
fix: undo changes to gardener ingress ips
l0wl3vel May 8, 2026
3485b41
fix: fix incorrect use of gateway instead of ingress controller
l0wl3vel May 8, 2026
f84c000
feat: remove ingress-nginx exposed ports
l0wl3vel May 8, 2026
d91c7b3
fix: change mgmt network to mini_lab_internal
l0wl3vel May 11, 2026
1856649
feat: add sonic-vpp image
l0wl3vel May 20, 2026
2625f17
WIP feat: working sonic-vpp example
l0wl3vel May 23, 2026
c28abed
WIP feat: booting sonic-vpp and make up succeeds
l0wl3vel May 24, 2026
4f39ef8
WIP fix: faster bootup
l0wl3vel May 24, 2026
52682a2
WIP feat: wire up SONiC DHCP relay
l0wl3vel May 26, 2026
03144b6
fix: go back to virtio-net-pci devices
l0wl3vel May 28, 2026
97c3a4c
feat: move dhcp server off the sonic switch
l0wl3vel May 28, 2026
273a9da
feat: use sonic-vpp master branch build
l0wl3vel May 28, 2026
92553f0
feat: clean up sonic launch.py
l0wl3vel May 28, 2026
8446f3c
fix: add more documentation
l0wl3vel May 29, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/base-image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ jobs:
- name: 202311
- name: 202411
- name: 202505
- name: 202511-vpp

steps:
- name: Log in to the container registry
Expand Down
28 changes: 23 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ KINDCONFIG := $(or $(KINDCONFIG),control-plane/kind.yaml)
KUBECONFIG := $(shell pwd)/.kubeconfig

METALCTL_HMAC := $(or $(METALCTL_HMAC),metal-admin)
METALCTL_API_URL := $(or $(METALCTL_API_URL),http://api.172.17.0.1.nip.io:8080/metal)
METALCTL_API_URL := $(or $(METALCTL_API_URL),http://api.172.42.0.42.nip.io:8080/metal)

MKE2FS_CONFIG := $(shell pwd)/mke2fs.conf
# Default values
Expand All @@ -23,9 +23,11 @@ ANSIBLE_DISPLAY_SKIPPED_HOSTS=false

MINI_LAB_FLAVOR := $(or $(MINI_LAB_FLAVOR),sonic)
MINI_LAB_VM_IMAGE := $(or $(MINI_LAB_VM_IMAGE),ghcr.io/metal-stack/mini-lab-vms:latest)
MINI_LAB_SONIC_IMAGE := $(or $(MINI_LAB_SONIC_IMAGE),ghcr.io/metal-stack/mini-lab-sonic:latest)
MINI_LAB_SONIC_IMAGE := $(or $(MINI_LAB_SONIC_IMAGE),ghcr.io/metal-stack/mini-lab-sonic:202511-vpp)
MINI_LAB_DELL_SONIC_VERSION := $(or $(MINI_LAB_DELL_SONIC_VERSION),4.5.1)

MINI_LAB_INTERNAL_NETWORK=mini_lab_internal

MACHINE_OS=debian-12.0
MAX_RETRIES := 30

Expand Down Expand Up @@ -117,26 +119,33 @@ create-proxy-registries:

# control-plane-bake: prepare everything the control plane runs on.
#   1. Create the internal docker network (172.42.0.0/24, gateway 172.42.0.1)
#      unless it already exists. `docker network inspect` looks the network up
#      by its exact name, whereas grepping `docker network ls` would also match
#      any other network whose name merely contains the string.
#   2. Abort early when the `kind` binary is not on PATH (`command -v` is the
#      POSIX way to test this and does not depend on `which` being installed).
#   3. Create the kind cluster "metal-control-plane" unless a cluster with
#      exactly that name exists (`grep -x` matches whole lines only).
#   4. Start the registry proxies and (re)create the cloud-provider-kind service.
.PHONY: control-plane-bake
control-plane-bake:
	@if ! docker network inspect $(MINI_LAB_INTERNAL_NETWORK) > /dev/null 2>&1; then \
		docker network create $(MINI_LAB_INTERNAL_NETWORK) --gateway 172.42.0.1 --ip-range=172.42.0.0/24 --subnet=172.42.0.0/24 --ipv6=false; \
	fi
	@if ! command -v kind > /dev/null; then echo "kind needs to be installed"; exit 1; fi
	@if ! kind get clusters | grep -x metal-control-plane > /dev/null; then \
		kind create cluster $(KIND_ARGS) \
			--name metal-control-plane \
			--config $(KINDCONFIG) \
			--kubeconfig $(KUBECONFIG); fi
	$(MAKE) create-proxy-registries
	docker compose up -d --force-recreate cloud-provider-kind

.PHONY: partition
partition: partition-bake
docker compose $(COMPOSE_ARGS) up --remove-orphans --force-recreate partition

.PHONY: partition-bake
partition-bake: external_network
docker pull $(MINI_LAB_VM_IMAGE)
docker pull $(MINI_LAB_VM_IMAGE)
if ! docker inspect vrnetlab/canonical_ubuntu:jammy; then \
./scripts/build_ubuntu_image.sh; \
fi

ifeq ($(CI),true)
docker pull $(MINI_LAB_SONIC_IMAGE)
endif
ifneq ($(filter $(MINI_LAB_FLAVOR),dell_sonic capms),$(MINI_LAB_FLAVOR))
docker pull $(MINI_LAB_SONIC_IMAGE)
#docker pull $(MINI_LAB_SONIC_IMAGE)
endif
@if ! sudo $(CONTAINERLAB) --topo $(LAB_TOPOLOGY) inspect | grep -i leaf01 > /dev/null; then \
sudo --preserve-env=MINI_LAB_SONIC_IMAGE --preserve-env=MINI_LAB_DELL_SONIC_VERSION --preserve-env=MINI_LAB_VM_IMAGE $(CONTAINERLAB) deploy --topo $(LAB_TOPOLOGY) --reconfigure && \
Expand Down Expand Up @@ -166,6 +175,7 @@ env:

# cleanup: tear down the control plane and the partition (via the two
# prerequisite targets), then remove the internal docker network. The network
# name comes from MINI_LAB_INTERNAL_NETWORK so it stays in sync with the
# variable defined at the top of this Makefile instead of being hard-coded.
.PHONY: cleanup
cleanup: cleanup-control-plane cleanup-partition
	docker network rm --force $(MINI_LAB_INTERNAL_NETWORK)

.PHONY: cleanup-control-plane
cleanup-control-plane:
Expand Down Expand Up @@ -412,6 +422,14 @@ build-sonic-base:
docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202311 images/sonic/base-202311
docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202411 images/sonic/base-202411
docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202505 images/sonic/base-202505
docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202511-vpp images/sonic/base-202511-vpp

# build-sonic: builds the four mini-lab-sonic-base images (202311, 202411,
# 202505, 202511-vpp) from their images/sonic/base-* directories.
# NOTE(review): these commands are identical to the build-sonic-base recipe and
# tag the results as mini-lab-sonic-base, not mini-lab-sonic. This looks like a
# copy of build-sonic-base; confirm whether this target should build the image
# in images/sonic instead.
.PHONY: build-sonic
build-sonic:
docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202311 images/sonic/base-202311
docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202411 images/sonic/base-202411
docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202505 images/sonic/base-202505
docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202511-vpp images/sonic/base-202511-vpp

## DEV TARGETS ##

Expand All @@ -438,7 +456,7 @@ build-dell-sonic:
fetch-virtual-kubeconfig:
# TODO: it's hard to get the latest issued generic kubeconfig secret... just take the first result for now
kubectl --kubeconfig=$(KUBECONFIG) get secret -n garden $(shell kubectl --kubeconfig=$(KUBECONFIG) get secret -n garden -l managed-by=secrets-manager,manager-identity=gardener-operator,name=generic-token-kubeconfig --no-headers | awk '{ print $$1 }') -o jsonpath='{.data.kubeconfig}' | base64 -d > .virtual-kubeconfig
@kubectl --kubeconfig=.virtual-kubeconfig config set-cluster garden --server=https://api.gardener-kube-apiserver.172.17.0.1.nip.io:4443
@kubectl --kubeconfig=.virtual-kubeconfig config set-cluster garden --server=https://api.gardener-kube-apiserver.172.42.0.1.nip.io:4443
@kubectl --kubeconfig=.virtual-kubeconfig config set-credentials garden --token=$(shell kubectl --kubeconfig=$(KUBECONFIG) get secret -n garden shoot-access-virtual-garden -o jsonpath='{.data.token}' | base64 -d)
@kubectl --kubeconfig=$(KUBECONFIG) config unset users.garden
@kubectl --kubeconfig=$(KUBECONFIG) config unset contexts.garden
Expand Down
14 changes: 14 additions & 0 deletions compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,20 @@ services:
- REGISTRY_PROXY_TTL=168h
- REGISTRY_STORAGE_DELETE_ENABLED=true
- OTEL_TRACES_EXPORTER=none
cloud-provider-kind:
image: registry.k8s.io/cloud-provider-kind/cloud-controller-manager:v0.10.0
restart: always
networks:
- kind
environment:
- KIND_EXPERIMENTAL_DOCKER_NETWORK=${KIND_EXPERIMENTAL_DOCKER_NETWORK:-kind}
command:
# cloud-provider-kind v0.10.0 does not support TCPRoutes because it does not support the experimental Gateway API channel.
# Its gateway channel is therefore disabled; envoy-gateway (deployed via roles/gateway) is used instead.
- --gateway-channel
- disabled
volumes:
- /var/run/docker.sock:/var/run/docker.sock
volumes:
proxy-docker:
proxy-gcr:
Expand Down
6 changes: 1 addition & 5 deletions control-plane/kind.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
networking:
apiServerPort: 6443
apiServerAddress: 0.0.0.0
apiServerAddress: 172.42.0.1
nodes:
- role: control-plane
extraMounts:
Expand All @@ -13,10 +13,6 @@ nodes:
hostPort: 4443
- containerPort: 8080
hostPort: 8080
- containerPort: 4150
hostPort: 4150
- containerPort: 50051
hostPort: 50051
# if you want to run gardener operator + metal-stack, you need more pods
kubeadmConfigPatches:
- |
Expand Down
2 changes: 2 additions & 0 deletions deploy_control_plane.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
roles:
- name: ansible-common
tags: always
- name: gateway
tags: gateway
- name: ingress-controller
tags: ingress-controller
- name: metal-roles/control-plane/roles/prepare
Expand Down
4 changes: 2 additions & 2 deletions deploy_gardener.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
name: shoot-info
namespace: kube-system
data:
nodeNetwork: 172.18.0.0/16
nodeNetwork: 172.42.0.0/16
podNetwork: 10.244.0.0/24
serviceNetwork: 10.96.0.0/16
tags: gardener
Expand Down Expand Up @@ -81,7 +81,7 @@
status:
loadBalancer:
ingress:
- ip: "172.17.0.1"
- ip: "172.42.0.1"
tags: gardener

- name: Expose istio gateway through ingress-nginx (for local environments)
Expand Down
47 changes: 27 additions & 20 deletions deploy_partition.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
- name: Wait for system to become reachable
ansible.builtin.wait_for_connection:
delay: 10
timeout: 50
timeout: 300
roles:
- name: ansible-common
tags: always
Expand Down Expand Up @@ -65,19 +65,30 @@
sysctl_set: yes
value: "1"

- name: Deploy dhcp server on leaf01 (Community SONiC)
hosts: leaf01:!dell_sonic
pre_tasks:
- name: Temporary workaround for EOL debian bullseye backports repository (using archive.debian.org)
lineinfile:
path: /etc/apt/sources.list
search_string: deb [arch=amd64] http://deb.debian.org/debian/ bullseye-backports main contrib non-free
line: deb [arch=amd64] http://archive.debian.org/debian/ bullseye-backports main contrib non-free
roles:
- name: ansible-common
tags: always
- name: metal-roles/partition/roles/dhcp
tags: dhcp
- name: Install docker on management server
hosts: managementserver
become: true
tasks:
- name: Install docker
ansible.builtin.apt:
name: docker.io
state: present
update_cache: true

- name: Enable and start docker
ansible.builtin.systemd:
name: docker
enabled: true
state: started

# - name: Deploy dhcp server
# hosts: managementserver
# become: true
# roles:
# - name: ansible-common
# tags: always
# - name: metal-roles/partition/roles/dhcp
# tags: dhcp

# FIXME: For some reason, the first docker pull always fails on dell_sonic but succeeds on second attempt.
# Investigate the cause and remove this play
Expand All @@ -91,8 +102,8 @@
pull: true
failed_when: false

- name: Deploy pixiecore on leaf01
hosts: leaf01
- name: Deploy pixiecore
hosts: managementserver
become: true
roles:
- name: ansible-common
Expand Down Expand Up @@ -136,10 +147,6 @@
hosts: leaves
any_errors_fatal: true
become: true
pre_tasks:
- name: Wait some time
pause:
seconds: 120
roles:
- name: ansible-common
tags: always
Expand Down
4 changes: 2 additions & 2 deletions docs/overview-kamaji.drawio.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions docs/overview.drawio.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ DEPLOYMENT_BASE_IMAGE_TAG=${DEPLOYMENT_BASE_IMAGE_TAG}
CI=${CI:=false}
DOCKER_HUB_USER=${DOCKER_HUB_USER:=}
DOCKER_HUB_TOKEN=${DOCKER_HUB_TOKEN:=}
KIND_EXPERIMENTAL_DOCKER_NETWORK=${MINI_LAB_INTERNAL_NETWORK:=}
EOF
20 changes: 20 additions & 0 deletions files/certs/default-gateway/server.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"CN": "default-gateway",
"hosts": [
"api.172.42.0.42.nip.io",
"v2.api.172.42.0.42.nip.io"
],
"key": {
"algo": "rsa",
"size": 4096
},
"names": [
{
"C": "DE",
"L": "Munich",
"O": "metal-stack",
"OU": "DevOps",
"ST": "Bavaria"
}
]
}
2 changes: 1 addition & 1 deletion files/certs/grpc/server.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"CN": "metal-api",
"hosts": [
"172.17.0.1",
"172.42.0.42",
"203.0.113.1"
],
"key": {
Expand Down
4 changes: 2 additions & 2 deletions files/dev_images.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
# Do not change these values
metal_api_image_tag: dev
metal_core_image_name: 172.17.0.1:5000/metalstack/metal-core
metal_core_image_name: 172.42.0.42:5000/metalstack/metal-core
metal_core_image_tag: dev
metal_hammer_image_url: http://172.17.0.1:20015/metal-hammer-initrd.img.lz4
metal_hammer_image_url: http://172.42.0.42:20015/metal-hammer-initrd.img.lz4
2 changes: 1 addition & 1 deletion files/startup-config/leaf01_4.4.3.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
}
},
"DNS_SERVER": {
"172.17.0.1": {},
"172.42.0.1": {},
"1.1.1.1": {},
"1.0.0.1": {}
},
Expand Down
2 changes: 1 addition & 1 deletion files/startup-config/leaf01_4.5.1.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
}
},
"DNS_SERVER": {
"172.17.0.1": {},
"172.42.0.1": {},
"1.1.1.1": {},
"1.0.0.1": {}
},
Expand Down
2 changes: 1 addition & 1 deletion files/startup-config/leaf02_4.4.3.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
}
},
"DNS_SERVER": {
"172.17.0.1": {},
"172.42.0.1": {},
"1.1.1.1": {},
"1.0.0.1": {}
},
Expand Down
2 changes: 1 addition & 1 deletion files/startup-config/leaf02_4.5.1.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
}
},
"DNS_SERVER": {
"172.17.0.1": {},
"172.42.0.1": {},
"1.1.1.1": {},
"1.0.0.1": {}
},
Expand Down
8 changes: 6 additions & 2 deletions images/sonic/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,17 @@ RUN apt-get update && \
iproute2 \
linux-image-cloud-amd64 \
python3 \
python3-pip \
python3-guestfs \
python3-scapy \
qemu-system-x86 \
telnet

COPY --from=ghcr.io/metal-stack/mini-lab-sonic-base:202505 /sonic-vs.img /sonic-vs.img
COPY --from=ghcr.io/metal-stack/mini-lab-sonic-base:202505 /frr-pythontools.deb /frr-pythontools.deb
COPY requirements.txt /
RUN pip install --break-system-packages -r requirements.txt

COPY --from=ghcr.io/metal-stack/mini-lab-sonic-base:master-vpp /sonic-vs.img /sonic-vs.img
COPY --from=ghcr.io/metal-stack/mini-lab-sonic-base:master-vpp /frr-pythontools.deb /frr-pythontools.deb

ENTRYPOINT ["/launch.py"]

Expand Down
12 changes: 12 additions & 0 deletions images/sonic/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Virtual Sonic Images

We use sonic-vpp to emulate SONiC switches. It runs in KVM inside a containerlab container. To provide better emulation accuracy, sonic-vpp uses the Vector Packet Processor (VPP) to emulate something like a switch ASIC, such as the Broadcom Tomahawk 3 in the Edgecore Accton AS7726-X32, the workhorse we use in production. We migrated to sonic-vpp because the sonic-vs image mostly used netlink primitives, which behave differently from an ASIC driven through SONiC's SAI layer. It's slower but still sane.


# Configuration knobs

You can edit the port_config.ini to add more ports.


# Boot process
The switch boots with a default first-boot configuration. This is required because the first boot generates configuration that VPP needs. After a short while, the configuration generated in launch.py is injected and SONiC is reloaded. Once the new configuration is loaded, the container is marked ready. Check the docker logs for errors if bootup takes more than a minute.
Loading
Loading