diff --git a/.airstack/modules/osmo.sh b/.airstack/modules/osmo.sh new file mode 100755 index 000000000..053decbee --- /dev/null +++ b/.airstack/modules/osmo.sh @@ -0,0 +1,719 @@ +#!/usr/bin/env bash + +# osmo.sh — AirStack-on-OSMO convenience commands. +# +# Wraps `osmo workflow submit/port-forward/logs/cancel` for the +# osmo/workflows/airstack-dev.yaml workflow so a Mac/Windows student doesn't +# have to memorize the WebRTC port range or the entry-script path. +# +# This module is pure bash + the cross-platform `osmo` CLI — no Docker +# dependency. Safe to run on a laptop with no AirStack runtime. +# +# Most commands need a workflow id. `osmo:up` saves the id to +# $OSMO_STATE_FILE; the other commands read it from there. You can also +# override it for a single invocation by exporting AIRSTACK_OSMO_WF. + +# State directory and file: ~/.airstack/osmo-state stores the most recent +# workflow id submitted with `airstack osmo:up`. +OSMO_STATE_DIR="${HOME}/.airstack" +OSMO_STATE_FILE="${OSMO_STATE_DIR}/osmo-state" + +# WebRTC livestream ports — must match the ports published by the +# isaac-sim-livestream service in +# simulation/isaac-sim/docker/docker-compose.yaml AND the +# app.livestream.fixedHostPort setting pinned in the Pegasus launch script +# (simulation/isaac-sim/launch_scripts/example_one_px4_pegasus_launch_script.py). +# +# Two ports total: +# TCP 49100 — omni.kit.livestream.webrtc WebSocket signaling +# UDP 49099 — SRTP media (pinned; Kit 107 otherwise picks dynamically and +# escapes both the compose-published and CLI-forwarded ranges) +OSMO_WEBRTC_TCP="49100" +OSMO_WEBRTC_UDP="49099" + +# GCS Foxglove websocket: container 8765 → host 8766 (per +# gcs/docker/docker-compose.yaml). +OSMO_FOXGLOVE_PORT="8766:8766" + +# SSH port-forward: local 2200 → pod 22. +OSMO_SSH_PORT="2200:22" + +# Default `osmo workflow port-forward` connect-timeout (24h). +OSMO_PF_TIMEOUT="${OSMO_PF_TIMEOUT:-86400}" + +# Helper: ensure the osmo CLI is on PATH. 
function _osmo_check_cli {
    if ! command -v osmo >/dev/null 2>&1; then
        log_error "osmo CLI not found on PATH. Install from https://github.com/NVIDIA/OSMO and run 'osmo login'."
        return 1
    fi
}

# Helper: strip leading/trailing whitespace and all CR bytes from the
# variable named in $1. The variable is rewritten in place.
#
# Why this exists: bracket-paste mode and cross-OS clipboards (RDP, VNC,
# Windows-side note apps) routinely smuggle invisible bytes around long
# pastes — Nucleus API tokens (JWT, ~1 KB) and SSH keys are the usual
# victims. Nucleus's auth endpoint silently `DENIES` a token that has
# one extra trailing byte, with no actionable error from the client side.
# Stripping defensively at prompt time saves an entire round-trip of
# "regenerate token → still denied → check auth-service log" debugging.
#
# Arguments: $1 - NAME of the variable to trim (not its value)
# Outputs:   a log_warn line when anything was removed
#
# Notes:
#   - Locals carry a `__osmo_trim_` prefix so they cannot shadow the
#     caller's variable; with generic names, `_osmo_trim val` would trim
#     the local copy and leave the caller's `val` untouched.
#   - There is no NUL-stripping step: bash strings cannot contain NUL, and
#     `$'\0'` expands to the empty string, so such a substitution is a no-op.
function _osmo_trim {
    local __osmo_trim_name="$1"
    local __osmo_trim_val="${!__osmo_trim_name}"
    local __osmo_trim_len="${#__osmo_trim_val}"

    # Drop every carriage return (CRLF clipboards), then trim both ends.
    __osmo_trim_val="${__osmo_trim_val//$'\r'/}"
    __osmo_trim_val="${__osmo_trim_val#"${__osmo_trim_val%%[![:space:]]*}"}"
    __osmo_trim_val="${__osmo_trim_val%"${__osmo_trim_val##*[![:space:]]}"}"

    if [ "${#__osmo_trim_val}" -ne "$__osmo_trim_len" ]; then
        log_warn "Stripped $((__osmo_trim_len - ${#__osmo_trim_val})) whitespace/control byte(s) from ${__osmo_trim_name}."
    fi
    printf -v "$__osmo_trim_name" '%s' "$__osmo_trim_val"
}

# Helper: read a value with prompt; supports -s for silent (passwords).
#
# Visible prompts switch the TTY out of canonical mode for the duration of
# the read. Without this, macOS caps each input line at MAX_CANON = 1024
# bytes (per <sys/syslimits.h>) and rings the terminal bell on Enter when
# the buffer overflows. Nucleus API tokens are JWTs ~950 bytes long, so
# the `Nucleus API token: ` prompt plus the pasted token lands right at
# the cap. `stty -icanon` makes the kernel deliver bytes to bash as
# they're typed, with no line-buffer limit; bash's `read` still
# terminates on newline normally.
#
# We use a trap to guarantee the saved stty is restored if the user Ctrl-Cs
# mid-paste — otherwise the shell would be left in raw mode.
#
# After reading we always run _osmo_trim — see comment there.
# Arguments: $1 - NAME of the variable that receives the input
#            $2 - prompt text (": " is appended)
#            $3 - "true" for a hidden (password) prompt; default "false"
# Returns:   1 when the (trimmed) input is empty, 0 otherwise
function _osmo_prompt {
    local var_name="$1"
    local prompt_text="$2"
    local silent="${3:-false}"
    local saved_stty=""

    if [ "$silent" = "true" ]; then
        # Passwords are short — canonical-mode cap is fine here.
        # `|| true`: on EOF/interrupt the variable stays empty and the
        # empty-input check below reports it, instead of a caller's
        # `set -e` killing the script mid-prompt.
        read -r -s -p "${prompt_text}: " "$var_name" || true
        printf "\n" >&2
    else
        if [ -t 0 ]; then
            saved_stty="$(stty -g 2>/dev/null || true)"
            if [ -n "$saved_stty" ]; then
                trap 'stty "$saved_stty" 2>/dev/null; trap - INT' INT
                stty -icanon 2>/dev/null || true
            fi
        fi
        # Never let a failed read skip the stty restore below.
        read -r -p "${prompt_text}: " "$var_name" || true
        if [ -n "$saved_stty" ]; then
            stty "$saved_stty" 2>/dev/null || true
            trap - INT
        fi
    fi

    _osmo_trim "$var_name"

    if [ -z "${!var_name}" ]; then
        log_error "Empty input for ${var_name}; aborting."
        return 1
    fi
}

# osmo:setup — interactively register the three OSMO credentials AirStack
# needs (airlab-docker-registry, airlab-docker-login, airlab-nucleus).
# Idempotent — re-running rotates the credentials.
function cmd_osmo_setup {
    _osmo_check_cli || return 1

    cat >&2 <<'EOF'

This sets up the three per-user OSMO credentials AirStack-on-OSMO needs:

  1. airlab-docker-registry (REGISTRY) — for OSMO to pull the workspace image
  2. airlab-docker-login    (GENERIC)  — for the inner dockerd to pull AirStack images
  3. airlab-nucleus         (GENERIC)  — for Isaac Sim Nucleus access

You'll be asked for:

  - your Andrew ID (no @andrew.cmu.edu suffix)
  - your AirLab Docker password (same as your Andrew password)
  - your Nucleus API token (https://airlab-nucleus.andrew.cmu.edu/omni/web3/
    → right-click cloud → API Tokens). NOT your Andrew password.

Values go directly to OSMO; nothing is written to disk locally.

EOF

    local andrew_id andrew_password nucleus_token
    _osmo_prompt andrew_id "Andrew ID" false || return 1
    _osmo_prompt andrew_password "AirLab Docker password (hidden)" true || return 1
    _osmo_prompt nucleus_token "Nucleus API token" false || return 1

    # Sanity-check the Nucleus token shape. Nucleus issues RS256 JWTs:
    # base64url(header).base64url(payload).base64url(signature), with the
    # header always starting `eyJ` (base64url of `{"`). Catching a wrong
    # paste here (e.g. Andrew password, or token without the trailing
    # signature segment) saves the user from a silent `InternalCredentials
    # .auth: DENIED` round-trip later on. We do not validate the signature.
    #
    # The rejected value is deliberately NOT echoed back, not even a
    # prefix: the most likely wrong paste is the Andrew password, and
    # eight characters of it would land in the terminal scrollback.
    case "$nucleus_token" in
        eyJ*.*.*) ;; # looks like a 3-segment JWT
        *)
            log_error "That doesn't look like a Nucleus API token."
            log_error "  - Expected: a JWT of the form eyJ<header>.<payload>.<signature> (~1 KB long)"
            log_error "  - Got:      ${#nucleus_token} chars (value not shown; it may be a password)"
            log_error "  Generate one at https://airlab-nucleus.andrew.cmu.edu/omni/web3/"
            log_error "  → right-click cloud icon → API Tokens → Create."
            return 1
            ;;
    esac

    local omni_server="${OMNI_SERVER:-omniverse://airlab-nucleus.andrew.cmu.edu/NVIDIA/Assets/Isaac/5.1}"
    local airlab_registry="${AIRLAB_REGISTRY:-airlab-docker.andrew.cmu.edu}"

    # `osmo credential set` is NOT an upsert for GENERIC credentials — re-setting
    # one that already exists fails with `400 duplicate key value violates unique
    # constraint "credential_pkey"`. Delete first so re-running osmo:setup
    # (e.g. to rotate a Nucleus token) is idempotent. The `|| true` swallows the
    # "credential not found" case on a first-time run.
    #
    # NOTE(review): the payload values are passed as command-line arguments,
    # so they are visible to other local users via `ps` while the command
    # runs. If the osmo CLI offers a file/stdin payload option, prefer it.
    log_info "Refreshing airlab-docker-registry (REGISTRY)..."
    osmo credential delete airlab-docker-registry >/dev/null 2>&1 || true
    osmo credential set airlab-docker-registry \
        --type REGISTRY \
        --payload "registry=${airlab_registry}" \
                  "username=${andrew_id}" \
                  "auth=${andrew_password}" \
        || { log_error "osmo credential set airlab-docker-registry failed"; return 1; }

    log_info "Refreshing airlab-docker-login (GENERIC)..."
    osmo credential delete airlab-docker-login >/dev/null 2>&1 || true
    osmo credential set airlab-docker-login \
        --type GENERIC \
        --payload "username=${andrew_id}" \
                  "password=${andrew_password}" \
        || { log_error "osmo credential set airlab-docker-login failed"; return 1; }

    log_info "Refreshing airlab-nucleus (GENERIC)..."
    osmo credential delete airlab-nucleus >/dev/null 2>&1 || true
    osmo credential set airlab-nucleus \
        --type GENERIC \
        --payload "omni_user=${andrew_id}" \
                  "omni_pass=${nucleus_token}" \
                  "omni_server=${omni_server}" \
        || { log_error "osmo credential set airlab-nucleus failed"; return 1; }

    log_info "All three credentials registered. List them with: osmo credential list"
    log_info "Next: airstack osmo:up [--pool POOL]"
}

# Helper: pick the first existing SSH public key on the host.
# Outputs: the key path on stdout. Returns 1 when none of the candidates exist.
function _osmo_pick_pubkey {
    local candidates=(
        "${HOME}/.ssh/id_ed25519.pub"
        "${HOME}/.ssh/id_ecdsa.pub"
        "${HOME}/.ssh/id_rsa.pub"
    )
    local k # keep the loop variable out of the caller's scope
    for k in "${candidates[@]}"; do
        if [ -f "$k" ]; then
            printf '%s\n' "$k"
            return 0
        fi
    done
    return 1
}

# Helper: get the active workflow id (env override first, then state file).
#
# The state file persists across shell sessions, so it can easily go stale
# (e.g. a previous airstack-dev-N is now FAILED/CANCELED). To avoid the
# confusing "Workflow airstack-dev-10 is not running!" 410 error from the
# downstream osmo command, this helper verifies the saved id is still in a
# live state (PENDING / RUNNING) before returning it.
#
# Outputs: the workflow id on stdout.
# Returns: 1 when no id is available or the workflow is in a non-live state.
function _osmo_wf_id {
    local wf=""
    if [ -n "${AIRSTACK_OSMO_WF:-}" ]; then
        wf="${AIRSTACK_OSMO_WF}"
    elif [ -f "${OSMO_STATE_FILE}" ]; then
        # First word of the first line; tolerates CRLF and a missing
        # trailing newline (read then returns non-zero but still assigns).
        IFS=$' \t\r' read -r wf _ < "${OSMO_STATE_FILE}" || true
    fi
    if [ -z "$wf" ]; then
        # Covers "nothing saved" as well as an empty/blank state file.
        log_error "No workflow id found. Run 'airstack osmo:up' first, or export AIRSTACK_OSMO_WF=<workflow-id>."
        return 1
    fi

    # Validate the workflow is still alive (only when osmo CLI is available).
    if command -v osmo >/dev/null 2>&1; then
        local status
        status="$(osmo workflow query "${wf}" 2>/dev/null | awk -F': +' '/^Status/ {print $2; exit}' | tr -d ' \r\n')"
        case "${status}" in
            PENDING|RUNNING|"")
                # "" means we couldn't reach osmo; let the downstream
                # command surface the real error rather than failing here.
                ;;
            *)
                log_error "Saved workflow '${wf}' is ${status}, not running."
                log_warn "Run 'airstack osmo:up' to launch a fresh one, or:"
                log_warn "    rm ${OSMO_STATE_FILE}"
                log_warn "    export AIRSTACK_OSMO_WF=<workflow-id>"
                return 1
                ;;
        esac
    fi

    printf '%s\n' "${wf}"
    return 0
}

# Helper: persist the workflow id ($1) to $OSMO_STATE_FILE.
# Returns 1 (with a warning) when the state file cannot be written.
function _osmo_save_wf_id {
    if ! mkdir -p "${OSMO_STATE_DIR}" || ! printf '%s\n' "$1" > "${OSMO_STATE_FILE}"; then
        log_warn "Could not write workflow id '$1' to ${OSMO_STATE_FILE}; export AIRSTACK_OSMO_WF=$1 instead."
        return 1
    fi
    log_info "Saved workflow id '$1' to ${OSMO_STATE_FILE}"
}

# Helper: best-effort detection of the user's current AirStack branch so
# `airstack osmo:up` can default --branch to whatever the user is editing
# locally. Returns the branch name on stdout, or empty if we shouldn't
# auto-pin (detached HEAD, not a git repo, etc.).
#
# Why default to the local branch: the pod's entrypoint clones AirStack
# fresh from GitHub on every workflow start (the pod fs is ephemeral, so
# nothing else makes sense). If we don't tell it which branch, it
# defaults to `main` — and any developer testing branch-only OSMO
# changes (compose services, entrypoint tweaks, workflow yaml edits)
# silently runs against stale `main` code instead of their work.
# Defaulting to the local branch makes "edit on laptop, push, osmo:up"
# the natural workflow.
function _osmo_local_branch {
    if ! command -v git >/dev/null 2>&1; then
        return 0
    fi
    local b
    b="$(git -C "${PROJECT_ROOT}" rev-parse --abbrev-ref HEAD 2>/dev/null)" || return 0
    case "$b" in
        ""|HEAD) return 0 ;; # detached HEAD or empty
    esac
    printf '%s\n' "$b"
}

# Helper: warn if the about-to-submit branch isn't safely pushed.
# The pod clones from GitHub, so unpushed commits / dirty working tree don't
# make it into the pod even if the user thinks they did. Catching this
# before submit avoids a 60-90s "wait for pod, then realize" round trip.
#
# Arguments: $1 - branch name about to be submitted
# Outputs:   log_warn / log_info lines only
# Returns:   always 0 — this is advisory and must never block a submit
#
# Every lookup uses `git rev-parse --verify --quiet`. Plain `rev-parse`
# echoes an unresolvable argument back on stdout before failing, so the
# captured "sha" would be the literal text `origin/<branch>`, and the
# "no upstream" warning could never fire.
function _osmo_check_branch_pushed {
    local branch="$1"
    command -v git >/dev/null 2>&1 || return 0
    local repo="${PROJECT_ROOT}"
    [ -n "$repo" ] || return 0
    # Ask git rather than testing for a .git directory: in worktrees and
    # submodules .git is a regular file.
    git -C "$repo" rev-parse --git-dir >/dev/null 2>&1 || return 0

    local local_sha upstream_sha
    local_sha="$(git -C "$repo" rev-parse --verify --quiet "${branch}" 2>/dev/null)" || return 0

    # Look for a remote-tracking branch first (the explicit upstream
    # set by `git push -u`); fall back to origin/<branch>.
    # `|| upstream_sha=""` keeps a failed lookup from tripping a caller's `set -e`.
    upstream_sha="$(git -C "$repo" rev-parse --verify --quiet "${branch}@{upstream}" 2>/dev/null)" || upstream_sha=""
    if [ -z "$upstream_sha" ]; then
        upstream_sha="$(git -C "$repo" rev-parse --verify --quiet "origin/${branch}" 2>/dev/null)" || upstream_sha=""
    fi

    if [ -z "$upstream_sha" ]; then
        log_warn "Branch '${branch}' has no upstream on origin — the pod's clone will fail. Run: git push -u origin ${branch}"
        return 0
    fi

    if [ "$local_sha" != "$upstream_sha" ]; then
        local ahead behind
        ahead="$(git -C "$repo" rev-list --count "${upstream_sha}..${local_sha}" 2>/dev/null)" || ahead=0
        behind="$(git -C "$repo" rev-list --count "${local_sha}..${upstream_sha}" 2>/dev/null)" || behind=0
        if [ "${ahead:-0}" -gt 0 ]; then
            log_warn "Local '${branch}' is ${ahead} commit(s) ahead of origin/${branch} — the pod will clone the older origin tip. Run: git push"
        fi
        if [ "${behind:-0}" -gt 0 ]; then
            log_info "Local '${branch}' is ${behind} commit(s) behind origin/${branch} (pod will clone the newer origin tip)."
        fi
    fi

    if [ -n "$(git -C "$repo" status --porcelain 2>/dev/null)" ]; then
        log_warn "Working tree has uncommitted changes — the pod will not see them. Commit + push first if you want the pod to pick them up."
    fi
    return 0
}

# osmo:up — submit airstack-dev.yaml with the local pubkey injected.
#
# Usage: airstack osmo:up [--pool POOL] [--key PATH] [--branch BRANCH]
#
# --branch defaults to the local repo's current branch (or `main` if we
# can't detect one), and is passed through as AIRSTACK_BRANCH so the
# pod's entrypoint clones the matching code. Pass `--branch main`
# explicitly to override.
#
# Any argument that is not one of the three options above is forwarded
# to `osmo workflow submit` unchanged.
#
# Returns: 1 on a usage error, missing/unsuitable key, missing workflow
#          file, or a failed submit; 0 otherwise.
function cmd_osmo_up {
    _osmo_check_cli || return 1

    local pool="${OSMO_POOL:-}"
    local pubkey_file=""
    local branch=""
    local branch_explicit=false
    local extra_args=()

    while [ $# -gt 0 ]; do
        case "$1" in
            --pool|--key|--branch)
                # A trailing option with no value would make `shift 2`
                # fail without shifting, spinning this loop forever.
                if [ $# -lt 2 ]; then
                    log_error "Option $1 requires a value."
                    return 1
                fi
                case "$1" in
                    --pool) pool="$2" ;;
                    --key) pubkey_file="$2" ;;
                    --branch) branch="$2"; branch_explicit=true ;;
                esac
                shift 2
                ;;
            *) extra_args+=("$1"); shift ;;
        esac
    done

    if [ -z "$pubkey_file" ]; then
        if ! pubkey_file="$(_osmo_pick_pubkey)"; then
            log_error "No SSH public key found in ~/.ssh. Generate one with: ssh-keygen -t ed25519"
            return 1
        fi
    fi
    if [ ! -r "$pubkey_file" ]; then
        log_error "SSH public key not readable: ${pubkey_file}"
        return 1
    fi
    local pubkey
    pubkey="$(cat "$pubkey_file")" || return 1
    case "$pubkey" in
        "")
            log_error "SSH public key file is empty: ${pubkey_file}"
            return 1
            ;;
        -----BEGIN*)
            # The key content is sent to OSMO and echoed in the submit
            # log line below, so a private key must never get this far.
            log_error "${pubkey_file} looks like a PRIVATE key; pass the matching .pub file to --key."
            return 1
            ;;
    esac
    log_info "Using SSH public key: ${pubkey_file}"

    local workflow_yaml="${PROJECT_ROOT}/osmo/workflows/airstack-dev.yaml"
    if [ ! -f "$workflow_yaml" ]; then
        log_error "Workflow file not found: ${workflow_yaml}"
        return 1
    fi

    # Auto-pin --branch to the local checkout if the user didn't pass one.
    if [ "$branch_explicit" = false ] && [ -z "$branch" ]; then
        branch="$(_osmo_local_branch)"
        if [ -n "$branch" ]; then
            log_info "Auto-detected local branch '${branch}'; pod will clone from origin/${branch} (override with --branch main)."
        else
            log_info "Could not detect local branch (detached HEAD?); pod will clone from origin/main."
        fi
    fi
    if [ -n "$branch" ]; then
        _osmo_check_branch_pushed "$branch" || true # advisory only
    fi

    local cmd=(osmo workflow submit "$workflow_yaml")
    if [ -n "$pool" ]; then
        cmd+=(--pool "$pool")
    else
        log_warn "No --pool provided and OSMO_POOL is unset; using your osmo profile's default pool."
    fi
    # IMPORTANT: `osmo workflow submit --set-env` is variadic. Passing two
    # separate `--set-env A=1 --set-env B=2` silently drops the first one
    # (only the last `--set-env` flag's values are kept). We collect all
    # K=V pairs and pass them under a single `--set-env`.
    local env_kvs=("SSH_PUB_KEY=${pubkey}")
    if [ -n "$branch" ]; then
        env_kvs+=("AIRSTACK_BRANCH=${branch}")
    fi
    cmd+=(--set-env "${env_kvs[@]}")
    if [ ${#extra_args[@]} -gt 0 ]; then
        cmd+=("${extra_args[@]}")
    fi

    log_info "Submitting: ${cmd[*]}"
    local output
    if ! output="$("${cmd[@]}" 2>&1)"; then
        echo "$output" >&2
        log_error "osmo workflow submit failed."
        return 1
    fi
    echo "$output"

    # Parse the workflow id out of the submit output. The cookbook examples
    # show "Workflow ID - <workflow-id>" formatted output (see OSMO
    # submission.rst). Match that line.
    local wf_id
    wf_id="$(echo "$output" | awk -F'- ' '/^Workflow ID/ {print $2; exit}' | tr -d ' \r\n')"
    if [ -z "$wf_id" ]; then
        log_warn "Could not parse workflow id from submit output. Set it manually:"
        log_warn "    echo <workflow-id> > ${OSMO_STATE_FILE}"
        return 0
    fi
    # The workflow is already submitted; failing to remember its id is
    # not a reason to report the submit as failed.
    _osmo_save_wf_id "$wf_id" || true

    log_info "Next steps:"
    log_info "  airstack osmo:logs      # follow startup until 'sshd listening'"
    log_info "  airstack osmo:ide       # port-forward sshd + open VS Code"
    log_info "  airstack osmo:webrtc    # forward Isaac Sim WebRTC ports"
    log_info "  airstack osmo:foxglove  # forward GCS Foxglove websocket"
    log_info "  airstack osmo:down      # cancel the workflow"
}

# osmo:logs — follow the workspace task logs.
#
# Despite the `osmo workflow logs --help` output advertising only `-n
# LAST_N_LINES` (no `--follow`), the CLI in fact streams the tail and keeps
# the connection open as new lines arrive — i.e. it already behaves like
# `tail -f`. We just exec it in the foreground so the user sees output
# immediately and can Ctrl+C to stop.
# (An earlier implementation wrapped
# this in `out=$(osmo workflow logs ...)`; command substitution waits for
# the process to exit, which never happened, so nothing was ever printed.)
#
# Environment: OSMO_LOGS_TASK (default "workspace"), OSMO_LOGS_TAIL (default 500)
function cmd_osmo_logs {
    _osmo_check_cli || return 1
    local wf; wf="$(_osmo_wf_id)" || return 1

    local task="${OSMO_LOGS_TASK:-workspace}"
    local lines="${OSMO_LOGS_TAIL:-500}"

    log_info "Following ${task} logs for ${wf} (last ${lines} lines, then live; Ctrl+C to stop)"

    # Filter stderr for the same OSMOUserError-when-workflow-dies case
    # the port-forward path hits — same noisy asyncio Traceback +
    # "Task exception was never retrieved" header. _osmo_pf_filter
    # collapses it into one clean log line.
    osmo workflow logs "${wf}" -t "${task}" -n "${lines}" \
        2> >(_osmo_pf_filter "${wf}")
}

# Helper: succeed when something accepts TCP connections on localhost:$1.
# Uses `nc -z` when available and falls back to bash's built-in /dev/tcp
# redirection, so hosts without netcat (e.g. Git Bash on Windows) still work.
function _osmo_port_listening {
    local port="$1"
    if command -v nc >/dev/null 2>&1; then
        nc -z localhost "$port" 2>/dev/null
    else
        # Subshell: fd 3 is closed again as soon as the probe finishes.
        (exec 3<>"/dev/tcp/localhost/${port}") 2>/dev/null
    fi
}

# osmo:ide — port-forward sshd + (optionally) launch VS Code/Cursor on the
# `airstack-osmo` host. Runs the port-forward in the foreground so closing
# the terminal closes the tunnel.
#
# Usage: airstack osmo:ide [--no-open] [code|cursor]
function cmd_osmo_ide {
    _osmo_check_cli || return 1
    local wf; wf="$(_osmo_wf_id)" || return 1

    local open_ide=true
    local ide_cmd=""
    while [ $# -gt 0 ]; do
        case "$1" in
            --no-open) open_ide=false; shift ;;
            code|cursor) ide_cmd="$1"; shift ;;
            *) log_warn "Ignoring unknown osmo:ide arg: $1"; shift ;;
        esac
    done

    if [ -z "$ide_cmd" ]; then
        if command -v cursor >/dev/null 2>&1; then
            ide_cmd="cursor"
        elif command -v code >/dev/null 2>&1; then
            ide_cmd="code"
        else
            log_warn "Neither 'cursor' nor 'code' found on PATH; will only port-forward (open the IDE manually and Connect to Host airstack-osmo)."
            open_ide=false
        fi
    fi

    log_info "Make sure ~/.ssh/config has a 'Host airstack-osmo' entry pointing at localhost:2200, User root."

    # Local TCP port the user's IDE will connect to (the local side of the
    # `--port LOCAL:REMOTE` mapping).
    local local_port="${OSMO_SSH_PORT%%:*}"

    # Every fresh OSMO pod ships a new sshd host key. If the user's
    # ~/.ssh/known_hosts still has an entry for [localhost]:${local_port}
    # from a previous workflow, ssh aborts with "Host key for [localhost]
    # :${local_port} has changed and you have requested strict checking",
    # which the IDE surfaces as a generic "could not connect" error.
    #
    # The recommended ~/.ssh/config block for `airstack-osmo` uses
    # `UserKnownHostsFile /dev/null`, which sidesteps this entirely — but
    # users who set up before that change still have a stale entry on
    # disk. Scrub it defensively on every osmo:ide invocation. ssh-keygen
    # -R is idempotent: a no-op if the entry doesn't exist.
    if command -v ssh-keygen >/dev/null 2>&1; then
        ssh-keygen -R "[localhost]:${local_port}" >/dev/null 2>&1 || true
    fi

    # Reuse an existing forward if one is already listening (the user might
    # have run this from a second terminal, or osmo:foxglove already opened
    # a multi-port forward). Otherwise spawn one in the background and wait
    # for it to bind before launching the IDE — this avoids the race where
    # Cursor/VS Code tries to SSH before the tunnel exists and dies with
    # "connect to host localhost port 2200: Connection refused".
    local pf_pid=""
    local pf_log="${OSMO_STATE_DIR}/ssh-pf.log"
    if _osmo_port_listening "$local_port"; then
        log_info "Port ${local_port} is already listening; reusing existing port-forward."
    else
        # The state dir only exists once osmo:up has saved an id; with
        # AIRSTACK_OSMO_WF set by hand it may be missing, and the log
        # redirect below would fail before osmo even starts.
        if ! mkdir -p "${OSMO_STATE_DIR}"; then
            log_error "Cannot create ${OSMO_STATE_DIR} for the port-forward log."
            return 1
        fi
        log_info "osmo workflow port-forward ${wf} workspace --port ${OSMO_SSH_PORT} --connect-timeout ${OSMO_PF_TIMEOUT}"
        osmo workflow port-forward "$wf" workspace --port "$OSMO_SSH_PORT" --connect-timeout "$OSMO_PF_TIMEOUT" \
            > "$pf_log" 2>&1 &
        pf_pid=$!
        # Wait up to 30s for the tunnel to start accepting connections.
        local waited=0
        until _osmo_port_listening "$local_port"; do
            sleep 1; waited=$((waited+1))
            if [ "$waited" -ge 30 ]; then
                log_error "Timed out waiting for port-forward on :${local_port} after ${waited}s."
                log_error "  port-forward log: ${pf_log}"
                kill "$pf_pid" 2>/dev/null
                return 1
            fi
            if ! kill -0 "$pf_pid" 2>/dev/null; then
                log_error "port-forward exited early. Tail:"
                tail -10 "$pf_log" >&2
                return 1
            fi
        done
        log_info "Port-forward established on localhost:${local_port} (pid ${pf_pid})."
    fi

    if [ "$open_ide" = true ]; then
        # vscode-remote URI launches the IDE pre-attached to the remote host.
        local uri="vscode-remote://ssh-remote+airstack-osmo/root/AirStack"
        log_info "Launching ${ide_cmd} → ${uri}"
        ( "$ide_cmd" --folder-uri "$uri" >/dev/null 2>&1 || \
          "$ide_cmd" "$uri" >/dev/null 2>&1 || \
          log_warn "Could not launch ${ide_cmd} automatically; open it and pick airstack-osmo from Remote-SSH manually." ) &
    fi

    if [ -n "$pf_pid" ]; then
        log_info "Leave this terminal running for the length of your session (Ctrl+C to disconnect)."
        # Forward Ctrl+C to the port-forward and clean up.
        trap 'kill "$pf_pid" 2>/dev/null; exit 0' INT TERM
        local pf_rc=0
        wait "$pf_pid" || pf_rc=$?
        # pf_pid goes out of scope on return; don't leave a handler
        # behind that would reference it.
        trap - INT TERM
        return "$pf_rc"
    else
        log_info "Existing port-forward owns the tunnel; this command will exit immediately."
        log_info "Stop the tunnel with: pkill -f 'osmo workflow port-forward'  or  airstack osmo:down"
    fi
}

# Helper: filter `osmo workflow port-forward` stderr through awk to
# suppress the asyncio traceback that erupts whenever the workflow gets
# canceled mid-flight (e.g. via osmo:down in another shell, or because
# OSMO timed it out). The CLI raises OSMOUserError("Workflow X is not
# running!") from inside an asyncio Task, which then prints "Task
# exception was never retrieved" + a multi-line Traceback that obscures
# the actual one-line cause. We translate that into a single clean log
# line and drop everything else.
# Arguments: $1 - workflow id, used in the rewritten error line
# Input:     the osmo CLI's stderr on stdin
# Outputs:   the filtered stream on stderr
#
# A Python traceback ends at its first unindented line (the exception
# itself), not at a blank line. The filter therefore leaves "skipping"
# mode on that line. Otherwise every later stderr line would be swallowed
# until a blank line happened to appear. An exception other than
# OSMOUserError is printed verbatim so the real cause stays visible.
function _osmo_pf_filter {
    local wf="$1"
    awk -v WF="$wf" '
        /^Task exception was never retrieved/ { skipping=1; next }
        /^future:/ { skipping=1; next }
        /^Traceback \(most recent call last\):/ { skipping=1; next }
        /^src\.lib\.utils\.osmo_errors\.OSMOUserError/ {
            skipping=0
            sub(/^src\.lib\.utils\.osmo_errors\.OSMOUserError: */, "")
            printf "\033[0;31m[ERROR]\033[0m %s (run `airstack osmo:up` to start a new workflow)\n", $0
            next
        }
        /OSMOUserError: Workflow .* is not running!/ {
            skipping=0
            printf "\033[0;31m[ERROR]\033[0m Workflow %s is no longer running (run `airstack osmo:up` to start a new one).\n", WF
            next
        }
        /^[ \t]+File "/ { next }
        skipping && /^$/ { skipping=0; next }
        skipping && /^[ \t]/ { next }
        skipping { skipping=0 }
        { print }
    ' >&2
}

# Helper: run `osmo workflow port-forward` with the noise filter
# attached. Returns the underlying exit code so callers can decide
# whether to retry / fail. Args after the helper name are passed to
# `osmo workflow port-forward` verbatim.
function _osmo_run_port_forward {
    osmo workflow port-forward "$@" 2> >(_osmo_pf_filter "$1")
}

# osmo:webrtc — forward both Isaac Sim WebRTC ports (TCP in this
# terminal, spawn UDP in the background). Cleans up the UDP child on
# exit (Ctrl+C, foreground TCP failure, or the workflow disappearing
# mid-stream) so we don't leak a port-forward into the user's process
# table.
function cmd_osmo_webrtc {
    _osmo_check_cli || return 1
    local wf; wf="$(_osmo_wf_id)" || return 1

    # The state dir may not exist yet when the workflow id came from
    # AIRSTACK_OSMO_WF; the log redirect below needs it.
    if ! mkdir -p "${OSMO_STATE_DIR}"; then
        log_error "Cannot create ${OSMO_STATE_DIR} for the UDP port-forward log."
        return 1
    fi

    log_info "Spawning UDP port-forward in background: ${OSMO_WEBRTC_UDP}"
    nohup osmo workflow port-forward "$wf" workspace \
        --port "$OSMO_WEBRTC_UDP" --udp \
        --connect-timeout "$OSMO_PF_TIMEOUT" \
        > "${OSMO_STATE_DIR}/webrtc-udp.log" 2>&1 &
    local udp_pid=$!
    log_info "  UDP log: ${OSMO_STATE_DIR}/webrtc-udp.log (pid ${udp_pid})"

    # Tear the UDP fork down when this function exits, by any path.
    # Without this, hitting Ctrl+C on the TCP foreground (or the
    # workflow being canceled, which surfaces as the foreground exiting
    # non-zero) leaves the UDP `osmo workflow port-forward` running
    # against a dead workflow until the user notices and pkill's it.
    # The pid is baked into the trap text because `udp_pid` is a local
    # and the EXIT trap runs after this function has returned.
    trap '
        if kill -0 "'"${udp_pid}"'" 2>/dev/null; then
            kill "'"${udp_pid}"'" 2>/dev/null
            wait "'"${udp_pid}"'" 2>/dev/null
        fi
        trap - EXIT INT TERM
    ' EXIT INT TERM

    log_info "Foreground TCP port-forward: ${OSMO_WEBRTC_TCP}"
    log_info "Open the Omniverse Streaming Client / WebRTC client at http://localhost"
    _osmo_run_port_forward "$wf" workspace \
        --port "$OSMO_WEBRTC_TCP" \
        --connect-timeout "$OSMO_PF_TIMEOUT"
}

# osmo:foxglove — install the AirStack Foxglove extensions into the local
# Foxglove Desktop user-extensions dir, then forward the GCS Foxglove
# websocket.
#
# The extension install is the same script the GCS container runs on
# startup — gcs/foxglove_extensions/install.py — invoked with env-var
# overrides that point at the local laptop dirs. Default destination on
# Linux/macOS is ~/.foxglove-studio/extensions (Foxglove's canonical user
# extensions path; the macOS rebrand still reads from here). Override
# with OSMO_FOXGLOVE_EXT_DIR, or skip the install entirely with
# OSMO_FOXGLOVE_SKIP_EXTENSIONS=1 (e.g. when using app.foxglove.dev
# which doesn't load local extensions anyway).
function cmd_osmo_foxglove {
    _osmo_check_cli || return 1
    local wf; wf="$(_osmo_wf_id)" || return 1

    local ext_src="${PROJECT_ROOT}/gcs/foxglove_extensions"
    local ext_dst="${OSMO_FOXGLOVE_EXT_DIR:-${HOME}/.foxglove-studio/extensions}"
    # Local side of the LOCAL:REMOTE mapping, so the URL printed below
    # always matches the port that is actually forwarded.
    local local_port="${OSMO_FOXGLOVE_PORT%%:*}"

    if [ "${OSMO_FOXGLOVE_SKIP_EXTENSIONS:-0}" != "1" ] && [ -d "${ext_src}" ]; then
        if command -v python3 >/dev/null 2>&1; then
            log_info "Installing Foxglove extensions to ${ext_dst}"
            FOXGLOVE_EXT_SRC="${ext_src}" FOXGLOVE_EXT_DST="${ext_dst}" \
                python3 "${ext_src}/install.py" \
                || log_warn "Foxglove extension install failed; panels like 'Robot Tasks' may show as 'Unknown panel type' in Foxglove"
        else
            log_warn "python3 not found on PATH — skipping Foxglove extension install."
            log_warn "  Custom panels (Robot Tasks, Waypoint Editor, Polygon Editor) will show as 'Unknown panel type'."
            log_warn "  Install python3 (e.g. 'brew install python') or copy ${ext_src}/* manually to ${ext_dst}."
        fi
    elif [ "${OSMO_FOXGLOVE_SKIP_EXTENSIONS:-0}" = "1" ]; then
        log_info "Skipping Foxglove extension install (OSMO_FOXGLOVE_SKIP_EXTENSIONS=1)."
    fi

    log_info "osmo workflow port-forward ${wf} workspace --port ${OSMO_FOXGLOVE_PORT} --connect-timeout ${OSMO_PF_TIMEOUT}"
    log_info "Then in Foxglove Desktop: Open connection → ws://localhost:${local_port}"
    log_info "                          Layouts → Import from file → ${ext_src}/airstack_default.json"
    log_info "  (Restart Foxglove Desktop once if newly-installed panels still show as 'Unknown panel type'.)"
    _osmo_run_port_forward "$wf" workspace \
        --port "$OSMO_FOXGLOVE_PORT" \
        --connect-timeout "$OSMO_PF_TIMEOUT"
}

# osmo:down — cancel the active workflow. Reminds you to push first.
#
# The saved workflow id is forgotten only when (a) the cancel succeeded
# and (b) the state file really names the workflow that was canceled.
# With AIRSTACK_OSMO_WF pointing at a different workflow, the state file
# belongs to another, possibly still running, workflow and must be kept.
function cmd_osmo_down {
    _osmo_check_cli || return 1
    local wf; wf="$(_osmo_wf_id)" || return 1

    log_warn "About to cancel workflow '${wf}'."
    log_warn "Anything not pushed to git in /root/AirStack inside the pod will be LOST."
    log_warn "Hit Ctrl-C in the next 5 seconds to abort."
    sleep 5
    if ! osmo workflow cancel "$wf"; then
        log_error "osmo workflow cancel ${wf} failed; keeping ${OSMO_STATE_FILE}."
        return 1
    fi

    local saved_wf=""
    if [ -f "${OSMO_STATE_FILE}" ]; then
        IFS=$' \t\r' read -r saved_wf _ < "${OSMO_STATE_FILE}" || true
    fi
    if [ -z "$saved_wf" ] || [ "$saved_wf" = "$wf" ]; then
        rm -f "${OSMO_STATE_FILE}"
    else
        log_info "Keeping ${OSMO_STATE_FILE}: it records '${saved_wf}', not the canceled '${wf}'."
    fi
}

# Register commands from this module.
# Fills the COMMANDS / COMMAND_HELP associative arrays (declared by the
# script that sources this module) with the osmo:* entries.
function register_osmo_commands {
    COMMANDS["osmo:setup"]="cmd_osmo_setup"
    COMMANDS["osmo:up"]="cmd_osmo_up"
    COMMANDS["osmo:logs"]="cmd_osmo_logs"
    COMMANDS["osmo:ide"]="cmd_osmo_ide"
    COMMANDS["osmo:webrtc"]="cmd_osmo_webrtc"
    COMMANDS["osmo:foxglove"]="cmd_osmo_foxglove"
    COMMANDS["osmo:down"]="cmd_osmo_down"

    COMMAND_HELP["osmo:setup"]="One-time per-user OSMO credential setup (airlab-docker-registry, airlab-docker-login, airlab-nucleus)"
    COMMAND_HELP["osmo:up"]="Submit osmo/workflows/airstack-dev.yaml with your SSH pubkey injected (--pool POOL, --key PATH, --branch BRANCH)"
    COMMAND_HELP["osmo:logs"]="Follow the workspace task logs (osmo workflow logs -t workspace -n 500; OSMO_LOGS_TASK / OSMO_LOGS_TAIL override)"
    COMMAND_HELP["osmo:ide"]="Port-forward sshd (2200:22) and open VS Code/Cursor on Host airstack-osmo"
    COMMAND_HELP["osmo:webrtc"]="Port-forward Isaac Sim WebRTC ranges (TCP foreground + UDP background)"
    COMMAND_HELP["osmo:foxglove"]="Install AirStack Foxglove extensions locally, then port-forward GCS Foxglove websocket (8766:8766). Override target dir with OSMO_FOXGLOVE_EXT_DIR; skip install with OSMO_FOXGLOVE_SKIP_EXTENSIONS=1."
    COMMAND_HELP["osmo:down"]="Cancel the active workflow (push to git before running this)"
}
diff --git a/.env b/.env index a90a1143e..92403f8c0 100644 --- a/.env +++ b/.env @@ -12,7 +12,7 @@ PROJECT_NAME="airstack" # If you've run ./airstack.sh setup, then this will auto-generate from the git commit hash every time a change is made # to a Dockerfile or docker-compose.yaml file. Otherwise this can also be set explicitly to make a release version. # auto-generated from git commit hash -VERSION="0.18.0-alpha.10" +VERSION="0.18.0-alpha.11" # Choose "dev" or "prebuilt". "dev" is for mounted code that must be built live.
"prebuilt" is for built ros_ws baked into the image DOCKER_IMAGE_BUILD_MODE="dev" # Where to push and pull images from. Can replace with your docker hub username if using docker hub. diff --git a/.gitignore b/.gitignore index a5776557c..4868b5c74 100644 --- a/.gitignore +++ b/.gitignore @@ -99,3 +99,5 @@ common/rayfronts/ # Docker build cache (root-owned subdirs cause permission warnings on `git add`) robot/docker/cache/ +.DS_Store +gcs/.DS_Store diff --git a/airstack.sh b/airstack.sh index 3d1e955e3..78b475c07 100755 --- a/airstack.sh +++ b/airstack.sh @@ -5,6 +5,47 @@ # This script provides a unified interface for common development tasks # in the AirStack project, including setup, installation, and container management. +# Re-exec under bash 4+ if necessary. macOS ships bash 3.2 which can't handle +# `declare -A` (associative arrays) used throughout this script. Searches for +# a newer bash via $AIRSTACK_BASH, then common Homebrew install paths, then +# any `bash` on PATH that reports version >= 4. Sets AIRSTACK_REEXEC_BASH=1 +# to guard against infinite re-exec loops. +if [ -z "${AIRSTACK_REEXEC_BASH:-}" ] && [ "${BASH_VERSINFO[0]:-0}" -lt 4 ]; then + _airstack_candidates=( + "${AIRSTACK_BASH:-}" + /opt/homebrew/bin/bash # Apple Silicon Homebrew + /usr/local/bin/bash # Intel Homebrew + /opt/local/bin/bash # MacPorts + ) + if command -v bash5 >/dev/null 2>&1; then + _airstack_candidates+=("$(command -v bash5)") + fi + # Add any `bash` on PATH whose version is >= 4 (other than the one we just + # got here from, which is < 4 by the if-check above). + for _alt in $(command -v -a bash 2>/dev/null); do + _airstack_candidates+=("$_alt") + done + + for _airstack_alt_bash in "${_airstack_candidates[@]}"; do + [ -z "$_airstack_alt_bash" ] && continue + [ -x "$_airstack_alt_bash" ] || continue + # Probe BASH_VERSINFO[0] without sourcing the script. 
+ if "$_airstack_alt_bash" -c '[ "${BASH_VERSINFO[0]:-0}" -ge 4 ]' 2>/dev/null; then + export AIRSTACK_REEXEC_BASH=1 + exec "$_airstack_alt_bash" "$0" "$@" + fi + done + + cat >&2 <<'EOF' +[ERROR] airstack.sh requires bash 4 or newer (your bash is 3.x). + macOS ships bash 3.2 by default; install a modern bash with: + brew install bash + Or set AIRSTACK_BASH=/path/to/bash >= 4 before invoking this script. +EOF + exit 1 +fi +unset AIRSTACK_REEXEC_BASH + set -e # Script directory diff --git a/docs/getting_started/index.md b/docs/getting_started/index.md index e7bb64235..3b319ec00 100644 --- a/docs/getting_started/index.md +++ b/docs/getting_started/index.md @@ -1,5 +1,12 @@ # Getting Started +!!! tip "On Mac, Windows, or no GPU?" + + This page assumes a Linux desktop with an NVIDIA GPU. If that's not you, + use [AirStack on OSMO](../tutorials/airstack_on_osmo.md) instead — you + only need an SSH key, the `osmo` CLI, and VS Code or Cursor. No local + Docker, no NVIDIA drivers, no `airstack install`. + !!! warning "" AirStack is currently in ALPHA and only meant for internal usage. diff --git a/docs/tutorials/airstack_on_osmo.md b/docs/tutorials/airstack_on_osmo.md new file mode 100644 index 000000000..d4da917a1 --- /dev/null +++ b/docs/tutorials/airstack_on_osmo.md @@ -0,0 +1,591 @@ +# AirStack on OSMO — Recommended Remote Development Workflow + +This is AirStack's recommended day-to-day development path going forward. +You submit one OSMO workflow that spins up a GPU pod running the full +three-container AirStack stack (Isaac Sim, robot-desktop, GCS), attach VS +Code or Cursor to it over Remote-SSH, and stream Isaac Sim and the GCS +Foxglove dashboard back to your browser. + +Why this is the recommended path: + +- **Pooled GPUs.** A lab's GPUs are shared on-demand across the whole team + instead of pinned one-per-desktop. Onboarding doesn't require buying + hardware. 
+- **No local CUDA / Docker / driver maintenance.** Your laptop just needs + `git`, an SSH key, and an IDE. macOS, Windows, and Linux all work + identically. +- **Same image as CI and field robots.** The OSMO pod runs the exact + Docker images that the system tests and deployed robots run, so your + dev environment can't drift away from production. +- **One-command onboarding.** A new student goes from zero to "Isaac Sim + streaming into my browser" with `airstack osmo:setup` followed by + `airstack osmo:up` — no install marathon. +- **Hardware bigger than your laptop.** The pod has more CPU/RAM/GPU than + most dev laptops, even if you have a GPU laptop. + +> **Still want local development on a Linux+GPU desktop?** It works and +> can be faster for tight inner loops — see +> [Getting Started](../getting_started/index.md). It just isn't the +> recommended default anymore. + +## Who is this for? + +Anyone developing AirStack — Mac, Windows, or Linux, with or without a +local GPU. + +You're comfortable using `git` from a terminal, you have an SSH key +(`~/.ssh/id_ed25519` or similar), and you have either VS Code or Cursor +installed. That's the entire local-machine bar. + +## Architecture in a sentence + +`airstack osmo:up` (which wraps `osmo workflow submit`) spins up a GPU pod +that runs sshd plus a Docker-in-Docker daemon. Inside that pod, `airstack +up` brings up the familiar three AirStack containers (Isaac Sim, +robot-desktop, GCS). Your IDE attaches over Remote-SSH; Isaac Sim and +Foxglove are reached via separate port-forwards. 
+ +```mermaid +flowchart LR + subgraph laptop [Your laptop] + ide[VS Code or Cursor + Remote-SSH] + osmo[osmo CLI] + fox[app.foxglove.dev] + webrtc[Isaac Sim WebRTC client] + end + subgraph pod [OSMO workspace pod - GPU] + sshd[sshd] + inner[Inner dockerd] + isaac[isaac-sim container] + robot[robot-desktop container] + gcs[gcs container] + end + osmo -- submit and port-forward --> pod + ide -- ssh on 2200 --> sshd + fox -- ws on 8766 --> gcs + webrtc -- "WebRTC on 49100/tcp, 49099/udp" --> isaac + inner --> isaac + inner --> robot + inner --> gcs +``` + +## Prerequisites + +| You need | Why | +|---|---| +| A local clone of AirStack (`git clone https://github.com/castacks/AirStack.git`) | The `airstack osmo:*` wrappers, the workflow YAML, and the Foxglove extensions all live in the repo | +| The [`osmo` CLI](https://github.com/NVIDIA/OSMO) on your `PATH` | Submitting workflows and port-forwarding | +| `osmo login` done once | Stores your auth token in `~/.config/osmo` | +| An SSH keypair (e.g. `~/.ssh/id_ed25519`) | The pod authorises your pubkey at submit time. Generate one with `ssh-keygen -t ed25519` if you don't already have one. | +| **VS Code with the Remote-SSH extension** *or* **Cursor with its Remote-SSH equivalent** | Where you'll actually edit AirStack code | +| Optional: Foxglove desktop app, or just `app.foxglove.dev` | View ROS topics | +| Optional: an Omniverse Streaming Client / WebRTC browser client | View the streamed Isaac Sim render | + +You **do not** need: Docker, NVIDIA drivers, `airstack install`, `airstack +setup`, sudo, or Linux. + +> **Lab admin prerequisites (someone else's job, once).** A lab admin +> pushes the `airstack-osmo-workspace` image to +> `airlab-docker.andrew.cmu.edu`. Details in +> [`osmo/README.md`](https://github.com/castacks/AirStack/blob/main/osmo/README.md). +> +> **Your job, once:** the next step. 
+ +## Step 0 — Register your OSMO credentials (one time) + +OSMO credentials are **per-user** (each Andrew ID has its own Nucleus token, +its own AirLab Docker password, its own OSMO profile). You register them +once with the `osmo` CLI on your laptop and OSMO injects them into every +workflow you submit afterwards. They never leave your OSMO profile and your +laptop never sees the values again. + +You need three credentials. The exact names matter — the workflow YAML +references them by these exact names. + +Clone AirStack (skip the `git clone` if you already have a clone), then run the setup from the repo root: + +```bash +git clone https://github.com/castacks/AirStack.git +cd AirStack +./airstack.sh osmo:setup +``` + +This prompts for your Andrew ID, AirLab Docker password, and Nucleus API +token (create one in the AirLab Nucleus web UI → +right-click cloud icon → **API Tokens** → Create), then registers the +three credentials with OSMO. The values go directly to your OSMO profile +— nothing is written to local disk. + +> **macOS prereq: bash 4+.** macOS ships bash 3.2 by default and the +> `airstack` CLI needs bash 4+. If you see +> `airstack.sh requires bash 4 or newer`, install a modern bash with: +> +> ```bash +> brew install bash +> ``` +> +> No further config needed — `airstack.sh` auto-detects the Homebrew bash +> at `/opt/homebrew/bin/bash` (Apple Silicon) or `/usr/local/bin/bash` +> (Intel) and re-execs under it. You don't need to change your login shell. + +### Verify + +List your credentials: + +```bash +osmo credential list +``` + +You should see all three (`airlab-docker-registry`, `airlab-docker-login`, +`airlab-nucleus`). To rotate any of them later, just re-run +`./airstack.sh osmo:setup`. +
+Under the hood — the three raw `osmo credential set` calls + +`airstack osmo:setup` (defined in +[`.airstack/modules/osmo.sh`](https://github.com/castacks/AirStack/blob/main/.airstack/modules/osmo.sh) +as `cmd_osmo_setup`) is equivalent to running these three commands by hand +— useful for debugging or rotating one credential at a time: + +```bash +# 1. AirLab Docker registry (REGISTRY) — for OSMO's outer image-pull of +# airlab-docker.andrew.cmu.edu/airstack/airstack-osmo-workspace +osmo credential set airlab-docker-registry \ + --type REGISTRY \ + --payload registry=airlab-docker.andrew.cmu.edu \ + username= \ + auth='' + +# 2. AirLab Docker login (GENERIC) — for the *inner* dockerd inside the +# pod to `docker login` and pull the AirStack image set +osmo credential set airlab-docker-login \ + --type GENERIC \ + --payload username= \ + password='' + +# 3. AirLab Nucleus (GENERIC) — for Isaac Sim to authenticate against +# omniverse://airlab-nucleus.andrew.cmu.edu (API token, NOT password) +osmo credential set airlab-nucleus \ + --type GENERIC \ + --payload omni_user= \ + omni_pass='' \ + omni_server=omniverse://airlab-nucleus.andrew.cmu.edu/NVIDIA/Assets/Isaac/5.1 +``` + +
+ +> **Why three credentials?** It's tempting to consolidate. The reason for +> the split: OSMO REGISTRY credentials drive Kubernetes `imagePullSecrets` +> (auto-attached, never exposed as env vars), while GENERIC credentials are +> what get injected as env vars inside the running container. The pod +> needs **both** kinds of access — outer pull of the workspace image, plus +> inner login from the inner dockerd to pull AirStack images. + +## Step 1 — Add an SSH config entry (one time) + +VS Code and Cursor's Remote-SSH "Connect to Host…" picker reads +`~/.ssh/config`. Add this block once and the host shows up by name forever: + +```bash +cat >> ~/.ssh/config <<'EOF' + +Host airstack-osmo + HostName localhost + Port 2200 + User root + # Every OSMO workflow boots a fresh pod with a fresh sshd host key, so + # any saved fingerprint for [localhost]:2200 will be wrong on the next + # `airstack osmo:up`. Skip the host-key check here: this alias only + # connects via the local port-forward, so the security boundary is + # OSMO's authenticated control-plane tunnel — not the SSH fingerprint. + # /dev/null keeps known_hosts clean (no stale entries pile up); LogLevel + # ERROR silences the "Permanently added [localhost]:2200" banner. + StrictHostKeyChecking no + UserKnownHostsFile /dev/null + LogLevel ERROR + # SSH agent forwarding so `git push` from inside the pod uses your + # local laptop's SSH key (the pod's sshd has AllowAgentForwarding yes + # baked in by osmo/workspace/sshd_config). Without this, the pod has + # no key to push to github.com with — its ~/.ssh/ only holds the + # authorized_keys file for inbound connections. + ForwardAgent yes + # macOS Keychain integration — first push from the pod auto-loads + # your key into the local ssh-agent and unlocks it via the system + # keychain (no passphrase prompts). Harmless on Linux: those clients + # ignore the option. AddKeysToAgent works on both OSes. 
+ AddKeysToAgent yes + UseKeychain yes +EOF +``` + +The `localhost:2200` is what we'll port-forward to in step 4. + +> **Already added the old block?** If your `~/.ssh/config` still has +> `StrictHostKeyChecking accept-new` for `airstack-osmo` from an earlier +> setup, replace it with the three lines above. As a one-time cleanup of +> the stale fingerprint left behind by previous pods, also run: +> +> ```bash +> ssh-keygen -R "[localhost]:2200" +> ``` +> +> `airstack osmo:ide` does this scrub for you on every run, so you only +> need it once when migrating. + +> **Smoke-test the agent forward** once the pod is up: SSH in and run +> `ssh-add -l` — you should see your local key listed. If you see "The +> agent has no identities", run `ssh-add ~/.ssh/id_ed25519` on your +> laptop and reconnect. + +## Step 2 — Submit the workflow + +From the AirStack clone: + +```bash +./airstack.sh osmo:up --pool airstack +``` + +This submits +[`osmo/workflows/airstack-dev.yaml`](https://github.com/castacks/AirStack/blob/main/osmo/workflows/airstack-dev.yaml) +with two things injected: + +- your local SSH pubkey as `SSH_PUB_KEY` — that's what authorises + **your** key on **this** workflow (each student passes their own at + submit time; the lab admin doesn't manage a global `authorized_keys` + file). +- `AIRSTACK_BRANCH` set to your local repo's current branch — the pod + ignores your laptop's working tree (it's ephemeral and runs in a + different machine room) and clones AirStack fresh from GitHub on + every workflow start, so this is how it knows which branch to use. + Override with `--branch main` if you want the pod to track main even + while you're on a feature branch. + +> **The pod clones from GitHub, not your laptop.** Local edits (and +> commits you haven't pushed) won't make it into the pod. `airstack +> osmo:up` warns you up-front if your branch is ahead of origin or has +> uncommitted changes — `git push` first if you want the pod to pick +> them up. 
+ +`airstack osmo:up` prints a workflow id like `airstack-dev-1` and stores +it in `~/.airstack/osmo-state`, so the rest of the `airstack osmo:*` +commands in this tutorial pick it up automatically — no `export WF=...` +needed. To target a specific workflow for a single invocation, export +`AIRSTACK_OSMO_WF=`. + +
+Under the hood — raw `osmo workflow submit` + +`airstack osmo:up` (defined in +[`.airstack/modules/osmo.sh`](https://github.com/castacks/AirStack/blob/main/.airstack/modules/osmo.sh) +as `cmd_osmo_up`) is equivalent to: + +```bash +osmo workflow submit osmo/workflows/airstack-dev.yaml \ + --pool airstack \ + --set-env "SSH_PUB_KEY=$(cat ~/.ssh/id_ed25519.pub)" +``` + +Save the printed workflow id as `$WF` if you're using the raw form, and +substitute it for `airstack osmo:*` in the rest of the tutorial. + +
+ +## Step 3 — Wait for the stack to come up + +Tail the lead task's logs and watch for milestones: + +```bash +./airstack.sh osmo:logs +``` + +Expected milestones, in order (each is one line in the log): + +1. `[entrypoint] sshd listening on :22` — VS Code/Cursor can attach. +2. `[entrypoint] dockerd ready` — the inner Docker daemon is up. +3. `Successfully built airstack_isaac-sim` *(or `Pulled` if pre-built)* — + the image set is in place. +4. `airstack-isaac-sim-livestream-1 ... started` +5. `airstack-robot-desktop-1 ... started` +6. `airstack-gcs-1 ... started` + +If step (1) appears, you can attach the IDE while the rest is still +spinning up — the bring-up will continue in the background. + +
+Under the hood — raw `osmo workflow logs` + +`airstack osmo:logs` (defined in +[`.airstack/modules/osmo.sh`](https://github.com/castacks/AirStack/blob/main/.airstack/modules/osmo.sh) +as `cmd_osmo_logs`) just exec's: + +```bash +osmo workflow logs $WF -t workspace -n 500 +``` + +The `osmo` CLI's `workflow logs` command prints the last N lines and then +keeps the stream open as new lines arrive (it already behaves like `tail +-f`, even though `--help` only documents `-n LAST_N_LINES`). Ctrl+C to +stop. Override the task / tail length with `OSMO_LOGS_TASK` / +`OSMO_LOGS_TAIL` env vars. + +
+ +## Step 4 — Forward sshd and attach the IDE + +In one terminal, run: + +```bash +./airstack.sh osmo:ide +``` + +This (a) starts the `localhost:2200 → pod:22` port-forward with a 24h +connect-timeout (matching the workflow's `exec_timeout`), waits for the +tunnel to come up, then (b) launches Cursor or VS Code (whichever it +finds on `PATH`) pre-attached to +`vscode-remote://ssh-remote+airstack-osmo/root/AirStack`. **Leave the +terminal running** for the length of your session — closing it tears the +tunnel down. + +The IDE installs its remote server in the pod on first connect (~50 MB, +slower on a fresh pod, cached on subsequent connects). Then: + +1. The IDE should open `/root/AirStack` automatically. (If not: + **Open Folder…** → `/root/AirStack`.) +2. Open the integrated terminal — you're root in `/root/AirStack`. +3. Edit code in the IDE; the changes land directly on the pod's disk. + +Verify everything is wired up by running: + +```bash +docker ps +``` + +You should see four containers: `airstack-isaac-sim-livestream-1`, +`airstack-robot-desktop-1`, `airstack-gcs-1`, plus the AirStack CLI helper. + +
+Under the hood — raw port-forward + manual IDE attach + +`airstack osmo:ide` (defined in +[`.airstack/modules/osmo.sh`](https://github.com/castacks/AirStack/blob/main/.airstack/modules/osmo.sh) +as `cmd_osmo_ide`) is equivalent to running the port-forward by hand: + +```bash +osmo workflow port-forward $WF workspace --port 2200:22 --connect-timeout 86400 +``` + +…then in the editor: + +- **VS Code:** Command Palette → **Remote-SSH: Connect to Host…** → pick + `airstack-osmo`. +- **Cursor:** the same flow under its remote-development menu. + +Add `--no-open` to `airstack osmo:ide` to only run the port-forward and +attach the IDE manually. + +
+ +## Step 5 — Pick a feature branch and start working + +The pod cloned the branch you submitted with (your local branch at `osmo:up` time, or the `--branch` override) into `/root/AirStack` on startup. Treat it like any +git working tree: + +```bash +git checkout -b my-feature +# edit code in the IDE... +bws --packages-select YOUR_PACKAGE # build inside the robot-desktop container per AGENTS.md +``` + +Standard ROS 2 commands work from the integrated terminal: + +```bash +docker exec airstack-robot-desktop-1 bash -c "ros2 node list" +docker exec airstack-robot-desktop-1 bash -c "ros2 topic hz /robot_1/odometry" +``` + +This is the same `docker exec` pattern documented in +[AGENTS.md](https://github.com/castacks/AirStack/blob/main/AGENTS.md) — the +fact that you're on a remote pod is invisible from inside the IDE. + +## Step 6 — View Isaac Sim (WebRTC livestream) + +Isaac Sim runs headless inside the pod with the Kit +`omni.kit.livestream.webrtc` extension enabled (configured by the +`isaac-sim-livestream` Compose profile). To view it locally: + +```bash +./airstack.sh osmo:webrtc +``` + +This spawns the UDP port-forward (media, `49099`) in the background and +runs the TCP port-forward (signaling, `49100`) in the foreground — leave +that terminal running. + +Then point the **Omniverse Streaming Client** (or a WebRTC-capable browser +client) at `http://localhost`. The simulation viewport shows up the same +way it would on a local Linux desktop. +
+Under the hood — raw TCP + UDP port-forwards + +`airstack osmo:webrtc` (defined in +[`.airstack/modules/osmo.sh`](https://github.com/castacks/AirStack/blob/main/.airstack/modules/osmo.sh) +as `cmd_osmo_webrtc`) is equivalent to running the two raw port-forwards +in separate terminals — Kit's WebRTC needs both TCP signaling and UDP +SRTP media, and the AirStack workflow pins both to single ports rather +than scanning the Kit default range: + +```bash +# Terminal A — TCP signaling (49100): +osmo workflow port-forward $WF workspace --port 49100 --connect-timeout 86400 + +# Terminal B — UDP media (49099, pinned by the Pegasus launch script): +osmo workflow port-forward $WF workspace --port 49099 --udp --connect-timeout 86400 +``` + +
+ +## Step 7 — View ROS topics in Foxglove + +The GCS container runs `foxglove_bridge` on container-port `8765`, +published as host-port `8766` on the workspace pod. To install the +AirStack Foxglove extensions locally and forward the websocket in one +step: + +```bash +./airstack.sh osmo:foxglove +``` + +This copies the AirStack Foxglove extensions (Robot Tasks, Waypoint +Editor, Polygon Editor) into your local Foxglove Desktop user-extensions +dir (default `~/.foxglove-studio/extensions`; override with +`OSMO_FOXGLOVE_EXT_DIR`, skip with `OSMO_FOXGLOVE_SKIP_EXTENSIONS=1` for +`app.foxglove.dev` which doesn't load local extensions), then runs the +`localhost:8766 → pod:8766` port-forward in the foreground — leave the +terminal running. + +Then in [https://app.foxglove.dev](https://app.foxglove.dev) (or Foxglove +Desktop): + +1. **Open connection** → `ws://localhost:8766`. +2. **Layouts** → **Import from file** → + [`gcs/foxglove_extensions/airstack_default.json`](https://github.com/castacks/AirStack/blob/main/gcs/foxglove_extensions/airstack_default.json) + from your AirStack clone. +3. Pick the imported layout from the layout dropdown in the top-right. + +The full Foxglove flow — layout import, panel customisation, DDS bridge +naming — is documented at +[Foxglove Visualization](../gcs/foxglove.md). The only OSMO-specific +difference is the `osmo:foxglove` line in front of it. + +
+Under the hood — raw `osmo workflow port-forward` + +`airstack osmo:foxglove` (defined in +[`.airstack/modules/osmo.sh`](https://github.com/castacks/AirStack/blob/main/.airstack/modules/osmo.sh) +as `cmd_osmo_foxglove`) wraps the extension install plus: + +```bash +osmo workflow port-forward $WF workspace --port 8766:8766 --connect-timeout 86400 +``` + +Set `OSMO_FOXGLOVE_SKIP_EXTENSIONS=1` to only run the port-forward. + +
+ +## Step 8 — Commit and push from inside the IDE + +The pod's filesystem is **ephemeral**. The persistence boundary is git, not +disk. Commit and push every meaningful chunk of work — a Source Control +panel commit + push, or in the integrated terminal: + +```bash +git add -A +git commit -m "WIP: feature X" +git push -u origin my-feature +``` + +Once your branch is on the remote, you can pull it from anywhere — your +laptop, a fresh pod tomorrow, a colleague's machine. + +> **Configuring git auth in the pod.** The pod is yours for the session. +> Inside the IDE's integrated terminal, set `git config user.name`, +> `user.email`, and configure your push auth (HTTPS + a GitHub PAT, or the +> SSH agent forwarding from Step 1, which lets `git push` use your laptop's key). The +> `airstack-osmo-workspace` image deliberately does not bake any one +> student's git creds. + +## Step 9 — Tearing down + +When you're done: + +```bash +./airstack.sh osmo:down +``` + +This prints a 5-second warning then cancels the workflow stored in +`~/.airstack/osmo-state`. Hit Ctrl-C in the grace window if you submitted +by accident. + +> **Push first.** Anything that's still in your working tree, in `.git/` +> but not pushed, in `build/`, in `bags/`, or in `/root/` outside the repo +> **will be lost** on cancel. The pod is cattle. If you forget and need +> something pulled out, see "I forgot to push before tearing down" below +> *before* hitting cancel. +
+Under the hood — raw `osmo workflow cancel` + +`airstack osmo:down` (defined in +[`.airstack/modules/osmo.sh`](https://github.com/castacks/AirStack/blob/main/.airstack/modules/osmo.sh) +as `cmd_osmo_down`) is equivalent to: + +```bash +osmo workflow cancel $WF +``` + +
+ +## Troubleshooting + +| Symptom | Likely cause | Fix | +|---|---|---| +| `Remote-SSH: Connection refused` after a working session | Port-forward died (laptop slept, network blip) | Re-run `./airstack.sh osmo:ide` | +| `Permission denied (publickey)` on Remote-SSH | The pod authorised a different pubkey than the one your local SSH client is offering | Confirm `cat ~/.ssh/id_ed25519.pub` matches the key that was injected at submit time. Re-submit with `./airstack.sh osmo:down && ./airstack.sh osmo:up --pool airstack`. | +| `airstack osmo:logs` shows `ERROR: SSH_PUB_KEY not set` | The submit didn't inject a pubkey (e.g. you ran raw `osmo workflow submit` without `--set-env`) | `./airstack.sh osmo:down`, then resubmit with `./airstack.sh osmo:up --pool airstack` (it injects `SSH_PUB_KEY` automatically). | +| `docker pull` fails inside the pod with `unauthorized` | Your `airlab-docker-login` credential is missing or has the wrong Andrew ID/password | Re-run `./airstack.sh osmo:setup`. | +| Logs show `WARN: airlab-nucleus OSMO credential not set` and Isaac Sim asset loads fail, **or** Isaac Sim shows "Login Required: Unable to connect server omniverse://airlab-nucleus..." with the auth-service log showing `InternalCredentials.auth … 'username': '' … status: 'DENIED'` (no `Tokens.auth_with_api_token` call) | The pod is doing **password auth** instead of **API-token auth**. Inside the pod, `simulation/isaac-sim/docker/omni_pass.env` must have `OMNI_USER=$$omni-api-token` (literal `$$`, the sentinel for API-token auth — docker-compose v2 collapses `$$` to `$` on its way to the container). The OSMO entrypoint sets this automatically when `OMNI_PASS` looks like a JWT; if you see `OMNI_USER=` in the file, recreate the container with `docker compose --profile desktop --profile isaac-sim-livestream up -d isaac-sim-livestream` (`restart` does NOT re-read `env_file`). 
| +| Logs show `WARN: airlab-nucleus OSMO credential not set` and Isaac Sim asset loads fail, **or** Isaac Sim shows "Login Required: Unable to connect server omniverse://airlab-nucleus..." with the auth-service log showing `Tokens.auth_with_api_token … status: 'DENIED'` | Your `airlab-nucleus` API token is missing, expired, or revoked (rotation invalidates the predecessor). Confirm by SSH'ing the Nucleus host and running `sudo docker logs --tail 200 base_stack-nucleus-auth-1`. Regenerate the token at , then `./airstack.sh osmo:setup` and `./airstack.sh osmo:down && ./airstack.sh osmo:up --pool airstack` to resubmit (or live-edit `simulation/isaac-sim/docker/omni_pass.env` in the pod and recreate the `isaac-sim-livestream` container — see row above). | +| Isaac Sim container restarts repeatedly | GPU not visible to the inner Docker daemon (toolkit not configured on the node) | Lab admin task. From inside the pod: `docker info \| grep -i runtime` should list `nvidia`. | +| Isaac Sim is up but the WebRTC stream is blank | The Pegasus script isn't getting `--/app/livestream/enabled=true`, or the wrong Compose profile is active | In the integrated terminal: `docker logs airstack-isaac-sim-livestream-1`. Confirm `ISAAC_SIM_LIVESTREAM=true` and that the `isaac-sim-livestream` profile is the one running (`docker ps`). | +| Foxglove "no connection" | Port-forward died, GCS container hasn't started yet, or browser is caching an old connection | Re-run `./airstack.sh osmo:foxglove`; check `docker ps` shows `airstack-gcs-1` Up; try `ws://127.0.0.1:8766` instead of `ws://localhost:8766`. | +| First Remote-SSH connect takes forever | VS Code / Cursor downloading its remote server (~50 MB) into the fresh pod | Wait it out the first time. Subsequent connects to the same pod hit the cache. | +| **I forgot to push before tearing down** | The pod is still up; cancel hasn't fired yet | Don't run `./airstack.sh osmo:down`. 
SSH in via the existing port-forward (`./airstack.sh osmo:ide --no-open` if the tunnel is gone), push from the IDE terminal, *then* tear down. If the workflow has already terminated and the pod is gone, the work is gone — git is the only persistence layer. | + +## What survives `airstack osmo:down`? + +| Artifact | Lives in | Survives? | +|---|---|---| +| Code committed and pushed to a feature branch | GitHub | **Yes** | +| Code committed but not pushed | Pod-local `.git` | **No** | +| Uncommitted edits in the IDE | Pod-local working tree | **No** | +| `colcon build` outputs (`build/`, `install/`, `log/`) | `/root/AirStack/**/ros_ws/...` | **No** (gitignored Linux x86_64 binaries; rebuild trivially) | +| Inner-dockerd image cache | Pod-local Docker layer cache | **No** | +| Bag files, sim recordings, debug screenshots | `/root/AirStack/bags/`, etc. | **No** — pull selectively via `osmo workflow rsync download "$(cat ~/.airstack/osmo-state)" :` *before* tearing down | + +The rule of thumb: **commit + push every time you'd save a file in a +git-tracked sense.** The Source Control panel is the persistence boundary. + +## See also + +- [`osmo/README.md`](https://github.com/castacks/AirStack/blob/main/osmo/README.md) + — lab-admin reference (pool prerequisites, OSMO credential registration, + workspace image build, validation stages). +- [Foxglove Visualization](../gcs/foxglove.md) — full layout import + + panel-customisation flow once your `airstack osmo:foxglove` is up. +- [AGENTS.md](https://github.com/castacks/AirStack/blob/main/AGENTS.md) — + inside-the-pod workflow once you're attached: `bws`, `sws`, `docker exec`, + ROS 2 commands. +- [Getting Started](../getting_started/index.md) — the local-Linux-GPU + alternative. diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md index d1606f4e3..1a9a4836f 100644 --- a/docs/tutorials/index.md +++ b/docs/tutorials/index.md @@ -5,6 +5,7 @@ Step-by-step guides for common AirStack workflows. 
If you are new, start with ** | Tutorial | Description | |---|---| | [Getting Started](../getting_started.md) | Install AirStack, pull Docker images, launch a simulated robot, and fly it for the first time. | +| [AirStack on OSMO (Mac/Windows OK)](airstack_on_osmo.md) | Develop on AirStack from a Mac, Windows, or no-GPU Linux laptop using NVIDIA OSMO + VS Code/Cursor Remote-SSH. No local Docker, no NVIDIA drivers, no `airstack install` — just a clone of the repo, an SSH key, and the `osmo` CLI. | | [Multi-Robot Simulation](multi_robot_simulation.md) | Spin up multiple simulated robots in Isaac Sim and verify independent ROS 2 namespaces. | | [Autonomy Modes](autonomy_modes.md) | Understand `onboard_all`, `onboard_local`, and `offboard_global` modes and the commands to run each. | | [Deploying to Hardware](deploying_to_hardware.md) | Flash a Jetson or VOXL device, configure the robot hostname, and run the autonomy stack on a real drone. | diff --git a/gcs/foxglove_extensions/install.py b/gcs/foxglove_extensions/install.py index f948cac54..fc28de102 100644 --- a/gcs/foxglove_extensions/install.py +++ b/gcs/foxglove_extensions/install.py @@ -1,11 +1,34 @@ #!/usr/bin/env python3 +""" +Install AirStack Foxglove extensions into a Foxglove user-extensions dir. + +By default this targets the GCS container's bundled Foxglove app +(/root/.foxglove-studio/extensions), which is the entrypoint that +gcs/docker/gcs-base-docker-compose.yaml runs on container start. + +The src/dst paths can be overridden via env vars, which is how the +`airstack osmo:foxglove` wrapper reuses this same script to install the +extensions into the laptop's local Foxglove Desktop app before +port-forwarding the GCS bridge — that way the laptop's Foxglove sees +"Robot Tasks" / "Waypoint Editor" / "Polygon Editor" instead of the +"Unknown panel type: ..." placeholders. + +Env vars: + FOXGLOVE_EXT_SRC directory containing the extension subdirectories + (each with a package.json + dist/extension.js) + FOXGLOVE_EXT_DST target user-extensions directory, e.g.
+ ~/.foxglove-studio/extensions on Linux/macOS. +""" + import json import os import re import shutil -src = '/root/AirStack/gcs/foxglove_extensions' -dst = '/root/.foxglove-studio/extensions' +src = os.environ.get( + 'FOXGLOVE_EXT_SRC', '/root/AirStack/gcs/foxglove_extensions') +dst = os.path.expanduser(os.environ.get( + 'FOXGLOVE_EXT_DST', '/root/.foxglove-studio/extensions')) os.makedirs(dst, exist_ok=True) @@ -13,11 +36,16 @@ def _slug(s: str) -> str: return re.sub(r'[^a-z0-9-]+', '-', s.lower()).strip('-') -for ext in os.listdir(src): +installed = 0 +for ext in sorted(os.listdir(src)): pkg_path = os.path.join(src, ext, 'package.json') if not os.path.exists(pkg_path): continue pkg = json.load(open(pkg_path)) name = '{}.{}-{}'.format(_slug(pkg['publisher']), pkg['name'], pkg['version']) shutil.copytree(os.path.join(src, ext), os.path.join(dst, name), dirs_exist_ok=True) - print('Installed Foxglove extension:', name) + print('Installed Foxglove extension:', name, '->', os.path.join(dst, name)) + installed += 1 + +if installed == 0: + print('No Foxglove extensions found under', src) diff --git a/mkdocs.yml b/mkdocs.yml index 00a1aee14..c4d92fede 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -52,6 +52,7 @@ nav: - Home: docs/index.md - Getting Started: - docs/getting_started/index.md + - docs/tutorials/airstack_on_osmo.md - docs/getting_started/tutorials_reference.md - Development: - docs/development/index.md diff --git a/osmo/README.md b/osmo/README.md new file mode 100644 index 000000000..a9e33b41c --- /dev/null +++ b/osmo/README.md @@ -0,0 +1,301 @@ +# AirStack on OSMO + +This directory holds the bits that let students develop on AirStack remotely +through [NVIDIA OSMO](https://github.com/NVIDIA/OSMO): + +``` +osmo/ +├── README.md # This file (admin / operator reference) +├── workflows/ +│ └── airstack-dev.yaml # The OSMO workflow students submit +└── workspace/ + ├── Dockerfile # The airstack-osmo-workspace image + ├── sshd_config # Pubkey-only sshd config baked 
into the image + └── entrypoint.sh # Pod startup: sshd, dockerd, clone, airstack up +``` + +The student-facing walkthrough lives in +[`docs/tutorials/airstack_on_osmo.md`](../docs/tutorials/airstack_on_osmo.md) +— including the per-user **Step 0** for registering OSMO credentials. This +README is the **lab admin / operator** reference: pool requirements, +workspace image build & push, validation stages, plus a credential summary +for context. + +> **Scope:** developer workflow only. CI/CD on OSMO is **not** part of this +> integration — the existing `system-tests.yml` + OpenStack orchestrator path +> is unchanged. + +## Architecture in one minute + +A student submits one OSMO task that runs a Docker-in-Docker (DinD) pod with +sshd. Inside that pod, `airstack.sh up` brings up the regular +three-container AirStack stack (Isaac Sim, robot-desktop, GCS) on the inner +Docker daemon. The student attaches VS Code or Cursor over Remote-SSH and +streams Isaac Sim (WebRTC) and the GCS Foxglove bridge (websocket) back to +their laptop via `osmo workflow port-forward`. + +``` +Student laptop OSMO workspace pod (GPU) +───────────────── ───────────────────────────────────── +VS Code / Cursor ── ssh ──► port-forward 2200:22 ──► sshd +Isaac Sim WebRTC ── webrtc ► port-forward 47995… ──► inner isaac-sim ctnr +app.foxglove.dev ── ws ────► port-forward 8766 ────► inner gcs ctnr (8765) + ▲ + │ inner dockerd + │ (NVIDIA runtime) + │ + airstack.sh up brings these 3 up +``` + +## Pool requirements + +The OSMO pool the workflow runs on must satisfy: + +| Requirement | Why | +|---|---| +| GPU pool with NVIDIA driver + `nvidia-container-toolkit` on each node | Isaac Sim needs the GPU. The toolkit must be on the node so the inner `dockerd` (configured with `--add-runtime nvidia=...`, `default-runtime: nvidia`) can hand the device to the inner Isaac Sim container. 
| +| No NetworkPolicy blocking pod-namespace ports `47995–48012/tcp+udp`, `49000–49007/tcp+udp`, `49099/udp`, `49100/tcp`, `8766/tcp`, `22/tcp` | These are the ports `osmo workflow port-forward` reaches inside the pod NS for Isaac Sim WebRTC (including the pinned `49099/udp` media port and the `49100/tcp` signaling port), GCS Foxglove websocket, and sshd. | +| Resource limits ≥ `cpu: 16`, `memory: 64Gi`, `storage: 500Gi`, `gpu: 1` | Isaac Sim + AirStack images + `colcon build` working tree. These match the `resources:` block in `workflows/airstack-dev.yaml`, which requests `storage: 500Gi` because the extracted inner image set alone exceeds 100Gi. Adjust upward if running multiple robots or heavy bag recording. | + +`hostNetwork: true` is **not** required. `osmo workflow port-forward` reaches +the pod's network namespace, which is where the inner `dockerd` publishes +ports via standard NAT (or `network_mode: host` on individual inner +containers, both of which terminate at the pod NS, not the cluster node). + +## OSMO credentials (per user, one time) + +OSMO credentials live in **each user's** OSMO profile, not in a lab-wide +store. Every student registers their own three credentials with `osmo +credential set` once on their laptop. The full walkthrough — including the +exact `osmo credential set ...` commands and how to obtain a Nucleus API +token — lives in +[`docs/tutorials/airstack_on_osmo.md` Step 0](../docs/tutorials/airstack_on_osmo.md#step-0--register-your-osmo-credentials-one-time). + +The three credentials, summarized for quick reference: + +| Name | Type | Used for | Referenced in workflow YAML? | +|---|---|---|---| +| `airlab-docker-registry` | `REGISTRY` | OSMO's automatic pull of the workspace image (`airlab-docker.andrew.cmu.edu/airstack/airstack-osmo-workspace:...`) | No — OSMO auto-attaches it to any image whose hostname matches the credential's `registry=` field. | +| `airlab-docker-login` | `GENERIC` | `entrypoint.sh` calls `docker login airlab-docker.andrew.cmu.edu` on the **inner** dockerd before `airstack up`, so the inner Compose stack can pull AirStack images | Yes — exposed as env vars `AIRLAB_REGISTRY_USER`/`AIRLAB_REGISTRY_PASS`.
| +| `airlab-nucleus` | `GENERIC` | `entrypoint.sh` materializes `simulation/isaac-sim/docker/omni_pass.env` from it so Compose can env-file it into the Isaac Sim container | Yes — exposed as env vars `OMNI_USER`/`OMNI_PASS`/`OMNI_SERVER`. | + +The convenience helper `airstack osmo:setup` in +[`.airstack/modules/osmo.sh`](../.airstack/modules/osmo.sh) prompts for the +underlying values (Andrew ID, AirLab password, Nucleus API token) and runs +all three `osmo credential set` commands. + +> **Why a `REGISTRY` and a `GENERIC` credential for the same registry?** +> OSMO `REGISTRY` credentials drive Kubernetes `imagePullSecrets` — +> auto-attached but not exposed to the container as env vars. The +> **inner** dockerd (DinD) that `entrypoint.sh` starts is a separate +> Docker daemon and needs its own `docker login`. Hence the two-credential +> split. + +## Build & push the workspace image + +The workspace image is built once and pushed to the AirLab registry; students +never build it themselves. + +> **Always use `docker buildx build --platform linux/amd64 --push`.** +> OSMO pool workers are linux/amd64. Building with plain `docker build` on +> an Apple Silicon Mac silently produces a `linux/arm64` image and the +> resulting `latest` tag will fail every workflow with +> `no match for platform in manifest ...: not found` (the outer pod's +> image-pull bails before the entrypoint even runs). Forcing `--platform +> linux/amd64` cross-compiles for amd64 even on an arm64 host. `--push` +> is required because buildx cross-platform builds can't be loaded into a +> local Docker daemon — they live only in the build cache or the +> registry. Linux/amd64 admins can use plain `docker build && docker push`. 
+ +```bash +cd osmo/workspace + +# One-time builder setup (skip if `docker buildx ls` already shows a builder): +docker buildx create --use --name airstack-builder + +docker buildx build \ + --platform linux/amd64 \ + -t airlab-docker.andrew.cmu.edu/airstack/airstack-osmo-workspace:latest \ + --push \ + . +``` + +Verify the manifest has `linux/amd64` after pushing: + +```bash +docker manifest inspect airlab-docker.andrew.cmu.edu/airstack/airstack-osmo-workspace:latest \ + | grep -A2 architecture +# → "architecture": "amd64" +``` + +Tag a versioned release alongside `latest` if you change anything in +`Dockerfile`, `sshd_config`, or `entrypoint.sh`: + +```bash +docker buildx build \ + --platform linux/amd64 \ + -t airlab-docker.andrew.cmu.edu/airstack/airstack-osmo-workspace:latest \ + -t airlab-docker.andrew.cmu.edu/airstack/airstack-osmo-workspace:v0.1.0 \ + --push \ + . +``` + +Then update the `image:` field in +[`workflows/airstack-dev.yaml`](workflows/airstack-dev.yaml) to match. + +The image bakes: + +- Ubuntu 24.04 base with `docker-ce`, `docker-compose-plugin`, `nvidia-container-toolkit` +- `git`, `python3`, `curl` +- `openssh-server` with **password auth permanently disabled** (pubkey only) via the baked `sshd_config` +- The AirStack `airstack.sh` CLI script on `PATH` + +The image does **not** bake the AirStack source tree. `entrypoint.sh` clones +it on first start (and skips re-cloning across pod restarts). + +## Validation stages + +Run these in order against a fresh submission. Each unlocks the next; if (a) +fails don't bother trying (b). 
+ +### (a) sshd reachable, key auth works + +```bash +osmo workflow submit osmo/workflows/airstack-dev.yaml \ + --pool \ + --set-env "SSH_PUB_KEY=$(cat ~/.ssh/id_ed25519.pub)" +# → record + +osmo workflow port-forward workspace --port 2200:22 --connect-timeout 86400 & +# StrictHostKeyChecking=no + UserKnownHostsFile=/dev/null because every +# fresh pod has a different sshd host key — the previous workflow's +# fingerprint will always look like a "host key changed" attack +# otherwise. +ssh -p 2200 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \ + root@localhost 'echo ok && whoami' +# → "ok\nroot" +``` + +If SSH fails: check `osmo workflow logs workspace` for the +`SSH_PUB_KEY not set` error or for `sshd` failing to start. + +### (b) VS Code / Cursor Remote-SSH attaches and opens `/root/AirStack` + +Add to `~/.ssh/config`: + +``` +Host airstack-osmo + HostName localhost + Port 2200 + User root + # Each fresh pod has a new sshd host key, so accept-new doesn't help + # — the second workflow always trips the "host key changed" check. + # Bypass host-key checks for this loopback alias only; the security + # boundary is OSMO's authenticated port-forward, not the local key. + StrictHostKeyChecking no + UserKnownHostsFile /dev/null + LogLevel ERROR +``` + +Then in VS Code: Command Palette → **Remote-SSH: Connect to Host…** → +`airstack-osmo` → open folder `/root/AirStack`. The IDE will install its +remote server in the pod on first connect (~50 MB download, slow on a fresh +pod; cached afterwards). + +### (c) `airstack up` brings the three containers Up + +In the IDE's integrated terminal (or `osmo workflow exec`): + +```bash +docker ps +# → expect: airstack-isaac-sim-1, airstack-robot-desktop-1, airstack-gcs-1 +``` + +If any container is missing or restarting, the most common causes (in order): + +1. The user's `airlab-docker-login` GENERIC credential is wrong / unset → + inner `docker pull` from `airlab-docker.andrew.cmu.edu` failed. 
+ Re-run `airstack osmo:setup` (or the explicit `osmo credential set + airlab-docker-login ...` command in the tutorial Step 0). +2. `nvidia-container-toolkit` is not configured on the node → inner Isaac Sim + can't see the GPU. Check `docker info | grep -i runtime` inside the + workspace pod; you should see `nvidia` in the runtime list. +3. The pod ran out of `storage:` quota during the image pull. Bump it. + +### (d) Isaac Sim WebRTC client renders + +Two port-forwards (TCP + UDP). The UDP forward must include `49099`, the +pinned SRTP media port of the `isaac-sim-livestream` service: + +```bash +osmo workflow port-forward workspace \ + --port 47995-48012,49000-49007,49100 --connect-timeout 86400 & +osmo workflow port-forward workspace \ + --port 47995-48012,49000-49007,49099 --udp --connect-timeout 86400 & +``` + +Open the Omniverse Streaming Client (or a browser WebRTC client) at +`http://localhost`. + +If the stream is blank: check that the UDP forward above includes port `49099`, +and that the Pegasus standalone script was launched +with `--/app/livestream/enabled=true`. The +[`isaac-sim-livestream`](../simulation/isaac-sim/docker/docker-compose.yaml) +Compose profile is what wires that argument; verify the workflow YAML has +`ISAAC_SIM_LIVESTREAM=true` in `environment:`. + +### (e) Foxglove websocket loads the AirStack layout + +```bash +osmo workflow port-forward workspace --port 8766:8766 --connect-timeout 86400 & +``` + +Open [https://app.foxglove.dev](https://app.foxglove.dev) → **Open +connection** → `ws://localhost:8766` → **Layouts** → **Import from file** → +[`gcs/foxglove_extensions/airstack_default.json`](../gcs/foxglove_extensions/airstack_default.json). + +The wider Foxglove layout / panel-import flow is documented in +[`docs/gcs/foxglove.md`](../docs/gcs/foxglove.md); the only OSMO-specific +piece is the `port-forward` line in front of it. + +## Nucleus connectivity from OSMO + +`airlab-nucleus.andrew.cmu.edu` runs the standard Omniverse Enterprise +Nucleus stack with TLS termination at its Ingress Router (NGINX) on **port +443**.
Per [NVIDIA's TLS doc](https://docs.omniverse.nvidia.com/nucleus/latest/enterprise/installation/tls.html), +clients only need outbound TCP **443** — the Ingress Router path-based- +routes requests (`/omni/api`, `/omni/auth`, `/omni/lft`, `/omni/conn`, +`/omni/web3/...`) to the internal service ports (3009, 3100, 3030, 3019, +3400). Omniclient detects SSL/TLS and prefers it, so the OSMO pod (whose +egress allows 80/443/22) reaches Nucleus over the same single 443 the +Web3 navigator uses. **The native protocol ports 3009–3180 do NOT need to +be open from OSMO** as long as TLS is configured on the Nucleus side. + +If you see Isaac Sim's "Login Required" popup at startup: + +1. **Check the auth-service log on the Nucleus host** (`ssh + ubuntu@; sudo docker logs --tail 200 + base_stack-nucleus-auth-1`). Look for `InternalCredentials.auth: + {... 'username': ''} → status: 'DENIED'` lines. That + means the API token in your `airlab-nucleus` OSMO credential is + revoked, expired, or has whitespace/quoting damage. +2. **Regenerate the token** at + → right-click the + cloud icon → **API Tokens** → create a new one. +3. **Update the OSMO credential** with `airstack osmo:setup` (or the + raw `osmo credential set airlab-nucleus ...` command from the + tutorial Step 0) and **resubmit the workflow** so the new token + lands in `omni_pass.env` on pod boot. To live-patch a running pod + instead, edit `simulation/isaac-sim/docker/omni_pass.env` inside + the workspace and `docker compose --profile isaac-sim-livestream + restart isaac-sim-livestream`. + +## Out of scope (followups) + +- **OSMO-native split** — three separate OSMO tasks for `isaac-sim` / + `robot-desktop` / `gcs` instead of one DinD pod. Larger refactor of + Compose, DDS networking, and `tests/conftest.py`. The `osmo/workflows/` + layout leaves room for additional workflow files when this is done. +- **Persistent workspace** — mount `/root/AirStack` to a PVC so uncommitted + edits survive `osmo workflow cancel`. 
Pool-policy dependent. +- **CI/CD on OSMO** — the existing `.github/workflows/system-tests.yml` + + OpenStack ephemeral runner path is unchanged. Migrating CI to OSMO is a + separate effort. diff --git a/osmo/workflows/airstack-dev.yaml b/osmo/workflows/airstack-dev.yaml new file mode 100644 index 000000000..7c7adc943 --- /dev/null +++ b/osmo/workflows/airstack-dev.yaml @@ -0,0 +1,99 @@ +# AirStack remote developer workflow on OSMO. +# +# Submits a single GPU task ("workspace") that runs Docker-in-Docker +# (DinD) and brings up the regular AirStack three-container stack (Isaac Sim +# with WebRTC livestream, robot-desktop, GCS) on the inner Docker daemon. The +# task also runs sshd so a student can attach VS Code or Cursor over Remote-SSH +# from their laptop (Mac, Windows, or Linux — no local Docker / NVIDIA driver +# required). +# +# To submit (replace and substitute your actual pubkey): +# +# osmo workflow submit osmo/workflows/airstack-dev.yaml \ +# --pool \ +# --set-env "SSH_PUB_KEY=$(cat ~/.ssh/id_ed25519.pub)" +# +# To stream the IDE, Isaac Sim, and Foxglove back to the laptop: +# +# # SSH so VS Code / Cursor Remote-SSH can attach (1 terminal): +# osmo workflow port-forward workspace --port 2200:22 --connect-timeout 86400 +# +# # Isaac Sim WebRTC (2 terminals — TCP + UDP): +# osmo workflow port-forward workspace \ +# --port 47995-48012,49000-49007,49100 --connect-timeout 86400 +# osmo workflow port-forward workspace \ +# --port 47995-48012,49000-49007 --udp --connect-timeout 86400 +# +# # GCS Foxglove websocket (1 terminal): +# osmo workflow port-forward workspace --port 8766:8766 --connect-timeout 86400 +# +# See docs/tutorials/airstack_on_osmo.md for the full walkthrough and +# osmo/README.md for the lab-admin setup (pool prerequisites, OSMO credential +# registration, workspace image build). 
+ +workflow: + name: airstack-dev + groups: # `groups:` keeps room to add a separate + # dind sidecar or split out isaac-sim / + # robot-desktop / gcs as their own + # tasks later. For now, one lead task. + - name: airstack + tasks: + - name: workspace + lead: true + image: airlab-docker.andrew.cmu.edu/airstack/airstack-osmo-workspace:latest + # Required so the inner dockerd can run. hostNetwork is intentionally + # NOT set — osmo workflow port-forward reaches the pod NS, where the + # inner dockerd publishes ports via standard NAT. + privileged: true + command: ["bash"] + args: ["/usr/local/bin/entrypoint.sh"] + environment: + # Behaviour switches consumed by entrypoint.sh and airstack.sh: + AUTOLAUNCH: "true" # boot AirStack on startup + ISAAC_SIM_LIVESTREAM: "true" # use the isaac-sim-livestream profile + NUM_ROBOTS: "1" + AIRSTACK_BRANCH: "main" # branch entrypoint.sh clones + AIRSTACK_REPO_URL: "https://github.com/castacks/AirStack.git" + # SSH_PUB_KEY is supplied at submit time: + # --set-env "SSH_PUB_KEY=$(cat ~/.ssh/id_ed25519.pub)" + credentials: + # Each student registers these in their own OSMO profile once. + # See docs/tutorials/airstack_on_osmo.md "Step 0". + # + # airlab-nucleus (GENERIC) — materialized into omni_pass.env by + # entrypoint.sh so Compose env_files it into the Isaac Sim ctnr. + airlab-nucleus: + OMNI_USER: omni_user + OMNI_PASS: omni_pass + OMNI_SERVER: omni_server + # airlab-docker-login (GENERIC) — exposed as env vars so the + # inner dockerd can authenticate to airlab-docker.andrew.cmu.edu + # when `airstack up` triggers an AirStack image-pull. (The + # *outer* pod's image-pull of airstack-osmo-workspace is handled + # automatically by a sibling REGISTRY-type credential which does + # not need a reference here — OSMO auto-attaches it on submit.) 
+ airlab-docker-login: + AIRLAB_REGISTRY_USER: username + AIRLAB_REGISTRY_PASS: password + + resources: + default: + cpu: 16 + gpu: 1 + memory: 64Gi + # AirStack image set is large: airstack-dev-9 (2026-05-14) hit the + # 100Gi container ephemeral cap during inner Compose's first image + # extract, before the second image even started downloading. So the + # full set of inner images alone exceeds 100Gi extracted. Going to + # 500Gi to comfortably hold isaac-sim + robot-desktop + gcs images, + # plus the AirStack source clone, colcon build output, and bag + # recordings. The airstack pool's workers have 4.2Ti of ephemeral + # capacity each (root disks resized 2026-05-14), so this leaves + # plenty of room for other tenants on the shared workers. + storage: 500Gi + + timeout: + # 8h covers a normal dev session. Bump for longer runs; cancel manually + # before the timeout if you want to free the GPU early. + exec_timeout: 8h diff --git a/osmo/workspace/Dockerfile b/osmo/workspace/Dockerfile new file mode 100644 index 000000000..e80f3be59 --- /dev/null +++ b/osmo/workspace/Dockerfile @@ -0,0 +1,112 @@ +# airstack-osmo-workspace +# +# Image used by the OSMO airstack-dev workflow. Boots into a Docker-in-Docker +# (DinD) pod with sshd on :22 so VS Code / Cursor Remote-SSH can attach. The +# inner dockerd then runs the regular AirStack docker-compose stack (Isaac +# Sim, robot-desktop, GCS) on the GPU forwarded into the pod. +# +# Built and pushed by the lab admin (see osmo/README.md): +# +# cd osmo/workspace +# docker buildx build --platform linux/amd64 \ +# -t airlab-docker.andrew.cmu.edu/airstack/airstack-osmo-workspace:latest \ +# --push . +# +# Use `docker buildx build --platform linux/amd64 --push` (not plain +# `docker build && docker push`) so an Apple Silicon Mac doesn't silently +# push an arm64 image; OSMO workers are amd64 and would fail every +# workflow with "no match for platform in manifest". Students never +# build this image. 
+ +FROM ubuntu:24.04 + +ENV DEBIAN_FRONTEND=noninteractive +ENV LANG=C.UTF-8 +ENV LC_ALL=C.UTF-8 + +# Base utilities + sshd + dev ergonomics. python3 is here so the airstack.sh +# CLI's helper scripts can shell out to python3 without extra installs. +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + git \ + git-lfs \ + gnupg \ + iproute2 \ + iptables \ + jq \ + less \ + locales \ + lsb-release \ + openssh-server \ + procps \ + python3 \ + python3-pip \ + rsync \ + sudo \ + tmux \ + tzdata \ + vim-tiny \ + wget \ + && locale-gen C.UTF-8 \ + && rm -rf /var/lib/apt/lists/* + +# Docker CE + Compose plugin (required for `airstack up` to work inside the +# pod). The same install procedure as get.docker.com but pinned to apt repos +# so we can upgrade explicitly. +RUN install -m 0755 -d /etc/apt/keyrings \ + && curl -fsSL https://download.docker.com/linux/ubuntu/gpg \ + | gpg --dearmor -o /etc/apt/keyrings/docker.gpg \ + && chmod a+r /etc/apt/keyrings/docker.gpg \ + && echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] \ + https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" \ + > /etc/apt/sources.list.d/docker.list \ + && apt-get update && apt-get install -y --no-install-recommends \ + docker-ce \ + docker-ce-cli \ + containerd.io \ + docker-buildx-plugin \ + docker-compose-plugin \ + fuse-overlayfs \ + && rm -rf /var/lib/apt/lists/* + +# Note: dockerd inside an OSMO/k8s pod is running on top of an overlayfs +# rootfs (the pod's own /). Docker refuses overlay2-on-overlayfs, so without +# fuse-overlayfs the entrypoint's storage-driver fallback chain lands on +# vfs, which has no copy-on-write and bloats the AirStack image set ~3x +# (airstack-dev-10, 2026-05-14 burned 270Gi for 2 of 3 inner images). +# fuse-overlayfs gives proper CoW for DinD over overlayfs. + +# NVIDIA Container Toolkit, configured to register the `nvidia` runtime with +# dockerd. 
This is what lets the inner Isaac Sim container see the GPU +# forwarded into the workspace pod. The pool's nodes still need the host-side +# NVIDIA driver + nvidia-container-toolkit; this just configures the inner +# dockerd. +RUN curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \ + | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \ + && curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \ + | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \ + > /etc/apt/sources.list.d/nvidia-container-toolkit.list \ + && apt-get update && apt-get install -y --no-install-recommends \ + nvidia-container-toolkit \ + && rm -rf /var/lib/apt/lists/* + +# sshd: pubkey-only, no password auth ever. Host keys generated at runtime in +# entrypoint.sh so each pod has unique keys (good practice; harmless cost). +COPY sshd_config /etc/ssh/sshd_config +RUN chmod 644 /etc/ssh/sshd_config && mkdir -p /var/run/sshd + +# entrypoint: starts sshd, dockerd, clones AirStack, materializes secrets, +# runs `airstack up`, then sleeps so port-forwards keep working. +COPY entrypoint.sh /usr/local/bin/entrypoint.sh +RUN chmod 0755 /usr/local/bin/entrypoint.sh + +# airstack.sh is not baked into this image: entrypoint.sh symlinks it from the +# cloned repo into /usr/local/bin once the clone step has finished. +WORKDIR /root + +# Default to a long-running entrypoint. The OSMO workflow overrides command +# and args to run `bash /usr/local/bin/entrypoint.sh`, i.e. this same script. +# Either path works. +CMD ["/usr/local/bin/entrypoint.sh"] diff --git a/osmo/workspace/entrypoint.sh b/osmo/workspace/entrypoint.sh new file mode 100755 index 000000000..d119016bf --- /dev/null +++ b/osmo/workspace/entrypoint.sh @@ -0,0 +1,272 @@ +#!/usr/bin/env bash +# entrypoint.sh — airstack-osmo-workspace pod startup. +# +# Order of operations: +# 1.
Install SSH_PUB_KEY into authorized_keys, generate sshd host keys, +# start sshd. Done first so the student can SSH in even if a later step +# fails (huge debugging accelerator). +# 2. Start the inner Docker daemon (DinD) with the NVIDIA runtime so Isaac +# Sim sees the GPU forwarded into the pod. +# 3. Clone AirStack into /root/AirStack (skipped if already cloned by a +# previous pod incarnation). +# 4. Materialize simulation/isaac-sim/docker/omni_pass.env from the +# `airlab-nucleus` OSMO GENERIC credential. +# 5. docker login airlab-docker.andrew.cmu.edu using the +# `airlab-docker-login` OSMO GENERIC credential. +# 6. cd /root/AirStack && ./airstack.sh up +# 7. sleep infinity so port-forwards keep working. +# +# All steps are idempotent across pod restarts: re-running this script +# inside the same pod is safe. + +set -uo pipefail + +log() { echo "[entrypoint] $*"; } +fail() { echo "[entrypoint] ERROR: $*" >&2; exit 1; } + +# ─── 0. Stale-state cleanup ──────────────────────────────────────────────── +# +# Cursor / VS Code Remote-SSH guards its server install with a file lock +# at /tmp/cursor-remote-lock.* (and a sibling .target file naming the PIDs +# that hold it). If a previous connect attempt crashed mid-install +# (e.g. the port-forward died while the install was in flight, as +# happened on airstack-dev-13 / 2026-05-14), the lock file outlives the +# dead PIDs and every subsequent IDE retry bails out *silently* at the +# lock check — leaving an empty bin// dir and the user staring at +# a "Connecting to remote host (attempt 1)..." spinner forever. +# +# A fresh pod has nothing to preserve here, so clearing these on startup +# is always safe. +rm -f /tmp/cursor-remote-lock.* /tmp/vscode-remote-lock.* 2>/dev/null || true + +# ─── 1. SSHD ─────────────────────────────────────────────────────────────── + +log "configuring sshd" + +mkdir -p /root/.ssh && chmod 700 /root/.ssh + +if [ -z "${SSH_PUB_KEY:-}" ]; then + fail "SSH_PUB_KEY not set. 
Re-submit with --set-env \"SSH_PUB_KEY=\$(cat ~/.ssh/id_ed25519.pub)\"" +fi + +# Always overwrite — last submit wins. Single-user dev pod. +echo "${SSH_PUB_KEY}" > /root/.ssh/authorized_keys +chmod 600 /root/.ssh/authorized_keys + +# Generate fresh host keys if missing (first boot of this pod). +ssh-keygen -A + +mkdir -p /var/run/sshd +/usr/sbin/sshd +log "sshd listening on :22" + +# ─── 2. Inner dockerd (DinD) ─────────────────────────────────────────────── + +log "starting inner dockerd (DinD with NVIDIA runtime)" + +# nvidia-container-toolkit ships a CLI that registers the nvidia runtime in +# the dockerd config and (optionally) sets it as the default. We want it as +# the default so `airstack up` doesn't have to specify --runtime. +nvidia-ctk runtime configure --runtime=docker --set-as-default || \ + log "WARN: nvidia-ctk runtime configure failed — Isaac Sim probably won't see the GPU" + +# Pre-flight diagnostics so failures surface in OSMO logs (the pod is gone +# by the time anyone reads /var/log/dockerd.log otherwise). +log "diagnostics: kernel=$(uname -r) cgroups=$(stat -fc %T /sys/fs/cgroup 2>/dev/null) rootfs=$(stat -fc %T / 2>/dev/null)" +log "diagnostics: /var/lib/docker fs=$(stat -fc %T /var/lib/docker 2>/dev/null || echo absent)" + +# Inner dockerd setup. We try storage drivers in order: overlay2 (fastest, +# works on most modern hosts) → fuse-overlayfs (rootless-friendly, may not be +# present) → vfs (always works, slowest). Falling back avoids the +# overlay-on-overlay failure that bites DinD on some kernel/storage +# combinations. +# +# data-root: the OSMO pod's `/` is itself an overlay (containerd's +# snapshot), and Linux refuses to stack a second overlayfs on top of an +# overlay rootfs — that's exactly why dockerd here used to fall through +# to fuse-overlayfs. 
fuse-overlayfs is a userspace FUSE driver, and every +# `creat()` during layer extraction pays a kernel↔userspace round-trip, +# which crushes throughput on the apt/pip/ROS layers (observed: ~30-50 +# MB/s vs. ~480 MB/s on layers with few large files). Pointing data-root +# at /osmo/run/docker (the kubelet emptyDir bind-mount, backed by ext4 on +# /dev/vda3) lets us use kernel overlay2 instead, restoring the 10× +# extraction speed-up. emptyDir lives for the workflow's lifetime, which +# is exactly the docker-cache lifetime we want anyway. +DOCKERD_DATA_ROOT="${DOCKERD_DATA_ROOT:-}" +if [ -z "$DOCKERD_DATA_ROOT" ]; then + if [ -d /osmo/run ] && [ -w /osmo/run ]; then + DOCKERD_DATA_ROOT=/osmo/run/docker + else + DOCKERD_DATA_ROOT=/var/lib/docker + fi +fi +mkdir -p "$DOCKERD_DATA_ROOT" +log "dockerd data-root: $DOCKERD_DATA_ROOT (fs=$(stat -fc %T "$DOCKERD_DATA_ROOT" 2>/dev/null))" + +# Concurrency: dockerd's defaults are --max-concurrent-downloads=3 and +# --max-concurrent-uploads=5. With 2 GB+ AirStack image blobs on a 10 GbE +# pool, a single TLS pull stream tops out around 300-500 MiB/s (CPU-bound +# on the registry-side TLS encryption), so 3 parallel streams cap the +# whole bring-up around the 300 MiB/s mark seen empirically against the +# airlab-backup-10g registry — even though Ceph + 10 GbE can do far more. +# Bumping to 10 streams overlaps blob downloads enough to saturate the +# pipe without overwhelming the registry. Override with DOCKERD_MAX_* +# env vars at submit time if a particular pool needs different tuning. 
+DOCKERD_MAX_DOWNLOADS="${DOCKERD_MAX_DOWNLOADS:-10}"
+DOCKERD_MAX_UPLOADS="${DOCKERD_MAX_UPLOADS:-10}"
+
+# Start the inner dockerd with one storage driver and wait until it answers
+# `docker info`.
+#
+# Globals:   DOCKERD_DATA_ROOT, DOCKERD_MAX_DOWNLOADS, DOCKERD_MAX_UPLOADS (read)
+#            DOCKERD_START_TIMEOUT (read, optional; seconds, default 30)
+#            DOCKERD_PID (written; pid of the most recent attempt)
+# Arguments: $1 - storage driver name passed to --storage-driver
+# Outputs:   progress via log(); on failure the last 40 lines of
+#            /var/log/dockerd.log, each prefixed with "[dockerd] "
+# Returns:   0 once `docker info` succeeds; 1 if dockerd exits early or does
+#            not answer within the timeout
+_start_dockerd() {
+  local driver="$1"
+  local timeout="${DOCKERD_START_TIMEOUT:-30}"
+  local tick
+  # Truncate so the tail printed on failure only shows this attempt.
+  : > /var/log/dockerd.log
+  nohup dockerd \
+    --host=unix:///var/run/docker.sock \
+    --data-root="$DOCKERD_DATA_ROOT" \
+    --storage-driver="$driver" \
+    --max-concurrent-downloads="$DOCKERD_MAX_DOWNLOADS" \
+    --max-concurrent-uploads="$DOCKERD_MAX_UPLOADS" \
+    > /var/log/dockerd.log 2>&1 &
+  DOCKERD_PID=$!
+  log "dockerd started (pid=$DOCKERD_PID, data-root=$DOCKERD_DATA_ROOT, storage-driver=$driver); waiting for socket"
+  for ((tick = 0; tick < timeout; tick++)); do
+    if docker info >/dev/null 2>&1; then
+      log "dockerd ready (storage-driver=$driver)"
+      return 0
+    fi
+    if ! kill -0 "$DOCKERD_PID" 2>/dev/null; then
+      log "dockerd exited; tailing /var/log/dockerd.log:"
+      tail -40 /var/log/dockerd.log | sed 's/^/[dockerd] /'
+      return 1
+    fi
+    sleep 1
+  done
+  log "dockerd unresponsive after ${timeout}s; tailing /var/log/dockerd.log:"
+  tail -40 /var/log/dockerd.log | sed 's/^/[dockerd] /'
+  # Make sure the hung daemon is really gone before the caller tries the next
+  # storage driver; otherwise the new dockerd races the old one for
+  # /var/run/docker.sock and the pid file. TERM first, KILL after 10s.
+  kill "$DOCKERD_PID" 2>/dev/null || true
+  for ((tick = 0; tick < 10; tick++)); do
+    kill -0 "$DOCKERD_PID" 2>/dev/null || break
+    sleep 1
+  done
+  kill -9 "$DOCKERD_PID" 2>/dev/null || true
+  wait "$DOCKERD_PID" 2>/dev/null || true
+  return 1
+}
+
+# Try storage drivers from fastest to most compatible; stop at the first one
+# that yields a responsive daemon.
+DOCKERD_OK=false
+for drv in overlay2 fuse-overlayfs vfs; do
+  if _start_dockerd "$drv"; then
+    DOCKERD_OK=true
+    break
+  fi
+  log "WARN: dockerd failed with storage-driver=$drv; trying next"
+done
+if [ "$DOCKERD_OK" != "true" ]; then
+  fail "dockerd refused to start with any of overlay2 / fuse-overlayfs / vfs"
+fi
+
+# ─── 3. Clone AirStack ─────────────────────────────────────────────────────
+
+AIRSTACK_REPO_URL="${AIRSTACK_REPO_URL:-https://github.com/castacks/AirStack.git}"
+AIRSTACK_BRANCH="${AIRSTACK_BRANCH:-main}"
+AIRSTACK_ROOT=/root/AirStack
+
+# An existing .git directory means the tree is already there, so the clone is
+# skipped and any edits made in it are left untouched.
+if [ ! -d "$AIRSTACK_ROOT/.git" ]; then
+  log "cloning $AIRSTACK_REPO_URL ($AIRSTACK_BRANCH) -> $AIRSTACK_ROOT"
+  git clone --recursive --branch "$AIRSTACK_BRANCH" "$AIRSTACK_REPO_URL" "$AIRSTACK_ROOT" \
+    || fail "git clone failed"
+else
+  log "$AIRSTACK_ROOT already cloned (skipping)"
+fi
+
+# Make sure the airstack CLI is on PATH for interactive shells.
+ln -sf "$AIRSTACK_ROOT/airstack.sh" /usr/local/bin/airstack +ln -sf "$AIRSTACK_ROOT/airstack.sh" /usr/local/bin/airstack.sh + +# ─── 4. omni_pass.env from airlab-nucleus credential ─────────────────────── + +OMNI_PASS_FILE="$AIRSTACK_ROOT/simulation/isaac-sim/docker/omni_pass.env" + +if [ -z "${OMNI_USER:-}" ] || [ -z "${OMNI_PASS:-}" ]; then + log "WARN: airlab-nucleus OSMO credential not set." + log "WARN: Run on your laptop:" + log "WARN: osmo credential set airlab-nucleus --type GENERIC \\" + log "WARN: --payload omni_user= omni_pass= \\" + log "WARN: omni_server=omniverse://airlab-nucleus.andrew.cmu.edu/NVIDIA/Assets/Isaac/5.1" + log "WARN: Falling back to guest/guest (read-only Nucleus) — Isaac Sim assets may fail to load." +fi + +# Default to read-only Nucleus access so a missing credential degrades +# instead of crashing the pod. +: "${OMNI_USER:=guest}" +: "${OMNI_PASS:=guest}" +: "${OMNI_SERVER:=omniverse://airlab-nucleus.andrew.cmu.edu/NVIDIA/Assets/Isaac/5.1}" + +# If OMNI_PASS looks like a Nucleus API JWT (header starts with `eyJ`), +# switch to API-token auth: omniclient expects the literal sentinel +# username `$omni-api-token` paired with the JWT as the password. +# Setting OMNI_USER to the actual Andrew ID would route the JWT through +# the password-verification path instead and Nucleus would silently +# DENY (visible only in the auth-service log as +# `InternalCredentials.auth … 'username': '' … status: DENIED`). +# +# docker-compose v2 interpolates env_file values, so the literal `$` +# must be doubled to `$$` to survive Compose's parser. The container +# ultimately sees `OMNI_USER=$omni-api-token`. 
+case "$OMNI_PASS" in + eyJ*.*.*) + log "OMNI_PASS looks like a JWT — using API-token auth (OMNI_USER=\$omni-api-token)" + OMNI_USER_LINE='OMNI_USER=$$omni-api-token' + ;; + *) + OMNI_USER_LINE="OMNI_USER=${OMNI_USER}" + ;; +esac + +log "writing $OMNI_PASS_FILE (${OMNI_USER_LINE}, omni_server=${OMNI_SERVER})" +cat > "$OMNI_PASS_FILE" < password=" +fi + +# ─── 6. airstack up ──────────────────────────────────────────────────────── + +# Honor optional overrides passed in via OSMO env. Defaults match a "single +# robot, Isaac Sim with WebRTC livestream" dev session. +export AUTOLAUNCH="${AUTOLAUNCH:-true}" +export NUM_ROBOTS="${NUM_ROBOTS:-1}" +export ISAAC_SIM_LIVESTREAM="${ISAAC_SIM_LIVESTREAM:-true}" + +# COMPOSE_PROFILES selection: the default `desktop,isaac-sim` from .env runs +# the standard isaac-sim service. If the student wants livestream, they (or +# we) swap to the isaac-sim-livestream profile, which is the OSMO-friendly +# variant defined in simulation/isaac-sim/docker/docker-compose.yaml. +if [ "$ISAAC_SIM_LIVESTREAM" = "true" ]; then + export COMPOSE_PROFILES="${COMPOSE_PROFILES:-desktop,isaac-sim-livestream}" +else + export COMPOSE_PROFILES="${COMPOSE_PROFILES:-desktop,isaac-sim}" +fi + +log "airstack up (COMPOSE_PROFILES=$COMPOSE_PROFILES, NUM_ROBOTS=$NUM_ROBOTS, livestream=$ISAAC_SIM_LIVESTREAM)" +cd "$AIRSTACK_ROOT" +./airstack.sh up || log "WARN: airstack up exited non-zero — pod stays alive for debugging via SSH" + +# ─── 7. 
Sleep ────────────────────────────────────────────────────────────── + +log "entrypoint complete; sleeping forever so port-forwards keep working" +log "pod-side log paths:" +log " - dockerd: /var/log/dockerd.log" +log " - airstack: docker logs airstack-isaac-sim-1 / airstack-robot-desktop-1 / airstack-gcs-1" +exec sleep infinity diff --git a/osmo/workspace/sshd_config b/osmo/workspace/sshd_config new file mode 100644 index 000000000..097efdb21 --- /dev/null +++ b/osmo/workspace/sshd_config @@ -0,0 +1,41 @@ +# sshd_config baked into airstack-osmo-workspace. +# +# Permanently disables password auth — the only way in is via a pubkey +# installed by entrypoint.sh from the SSH_PUB_KEY env var (passed at submit +# time with `osmo workflow submit ... --set-env "SSH_PUB_KEY=$(cat ~/.ssh/id_ed25519.pub)"`). + +Port 22 +AddressFamily any +ListenAddress 0.0.0.0 +ListenAddress :: + +# Pubkey only. +PasswordAuthentication no +PubkeyAuthentication yes +ChallengeResponseAuthentication no +KbdInteractiveAuthentication no +PermitEmptyPasswords no + +# Single-user dev pod; the IDE attaches as root because /root/AirStack and +# the existing AirStack ergonomics expect it. PermitRootLogin +# prohibit-password forbids password root login but allows pubkey root login. +PermitRootLogin prohibit-password + +# Standard auth path. +AuthorizedKeysFile .ssh/authorized_keys + +UsePAM yes + +# Performance / VS Code-Remote-SSH friendliness: +# - AcceptEnv lets the IDE forward only LANG and the LC_* locale variables. +# - X11 forwarding off (no display in the pod). +# - Allow forwarding so port-forwards through SSH are permitted if students +# want to layer their own. +AcceptEnv LANG LC_* +X11Forwarding no +AllowAgentForwarding yes +AllowTcpForwarding yes +PrintMotd no + +# Subsystems VS Code Remote needs for sftp/file ops.
+Subsystem sftp /usr/lib/openssh/sftp-server diff --git a/robot/docker/Dockerfile.robot b/robot/docker/Dockerfile.robot index 55b1ad40f..8112a0744 100644 --- a/robot/docker/Dockerfile.robot +++ b/robot/docker/Dockerfile.robot @@ -78,6 +78,7 @@ RUN apt update && apt install -y --no-install-recommends \ python3-rosdep \ tmux \ gdb \ + xvfb \ && rm -rf /var/lib/apt/lists/* # Install any additional ROS2 packages diff --git a/robot/docker/docker-compose.yaml b/robot/docker/docker-compose.yaml index dfbe71fd3..82cbef07b 100644 --- a/robot/docker/docker-compose.yaml +++ b/robot/docker/docker-compose.yaml @@ -30,6 +30,11 @@ services: # 'command' uses variables so that it can be shared across robot-desktop and robot-l4t, with different launch packages and roles. command: > bash -c " + if [ -z \"$$DISPLAY\" ]; then + command -v Xvfb >/dev/null 2>&1 || (apt-get update && apt-get install -y --no-install-recommends xvfb); + Xvfb :99 -screen 0 1280x720x24 -ac +extension GLX +render -noreset >/tmp/xvfb.log 2>&1 & + export DISPLAY=:99; + fi; service ssh restart; tmux new -d -s bringup; if [ $$AUTOLAUNCH == 'true' ]; then diff --git a/simulation/isaac-sim/docker/docker-compose.yaml b/simulation/isaac-sim/docker/docker-compose.yaml index a1b40ae56..dfd699aa4 100644 --- a/simulation/isaac-sim/docker/docker-compose.yaml +++ b/simulation/isaac-sim/docker/docker-compose.yaml @@ -94,3 +94,61 @@ services: tmux send-keys -t isaac '/isaac-sim/runapp.sh' ENTER; sleep infinity" networks: !reset null + + # =================================================================================================================== + # WebRTC livestream variant for OSMO / remote dev. Headless: no X server, + # no display, no GUI window. 
Kit's `omni.kit.livestream.webrtc` extension + # serves WebSocket signaling on TCP 49100 and SRTP media on UDP 49099 (the + # latter pinned by `app.livestream.fixedHostPort/minHostPort/maxHostPort=49099` + # in the Pegasus launch script — see example_one_px4_pegasus_launch_script.py). + # Those two ports are published into the host (the OSMO workspace pod's + # network namespace) so `osmo workflow port-forward` can reach them. Kit + # 107's default media-port range is wider and dynamic, so pinning to a + # known value is the only way to keep the forward surface to two ports. + # + # Selected via COMPOSE_PROFILES (e.g. `desktop,isaac-sim-livestream`) in + # osmo/workspace/entrypoint.sh, or by setting ISAAC_SIM_LIVESTREAM=true. + isaac-sim-livestream: + extends: + service: isaac-sim + container_name: isaac-sim-livestream + profiles: !override + - isaac-sim-livestream + # Always run the Pegasus standalone path; the livestream branch in the + # script is gated on ISAAC_SIM_LIVESTREAM=true (env-driven, additive + # to the existing script behavior). + command: > + bash -c " + tmux new -d -s isaac; + tmux send-keys -t isaac 'PYTHONPATH=\"$$ISAAC_SIM_PYTHONPATH\" /isaac-sim/python.sh /isaac-sim/AirStack/simulation/isaac-sim/launch_scripts/${ISAAC_SIM_SCRIPT_NAME} --ext-folder ~/.local/share/ov/data/documents/Kit/shared/exts --/app/livestream/enabled=true' ENTER; + sleep infinity" + environment: + # Inherit everything from isaac-sim and append: + - ISAAC_SIM_LIVESTREAM=true + - ISAAC_SIM_USE_STANDALONE=true + - ISAAC_SIM_HEADLESS=true + # Publish the WebRTC livestream ports to the pod NS. Bridge-mode + + # publish (the conservative choice) keeps the rest of the stack on + # airstack_network for DDS multicast. 
+ ports: + - "49100:49100/tcp" # WebSocket signaling (omni.kit.livestream.webrtc, app.livestream.port=49100) + - "49099:49099/udp" # SRTP media (pinned via app.livestream.fixedHostPort=49099 in the launch script) + # Drop X11-specific volume mounts inherited from the isaac-sim service — + # there is no X server in an OSMO pod. + volumes: !override + - $HOME/docker/isaac-sim/cache/main:/isaac-sim/.cache:rw + - $HOME/docker/isaac-sim/cache/computecache:/isaac-sim/.nv/ComputeCache:rw + - $HOME/docker/isaac-sim/logs:/isaac-sim/.nvidia-omniverse/logs:rw + - $HOME/docker/isaac-sim/config:/isaac-sim/.nvidia-omniverse/config:rw + - $HOME/docker/isaac-sim/data:/isaac-sim/.local/share/ov/data:rw + - $HOME/docker/isaac-sim/pkg:/isaac-sim/.local/share/ov/pkg:rw + - ../extensions/PegasusSimulator/extensions/pegasus.simulator:/isaac-sim/.local/share/ov/data/documents/Kit/shared/exts/pegasus.simulator/:rw + - ./omniverse.toml:/isaac-sim/.nvidia-omniverse/config/omniverse.toml:rw + - ./user.config.json:/isaac-sim/.local/share/ov/data/Kit/Isaac-Sim Full/5.1/user.config.json:rw + - .dev:/isaac-sim/.dev:rw + - .bashrc:/isaac-sim/.bashrc:rw + - ../../../common/inputrc:/etc/inputrc:rw + - ../../../common/.tmux.conf:/isaac-sim/.tmux.conf:rw + - ../../..:/isaac-sim/AirStack:rw + - ../../../.devcontainer/isaac-sim/launch.json:/isaac-sim/AirStack/.vscode/launch.json:rw + - ../../../.devcontainer/isaac-sim/tasks.json:/isaac-sim/AirStack/.vscode/tasks.json:rw diff --git a/simulation/isaac-sim/docker/omni_pass_TEMPLATE.env b/simulation/isaac-sim/docker/omni_pass_TEMPLATE.env index 45bf2fc71..e010e0db4 100644 --- a/simulation/isaac-sim/docker/omni_pass_TEMPLATE.env +++ b/simulation/isaac-sim/docker/omni_pass_TEMPLATE.env @@ -6,15 +6,28 @@ ######################################################################### ## Nucleus Login information -# This can either be your username and password or the nucleus login token -# The login token method is preferred. 
You can get the token by going to -# the nucleus server website. For us -# https://airlab-nucleus.andrew.cmu.edu/omni/web3/ -# logging in. -# Then right clicking on the cloud and click the "API Tokens" window -# to generate an API token and copy it to "OMNI_PASS". -# If you skip that step, leave both values at their guest defaults. - +# +# Recommended: API-token auth. Generate a token at +# https://airlab-nucleus.andrew.cmu.edu/omni/web3/ +# → right-click cloud icon → API Tokens → Create +# Then set: +# OMNI_USER=$$omni-api-token ← literal sentinel value +# OMNI_PASS= ← the JWT (~1 KB, starts with eyJ) +# +# IMPORTANT: the `$$` is intentional. docker-compose v2 interpolates +# env_file values, and the literal `$` must be doubled to survive +# Compose's parser. The container ultimately sees `OMNI_USER=$omni-api-token`, +# which is what omniclient expects for API-token auth (anything else, e.g. +# your Andrew ID, routes the JWT through the password-verification path +# and Nucleus silently DENIES the request). +# +# Fallback: username/password auth. Set OMNI_USER to your Nucleus username +# and OMNI_PASS to your Nucleus password (NOT your Andrew password unless +# Nucleus is SSO-linked to it). +# +# If you skip Nucleus auth entirely, leave both at the guest defaults +# (read-only access; Isaac Sim asset loads from Nucleus may fail). 
+# ######################################################################### OMNI_USER=guest diff --git a/simulation/isaac-sim/launch_scripts/example_one_px4_pegasus_launch_script.py b/simulation/isaac-sim/launch_scripts/example_one_px4_pegasus_launch_script.py index f53ea0993..11819fc2f 100755 --- a/simulation/isaac-sim/launch_scripts/example_one_px4_pegasus_launch_script.py +++ b/simulation/isaac-sim/launch_scripts/example_one_px4_pegasus_launch_script.py @@ -10,17 +10,77 @@ - Optionally saving the prepared scene as a self-contained USD """ -import carb -from isaacsim import SimulationApp - -# Must be created before any omni imports -simulation_app = SimulationApp({"headless": False}) - import os import sys import time import asyncio +import carb +from isaacsim import SimulationApp + +_LIVESTREAM = os.environ.get("ISAAC_SIM_LIVESTREAM", "").lower() == "true" + +# Must be created before any omni imports. +# +# When livestreaming, mirror the NVIDIA reference config from +# simulation/isaac-sim/standalone_examples/api/isaacsim.simulation_app/livestream.py +# so the Kit GUI (menu bar, toolbar, viewport, status bar) actually gets +# rendered into the WebRTC stream instead of just the bare 3D viewport. +# Key field: `hide_ui: False` — SimulationApp's default when `headless=True` +# is to also hide the UI; the livestream reference opts back into showing +# it. `display_options=3286` is the same bitmask the reference uses to keep +# the default grid + axes visible at scene start. 
+if _LIVESTREAM: + _SIM_APP_CONFIG = { + "width": 1280, + "height": 720, + "window_width": 1920, + "window_height": 1080, + "headless": True, + "hide_ui": False, + "renderer": "RaytracedLighting", + "display_options": 3286, + } +else: + _SIM_APP_CONFIG = {"headless": False} + +simulation_app = SimulationApp(launch_config=_SIM_APP_CONFIG) + +if _LIVESTREAM: + # Headless + WebRTC livestream when ISAAC_SIM_LIVESTREAM=true (set by the + # OSMO airstack-osmo-workspace entrypoint and the isaac-sim-livestream + # Compose profile). Local desktop dev keeps the original windowed behavior. + # Mirrors AirStack's standalone livestream reference at + # simulation/isaac-sim/standalone_examples/api/isaacsim.simulation_app/livestream.py + from isaacsim.core.utils.extensions import enable_extension + simulation_app.set_setting("/app/window/drawMouse", True) + simulation_app.set_setting("/app/livestream/enabled", True) + + # Pin the UDP media port so it stays inside the narrow set of ports we + # publish from this container and that `airstack osmo:webrtc` forwards. + # + # Kit 107's WebRTC livestream picks a UDP media port dynamically. The + # documented `omni.services.livestream.nvcf` defaults were + # minHostPort=47998 / maxHostPort=48020 / fixedHostPort=0, but the + # actual Kit binary ignored that range on airstack-dev-13 and bound to + # UDP 49042 — outside both the Compose-published port range AND the + # default osmo `--udp` forward (47995-48012,49000-49007). Result: + # signaling worked (TCP 49100), the WebRTC Streaming Client window + # opened, but every media packet was dropped → black viewport + + # the `NVST_CCE_DISCONNECTED when m_connectionCount 0 != 1` underflow + # storm in the Kit log. + # + # Set all three settings so whichever code path the plugin reads, it + # lands on UDP 49099. 
The value of 49099 was chosen because it is one below the + # 49100 signaling port — same range, easy to remember, and TCP/UDP can + # coexist on the same number if anyone later wants a single port. + LIVESTREAM_UDP_PORT = int(os.environ.get("ISAAC_SIM_LIVESTREAM_UDP_PORT", "49099")) + simulation_app.set_setting("/app/livestream/fixedHostPort", LIVESTREAM_UDP_PORT) + simulation_app.set_setting("/app/livestream/minHostPort", LIVESTREAM_UDP_PORT) + simulation_app.set_setting("/app/livestream/maxHostPort", LIVESTREAM_UDP_PORT) + + enable_extension("omni.kit.livestream.webrtc") + import omni.kit.app import omni.timeline import omni.usd