324 lines
11 KiB
Python
324 lines
11 KiB
Python
"""WebSocket → PTY bridge for the dashboard's Terminal tab.
|
|
|
|
Spawns a shell (bash by default) inside a pseudo-terminal on the robot and
|
|
relays stdin/stdout to a browser xterm.js instance over WebSocket. From the
|
|
operator's seat this is functionally identical to an in-browser
|
|
`ssh unitree@<robot>` — except no SSH handshake is needed because the
|
|
dashboard process already runs as unitree on the robot. The Terminal tab
|
|
connects to ws://<dashboard>/ws/terminal and you land in unitree's shell
|
|
directly.
|
|
|
|
PROTOCOL — text frames only. Control vs. keystrokes are disambiguated by
|
|
the leading byte:
|
|
client → server:
|
|
"\\x1f" + json-encoded control object (init / resize)
|
|
e.g. "\\x1f{\\"type\\":\\"init\\",\\"cols\\":80,\\"rows\\":24}"
|
|
<any other text> keystrokes — written to PTY
|
|
server → client:
|
|
<text> PTY stdout/stderr chunks
|
|
|
|
The \\x1f prefix (ASCII Unit Separator) is the disambiguator. If we just
|
|
JSON-sniffed every message, a user pasting `{"type":"resize",...}` into
|
|
their shell would silently resize the PTY instead of pasting the text.
|
|
|
|
SECURITY NOTE: anyone who can reach the dashboard URL gets shell access
|
|
as the unitree user. The dashboard already exposes equally-powerful
|
|
endpoints (E-STOP, motion replay, audio mute, etc.) so this isn't a new
|
|
threat class — but it IS a single-bullet kill switch for the robot. Bind
|
|
the dashboard to a trusted network only.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import fcntl
|
|
import json
|
|
import os
|
|
import pty
|
|
import select
|
|
import shutil
|
|
import signal
|
|
import struct
|
|
import termios
|
|
import threading
|
|
|
|
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
|
|
|
|
from Project.Sanad.core.logger import get_logger
|
|
|
|
# Module logger and the router that carries the /ws/terminal endpoint.
log = get_logger("terminal_ws")
router = APIRouter()

# Control frames are tagged with a leading ASCII Unit Separator (0x1F).
# Pasted JSON text starts with a printable character, never 0x1F, so it
# cannot be mistaken for a control frame.
_CTRL_PREFIX = chr(0x1F)

# Upper bound on simultaneously open shells, so a runaway tab cannot spawn
# dozens of processes on the robot. `_active` holds the child pids of live
# sessions; `_active_lock` guards every read and write of it.
_MAX_SESSIONS = 4
_active_lock = threading.Lock()
_active: set[int] = set()

# Capacity of the queue between the PTY reader thread and the WebSocket
# sender. A very chatty command (`yes`, `cat /dev/urandom`) would otherwise
# accumulate output without bound. Once the queue is full, chunks are dropped
# and a single notice is sent to the client — the ANSI stream may glitch
# briefly, but the session survives.
_SEND_QUEUE_MAX = 64
|
|
|
|
|
|
def _resolve_shell() -> list[str]:
|
|
"""Pick a sensible shell. SHELL env first, then /bin/bash, then sh."""
|
|
sh = os.environ.get("SHELL", "")
|
|
if sh and shutil.which(sh):
|
|
return [sh, "-i"]
|
|
if shutil.which("/bin/bash"):
|
|
return ["/bin/bash", "-i"]
|
|
return ["/bin/sh", "-i"]
|
|
|
|
|
|
def _set_pty_size(fd: int, cols: int, rows: int) -> None:
|
|
"""Inform the PTY of its new window size so curses-style apps (htop,
|
|
less, vim) lay out correctly."""
|
|
try:
|
|
# TIOCSWINSZ payload: rows, cols, xpixel, ypixel (xpixel/ypixel
|
|
# unused, kept 0).
|
|
fcntl.ioctl(fd, termios.TIOCSWINSZ,
|
|
struct.pack("HHHH", rows, cols, 0, 0))
|
|
except Exception as exc:
|
|
log.debug("TIOCSWINSZ failed (cols=%s rows=%s): %s", cols, rows, exc)
|
|
|
|
|
|
async def _reap_child(pid: int) -> None:
|
|
"""SIGHUP → wait briefly → SIGKILL → wait briefly → giveup.
|
|
|
|
Earlier version SIGKILLed unconditionally because the WNOHANG check
|
|
happened immediately after SIGHUP (which never returns true that fast).
|
|
Now we poll for up to ~1.5s after SIGHUP before escalating.
|
|
"""
|
|
async def _wait_exit(timeout_s: float, interval_s: float = 0.1) -> bool:
|
|
end = asyncio.get_running_loop().time() + timeout_s
|
|
while asyncio.get_running_loop().time() < end:
|
|
try:
|
|
done_pid, _ = os.waitpid(pid, os.WNOHANG)
|
|
except ChildProcessError:
|
|
return True # already reaped
|
|
except OSError:
|
|
return False
|
|
if done_pid:
|
|
return True
|
|
await asyncio.sleep(interval_s)
|
|
return False
|
|
|
|
# 1. Polite request
|
|
try:
|
|
os.kill(pid, signal.SIGHUP)
|
|
except ProcessLookupError:
|
|
return
|
|
except OSError as exc:
|
|
log.debug("SIGHUP pid=%d: %s", pid, exc)
|
|
return
|
|
|
|
if await _wait_exit(1.5):
|
|
return
|
|
|
|
# 2. Force
|
|
try:
|
|
os.kill(pid, signal.SIGKILL)
|
|
except ProcessLookupError:
|
|
return
|
|
except OSError as exc:
|
|
log.debug("SIGKILL pid=%d: %s", pid, exc)
|
|
return
|
|
|
|
if not await _wait_exit(1.0):
|
|
log.warning("terminal child pid=%d failed to exit after SIGKILL", pid)
|
|
|
|
|
|
def _parse_winsize(ctrl: object) -> tuple[int, int] | None:
    """Extract a sanitized ``(cols, rows)`` pair from a decoded control object.

    Returns ``None`` unless *ctrl* is a dict whose ``type`` is ``"init"`` or
    ``"resize"``. Missing, falsy or non-numeric dimensions fall back to the
    80x24 default, and every value is clamped to 1..65535 because the
    TIOCSWINSZ payload stores them as unsigned shorts.
    """
    if not isinstance(ctrl, dict) or ctrl.get("type") not in ("init", "resize"):
        return None
    size = []
    for key, default in (("cols", 80), ("rows", 24)):
        try:
            value = int(ctrl.get(key) or default)
        except (TypeError, ValueError, OverflowError):
            # Client-supplied garbage must not tear down the session.
            value = default
        size.append(max(1, min(value, 0xFFFF)))
    return size[0], size[1]


@router.websocket("/ws/terminal")
async def terminal_ws(ws: WebSocket) -> None:
    """Bridge a browser xterm.js to a shell PTY on the robot.

    Flow: accept the socket, refuse if ``_MAX_SESSIONS`` shells are already
    open, fork a shell on a new PTY, then relay in both directions until
    either side goes away:

    * PTY → client: a daemon thread reads the master fd and hands decoded
      text to the event loop, where a sender task drains a bounded queue
      into the WebSocket (chunks are dropped, and counted, when it is full).
    * client → PTY: text frames starting with ``_CTRL_PREFIX`` followed by a
      payload are control frames (``init`` / ``resize``) and are never
      written to the PTY; everything else is written as keystrokes.

    On exit the child is signalled and reaped, the master fd is closed and
    the session slot is released — including when this handler task is
    cancelled part-way through cleanup.
    """
    # Function-scope import: only this handler needs the incremental decoder.
    import codecs

    await ws.accept()

    # Concurrent-session guard. Only the check happens under the lock: it is
    # a threading.Lock, and awaiting while holding it would block the event
    # loop thread if another handler tried to acquire it in the meantime.
    # There is no await between this check and `_active.add(pid)` on the
    # success path, so handlers on the same loop cannot interleave there.
    with _active_lock:
        at_capacity = len(_active) >= _MAX_SESSIONS
    if at_capacity:
        await ws.send_text(
            f"\r\n[terminal] Refused — already have {_MAX_SESSIONS} "
            f"open sessions. Close another tab and reconnect.\r\n"
        )
        await ws.close(code=1008)
        return

    # Fork + exec the shell. Parent gets the master fd; child becomes the
    # shell with stdin/stdout/stderr wired to the slave end.
    cmd = _resolve_shell()
    try:
        pid, fd = pty.fork()
    except OSError as exc:
        log.error("pty.fork failed: %s", exc)
        await ws.send_text(f"\r\n[terminal] pty.fork failed: {exc}\r\n")
        await ws.close(code=1011)
        return

    if pid == 0:
        # CHILD — set env so the shell is interactive and looks right.
        try:
            os.environ.setdefault("TERM", "xterm-256color")
            os.environ.setdefault("LANG", "en_US.UTF-8")
            os.execvp(cmd[0], cmd)
        except Exception as exc:
            # exec failed — fd 2 is the PTY slave, so this text reaches the
            # parent (and the browser) before we exit.
            os.write(2, f"[terminal] exec failed: {exc}\n".encode())
        finally:
            # Only reached when exec did not replace the process. The child
            # is a forked copy of the server and must never fall back into
            # the handler / event loop, whatever went wrong above.
            os._exit(127)

    # PARENT
    with _active_lock:
        _active.add(pid)
    log.info("terminal session started pid=%d cmd=%s", pid, cmd[0])

    loop = asyncio.get_running_loop()
    closed = asyncio.Event()
    # Bounded queue + dedicated sender task = backpressure. When the queue is
    # full the chunk is dropped and `dropped` is bumped so a short notice can
    # be surfaced in the stream.
    send_q: asyncio.Queue[str] = asyncio.Queue(maxsize=_SEND_QUEUE_MAX)
    dropped = 0

    def _enqueue(text: str) -> None:
        """Runs on the event loop: queue a chunk, or count it as dropped."""
        nonlocal dropped
        try:
            send_q.put_nowait(text)
        except asyncio.QueueFull:
            dropped += 1

    def _reader_thread() -> None:
        """Drain the PTY master fd into the send queue.

        Runs in a daemon thread and polls with a 0.1 s select timeout so it
        notices `closed` promptly. Always sets `closed` on the way out.
        """
        # Incremental decoding keeps a multi-byte UTF-8 character intact when
        # it happens to straddle two os.read() chunks.
        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
        try:
            while not closed.is_set():
                try:
                    ready, _, _ = select.select([fd], [], [], 0.1)
                except (OSError, ValueError):
                    break  # fd closed underneath us
                if not ready:
                    continue
                try:
                    data = os.read(fd, 4096)
                except OSError:
                    break
                if not data:  # EOF — child exited / PTY closed
                    break
                text = decoder.decode(data)
                if not text:
                    continue  # only a partial character so far
                try:
                    loop.call_soon_threadsafe(_enqueue, text)
                except RuntimeError:
                    break  # loop closed — bail
        finally:
            try:
                loop.call_soon_threadsafe(closed.set)
            except RuntimeError:
                pass  # loop already closed; nobody left to notify

    async def _sender_task() -> None:
        """Drain send_q into the WebSocket until `closed` is set.

        A failed send means the client is gone: set `closed` and stop.
        """
        nonlocal dropped
        while not closed.is_set():
            try:
                text = await asyncio.wait_for(send_q.get(), timeout=0.5)
            except asyncio.TimeoutError:
                continue
            try:
                await ws.send_text(text)
            except Exception as exc:
                log.info("terminal ws.send failed (likely client gone): %s", exc)
                closed.set()
                return
            # If chunks were dropped since the last successful send, tell the
            # user once so the ANSI corruption isn't mysterious.
            if dropped:
                lost, dropped = dropped, 0
                try:
                    await ws.send_text(
                        f"\r\n\x1b[2m[term: dropped {lost} chunk(s) — slow client]"
                        f"\x1b[0m\r\n",
                    )
                except Exception:
                    closed.set()
                    return

    reader = None
    sender = None
    try:
        # Initial sizing — the client is expected to follow up with an
        # {type:"init",...} control frame carrying the real window size.
        _set_pty_size(fd, 80, 24)

        reader = threading.Thread(target=_reader_thread, daemon=True,
                                  name=f"terminal-rx-{pid}")
        reader.start()
        sender = asyncio.create_task(_sender_task())

        while not closed.is_set():
            try:
                msg = await asyncio.wait_for(ws.receive_text(), timeout=0.5)
            except asyncio.TimeoutError:
                continue
            except WebSocketDisconnect:
                break

            if not msg:
                continue

            # Control frame: magic prefix followed by a payload. A lone 0x1F
            # is not a control frame — it is the byte a terminal sends for
            # Ctrl+_ (readline undo) — so it falls through to the keystroke
            # path instead of being swallowed.
            if msg.startswith(_CTRL_PREFIX) and len(msg) > 1:
                try:
                    ctrl = json.loads(msg[1:])
                except (ValueError, RecursionError):
                    # JSONDecodeError is a ValueError subclass.
                    ctrl = None
                size = _parse_winsize(ctrl)
                if size is not None:
                    _set_pty_size(fd, *size)
                # Either way, control frames are NEVER forwarded to the PTY.
                continue

            # Plain keystrokes — write to PTY master.
            try:
                os.write(fd, msg.encode("utf-8", errors="replace"))
            except OSError as exc:
                log.info("terminal pty write failed (child likely exited): %s", exc)
                break
    finally:
        closed.set()
        try:
            if sender is not None:
                sender.cancel()
                # Let the cancellation land so no send is still in flight
                # when the socket is closed below.
                await asyncio.gather(sender, return_exceptions=True)
            try:
                await _reap_child(pid)
            except Exception as exc:
                log.debug("reap_child pid=%d: %s", pid, exc)
            if reader is not None:
                # Wait (bounded) for the reader to leave select() before the
                # fd is closed, so it can never touch a recycled fd number.
                await loop.run_in_executor(None, reader.join, 1.0)
        finally:
            # Must run even if this task is cancelled during the awaits
            # above; otherwise the fd and the session slot leak for good.
            try:
                os.close(fd)
            except OSError:
                pass
            with _active_lock:
                _active.discard(pid)
            log.info("terminal session ended pid=%d", pid)
        try:
            await ws.close()
        except Exception:
            pass  # socket already closed by the peer or by the server
|