diff --git a/decart/lipsync/__init__.py b/decart/lipsync/__init__.py deleted file mode 100644 index 611895e..0000000 --- a/decart/lipsync/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .client import RealtimeLipsyncClient - -__all__ = ["RealtimeLipsyncClient"] diff --git a/decart/lipsync/client.py b/decart/lipsync/client.py deleted file mode 100644 index 17ccb95..0000000 --- a/decart/lipsync/client.py +++ /dev/null @@ -1,171 +0,0 @@ -import asyncio -import websockets -from typing import Optional, Tuple -from .messages import ( - LipsyncClientMessage, - LipsyncServerMessage, - LipsyncServerMessageAdapter, - LipsyncConfigMessage, - LipsyncConfigAckMessage, - LipsyncAudioInputMessage, - LipsyncVideoInputMessage, - LipsyncInterruptAudioMessage, - LipsyncSyncedOutputMessage, - LipsyncErrorMessage, -) -import fractions -import time -import logging -import cv2 -import numpy as np - -logger = logging.getLogger(__name__) - - -class RealtimeLipsyncClient: - DECART_LIPSYNC_ENDPOINT = "/router/lipsync/ws" - VIDEO_FPS = 25 - - def __init__( - self, - api_key: str, - base_url: str = "wss://api3.decart.ai", - audio_sample_rate: int = 16000, - video_fps: int = VIDEO_FPS, - sync_latency: float = 0.0, - ): - """ - Args: - api_key: The API key for the Decart Lipsync server - url: The URL of the Decart Lipsync server - audio_sample_rate: The sample rate of the audio - video_fps: The FPS of the video - sync_latency: Delay next frame up to this many seconds, to account for variable latency - """ - self._url = f"{base_url}{self.DECART_LIPSYNC_ENDPOINT}".replace( - "https://", "wss://" - ).replace("http://", "ws://") - self._api_key = api_key - self._audio_sample_rate = audio_sample_rate - self._video_fps = video_fps - self._sync_latency = sync_latency - - self._websocket: Optional[websockets.ClientConnection] = None - self._out_queue = asyncio.Queue() - self._response_handling_task: Optional[asyncio.Task] = None - - self._video_frame_interval = fractions.Fraction(1, video_fps) - self._video_out_frame_index = 0 - self._video_out_start_time = 0 - - async def _recv(self) -> LipsyncServerMessage: - response = await self._websocket.recv() - return LipsyncServerMessageAdapter.validate_json(response) - - async def _send(self, message: LipsyncClientMessage): - msg = message.model_dump_json() - await self._websocket.send(msg) - - async def _handle_server_responses(self): - try: - while self._websocket is not None: - response = await self._recv() - if isinstance(response, LipsyncSyncedOutputMessage): - await self._out_queue.put(response) - elif isinstance(response, LipsyncErrorMessage): - logger.error(f"Lipsync server error: {response.message}") - raise Exception(response.message) - else: - logger.error(f"Unknown response from lipsync server: {response}") - except asyncio.CancelledError: - pass - except websockets.exceptions.ConnectionClosedOK: - logger.debug("Connection closed by server") - - async def _decode_video_frame(self, video_frame: bytes) -> bytes: - def _decode_video_frame_sync(video_frame: bytes) -> bytes: - nparr = np.frombuffer(video_frame, np.uint8) - video_frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR) - return video_frame - - return await asyncio.to_thread(_decode_video_frame_sync, video_frame) - - async def _encode_video_frame(self, image: np.ndarray) -> bytes: - def _encode_video_frame_sync(image: np.ndarray) -> bytes: - success, encoded_image = cv2.imencode(".jpeg", image) - if not success: - raise Exception("Failed to encode video frame as JPEG") - return encoded_image.tobytes() - - return await asyncio.to_thread(_encode_video_frame_sync, image) - - async def _decode_audio_frame(self, audio_frame: bytes) -> bytes: - return audio_frame - - async def connect(self): - logger.debug(f"Connecting to lipsync server at {self._url}") - self._websocket = await websockets.connect(f"{self._url}?api_key={self._api_key}") - logger.debug("WebSocket connected") - # Initial handshake - await self._send( - LipsyncConfigMessage( - video_fps=self._video_fps, - audio_sample_rate=self._audio_sample_rate, - ) - ) - logger.debug("Configuration sent") - response = await self._recv() - if not isinstance(response, LipsyncConfigAckMessage): - raise Exception(f"Configuration not acknowledged by server: {response}") - logger.debug("Configuration acknowledged") - - self._response_handling_task = asyncio.create_task(self._handle_server_responses()) - - logger.debug("Connected to lipsync server") - - async def disconnect(self): - if self._websocket is not None: - await self._websocket.close() - self._websocket = None - - if self._response_handling_task is not None: - self._response_handling_task.cancel() - try: - await self._response_handling_task - except asyncio.CancelledError: - pass - self._response_handling_task = None - - async def send_audio(self, audio_data: bytes): - await self._send(LipsyncAudioInputMessage(audio_data=audio_data)) - - async def send_video_frame_bytes(self, video_frame_bytes: bytes): - await self._send(LipsyncVideoInputMessage(video_frame=video_frame_bytes)) - - async def send_video_frame(self, image: np.ndarray): - encoded_image = await self._encode_video_frame(image) - await self.send_video_frame_bytes(encoded_image) - - async def interrupt_audio(self): - await self._send(LipsyncInterruptAudioMessage()) - - async def get_synced_output(self, timeout: Optional[float] = None) -> Tuple[bytes, bytes]: - synced_output: LipsyncSyncedOutputMessage = await asyncio.wait_for( - self._out_queue.get(), timeout=timeout - ) - - video_frame = await self._decode_video_frame(synced_output.video_frame) - audio_frame = await self._decode_audio_frame(synced_output.audio_frame) - - if self._video_out_frame_index == 0: - self._video_out_start_time = time.time() + self._sync_latency - - time_til_frame = ( - self._video_out_start_time - + (self._video_out_frame_index * self._video_frame_interval) - - time.time() - ) - if time_til_frame > 0: - await asyncio.sleep(time_til_frame) - - return video_frame, audio_frame diff --git a/decart/lipsync/messages.py b/decart/lipsync/messages.py deleted file mode 100644 index e45fee0..0000000 --- a/decart/lipsync/messages.py +++ /dev/null @@ -1,58 +0,0 @@ -from pydantic import BaseModel, Field, ConfigDict, TypeAdapter -from typing import Literal, Union, Annotated - - -class LipsyncMessage(BaseModel): - model_config = ConfigDict(ser_json_bytes="base64", val_json_bytes="base64") - - -class LipsyncConfigMessage(LipsyncMessage): - type: Literal["config"] = "config" - video_fps: int - audio_sample_rate: int - - -class LipsyncConfigAckMessage(LipsyncMessage): - type: Literal["config_ack"] = "config_ack" - - -class LipsyncAudioInputMessage(LipsyncMessage): - type: Literal["audio_input"] = "audio_input" - audio_data: bytes - - -class LipsyncVideoInputMessage(LipsyncMessage): - type: Literal["video_input"] = "video_input" - video_frame: bytes - - -class LipsyncInterruptAudioMessage(LipsyncMessage): - type: Literal["interrupt_audio"] = "interrupt_audio" - - -class LipsyncSyncedOutputMessage(LipsyncMessage): - type: Literal["synced_result"] = "synced_result" - video_frame: bytes - audio_frame: bytes - - -class LipsyncErrorMessage(LipsyncMessage): - type: Literal["error"] = "error" - message: str - - -LipsyncClientMessage = Annotated[ - Union[ - LipsyncConfigMessage, - LipsyncAudioInputMessage, - LipsyncVideoInputMessage, - LipsyncInterruptAudioMessage, - ], - Field(discriminator="type"), -] -LipsyncServerMessage = Annotated[ - Union[LipsyncConfigAckMessage, LipsyncSyncedOutputMessage, LipsyncErrorMessage], - Field(discriminator="type"), -] - -LipsyncServerMessageAdapter = TypeAdapter(LipsyncServerMessage) diff --git a/decart/models.py b/decart/models.py index ebb0447..00d986f 100644 --- a/decart/models.py +++ b/decart/models.py @@ -1,26 +1,20 @@ import warnings -from typing import Literal, Optional, List, Generic, TypeVar +from typing import Literal, Optional, Generic, TypeVar from pydantic import BaseModel, Field, ConfigDict, model_validator from .errors import ModelNotFoundError -from .types import FileInput, MotionTrajectoryInput +from .types import FileInput RealTimeModels = Literal[ # Canonical names - "lucy", "lucy-2.1", "lucy-2.1-vton", - "lucy-restyle", "lucy-restyle-2", - "live-avatar", # Latest aliases (server-side resolution) "lucy-latest", "lucy-vton-latest", "lucy-restyle-latest", # Deprecated names - "mirage", "mirage_v2", - "lucy_v2v_720p_rt", - "live_avatar", ] VideoModels = Literal[ # Canonical names @@ -28,13 +22,11 @@ "lucy-2.1", "lucy-2.1-vton", "lucy-restyle-2", - "lucy-motion", # Latest aliases (server-side resolution) "lucy-latest", "lucy-vton-latest", "lucy-restyle-latest", "lucy-clip-latest", - "lucy-motion-latest", # Deprecated names "lucy-pro-v2v", "lucy-restyle-v2v", @@ -51,10 +43,7 @@ MODEL_ALIASES: dict[str, str] = { # Realtime aliases - "mirage": "lucy-restyle", "mirage_v2": "lucy-restyle-2", - "lucy_v2v_720p_rt": "lucy", - "live_avatar": "live-avatar", # Video aliases "lucy-pro-v2v": "lucy-clip", "lucy-restyle-v2v": "lucy-restyle-2", @@ -121,13 +110,6 @@ class VideoToVideoInput(DecartBaseModel): enhance_prompt: Optional[bool] = None -class ImageToMotionVideoInput(DecartBaseModel): - data: FileInput - trajectory: List[MotionTrajectoryInput] = Field(..., min_length=2, max_length=1000) - seed: Optional[int] = None - resolution: Optional[str] = None - - class VideoRestyleInput(DecartBaseModel): """Input for lucy-restyle-v2v model. @@ -189,13 +171,6 @@ class ImageToImageInput(DecartBaseModel): _MODELS = { "realtime": { # Canonical names - "lucy": ModelDefinition( - name="lucy", - url_path="/v1/stream", - fps=25, - width=1280, - height=704, - ), "lucy-2.1": ModelDefinition( name="lucy-2.1", url_path="/v1/stream", @@ -210,13 +185,6 @@ class ImageToImageInput(DecartBaseModel): width=1088, height=624, ), - "lucy-restyle": ModelDefinition( - name="lucy-restyle", - url_path="/v1/stream", - fps=25, - width=1280, - height=704, - ), "lucy-restyle-2": ModelDefinition( name="lucy-restyle-2", url_path="/v1/stream", @@ -224,13 +192,6 @@ class ImageToImageInput(DecartBaseModel): width=1280, height=704, ), - "live-avatar": ModelDefinition( - name="live-avatar", - url_path="/v1/stream", - fps=25, - width=1280, - height=720, - ), # Latest aliases (server-side resolution) "lucy-latest": ModelDefinition( name="lucy-latest", @@ -254,13 +215,6 @@ class ImageToImageInput(DecartBaseModel): height=704, ), # Deprecated names - "mirage": ModelDefinition( - name="mirage", - url_path="/v1/stream", - fps=25, - width=1280, - height=704, - ), "mirage_v2": ModelDefinition( name="mirage_v2", url_path="/v1/stream", @@ -268,20 +222,6 @@ class ImageToImageInput(DecartBaseModel): width=1280, height=704, ), - "lucy_v2v_720p_rt": ModelDefinition( - name="lucy_v2v_720p_rt", - url_path="/v1/stream", - fps=25, - width=1280, - height=704, - ), - "live_avatar": ModelDefinition( - name="live_avatar", - url_path="/v1/stream", - fps=25, - width=1280, - height=720, - ), }, "video": { # Canonical names @@ -317,14 +257,6 @@ class ImageToImageInput(DecartBaseModel): height=704, input_schema=VideoRestyleInput, ), - "lucy-motion": ModelDefinition( - name="lucy-motion", - url_path="/v1/jobs/lucy-motion", - fps=25, - width=1280, - height=704, - input_schema=ImageToMotionVideoInput, - ), # Latest aliases (server-side resolution) "lucy-latest": ModelDefinition( name="lucy-latest", @@ -358,14 +290,6 @@ class ImageToImageInput(DecartBaseModel): height=704, input_schema=VideoToVideoInput, ), - "lucy-motion-latest": ModelDefinition( - name="lucy-motion-latest", - url_path="/v1/jobs/lucy-motion-latest", - fps=25, - width=1280, - height=704, - input_schema=ImageToMotionVideoInput, - ), # Deprecated names "lucy-pro-v2v": ModelDefinition( name="lucy-pro-v2v", @@ -435,8 +359,8 @@ def video(model: VideoModels) -> VideoModelDefinition: Available models: - "lucy-clip" - Video-to-video - "lucy-2.1" - Video editing (newer, higher quality) + - "lucy-2.1-vton" - Virtual try-on video editing - "lucy-restyle-2" - Video restyling with prompt or reference image - - "lucy-motion" - Image-to-motion-video """ _warn_deprecated(model) try: diff --git a/decart/realtime/audio_stream_manager.py b/decart/realtime/audio_stream_manager.py deleted file mode 100644 index 19d58e0..0000000 --- a/decart/realtime/audio_stream_manager.py +++ /dev/null @@ -1,150 +0,0 @@ -""" -Audio stream manager for live_avatar mode. - -Mirrors the JS SDK's AudioStreamManager — ensures WebRTC always has -audio frames to send even when no user mic/audio is provided. -""" - -import asyncio -import fractions -import io -import logging -from collections import deque -from pathlib import Path -from typing import Optional, Union - -import av -from aiortc import MediaStreamTrack - -logger = logging.getLogger(__name__) - -SAMPLE_RATE = 48000 -SAMPLES_PER_FRAME = 960 # 20ms at 48kHz -BYTES_PER_SAMPLE = 2 # s16 format -BYTES_PER_FRAME = SAMPLES_PER_FRAME * BYTES_PER_SAMPLE - - -def _make_silence_frame() -> av.AudioFrame: - frame = av.AudioFrame(samples=SAMPLES_PER_FRAME, layout="mono", format="s16") - for plane in frame.planes: - plane.update(bytes(BYTES_PER_FRAME)) - return frame - - -class _AudioTrack(MediaStreamTrack): - kind = "audio" - - def __init__(self) -> None: - super().__init__() - self._queue: deque[av.AudioFrame] = deque() - self._pts = 0 - self._start: Optional[float] = None - self._done_event: Optional[asyncio.Event] = None - - async def recv(self) -> av.AudioFrame: - if self._start is None: - self._start = asyncio.get_event_loop().time() - - target = self._start + (self._pts / SAMPLE_RATE) - delay = target - asyncio.get_event_loop().time() - if delay > 0: - await asyncio.sleep(delay) - - if self._queue: - frame = self._queue.popleft() - if not self._queue and self._done_event: - self._done_event.set() - self._done_event = None - else: - frame = _make_silence_frame() - - frame.pts = self._pts - frame.sample_rate = SAMPLE_RATE - frame.time_base = fractions.Fraction(1, SAMPLE_RATE) - self._pts += SAMPLES_PER_FRAME - - return frame - - def enqueue(self, frames: list[av.AudioFrame], done: asyncio.Event) -> None: - self._queue.extend(frames) - self._done_event = done - - def clear(self) -> None: - self._queue.clear() - if self._done_event: - self._done_event.set() - self._done_event = None - - -class AudioStreamManager: - """Manages audio for live_avatar mode. - - Provides a continuous audio track that outputs silence by default - and allows playing audio data through it via play_audio(). - """ - - def __init__(self) -> None: - self._track = _AudioTrack() - self._playing = False - - def get_track(self) -> MediaStreamTrack: - return self._track - - @property - def is_playing(self) -> bool: - return self._playing - - async def play_audio(self, audio: Union[bytes, str, Path]) -> None: - """Play audio through the stream. Resolves when audio finishes playing. - - Args: - audio: Audio data as bytes, file path string, or Path object. - """ - if self._playing: - self.stop_audio() - - if isinstance(audio, bytes): - container: av.InputContainer = av.open(io.BytesIO(audio)) # type: ignore[assignment] - else: - container: av.InputContainer = av.open(str(audio)) # type: ignore[assignment] - - try: - resampler = av.AudioResampler(format="s16", layout="mono", rate=SAMPLE_RATE) - raw = bytearray() - - for frame in container.decode(audio=0): - for resampled in resampler.resample(frame): - raw.extend(bytes(resampled.planes[0])) - - for resampled in resampler.resample(None): - raw.extend(bytes(resampled.planes[0])) - finally: - container.close() - - if not raw: - return - - frames = [] - for i in range(0, len(raw), BYTES_PER_FRAME): - chunk = raw[i : i + BYTES_PER_FRAME] - if len(chunk) < BYTES_PER_FRAME: - chunk.extend(bytes(BYTES_PER_FRAME - len(chunk))) - - frame = av.AudioFrame(samples=SAMPLES_PER_FRAME, layout="mono", format="s16") - frame.planes[0].update(bytes(chunk)) - frames.append(frame) - - done = asyncio.Event() - self._playing = True - self._track.enqueue(frames, done) - - await done.wait() - self._playing = False - - def stop_audio(self) -> None: - self._track.clear() - self._playing = False - - def cleanup(self) -> None: - self.stop_audio() - self._track.stop() diff --git a/decart/realtime/client.py b/decart/realtime/client.py index 77ffa1d..f7bffda 100644 --- a/decart/realtime/client.py +++ b/decart/realtime/client.py @@ -8,7 +8,6 @@ from aiortc import MediaStreamTrack from pydantic import BaseModel -from .audio_stream_manager import AudioStreamManager from .webrtc_manager import WebRTCManager, WebRTCConfiguration from .messages import PromptMessage, SessionIdMessage, GenerationTickMessage from .subscribe import ( @@ -70,12 +69,9 @@ def __init__( self, manager: WebRTCManager, http_session: Optional[aiohttp.ClientSession] = None, - model_name: Optional[str] = None, ): self._manager = manager self._http_session = http_session - self._model_name = model_name - self._audio_stream_manager: Optional[AudioStreamManager] = None self._connection_callbacks: list[Callable[[ConnectionState], None]] = [] self._error_callbacks: list[Callable[[DecartSDKError], None]] = [] self._generation_tick_callbacks: list[Callable[[GenerationTickMessage], None]] = [] @@ -110,15 +106,6 @@ async def connect( ws_url = f"{base_url}{options.model.url_path}" ws_url += f"?api_key={quote(api_key)}&model={quote(options.model.name)}" - model_name: str = options.model.name - - is_avatar_live = model_name in ("live_avatar", "live-avatar") - audio_stream_manager: Optional[AudioStreamManager] = None - - if is_avatar_live and local_track is None: - audio_stream_manager = AudioStreamManager() - local_track = audio_stream_manager.get_track() - config = WebRTCConfiguration( webrtc_url=ws_url, api_key=api_key, @@ -131,7 +118,6 @@ async def connect( initial_state=options.initial_state, customize_offer=options.customize_offer, integration=integration, - model_name=model_name, ) http_session = aiohttp.ClientSession() @@ -140,9 +126,7 @@ async def connect( client = cls( manager=manager, http_session=http_session, - model_name=model_name, ) - client._audio_stream_manager = audio_stream_manager config.on_connection_state_change = client._emit_connection_change config.on_error = lambda error: client._emit_error(WebRTCError(str(error), cause=error)) @@ -167,8 +151,6 @@ async def connect( initial_prompt=initial_prompt, ) except Exception as e: - if audio_stream_manager: - audio_stream_manager.cleanup() await manager.cleanup() await http_session.close() raise WebRTCError(str(e), cause=e) @@ -329,17 +311,6 @@ async def set_prompt( finally: self._manager.unregister_prompt_wait(prompt) - async def play_audio(self, audio: Union[bytes, str, Path]) -> None: - """Play audio through the avatar stream. Resolves when audio finishes. - - Only available for live_avatar connections without a user-provided audio track. - """ - if self._audio_stream_manager is None: - raise InvalidInputError( - "play_audio() is only available for live_avatar without a user-provided audio track" - ) - await self._audio_stream_manager.play_audio(audio) - async def set_image( self, image: Optional[FileInput], @@ -370,9 +341,6 @@ def get_connection_state(self) -> ConnectionState: async def disconnect(self) -> None: self._buffering = False self._buffer.clear() - if self._audio_stream_manager: - self._audio_stream_manager.cleanup() - self._audio_stream_manager = None await self._manager.cleanup() if self._http_session and not self._http_session.closed: await self._http_session.close() diff --git a/decart/realtime/webrtc_connection.py b/decart/realtime/webrtc_connection.py index a51e8ed..2c2ce9c 100644 --- a/decart/realtime/webrtc_connection.py +++ b/decart/realtime/webrtc_connection.py @@ -60,7 +60,6 @@ def __init__( self._pending_prompts: dict[str, tuple[asyncio.Event, dict]] = {} self._pending_image_set: Optional[tuple[asyncio.Event, dict]] = None self._local_track: Optional[MediaStreamTrack] = None - self._model_name: Optional[str] = None self._connection_error: Optional[str] = None # Per-connect() dedup: _handle_error and connect()'s except branches both # may see the same error; whichever fires first flips this to True and the @@ -73,13 +72,11 @@ async def connect( local_track: Optional[MediaStreamTrack], timeout: float, integration: Optional[str] = None, - model_name: Optional[str] = None, initial_image: Optional[str] = None, initial_prompt: Optional[dict] = None, ) -> None: try: self._local_track = local_track - self._model_name = model_name self._connection_error = None self._on_error_fired = False @@ -107,7 +104,7 @@ async def connect( elif local_track is not None: # No image and no prompt — send passthrough (skip for subscribe mode which has no local stream) await self._send_passthrough_and_wait() - await self._setup_peer_connection(local_track, model_name=model_name) + await self._setup_peer_connection(local_track) await self._create_and_send_offer() @@ -218,7 +215,6 @@ async def _send_passthrough_and_wait(self, timeout: float = 30.0) -> None: async def _setup_peer_connection( self, local_track: Optional[MediaStreamTrack], - model_name: Optional[str] = None, ) -> None: config = RTCConfiguration(iceServers=[RTCIceServer(urls=["stun:stun.l.google.com:19302"])]) @@ -263,9 +259,6 @@ async def on_ice_connection_state_change(): self._pc.addTransceiver("audio", direction="recvonly") logger.debug("Added video+audio transceivers (recvonly) for subscribe mode") else: - if model_name in ("live_avatar", "live-avatar"): - self._pc.addTransceiver("video", direction="recvonly") - logger.debug("Added video transceiver (recvonly) for avatar-live mode") self._pc.addTrack(local_track) logger.debug("Added local track to peer connection") diff --git a/decart/realtime/webrtc_manager.py b/decart/realtime/webrtc_manager.py index d2d27f0..5ea830d 100644 --- a/decart/realtime/webrtc_manager.py +++ b/decart/realtime/webrtc_manager.py @@ -48,7 +48,6 @@ class WebRTCConfiguration: initial_state: Optional[ModelState] = None customize_offer: Optional[Callable] = None integration: Optional[str] = None - model_name: Optional[str] = None def _is_permanent_error(exception: BaseException) -> bool: @@ -155,7 +154,6 @@ async def _attempt(): local_track=self._local_track, timeout=CONNECTION_TIMEOUT, integration=self._config.integration, - model_name=self._config.model_name, ) if self._intentional_disconnect or reconnect_generation != self._reconnect_generation: @@ -192,7 +190,6 @@ async def connect( local_track=local_track, timeout=CONNECTION_TIMEOUT, integration=self._config.integration, - model_name=self._config.model_name, initial_image=initial_image, initial_prompt=initial_prompt, ) diff --git a/decart/types.py b/decart/types.py index f6653cc..5cc7fb9 100644 --- a/decart/types.py +++ b/decart/types.py @@ -21,9 +21,3 @@ class Prompt(BaseModel): class ModelState(BaseModel): prompt: Optional[Prompt] = None image: Optional[Union[bytes, str, Path]] = None - - -class MotionTrajectoryInput(BaseModel): - frame: int = Field(..., ge=0) - x: float = Field(..., ge=0) - y: float = Field(..., ge=0) diff --git a/examples/README.md b/examples/README.md index e5ab50b..053a314 100644 --- a/examples/README.md +++ b/examples/README.md @@ -23,7 +23,6 @@ export DECART_API_KEY="your-api-key-here" - **`process_video.py`** - Edit a local video with `lucy-clip` - **`process_image.py`** - Edit the bundled example image with `lucy-image-2` - **`process_url.py`** - Transform videos from URLs -- **`queue_image_example.py`** - Turn the bundled example image into motion with `lucy-motion` ### Realtime API @@ -38,7 +37,7 @@ pip install decart[realtime] ### Running Examples -`process_image.py` and `queue_image_example.py` use the bundled `examples/files/image.png` asset. +`process_image.py` uses the bundled `examples/files/image.png` asset. `process_video.py` expects you to place a local video at `examples/assets/example_video.mp4` first. ```bash @@ -48,9 +47,6 @@ python examples/process_video.py # Edit the bundled example image python examples/process_image.py -# Turn the bundled example image into motion -python examples/queue_image_example.py - # Transform video from URL python examples/process_url.py diff --git a/examples/avatar_live.py b/examples/avatar_live.py deleted file mode 100644 index 03180fd..0000000 --- a/examples/avatar_live.py +++ /dev/null @@ -1,184 +0,0 @@ -""" -Avatar Live Example - -This example demonstrates how to use the avatar-live model to animate an avatar image. -The avatar can be animated with audio input (microphone or audio file). - -Usage: - # With audio file: - DECART_API_KEY=your-key python avatar_live.py avatar.png audio.mp3 - - # With just avatar image (will wait for audio): - DECART_API_KEY=your-key python avatar_live.py avatar.png - -Requirements: - pip install decart[realtime] -""" - -import asyncio -import os -import sys -from pathlib import Path - -try: - from aiortc.contrib.media import MediaPlayer, MediaRecorder -except ImportError: - print("aiortc is required for this example.") - print("Install with: pip install decart[realtime]") - sys.exit(1) - -from decart import DecartClient, models - - -async def main(): - api_key = os.getenv("DECART_API_KEY") - if not api_key: - print("Error: DECART_API_KEY environment variable not set") - print("Usage: DECART_API_KEY=your-key python avatar_live.py [audio_file]") - return - - if len(sys.argv) < 2: - print("Usage: python avatar_live.py [audio_file]") - print("") - print("Arguments:") - print(" avatar_image - Path to avatar image (PNG, JPG)") - print(" audio_file - Optional: Path to audio file for the avatar to speak") - print("") - print("Examples:") - print(" python avatar_live.py avatar.png") - print(" python avatar_live.py avatar.png speech.mp3") - return - - avatar_image = sys.argv[1] - if not os.path.exists(avatar_image): - print(f"Error: Avatar image not found: {avatar_image}") - return - - audio_file = sys.argv[2] if len(sys.argv) > 2 else None - if audio_file and not os.path.exists(audio_file): - print(f"Error: Audio file not found: {audio_file}") - return - - print(f"šŸ–¼ļø Avatar image: {avatar_image}") - if audio_file: - print(f"šŸ”Š Audio file: {audio_file}") - - audio_track = None - - if audio_file: - print("Loading audio file...") - player = MediaPlayer(audio_file) - if player.audio: - audio_track = player.audio - print("āœ“ Audio loaded") - else: - print("āš ļø Warning: No audio stream found in file, continuing without audio") - - try: - from decart.realtime.client import RealtimeClient - from decart.realtime.types import RealtimeConnectOptions - from decart.types import ModelState - except ImportError: - print("Error: Realtime API not available") - print("Install with: pip install decart[realtime]") - return - - print("\nCreating Decart client...") - async with DecartClient(api_key=api_key) as client: - model = models.realtime("live-avatar") - print(f"Using model: {model.name}") - - frame_count = 0 - recorder = None - output_file = Path("output_avatar_live.mp4") - - def on_remote_stream(track): - nonlocal frame_count, recorder - frame_count += 1 - if frame_count % 25 == 0: - print(f"šŸ“¹ Received {frame_count} frames...") - - if recorder is None: - print(f"šŸ’¾ Recording to {output_file}") - recorder = MediaRecorder(str(output_file)) - recorder.addTrack(track) - asyncio.create_task(recorder.start()) - - def on_connection_change(state): - print(f"šŸ”„ Connection state: {state}") - - def on_error(error): - print(f"āŒ Error: {error.__class__.__name__} - {error.message}") - - print("\nConnecting to Avatar Live API...") - print("(Sending avatar image...)") - - try: - realtime_client = await RealtimeClient.connect( - base_url=client.realtime_base_url, - api_key=client.api_key, - local_track=audio_track, # Can be None if no audio - options=RealtimeConnectOptions( - model=model, - on_remote_stream=on_remote_stream, - initial_state=ModelState(image=avatar_image), - ), - ) - - realtime_client.on("connection_change", on_connection_change) - realtime_client.on("error", on_error) - - print("āœ“ Connected!") - print(f"Session ID: {realtime_client.session_id}") - - if audio_file and not audio_track: - print("\nPlaying audio via play_audio()...") - await realtime_client.play_audio(audio_file) - print("āœ“ Audio playback complete") - elif audio_file: - print("\nStreaming audio through avatar via MediaStreamTrack...") - else: - print("\nNo audio provided - avatar will be idle") - print("You can play audio dynamically using play_audio()") - - print("\nPress Ctrl+C to stop and save the recording...") - - # Demo: Update avatar image after 5 seconds (if you want to test set_image) - # Uncomment the following to test dynamic image updates: - # await asyncio.sleep(5) - # print("Updating avatar image...") - # await realtime_client.set_image(Path("new_avatar.png")) - # print("āœ“ Avatar image updated!") - - try: - while True: - await asyncio.sleep(1) - except KeyboardInterrupt: - print(f"\n\nāœ“ Received {frame_count} frames total") - - finally: - if recorder: - try: - print(f"šŸ’¾ Saving video to {output_file}...") - await asyncio.sleep(0.5) - await recorder.stop() - print(f"āœ“ Video saved to {output_file}") - except Exception as e: - print(f"āš ļø Warning: Could not save video cleanly: {e}") - print(" Video file may be incomplete") - - except Exception as e: - print(f"\nāŒ Connection failed: {e}") - import traceback - - traceback.print_exc() - - finally: - if "realtime_client" in locals(): - print("\nDisconnecting...") - await realtime_client.disconnect() - print("āœ“ Disconnected") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/lipsync_file.py b/examples/lipsync_file.py deleted file mode 100644 index 006f95c..0000000 --- a/examples/lipsync_file.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/env python -""" -Example of using Decart's Realtime Lipsync API to synchronize audio with video. - -This example loads a video file and an audio file, processes them through the -Decart Lipsync API, and saves the lipsynced result to a new video file. - -Usage: - python lipsync_file.py - -Example: - python lipsync_file.py input.mp4 speech.wav output_lipsynced.mp4 - python lipsync_file.py input.mp4 speech.mp3 output_lipsynced.mp4 -""" - -import asyncio -import os -import sys -import cv2 -from pathlib import Path - -from decart.lipsync import RealtimeLipsyncClient - - -async def process_lipsync(video_path: str, audio_path: str, output_path: str): - """Process video and audio through Decart's lipsync API.""" - - # Get API key - api_key = os.getenv("DECART_API_KEY") - if not api_key: - print("Error: Please set DECART_API_KEY environment variable") - return - - # Initialize client - client = RealtimeLipsyncClient(api_key=api_key) - - print(f"Processing: {video_path} + {audio_path} -> {output_path}") - - # Connect to server - await client.connect() - print("Connected to Decart Lipsync server") - - try: - # Load audio data - handle different formats - with open(audio_path, "rb") as f: - audio_data = f.read() - - # Send audio to server (server handles chunking) - await client.send_audio(audio_data) - - # Load video frames and convert to RGB - frame_count = 0 - cap = cv2.VideoCapture(video_path) - while True: - ret, frame = cap.read() - if not ret: - break - frame_count += 1 - - # Convert from BGR (OpenCV default) to RGB - rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - print(rgb_frame.shape) - await client.send_video_frame(rgb_frame) - cap.release() - - # Receive lipsynced output - out = cv2.VideoWriter( - output_path, - cv2.VideoWriter_fourcc(*"mp4v"), - client._video_fps, - (rgb_frame.shape[1], rgb_frame.shape[0]), - ) - for i in range(frame_count): - try: - video_frame, audio_frame = await client.get_synced_output(timeout=1.0) - bgr_frame = cv2.cvtColor(video_frame, cv2.COLOR_RGB2BGR) - out.write(bgr_frame) - except asyncio.TimeoutError: - print(f"Warning: Timeout at frame {i}") - break - out.release() - - finally: - await client.disconnect() - print("Disconnected from server") - - -async def main(): - """Main entry point.""" - if len(sys.argv) != 4: - print("Usage: python lipsync_file.py ") - print("Example: python lipsync_file.py input.mp4 speech.wav output_lipsynced.mp4") - sys.exit(1) - - video_path = sys.argv[1] - audio_path = sys.argv[2] - output_path = sys.argv[3] - - # Check input files exist - if not Path(video_path).exists(): - print(f"Error: Video file not found: {video_path}") - sys.exit(1) - - if not Path(audio_path).exists(): - print(f"Error: Audio file not found: {audio_path}") - sys.exit(1) - - # Process the files - await process_lipsync(video_path, audio_path, output_path) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/queue_image_example.py b/examples/queue_image_example.py deleted file mode 100644 index 06708fe..0000000 --- a/examples/queue_image_example.py +++ /dev/null @@ -1,54 +0,0 @@ -import asyncio -import os -from pathlib import Path -from decart import DecartClient, models - - -async def main() -> None: - # Load the bundled example image - image_path = Path(__file__).parent / "files" / "image.png" - - if not image_path.exists(): - print(f"Missing bundled example image at: {image_path}") - return - - async with DecartClient(api_key=os.getenv("DECART_API_KEY", "your-api-key-here")) as client: - print(f"Loading image: {image_path}") - - # Manual polling - submit and poll yourself - print("Submitting image-to-motion job...") - job = await client.queue.submit( - { - "model": models.video("lucy-motion"), - "data": image_path, - "resolution": "480p", - "trajectory": [ - {"frame": 0, "x": 0.35, "y": 0.5}, - {"frame": 30, "x": 0.5, "y": 0.45}, - {"frame": 60, "x": 0.65, "y": 0.5}, - ], - } - ) - print(f"Job submitted: {job.job_id}") - - # Poll manually - status = await client.queue.status(job.job_id) - while status.status in ("pending", "processing"): - print(f"Status: {status.status}") - await asyncio.sleep(2) - status = await client.queue.status(job.job_id) - - print(f"Final status: {status.status}") - - if status.status == "completed": - print("Fetching result...") - data = await client.queue.result(job.job_id) - with open("output_motion.mp4", "wb") as f: - f.write(data) - print("Video saved to output_motion.mp4") - else: - print("Job failed") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/playground/playground.py b/playground/playground.py index 8eebdf3..c28fdc4 100644 --- a/playground/playground.py +++ b/playground/playground.py @@ -15,8 +15,6 @@ python playground.py # Interactive mode python playground.py --model lucy-restyle-2 # Camera model python playground.py --model lucy-restyle-2 --prompt "Anime" # With initial prompt - python playground.py --model live-avatar --image face.png # Avatar mode - python playground.py --model live-avatar --image face.png --audio speech.mp3 Controls (while running): Type text + Enter → Send prompt to Decart @@ -92,15 +90,10 @@ def _check_deps() -> None: # ── Constants ──────────────────────────────────────────────────────────────── REALTIME_MODELS = [ - "lucy", "lucy-2.1", "lucy-2.1-vton", - "lucy-restyle", "lucy-restyle-2", - "live-avatar", ] -CAMERA_MODELS = {"lucy", "lucy-2.1", "lucy-2.1-vton", "lucy-restyle", "lucy-restyle-2"} -AVATAR_MODELS = {"live-avatar"} BANNER = """ ╔══════════════════════════════════════╗ @@ -157,23 +150,6 @@ def stop(self) -> None: self._cap.release() -# ── Audio Track ────────────────────────────────────────────────────────────── - - -def load_audio_track(path: str) -> Optional[MediaStreamTrack]: - """Load an audio track from a file using aiortc's MediaPlayer.""" - try: - from aiortc.contrib.media import MediaPlayer - - player = MediaPlayer(path) - if player.audio: - return player.audio - print(f" ⚠ No audio stream in {path}") - except Exception as e: - print(f" ⚠ Failed to load audio: {e}") - return None - - # ── CLI ────────────────────────────────────────────────────────────────────── @@ -185,16 +161,17 @@ def parse_args() -> argparse.Namespace: Examples: %(prog)s --model lucy-restyle-2 %(prog)s --model lucy-restyle-2 --prompt "Anime style" - %(prog)s --model live-avatar --image avatar.png - %(prog)s --model live-avatar --image avatar.png --audio speech.mp3 %(prog)s --model lucy-2.1 --image ref.png --prompt "Lego World" """, ) p.add_argument("--model", "-m", choices=REALTIME_MODELS, help="Model name") p.add_argument("--api-key", "-k", help="API key (or set DECART_API_KEY env var)") - p.add_argument("--image", "-i", help="Initial image path (required for avatar-live)") + p.add_argument( + "--image", + "-i", + help="Optional reference image (for lucy-2.1 / lucy-2.1-vton / lucy-restyle-2)", + ) p.add_argument("--prompt", "-p", help="Initial prompt text") - p.add_argument("--audio", "-a", help="Audio file path (for avatar-live)") p.add_argument("--camera", "-c", type=int, default=0, help="Camera device index (default: 0)") p.add_argument("--no-local", action="store_true", help="Hide local camera feed") p.add_argument("--verbose", "-v", action="store_true", help="Enable debug logging") @@ -206,9 +183,7 @@ def select_model_interactive() -> str: print("\nAvailable realtime models:") for i, name in enumerate(REALTIME_MODELS, 1): note = "" - if name in AVATAR_MODELS: - note = " (requires --image)" - elif name in ("lucy-2.1", "lucy-2.1-vton", "lucy-restyle-2"): + if name in ("lucy-2.1", "lucy-2.1-vton", "lucy-restyle-2"): note = " (supports reference image)" print(f" {i}. {name}{note}") @@ -248,25 +223,15 @@ async def run() -> None: model_name = select_model_interactive() model = models.realtime(cast(RealTimeModels, model_name)) - needs_camera = model_name in CAMERA_MODELS - is_avatar = model_name in AVATAR_MODELS print(f"\n Model : {model_name}") print(f" Res : {model.width}x{model.height} @ {model.fps}fps") # ── Validate ───────────────────────────────────────────────────────── - if is_avatar and not args.image: - print("\nError: --image is required for avatar-live model") - return - if args.image and not Path(args.image).exists(): print(f"\nError: Image not found: {args.image}") return - if args.audio and not Path(args.audio).exists(): - print(f"\nError: Audio file not found: {args.audio}") - return - # ── Initial State ──────────────────────────────────────────────────── initial_state: Optional[ModelState] = None if args.image or args.prompt: @@ -285,22 +250,16 @@ async def run() -> None: camera_track: Optional[CameraTrack] = None local_track: Optional[MediaStreamTrack] = None - if needs_camera: - print(f"\n Opening camera (device {args.camera})...") - try: - camera_track = CameraTrack(args.camera, model.width, model.height, model.fps) - local_track = camera_track - actual_w = int(camera_track._cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - actual_h = int(camera_track._cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - print(f" āœ“ Camera opened ({actual_w}x{actual_h})") - except RuntimeError as e: - print(f" āœ— {e}") - return - elif args.audio: - print(f" Loading audio: {args.audio}") - local_track = load_audio_track(args.audio) - if local_track: - print(" āœ“ Audio loaded") + print(f"\n Opening camera (device {args.camera})...") + try: + camera_track = CameraTrack(args.camera, model.width, model.height, model.fps) + local_track = camera_track + actual_w = int(camera_track._cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + actual_h = int(camera_track._cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + print(f" āœ“ Camera opened ({actual_w}x{actual_h})") + except RuntimeError as e: + print(f" āœ— {e}") + return # ── Connect ────────────────────────────────────────────────────────── remote_track_ready = asyncio.Event() diff --git a/tests/test_lipsync.py b/tests/test_lipsync.py deleted file mode 100644 index 4e6170a..0000000 --- a/tests/test_lipsync.py +++ /dev/null @@ -1,378 +0,0 @@ -import pytest -import asyncio -import json -from unittest.mock import AsyncMock, patch -from decart.lipsync import RealtimeLipsyncClient -from decart.lipsync.messages import ( - LipsyncConfigMessage, - LipsyncConfigAckMessage, - LipsyncAudioInputMessage, - LipsyncVideoInputMessage, - LipsyncInterruptAudioMessage, - LipsyncSyncedOutputMessage, - LipsyncErrorMessage, - LipsyncServerMessageAdapter, -) - - -class TestLipsyncMessages: - """Test Lipsync message serialization and deserialization""" - - def test_config_message_serialization(self): - """Test LipsyncConfigMessage serialization""" - msg = LipsyncConfigMessage(video_fps=25, audio_sample_rate=16000) - data = msg.model_dump() - assert data["type"] == "config" - assert data["video_fps"] == 25 - assert data["audio_sample_rate"] == 16000 - - def test_config_ack_message_deserialization(self): - """Test LipsyncConfigAckMessage deserialization""" - data = '{"type": "config_ack"}' - msg = LipsyncServerMessageAdapter.validate_json(data) - assert isinstance(msg, LipsyncConfigAckMessage) - assert msg.type == "config_ack" - - def test_audio_input_message_with_bytes(self): - """Test LipsyncAudioInputMessage with binary data""" - audio_data = b"test audio data" - msg = LipsyncAudioInputMessage(audio_data=audio_data) - assert msg.type == "audio_input" - assert msg.audio_data == audio_data - - # Test JSON serialization with base64 encoding - json_str = msg.model_dump_json() - assert "audio_input" in json_str - - def test_video_input_message_with_bytes(self): - """Test LipsyncVideoInputMessage with binary data""" - video_frame = b"test video frame" - msg = LipsyncVideoInputMessage(video_frame=video_frame) - assert msg.type == "video_input" - assert msg.video_frame == video_frame - - def test_interrupt_audio_message(self): - """Test LipsyncInterruptAudioMessage""" - msg = LipsyncInterruptAudioMessage() - assert msg.type == "interrupt_audio" - - def test_synced_output_message_deserialization(self): - """Test LipsyncSyncedOutputMessage deserialization""" - import base64 - - video_data = base64.b64encode(b"video").decode() - audio_data = base64.b64encode(b"audio").decode() - data = f'{{"type": "synced_result", "video_frame": "{video_data}", "audio_frame": "{audio_data}"}}' - - msg = LipsyncServerMessageAdapter.validate_json(data) - assert isinstance(msg, LipsyncSyncedOutputMessage) - assert msg.type == "synced_result" - assert msg.video_frame == b"video" - assert msg.audio_frame == b"audio" - - def test_error_message_deserialization(self): - """Test LipsyncErrorMessage deserialization""" - data = '{"type": "error", "message": "Test error"}' - msg = LipsyncServerMessageAdapter.validate_json(data) - assert isinstance(msg, LipsyncErrorMessage) - assert msg.type == "error" - assert msg.message == "Test error" - - def test_message_discriminator(self): - """Test that message discriminator works correctly""" - messages = [ - ('{"type": "config_ack"}', LipsyncConfigAckMessage), - ('{"type": "error", "message": "test"}', LipsyncErrorMessage), - ] - - for json_data, expected_type in messages: - msg = LipsyncServerMessageAdapter.validate_json(json_data) - assert isinstance(msg, expected_type) - - -class TestRealtimeLipsyncClient: - """Test RealtimeLipsyncClient functionality""" - - @pytest.fixture - def client(self): - """Create a test client""" - return RealtimeLipsyncClient( - api_key="test-api-key", - base_url="https://api.decart.ai", - audio_sample_rate=16000, - video_fps=25, - sync_latency=0.1, - ) - - def test_client_initialization(self, client): - """Test client initialization""" - assert client._api_key == "test-api-key" - assert client._audio_sample_rate == 16000 - assert client._video_fps == 25 - assert client._sync_latency == 0.1 - assert "wss://api.decart.ai" in client._url - assert "/router/lipsync/ws" in client._url - - def test_url_conversion(self): - """Test URL conversion from HTTP to WebSocket""" - # Test HTTPS to WSS - client1 = RealtimeLipsyncClient("key", "https://api.example.com") - assert client1._url == "wss://api.example.com/router/lipsync/ws" - - # Test HTTP to WS - client2 = RealtimeLipsyncClient("key", "http://localhost:8000") - assert client2._url == "ws://localhost:8000/router/lipsync/ws" - - @pytest.mark.asyncio - async def test_connect_success(self, client): - """Test successful connection to lipsync server""" - mock_websocket = AsyncMock() - - # Mock the configuration acknowledgment - mock_websocket.recv = AsyncMock(return_value='{"type": "config_ack"}') - mock_websocket.send = AsyncMock() - - mock_connect = AsyncMock(return_value=mock_websocket) - with patch("decart.lipsync.client.websockets.connect", mock_connect): - await client.connect() - - # Verify WebSocket connection was made - assert client._websocket == mock_websocket - - # Verify configuration was sent - mock_websocket.send.assert_called_once() - sent_data = json.loads(mock_websocket.send.call_args[0][0]) - assert sent_data["type"] == "config" - assert sent_data["video_fps"] == 25 - assert sent_data["audio_sample_rate"] == 16000 - - # Verify response handling task was created - assert client._response_handling_task is not None - - # Clean up - client._response_handling_task.cancel() - await client.disconnect() - - @pytest.mark.asyncio - async def test_connect_config_not_acknowledged(self, client): - """Test connection failure when config is not acknowledged""" - mock_websocket = AsyncMock() - - # Mock an error response instead of config_ack - mock_websocket.recv = AsyncMock( - return_value='{"type": "error", "message": "Invalid config"}' - ) - mock_websocket.send = AsyncMock() - - mock_connect = AsyncMock(return_value=mock_websocket) - with patch("decart.lipsync.client.websockets.connect", mock_connect): - with pytest.raises(Exception, match="Configuration not acknowledged"): - await client.connect() - - @pytest.mark.asyncio - async def test_disconnect(self, client): - """Test client disconnection""" - mock_websocket = AsyncMock() - client._websocket = mock_websocket - - await client.disconnect() - - mock_websocket.close.assert_called_once() - assert client._websocket is None - - @pytest.mark.asyncio - async def test_send_audio(self, client): - """Test sending audio data""" - mock_websocket = AsyncMock() - client._websocket = mock_websocket - - audio_data = b"test audio" - await client.send_audio(audio_data) - - mock_websocket.send.assert_called_once() - sent_data = json.loads(mock_websocket.send.call_args[0][0]) - assert sent_data["type"] == "audio_input" - - @pytest.mark.asyncio - async def test_send_video_frame_bytes(self, client): - """Test sending video frame bytes""" - mock_websocket = AsyncMock() - client._websocket = mock_websocket - - video_frame = b"test video" - await client.send_video_frame_bytes(video_frame) - - mock_websocket.send.assert_called_once() - sent_data = json.loads(mock_websocket.send.call_args[0][0]) - assert sent_data["type"] == "video_input" - - @pytest.mark.asyncio - async def test_send_video_frame(self, client): - """Test sending video frame from numpy array""" - import numpy as np - - mock_websocket = AsyncMock() - client._websocket = mock_websocket - - # Create a test image - test_image = np.zeros((100, 100, 3), dtype=np.uint8) - - # Mock cv2.imencode to return success - with patch("decart.lipsync.client.cv2.imencode") as mock_encode: - mock_encode.return_value = (True, np.array([1, 2, 3], dtype=np.uint8)) - - await client.send_video_frame(test_image) - - # Verify encoding was called - mock_encode.assert_called_once_with(".jpeg", test_image) - - # Verify message was sent - mock_websocket.send.assert_called_once() - sent_data = json.loads(mock_websocket.send.call_args[0][0]) - assert sent_data["type"] == "video_input" - - @pytest.mark.asyncio - async def test_encode_video_frame_failure(self, client): - """Test video encoding failure handling""" - import numpy as np - - test_image = np.zeros((100, 100, 3), dtype=np.uint8) - - # Mock cv2.imencode to return failure - with patch("decart.lipsync.client.cv2.imencode") as mock_encode: - mock_encode.return_value = (False, None) - - with pytest.raises(Exception, match="Failed to encode video frame as JPEG"): - await client._encode_video_frame(test_image) - - @pytest.mark.asyncio - async def test_interrupt_audio(self, client): - """Test sending audio interrupt""" - mock_websocket = AsyncMock() - client._websocket = mock_websocket - - await client.interrupt_audio() - - mock_websocket.send.assert_called_once() - sent_data = json.loads(mock_websocket.send.call_args[0][0]) - assert sent_data["type"] == "interrupt_audio" - - @pytest.mark.asyncio - async def test_handle_server_responses_synced_output(self, client): - """Test handling synced output from server""" - import base64 - - # Prepare test data - video_data = base64.b64encode(b"video").decode() - audio_data = base64.b64encode(b"audio").decode() - - mock_websocket = AsyncMock() - mock_websocket.recv = AsyncMock( - side_effect=[ - f'{{"type": "synced_result", "video_frame": "{video_data}", "audio_frame": "{audio_data}"}}', - asyncio.CancelledError(), # Stop the loop - ] - ) - - client._websocket = mock_websocket - - # Run the handler briefly - task = asyncio.create_task(client._handle_server_responses()) - await asyncio.sleep(0.1) - task.cancel() - - # Check that message was added to queue - assert not client._out_queue.empty() - msg = await client._out_queue.get() - assert isinstance(msg, LipsyncSyncedOutputMessage) - assert msg.video_frame == b"video" - assert msg.audio_frame == b"audio" - - @pytest.mark.asyncio - async def test_handle_server_responses_error(self, client): - """Test handling error message from server""" - mock_websocket = AsyncMock() - mock_websocket.recv = AsyncMock(return_value='{"type": "error", "message": "Server error"}') - - client._websocket = mock_websocket - - # Should raise exception on error message - with pytest.raises(Exception, match="Server error"): - await client._handle_server_responses() - - @pytest.mark.asyncio - async def test_get_synced_output_with_timing(self, client): - """Test getting synced output with proper timing""" - import numpy as np - - # Create mock video frame (small valid JPEG) - video_data = b"\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C\x00\x08\x06\x06\x07\x06\x05\x08\x07\x07\x07\t\t\x08\n\x0c\x14\r\x0c\x0b\x0b\x0c\x19\x12\x13\x0f\x14\x1d\x1a\x1f\x1e\x1d\x1a\x1c\x1c $.' \",#\x1c\x1c(7),01444\x1f'9=82<.342\xff\xc0\x00\x0b\x08\x00\x01\x00\x01\x01\x01\x11\x00\xff\xc4\x00\x14\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xc4\x00\x14\x10\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xda\x00\x08\x01\x01\x00\x00?\x00T\xbf\xff\xd9" - audio_data = b"audio" - - msg = LipsyncSyncedOutputMessage(video_frame=video_data, audio_frame=audio_data) - - # Add message to queue - await client._out_queue.put(msg) - - # Mock cv2.imdecode to return a simple array - with patch("decart.lipsync.client.cv2.imdecode") as mock_decode: - mock_decode.return_value = np.zeros((100, 100, 3), dtype=np.uint8) - - # Get output - video, audio = await client.get_synced_output() - - # Verify decoding was attempted - mock_decode.assert_called_once() - assert audio == audio_data - assert isinstance(video, np.ndarray) - - @pytest.mark.asyncio - async def test_full_connection_flow(self): - """Test complete connection and communication flow""" - client = RealtimeLipsyncClient( - api_key="test-key", - base_url="https://api.test.com", - audio_sample_rate=16000, - video_fps=30, - ) - - mock_websocket = AsyncMock() - responses = [ - '{"type": "config_ack"}', # Initial config ack - ] - mock_websocket.recv = AsyncMock(side_effect=responses) - mock_websocket.send = AsyncMock() - - mock_connect = AsyncMock(return_value=mock_websocket) - with patch("decart.lipsync.client.websockets.connect", mock_connect): - # Connect - await client.connect() - - # Send some data - await client.send_audio(b"audio1") - await client.send_video_frame_bytes(b"video1") - await client.interrupt_audio() - - # Verify all messages were sent - assert mock_websocket.send.call_count == 4 # config + 3 messages - - # Disconnect - client._response_handling_task.cancel() - await client.disconnect() - mock_websocket.close.assert_called_once() - - -class TestLipsyncIntegration: - """Integration tests for lipsync with the main client""" - - def test_lipsync_client_import(self): - """Test that LipsyncClient can be imported""" - from decart.lipsync import RealtimeLipsyncClient - - assert RealtimeLipsyncClient is not None - - def test_lipsync_client_in_package(self): - """Test that lipsync is properly integrated in the package""" - import decart.lipsync - - assert hasattr(decart.lipsync, "RealtimeLipsyncClient") diff --git a/tests/test_models.py b/tests/test_models.py index e253d77..3cfe7bb 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -5,24 +5,12 @@ def test_canonical_realtime_models() -> None: - model = models.realtime("lucy-restyle") - assert model.name == "lucy-restyle" - assert model.fps == 25 - assert model.width == 1280 - assert model.height == 704 - assert model.url_path == "/v1/stream" - model = models.realtime("lucy-restyle-2") assert model.name == "lucy-restyle-2" assert model.fps == 22 assert model.width == 1280 assert model.height == 704 - - model = models.realtime("lucy") - assert model.name == "lucy" - assert model.fps == 25 - assert model.width == 1280 - assert model.height == 704 + assert model.url_path == "/v1/stream" model = models.realtime("lucy-2.1") assert model.name == "lucy-2.1" @@ -36,42 +24,18 @@ def test_canonical_realtime_models() -> None: assert model.width == 1088 assert model.height == 624 - model = models.realtime("live-avatar") - assert model.name == "live-avatar" - assert model.fps == 25 - assert model.width == 1280 - assert model.height == 720 - def test_deprecated_realtime_models() -> None: _warned_aliases.clear() - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - model = models.realtime("mirage") - assert model.name == "mirage" - assert len(w) == 1 - assert "deprecated" in str(w[0].message).lower() - assert "lucy-restyle" in str(w[0].message) - - _warned_aliases.clear() - with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") model = models.realtime("mirage_v2") assert model.name == "mirage_v2" assert len(w) == 1 + assert "deprecated" in str(w[0].message).lower() assert "lucy-restyle-2" in str(w[0].message) - _warned_aliases.clear() - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - model = models.realtime("live_avatar") - assert model.name == "live_avatar" - assert len(w) == 1 - assert "live-avatar" in str(w[0].message) - def test_canonical_video_models() -> None: model = models.video("lucy-clip") @@ -96,10 +60,6 @@ def test_canonical_video_models() -> None: assert model.name == "lucy-restyle-2" assert model.url_path == "/v1/jobs/lucy-restyle-2" - model = models.video("lucy-motion") - assert model.name == "lucy-motion" - assert model.url_path == "/v1/jobs/lucy-motion" - def test_deprecated_video_models() -> None: _warned_aliases.clear() @@ -143,9 +103,9 @@ def test_deprecation_warning_only_once() -> None: with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - models.realtime("mirage") - models.realtime("mirage") - models.realtime("mirage") + models.realtime("mirage_v2") + models.realtime("mirage_v2") + models.realtime("mirage_v2") assert len(w) == 1 @@ -197,11 +157,6 @@ def test_latest_video_models() -> None: assert model.url_path == "/v1/jobs/lucy-clip-latest" assert model.fps == 25 - model = models.video("lucy-motion-latest") - assert model.name == "lucy-motion-latest" - assert model.url_path == "/v1/jobs/lucy-motion-latest" - assert model.fps == 25 - def test_latest_image_models() -> None: model = models.image("lucy-image-latest") @@ -221,7 +176,6 @@ def test_latest_aliases_no_deprecation_warning() -> None: models.video("lucy-vton-latest") models.video("lucy-restyle-latest") models.video("lucy-clip-latest") - models.video("lucy-motion-latest") models.image("lucy-image-latest") assert len(w) == 0 diff --git a/tests/test_realtime_unit.py b/tests/test_realtime_unit.py index 0486bcc..bd8c78a 100644 --- a/tests/test_realtime_unit.py +++ b/tests/test_realtime_unit.py @@ -25,27 +25,13 @@ def test_realtime_client_available(): def test_realtime_models_available(): """Test that realtime models are available""" - model = models.realtime("lucy-restyle") - assert model.name == "lucy-restyle" - assert model.fps == 25 + model = models.realtime("lucy-restyle-2") + assert model.name == "lucy-restyle-2" + assert model.fps == 22 assert model.width == 1280 assert model.height == 704 assert model.url_path == "/v1/stream" - model2 = models.realtime("lucy-restyle-2") - assert model2.name == "lucy-restyle-2" - assert model2.fps == 22 - assert model2.width == 1280 - assert model2.height == 704 - assert model2.url_path == "/v1/stream" - - model2 = models.realtime("lucy") - assert model2.name == "lucy" - assert model2.fps == 25 - assert model2.width == 1280 - assert model2.height == 704 - assert model2.url_path == "/v1/stream" - model2 = models.realtime("lucy-2.1") assert model2.name == "lucy-2.1" assert model2.fps == 20 @@ -86,7 +72,7 @@ async def test_realtime_client_creation_with_mock(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, initial_state=ModelState(prompt=Prompt(text="Test", enhance=True)), ), @@ -158,7 +144,6 @@ async def test_realtime_connect_accepts_custom_model_definition(): call_args = mock_manager_class.call_args config = call_args[0][0] if call_args[0] else call_args[1]["configuration"] assert "model=lucy_2_rt_preview" in config.webrtc_url - assert config.model_name == "lucy_2_rt_preview" assert config.fps == 20 await realtime_client.disconnect() @@ -195,7 +180,7 @@ def register_prompt_wait(prompt): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -234,7 +219,7 @@ async def test_buffered_events_delivered_after_handler_registration(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -271,7 +256,7 @@ async def test_realtime_events(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -333,7 +318,7 @@ def register_prompt_wait(prompt): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -380,7 +365,7 @@ def register_prompt_wait(prompt): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -400,114 +385,6 @@ async def set_event(): mock_manager.unregister_prompt_wait.assert_called_with("New prompt") -# Tests for avatar-live model - - -def test_avatar_live_model_available(): - """Test that avatar-live model is available""" - model = models.realtime("live-avatar") - assert model.name == "live-avatar" - assert model.fps == 25 - assert model.width == 1280 - assert model.height == 720 - assert model.url_path == "/v1/stream" - - -@pytest.mark.asyncio -async def test_avatar_live_connect_with_initial_image(): - """Test avatar-live connection with initial_state.image option""" - - client = DecartClient(api_key="test-key") - - with ( - patch("decart.realtime.client.WebRTCManager") as mock_manager_class, - patch( - "decart.realtime.client._image_to_base64", new_callable=AsyncMock - ) as mock_image_to_b64, - patch("decart.realtime.client.aiohttp.ClientSession") as mock_session_cls, - ): - mock_manager = AsyncMock() - mock_manager.connect = AsyncMock(return_value=True) - mock_manager.is_connected = MagicMock(return_value=True) - mock_manager_class.return_value = mock_manager - - mock_image_to_b64.return_value = "base64encodedimage" - - mock_session = MagicMock() - mock_session.closed = False - mock_session.close = AsyncMock() - mock_session_cls.return_value = mock_session - - mock_track = MagicMock() - - from decart.realtime.types import RealtimeConnectOptions - from decart.types import ModelState - - realtime_client = await RealtimeClient.connect( - base_url=client.realtime_base_url, - api_key=client.api_key, - local_track=mock_track, - options=RealtimeConnectOptions( - model=models.realtime("live-avatar"), - on_remote_stream=lambda t: None, - initial_state=ModelState(image=b"fake image bytes"), - ), - ) - - assert realtime_client is not None - assert realtime_client._model_name == "live-avatar" - mock_image_to_b64.assert_called_once() - # Verify initial_image was passed to connect - mock_manager.connect.assert_called_once() - call_kwargs = mock_manager.connect.call_args[1] - assert "initial_image" in call_kwargs - assert call_kwargs["initial_image"] == "base64encodedimage" - - -@pytest.mark.asyncio -async def test_avatar_live_set_image(): - """Test set_image method for avatar-live""" - - client = DecartClient(api_key="test-key") - - with ( - patch("decart.realtime.client.WebRTCManager") as mock_manager_class, - patch("decart.realtime.client.file_input_to_bytes") as mock_file_input, - patch("decart.realtime.client.aiohttp.ClientSession") as mock_session_cls, - ): - mock_manager = AsyncMock() - mock_manager.connect = AsyncMock(return_value=True) - mock_manager.set_image = AsyncMock() - mock_manager_class.return_value = mock_manager - - mock_file_input.return_value = (b"new image data", "image/png") - - mock_session = MagicMock() - mock_session.closed = False - mock_session.close = AsyncMock() - mock_session_cls.return_value = mock_session - - mock_track = MagicMock() - - from decart.realtime.types import RealtimeConnectOptions - - realtime_client = await RealtimeClient.connect( - base_url=client.realtime_base_url, - api_key=client.api_key, - local_track=mock_track, - options=RealtimeConnectOptions( - model=models.realtime("live-avatar"), - on_remote_stream=lambda t: None, - ), - ) - - await realtime_client.set_image(b"new avatar image") - - mock_manager.set_image.assert_called_once() - image_base64_arg = mock_manager.set_image.call_args[0][0] - assert image_base64_arg is not None - - @pytest.mark.asyncio async def test_set_image_works_for_any_model(): """Test that set_image works for non-avatar-live models""" @@ -539,7 +416,7 @@ async def test_set_image_works_for_any_model(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -576,7 +453,7 @@ async def test_set_image_null_clears_image(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -617,7 +494,7 @@ async def test_set_image_with_prompt_and_enhance(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -628,96 +505,6 @@ async def test_set_image_with_prompt_and_enhance(): assert opts["enhance"] is False -@pytest.mark.asyncio -async def test_avatar_live_set_image_timeout(): - """Test set_image raises on timeout""" - - client = DecartClient(api_key="test-key") - - with ( - patch("decart.realtime.client.WebRTCManager") as mock_manager_class, - patch("decart.realtime.client.file_input_to_bytes") as mock_file_input, - patch("decart.realtime.client.aiohttp.ClientSession") as mock_session_cls, - ): - from decart.errors import DecartSDKError - - mock_manager = AsyncMock() - mock_manager.connect = AsyncMock(return_value=True) - mock_manager.set_image = AsyncMock(side_effect=DecartSDKError("Image send timed out")) - mock_manager_class.return_value = mock_manager - - mock_file_input.return_value = (b"image data", "image/png") - - mock_session = MagicMock() - mock_session.closed = False - mock_session.close = AsyncMock() - mock_session_cls.return_value = mock_session - - mock_track = MagicMock() - - from decart.realtime.types import RealtimeConnectOptions - - realtime_client = await RealtimeClient.connect( - base_url=client.realtime_base_url, - api_key=client.api_key, - local_track=mock_track, - options=RealtimeConnectOptions( - model=models.realtime("live-avatar"), - on_remote_stream=lambda t: None, - ), - ) - - with pytest.raises(DecartSDKError) as exc_info: - await realtime_client.set_image(b"test image") - - assert "timed out" in str(exc_info.value).lower() - - -@pytest.mark.asyncio -async def test_avatar_live_set_image_server_error(): - """Test set_image raises on server error""" - - client = DecartClient(api_key="test-key") - - with ( - patch("decart.realtime.client.WebRTCManager") as mock_manager_class, - patch("decart.realtime.client.file_input_to_bytes") as mock_file_input, - patch("decart.realtime.client.aiohttp.ClientSession") as mock_session_cls, - ): - from decart.errors import DecartSDKError - - mock_manager = AsyncMock() - mock_manager.connect = AsyncMock(return_value=True) - mock_manager.set_image = AsyncMock(side_effect=DecartSDKError("Invalid image format")) - mock_manager_class.return_value = mock_manager - - mock_file_input.return_value = (b"image data", "image/png") - - mock_session = MagicMock() - mock_session.closed = False - mock_session.close = AsyncMock() - mock_session_cls.return_value = mock_session - - mock_track = MagicMock() - - from decart.realtime.types import RealtimeConnectOptions - - realtime_client = await RealtimeClient.connect( - base_url=client.realtime_base_url, - api_key=client.api_key, - local_track=mock_track, - options=RealtimeConnectOptions( - model=models.realtime("live-avatar"), - on_remote_stream=lambda t: None, - ), - ) - - with pytest.raises(DecartSDKError) as exc_info: - await realtime_client.set_image(b"test image") - - assert "Invalid image format" in str(exc_info.value) - - # Tests for set() method @@ -750,7 +537,7 @@ async def test_set_rejects_when_neither_prompt_nor_image(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -788,7 +575,7 @@ async def test_set_rejects_empty_prompt(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -826,7 +613,7 @@ async def test_set_sends_prompt_only(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -872,7 +659,7 @@ async def test_set_sends_prompt_with_enhance(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -921,7 +708,7 @@ async def test_set_sends_image_only(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -971,7 +758,7 @@ async def test_set_sends_prompt_and_image(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -1020,7 +807,7 @@ async def test_set_converts_bytes_image(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, ), ) @@ -1069,7 +856,7 @@ async def test_connect_with_initial_prompt(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, initial_state=ModelState(prompt=Prompt(text="Test prompt", enhance=False)), ), @@ -1175,7 +962,7 @@ async def test_connect_without_initial_state_sends_passthrough(): api_key=client.api_key, local_track=mock_track, options=RealtimeConnectOptions( - model=models.realtime("lucy-restyle"), + model=models.realtime("lucy-restyle-2"), on_remote_stream=lambda t: None, # No initial_state — should trigger passthrough ), diff --git a/uv.lock b/uv.lock index 74fbd3a..47915c7 100644 --- a/uv.lock +++ b/uv.lock @@ -597,7 +597,7 @@ wheels = [ [[package]] name = "decart" -version = "0.0.36" +version = "0.0.37" source = { editable = "." } dependencies = [ { name = "aiofiles" },