chore: automatic commit 2025-04-30 12:48
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
# Package surface for the audio helpers: re-export the microphone recorder
# and the local audio player (paths suggest this is `openai/helpers/`).
from .microphone import Microphone
from .local_audio_player import LocalAudioPlayer

__all__ = ["Microphone", "LocalAudioPlayer"]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,165 @@
|
||||
# mypy: ignore-errors
|
||||
from __future__ import annotations
|
||||
|
||||
import queue
|
||||
import asyncio
|
||||
from typing import Any, Union, Callable, AsyncGenerator, cast
|
||||
from typing_extensions import TYPE_CHECKING
|
||||
|
||||
from .. import _legacy_response
|
||||
from .._extras import numpy as np, sounddevice as sd
|
||||
from .._response import StreamedBinaryAPIResponse, AsyncStreamedBinaryAPIResponse
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import numpy.typing as npt
|
||||
|
||||
SAMPLE_RATE = 24000


class LocalAudioPlayer:
    """Plays TTS responses or raw numpy buffers through the default output
    device via `sounddevice`.

    Audio is treated as 24 kHz PCM; int16 input is normalized to float32 in
    [-1.0, 1.0] before playback.
    """

    def __init__(
        self,
        should_stop: Union[Callable[[], bool], None] = None,
    ):
        self.channels = 1
        self.dtype = np.float32
        # Optional predicate polled from the audio callback thread; returning
        # True ends playback early.
        self.should_stop = should_stop

    async def _tts_response_to_buffer(
        self,
        response: Union[
            _legacy_response.HttpxBinaryResponseContent,
            AsyncStreamedBinaryAPIResponse,
            StreamedBinaryAPIResponse,
        ],
    ) -> npt.NDArray[np.float32]:
        """Drain a (sync or async) binary TTS response and decode it as int16
        PCM normalized into a float32 column vector of shape (n, 1)."""
        chunks: list[bytes] = []
        if isinstance(response, (_legacy_response.HttpxBinaryResponseContent, StreamedBinaryAPIResponse)):
            for chunk in response.iter_bytes(chunk_size=1024):
                if chunk:
                    chunks.append(chunk)
        else:
            async for chunk in response.iter_bytes(chunk_size=1024):
                if chunk:
                    chunks.append(chunk)

        audio_bytes = b"".join(chunks)
        # int16 full scale is 32767; dividing maps samples into [-1.0, 1.0].
        audio_np = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32767.0
        return audio_np.reshape(-1, 1)

    async def play(
        self,
        input: Union[
            npt.NDArray[np.int16],
            npt.NDArray[np.float32],
            _legacy_response.HttpxBinaryResponseContent,
            AsyncStreamedBinaryAPIResponse,
            StreamedBinaryAPIResponse,
        ],
    ) -> None:
        """Play a complete buffer or TTS response; returns once playback
        finishes (or `should_stop` fires).

        Raises:
            ValueError: if a numpy input has an unsupported dtype.
        """
        audio_content: npt.NDArray[np.float32]
        if isinstance(input, np.ndarray):
            if input.dtype == np.int16 and self.dtype == np.float32:
                audio_content = (input.astype(np.float32) / 32767.0).reshape(-1, self.channels)
            elif input.dtype == np.float32:
                audio_content = cast('npt.NDArray[np.float32]', input)
                if audio_content.ndim == 1:
                    # Fix: a flat float32 buffer would break `shape[1]` below;
                    # give it an explicit channel axis like the int16 path.
                    audio_content = audio_content.reshape(-1, self.channels)
            else:
                raise ValueError(f"Unsupported dtype: {input.dtype}")
        else:
            audio_content = await self._tts_response_to_buffer(input)

        # get_running_loop() is the non-deprecated call inside a coroutine
        # (get_event_loop() warns under Python 3.10+).
        loop = asyncio.get_running_loop()
        event = asyncio.Event()
        idx = 0

        def callback(
            outdata: npt.NDArray[np.float32],
            frame_count: int,
            _time_info: Any,
            _status: Any,
        ):
            # Runs on the PortAudio thread: copy the next slice of audio into
            # `outdata`, zero-filling any unfilled tail.
            nonlocal idx

            remainder = len(audio_content) - idx
            if remainder == 0 or (callable(self.should_stop) and self.should_stop()):
                # Wake the waiting coroutine from this foreign thread, then
                # tell sounddevice to stop invoking the callback.
                loop.call_soon_threadsafe(event.set)
                raise sd.CallbackStop
            valid_frames = frame_count if remainder >= frame_count else remainder
            outdata[:valid_frames] = audio_content[idx : idx + valid_frames]
            outdata[valid_frames:] = 0
            idx += valid_frames

        stream = sd.OutputStream(
            samplerate=SAMPLE_RATE,
            callback=callback,
            dtype=audio_content.dtype,
            channels=audio_content.shape[1],
        )
        with stream:
            await event.wait()

    async def play_stream(
        self,
        buffer_stream: AsyncGenerator[Union[npt.NDArray[np.float32], npt.NDArray[np.int16], None], None],
    ) -> None:
        """Play audio chunks as they arrive from an async generator.

        A `None` chunk (or generator exhaustion) marks end-of-stream.
        """
        loop = asyncio.get_running_loop()
        event = asyncio.Event()
        # Bounded queue gives backpressure between the producer coroutine and
        # the audio callback thread.
        buffer_queue: queue.Queue[Union[npt.NDArray[np.float32], npt.NDArray[np.int16], None]] = queue.Queue(maxsize=50)

        async def buffer_producer():
            async for buffer in buffer_stream:
                if buffer is None:
                    break
                # queue.put may block when full; run it off-loop so the event
                # loop stays responsive.
                await loop.run_in_executor(None, buffer_queue.put, buffer)
            await loop.run_in_executor(None, buffer_queue.put, None)  # Signal completion

        # Playback cursor shared with the callback below.
        current_buffer = None
        buffer_pos = 0

        def callback(
            outdata: npt.NDArray[np.float32],
            frame_count: int,
            _time_info: Any,
            _status: Any,
        ):
            nonlocal current_buffer, buffer_pos

            frames_written = 0
            while frames_written < frame_count:
                if current_buffer is None or buffer_pos >= len(current_buffer):
                    try:
                        current_buffer = buffer_queue.get(timeout=0.1)
                        if current_buffer is None:
                            # End-of-stream sentinel: wake the coroutine and
                            # stop the stream.
                            loop.call_soon_threadsafe(event.set)
                            raise sd.CallbackStop
                        buffer_pos = 0

                        if current_buffer.dtype == np.int16 and self.dtype == np.float32:
                            current_buffer = (current_buffer.astype(np.float32) / 32767.0).reshape(-1, self.channels)

                    except queue.Empty:
                        # No data ready yet: emit silence for the rest of this
                        # callback rather than blocking the audio thread.
                        outdata[frames_written:] = 0
                        return

                remaining_frames = len(current_buffer) - buffer_pos
                frames_to_write = min(frame_count - frames_written, remaining_frames)
                outdata[frames_written : frames_written + frames_to_write] = current_buffer[
                    buffer_pos : buffer_pos + frames_to_write
                ]
                buffer_pos += frames_to_write
                frames_written += frames_to_write

        producer_task = asyncio.create_task(buffer_producer())

        with sd.OutputStream(
            samplerate=SAMPLE_RATE,
            channels=self.channels,
            dtype=self.dtype,
            callback=callback,
        ):
            await event.wait()

        await producer_task
|
||||
100
venv/lib/python3.11/site-packages/openai/helpers/microphone.py
Normal file
100
venv/lib/python3.11/site-packages/openai/helpers/microphone.py
Normal file
@@ -0,0 +1,100 @@
|
||||
# mypy: ignore-errors
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import time
|
||||
import wave
|
||||
import asyncio
|
||||
from typing import Any, Type, Union, Generic, TypeVar, Callable, overload
|
||||
from typing_extensions import TYPE_CHECKING, Literal
|
||||
|
||||
from .._types import FileTypes, FileContent
|
||||
from .._extras import numpy as np, sounddevice as sd
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import numpy.typing as npt
|
||||
|
||||
SAMPLE_RATE = 24000

DType = TypeVar("DType", bound=np.generic)


class Microphone(Generic[DType]):
    """Records from the default input device at 24 kHz via `sounddevice`.

    `record()` captures until `should_record` returns False or `timeout`
    elapses, returning either a raw numpy buffer or an in-memory WAV file.
    """

    def __init__(
        self,
        channels: int = 1,
        dtype: Type[DType] = np.int16,
        should_record: Union[Callable[[], bool], None] = None,
        timeout: Union[float, None] = None,
    ):
        self.channels = channels
        self.dtype = dtype
        # Optional predicate polled from the audio thread; recording stops
        # when it returns False.
        self.should_record = should_record
        self.buffer_chunks = []
        # Maximum recording duration in seconds (None = unbounded).
        self.timeout = timeout
        self.has_record_function = callable(should_record)

    def _ndarray_to_wav(self, audio_data: npt.NDArray[DType]) -> FileTypes:
        """Wrap raw samples in an in-memory WAV container, returned as the
        (filename, file, content-type) tuple shape the API client accepts."""
        buffer: FileContent = io.BytesIO()
        with wave.open(buffer, "w") as wav_file:
            wav_file.setnchannels(self.channels)
            # Sample width in bytes follows the configured dtype (2 for int16).
            wav_file.setsampwidth(np.dtype(self.dtype).itemsize)
            wav_file.setframerate(SAMPLE_RATE)
            wav_file.writeframes(audio_data.tobytes())
        buffer.seek(0)
        return ("audio.wav", buffer, "audio/wav")

    @overload
    async def record(self, return_ndarray: Literal[True]) -> npt.NDArray[DType]: ...

    @overload
    async def record(self, return_ndarray: Literal[False]) -> FileTypes: ...

    @overload
    async def record(self, return_ndarray: None = ...) -> FileTypes: ...

    async def record(self, return_ndarray: Union[bool, None] = False) -> Union[npt.NDArray[DType], FileTypes]:
        """Record until `should_record` returns False or `timeout` expires.

        Returns the raw ndarray when `return_ndarray` is True, otherwise an
        in-memory WAV file tuple from `_ndarray_to_wav`.
        """
        # get_running_loop() is the non-deprecated call inside a coroutine
        # (get_event_loop() warns under Python 3.10+).
        loop = asyncio.get_running_loop()
        event = asyncio.Event()
        # Reset per call; a second concurrent record() would clobber this.
        self.buffer_chunks: list[npt.NDArray[DType]] = []
        start_time = time.perf_counter()

        def callback(
            indata: npt.NDArray[DType],
            _frame_count: int,
            _time_info: Any,
            _status: Any,
        ):
            # Runs on the PortAudio thread: accumulate chunks until a stop
            # condition fires.
            execution_time = time.perf_counter() - start_time
            reached_recording_timeout = execution_time > self.timeout if self.timeout is not None else False
            if reached_recording_timeout:
                loop.call_soon_threadsafe(event.set)
                raise sd.CallbackStop

            should_be_recording = self.should_record() if callable(self.should_record) else True
            if not should_be_recording:
                loop.call_soon_threadsafe(event.set)
                raise sd.CallbackStop

            # Copy: sounddevice reuses indata's memory between callbacks.
            self.buffer_chunks.append(indata.copy())

        stream = sd.InputStream(
            callback=callback,
            dtype=self.dtype,
            samplerate=SAMPLE_RATE,
            channels=self.channels,
        )
        with stream:
            await event.wait()

        # Concatenate all chunks into a single buffer, handle empty case
        concatenated_chunks: npt.NDArray[DType] = (
            np.concatenate(self.buffer_chunks, axis=0)
            if len(self.buffer_chunks) > 0
            else np.array([], dtype=self.dtype)
        )

        if return_ndarray:
            return concatenated_chunks
        else:
            return self._ndarray_to_wav(concatenated_chunks)
|
||||
Reference in New Issue
Block a user