From cbf465a8a2f1801aa394e531f7dbe53f0843050b Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Fri, 12 Jun 2026 21:04:55 +0200 Subject: [PATCH 01/20] feat(sdk): stream file uploads and downloads instead of buffering in memory - Volume.writeFile/write_file: stream ReadableStream (JS, non-browser) and file-like objects (Python) to the API instead of buffering them in memory - Sandbox.files.write with octet-stream upload: stream ReadableStream data (JS, non-browser) and file-like objects (Python), with chunked gzip compression - Python Sandbox.files.read(format="stream"): stream the response body instead of downloading it into memory before iterating (sync and async) - JS Sandbox.files.read({ format: 'stream' }): bound only the initial handshake by the request timeout instead of killing an actively-consumed stream; the user signal can still cancel it mid-stream Co-Authored-By: Claude Fable 5 --- .changeset/cuddly-pots-stream.md | 11 +++ .../js-sdk/src/sandbox/filesystem/index.ts | 73 +++++++++++++++++++ packages/js-sdk/src/utils.ts | 12 ++- packages/js-sdk/src/volume/index.ts | 12 ++- .../js-sdk/tests/sandbox/files/read.test.ts | 23 ++++++ .../js-sdk/tests/sandbox/files/write.test.ts | 43 +++++++++++ packages/js-sdk/tests/volume/file.test.ts | 14 ++++ packages/python-sdk/e2b/io_utils.py | 47 ++++++++++++ .../e2b/sandbox/filesystem/filesystem.py | 49 ++++++++++--- .../sandbox_async/filesystem/filesystem.py | 37 ++++++++-- .../e2b/sandbox_sync/filesystem/filesystem.py | 32 +++++++- .../python-sdk/e2b/volume/volume_async.py | 20 ++--- packages/python-sdk/e2b/volume/volume_sync.py | 23 +++--- .../async/sandbox_async/files/test_read.py | 33 +++++++++ .../async/sandbox_async/files/test_write.py | 42 +++++++++++ .../tests/async/volume_async/test_file.py | 12 ++- .../sync/sandbox_sync/files/test_read.py | 27 +++++++ .../sync/sandbox_sync/files/test_write.py | 42 +++++++++++ .../tests/sync/volume_sync/test_file.py | 12 ++- 19 
files changed, 514 insertions(+), 50 deletions(-) create mode 100644 .changeset/cuddly-pots-stream.md create mode 100644 packages/python-sdk/e2b/io_utils.py diff --git a/.changeset/cuddly-pots-stream.md b/.changeset/cuddly-pots-stream.md new file mode 100644 index 0000000000..6dfad351b8 --- /dev/null +++ b/.changeset/cuddly-pots-stream.md @@ -0,0 +1,11 @@ +--- +"e2b": patch +"@e2b/python-sdk": patch +--- + +Stream uploads instead of buffering streaming input entirely in memory: + +- `Volume.writeFile()` / `Volume.write_file()`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are now streamed to the API in chunks. +- `Sandbox.files.write()` / `write_files()` with `useOctetStream`/`use_octet_stream`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are streamed to the sandbox, including when `gzip` is enabled (compression now happens chunk by chunk). +- Python `Sandbox.files.read(format="stream")`: the response body is now streamed from the sandbox instead of being downloaded into memory before iteration (sync and async). +- JS `Sandbox.files.read({ format: 'stream' })`: the request timeout now bounds only the initial handshake instead of killing the stream while it's being consumed; pass `signal` to cancel an in-flight stream. diff --git a/packages/js-sdk/src/sandbox/filesystem/index.ts b/packages/js-sdk/src/sandbox/filesystem/index.ts index d6dd5f8957..cf285e311f 100644 --- a/packages/js-sdk/src/sandbox/filesystem/index.ts +++ b/packages/js-sdk/src/sandbox/filesystem/index.ts @@ -250,6 +250,8 @@ export interface FilesystemWriteOpts extends FilesystemRequestOpts { gzip?: boolean /** * When true, the upload uses `application/octet-stream` instead of `multipart/form-data`. + * Outside the browser, `ReadableStream` data is then streamed to the sandbox + * instead of being buffered in memory. * * Defaults to `false`. 
Requires envd 0.5.7 or later — when not supported by * the sandbox's envd version, the upload falls back to `multipart/form-data`. @@ -380,6 +382,10 @@ export class Filesystem { * * You can pass `text`, `bytes`, `blob`, or `stream` to `opts.format` to change the return type. * + * The request timeout bounds only the initial handshake—the returned + * stream is not killed by it while being consumed. Use `opts.signal` to + * cancel an in-flight stream. + * * @param path path to the file. * @param opts connection options. * @param [opts.format] format of the file content—`stream`. @@ -411,6 +417,71 @@ export class Filesystem { headers['Accept-Encoding'] = 'gzip' } + if (format === 'stream') { + // The request timeout bounds only the initial handshake; once the + // response arrives, the stream lives until it's consumed, cancelled, + // or the user signal aborts. + const { controller, clearStartTimeout, cleanup } = setupRequestController( + opts?.requestTimeoutMs ?? this.connectionConfig.requestTimeoutMs, + opts?.signal + ) + + try { + const res = await this.envdApi.api.GET('/files', { + params: { + query: { + path, + username: user, + }, + }, + parseAs: 'stream', + signal: controller.signal, + headers, + }) + + const err = await handleFilesystemEnvdApiError(res) + if (err) { + throw err + } + + clearStartTimeout() + + const body = res.data as ReadableStream | null + if (!body) { + cleanup() + return new Blob([]).stream() + } + + const reader = body.getReader() + return new ReadableStream({ + async pull(streamController) { + try { + const { done, value } = await reader.read() + if (done) { + streamController.close() + cleanup() + } else { + streamController.enqueue(value) + } + } catch (err) { + cleanup() + streamController.error(err) + } + }, + async cancel(reason) { + try { + await reader.cancel(reason) + } finally { + cleanup() + } + }, + }) + } catch (err) { + cleanup() + throw err + } + } + const res = await this.envdApi.api.GET('/files', { params: { query: { @@ 
-569,6 +640,8 @@ export class Filesystem { writeOpts?.signal ), body: {}, + // Streaming request bodies require half-duplex mode. + ...(body instanceof ReadableStream && { duplex: 'half' as const }), }) const err = await handleFilesystemEnvdApiError(res) diff --git a/packages/js-sdk/src/utils.ts b/packages/js-sdk/src/utils.ts index 1037ae8100..bfb8a74416 100644 --- a/packages/js-sdk/src/utils.ts +++ b/packages/js-sdk/src/utils.ts @@ -131,7 +131,11 @@ export function shellQuote(s: string): string { /** * Prepare data for upload as a BodyInit, optionally gzip-compressed. - * When gzip is enabled, compresses the data and returns a Blob. + * + * Outside the browser, streams (and gzip-compressed data) are returned as + * `ReadableStream` so they can be uploaded without buffering in memory. + * Browsers don't support streaming request bodies, so data is buffered into + * a Blob there. */ export async function toUploadBody( data: string | ArrayBuffer | Blob | ReadableStream, @@ -145,7 +149,11 @@ export async function toUploadBody( ? data.stream() : new Blob([data]).stream() const compressed = stream.pipeThrough(new CompressionStream('gzip')) - return new Response(compressed).blob() + return runtime === 'browser' ? 
new Response(compressed).blob() : compressed + } + + if (data instanceof ReadableStream && runtime !== 'browser') { + return data } return toBlob(data) diff --git a/packages/js-sdk/src/volume/index.ts b/packages/js-sdk/src/volume/index.ts index 7a09e74ec3..ee254d81f1 100644 --- a/packages/js-sdk/src/volume/index.ts +++ b/packages/js-sdk/src/volume/index.ts @@ -8,7 +8,7 @@ import { } from './client' import { ConnectionConfig, ConnectionOpts } from '../connectionConfig' import { NotFoundError, VolumeError } from '../errors' -import { toBlob } from '../utils' +import { runtime, toBlob } from '../utils' import { VolumeFileType } from './types' import type { VolumeAndToken, @@ -587,7 +587,7 @@ export class Volume { * Writing to a file that already exists overwrites the file. * * @param path path to the file. - * @param data data to write to the file. Data can be a string, `ArrayBuffer`, `Blob`, or `ReadableStream`. + * @param data data to write to the file. Data can be a string, `ArrayBuffer`, `Blob`, or `ReadableStream`. Outside the browser, `ReadableStream` data is streamed to the API instead of being buffered in memory. * @param options file creation options. * @param opts connection options. * @@ -604,7 +604,9 @@ export class Volume { }) const client = new VolumeApiClient(config) - const blob = await toBlob(data) + // Browsers don't support streaming request bodies, so buffer there. + const isStream = data instanceof ReadableStream && runtime !== 'browser' + const body = isStream ? data : await toBlob(data) const res = await client.api.PUT('/volumecontent/{volumeID}/file', { params: { @@ -619,12 +621,14 @@ export class Volume { force: opts?.force, }, }, - bodySerializer: () => blob, + bodySerializer: () => body, body: {} as any, headers: { 'Content-Type': 'application/octet-stream', }, signal: config.getSignal(), + // Streaming request bodies require half-duplex mode. 
+ ...(isStream && { duplex: 'half' as const }), }) if (res.response.status === 404) { diff --git a/packages/js-sdk/tests/sandbox/files/read.test.ts b/packages/js-sdk/tests/sandbox/files/read.test.ts index 4dad2fb33c..6e0b1e3395 100644 --- a/packages/js-sdk/tests/sandbox/files/read.test.ts +++ b/packages/js-sdk/tests/sandbox/files/read.test.ts @@ -38,3 +38,26 @@ sandboxTest('empty file', async ({ sandbox }) => { const content = await sandbox.files.read(filename) expect(content).toBe('') }) + +sandboxTest('read file as stream', async ({ sandbox }) => { + const filename = 'test_read_stream.txt' + const content = 'Streamed read content. '.repeat(10_000) + + await sandbox.files.write(filename, content) + const stream = await sandbox.files.read(filename, { format: 'stream' }) + + const chunks: Uint8Array[] = [] + for await (const chunk of stream as unknown as AsyncIterable) { + chunks.push(chunk) + } + const readContent = Buffer.concat(chunks).toString('utf-8') + assert.equal(readContent, content) +}) + +sandboxTest('read non-existing file as stream', async ({ sandbox }) => { + const filename = 'non_existing_file.txt' + + await expect( + sandbox.files.read(filename, { format: 'stream' }) + ).rejects.toThrowError(FileNotFoundError) +}) diff --git a/packages/js-sdk/tests/sandbox/files/write.test.ts b/packages/js-sdk/tests/sandbox/files/write.test.ts index d07de1426b..61f046cfe0 100644 --- a/packages/js-sdk/tests/sandbox/files/write.test.ts +++ b/packages/js-sdk/tests/sandbox/files/write.test.ts @@ -321,3 +321,46 @@ sandboxTest('writeFiles overwrites existing files', async ({ sandbox }) => { await sandbox.files.remove(filename) } }) + +sandboxTest( + 'write ReadableStream with octet stream upload', + async ({ sandbox }) => { + const filename = 'test_write_octet_stream.bin' + const content = 'Streamed octet-stream upload. 
'.repeat(10_000) + const stream = new Blob([content]).stream() + + const info = await sandbox.files.write(filename, stream, { + useOctetStream: true, + }) + assert.equal(info.path, `/home/user/${filename}`) + + const readContent = await sandbox.files.read(filename) + assert.equal(readContent, content) + + if (isDebug) { + await sandbox.files.remove(filename) + } + } +) + +sandboxTest( + 'write ReadableStream with octet stream upload and gzip', + async ({ sandbox }) => { + const filename = 'test_write_octet_stream_gzip.bin' + const content = 'Streamed gzipped octet-stream upload. '.repeat(10_000) + const stream = new Blob([content]).stream() + + const info = await sandbox.files.write(filename, stream, { + useOctetStream: true, + gzip: true, + }) + assert.equal(info.path, `/home/user/${filename}`) + + const readContent = await sandbox.files.read(filename) + assert.equal(readContent, content) + + if (isDebug) { + await sandbox.files.remove(filename) + } + } +) diff --git a/packages/js-sdk/tests/volume/file.test.ts b/packages/js-sdk/tests/volume/file.test.ts index f15131b7eb..ff713de55b 100644 --- a/packages/js-sdk/tests/volume/file.test.ts +++ b/packages/js-sdk/tests/volume/file.test.ts @@ -58,6 +58,20 @@ describe('Volume File Operations', () => { } ) + volumeTest( + 'should write and read a file from a ReadableStream', + async ({ volume }) => { + const path = '/test-stream.txt' + const content = 'Test stream content' + const stream = new Blob([content]).stream() + + await volume.writeFile(path, stream) + const readContent = await volume.readFile(path, { format: 'text' }) + + expect(readContent).toBe(content) + } + ) + volumeTest('should write and read an empty file', async ({ volume }) => { const path = '/empty.txt' const content = '' diff --git a/packages/python-sdk/e2b/io_utils.py b/packages/python-sdk/e2b/io_utils.py new file mode 100644 index 0000000000..d9e6dea4f7 --- /dev/null +++ b/packages/python-sdk/e2b/io_utils.py @@ -0,0 +1,47 @@ +import zlib +from typing 
import IO, AsyncIterable, AsyncIterator, Iterable, Iterator + +IO_CHUNK_SIZE = 65_536 + + +def iter_io_chunks(data: IO) -> Iterator[bytes]: + """Read a file-like object in chunks, encoding text chunks to UTF-8.""" + while True: + chunk = data.read(IO_CHUNK_SIZE) + if not chunk: + break + yield chunk if isinstance(chunk, bytes) else chunk.encode("utf-8") + + +async def aiter_io_chunks(data: IO) -> AsyncIterator[bytes]: + """Read a file-like object in chunks, encoding text chunks to UTF-8.""" + while True: + chunk = data.read(IO_CHUNK_SIZE) + if not chunk: + break + yield chunk if isinstance(chunk, bytes) else chunk.encode("utf-8") + + +def _gzip_compressor(): + # wbits > 16 makes zlib produce a gzip-formatted stream. + return zlib.compressobj(wbits=zlib.MAX_WBITS | 16) + + +def gzip_iter(chunks: Iterable[bytes]) -> Iterator[bytes]: + """Gzip-compress a byte stream chunk by chunk.""" + compressor = _gzip_compressor() + for chunk in chunks: + compressed = compressor.compress(chunk) + if compressed: + yield compressed + yield compressor.flush() + + +async def agzip_iter(chunks: AsyncIterable[bytes]) -> AsyncIterator[bytes]: + """Gzip-compress a byte stream chunk by chunk.""" + compressor = _gzip_compressor() + async for chunk in chunks: + compressed = compressor.compress(chunk) + if compressed: + yield compressed + yield compressor.flush() diff --git a/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py index 6337fafaa1..5838c8604d 100644 --- a/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py @@ -4,10 +4,11 @@ from datetime import datetime from enum import Enum from io import IOBase, TextIOBase -from typing import IO, Dict, Optional, Union, TypedDict +from typing import IO, AsyncIterator, Dict, Iterator, Optional, Union, TypedDict from e2b.envd.filesystem import filesystem_pb2 from e2b.exceptions import InvalidArgumentException +from 
e2b.io_utils import agzip_iter, aiter_io_chunks, gzip_iter, iter_io_chunks class FileType(Enum): @@ -149,20 +150,44 @@ def _to_httpx_file(file_path: str, file_data: Union[str, bytes, IO]): def to_upload_body( data: Union[str, bytes, IO], use_gzip: bool = False, -) -> bytes: - """Prepare file data for upload, optionally gzip-compressed.""" - if isinstance(data, str): - raw = data.encode("utf-8") - elif isinstance(data, bytes): - raw = data - elif isinstance(data, TextIOBase): - raw = data.read().encode("utf-8") - elif isinstance(data, IOBase): - raw = data.read() +) -> Union[bytes, IO, Iterator[bytes]]: + """Prepare file data for upload, optionally gzip-compressed. + + File-like objects are streamed in chunks instead of being buffered in + memory. + """ + if isinstance(data, (str, bytes)): + raw = data.encode("utf-8") if isinstance(data, str) else data + return gzip.compress(raw) if use_gzip else raw + elif isinstance(data, (TextIOBase, IOBase)): + if use_gzip: + return gzip_iter(iter_io_chunks(data)) + if isinstance(data, TextIOBase): + # Text-mode IO yields str chunks—encode them while streaming. + return iter_io_chunks(data) + # httpx streams binary file-like objects in chunks without buffering. + return data else: raise InvalidArgumentException(f"Unsupported data type: {type(data)}") - return gzip.compress(raw) if use_gzip else raw + +def to_upload_body_async( + data: Union[str, bytes, IO], + use_gzip: bool = False, +) -> Union[bytes, AsyncIterator[bytes]]: + """Prepare file data for upload with async httpx, optionally gzip-compressed. + + File-like objects are streamed in chunks instead of being buffered in + memory. Async httpx requires an async iterable for streamed request bodies. 
+ """ + if isinstance(data, (str, bytes)): + raw = data.encode("utf-8") if isinstance(data, str) else data + return gzip.compress(raw) if use_gzip else raw + elif isinstance(data, (TextIOBase, IOBase)): + chunks = aiter_io_chunks(data) + return agzip_iter(chunks) if use_gzip else chunks + else: + raise InvalidArgumentException(f"Unsupported data type: {type(data)}") METADATA_HEADER_PREFIX = "X-Metadata-" diff --git a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py index be1b993972..6345a04fca 100644 --- a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py @@ -38,7 +38,7 @@ map_entry_info, map_file_type, metadata_to_headers, - to_upload_body, + to_upload_body_async, validate_metadata, ) from e2b.sandbox.filesystem.watch_handle import FilesystemEvent @@ -177,11 +177,38 @@ async def read( if gzip: headers["Accept-Encoding"] = "gzip" + timeout = self._connection_config.get_request_timeout(request_timeout) + + if format == "stream": + # Stream the response body instead of buffering it in memory. 
+ request = self._envd_api.build_request( + "GET", + ENVD_API_FILES_ROUTE, + params=params, + headers=headers, + timeout=timeout, + ) + r = await self._envd_api.send(request, stream=True) + + err = await _ahandle_filesystem_envd_api_exception(r) + if err: + await r.aclose() + raise err + + async def stream_file() -> AsyncIterator[bytes]: + try: + async for chunk in r.aiter_bytes(): + yield chunk + finally: + await r.aclose() + + return stream_file() + r = await self._envd_api.get( ENVD_API_FILES_ROUTE, params=params, headers=headers, - timeout=self._connection_config.get_request_timeout(request_timeout), + timeout=timeout, ) err = await _ahandle_filesystem_envd_api_exception(r) @@ -192,8 +219,6 @@ async def read( return r.text elif format == "bytes": return bytearray(r.content) - elif format == "stream": - return r.aiter_bytes() async def write( self, @@ -212,7 +237,7 @@ async def write( Writing to a file at path that doesn't exist creates the necessary directories. :param path: Path to the file - :param data: Data to write to the file, can be a `str`, `bytes`, or `IO`. + :param data: Data to write to the file, can be a `str`, `bytes`, or `IO`. File-like objects are streamed in chunks instead of being buffered in memory. 
:param user: Run the operation as this user :param request_timeout: Timeout for the request in **seconds** :param gzip: Use gzip compression for the request @@ -296,7 +321,7 @@ async def _upload_file(file): r = await self._envd_api.post( ENVD_API_FILES_ROUTE, - content=to_upload_body(file_data, gzip), + content=to_upload_body_async(file_data, gzip), headers=headers, params=params, timeout=self._connection_config.get_request_timeout( diff --git a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py index be4f13412c..53100892a6 100644 --- a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py @@ -199,11 +199,37 @@ def read( if gzip: headers["Accept-Encoding"] = "gzip" + timeout = self._connection_config.get_request_timeout(request_timeout) + + if format == "stream": + # Stream the response body instead of buffering it in memory. + request = self._envd_api.build_request( + "GET", + ENVD_API_FILES_ROUTE, + params=params, + headers=headers, + timeout=timeout, + ) + r = self._envd_api.send(request, stream=True) + + err = _handle_filesystem_envd_api_exception(r) + if err: + r.close() + raise err + + def stream_file() -> Iterator[bytes]: + try: + yield from r.iter_bytes() + finally: + r.close() + + return stream_file() + r = self._envd_api.get( ENVD_API_FILES_ROUTE, params=params, headers=headers, - timeout=self._connection_config.get_request_timeout(request_timeout), + timeout=timeout, ) err = _handle_filesystem_envd_api_exception(r) @@ -214,8 +240,6 @@ def read( return r.text elif format == "bytes": return bytearray(r.content) - elif format == "stream": - return r.iter_bytes() def write( self, @@ -234,7 +258,7 @@ def write( Writing to a file at path that doesn't exist creates the necessary directories. :param path: Path to the file - :param data: Data to write to the file, can be a `str`, `bytes`, or `IO`. 
+ :param data: Data to write to the file, can be a `str`, `bytes`, or `IO`. File-like objects are streamed in chunks instead of being buffered in memory. :param user: Run the operation as this user :param request_timeout: Timeout for the request in **seconds** :param gzip: Use gzip compression for the request diff --git a/packages/python-sdk/e2b/volume/volume_async.py b/packages/python-sdk/e2b/volume/volume_async.py index c8932af9af..047e19f827 100644 --- a/packages/python-sdk/e2b/volume/volume_async.py +++ b/packages/python-sdk/e2b/volume/volume_async.py @@ -46,6 +46,7 @@ VolumeInfo, VolumeEntryStat, ) +from e2b.io_utils import aiter_io_chunks from e2b.volume.utils import DualMethod, convert_volume_entry_stat @@ -525,7 +526,7 @@ async def stream_file() -> AsyncIterator[bytes]: async def write_file( self, path: str, - data: Union[str, bytes, IO[bytes]], + data: Union[str, bytes, IO], uid: Optional[int] = None, gid: Optional[int] = None, mode: Optional[int] = None, @@ -539,7 +540,7 @@ async def write_file( Writing to a file that already exists overwrites the file. :param path: Path to the file - :param data: Data to write to the file. Data can be a string, bytes, or IO. + :param data: Data to write to the file. Data can be a string, bytes, or IO. File-like objects are streamed in chunks instead of being buffered in memory. 
:param uid: User ID of the created file :param gid: Group ID of the created file :param mode: Mode of the created file @@ -556,22 +557,21 @@ async def write_file( if upload_timeout is not None: api_client = api_client.with_timeout(httpx.Timeout(upload_timeout)) + content: Union[bytes, AsyncIterator[bytes]] if isinstance(data, str): - data_bytes = data.encode("utf-8") + content = data.encode("utf-8") elif isinstance(data, bytes): - data_bytes = data + content = data elif hasattr(data, "read"): - content = data.read() - if isinstance(content, bytes): - data_bytes = content - else: - data_bytes = content.encode("utf-8") + # Stream file-like objects in chunks without buffering them in + # memory. Async httpx requires an async iterable request body. + content = aiter_io_chunks(data) else: raise ValueError(f"Unsupported data type: {type(data)}") res = await put_file.asyncio_detailed( self._volume_id, - body=FilePayload(payload=data_bytes), # type: ignore[arg-type] # Pass bytes directly for async httpx compatibility + body=FilePayload(payload=content), # type: ignore[arg-type] # httpx accepts bytes and streamable content directly path=path, uid=uid if uid is not None else UNSET, gid=gid if gid is not None else UNSET, diff --git a/packages/python-sdk/e2b/volume/volume_sync.py b/packages/python-sdk/e2b/volume/volume_sync.py index 94a4f5be60..4ffc5a1477 100644 --- a/packages/python-sdk/e2b/volume/volume_sync.py +++ b/packages/python-sdk/e2b/volume/volume_sync.py @@ -1,3 +1,4 @@ +import io from typing import IO, Iterator, List, Literal, Optional, Union, cast, overload from http import HTTPStatus @@ -46,6 +47,7 @@ VolumeInfo, VolumeEntryStat, ) +from e2b.io_utils import iter_io_chunks from e2b.volume.utils import DualMethod, convert_volume_entry_stat @@ -522,7 +524,7 @@ def stream_file() -> Iterator[bytes]: def write_file( self, path: str, - data: Union[str, bytes, IO[bytes]], + data: Union[str, bytes, IO], uid: Optional[int] = None, gid: Optional[int] = None, mode: 
Optional[int] = None, @@ -536,7 +538,7 @@ def write_file( Writing to a file that already exists overwrites the file. :param path: Path to the file - :param data: Data to write to the file. Data can be a string, bytes, or IO. + :param data: Data to write to the file. Data can be a string, bytes, or IO. File-like objects are streamed in chunks instead of being buffered in memory. :param uid: User ID of the created file :param gid: Group ID of the created file :param mode: Mode of the created file @@ -553,22 +555,23 @@ def write_file( if upload_timeout is not None: api_client = api_client.with_timeout(httpx.Timeout(upload_timeout)) + content: Union[bytes, IO[bytes], Iterator[bytes]] if isinstance(data, str): - data_bytes = data.encode("utf-8") + content = data.encode("utf-8") elif isinstance(data, bytes): - data_bytes = data + content = data + elif isinstance(data, io.TextIOBase): + # Text-mode IO yields str chunks—encode them while streaming. + content = iter_io_chunks(data) elif hasattr(data, "read"): - content = data.read() - if isinstance(content, bytes): - data_bytes = content - else: - data_bytes = content.encode("utf-8") + # httpx streams file-like objects in chunks without buffering. 
+ content = data else: raise ValueError(f"Unsupported data type: {type(data)}") res = put_file.sync_detailed( self._volume_id, - body=FilePayload(payload=data_bytes), # type: ignore[arg-type] # Pass bytes directly for sync httpx compatibility + body=FilePayload(payload=content), # type: ignore[arg-type] # httpx accepts bytes and streamable content directly path=path, uid=uid if uid is not None else UNSET, gid=gid if gid is not None else UNSET, diff --git a/packages/python-sdk/tests/async/sandbox_async/files/test_read.py b/packages/python-sdk/tests/async/sandbox_async/files/test_read.py index 6bb871e470..365d405305 100644 --- a/packages/python-sdk/tests/async/sandbox_async/files/test_read.py +++ b/packages/python-sdk/tests/async/sandbox_async/files/test_read.py @@ -35,3 +35,36 @@ async def test_read_empty_file(async_sandbox: AsyncSandbox): await async_sandbox.commands.run(f"touch {filename}") read_content = await async_sandbox.files.read(filename) assert read_content == content + + +async def test_read_file_as_stream(async_sandbox: AsyncSandbox): + filename = "test_read_stream.txt" + content = "Streamed read content. " * 10_000 + + await async_sandbox.files.write(filename, content) + stream = await async_sandbox.files.read(filename, format="stream") + chunks = [] + async for chunk in stream: + chunks.append(chunk) + read_content = b"".join(chunks).decode("utf-8") + assert read_content == content + + +async def test_read_file_as_stream_with_gzip(async_sandbox: AsyncSandbox): + filename = "test_read_stream_gzip.txt" + content = "Streamed gzipped read content. 
" * 10_000 + + await async_sandbox.files.write(filename, content) + stream = await async_sandbox.files.read(filename, format="stream", gzip=True) + chunks = [] + async for chunk in stream: + chunks.append(chunk) + read_content = b"".join(chunks).decode("utf-8") + assert read_content == content + + +async def test_read_non_existing_file_as_stream(async_sandbox: AsyncSandbox): + filename = "non_existing_file.txt" + + with pytest.raises(FileNotFoundException): + await async_sandbox.files.read(filename, format="stream") diff --git a/packages/python-sdk/tests/async/sandbox_async/files/test_write.py b/packages/python-sdk/tests/async/sandbox_async/files/test_write.py index 13925ed64b..5b352548f7 100644 --- a/packages/python-sdk/tests/async/sandbox_async/files/test_write.py +++ b/packages/python-sdk/tests/async/sandbox_async/files/test_write.py @@ -174,3 +174,45 @@ async def test_write_files_with_different_data_types( if debug: for file in files: await async_sandbox.files.remove(file["path"]) + + +async def test_write_io_with_octet_stream(async_sandbox: AsyncSandbox, debug): + filename = "test_write_octet_io.bin" + text = "Streamed octet-stream upload. " * 10_000 + content = io.BytesIO(text.encode("utf-8")) + + info = await async_sandbox.files.write(filename, content, use_octet_stream=True) + assert info.path == f"/home/user/{filename}" + + read_content = await async_sandbox.files.read(filename) + assert read_content == text + + if debug: + await async_sandbox.files.remove(filename) + + +async def test_write_text_io_with_octet_stream(async_sandbox: AsyncSandbox, debug): + filename = "test_write_octet_text_io.txt" + text = "Streamed text octet-stream upload." 
+ + await async_sandbox.files.write(filename, io.StringIO(text), use_octet_stream=True) + + read_content = await async_sandbox.files.read(filename) + assert read_content == text + + if debug: + await async_sandbox.files.remove(filename) + + +async def test_write_io_with_octet_stream_and_gzip(async_sandbox: AsyncSandbox, debug): + filename = "test_write_octet_io_gzip.bin" + text = "Streamed gzipped octet-stream upload. " * 10_000 + content = io.BytesIO(text.encode("utf-8")) + + await async_sandbox.files.write(filename, content, use_octet_stream=True, gzip=True) + + read_content = await async_sandbox.files.read(filename) + assert read_content == text + + if debug: + await async_sandbox.files.remove(filename) diff --git a/packages/python-sdk/tests/async/volume_async/test_file.py b/packages/python-sdk/tests/async/volume_async/test_file.py index 014eb33a2c..f901dcb434 100644 --- a/packages/python-sdk/tests/async/volume_async/test_file.py +++ b/packages/python-sdk/tests/async/volume_async/test_file.py @@ -1,5 +1,5 @@ import datetime -from io import BytesIO +from io import BytesIO, StringIO import pytest @@ -50,6 +50,16 @@ async def test_write_and_read_stream(self, async_volume: AsyncVolume): assert read_content == content + async def test_write_and_read_text_stream(self, async_volume: AsyncVolume): + path = "/test-text-stream.txt" + content = "Test text stream content" + stream = StringIO(content) + + await async_volume.write_file(path, stream) + read_content = await async_volume.read_file(path, format="text") + + assert read_content == content + async def test_write_and_read_empty_file(self, async_volume: AsyncVolume): path = "/empty.txt" content = "" diff --git a/packages/python-sdk/tests/sync/sandbox_sync/files/test_read.py b/packages/python-sdk/tests/sync/sandbox_sync/files/test_read.py index f5dc3a32e3..d19c5bada6 100644 --- a/packages/python-sdk/tests/sync/sandbox_sync/files/test_read.py +++ b/packages/python-sdk/tests/sync/sandbox_sync/files/test_read.py @@ -32,3 
+32,30 @@ def test_read_empty_file(sandbox): sandbox.commands.run(f"touch {filename}") read_content = sandbox.files.read(filename) assert read_content == content + + +def test_read_file_as_stream(sandbox): + filename = "test_read_stream.txt" + content = "Streamed read content. " * 10_000 + + sandbox.files.write(filename, content) + stream = sandbox.files.read(filename, format="stream") + read_content = b"".join(stream).decode("utf-8") + assert read_content == content + + +def test_read_file_as_stream_with_gzip(sandbox): + filename = "test_read_stream_gzip.txt" + content = "Streamed gzipped read content. " * 10_000 + + sandbox.files.write(filename, content) + stream = sandbox.files.read(filename, format="stream", gzip=True) + read_content = b"".join(stream).decode("utf-8") + assert read_content == content + + +def test_read_non_existing_file_as_stream(sandbox): + filename = "non_existing_file.txt" + + with pytest.raises(FileNotFoundException): + sandbox.files.read(filename, format="stream") diff --git a/packages/python-sdk/tests/sync/sandbox_sync/files/test_write.py b/packages/python-sdk/tests/sync/sandbox_sync/files/test_write.py index f5c5a43f7f..e50232ff6c 100644 --- a/packages/python-sdk/tests/sync/sandbox_sync/files/test_write.py +++ b/packages/python-sdk/tests/sync/sandbox_sync/files/test_write.py @@ -172,3 +172,45 @@ def test_write_files_with_different_data_types(sandbox, debug): if debug: for file in files: sandbox.files.remove(file["path"]) + + +def test_write_io_with_octet_stream(sandbox, debug): + filename = "test_write_octet_io.bin" + text = "Streamed octet-stream upload. 
" * 10_000 + content = io.BytesIO(text.encode("utf-8")) + + info = sandbox.files.write(filename, content, use_octet_stream=True) + assert info.path == f"/home/user/{filename}" + + read_content = sandbox.files.read(filename) + assert read_content == text + + if debug: + sandbox.files.remove(filename) + + +def test_write_text_io_with_octet_stream(sandbox, debug): + filename = "test_write_octet_text_io.txt" + text = "Streamed text octet-stream upload." + + sandbox.files.write(filename, io.StringIO(text), use_octet_stream=True) + + read_content = sandbox.files.read(filename) + assert read_content == text + + if debug: + sandbox.files.remove(filename) + + +def test_write_io_with_octet_stream_and_gzip(sandbox, debug): + filename = "test_write_octet_io_gzip.bin" + text = "Streamed gzipped octet-stream upload. " * 10_000 + content = io.BytesIO(text.encode("utf-8")) + + sandbox.files.write(filename, content, use_octet_stream=True, gzip=True) + + read_content = sandbox.files.read(filename) + assert read_content == text + + if debug: + sandbox.files.remove(filename) diff --git a/packages/python-sdk/tests/sync/volume_sync/test_file.py b/packages/python-sdk/tests/sync/volume_sync/test_file.py index c51db81691..eb2dfa0c0a 100644 --- a/packages/python-sdk/tests/sync/volume_sync/test_file.py +++ b/packages/python-sdk/tests/sync/volume_sync/test_file.py @@ -1,5 +1,5 @@ import datetime -from io import BytesIO +from io import BytesIO, StringIO import pytest @@ -47,6 +47,16 @@ def test_write_and_read_stream(self, volume: Volume): assert read_content == content + def test_write_and_read_text_stream(self, volume: Volume): + path = "/test-text-stream.txt" + content = "Test text stream content" + stream = StringIO(content) + + volume.write_file(path, stream) + read_content = volume.read_file(path, format="text") + + assert read_content == content + def test_write_and_read_empty_file(self, volume: Volume): path = "/empty.txt" content = "" From 9d72b2ecb7412e12d5c42efe75df20a8cf8530c4 Mon 
Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Fri, 12 Jun 2026 21:16:27 +0200 Subject: [PATCH 02/20] fix(js-sdk): return proper empty values from read() for empty files Empty files short-circuit response parsing (Content-Length: 0), so Sandbox.files.read() with format 'blob' returned '' and Volume.readFile() with 'blob'/'stream' returned undefined. Return an empty Blob or ReadableStream matching the requested format instead. Co-Authored-By: Claude Fable 5 --- .changeset/cuddly-pots-stream.md | 1 + .../js-sdk/src/sandbox/filesystem/index.ts | 5 ++-- .../js-sdk/tests/sandbox/files/read.test.ts | 24 +++++++++++++++++++ packages/js-sdk/tests/volume/file.test.ts | 24 +++++++++++++++++++ 4 files changed, 52 insertions(+), 2 deletions(-) diff --git a/.changeset/cuddly-pots-stream.md b/.changeset/cuddly-pots-stream.md index 6dfad351b8..4895bc6685 100644 --- a/.changeset/cuddly-pots-stream.md +++ b/.changeset/cuddly-pots-stream.md @@ -9,3 +9,4 @@ Stream uploads instead of buffering streaming input entirely in memory: - `Sandbox.files.write()` / `write_files()` with `useOctetStream`/`use_octet_stream`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are streamed to the sandbox, including when `gzip` is enabled (compression now happens chunk by chunk). - Python `Sandbox.files.read(format="stream")`: the response body is now streamed from the sandbox instead of being downloaded into memory before iteration (sync and async). - JS `Sandbox.files.read({ format: 'stream' })`: the request timeout now bounds only the initial handshake instead of killing the stream while it's being consumed; pass `signal` to cancel an in-flight stream. +- JS `Sandbox.files.read()` with `blob` or `stream` format now returns an empty `Blob`/`ReadableStream` for empty files instead of `""`. 
diff --git a/packages/js-sdk/src/sandbox/filesystem/index.ts b/packages/js-sdk/src/sandbox/filesystem/index.ts index cf285e311f..fc5a7eab75 100644 --- a/packages/js-sdk/src/sandbox/filesystem/index.ts +++ b/packages/js-sdk/src/sandbox/filesystem/index.ts @@ -506,9 +506,10 @@ export class Filesystem { return new Uint8Array(res.data as ArrayBuffer) } - // When the file is empty, res.data is parsed as `{}`. This is a workaround to return an empty string. + // When the file is empty, the response body is skipped and `res.data` is + // `undefined`. Return the proper empty value for the requested format. if (res.response.headers.get('content-length') === '0') { - return '' + return format === 'blob' ? new Blob([]) : '' } return res.data diff --git a/packages/js-sdk/tests/sandbox/files/read.test.ts b/packages/js-sdk/tests/sandbox/files/read.test.ts index 6e0b1e3395..045db4790b 100644 --- a/packages/js-sdk/tests/sandbox/files/read.test.ts +++ b/packages/js-sdk/tests/sandbox/files/read.test.ts @@ -61,3 +61,27 @@ sandboxTest('read non-existing file as stream', async ({ sandbox }) => { sandbox.files.read(filename, { format: 'stream' }) ).rejects.toThrowError(FileNotFoundError) }) + +sandboxTest('read empty file in all formats', async ({ sandbox }) => { + const filename = 'empty-file-formats.txt' + await sandbox.commands.run(`touch ${filename}`) + + const text = await sandbox.files.read(filename, { format: 'text' }) + expect(text).toBe('') + + const bytes = await sandbox.files.read(filename, { format: 'bytes' }) + expect(bytes).toBeInstanceOf(Uint8Array) + expect(bytes.length).toBe(0) + + const blob = await sandbox.files.read(filename, { format: 'blob' }) + expect(blob).toBeInstanceOf(Blob) + expect(blob.size).toBe(0) + + const stream = await sandbox.files.read(filename, { format: 'stream' }) + expect(stream).toBeInstanceOf(ReadableStream) + const chunks: Uint8Array[] = [] + for await (const chunk of stream as unknown as AsyncIterable<Uint8Array>) { + chunks.push(chunk) + } +
expect(Buffer.concat(chunks).length).toBe(0) +}) diff --git a/packages/js-sdk/tests/volume/file.test.ts b/packages/js-sdk/tests/volume/file.test.ts index ff713de55b..5819b332be 100644 --- a/packages/js-sdk/tests/volume/file.test.ts +++ b/packages/js-sdk/tests/volume/file.test.ts @@ -82,6 +82,30 @@ describe('Volume File Operations', () => { expect(readContent).toBe(content) }) + volumeTest( + 'should read an empty file in all formats', + async ({ volume }) => { + const path = '/empty-formats.txt' + await volume.writeFile(path, '') + + const bytes = await volume.readFile(path, { format: 'bytes' }) + expect(bytes).toBeInstanceOf(Uint8Array) + expect(bytes.length).toBe(0) + + const blob = await volume.readFile(path, { format: 'blob' }) + expect(blob).toBeInstanceOf(Blob) + expect(blob.size).toBe(0) + + const stream = await volume.readFile(path, { format: 'stream' }) + expect(stream).toBeInstanceOf(ReadableStream) + const chunks: Uint8Array[] = [] + for await (const chunk of stream as unknown as AsyncIterable<Uint8Array>) { + chunks.push(chunk) + } + expect(chunks.reduce((n, c) => n + c.length, 0)).toBe(0) + } + ) + volumeTest( 'should overwrite an existing file with force option', async ({ volume }) => { From 6cda03fa927fe45fcd577f23962de2092f04534e Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Fri, 12 Jun 2026 22:25:51 +0200 Subject: [PATCH 03/20] fix: address review comments on streaming reads - js-sdk: handle empty files explicitly in the bytes path of read() instead of relying on new Uint8Array(undefined) coercion - python-sdk: bound the request timeout to the initial handshake for read(format="stream") in both sync and async implementations, matching the JS SDK behavior; document the semantics in the stream overloads Co-Authored-By: Claude Fable 5 --- packages/js-sdk/src/sandbox/filesystem/index.ts | 11 +++++++---- .../e2b/sandbox_async/filesystem/filesystem.py | 9 +++++++++ .../e2b/sandbox_sync/filesystem/filesystem.py | 9
+++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/packages/js-sdk/src/sandbox/filesystem/index.ts b/packages/js-sdk/src/sandbox/filesystem/index.ts index fc5a7eab75..cbbc90de7a 100644 --- a/packages/js-sdk/src/sandbox/filesystem/index.ts +++ b/packages/js-sdk/src/sandbox/filesystem/index.ts @@ -502,16 +502,19 @@ export class Filesystem { throw err } - if (format === 'bytes') { - return new Uint8Array(res.data as ArrayBuffer) - } - // When the file is empty, the response body is skipped and `res.data` is // `undefined`. Return the proper empty value for the requested format. if (res.response.headers.get('content-length') === '0') { + if (format === 'bytes') { + return new Uint8Array(0) + } return format === 'blob' ? new Blob([]) : '' } + if (format === 'bytes') { + return new Uint8Array(res.data as ArrayBuffer) + } + return res.data } diff --git a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py index 6345a04fca..26f21c58b8 100644 --- a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py @@ -147,6 +147,9 @@ async def read( """ Read file content as a `AsyncIterator[bytes]`. + The request timeout bounds only the initial handshake—the returned + iterator is not killed by it while being consumed. + :param path: Path to the file :param user: Run the operation as this user :param format: Format of the file content—`stream` @@ -195,6 +198,12 @@ async def read( await r.aclose() raise err + # The request timeout bounds only the initial handshake. Disable + # the read timeout for body reads so consuming the stream isn't + # killed by it. The timeout dict is shared by reference with the + # transport and read again when body iteration starts. 
+ request.extensions.get("timeout", {})["read"] = None + async def stream_file() -> AsyncIterator[bytes]: try: async for chunk in r.aiter_bytes(): diff --git a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py index 53100892a6..b989085ca7 100644 --- a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py @@ -169,6 +169,9 @@ def read( """ Read file content as a `Iterator[bytes]`. + The request timeout bounds only the initial handshake—the returned + iterator is not killed by it while being consumed. + :param path: Path to the file :param user: Run the operation as this user :param format: Format of the file content—`stream` @@ -217,6 +220,12 @@ def read( r.close() raise err + # The request timeout bounds only the initial handshake. Disable + # the read timeout for body reads so consuming the stream isn't + # killed by it. The timeout dict is shared by reference with the + # transport and read again when body iteration starts. + request.extensions.get("timeout", {})["read"] = None + def stream_file() -> Iterator[bytes]: try: yield from r.iter_bytes() From 1ae9a34096f65c2010433fd86064dbfdbabe948d Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Mon, 15 Jun 2026 21:22:02 +0200 Subject: [PATCH 04/20] fix(python-sdk): prevent connection leaks from abandoned stream reads read(format="stream") returned a bare (async) generator whose finally only closed the response if iteration had begun. A reader that was created but never consumed (or never started) held its pooled connection open until the client was closed, leaking connections. 
Wrap the streamed response in FileStreamReader / AsyncFileStreamReader, which: - release the connection when the stream is fully consumed or errors, - expose deterministic cleanup via close()/aclose() and (async) context manager support, - register a weakref.finalize safety net so an abandoned reader releases its connection on garbage collection (the async variant schedules aclose() on the running loop). Both remain Iterator[bytes] / AsyncIterator[bytes], so existing usage is unchanged. Adds credential-free unit tests covering consume/context manager/close/GC, plus live-sandbox tests for the context manager and partial-then-close paths. Co-Authored-By: Claude Fable 5 --- .../e2b/sandbox/filesystem/filesystem.py | 119 ++++++++++++++ .../sandbox_async/filesystem/filesystem.py | 26 +-- .../e2b/sandbox_sync/filesystem/filesystem.py | 24 +-- .../async/sandbox_async/files/test_read.py | 27 ++++ .../sync/sandbox_sync/files/test_read.py | 24 +++ .../tests/test_file_stream_reader.py | 148 ++++++++++++++++++ 6 files changed, 343 insertions(+), 25 deletions(-) create mode 100644 packages/python-sdk/tests/test_file_stream_reader.py diff --git a/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py index 5838c8604d..849bd626c0 100644 --- a/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py @@ -1,11 +1,15 @@ +import asyncio import gzip import re +import weakref from dataclasses import dataclass, field from datetime import datetime from enum import Enum from io import IOBase, TextIOBase from typing import IO, AsyncIterator, Dict, Iterator, Optional, Union, TypedDict +import httpx + from e2b.envd.filesystem import filesystem_pb2 from e2b.exceptions import InvalidArgumentException from e2b.io_utils import agzip_iter, aiter_io_chunks, gzip_iter, iter_io_chunks @@ -135,6 +139,121 @@ class WriteEntry(TypedDict): data: Union[str, bytes, IO] +class 
FileStreamReader(Iterator[bytes]): + """Iterator over a streamed file download. + + Returned by ``Sandbox.files.read(format="stream")``. It owns the underlying + HTTP response and releases its pooled connection as soon as the stream is + fully consumed, an error is raised while reading, or the reader is closed. + + Iterate it directly (``for chunk in stream``) or, for deterministic + cleanup when you don't read it to the end, use it as a context manager or + call :meth:`close`:: + + with sandbox.files.read(path, format="stream") as stream: + for chunk in stream: + ... + + As a safety net, the connection is also released when the reader is garbage + collected, so an abandoned stream does not leak a connection indefinitely. + """ + + def __init__(self, response: httpx.Response): + self._response = response + self._iterator = response.iter_bytes() + # Releases the connection on GC if the reader is abandoned without + # being consumed or closed. Calling it explicitly (via close) runs the + # callback once and is then a no-op, so close is idempotent. + self._finalizer = weakref.finalize(self, response.close) + + def __iter__(self) -> Iterator[bytes]: + return self + + def __next__(self) -> bytes: + try: + return next(self._iterator) + except BaseException: + # Covers normal end (StopIteration) and read errors alike. + self.close() + raise + + def close(self) -> None: + """Release the underlying HTTP connection. Safe to call multiple times.""" + self._finalizer() + + def __enter__(self) -> "FileStreamReader": + return self + + def __exit__(self, *exc_info) -> None: + self.close() + + +def _schedule_response_aclose(response: httpx.Response) -> None: + """Best-effort cleanup for an abandoned async stream. + + Closing an async response requires awaiting ``aclose()``, which a (sync) + garbage-collection finalizer cannot do. 
When the reader is dropped while an + event loop is running we schedule the close on it; otherwise there is + nothing safe to await and the connection is reclaimed when the client is + closed. + """ + try: + loop = asyncio.get_running_loop() + except RuntimeError: + return + if loop.is_running(): + loop.create_task(response.aclose()) + + +class AsyncFileStreamReader(AsyncIterator[bytes]): + """Async iterator over a streamed file download. + + Returned by ``AsyncSandbox.files.read(format="stream")``. It owns the + underlying HTTP response and releases its pooled connection as soon as the + stream is fully consumed, an error is raised while reading, or the reader is + closed. + + Iterate it directly (``async for chunk in stream``) or, for deterministic + cleanup when you don't read it to the end, use it as an async context + manager or call :meth:`aclose`:: + + async with await sandbox.files.read(path, format="stream") as stream: + async for chunk in stream: + ... + + As a safety net, the connection is also released when the reader is garbage + collected while an event loop is running, so an abandoned stream does not + leak a connection indefinitely. + """ + + def __init__(self, response: httpx.Response): + self._response = response + self._iterator = response.aiter_bytes() + self._finalizer = weakref.finalize(self, _schedule_response_aclose, response) + + def __aiter__(self) -> AsyncIterator[bytes]: + return self + + async def __anext__(self) -> bytes: + try: + return await self._iterator.__anext__() + except BaseException: + # Covers normal end (StopAsyncIteration) and read errors alike. + await self.aclose() + raise + + async def aclose(self) -> None: + """Release the underlying HTTP connection. 
Safe to call multiple times.""" + self._finalizer.detach() + await self._response.aclose() + + async def __aenter__(self) -> "AsyncFileStreamReader": + return self + + async def __aexit__(self, *exc_info) -> None: + await self.aclose() + + def _to_httpx_file(file_path: str, file_data: Union[str, bytes, IO]): """Build an httpx multipart `("file", (name, data))` tuple for the upload.""" if isinstance(file_data, (str, bytes)): diff --git a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py index a53adcfcab..66f5dfce49 100644 --- a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py @@ -1,5 +1,5 @@ import asyncio -from typing import IO, AsyncIterator, Dict, List, Literal, Optional, Union, overload +from typing import IO, Dict, List, Literal, Optional, Union, overload import httpcore @@ -37,6 +37,7 @@ TemplateException, ) from e2b.sandbox.filesystem.filesystem import ( + AsyncFileStreamReader, EntryInfo, WriteEntry, WriteInfo, @@ -153,12 +154,15 @@ async def read( user: Optional[Username] = None, request_timeout: Optional[float] = None, gzip: bool = False, - ) -> AsyncIterator[bytes]: + ) -> AsyncFileStreamReader: """ - Read file content as a `AsyncIterator[bytes]`. + Read file content as an `AsyncFileStreamReader` (an `AsyncIterator[bytes]`). The request timeout bounds only the initial handshake—the returned - iterator is not killed by it while being consumed. + iterator is not killed by it while being consumed. The reader releases + its connection once fully consumed; if you don't read it to the end, + use it as an async context manager or call `aclose()` for deterministic + cleanup. 
:param path: Path to the file :param user: Run the operation as this user @@ -166,7 +170,7 @@ async def read( :param request_timeout: Timeout for the request in **seconds** :param gzip: Use gzip compression for the request - :return: File content as an `AsyncIterator[bytes]` + :return: File content as an `AsyncFileStreamReader` """ ... @@ -214,14 +218,10 @@ async def read( # transport and read again when body iteration starts. request.extensions.get("timeout", {})["read"] = None - async def stream_file() -> AsyncIterator[bytes]: - try: - async for chunk in r.aiter_bytes(): - yield chunk - finally: - await r.aclose() - - return stream_file() + # AsyncFileStreamReader owns the response and releases the + # connection when the stream is consumed, closed, errors, or is + # GC'd while an event loop is running. + return AsyncFileStreamReader(r) try: r = await self._envd_api.get( diff --git a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py index dc75f64188..f3d2c503b8 100644 --- a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py @@ -1,5 +1,5 @@ import threading -from typing import IO, Dict, Iterator, List, Literal, Optional, Union, overload +from typing import IO, Dict, List, Literal, Optional, Union, overload import httpx from packaging.version import Version @@ -39,6 +39,7 @@ ) from e2b.sandbox.filesystem.filesystem import ( EntryInfo, + FileStreamReader, WriteEntry, WriteInfo, _to_httpx_file, @@ -173,12 +174,15 @@ def read( user: Optional[Username] = None, request_timeout: Optional[float] = None, gzip: bool = False, - ) -> Iterator[bytes]: + ) -> FileStreamReader: """ - Read file content as a `Iterator[bytes]`. + Read file content as a `FileStreamReader` (an `Iterator[bytes]`). The request timeout bounds only the initial handshake—the returned - iterator is not killed by it while being consumed. 
+ iterator is not killed by it while being consumed. The reader releases + its connection once fully consumed; if you don't read it to the end, + use it as a context manager or call `close()` for deterministic + cleanup. :param path: Path to the file :param user: Run the operation as this user @@ -186,7 +190,7 @@ def read( :param request_timeout: Timeout for the request in **seconds** :param gzip: Use gzip compression for the request - :return: File content as an `Iterator[bytes]` + :return: File content as a `FileStreamReader` """ ... @@ -234,13 +238,9 @@ def read( # transport and read again when body iteration starts. request.extensions.get("timeout", {})["read"] = None - def stream_file() -> Iterator[bytes]: - try: - yield from r.iter_bytes() - finally: - r.close() - - return stream_file() + # FileStreamReader owns the response and releases the connection + # when the stream is consumed, closed, errors, or is GC'd. + return FileStreamReader(r) try: r = self._envd_api.get( diff --git a/packages/python-sdk/tests/async/sandbox_async/files/test_read.py b/packages/python-sdk/tests/async/sandbox_async/files/test_read.py index 365d405305..643c7aab01 100644 --- a/packages/python-sdk/tests/async/sandbox_async/files/test_read.py +++ b/packages/python-sdk/tests/async/sandbox_async/files/test_read.py @@ -68,3 +68,30 @@ async def test_read_non_existing_file_as_stream(async_sandbox: AsyncSandbox): with pytest.raises(FileNotFoundException): await async_sandbox.files.read(filename, format="stream") + + +async def test_read_file_as_stream_context_manager(async_sandbox: AsyncSandbox): + filename = "test_read_stream_ctx.txt" + content = "Streamed read content. 
" * 10_000 + + await async_sandbox.files.write(filename, content) + chunks = [] + async with await async_sandbox.files.read(filename, format="stream") as stream: + async for chunk in stream: + chunks.append(chunk) + read_content = b"".join(chunks).decode("utf-8") + assert read_content == content + + +async def test_read_file_as_stream_partial_then_close(async_sandbox: AsyncSandbox): + filename = "test_read_stream_partial.txt" + content = "Streamed read content. " * 10_000 + + await async_sandbox.files.write(filename, content) + # Reading only the first chunk and closing must not raise or leak. + stream = await async_sandbox.files.read(filename, format="stream") + first = await stream.__anext__() + assert len(first) > 0 + await stream.aclose() + # aclose is idempotent + await stream.aclose() diff --git a/packages/python-sdk/tests/sync/sandbox_sync/files/test_read.py b/packages/python-sdk/tests/sync/sandbox_sync/files/test_read.py index d19c5bada6..a3e98fc09a 100644 --- a/packages/python-sdk/tests/sync/sandbox_sync/files/test_read.py +++ b/packages/python-sdk/tests/sync/sandbox_sync/files/test_read.py @@ -59,3 +59,27 @@ def test_read_non_existing_file_as_stream(sandbox): with pytest.raises(FileNotFoundException): sandbox.files.read(filename, format="stream") + + +def test_read_file_as_stream_context_manager(sandbox): + filename = "test_read_stream_ctx.txt" + content = "Streamed read content. " * 10_000 + + sandbox.files.write(filename, content) + with sandbox.files.read(filename, format="stream") as stream: + read_content = b"".join(stream).decode("utf-8") + assert read_content == content + + +def test_read_file_as_stream_partial_then_close(sandbox): + filename = "test_read_stream_partial.txt" + content = "Streamed read content. " * 10_000 + + sandbox.files.write(filename, content) + # Reading only the first chunk and closing must not raise or leak. 
+ stream = sandbox.files.read(filename, format="stream") + first = next(iter(stream)) + assert len(first) > 0 + stream.close() + # close is idempotent + stream.close() diff --git a/packages/python-sdk/tests/test_file_stream_reader.py b/packages/python-sdk/tests/test_file_stream_reader.py new file mode 100644 index 0000000000..052169d157 --- /dev/null +++ b/packages/python-sdk/tests/test_file_stream_reader.py @@ -0,0 +1,148 @@ +"""Unit tests for the streamed-read helpers. + +These exercise connection lifecycle (consume / context manager / explicit +close / garbage collection) without hitting a real sandbox, using a local +chunked HTTP server. +""" + +import gc +import socket +import threading + +import httpx +import pytest + +from e2b.sandbox.filesystem.filesystem import ( + AsyncFileStreamReader, + FileStreamReader, +) + +CHUNKS = [f"chunk{i}".encode() for i in range(5)] +EXPECTED = b"".join(CHUNKS) + + +def _start_chunked_server() -> int: + """Start a one-shot HTTP server that replies with a chunked body. 
Returns its port.""" + sock = socket.socket() + sock.bind(("127.0.0.1", 0)) + sock.listen(1) + port = sock.getsockname()[1] + + def serve(): + try: + conn, _ = sock.accept() + while b"\r\n\r\n" not in conn.recv(65536): + pass + conn.sendall( + b"HTTP/1.1 200 OK\r\n" + b"Content-Type: application/octet-stream\r\n" + b"Transfer-Encoding: chunked\r\n\r\n" + ) + for chunk in CHUNKS: + conn.sendall(f"{len(chunk):x}\r\n".encode() + chunk + b"\r\n") + conn.sendall(b"0\r\n\r\n") + conn.close() + except OSError: + pass + finally: + sock.close() + + threading.Thread(target=serve, daemon=True).start() + return port + + +def _active_connections(client) -> int: + return len(client._transport._pool.connections) + + +def _open_stream(client, port): + request = client.build_request("GET", f"http://127.0.0.1:{port}/files") + return client.send(request, stream=True) + + +def test_sync_full_consume_releases_connection(): + with httpx.Client() as client: + port = _start_chunked_server() + reader = FileStreamReader(_open_stream(client, port)) + assert b"".join(reader) == EXPECTED + assert _active_connections(client) == 0 + + +def test_sync_context_manager_releases_on_exit(): + with httpx.Client() as client: + port = _start_chunked_server() + with FileStreamReader(_open_stream(client, port)) as reader: + assert next(iter(reader)) == CHUNKS[0] + # Exiting the context releases the connection even though the stream + # was only partially consumed. 
+ assert _active_connections(client) == 0 + + +def test_sync_close_is_idempotent(): + with httpx.Client() as client: + port = _start_chunked_server() + reader = FileStreamReader(_open_stream(client, port)) + reader.close() + reader.close() + assert _active_connections(client) == 0 + + +def test_sync_abandoned_reader_does_not_leak(): + with httpx.Client() as client: + port = _start_chunked_server() + reader = FileStreamReader(_open_stream(client, port)) + assert _active_connections(client) == 1 + del reader + gc.collect() + # The finalizer releases the connection when the reader is collected. + assert _active_connections(client) == 0 + + +async def test_async_full_consume_releases_connection(): + async with httpx.AsyncClient() as client: + port = _start_chunked_server() + request = client.build_request("GET", f"http://127.0.0.1:{port}/files") + reader = AsyncFileStreamReader(await client.send(request, stream=True)) + collected = b"".join([chunk async for chunk in reader]) + assert collected == EXPECTED + assert _active_connections(client) == 0 + + +async def test_async_context_manager_releases_on_exit(): + async with httpx.AsyncClient() as client: + port = _start_chunked_server() + request = client.build_request("GET", f"http://127.0.0.1:{port}/files") + async with AsyncFileStreamReader( + await client.send(request, stream=True) + ) as reader: + assert await reader.__anext__() == CHUNKS[0] + assert _active_connections(client) == 0 + + +async def test_async_aclose_is_idempotent(): + async with httpx.AsyncClient() as client: + port = _start_chunked_server() + request = client.build_request("GET", f"http://127.0.0.1:{port}/files") + reader = AsyncFileStreamReader(await client.send(request, stream=True)) + await reader.aclose() + await reader.aclose() + assert _active_connections(client) == 0 + + +async def test_async_abandoned_reader_does_not_leak(): + import asyncio + + async with httpx.AsyncClient() as client: + port = _start_chunked_server() + request = 
client.build_request("GET", f"http://127.0.0.1:{port}/files") + reader = AsyncFileStreamReader(await client.send(request, stream=True)) + assert _active_connections(client) == 1 + del reader + gc.collect() + # The finalizer schedules aclose on the running loop; let it run. + await asyncio.sleep(0.05) + assert _active_connections(client) == 0 + + +if __name__ == "__main__": + raise SystemExit(pytest.main([__file__, "-v"])) From 22d98a392de4d723c98ec6c94b1f22db22835f44 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 17 Jun 2026 16:02:51 +0200 Subject: [PATCH 05/20] fix(js-sdk): release stream-read connections on error and GC Match the Python SDK's connection lifecycle for read(format='stream'): - explicitly cancel the unconsumed error body before propagating, instead of relying solely on the abort controller (parity with r.close()) - add a FinalizationRegistry safety net so an abandoned stream releases its connection on GC, mirroring Python's weakref.finalize on FileStreamReader Co-Authored-By: Claude Opus 4.8 (1M context) --- .../js-sdk/src/sandbox/filesystem/index.ts | 41 +++++++++++++++++-- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/packages/js-sdk/src/sandbox/filesystem/index.ts b/packages/js-sdk/src/sandbox/filesystem/index.ts index 86e524a18a..4ff6844208 100644 --- a/packages/js-sdk/src/sandbox/filesystem/index.ts +++ b/packages/js-sdk/src/sandbox/filesystem/index.ts @@ -57,6 +57,17 @@ const FILESYSTEM_HTTP_ERROR_MAP: Record Error> = { 404: (message: string) => new FileNotFoundError(message), } +// GC safety net for streamed reads: if the consumer drops the stream returned +// by `read({ format: 'stream' })` without reading it to completion or +// cancelling it, the registered cleanup releases the underlying connection when +// the stream is garbage collected. This mirrors the Python SDK's +// `weakref.finalize` on `FileStreamReader`. 
The held value is the cleanup +// function, which must not reference the stream itself or it would never be +// collected. +const streamReadFinalizers = new FinalizationRegistry<() => void>((cleanup) => + cleanup() +) + const FILESYSTEM_RPC_ERROR_MAP: Partial< Record Error> > = { @@ -469,6 +480,14 @@ export class Filesystem { const err = await handleFilesystemEnvdApiError(res) if (err) { + // Cancel the unconsumed error body so the pooled connection is + // released before we propagate, matching the Python stream path's + // `r.close()`. `cleanup()`'s abort would also release it, but + // cancelling is explicit and independent of runtime abort semantics. + if (res.response.body && !res.response.bodyUsed) { + await res.response.body.cancel().catch(() => {}) + } + cleanup() throw err } @@ -481,18 +500,26 @@ export class Filesystem { } const reader = body.getReader() - return new ReadableStream({ + // Detach the GC finalizer and release the connection. Idempotent via + // `cleanup`, so it's safe to call from multiple stream callbacks. + const release = () => { + streamReadFinalizers.unregister(unregisterToken) + cleanup() + } + const unregisterToken = {} + + const stream = new ReadableStream({ async pull(streamController) { try { const { done, value } = await reader.read() if (done) { streamController.close() - cleanup() + release() } else { streamController.enqueue(value) } } catch (err) { - cleanup() + release() streamController.error(err) } }, @@ -500,10 +527,16 @@ export class Filesystem { try { await reader.cancel(reason) } finally { - cleanup() + release() } }, }) + + // Release the connection if the consumer abandons the stream without + // reading it to completion or cancelling it. 
+ streamReadFinalizers.register(stream, cleanup, unregisterToken) + + return stream } catch (err) { cleanup() throw err From 9b34ecf4d31e292c71a44a112075bf234a1926f3 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 17 Jun 2026 16:22:44 +0200 Subject: [PATCH 06/20] refactor(python-sdk): drop fragile async GC net from AsyncFileStreamReader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The async stream reader's garbage-collection safety net (_schedule_response_aclose via weakref.finalize) was best-effort at best: loop.create_task(aclose()) from a finalizer is not thread-safe, has no guarantee of running before loop teardown, and is useless once the loop is gone. Remove it and rely on the cleanup that actually works—auto-close on full consume / read error, aclose(), and the async context manager. The sync FileStreamReader keeps its weakref.finalize(response.close) net, which is reliable because close() is synchronous. Document that an abandoned async stream holds its pooled connection until the client is closed, and update the unit test accordingly. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../e2b/sandbox/filesystem/filesystem.py | 32 ++++++------------- .../sandbox_async/filesystem/filesystem.py | 8 +++-- .../tests/test_file_stream_reader.py | 28 +++++++++------- 3 files changed, 31 insertions(+), 37 deletions(-) diff --git a/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py index 849bd626c0..b6c0336255 100644 --- a/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py @@ -1,4 +1,3 @@ -import asyncio import gzip import re import weakref @@ -188,23 +187,6 @@ def __exit__(self, *exc_info) -> None: self.close() -def _schedule_response_aclose(response: httpx.Response) -> None: - """Best-effort cleanup for an abandoned async stream. 
- - Closing an async response requires awaiting ``aclose()``, which a (sync) - garbage-collection finalizer cannot do. When the reader is dropped while an - event loop is running we schedule the close on it; otherwise there is - nothing safe to await and the connection is reclaimed when the client is - closed. - """ - try: - loop = asyncio.get_running_loop() - except RuntimeError: - return - if loop.is_running(): - loop.create_task(response.aclose()) - - class AsyncFileStreamReader(AsyncIterator[bytes]): """Async iterator over a streamed file download. @@ -221,15 +203,17 @@ class AsyncFileStreamReader(AsyncIterator[bytes]): async for chunk in stream: ... - As a safety net, the connection is also released when the reader is garbage - collected while an event loop is running, so an abandoned stream does not - leak a connection indefinitely. + Unlike the sync reader there is no garbage-collection safety net: releasing + an async connection requires awaiting ``aclose()``, which a finalizer cannot + do reliably. An abandoned stream holds its pooled connection until the + client is closed, so always consume it fully, use the context manager, or + call :meth:`aclose`. """ def __init__(self, response: httpx.Response): self._response = response self._iterator = response.aiter_bytes() - self._finalizer = weakref.finalize(self, _schedule_response_aclose, response) + self._closed = False def __aiter__(self) -> AsyncIterator[bytes]: return self @@ -244,7 +228,9 @@ async def __anext__(self) -> bytes: async def aclose(self) -> None: """Release the underlying HTTP connection. 
Safe to call multiple times.""" - self._finalizer.detach() + if self._closed: + return + self._closed = True await self._response.aclose() async def __aenter__(self) -> "AsyncFileStreamReader": diff --git a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py index 66f5dfce49..7ca4bfc3b6 100644 --- a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py @@ -162,7 +162,8 @@ async def read( iterator is not killed by it while being consumed. The reader releases its connection once fully consumed; if you don't read it to the end, use it as an async context manager or call `aclose()` for deterministic - cleanup. + cleanup. Unlike the sync reader there is no garbage-collection safety + net—an abandoned stream holds its connection until the client is closed. :param path: Path to the file :param user: Run the operation as this user @@ -219,8 +220,9 @@ async def read( request.extensions.get("timeout", {})["read"] = None # AsyncFileStreamReader owns the response and releases the - # connection when the stream is consumed, closed, errors, or is - # GC'd while an event loop is running. + # connection when the stream is consumed, closed, or errors. There + # is no GC safety net: an abandoned reader holds its connection + # until the client is closed. 
return AsyncFileStreamReader(r) try: diff --git a/packages/python-sdk/tests/test_file_stream_reader.py b/packages/python-sdk/tests/test_file_stream_reader.py index 052169d157..1672cb6166 100644 --- a/packages/python-sdk/tests/test_file_stream_reader.py +++ b/packages/python-sdk/tests/test_file_stream_reader.py @@ -129,19 +129,25 @@ async def test_async_aclose_is_idempotent(): assert _active_connections(client) == 0 -async def test_async_abandoned_reader_does_not_leak(): +async def test_async_abandoned_reader_is_reclaimed_on_client_close(): import asyncio - async with httpx.AsyncClient() as client: - port = _start_chunked_server() - request = client.build_request("GET", f"http://127.0.0.1:{port}/files") - reader = AsyncFileStreamReader(await client.send(request, stream=True)) - assert _active_connections(client) == 1 - del reader - gc.collect() - # The finalizer schedules aclose on the running loop; let it run. - await asyncio.sleep(0.05) - assert _active_connections(client) == 0 + client = httpx.AsyncClient() + port = _start_chunked_server() + request = client.build_request("GET", f"http://127.0.0.1:{port}/files") + reader = AsyncFileStreamReader(await client.send(request, stream=True)) + assert _active_connections(client) == 1 + + # The async reader has no GC safety net: dropping it without closing keeps + # the connection checked out (releasing one requires awaiting aclose()). + del reader + gc.collect() + await asyncio.sleep(0.05) + assert _active_connections(client) == 1 + + # Closing the client reclaims the abandoned connection. 
+ await client.aclose() + assert _active_connections(client) == 0 if __name__ == "__main__": From a0d564f8c073a89029eefddc69dce0e2914a9dcd Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 17 Jun 2026 17:04:18 +0200 Subject: [PATCH 07/20] fix(sdks): align streaming connection lifecycle across files and volumes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-ups on the stream upload/download work, applying the established stream policy consistently and addressing review findings: - volumes: drop the client read timeout on `read_file`/`readFile` streams (Python `httpx.Timeout(..., read=None)`, JS handshake-bounded controller + `wrapStreamWithConnectionCleanup`), matching the sandbox files stream path and the RPC streams. The request timeout now bounds only the handshake, not body consumption. - JS sandbox streaming uploads: use the file-transfer timeout (1h) instead of the 60s request default so large streamed uploads aren't aborted mid-transfer; buffered uploads keep the short default. Centralize `FILE_TIMEOUT_MS` in connectionConfig and reuse it from volume. - JS: factor the stream cleanup + GC-finalizer logic into a shared `wrapStreamWithConnectionCleanup` used by both sandbox files and volumes. - stream handshake error mapping (Bugbot): map dropped connections during the stream handshake to typed, health-checked errors — JS via `handleEnvdApiFetchError`, Python via the `httpx.RemoteProtocolError` wrapper — mirroring the non-stream read paths. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/js-sdk/src/connectionConfig.ts | 78 +++++++++++++ .../js-sdk/src/sandbox/filesystem/index.ts | 103 ++++++------------ packages/js-sdk/src/volume/client.ts | 3 +- packages/js-sdk/src/volume/index.ts | 62 ++++++++++- .../sandbox_async/filesystem/filesystem.py | 7 +- .../e2b/sandbox_sync/filesystem/filesystem.py | 5 +- .../python-sdk/e2b/volume/volume_async.py | 8 +- packages/python-sdk/e2b/volume/volume_sync.py | 8 +- 8 files changed, 193 insertions(+), 81 deletions(-) diff --git a/packages/js-sdk/src/connectionConfig.ts b/packages/js-sdk/src/connectionConfig.ts index 29b9378c0d..e633461025 100644 --- a/packages/js-sdk/src/connectionConfig.ts +++ b/packages/js-sdk/src/connectionConfig.ts @@ -7,6 +7,10 @@ const supportedDomains = ['e2b.app', 'e2b.dev', 'e2b.pro', 'e2b-staging.dev'] export const REQUEST_TIMEOUT_MS = 60_000 // 60 seconds export const DEFAULT_SANDBOX_TIMEOUT_MS = 300_000 // 300 seconds +// Default timeout for streaming file transfers (uploads/downloads). A streamed +// body can take far longer than a regular request, so it must not inherit the +// short `REQUEST_TIMEOUT_MS`. +export const FILE_TIMEOUT_MS = 3_600_000 // 1 hour export const KEEPALIVE_PING_INTERVAL_SEC = 50 // 50 seconds export const KEEPALIVE_PING_HEADER = 'Keepalive-Ping-Interval' @@ -191,6 +195,80 @@ export function setupRequestController( return { controller, clearStartTimeout, cleanup } } +// GC safety net for streamed reads: if the consumer drops a streamed response +// body without reading it to completion or cancelling it, the registered +// cleanup releases the underlying connection when the stream is garbage +// collected. This mirrors the Python SDK's `weakref.finalize` on +// `FileStreamReader`. The held value is the cleanup function, which must not +// reference the stream itself or it would never be collected. 
+const streamReadFinalizers = new FinalizationRegistry<() => void>((cleanup) => + cleanup() +) + +/** + * Wrap a streaming response body so its pooled connection is released when the + * stream is fully read, cancelled, errors, or (as a GC safety net) abandoned. + * + * The request timeout configured via {@link setupRequestController} bounds only + * the initial handshake; this clears that timeout so consuming the body is not + * killed by it. Call once the handshake has succeeded (after error handling). + * + * @internal + */ +export function wrapStreamWithConnectionCleanup( + body: ReadableStream | null, + { + clearStartTimeout, + cleanup, + }: { clearStartTimeout: () => void; cleanup: () => void } +): ReadableStream { + clearStartTimeout() + + if (!body) { + cleanup() + return new Blob([]).stream() + } + + const reader = body.getReader() + const unregisterToken = {} + // Detach the GC finalizer and release the connection. Idempotent via + // `cleanup`, so it's safe to call from multiple stream callbacks. + const release = () => { + streamReadFinalizers.unregister(unregisterToken) + cleanup() + } + + const stream = new ReadableStream({ + async pull(streamController) { + try { + const { done, value } = await reader.read() + if (done) { + streamController.close() + release() + } else { + streamController.enqueue(value) + } + } catch (err) { + release() + streamController.error(err) + } + }, + async cancel(reason) { + try { + await reader.cancel(reason) + } finally { + release() + } + }, + }) + + // Release the connection if the consumer abandons the stream without + // reading it to completion or cancelling it. + streamReadFinalizers.register(stream, cleanup, unregisterToken) + + return stream +} + /** * Configuration for connecting to the API. 
*/ diff --git a/packages/js-sdk/src/sandbox/filesystem/index.ts b/packages/js-sdk/src/sandbox/filesystem/index.ts index 248a2fcb9f..0a50f5ab34 100644 --- a/packages/js-sdk/src/sandbox/filesystem/index.ts +++ b/packages/js-sdk/src/sandbox/filesystem/index.ts @@ -9,10 +9,12 @@ import { ConnectionConfig, ConnectionOpts, defaultUsername, + FILE_TIMEOUT_MS, KEEPALIVE_PING_HEADER, KEEPALIVE_PING_INTERVAL_SEC, setupRequestController, Username, + wrapStreamWithConnectionCleanup, } from '../../connectionConfig' import { @@ -57,17 +59,6 @@ const FILESYSTEM_HTTP_ERROR_MAP: Record Error> = { 404: (message: string) => new FileNotFoundError(message), } -// GC safety net for streamed reads: if the consumer drops the stream returned -// by `read({ format: 'stream' })` without reading it to completion or -// cancelling it, the registered cleanup releases the underlying connection when -// the stream is garbage collected. This mirrors the Python SDK's -// `weakref.finalize` on `FileStreamReader`. The held value is the cleanup -// function, which must not reference the stream itself or it would never be -// collected. -const streamReadFinalizers = new FinalizationRegistry<() => void>((cleanup) => - cleanup() -) - const FILESYSTEM_RPC_ERROR_MAP: Partial< Record Error> > = { @@ -470,17 +461,24 @@ export class Filesystem { ) try { - const res = await this.envdApi.api.GET('/files', { - params: { - query: { - path, - username: user, + const res = await this.envdApi.api + .GET('/files', { + params: { + query: { + path, + username: user, + }, }, - }, - parseAs: 'stream', - signal: controller.signal, - headers, - }) + parseAs: 'stream', + signal: controller.signal, + headers, + }) + .catch(async (err) => { + // Map a dropped connection during the handshake (e.g. killed + // sandbox) to a typed error via the health check, matching the + // non-stream read path below. 
+ throw await handleEnvdApiFetchError(err, this.checkHealth) + }) const err = await handleFilesystemEnvdApiError(res) if (err) { @@ -495,52 +493,10 @@ export class Filesystem { throw err } - clearStartTimeout() - - const body = res.data as ReadableStream | null - if (!body) { - cleanup() - return new Blob([]).stream() - } - - const reader = body.getReader() - // Detach the GC finalizer and release the connection. Idempotent via - // `cleanup`, so it's safe to call from multiple stream callbacks. - const release = () => { - streamReadFinalizers.unregister(unregisterToken) - cleanup() - } - const unregisterToken = {} - - const stream = new ReadableStream({ - async pull(streamController) { - try { - const { done, value } = await reader.read() - if (done) { - streamController.close() - release() - } else { - streamController.enqueue(value) - } - } catch (err) { - release() - streamController.error(err) - } - }, - async cancel(reason) { - try { - await reader.cancel(reason) - } finally { - release() - } - }, - }) - - // Release the connection if the consumer abandons the stream without - // reading it to completion or cancelling it. - streamReadFinalizers.register(stream, cleanup, unregisterToken) - - return stream + return wrapStreamWithConnectionCleanup( + res.data as ReadableStream | null, + { clearStartTimeout, cleanup } + ) } catch (err) { cleanup() throw err @@ -701,6 +657,15 @@ export class Filesystem { writeFiles.map(async (file) => { const filePath = path ?? (file as WriteEntry).path const body = await toUploadBody(file.data, useGzip) + const isStream = body instanceof ReadableStream + // A streamed upload can take far longer than the 60s request default, + // so fall back to the file-transfer timeout (matching volume writes) + // unless the caller set one explicitly. The signal is a total + // deadline—unlike downloads there's no post-handshake point to clear + // it, since the response only arrives once the body has been sent. 
+ const uploadTimeoutMs = + writeOpts?.requestTimeoutMs ?? + (isStream ? FILE_TIMEOUT_MS : undefined) const res = await this.envdApi.api .POST('/files', { @@ -713,12 +678,12 @@ export class Filesystem { bodySerializer: () => body, headers, signal: this.connectionConfig.getSignal( - writeOpts?.requestTimeoutMs, + uploadTimeoutMs, writeOpts?.signal ), body: {}, // Streaming request bodies require half-duplex mode. - ...(body instanceof ReadableStream && { + ...(isStream && { duplex: 'half' as const, }), }) diff --git a/packages/js-sdk/src/volume/client.ts b/packages/js-sdk/src/volume/client.ts index d63057b597..8a5bc11555 100644 --- a/packages/js-sdk/src/volume/client.ts +++ b/packages/js-sdk/src/volume/client.ts @@ -3,12 +3,11 @@ import createClient from 'openapi-fetch' import type { components, paths } from './schema.gen' import { defaultHeaders, getEnvVar } from '../api/metadata' import { createApiFetch } from '../api/http2' -import { buildRequestSignal } from '../connectionConfig' +import { buildRequestSignal, FILE_TIMEOUT_MS } from '../connectionConfig' import { createApiLogger, Logger } from '../logs' import type { Volume } from './index' const REQUEST_TIMEOUT_MS = 60_000 // 60 seconds -const FILE_TIMEOUT_MS = 3_600_000 // 1 hour export interface VolumeApiOpts { /** diff --git a/packages/js-sdk/src/volume/index.ts b/packages/js-sdk/src/volume/index.ts index ee254d81f1..13062addcb 100644 --- a/packages/js-sdk/src/volume/index.ts +++ b/packages/js-sdk/src/volume/index.ts @@ -6,7 +6,12 @@ import { VolumeApiOpts, FILE_TIMEOUT_MS, } from './client' -import { ConnectionConfig, ConnectionOpts } from '../connectionConfig' +import { + ConnectionConfig, + ConnectionOpts, + setupRequestController, + wrapStreamWithConnectionCleanup, +} from '../connectionConfig' import { NotFoundError, VolumeError } from '../errors' import { runtime, toBlob } from '../utils' import { VolumeFileType } from './types' @@ -539,6 +544,54 @@ export class Volume { }) const client = new 
VolumeApiClient(config) + if (format === 'stream') { + // The request timeout bounds only the initial handshake; once the + // response arrives, the stream lives until it's consumed, cancelled, or + // the user signal aborts. Matches the sandbox `files.read` stream path. + const { controller, clearStartTimeout, cleanup } = setupRequestController( + config.requestTimeoutMs, + opts?.signal + ) + + try { + const res = await client.api.GET('/volumecontent/{volumeID}/file', { + params: { + path: { volumeID: this.volumeId }, + query: { path }, + }, + parseAs: 'stream', + signal: controller.signal, + }) + + if (res.response.status === 404) { + // Cancel the unconsumed body so the pooled connection is released + // before we propagate. + if (res.response.body && !res.response.bodyUsed) { + await res.response.body.cancel().catch(() => {}) + } + cleanup() + throw new NotFoundError(`Path ${path} not found`) + } + + const err = handleApiError(res, VolumeError) + if (err) { + if (res.response.body && !res.response.bodyUsed) { + await res.response.body.cancel().catch(() => {}) + } + cleanup() + throw err + } + + return wrapStreamWithConnectionCleanup( + res.data as ReadableStream | null, + { clearStartTimeout, cleanup } + ) + } catch (err) { + cleanup() + throw err + } + } + const res = await client.api.GET('/volumecontent/{volumeID}/file', { params: { path: { @@ -572,11 +625,8 @@ export class Volume { return typeof res.data === 'string' ? res.data : '' } - if (format === 'blob') { - return res.data instanceof Blob ? res.data : new Blob([]) - } - - return res.data instanceof ReadableStream ? res.data : new Blob([]).stream() + // format === 'blob' + return res.data instanceof Blob ? 
res.data : new Blob([]) } /** diff --git a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py index 8424b3332f..35f20119e1 100644 --- a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py @@ -206,7 +206,12 @@ async def read( headers=headers, timeout=timeout, ) - r = await self._envd_api.send(request, stream=True) + try: + r = await self._envd_api.send(request, stream=True) + except httpx.RemoteProtocolError as e: + raise await ahandle_envd_api_transport_exception_with_health( + e, self._envd_api + ) err = await _ahandle_filesystem_envd_api_exception(r) if err: diff --git a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py index 325e45bb97..8c72ffe026 100644 --- a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py @@ -225,7 +225,10 @@ def read( headers=headers, timeout=timeout, ) - r = self._envd_api.send(request, stream=True) + try: + r = self._envd_api.send(request, stream=True) + except httpx.RemoteProtocolError as e: + raise handle_envd_api_transport_exception_with_health(e, self._envd_api) err = _handle_filesystem_envd_api_exception(r) if err: diff --git a/packages/python-sdk/e2b/volume/volume_async.py b/packages/python-sdk/e2b/volume/volume_async.py index 047e19f827..6e03f792d4 100644 --- a/packages/python-sdk/e2b/volume/volume_async.py +++ b/packages/python-sdk/e2b/volume/volume_async.py @@ -474,13 +474,19 @@ async def read_file( ) if format == "stream": + # The request timeout bounds connection setup, not the stream read: + # consuming the body must not be killed by it. 
Mirrors the sandbox + # files stream path and the RPC streams, which carry no client-side + # read timeout (the server enforces deadlines, keepalive pings + # detect dropped connections). + stream_timeout = httpx.Timeout(timeout, read=None) async def stream_file() -> AsyncIterator[bytes]: async with api_client.get_async_httpx_client().stream( method="GET", url=f"/volumecontent/{self._volume_id}/file", params=params, - timeout=timeout, + timeout=stream_timeout, ) as response: if response.status_code == 404: raise NotFoundException(f"Path {path} not found") diff --git a/packages/python-sdk/e2b/volume/volume_sync.py b/packages/python-sdk/e2b/volume/volume_sync.py index 4ffc5a1477..2a167e83dd 100644 --- a/packages/python-sdk/e2b/volume/volume_sync.py +++ b/packages/python-sdk/e2b/volume/volume_sync.py @@ -473,13 +473,19 @@ def read_file( ) if format == "stream": + # The request timeout bounds connection setup, not the stream read: + # consuming the body must not be killed by it. Mirrors the sandbox + # files stream path and the RPC streams, which carry no client-side + # read timeout (the server enforces deadlines, keepalive pings + # detect dropped connections). 
+ stream_timeout = httpx.Timeout(timeout, read=None) def stream_file() -> Iterator[bytes]: with api_client.get_httpx_client().stream( method="GET", url=f"/volumecontent/{self._volume_id}/file", params=params, - timeout=timeout, + timeout=stream_timeout, ) as response: if response.status_code == 404: raise NotFoundException(f"Path {path} not found") From e828699e29a3f37150f24cc4bc19cdcd6edc714a Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 17 Jun 2026 17:19:18 +0200 Subject: [PATCH 08/20] feat(sdks): default file writes to octet-stream when data is streamable Streaming an upload only happens on the octet-stream path; the multipart path buffers (`toBlob` in JS, `.read()` for text file-likes in Python), so with the old `useOctetStream`/`use_octet_stream` default of false a streamed write was silently buffered into memory. Default the flag to auto-detect instead: use octet-stream when any write entry is streamable (JS `ReadableStream`; Python file-like / non-str-bytes), and `multipart/form-data` otherwise. Browsers stay on multipart since they can't stream request bodies. An explicit flag value still wins, gzip still implies octet-stream, and the old-envd fallback is preserved. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/cuddly-pots-stream.md | 4 +-- .../js-sdk/src/sandbox/filesystem/index.ts | 25 +++++++++++++------ .../sandbox_async/filesystem/filesystem.py | 17 ++++++++++--- .../e2b/sandbox_sync/filesystem/filesystem.py | 17 ++++++++++--- 4 files changed, 46 insertions(+), 17 deletions(-) diff --git a/.changeset/cuddly-pots-stream.md b/.changeset/cuddly-pots-stream.md index 4895bc6685..5d70e7b360 100644 --- a/.changeset/cuddly-pots-stream.md +++ b/.changeset/cuddly-pots-stream.md @@ -6,7 +6,7 @@ Stream uploads instead of buffering streaming input entirely in memory: - `Volume.writeFile()` / `Volume.write_file()`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are now streamed to the API in chunks. -- `Sandbox.files.write()` / `write_files()` with `useOctetStream`/`use_octet_stream`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are streamed to the sandbox, including when `gzip` is enabled (compression now happens chunk by chunk). +- `Sandbox.files.write()` / `write_files()`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are streamed to the sandbox, including when `gzip` is enabled (compression now happens chunk by chunk). `useOctetStream`/`use_octet_stream` now defaults to auto-detect — octet-stream is used when any entry is streamable (so streamed uploads aren't silently buffered) and `multipart/form-data` otherwise; browsers always use `multipart/form-data`. Streamed uploads also use a longer transfer timeout instead of the default request timeout, so large uploads aren't cut off. +- `Sandbox.files.read(format="stream")` and `Volume.readFile()`/`read_file()` streams: the request timeout now bounds only the initial handshake instead of killing the stream while it's being consumed (Python disables the read timeout; JS bounds the handshake and supports `signal` to cancel an in-flight stream). 
A dropped connection during the stream handshake now surfaces the same typed, health-checked error as non-stream reads. - Python `Sandbox.files.read(format="stream")`: the response body is now streamed from the sandbox instead of being downloaded into memory before iteration (sync and async). -- JS `Sandbox.files.read({ format: 'stream' })`: the request timeout now bounds only the initial handshake instead of killing the stream while it's being consumed; pass `signal` to cancel an in-flight stream. - JS `Sandbox.files.read()` with `blob` or `stream` format now returns an empty `Blob`/`ReadableStream` for empty files instead of `""`. diff --git a/packages/js-sdk/src/sandbox/filesystem/index.ts b/packages/js-sdk/src/sandbox/filesystem/index.ts index 0a50f5ab34..b68e7ad464 100644 --- a/packages/js-sdk/src/sandbox/filesystem/index.ts +++ b/packages/js-sdk/src/sandbox/filesystem/index.ts @@ -53,7 +53,7 @@ import { InvalidArgumentError, TemplateError, } from '../../errors' -import { toBlob, toUploadBody } from '../../utils' +import { runtime, toBlob, toUploadBody } from '../../utils' const FILESYSTEM_HTTP_ERROR_MAP: Record Error> = { 404: (message: string) => new FileNotFoundError(message), @@ -276,8 +276,12 @@ export interface FilesystemWriteOpts extends FilesystemRequestOpts { * Outside the browser, `ReadableStream` data is then streamed to the sandbox * instead of being buffered in memory. * - * Defaults to `false`. Requires envd 0.5.7 or later — when not supported by - * the sandbox's envd version, the upload falls back to `multipart/form-data`. + * Defaults to `undefined`, which uses octet-stream when any entry is a + * `ReadableStream` (so streamed uploads aren't buffered) and + * `multipart/form-data` otherwise; browsers always use `multipart/form-data` + * since they can't stream request bodies. Requires envd 0.5.7 or later — when + * not supported by the sandbox's envd version, the upload falls back to + * `multipart/form-data`. 
*/ useOctetStream?: boolean /** @@ -623,11 +627,18 @@ export class Filesystem { const supportsOctetStream = compareVersions(this.envdApi.version, ENVD_OCTET_STREAM_UPLOAD) >= 0 - // Gzip compression only works with the octet-stream upload (the - // Content-Encoding header applies to the whole request body), so - // requesting gzip implies it when envd supports it. + // Streaming a request body only happens on the octet-stream path; the + // multipart path buffers via `toBlob`. So default to octet-stream when any + // entry is a `ReadableStream`, otherwise a streamed upload would be + // silently buffered. Browsers can't stream request bodies, so they stay on + // multipart. Gzip also implies octet-stream (the Content-Encoding header + // applies to the whole request body). An explicit `useOctetStream` wins. + const hasStreamableData = + runtime !== 'browser' && + writeFiles.some((file) => file.data instanceof ReadableStream) const useOctetStream = - ((writeOpts?.useOctetStream ?? false) || useGzip) && supportsOctetStream + ((writeOpts?.useOctetStream ?? hasStreamableData) || useGzip) && + supportsOctetStream const metadata = writeOpts?.metadata validateMetadata(metadata) diff --git a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py index 35f20119e1..c1399f1260 100644 --- a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py @@ -258,7 +258,7 @@ async def write( user: Optional[Username] = None, request_timeout: Optional[float] = None, gzip: bool = False, - use_octet_stream: bool = False, + use_octet_stream: Optional[bool] = None, metadata: Optional[Dict[str, str]] = None, ) -> WriteInfo: """ @@ -272,7 +272,7 @@ async def write( :param user: Run the operation as this user :param request_timeout: Timeout for the request in **seconds** :param gzip: Use gzip compression for the upload. 
Implies the `application/octet-stream` upload. Requires envd 0.5.7 or later — when not supported, the upload falls back to uncompressed `multipart/form-data`. - :param use_octet_stream: Upload using `application/octet-stream` instead of `multipart/form-data`. Defaults to `False`. Requires envd 0.5.7 or later — when not supported, the upload falls back to `multipart/form-data`. + :param use_octet_stream: Upload using `application/octet-stream` instead of `multipart/form-data`. Defaults to `None`, which uses octet-stream when `data` is a file-like object (so streamed uploads aren't buffered) and `multipart/form-data` otherwise. Requires envd 0.5.7 or later — when not supported, the upload falls back to `multipart/form-data`. :param metadata: User-defined metadata to persist on the uploaded file as extended attributes. Keys are lowercased by the sandbox; invalid keys or values raise an `InvalidArgumentException`. Requires envd 0.6.2 or later. :return: Information about the written file @@ -297,7 +297,7 @@ async def write_files( user: Optional[Username] = None, request_timeout: Optional[float] = None, gzip: bool = False, - use_octet_stream: bool = False, + use_octet_stream: Optional[bool] = None, metadata: Optional[Dict[str, str]] = None, ) -> List[WriteInfo]: """ @@ -312,7 +312,7 @@ async def write_files( :param user: Run the operation as this user :param request_timeout: Timeout for the request :param gzip: Use gzip compression for the upload. Implies the `application/octet-stream` upload. Requires envd 0.5.7 or later — when not supported, the upload falls back to uncompressed `multipart/form-data`. - :param use_octet_stream: Upload using `application/octet-stream` instead of `multipart/form-data`. Defaults to `False`. Requires envd 0.5.7 or later — when not supported, the upload falls back to `multipart/form-data`. + :param use_octet_stream: Upload using `application/octet-stream` instead of `multipart/form-data`. 
Defaults to `None`, which uses octet-stream when any entry is a file-like object (so streamed uploads aren't buffered) and `multipart/form-data` otherwise. Requires envd 0.5.7 or later — when not supported, the upload falls back to `multipart/form-data`. :param metadata: User-defined metadata to persist on each uploaded file as extended attributes; the same map is applied to every file. Keys are lowercased by the sandbox; invalid keys or values raise an `InvalidArgumentException`. Requires envd 0.6.2 or later. :return: Information about the written files """ @@ -328,6 +328,15 @@ async def write_files( if metadata and self._envd_version < ENVD_FILE_METADATA: raise TemplateException("File metadata requires envd 0.6.2 or later.") + if use_octet_stream is None: + # Streaming an upload only happens on the octet-stream path; the + # multipart path buffers file-like data. Default to octet-stream + # when any entry is a file-like object so a streamed upload isn't + # silently buffered. + use_octet_stream = any( + not isinstance(file["data"], (str, bytes)) for file in files + ) + supports_octet_stream = self._envd_version >= ENVD_OCTET_STREAM_UPLOAD # Gzip compression only works with the octet-stream upload (the # Content-Encoding header applies to the whole request body), so diff --git a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py index 8c72ffe026..04bffbcd08 100644 --- a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py @@ -271,7 +271,7 @@ def write( user: Optional[Username] = None, request_timeout: Optional[float] = None, gzip: bool = False, - use_octet_stream: bool = False, + use_octet_stream: Optional[bool] = None, metadata: Optional[Dict[str, str]] = None, ) -> WriteInfo: """ @@ -285,7 +285,7 @@ def write( :param user: Run the operation as this user :param request_timeout: Timeout for the request in 
**seconds** :param gzip: Use gzip compression for the upload. Implies the `application/octet-stream` upload. Requires envd 0.5.7 or later — when not supported, the upload falls back to uncompressed `multipart/form-data`. - :param use_octet_stream: Upload using `application/octet-stream` instead of `multipart/form-data`. Defaults to `False`. Requires envd 0.5.7 or later — when not supported, the upload falls back to `multipart/form-data`. + :param use_octet_stream: Upload using `application/octet-stream` instead of `multipart/form-data`. Defaults to `None`, which uses octet-stream when `data` is a file-like object (so streamed uploads aren't buffered) and `multipart/form-data` otherwise. Requires envd 0.5.7 or later — when not supported, the upload falls back to `multipart/form-data`. :param metadata: User-defined metadata to persist on the uploaded file as extended attributes. Keys are lowercased by the sandbox; invalid keys or values raise an `InvalidArgumentException`. Requires envd 0.6.2 or later. :return: Information about the written file @@ -310,7 +310,7 @@ def write_files( user: Optional[Username] = None, request_timeout: Optional[float] = None, gzip: bool = False, - use_octet_stream: bool = False, + use_octet_stream: Optional[bool] = None, metadata: Optional[Dict[str, str]] = None, ) -> List[WriteInfo]: """ @@ -325,7 +325,7 @@ def write_files( :param user: Run the operation as this user :param request_timeout: Timeout for the request :param gzip: Use gzip compression for the upload. Implies the `application/octet-stream` upload. Requires envd 0.5.7 or later — when not supported, the upload falls back to uncompressed `multipart/form-data`. - :param use_octet_stream: Upload using `application/octet-stream` instead of `multipart/form-data`. Defaults to `False`. Requires envd 0.5.7 or later — when not supported, the upload falls back to `multipart/form-data`. + :param use_octet_stream: Upload using `application/octet-stream` instead of `multipart/form-data`. 
Defaults to `None`, which uses octet-stream when any entry is a file-like object (so streamed uploads aren't buffered) and `multipart/form-data` otherwise. Requires envd 0.5.7 or later — when not supported, the upload falls back to `multipart/form-data`. :param metadata: User-defined metadata to persist on each uploaded file as extended attributes; the same map is applied to every file. Keys are lowercased by the sandbox; invalid keys or values raise an `InvalidArgumentException`. Requires envd 0.6.2 or later. :return: Information about the written files """ @@ -341,6 +341,15 @@ def write_files( if metadata and self._envd_version < ENVD_FILE_METADATA: raise TemplateException("File metadata requires envd 0.6.2 or later.") + if use_octet_stream is None: + # Streaming an upload only happens on the octet-stream path; the + # multipart path buffers file-like data. Default to octet-stream + # when any entry is a file-like object so a streamed upload isn't + # silently buffered. + use_octet_stream = any( + not isinstance(file["data"], (str, bytes)) for file in files + ) + supports_octet_stream = self._envd_version >= ENVD_OCTET_STREAM_UPLOAD # Gzip compression only works with the octet-stream upload (the # Content-Encoding header applies to the whole request body), so From b0b10182f1bcbac9b8931478a6f8db9e1a9381da Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 17 Jun 2026 17:45:49 +0200 Subject: [PATCH 09/20] fix(python-sdk): give streamed file uploads the file-transfer timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Streamed (file-like) sandbox writes used the 60s request timeout for the write phase, so a large or slow streamed upload could trip WriteTimeout while the body was still being sent — inconsistent with the JS SDK (1h) and Python volume writes (1h). 
Relax the write timeout to FILE_TIMEOUT (1h) when any write entry is streamable, keeping connection setup and the response read bounded by the request timeout. Buffered str/bytes uploads keep the request timeout. FILE_TIMEOUT is shared via e2b/connection_config.py, mirroring the JS SDK's FILE_TIMEOUT_MS. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/python-sdk/e2b/connection_config.py | 4 +++ .../sandbox_async/filesystem/filesystem.py | 32 +++++++++++++------ .../e2b/sandbox_sync/filesystem/filesystem.py | 32 +++++++++++++------ 3 files changed, 50 insertions(+), 18 deletions(-) diff --git a/packages/python-sdk/e2b/connection_config.py b/packages/python-sdk/e2b/connection_config.py index 01eda5ce74..debeffae2b 100644 --- a/packages/python-sdk/e2b/connection_config.py +++ b/packages/python-sdk/e2b/connection_config.py @@ -10,6 +10,10 @@ REQUEST_TIMEOUT: float = 60.0 # 60 seconds +# Timeout for streaming file transfers. A streamed body can take far longer +# than a regular request, so it must not inherit the short REQUEST_TIMEOUT. +FILE_TIMEOUT: float = 3600.0 # 1 hour + KEEPALIVE_PING_INTERVAL_SEC = 50 # 50 seconds KEEPALIVE_PING_HEADER = "Keepalive-Ping-Interval" diff --git a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py index c1399f1260..bc80069fb3 100644 --- a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py @@ -8,6 +8,7 @@ import e2b_connect as connect from e2b.connection_config import ( + FILE_TIMEOUT, KEEPALIVE_PING_HEADER, KEEPALIVE_PING_INTERVAL_SEC, ConnectionConfig, @@ -328,14 +329,17 @@ async def write_files( if metadata and self._envd_version < ENVD_FILE_METADATA: raise TemplateException("File metadata requires envd 0.6.2 or later.") + # A file-like entry is streamed; str/bytes are sent from memory. 
+ has_streamable_data = any( + not isinstance(file["data"], (str, bytes)) for file in files + ) + if use_octet_stream is None: # Streaming an upload only happens on the octet-stream path; the # multipart path buffers file-like data. Default to octet-stream # when any entry is a file-like object so a streamed upload isn't # silently buffered. - use_octet_stream = any( - not isinstance(file["data"], (str, bytes)) for file in files - ) + use_octet_stream = has_streamable_data supports_octet_stream = self._envd_version >= ENVD_OCTET_STREAM_UPLOAD # Gzip compression only works with the octet-stream upload (the @@ -343,6 +347,20 @@ async def write_files( # requesting gzip implies it when envd supports it. use_octet_stream = (use_octet_stream or gzip) and supports_octet_stream + request_timeout_value = self._connection_config.get_request_timeout( + request_timeout + ) + # A streamed body send can take far longer than the default request + # timeout, so give the write phase the file-transfer budget while + # keeping connection setup and the response read bounded. Matches the + # JS SDK's 1h streamed-upload timeout (httpx applies `write` per chunk + # rather than as a total deadline). + upload_timeout = ( + httpx.Timeout(request_timeout_value, write=FILE_TIMEOUT) + if has_streamable_data + else request_timeout_value + ) + # Metadata is sent as request-scoped X-Metadata-* headers, so the same # metadata is applied to every file in a multi-file upload. 
extra_headers = metadata_to_headers(metadata) @@ -368,9 +386,7 @@ async def _upload_file(file): content=to_upload_body_async(file_data, gzip), headers=headers, params=params, - timeout=self._connection_config.get_request_timeout( - request_timeout - ), + timeout=upload_timeout, ) except httpx.RemoteProtocolError as e: raise await ahandle_envd_api_transport_exception_with_health( @@ -413,9 +429,7 @@ async def _upload_file(file): files=httpx_files, params=params, headers=extra_headers, - timeout=self._connection_config.get_request_timeout( - request_timeout - ), + timeout=upload_timeout, ) except httpx.RemoteProtocolError as e: raise await ahandle_envd_api_transport_exception_with_health( diff --git a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py index 04bffbcd08..8b56bb0538 100644 --- a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py @@ -7,6 +7,7 @@ import e2b_connect from e2b.api.client_sync import get_envd_transport from e2b.connection_config import ( + FILE_TIMEOUT, KEEPALIVE_PING_HEADER, KEEPALIVE_PING_INTERVAL_SEC, ConnectionConfig, @@ -341,14 +342,17 @@ def write_files( if metadata and self._envd_version < ENVD_FILE_METADATA: raise TemplateException("File metadata requires envd 0.6.2 or later.") + # A file-like entry is streamed; str/bytes are sent from memory. + has_streamable_data = any( + not isinstance(file["data"], (str, bytes)) for file in files + ) + if use_octet_stream is None: # Streaming an upload only happens on the octet-stream path; the # multipart path buffers file-like data. Default to octet-stream # when any entry is a file-like object so a streamed upload isn't # silently buffered. 
- use_octet_stream = any( - not isinstance(file["data"], (str, bytes)) for file in files - ) + use_octet_stream = has_streamable_data supports_octet_stream = self._envd_version >= ENVD_OCTET_STREAM_UPLOAD # Gzip compression only works with the octet-stream upload (the @@ -356,6 +360,20 @@ def write_files( # requesting gzip implies it when envd supports it. use_octet_stream = (use_octet_stream or gzip) and supports_octet_stream + request_timeout_value = self._connection_config.get_request_timeout( + request_timeout + ) + # A streamed body send can take far longer than the default request + # timeout, so give the write phase the file-transfer budget while + # keeping connection setup and the response read bounded. Matches the + # JS SDK's 1h streamed-upload timeout (httpx applies `write` per chunk + # rather than as a total deadline). + upload_timeout = ( + httpx.Timeout(request_timeout_value, write=FILE_TIMEOUT) + if has_streamable_data + else request_timeout_value + ) + # Metadata is sent as request-scoped X-Metadata-* headers, so the same # metadata is applied to every file in a multi-file upload. 
extra_headers = metadata_to_headers(metadata) @@ -380,9 +398,7 @@ def write_files( content=to_upload_body(file_data, gzip), headers=headers, params=params, - timeout=self._connection_config.get_request_timeout( - request_timeout - ), + timeout=upload_timeout, ) except httpx.RemoteProtocolError as e: raise handle_envd_api_transport_exception_with_health( @@ -419,9 +435,7 @@ def write_files( files=httpx_files, params=params, headers=extra_headers, - timeout=self._connection_config.get_request_timeout( - request_timeout - ), + timeout=upload_timeout, ) except httpx.RemoteProtocolError as e: raise handle_envd_api_transport_exception_with_health(e, self._envd_api) From d2b7329574a0eeff6a16eadffdd8809af5238e0f Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 17 Jun 2026 17:49:24 +0200 Subject: [PATCH 10/20] refactor(sdks): split volume streaming changes into a follow-up PR Revert the volume read/write streaming changes so this PR is scoped to the sandbox files streaming work. The volume changes land in a follow-up PR that builds on the shared streaming infrastructure introduced here. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/cuddly-pots-stream.md | 3 +- packages/js-sdk/src/volume/client.ts | 3 +- packages/js-sdk/src/volume/index.ts | 74 +++---------------- packages/js-sdk/tests/volume/file.test.ts | 38 ---------- .../python-sdk/e2b/volume/volume_async.py | 28 +++---- packages/python-sdk/e2b/volume/volume_sync.py | 31 +++----- .../tests/async/volume_async/test_file.py | 12 +-- .../tests/sync/volume_sync/test_file.py | 12 +-- 8 files changed, 37 insertions(+), 164 deletions(-) diff --git a/.changeset/cuddly-pots-stream.md b/.changeset/cuddly-pots-stream.md index 5d70e7b360..5721b2735a 100644 --- a/.changeset/cuddly-pots-stream.md +++ b/.changeset/cuddly-pots-stream.md @@ -5,8 +5,7 @@ Stream uploads instead of buffering streaming input entirely in memory: -- `Volume.writeFile()` / `Volume.write_file()`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are now streamed to the API in chunks. - `Sandbox.files.write()` / `write_files()`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are streamed to the sandbox, including when `gzip` is enabled (compression now happens chunk by chunk). `useOctetStream`/`use_octet_stream` now defaults to auto-detect — octet-stream is used when any entry is streamable (so streamed uploads aren't silently buffered) and `multipart/form-data` otherwise; browsers always use `multipart/form-data`. Streamed uploads also use a longer transfer timeout instead of the default request timeout, so large uploads aren't cut off. -- `Sandbox.files.read(format="stream")` and `Volume.readFile()`/`read_file()` streams: the request timeout now bounds only the initial handshake instead of killing the stream while it's being consumed (Python disables the read timeout; JS bounds the handshake and supports `signal` to cancel an in-flight stream). A dropped connection during the stream handshake now surfaces the same typed, health-checked error as non-stream reads. 
+- `Sandbox.files.read(format="stream")`: the request timeout now bounds only the initial handshake instead of killing the stream while it's being consumed (Python disables the read timeout; JS bounds the handshake and supports `signal` to cancel an in-flight stream). A dropped connection during the stream handshake now surfaces the same typed, health-checked error as non-stream reads. - Python `Sandbox.files.read(format="stream")`: the response body is now streamed from the sandbox instead of being downloaded into memory before iteration (sync and async). - JS `Sandbox.files.read()` with `blob` or `stream` format now returns an empty `Blob`/`ReadableStream` for empty files instead of `""`. diff --git a/packages/js-sdk/src/volume/client.ts b/packages/js-sdk/src/volume/client.ts index 8a5bc11555..d63057b597 100644 --- a/packages/js-sdk/src/volume/client.ts +++ b/packages/js-sdk/src/volume/client.ts @@ -3,11 +3,12 @@ import createClient from 'openapi-fetch' import type { components, paths } from './schema.gen' import { defaultHeaders, getEnvVar } from '../api/metadata' import { createApiFetch } from '../api/http2' -import { buildRequestSignal, FILE_TIMEOUT_MS } from '../connectionConfig' +import { buildRequestSignal } from '../connectionConfig' import { createApiLogger, Logger } from '../logs' import type { Volume } from './index' const REQUEST_TIMEOUT_MS = 60_000 // 60 seconds +const FILE_TIMEOUT_MS = 3_600_000 // 1 hour export interface VolumeApiOpts { /** diff --git a/packages/js-sdk/src/volume/index.ts b/packages/js-sdk/src/volume/index.ts index 13062addcb..7a09e74ec3 100644 --- a/packages/js-sdk/src/volume/index.ts +++ b/packages/js-sdk/src/volume/index.ts @@ -6,14 +6,9 @@ import { VolumeApiOpts, FILE_TIMEOUT_MS, } from './client' -import { - ConnectionConfig, - ConnectionOpts, - setupRequestController, - wrapStreamWithConnectionCleanup, -} from '../connectionConfig' +import { ConnectionConfig, ConnectionOpts } from '../connectionConfig' import { NotFoundError, 
VolumeError } from '../errors' -import { runtime, toBlob } from '../utils' +import { toBlob } from '../utils' import { VolumeFileType } from './types' import type { VolumeAndToken, @@ -544,54 +539,6 @@ export class Volume { }) const client = new VolumeApiClient(config) - if (format === 'stream') { - // The request timeout bounds only the initial handshake; once the - // response arrives, the stream lives until it's consumed, cancelled, or - // the user signal aborts. Matches the sandbox `files.read` stream path. - const { controller, clearStartTimeout, cleanup } = setupRequestController( - config.requestTimeoutMs, - opts?.signal - ) - - try { - const res = await client.api.GET('/volumecontent/{volumeID}/file', { - params: { - path: { volumeID: this.volumeId }, - query: { path }, - }, - parseAs: 'stream', - signal: controller.signal, - }) - - if (res.response.status === 404) { - // Cancel the unconsumed body so the pooled connection is released - // before we propagate. - if (res.response.body && !res.response.bodyUsed) { - await res.response.body.cancel().catch(() => {}) - } - cleanup() - throw new NotFoundError(`Path ${path} not found`) - } - - const err = handleApiError(res, VolumeError) - if (err) { - if (res.response.body && !res.response.bodyUsed) { - await res.response.body.cancel().catch(() => {}) - } - cleanup() - throw err - } - - return wrapStreamWithConnectionCleanup( - res.data as ReadableStream | null, - { clearStartTimeout, cleanup } - ) - } catch (err) { - cleanup() - throw err - } - } - const res = await client.api.GET('/volumecontent/{volumeID}/file', { params: { path: { @@ -625,8 +572,11 @@ export class Volume { return typeof res.data === 'string' ? res.data : '' } - // format === 'blob' - return res.data instanceof Blob ? res.data : new Blob([]) + if (format === 'blob') { + return res.data instanceof Blob ? res.data : new Blob([]) + } + + return res.data instanceof ReadableStream ? 
res.data : new Blob([]).stream() } /** @@ -637,7 +587,7 @@ export class Volume { * Writing to a file that already exists overwrites the file. * * @param path path to the file. - * @param data data to write to the file. Data can be a string, `ArrayBuffer`, `Blob`, or `ReadableStream`. Outside the browser, `ReadableStream` data is streamed to the API instead of being buffered in memory. + * @param data data to write to the file. Data can be a string, `ArrayBuffer`, `Blob`, or `ReadableStream`. * @param options file creation options. * @param opts connection options. * @@ -654,9 +604,7 @@ export class Volume { }) const client = new VolumeApiClient(config) - // Browsers don't support streaming request bodies, so buffer there. - const isStream = data instanceof ReadableStream && runtime !== 'browser' - const body = isStream ? data : await toBlob(data) + const blob = await toBlob(data) const res = await client.api.PUT('/volumecontent/{volumeID}/file', { params: { @@ -671,14 +619,12 @@ export class Volume { force: opts?.force, }, }, - bodySerializer: () => body, + bodySerializer: () => blob, body: {} as any, headers: { 'Content-Type': 'application/octet-stream', }, signal: config.getSignal(), - // Streaming request bodies require half-duplex mode. 
- ...(isStream && { duplex: 'half' as const }), }) if (res.response.status === 404) { diff --git a/packages/js-sdk/tests/volume/file.test.ts b/packages/js-sdk/tests/volume/file.test.ts index 5819b332be..f15131b7eb 100644 --- a/packages/js-sdk/tests/volume/file.test.ts +++ b/packages/js-sdk/tests/volume/file.test.ts @@ -58,20 +58,6 @@ describe('Volume File Operations', () => { } ) - volumeTest( - 'should write and read a file from a ReadableStream', - async ({ volume }) => { - const path = '/test-stream.txt' - const content = 'Test stream content' - const stream = new Blob([content]).stream() - - await volume.writeFile(path, stream) - const readContent = await volume.readFile(path, { format: 'text' }) - - expect(readContent).toBe(content) - } - ) - volumeTest('should write and read an empty file', async ({ volume }) => { const path = '/empty.txt' const content = '' @@ -82,30 +68,6 @@ describe('Volume File Operations', () => { expect(readContent).toBe(content) }) - volumeTest( - 'should read an empty file in all formats', - async ({ volume }) => { - const path = '/empty-formats.txt' - await volume.writeFile(path, '') - - const bytes = await volume.readFile(path, { format: 'bytes' }) - expect(bytes).toBeInstanceOf(Uint8Array) - expect(bytes.length).toBe(0) - - const blob = await volume.readFile(path, { format: 'blob' }) - expect(blob).toBeInstanceOf(Blob) - expect(blob.size).toBe(0) - - const stream = await volume.readFile(path, { format: 'stream' }) - expect(stream).toBeInstanceOf(ReadableStream) - const chunks: Uint8Array[] = [] - for await (const chunk of stream as unknown as AsyncIterable) { - chunks.push(chunk) - } - expect(chunks.reduce((n, c) => n + c.length, 0)).toBe(0) - } - ) - volumeTest( 'should overwrite an existing file with force option', async ({ volume }) => { diff --git a/packages/python-sdk/e2b/volume/volume_async.py b/packages/python-sdk/e2b/volume/volume_async.py index 6e03f792d4..c8932af9af 100644 --- 
a/packages/python-sdk/e2b/volume/volume_async.py +++ b/packages/python-sdk/e2b/volume/volume_async.py @@ -46,7 +46,6 @@ VolumeInfo, VolumeEntryStat, ) -from e2b.io_utils import aiter_io_chunks from e2b.volume.utils import DualMethod, convert_volume_entry_stat @@ -474,19 +473,13 @@ async def read_file( ) if format == "stream": - # The request timeout bounds connection setup, not the stream read: - # consuming the body must not be killed by it. Mirrors the sandbox - # files stream path and the RPC streams, which carry no client-side - # read timeout (the server enforces deadlines, keepalive pings - # detect dropped connections). - stream_timeout = httpx.Timeout(timeout, read=None) async def stream_file() -> AsyncIterator[bytes]: async with api_client.get_async_httpx_client().stream( method="GET", url=f"/volumecontent/{self._volume_id}/file", params=params, - timeout=stream_timeout, + timeout=timeout, ) as response: if response.status_code == 404: raise NotFoundException(f"Path {path} not found") @@ -532,7 +525,7 @@ async def stream_file() -> AsyncIterator[bytes]: async def write_file( self, path: str, - data: Union[str, bytes, IO], + data: Union[str, bytes, IO[bytes]], uid: Optional[int] = None, gid: Optional[int] = None, mode: Optional[int] = None, @@ -546,7 +539,7 @@ async def write_file( Writing to a file that already exists overwrites the file. :param path: Path to the file - :param data: Data to write to the file. Data can be a string, bytes, or IO. File-like objects are streamed in chunks instead of being buffered in memory. + :param data: Data to write to the file. Data can be a string, bytes, or IO. 
:param uid: User ID of the created file :param gid: Group ID of the created file :param mode: Mode of the created file @@ -563,21 +556,22 @@ async def write_file( if upload_timeout is not None: api_client = api_client.with_timeout(httpx.Timeout(upload_timeout)) - content: Union[bytes, AsyncIterator[bytes]] if isinstance(data, str): - content = data.encode("utf-8") + data_bytes = data.encode("utf-8") elif isinstance(data, bytes): - content = data + data_bytes = data elif hasattr(data, "read"): - # Stream file-like objects in chunks without buffering them in - # memory. Async httpx requires an async iterable request body. - content = aiter_io_chunks(data) + content = data.read() + if isinstance(content, bytes): + data_bytes = content + else: + data_bytes = content.encode("utf-8") else: raise ValueError(f"Unsupported data type: {type(data)}") res = await put_file.asyncio_detailed( self._volume_id, - body=FilePayload(payload=content), # type: ignore[arg-type] # httpx accepts bytes and streamable content directly + body=FilePayload(payload=data_bytes), # type: ignore[arg-type] # Pass bytes directly for async httpx compatibility path=path, uid=uid if uid is not None else UNSET, gid=gid if gid is not None else UNSET, diff --git a/packages/python-sdk/e2b/volume/volume_sync.py b/packages/python-sdk/e2b/volume/volume_sync.py index 2a167e83dd..94a4f5be60 100644 --- a/packages/python-sdk/e2b/volume/volume_sync.py +++ b/packages/python-sdk/e2b/volume/volume_sync.py @@ -1,4 +1,3 @@ -import io from typing import IO, Iterator, List, Literal, Optional, Union, cast, overload from http import HTTPStatus @@ -47,7 +46,6 @@ VolumeInfo, VolumeEntryStat, ) -from e2b.io_utils import iter_io_chunks from e2b.volume.utils import DualMethod, convert_volume_entry_stat @@ -473,19 +471,13 @@ def read_file( ) if format == "stream": - # The request timeout bounds connection setup, not the stream read: - # consuming the body must not be killed by it. 
Mirrors the sandbox - # files stream path and the RPC streams, which carry no client-side - # read timeout (the server enforces deadlines, keepalive pings - # detect dropped connections). - stream_timeout = httpx.Timeout(timeout, read=None) def stream_file() -> Iterator[bytes]: with api_client.get_httpx_client().stream( method="GET", url=f"/volumecontent/{self._volume_id}/file", params=params, - timeout=stream_timeout, + timeout=timeout, ) as response: if response.status_code == 404: raise NotFoundException(f"Path {path} not found") @@ -530,7 +522,7 @@ def stream_file() -> Iterator[bytes]: def write_file( self, path: str, - data: Union[str, bytes, IO], + data: Union[str, bytes, IO[bytes]], uid: Optional[int] = None, gid: Optional[int] = None, mode: Optional[int] = None, @@ -544,7 +536,7 @@ def write_file( Writing to a file that already exists overwrites the file. :param path: Path to the file - :param data: Data to write to the file. Data can be a string, bytes, or IO. File-like objects are streamed in chunks instead of being buffered in memory. + :param data: Data to write to the file. Data can be a string, bytes, or IO. :param uid: User ID of the created file :param gid: Group ID of the created file :param mode: Mode of the created file @@ -561,23 +553,22 @@ def write_file( if upload_timeout is not None: api_client = api_client.with_timeout(httpx.Timeout(upload_timeout)) - content: Union[bytes, IO[bytes], Iterator[bytes]] if isinstance(data, str): - content = data.encode("utf-8") + data_bytes = data.encode("utf-8") elif isinstance(data, bytes): - content = data - elif isinstance(data, io.TextIOBase): - # Text-mode IO yields str chunks—encode them while streaming. - content = iter_io_chunks(data) + data_bytes = data elif hasattr(data, "read"): - # httpx streams file-like objects in chunks without buffering. 
- content = data + content = data.read() + if isinstance(content, bytes): + data_bytes = content + else: + data_bytes = content.encode("utf-8") else: raise ValueError(f"Unsupported data type: {type(data)}") res = put_file.sync_detailed( self._volume_id, - body=FilePayload(payload=content), # type: ignore[arg-type] # httpx accepts bytes and streamable content directly + body=FilePayload(payload=data_bytes), # type: ignore[arg-type] # Pass bytes directly for sync httpx compatibility path=path, uid=uid if uid is not None else UNSET, gid=gid if gid is not None else UNSET, diff --git a/packages/python-sdk/tests/async/volume_async/test_file.py b/packages/python-sdk/tests/async/volume_async/test_file.py index f901dcb434..014eb33a2c 100644 --- a/packages/python-sdk/tests/async/volume_async/test_file.py +++ b/packages/python-sdk/tests/async/volume_async/test_file.py @@ -1,5 +1,5 @@ import datetime -from io import BytesIO, StringIO +from io import BytesIO import pytest @@ -50,16 +50,6 @@ async def test_write_and_read_stream(self, async_volume: AsyncVolume): assert read_content == content - async def test_write_and_read_text_stream(self, async_volume: AsyncVolume): - path = "/test-text-stream.txt" - content = "Test text stream content" - stream = StringIO(content) - - await async_volume.write_file(path, stream) - read_content = await async_volume.read_file(path, format="text") - - assert read_content == content - async def test_write_and_read_empty_file(self, async_volume: AsyncVolume): path = "/empty.txt" content = "" diff --git a/packages/python-sdk/tests/sync/volume_sync/test_file.py b/packages/python-sdk/tests/sync/volume_sync/test_file.py index eb2dfa0c0a..c51db81691 100644 --- a/packages/python-sdk/tests/sync/volume_sync/test_file.py +++ b/packages/python-sdk/tests/sync/volume_sync/test_file.py @@ -1,5 +1,5 @@ import datetime -from io import BytesIO, StringIO +from io import BytesIO import pytest @@ -47,16 +47,6 @@ def test_write_and_read_stream(self, volume: Volume): 
assert read_content == content - def test_write_and_read_text_stream(self, volume: Volume): - path = "/test-text-stream.txt" - content = "Test text stream content" - stream = StringIO(content) - - volume.write_file(path, stream) - read_content = volume.read_file(path, format="text") - - assert read_content == content - def test_write_and_read_empty_file(self, volume: Volume): path = "/empty.txt" content = "" From 21045a299111e0ff58118b860d81cf8cd8404536 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 18 Jun 2026 19:09:45 +0200 Subject: [PATCH 11/20] test(python-sdk): make stream connection-leak assertions race-free A fully consumed stream returns its connection to the pool, where it can linger as an idle keep-alive entry until the server-side close is observed. Asserting on total pool size therefore flaked under load (test_sync_full_consume_releases_connection saw 1 instead of 0 on CI). Count only checked-out (non-idle) connections, which is what the helper name promises and is the actual leak condition. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/python-sdk/tests/test_file_stream_reader.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/packages/python-sdk/tests/test_file_stream_reader.py b/packages/python-sdk/tests/test_file_stream_reader.py index 1672cb6166..34dcea2964 100644 --- a/packages/python-sdk/tests/test_file_stream_reader.py +++ b/packages/python-sdk/tests/test_file_stream_reader.py @@ -52,7 +52,13 @@ def serve(): def _active_connections(client) -> int: - return len(client._transport._pool.connections) + # Count connections that are still checked out (a leaked/in-use stream), + # not the total pool size. A fully consumed stream returns its connection + # to the pool, where it may linger as an idle keep-alive entry until the + # server-side close is observed; that lingering idle connection is not a
Asserting on total pool size makes this racy under load (the basis + # of a CI flake); counting only non-idle connections is deterministic. + return sum(1 for conn in client._transport._pool.connections if not conn.is_idle()) def _open_stream(client, port): From eb12c661899cbdc8fca41e5660039a88c8eb5d9d Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 18 Jun 2026 19:26:22 +0200 Subject: [PATCH 12/20] fix(js-sdk): cancel underlying body reader when abandoned stream is GC'd The FinalizationRegistry safety net for `read({ format: 'stream' })` only ran `cleanup()` (aborting the handshake AbortController), unlike the cancel and error paths which explicitly cancel the response body to release the pooled envd connection. Abandoned streams could leave connections checked out until the client was torn down. Mirror the cancel/error paths (and the Python sync finalizer's `response.close`) by cancelling the body reader before cleanup. Adds unit tests for wrapStreamWithConnectionCleanup, including a GC-abandonment test (needs --expose-gc, enabled for the connectionConfig vitest project). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/js-sdk/src/connectionConfig.ts | 23 +++- .../js-sdk/tests/connectionConfig.test.ts | 115 ++++++++++++++++++ packages/js-sdk/vitest.config.mts | 4 + 3 files changed, 136 insertions(+), 6 deletions(-) diff --git a/packages/js-sdk/src/connectionConfig.ts b/packages/js-sdk/src/connectionConfig.ts index cda730d7e0..9cf2295a9c 100644 --- a/packages/js-sdk/src/connectionConfig.ts +++ b/packages/js-sdk/src/connectionConfig.ts @@ -207,12 +207,12 @@ export function setupRequestController( // GC safety net for streamed reads: if the consumer drops a streamed response // body without reading it to completion or cancelling it, the registered -// cleanup releases the underlying connection when the stream is garbage +// callback releases the underlying connection when the stream is garbage // collected. This mirrors the Python SDK's `weakref.finalize` on -// `FileStreamReader`. The held value is the cleanup function, which must not +// `FileStreamReader`. The held value is a release callback, which must not // reference the stream itself or it would never be collected. -const streamReadFinalizers = new FinalizationRegistry<() => void>((cleanup) => - cleanup() +const streamReadFinalizers = new FinalizationRegistry<() => void>((release) => + release() ) /** @@ -242,11 +242,22 @@ export function wrapStreamWithConnectionCleanup( const reader = body.getReader() const unregisterToken = {} // Detach the GC finalizer and release the connection. Idempotent via - // `cleanup`, so it's safe to call from multiple stream callbacks. + // `cleanup`, so it's safe to call from multiple stream callbacks. The body + // reader is cancelled separately by the callbacks that have already drained + // or are cancelling it. 
const release = () => { streamReadFinalizers.unregister(unregisterToken) cleanup() } + // GC safety net: when the wrapped stream is abandoned without being read to + // completion or cancelled, cancel the underlying body reader so the pooled + // connection is released (matching the cancel/error paths) and then run + // cleanup. Must reference `reader`/`cleanup` only — never the wrapped + // `stream`, or it would never be garbage collected. + const releaseOnAbandon = () => { + reader.cancel().catch(() => {}) + cleanup() + } const stream = new ReadableStream({ async pull(streamController) { @@ -274,7 +285,7 @@ export function wrapStreamWithConnectionCleanup( // Release the connection if the consumer abandons the stream without // reading it to completion or cancelling it. - streamReadFinalizers.register(stream, cleanup, unregisterToken) + streamReadFinalizers.register(stream, releaseOnAbandon, unregisterToken) return stream } diff --git a/packages/js-sdk/tests/connectionConfig.test.ts b/packages/js-sdk/tests/connectionConfig.test.ts index 04bf245109..71f7c5d43a 100644 --- a/packages/js-sdk/tests/connectionConfig.test.ts +++ b/packages/js-sdk/tests/connectionConfig.test.ts @@ -2,6 +2,7 @@ import { assert, test, beforeEach, afterEach } from 'vitest' import { ConnectionConfig, setupRequestController, + wrapStreamWithConnectionCleanup, } from '../src/connectionConfig' // Store original env vars to restore after tests @@ -331,3 +332,117 @@ test('setupRequestController user signal still cancels after clearStartTimeout', userController.abort() assert.equal(controller.signal.aborted, true) }) + +// Builds a source ReadableStream that records whether its underlying reader was +// cancelled, standing in for a fetch response body backed by a pooled +// connection. `cancel` being invoked is what releases that connection. 
+function trackedSource() { + const state: { cancelled: boolean; cancelReason: unknown } = { + cancelled: false, + cancelReason: undefined, + } + const chunks = ['a', 'b'].map((s) => new TextEncoder().encode(s)) + let i = 0 + const body = new ReadableStream({ + pull(controller) { + if (i < chunks.length) { + controller.enqueue(chunks[i++]) + } else { + controller.close() + } + }, + cancel(reason) { + state.cancelled = true + state.cancelReason = reason + }, + }) + return { body, state } +} + +async function readAll(stream: ReadableStream): Promise { + const reader = stream.getReader() + let out = '' + const decoder = new TextDecoder() + for (;;) { + const { done, value } = await reader.read() + if (done) break + out += decoder.decode(value, { stream: true }) + } + return out +} + +test('wrapStreamWithConnectionCleanup releases once on full read', async () => { + const { body } = trackedSource() + let cleanups = 0 + const stream = wrapStreamWithConnectionCleanup(body, { + clearStartTimeout: () => {}, + cleanup: () => { + cleanups++ + }, + }) + assert.equal(await readAll(stream), 'ab') + assert.equal(cleanups, 1) +}) + +test('wrapStreamWithConnectionCleanup cancel cancels the underlying reader', async () => { + const { body, state } = trackedSource() + let cleanups = 0 + const stream = wrapStreamWithConnectionCleanup(body, { + clearStartTimeout: () => {}, + cleanup: () => { + cleanups++ + }, + }) + await stream.cancel('done') + assert.equal(state.cancelled, true) + assert.equal(state.cancelReason, 'done') + assert.equal(cleanups, 1) +}) + +test('wrapStreamWithConnectionCleanup handles a null body', async () => { + let cleanups = 0 + let cleared = 0 + const stream = wrapStreamWithConnectionCleanup(null, { + clearStartTimeout: () => { + cleared++ + }, + cleanup: () => { + cleanups++ + }, + }) + assert.equal(cleared, 1) + assert.equal(cleanups, 1) + assert.equal(await readAll(stream), '') +}) + +test('wrapStreamWithConnectionCleanup cancels the underlying reader when 
abandoned (GC)', async () => { + // Requires --expose-gc, which the connectionConfig vitest project enables. + assert.equal( + typeof global.gc, + 'function', + 'this test must run with --expose-gc' + ) + const { body, state } = trackedSource() + let cleanups = 0 + // Create and drop the wrapped stream without reading or cancelling it, so + // only the FinalizationRegistry safety net can release the connection. The + // wrapped stream must not escape this scope or it would never be collected. + ;(() => { + wrapStreamWithConnectionCleanup(body, { + clearStartTimeout: () => {}, + cleanup: () => { + cleanups++ + }, + }) + })() + + for (let i = 0; i < 100 && !state.cancelled; i++) { + global.gc!() + await new Promise((resolve) => setTimeout(resolve, 10)) + } + + // The finalizer must cancel the body reader (releasing the pooled + // connection), not merely abort the handshake controller. + assert.equal(state.cancelled, true) + assert.equal(cleanups, 1) +}) diff --git a/packages/js-sdk/vitest.config.mts b/packages/js-sdk/vitest.config.mts index 39a5034acb..02fdef450d 100644 --- a/packages/js-sdk/vitest.config.mts +++ b/packages/js-sdk/vitest.config.mts @@ -86,6 +86,10 @@ export default defineConfig({ isolate: true, testTimeout: 10_000, environment: 'node', + // Expose `global.gc` so the streamed-read GC safety-net test can + // force collection and observe the FinalizationRegistry callback. + pool: 'forks', + execArgv: ['--expose-gc'], }, }, ], From ed277d00d03fbb4d0d4acdf8aa1be1e1974a4670 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 18 Jun 2026 20:02:54 +0200 Subject: [PATCH 13/20] refactor(sdks): replace stream GC nets with an idle-read timeout Drop the FinalizationRegistry (JS) and weakref.finalize (sync Python) safety nets on streamed reads in favor of a deterministic idle-read timeout that reclaims a stalled stream's pooled connection. 
Python maps it to httpx's per-chunk read timeout; JS arms a per-chunk timer that aborts the request controller. Configurable via streamIdleTimeoutMs / stream_idle_timeout (default 60s, 0/None disables), and the consume/close contract is now documented consistently across all three readers. Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/cuddly-pots-stream.md | 2 +- packages/js-sdk/src/connectionConfig.ts | 86 ++++++++++-------- .../js-sdk/src/sandbox/filesystem/index.ts | 24 ++++- .../js-sdk/tests/connectionConfig.test.ts | 82 +++++++++++------ packages/js-sdk/vitest.config.mts | 4 - packages/python-sdk/e2b/connection_config.py | 5 ++ .../e2b/sandbox/filesystem/filesystem.py | 38 ++++---- .../sandbox_async/filesystem/filesystem.py | 33 ++++--- .../e2b/sandbox_sync/filesystem/filesystem.py | 28 +++--- .../tests/test_file_stream_reader.py | 87 +++++++++++++++---- 10 files changed, 250 insertions(+), 139 deletions(-) diff --git a/.changeset/cuddly-pots-stream.md b/.changeset/cuddly-pots-stream.md index 5721b2735a..75af61dd34 100644 --- a/.changeset/cuddly-pots-stream.md +++ b/.changeset/cuddly-pots-stream.md @@ -6,6 +6,6 @@ Stream uploads instead of buffering streaming input entirely in memory: - `Sandbox.files.write()` / `write_files()`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are streamed to the sandbox, including when `gzip` is enabled (compression now happens chunk by chunk). `useOctetStream`/`use_octet_stream` now defaults to auto-detect — octet-stream is used when any entry is streamable (so streamed uploads aren't silently buffered) and `multipart/form-data` otherwise; browsers always use `multipart/form-data`. Streamed uploads also use a longer transfer timeout instead of the default request timeout, so large uploads aren't cut off. 
-- `Sandbox.files.read(format="stream")`: the request timeout now bounds only the initial handshake instead of killing the stream while it's being consumed (Python disables the read timeout; JS bounds the handshake and supports `signal` to cancel an in-flight stream). A dropped connection during the stream handshake now surfaces the same typed, health-checked error as non-stream reads. +- `Sandbox.files.read(format="stream")`: the request timeout now bounds only the initial handshake instead of killing the stream while it's being consumed. A new idle-read timeout (`streamIdleTimeoutMs` in JS, `stream_idle_timeout` in Python, default 60s, `0`/`None` to disable) bounds a stalled stream — whether the stall is on the wire or because the consumer stopped reading — without limiting the total transfer time of an actively-flowing stream, so an idle stream no longer holds its pooled connection indefinitely. Use `signal` (JS) to cancel an in-flight stream. A dropped connection during the stream handshake now surfaces the same typed, health-checked error as non-stream reads. The stream holds a pooled connection until it is consumed to the end, cancelled/closed, errors, or the idle timeout fires — consume it fully, use the context manager, or close it. - Python `Sandbox.files.read(format="stream")`: the response body is now streamed from the sandbox instead of being downloaded into memory before iteration (sync and async). - JS `Sandbox.files.read()` with `blob` or `stream` format now returns an empty `Blob`/`ReadableStream` for empty files instead of `""`. diff --git a/packages/js-sdk/src/connectionConfig.ts b/packages/js-sdk/src/connectionConfig.ts index 9cf2295a9c..e27561a0c7 100644 --- a/packages/js-sdk/src/connectionConfig.ts +++ b/packages/js-sdk/src/connectionConfig.ts @@ -11,6 +11,10 @@ export const DEFAULT_SANDBOX_TIMEOUT_MS = 300_000 // 300 seconds // body can take far longer than a regular request, so it must not inherit the // short `REQUEST_TIMEOUT_MS`. 
export const FILE_TIMEOUT_MS = 3_600_000 // 1 hour +// Idle timeout for a streamed read: abort if no chunk arrives within this +// window. Resets on every chunk, so it bounds a stalled stream without +// limiting an actively-flowing one. +export const STREAM_IDLE_TIMEOUT_MS = 60_000 // 60 seconds export const KEEPALIVE_PING_INTERVAL_SEC = 50 // 50 seconds export const KEEPALIVE_PING_HEADER = 'Keepalive-Ping-Interval' @@ -205,23 +209,17 @@ export function setupRequestController( return { controller, clearStartTimeout, cleanup } } -// GC safety net for streamed reads: if the consumer drops a streamed response -// body without reading it to completion or cancelling it, the registered -// callback releases the underlying connection when the stream is garbage -// collected. This mirrors the Python SDK's `weakref.finalize` on -// `FileStreamReader`. The held value is a release callback, which must not -// reference the stream itself or it would never be collected. -const streamReadFinalizers = new FinalizationRegistry<() => void>((release) => - release() -) - /** * Wrap a streaming response body so its pooled connection is released when the - * stream is fully read, cancelled, errors, or (as a GC safety net) abandoned. + * stream is fully read, cancelled, errors, or stays idle for too long. * - * The request timeout configured via {@link setupRequestController} bounds only - * the initial handshake; this clears that timeout so consuming the body is not - * killed by it. Call once the handshake has succeeded (after error handling). + * Clears the handshake timeout from {@link setupRequestController} (so + * consuming the body isn't killed by it) and replaces it with an idle-read + * timeout: if no chunk arrives within `idleTimeoutMs` it aborts `controller`, + * tearing down the fetch and releasing the connection. The timer resets on + * every chunk, so it bounds a stalled stream without limiting an + * actively-flowing one. Pass `0`/`undefined` to disable. 
Call once the + * handshake has succeeded. * * @internal */ @@ -230,7 +228,14 @@ export function wrapStreamWithConnectionCleanup( { clearStartTimeout, cleanup, - }: { clearStartTimeout: () => void; cleanup: () => void } + controller, + idleTimeoutMs, + }: { + clearStartTimeout: () => void + cleanup: () => void + controller: AbortController + idleTimeoutMs?: number + } ): ReadableStream { clearStartTimeout() @@ -240,33 +245,44 @@ export function wrapStreamWithConnectionCleanup( } const reader = body.getReader() - const unregisterToken = {} - // Detach the GC finalizer and release the connection. Idempotent via - // `cleanup`, so it's safe to call from multiple stream callbacks. The body - // reader is cancelled separately by the callbacks that have already drained - // or are cancelling it. - const release = () => { - streamReadFinalizers.unregister(unregisterToken) - cleanup() + + let idleTimer: ReturnType | undefined + const clearIdleTimer = () => { + if (idleTimer) { + clearTimeout(idleTimer) + idleTimer = undefined + } + } + const armIdleTimer = () => { + if (!idleTimeoutMs) return + clearIdleTimer() + idleTimer = setTimeout( + () => + controller.abort( + new DOMException(`Stream idle for ${idleTimeoutMs}ms`, 'TimeoutError') + ), + idleTimeoutMs + ) } - // GC safety net: when the wrapped stream is abandoned without being read to - // completion or cancelled, cancel the underlying body reader so the pooled - // connection is released (matching the cancel/error paths) and then run - // cleanup. Must reference `reader`/`cleanup` only — never the wrapped - // `stream`, or it would never be garbage collected. - const releaseOnAbandon = () => { - reader.cancel().catch(() => {}) + + // Idempotent: safe to call from multiple stream callbacks. 
+ const release = () => { + clearIdleTimer() cleanup() } - const stream = new ReadableStream({ + return new ReadableStream({ + start() { + armIdleTimer() + }, async pull(streamController) { try { const { done, value } = await reader.read() if (done) { - streamController.close() release() + streamController.close() } else { + armIdleTimer() streamController.enqueue(value) } } catch (err) { @@ -282,12 +298,6 @@ export function wrapStreamWithConnectionCleanup( } }, }) - - // Release the connection if the consumer abandons the stream without - // reading it to completion or cancelling it. - streamReadFinalizers.register(stream, releaseOnAbandon, unregisterToken) - - return stream } function buildUserAgent(integration?: string) { diff --git a/packages/js-sdk/src/sandbox/filesystem/index.ts b/packages/js-sdk/src/sandbox/filesystem/index.ts index b68e7ad464..d9fcd9b638 100644 --- a/packages/js-sdk/src/sandbox/filesystem/index.ts +++ b/packages/js-sdk/src/sandbox/filesystem/index.ts @@ -13,6 +13,7 @@ import { KEEPALIVE_PING_HEADER, KEEPALIVE_PING_INTERVAL_SEC, setupRequestController, + STREAM_IDLE_TIMEOUT_MS, Username, wrapStreamWithConnectionCleanup, } from '../../connectionConfig' @@ -302,6 +303,15 @@ export interface FilesystemReadOpts extends FilesystemRequestOpts { * When true, the download will request gzip-encoded responses. */ gzip?: boolean + /** + * Idle timeout for a streamed read (`format: 'stream'`) in **milliseconds**: + * abort if no chunk arrives within this window. Resets on every chunk, so it + * bounds a stalled stream without limiting an actively-flowing one. Pass `0` + * to disable. + * + * @default 60_000 // 60 seconds + */ + streamIdleTimeoutMs?: number } export interface FilesystemListOpts extends FilesystemRequestOpts { @@ -420,9 +430,10 @@ export class Filesystem { * * You can pass `text`, `bytes`, `blob`, or `stream` to `opts.format` to change the return type. 
* - * The request timeout bounds only the initial handshake—the returned - * stream is not killed by it while being consumed. Use `opts.signal` to - * cancel an in-flight stream. + * The request timeout bounds only the initial handshake. The returned stream + * holds a pooled connection until it is fully read, cancelled, errors, or the + * idle timeout (`opts.streamIdleTimeoutMs`) fires—so consume it to the end or + * cancel it (`opts.signal`). * * @param path path to the file. * @param opts connection options. @@ -499,7 +510,12 @@ export class Filesystem { return wrapStreamWithConnectionCleanup( res.data as ReadableStream | null, - { clearStartTimeout, cleanup } + { + clearStartTimeout, + cleanup, + controller, + idleTimeoutMs: opts?.streamIdleTimeoutMs ?? STREAM_IDLE_TIMEOUT_MS, + } ) } catch (err) { cleanup() diff --git a/packages/js-sdk/tests/connectionConfig.test.ts b/packages/js-sdk/tests/connectionConfig.test.ts index 71f7c5d43a..3624ed18bf 100644 --- a/packages/js-sdk/tests/connectionConfig.test.ts +++ b/packages/js-sdk/tests/connectionConfig.test.ts @@ -371,6 +371,24 @@ async function readAll(stream: ReadableStream): Promise { return out } +// Builds a source ReadableStream that never produces a chunk and errors its +// pending read when `signal` aborts, standing in for a stalled fetch response +// body whose connection is torn down by aborting the request controller. 
+function stallingSource(signal: AbortSignal) { + const state: { cancelled: boolean } = { cancelled: false } + const body = new ReadableStream({ + start(controller) { + signal.addEventListener('abort', () => controller.error(signal.reason), { + once: true, + }) + }, + cancel() { + state.cancelled = true + }, + }) + return { body, state } +} + test('wrapStreamWithConnectionCleanup releases once on full read', async () => { const { body } = trackedSource() let cleanups = 0 @@ -379,6 +397,7 @@ test('wrapStreamWithConnectionCleanup releases once on full read', async () => { cleanup: () => { cleanups++ }, + controller: new AbortController(), }) assert.equal(await readAll(stream), 'ab') assert.equal(cleanups, 1) @@ -392,6 +411,7 @@ test('wrapStreamWithConnectionCleanup cancel cancels the underlying reader', asy cleanup: () => { cleanups++ }, + controller: new AbortController(), }) await stream.cancel('done') assert.equal(state.cancelled, true) @@ -409,40 +429,50 @@ test('wrapStreamWithConnectionCleanup handles a null body', async () => { cleanup: () => { cleanups++ }, + controller: new AbortController(), }) assert.equal(cleared, 1) assert.equal(cleanups, 1) assert.equal(await readAll(stream), '') }) -test('wrapStreamWithConnectionCleanup cancels the underlying reader when abandoned (GC)', async () => { - // Requires --expose-gc, which the connectionConfig vitest project enables. - assert.equal( - typeof global.gc, - 'function', - 'this test must run with --expose-gc' - ) - const { body, state } = trackedSource() +test('wrapStreamWithConnectionCleanup aborts and releases an idle stream', async () => { + const controller = new AbortController() + const { body } = stallingSource(controller.signal) let cleanups = 0 - // Create and drop the wrapped stream without reading or cancelling it, so - // only the FinalizationRegistry safety net can release the connection. The - // wrapped stream must not escape this scope or it would never be collected. 
- ;(() => { - wrapStreamWithConnectionCleanup(body, { - clearStartTimeout: () => {}, - cleanup: () => { - cleanups++ - }, - }) - })() - - for (let i = 0; i < 100 && !state.cancelled; i++) { - global.gc!() - await new Promise((resolve) => setTimeout(resolve, 10)) + const stream = wrapStreamWithConnectionCleanup(body, { + clearStartTimeout: () => {}, + cleanup: () => { + cleanups++ + }, + controller, + idleTimeoutMs: 20, + }) + + // No chunk ever arrives, so the idle timer fires, aborts the controller, + // and the read rejects with the TimeoutError reason. + let error: unknown + try { + await readAll(stream) + } catch (err) { + error = err } + assert.equal((error as DOMException)?.name, 'TimeoutError') + assert.equal(cleanups, 1) + assert.equal(controller.signal.aborted, true) +}) - // The finalizer must cancel the body reader (releasing the pooled - // connection), not merely abort the handshake controller. - assert.equal(state.cancelled, true) +test('wrapStreamWithConnectionCleanup with idle timeout 0 never auto-aborts', async () => { + const { body } = trackedSource() + let cleanups = 0 + const stream = wrapStreamWithConnectionCleanup(body, { + clearStartTimeout: () => {}, + cleanup: () => { + cleanups++ + }, + controller: new AbortController(), + idleTimeoutMs: 0, + }) + assert.equal(await readAll(stream), 'ab') assert.equal(cleanups, 1) }) diff --git a/packages/js-sdk/vitest.config.mts b/packages/js-sdk/vitest.config.mts index 02fdef450d..39a5034acb 100644 --- a/packages/js-sdk/vitest.config.mts +++ b/packages/js-sdk/vitest.config.mts @@ -86,10 +86,6 @@ export default defineConfig({ isolate: true, testTimeout: 10_000, environment: 'node', - // Expose `global.gc` so the streamed-read GC safety-net test can - // force collection and observe the FinalizationRegistry callback. 
- pool: 'forks', - execArgv: ['--expose-gc'], }, }, ], diff --git a/packages/python-sdk/e2b/connection_config.py b/packages/python-sdk/e2b/connection_config.py index 5c287fe7fc..87de6be501 100644 --- a/packages/python-sdk/e2b/connection_config.py +++ b/packages/python-sdk/e2b/connection_config.py @@ -14,6 +14,11 @@ # than a regular request, so it must not inherit the short REQUEST_TIMEOUT. FILE_TIMEOUT: float = 3600.0 # 1 hour +# Idle timeout for a streamed read: abort if no chunk arrives within this +# window. Maps to httpx's per-chunk `read` timeout, so it bounds a stalled +# stream without limiting an actively-flowing one. `0`/`None` disables it. +STREAM_IDLE_TIMEOUT: float = 60.0 # 60 seconds + KEEPALIVE_PING_INTERVAL_SEC = 50 # 50 seconds KEEPALIVE_PING_HEADER = "Keepalive-Ping-Interval" diff --git a/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py index ddcaea1416..4d0455653b 100644 --- a/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox/filesystem/filesystem.py @@ -1,6 +1,5 @@ import gzip import re -import weakref from dataclasses import dataclass, field from datetime import datetime, timezone from enum import Enum @@ -152,27 +151,21 @@ class FileStreamReader(Iterator[bytes]): Returned by ``Sandbox.files.read(format="stream")``. It owns the underlying HTTP response and releases its pooled connection as soon as the stream is - fully consumed, an error is raised while reading, or the reader is closed. + fully consumed, an error is raised while reading (including the idle-read + timeout, which raises ``httpx.ReadTimeout``), or the reader is closed. 
- Iterate it directly (``for chunk in stream``) or, for deterministic - cleanup when you don't read it to the end, use it as a context manager or - call :meth:`close`:: + There is no garbage-collection safety net, so always consume it fully, use + it as a context manager, or call :meth:`close`:: with sandbox.files.read(path, format="stream") as stream: for chunk in stream: ... - - As a safety net, the connection is also released when the reader is garbage - collected, so an abandoned stream does not leak a connection indefinitely. """ def __init__(self, response: httpx.Response): self._response = response self._iterator = response.iter_bytes() - # Releases the connection on GC if the reader is abandoned without - # being consumed or closed. Calling it explicitly (via close) runs the - # callback once and is then a no-op, so close is idempotent. - self._finalizer = weakref.finalize(self, response.close) + self._closed = False def __iter__(self) -> Iterator[bytes]: return self @@ -187,7 +180,10 @@ def __next__(self) -> bytes: def close(self) -> None: """Release the underlying HTTP connection. Safe to call multiple times.""" - self._finalizer() + if self._closed: + return + self._closed = True + self._response.close() def __enter__(self) -> "FileStreamReader": return self @@ -201,22 +197,18 @@ class AsyncFileStreamReader(AsyncIterator[bytes]): Returned by ``AsyncSandbox.files.read(format="stream")``. It owns the underlying HTTP response and releases its pooled connection as soon as the - stream is fully consumed, an error is raised while reading, or the reader is + stream is fully consumed, an error is raised while reading (including the + idle-read timeout, which raises ``httpx.ReadTimeout``), or the reader is closed. 
- Iterate it directly (``async for chunk in stream``) or, for deterministic - cleanup when you don't read it to the end, use it as an async context - manager or call :meth:`aclose`:: + There is no garbage-collection safety net (releasing an async connection + requires awaiting ``aclose()``, which a finalizer cannot do reliably), so + always consume it fully, use it as an async context manager, or call + :meth:`aclose`:: async with await sandbox.files.read(path, format="stream") as stream: async for chunk in stream: ... - - Unlike the sync reader there is no garbage-collection safety net: releasing - an async connection requires awaiting ``aclose()``, which a finalizer cannot - do reliably. An abandoned stream holds its pooled connection until the - client is closed, so always consume it fully, use the context manager, or - call :meth:`aclose`. """ def __init__(self, response: httpx.Response): diff --git a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py index bc80069fb3..2ccefd7766 100644 --- a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py @@ -11,6 +11,7 @@ FILE_TIMEOUT, KEEPALIVE_PING_HEADER, KEEPALIVE_PING_INTERVAL_SEC, + STREAM_IDLE_TIMEOUT, ConnectionConfig, Username, default_username, @@ -155,22 +156,29 @@ async def read( user: Optional[Username] = None, request_timeout: Optional[float] = None, gzip: bool = False, + stream_idle_timeout: Optional[float] = STREAM_IDLE_TIMEOUT, ) -> AsyncFileStreamReader: """ Read file content as an `AsyncFileStreamReader` (an `AsyncIterator[bytes]`). The request timeout bounds only the initial handshake—the returned - iterator is not killed by it while being consumed. The reader releases - its connection once fully consumed; if you don't read it to the end, - use it as an async context manager or call `aclose()` for deterministic - cleanup. 
Unlike the sync reader there is no garbage-collection safety - net—an abandoned stream holds its connection until the client is closed. + iterator is not killed by it while being consumed. A stalled stream is + reclaimed by `stream_idle_timeout` (raising `httpx.ReadTimeout`). The + reader releases its connection once fully consumed; if you don't read it + to the end, use it as an async context manager or call `aclose()` for + deterministic cleanup. There is no garbage-collection safety net—an + abandoned stream holds its connection until the idle timeout fires or + the client is closed. :param path: Path to the file :param user: Run the operation as this user :param format: Format of the file content—`stream` :param request_timeout: Timeout for the request in **seconds** :param gzip: Use gzip compression for the request + :param stream_idle_timeout: Idle timeout in **seconds** for the streamed + body—abort if no chunk arrives within this window. Resets on every + chunk, so it bounds a stalled stream without limiting total transfer + time. Pass `0`/`None` to disable. :return: File content as an `AsyncFileStreamReader` """ @@ -183,6 +191,7 @@ async def read( user: Optional[Username] = None, request_timeout: Optional[float] = None, gzip: bool = False, + stream_idle_timeout: Optional[float] = STREAM_IDLE_TIMEOUT, ): username = user if username is None and self._envd_version < ENVD_DEFAULT_USER: @@ -219,16 +228,12 @@ async def read( await r.aclose() raise err - # The request timeout bounds only the initial handshake. Disable - # the read timeout for body reads so consuming the stream isn't - # killed by it. The timeout dict is shared by reference with the - # transport and read again when body iteration starts. - request.extensions.get("timeout", {})["read"] = None + # The request timeout bounds only the initial handshake; httpx's + # per-chunk `read` timeout becomes the idle-read timeout for the + # body. 
The timeout dict is shared by reference with the transport + # and read again when body iteration starts. + request.extensions.get("timeout", {})["read"] = stream_idle_timeout or None - # AsyncFileStreamReader owns the response and releases the - # connection when the stream is consumed, closed, or errors. There - # is no GC safety net: an abandoned reader holds its connection - # until the client is closed. return AsyncFileStreamReader(r) try: diff --git a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py index 8b56bb0538..915a755374 100644 --- a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py @@ -10,6 +10,7 @@ FILE_TIMEOUT, KEEPALIVE_PING_HEADER, KEEPALIVE_PING_INTERVAL_SEC, + STREAM_IDLE_TIMEOUT, ConnectionConfig, Username, default_username, @@ -175,21 +176,27 @@ def read( user: Optional[Username] = None, request_timeout: Optional[float] = None, gzip: bool = False, + stream_idle_timeout: Optional[float] = STREAM_IDLE_TIMEOUT, ) -> FileStreamReader: """ Read file content as a `FileStreamReader` (an `Iterator[bytes]`). The request timeout bounds only the initial handshake—the returned - iterator is not killed by it while being consumed. The reader releases - its connection once fully consumed; if you don't read it to the end, - use it as a context manager or call `close()` for deterministic - cleanup. + iterator is not killed by it while being consumed. A stalled stream is + reclaimed by `stream_idle_timeout` (raising `httpx.ReadTimeout`). The + reader releases its connection once fully consumed; if you don't read it + to the end, use it as a context manager or call `close()` for + deterministic cleanup. 
:param path: Path to the file :param user: Run the operation as this user :param format: Format of the file content—`stream` :param request_timeout: Timeout for the request in **seconds** :param gzip: Use gzip compression for the request + :param stream_idle_timeout: Idle timeout in **seconds** for the streamed + body—abort if no chunk arrives within this window. Resets on every + chunk, so it bounds a stalled stream without limiting total transfer + time. Pass `0`/`None` to disable. :return: File content as a `FileStreamReader` """ @@ -202,6 +209,7 @@ def read( user: Optional[Username] = None, request_timeout: Optional[float] = None, gzip: bool = False, + stream_idle_timeout: Optional[float] = STREAM_IDLE_TIMEOUT, ): username = user if username is None and self._envd_version < ENVD_DEFAULT_USER: @@ -236,14 +244,12 @@ def read( r.close() raise err - # The request timeout bounds only the initial handshake. Disable - # the read timeout for body reads so consuming the stream isn't - # killed by it. The timeout dict is shared by reference with the - # transport and read again when body iteration starts. - request.extensions.get("timeout", {})["read"] = None + # The request timeout bounds only the initial handshake; httpx's + # per-chunk `read` timeout becomes the idle-read timeout for the + # body. The timeout dict is shared by reference with the transport + # and read again when body iteration starts. + request.extensions.get("timeout", {})["read"] = stream_idle_timeout or None - # FileStreamReader owns the response and releases the connection - # when the stream is consumed, closed, errors, or is GC'd. return FileStreamReader(r) try: diff --git a/packages/python-sdk/tests/test_file_stream_reader.py b/packages/python-sdk/tests/test_file_stream_reader.py index 34dcea2964..9047e5f599 100644 --- a/packages/python-sdk/tests/test_file_stream_reader.py +++ b/packages/python-sdk/tests/test_file_stream_reader.py @@ -1,13 +1,15 @@ """Unit tests for the streamed-read helpers. 
These exercise connection lifecycle (consume / context manager / explicit -close / garbage collection) without hitting a real sandbox, using a local -chunked HTTP server. +close / idle timeout / abandonment) without hitting a real sandbox, using a +local chunked HTTP server. """ -import gc +import asyncio import socket import threading +import time +from typing import Optional import httpx import pytest @@ -21,8 +23,16 @@ EXPECTED = b"".join(CHUNKS) -def _start_chunked_server() -> int: - """Start a one-shot HTTP server that replies with a chunked body. Returns its port.""" +def _start_chunked_server( + stall_before: Optional[int] = None, + stall_seconds: float = 0.0, +) -> int: + """Start a one-shot HTTP server that replies with a chunked body. + + When ``stall_before`` is set, the server sleeps ``stall_seconds`` before + sending that chunk index, so a reader with a shorter idle timeout times out. + Returns the server's port. + """ sock = socket.socket() sock.bind(("127.0.0.1", 0)) sock.listen(1) @@ -38,7 +48,9 @@ def serve(): b"Content-Type: application/octet-stream\r\n" b"Transfer-Encoding: chunked\r\n\r\n" ) - for chunk in CHUNKS: + for idx, chunk in enumerate(CHUNKS): + if stall_before is not None and idx == stall_before: + time.sleep(stall_seconds) conn.sendall(f"{len(chunk):x}\r\n".encode() + chunk + b"\r\n") conn.sendall(b"0\r\n\r\n") conn.close() @@ -61,8 +73,13 @@ def _active_connections(client) -> int: return sum(1 for conn in client._transport._pool.connections if not conn.is_idle()) -def _open_stream(client, port): - request = client.build_request("GET", f"http://127.0.0.1:{port}/files") +def _open_stream(client, port, read_timeout: Optional[float] = None): + request = client.build_request( + "GET", f"http://127.0.0.1:{port}/files", timeout=httpx.Timeout(5.0) + ) + if read_timeout is not None: + # Mirror the SDK: the per-chunk `read` timeout bounds idle gaps. 
+ request.extensions["timeout"]["read"] = read_timeout return client.send(request, stream=True) @@ -93,17 +110,37 @@ def test_sync_close_is_idempotent(): assert _active_connections(client) == 0 -def test_sync_abandoned_reader_does_not_leak(): +def test_sync_idle_timeout_releases_connection(): with httpx.Client() as client: - port = _start_chunked_server() - reader = FileStreamReader(_open_stream(client, port)) - assert _active_connections(client) == 1 - del reader - gc.collect() - # The finalizer releases the connection when the reader is collected. + # The server stalls before the second chunk for longer than the + # reader's idle (read) timeout. + port = _start_chunked_server(stall_before=1, stall_seconds=0.5) + reader = FileStreamReader(_open_stream(client, port, read_timeout=0.05)) + it = iter(reader) + assert next(it) + # The stalled read trips the idle timeout, which propagates and + # releases the connection. + with pytest.raises(httpx.ReadTimeout): + next(it) assert _active_connections(client) == 0 +def test_sync_abandoned_reader_is_reclaimed_on_client_close(): + client = httpx.Client() + port = _start_chunked_server() + reader = FileStreamReader(_open_stream(client, port)) + assert _active_connections(client) == 1 + + # The sync reader has no GC safety net: dropping it without closing keeps + # the connection checked out (an idle timeout would reclaim a stalled one). + del reader + assert _active_connections(client) == 1 + + # Closing the client reclaims the abandoned connection. 
+ client.close() + assert _active_connections(client) == 0 + + async def test_async_full_consume_releases_connection(): async with httpx.AsyncClient() as client: port = _start_chunked_server() @@ -135,9 +172,24 @@ async def test_async_aclose_is_idempotent(): assert _active_connections(client) == 0 -async def test_async_abandoned_reader_is_reclaimed_on_client_close(): - import asyncio +async def test_async_idle_timeout_releases_connection(): + async with httpx.AsyncClient() as client: + port = _start_chunked_server(stall_before=1, stall_seconds=0.5) + request = client.build_request( + "GET", f"http://127.0.0.1:{port}/files", timeout=httpx.Timeout(5.0) + ) + request.extensions["timeout"]["read"] = 0.05 + reader = AsyncFileStreamReader(await client.send(request, stream=True)) + it = reader.__aiter__() + assert await it.__anext__() + # The stalled read trips the idle timeout, which propagates and + # releases the connection. + with pytest.raises(httpx.ReadTimeout): + await it.__anext__() + assert _active_connections(client) == 0 + +async def test_async_abandoned_reader_is_reclaimed_on_client_close(): client = httpx.AsyncClient() port = _start_chunked_server() request = client.build_request("GET", f"http://127.0.0.1:{port}/files") @@ -147,7 +199,6 @@ async def test_async_abandoned_reader_is_reclaimed_on_client_close(): # The async reader has no GC safety net: dropping it without closing keeps # the connection checked out (releasing one requires awaiting aclose()). 
del reader - gc.collect() await asyncio.sleep(0.05) assert _active_connections(client) == 1 From 40a31039134c1c6e6a8aac02f53731f34ab0d18f Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 18 Jun 2026 21:02:28 +0200 Subject: [PATCH 14/20] feat(sdks): add per-chunk idle timeout to streamed reads and writes Bound a stalled streamed transfer with a per-chunk idle timeout (defaults to the request timeout when unset, configurable via streamIdleTimeoutMs / stream_idle_timeout, 0 disables) on both reads and writes, so a producer or consumer that stops making progress no longer holds the pooled connection. Reads map it to httpx's per-chunk read timeout / a JS idle-abort wrapper; writes use httpx's per-write timeout / a JS upload idle-abort wrapper. The total-transfer cap is intentionally left to the server (envd): a client cap is advisory, can't protect against non-conforming clients, and would mean maintaining the same ceiling across three SDKs. The pre-existing client-side 1h upload total is removed for consistency with reads.
Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/cuddly-pots-stream.md | 4 +- packages/js-sdk/src/connectionConfig.ts | 115 +++++++++++---- .../js-sdk/src/sandbox/filesystem/index.ts | 138 +++++++++++------- .../js-sdk/tests/connectionConfig.test.ts | 29 ++++ packages/python-sdk/e2b/connection_config.py | 5 - .../sandbox_async/filesystem/filesystem.py | 35 ++--- .../e2b/sandbox_sync/filesystem/filesystem.py | 35 ++--- 7 files changed, 225 insertions(+), 136 deletions(-) diff --git a/.changeset/cuddly-pots-stream.md b/.changeset/cuddly-pots-stream.md index 75af61dd34..9ab27b73ef 100644 --- a/.changeset/cuddly-pots-stream.md +++ b/.changeset/cuddly-pots-stream.md @@ -5,7 +5,7 @@ Stream uploads instead of buffering streaming input entirely in memory: -- `Sandbox.files.write()` / `write_files()`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are streamed to the sandbox, including when `gzip` is enabled (compression now happens chunk by chunk). `useOctetStream`/`use_octet_stream` now defaults to auto-detect — octet-stream is used when any entry is streamable (so streamed uploads aren't silently buffered) and `multipart/form-data` otherwise; browsers always use `multipart/form-data`. Streamed uploads also use a longer transfer timeout instead of the default request timeout, so large uploads aren't cut off. -- `Sandbox.files.read(format="stream")`: the request timeout now bounds only the initial handshake instead of killing the stream while it's being consumed. A new idle-read timeout (`streamIdleTimeoutMs` in JS, `stream_idle_timeout` in Python, default 60s, `0`/`None` to disable) bounds a stalled stream — whether the stall is on the wire or because the consumer stopped reading — without limiting the total transfer time of an actively-flowing stream, so an idle stream no longer holds its pooled connection indefinitely. Use `signal` (JS) to cancel an in-flight stream. 
A dropped connection during the stream handshake now surfaces the same typed, health-checked error as non-stream reads. The stream holds a pooled connection until it is consumed to the end, cancelled/closed, errors, or the idle timeout fires — consume it fully, use the context manager, or close it. +- `Sandbox.files.write()` / `write_files()`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are streamed to the sandbox, including when `gzip` is enabled (compression now happens chunk by chunk). `useOctetStream`/`use_octet_stream` now defaults to auto-detect — octet-stream is used when any entry is streamable (so streamed uploads aren't silently buffered) and `multipart/form-data` otherwise; browsers always use `multipart/form-data`. A streamed upload is bounded by a per-chunk idle timeout (`streamIdleTimeoutMs` in JS, default the request timeout, `0` to disable) that aborts a stalled upload — a producer that stops yielding or a server that stops reading — so a stall no longer holds the connection indefinitely. +- `Sandbox.files.read(format="stream")`: the request timeout now bounds only the initial handshake instead of killing the stream while it's being consumed. The body is bounded by a per-chunk idle timeout (`streamIdleTimeoutMs` in JS, `stream_idle_timeout` in Python, default the request timeout — 60s — `0`/`None` to disable) that bounds a stalled stream without limiting an actively-flowing one. Use `signal` (JS) to cancel an in-flight stream. A dropped connection during the stream handshake now surfaces the same typed, health-checked error as non-stream reads. The stream holds a pooled connection until it is consumed to the end, cancelled/closed, errors, or the idle timeout fires — consume it fully, use the context manager, or close it. - Python `Sandbox.files.read(format="stream")`: the response body is now streamed from the sandbox instead of being downloaded into memory before iteration (sync and async). 
- JS `Sandbox.files.read()` with `blob` or `stream` format now returns an empty `Blob`/`ReadableStream` for empty files instead of `""`. diff --git a/packages/js-sdk/src/connectionConfig.ts b/packages/js-sdk/src/connectionConfig.ts index e27561a0c7..1dd9841613 100644 --- a/packages/js-sdk/src/connectionConfig.ts +++ b/packages/js-sdk/src/connectionConfig.ts @@ -7,14 +7,6 @@ const supportedDomains = ['e2b.app', 'e2b.dev', 'e2b.pro', 'e2b-staging.dev'] export const REQUEST_TIMEOUT_MS = 60_000 // 60 seconds export const DEFAULT_SANDBOX_TIMEOUT_MS = 300_000 // 300 seconds -// Default timeout for streaming file transfers (uploads/downloads). A streamed -// body can take far longer than a regular request, so it must not inherit the -// short `REQUEST_TIMEOUT_MS`. -export const FILE_TIMEOUT_MS = 3_600_000 // 1 hour -// Idle timeout for a streamed read: abort if no chunk arrives within this -// window. Resets on every chunk, so it bounds a stalled stream without -// limiting an actively-flowing one. -export const STREAM_IDLE_TIMEOUT_MS = 60_000 // 60 seconds export const KEEPALIVE_PING_INTERVAL_SEC = 50 // 50 seconds export const KEEPALIVE_PING_HEADER = 'Keepalive-Ping-Interval' @@ -209,6 +201,42 @@ export function setupRequestController( return { controller, clearStartTimeout, cleanup } } +/** + * Create a resettable idle-timeout that aborts `controller` when no progress is + * made within `idleTimeoutMs`. `arm` (re)starts the timer; call it on each + * chunk. `clear` stops it. `0`/`undefined` disables it (both are no-ops). 
+ * + * @internal + */ +function createIdleAbort( + controller: AbortController, + idleTimeoutMs: number | undefined, + label: string +): { arm: () => void; clear: () => void } { + let timer: ReturnType | undefined + const clear = () => { + if (timer) { + clearTimeout(timer) + timer = undefined + } + } + const arm = () => { + if (!idleTimeoutMs) return + clear() + timer = setTimeout( + () => + controller.abort( + new DOMException( + `${label} idle for ${idleTimeoutMs}ms`, + 'TimeoutError' + ) + ), + idleTimeoutMs + ) + } + return { arm, clear } +} + /** * Wrap a streaming response body so its pooled connection is released when the * stream is fully read, cancelled, errors, or stays idle for too long. @@ -245,35 +273,17 @@ export function wrapStreamWithConnectionCleanup( } const reader = body.getReader() - - let idleTimer: ReturnType | undefined - const clearIdleTimer = () => { - if (idleTimer) { - clearTimeout(idleTimer) - idleTimer = undefined - } - } - const armIdleTimer = () => { - if (!idleTimeoutMs) return - clearIdleTimer() - idleTimer = setTimeout( - () => - controller.abort( - new DOMException(`Stream idle for ${idleTimeoutMs}ms`, 'TimeoutError') - ), - idleTimeoutMs - ) - } + const idle = createIdleAbort(controller, idleTimeoutMs, 'Stream') // Idempotent: safe to call from multiple stream callbacks. const release = () => { - clearIdleTimer() + idle.clear() cleanup() } return new ReadableStream({ start() { - armIdleTimer() + idle.arm() }, async pull(streamController) { try { @@ -282,7 +292,7 @@ export function wrapStreamWithConnectionCleanup( release() streamController.close() } else { - armIdleTimer() + idle.arm() streamController.enqueue(value) } } catch (err) { @@ -300,6 +310,51 @@ export function wrapStreamWithConnectionCleanup( }) } +/** + * Wrap an outgoing (upload) request body so the request is aborted if no chunk + * is sent within `idleTimeoutMs`. 
The timer resets on every chunk, bounding a + * stalled upload — a producer that stops yielding or a server that stops + * reading — without limiting an actively-flowing one. Pass `0`/`undefined` to + * disable, returning the body unwrapped. + * + * @internal + */ +export function wrapUploadStreamWithIdleTimeout( + body: ReadableStream, + controller: AbortController, + idleTimeoutMs?: number +): ReadableStream { + if (!idleTimeoutMs) return body + + const reader = body.getReader() + const idle = createIdleAbort(controller, idleTimeoutMs, 'Upload') + + return new ReadableStream({ + start() { + idle.arm() + }, + async pull(streamController) { + try { + const { done, value } = await reader.read() + if (done) { + idle.clear() + streamController.close() + } else { + idle.arm() + streamController.enqueue(value) + } + } catch (err) { + idle.clear() + streamController.error(err) + } + }, + async cancel(reason) { + idle.clear() + await reader.cancel(reason) + }, + }) +} + function buildUserAgent(integration?: string) { const userAgentParts = [`e2b-js-sdk/${version}`] diff --git a/packages/js-sdk/src/sandbox/filesystem/index.ts b/packages/js-sdk/src/sandbox/filesystem/index.ts index d9fcd9b638..a9f8d68ee3 100644 --- a/packages/js-sdk/src/sandbox/filesystem/index.ts +++ b/packages/js-sdk/src/sandbox/filesystem/index.ts @@ -9,13 +9,12 @@ import { ConnectionConfig, ConnectionOpts, defaultUsername, - FILE_TIMEOUT_MS, KEEPALIVE_PING_HEADER, KEEPALIVE_PING_INTERVAL_SEC, setupRequestController, - STREAM_IDLE_TIMEOUT_MS, Username, wrapStreamWithConnectionCleanup, + wrapUploadStreamWithIdleTimeout, } from '../../connectionConfig' import { @@ -293,6 +292,15 @@ export interface FilesystemWriteOpts extends FilesystemRequestOpts { * Requires envd 0.6.2 or later. */ metadata?: Record + /** + * Idle timeout for a streamed upload (`ReadableStream` data, outside the + * browser) in **milliseconds**: abort if no chunk is sent within this window. 
+ * Resets on every chunk, so it bounds a stalled upload — a producer that + * stops yielding or a server that stops reading — without limiting an + * actively-flowing one. Defaults to the request timeout (60s); pass `0` to + * disable. + */ + streamIdleTimeoutMs?: number } /** @@ -306,10 +314,8 @@ export interface FilesystemReadOpts extends FilesystemRequestOpts { /** * Idle timeout for a streamed read (`format: 'stream'`) in **milliseconds**: * abort if no chunk arrives within this window. Resets on every chunk, so it - * bounds a stalled stream without limiting an actively-flowing one. Pass `0` - * to disable. - * - * @default 60_000 // 60 seconds + * bounds a stalled stream without limiting an actively-flowing one. Defaults + * to the request timeout (60s); pass `0` to disable. */ streamIdleTimeoutMs?: number } @@ -468,10 +474,12 @@ export class Filesystem { if (format === 'stream') { // The request timeout bounds only the initial handshake; once the - // response arrives, the stream lives until it's consumed, cancelled, - // or the user signal aborts. + // response arrives, the stream lives until it's consumed, cancelled, the + // user signal aborts, or the per-chunk idle timeout fires. + const requestTimeoutMs = + opts?.requestTimeoutMs ?? this.connectionConfig.requestTimeoutMs const { controller, clearStartTimeout, cleanup } = setupRequestController( - opts?.requestTimeoutMs ?? this.connectionConfig.requestTimeoutMs, + requestTimeoutMs, opts?.signal ) @@ -514,7 +522,7 @@ export class Filesystem { clearStartTimeout, cleanup, controller, - idleTimeoutMs: opts?.streamIdleTimeoutMs ?? STREAM_IDLE_TIMEOUT_MS, + idleTimeoutMs: opts?.streamIdleTimeoutMs ?? requestTimeoutMs, } ) } catch (err) { @@ -685,56 +693,76 @@ export class Filesystem { const filePath = path ?? 
(file as WriteEntry).path const body = await toUploadBody(file.data, useGzip) const isStream = body instanceof ReadableStream - // A streamed upload can take far longer than the 60s request default, - // so fall back to the file-transfer timeout (matching volume writes) - // unless the caller set one explicitly. The signal is a total - // deadline—unlike downloads there's no post-handshake point to clear - // it, since the response only arrives once the body has been sent. - const uploadTimeoutMs = - writeOpts?.requestTimeoutMs ?? - (isStream ? FILE_TIMEOUT_MS : undefined) - - const res = await this.envdApi.api - .POST('/files', { - params: { - query: { - path: filePath, - username: user, - }, - }, - bodySerializer: () => body, - headers, - signal: this.connectionConfig.getSignal( - uploadTimeoutMs, - writeOpts?.signal - ), - body: {}, - // Streaming request bodies require half-duplex mode. - ...(isStream && { - duplex: 'half' as const, - }), - }) - .catch(async (err) => { - throw await handleEnvdApiFetchError(err, this.checkHealth) - }) - - const err = await handleFilesystemEnvdApiError(res) - if (err) { - throw err - } - const files = res.data as WriteInfo[] - if (!files || files.length === 0) { - throw new Error( - 'Expected to receive information about written file' + let uploadBody: BodyInit = body + let signal: AbortSignal | undefined + let cleanup: (() => void) | undefined + if (body instanceof ReadableStream) { + // No handshake timeout—the response only arrives once the whole + // body has been sent. Each chunk is bounded by the per-chunk idle + // timeout (default: the request timeout); the overall upload is + // bounded server-side. + const idleTimeoutMs = + writeOpts?.streamIdleTimeoutMs ?? + writeOpts?.requestTimeoutMs ?? 
+ this.connectionConfig.requestTimeoutMs + const ctrl = setupRequestController(undefined, writeOpts?.signal) + uploadBody = wrapUploadStreamWithIdleTimeout( + body, + ctrl.controller, + idleTimeoutMs + ) + signal = ctrl.controller.signal + cleanup = ctrl.cleanup + } else { + signal = this.connectionConfig.getSignal( + writeOpts?.requestTimeoutMs, + writeOpts?.signal ) } - for (const f of files) { - f.metadata = mapMetadata(f.metadata) + try { + const res = await this.envdApi.api + .POST('/files', { + params: { + query: { + path: filePath, + username: user, + }, + }, + bodySerializer: () => uploadBody, + headers, + signal, + body: {}, + // Streaming request bodies require half-duplex mode. + ...(isStream && { + duplex: 'half' as const, + }), + }) + .catch(async (err) => { + throw await handleEnvdApiFetchError(err, this.checkHealth) + }) + + const err = await handleFilesystemEnvdApiError(res) + if (err) { + throw err + } + + const files = res.data as WriteInfo[] + if (!files || files.length === 0) { + throw new Error( + 'Expected to receive information about written file' + ) + } + + for (const f of files) { + f.metadata = mapMetadata(f.metadata) + } + + return files + } finally { + cleanup?.() } - - return files }) ) diff --git a/packages/js-sdk/tests/connectionConfig.test.ts b/packages/js-sdk/tests/connectionConfig.test.ts index 3624ed18bf..4c2e846255 100644 --- a/packages/js-sdk/tests/connectionConfig.test.ts +++ b/packages/js-sdk/tests/connectionConfig.test.ts @@ -3,6 +3,7 @@ import { ConnectionConfig, setupRequestController, wrapStreamWithConnectionCleanup, + wrapUploadStreamWithIdleTimeout, } from '../src/connectionConfig' // Store original env vars to restore after tests @@ -476,3 +477,31 @@ test('wrapStreamWithConnectionCleanup with idle timeout 0 never auto-aborts', as assert.equal(await readAll(stream), 'ab') assert.equal(cleanups, 1) }) + +test('wrapUploadStreamWithIdleTimeout aborts an idle upload', async () => { + const controller = new AbortController() 
+ const { body } = stallingSource(controller.signal) + const stream = wrapUploadStreamWithIdleTimeout(body, controller, 20) + let error: unknown + try { + await readAll(stream) + } catch (err) { + error = err + } + assert.equal((error as DOMException)?.name, 'TimeoutError') + assert.equal(controller.signal.aborted, true) +}) + +test('wrapUploadStreamWithIdleTimeout passes chunks through and does not abort an active upload', async () => { + const { body } = trackedSource() + const controller = new AbortController() + const stream = wrapUploadStreamWithIdleTimeout(body, controller, 1000) + assert.equal(await readAll(stream), 'ab') + assert.equal(controller.signal.aborted, false) +}) + +test('wrapUploadStreamWithIdleTimeout with idle timeout 0 returns the body unwrapped', () => { + const { body } = trackedSource() + const controller = new AbortController() + assert.equal(wrapUploadStreamWithIdleTimeout(body, controller, 0), body) +}) diff --git a/packages/python-sdk/e2b/connection_config.py b/packages/python-sdk/e2b/connection_config.py index 87de6be501..5c287fe7fc 100644 --- a/packages/python-sdk/e2b/connection_config.py +++ b/packages/python-sdk/e2b/connection_config.py @@ -14,11 +14,6 @@ # than a regular request, so it must not inherit the short REQUEST_TIMEOUT. FILE_TIMEOUT: float = 3600.0 # 1 hour -# Idle timeout for a streamed read: abort if no chunk arrives within this -# window. Maps to httpx's per-chunk `read` timeout, so it bounds a stalled -# stream without limiting an actively-flowing one. `0`/`None` disables it. 
-STREAM_IDLE_TIMEOUT: float = 60.0 # 60 seconds - KEEPALIVE_PING_INTERVAL_SEC = 50 # 50 seconds KEEPALIVE_PING_HEADER = "Keepalive-Ping-Interval" diff --git a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py index 2ccefd7766..08b557cdbe 100644 --- a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py @@ -8,10 +8,8 @@ import e2b_connect as connect from e2b.connection_config import ( - FILE_TIMEOUT, KEEPALIVE_PING_HEADER, KEEPALIVE_PING_INTERVAL_SEC, - STREAM_IDLE_TIMEOUT, ConnectionConfig, Username, default_username, @@ -156,7 +154,7 @@ async def read( user: Optional[Username] = None, request_timeout: Optional[float] = None, gzip: bool = False, - stream_idle_timeout: Optional[float] = STREAM_IDLE_TIMEOUT, + stream_idle_timeout: Optional[float] = None, ) -> AsyncFileStreamReader: """ Read file content as an `AsyncFileStreamReader` (an `AsyncIterator[bytes]`). @@ -178,7 +176,7 @@ async def read( :param stream_idle_timeout: Idle timeout in **seconds** for the streamed body—abort if no chunk arrives within this window. Resets on every chunk, so it bounds a stalled stream without limiting total transfer - time. Pass `0`/`None` to disable. + time. Defaults to the request timeout; pass `0` to disable. :return: File content as an `AsyncFileStreamReader` """ @@ -191,7 +189,7 @@ async def read( user: Optional[Username] = None, request_timeout: Optional[float] = None, gzip: bool = False, - stream_idle_timeout: Optional[float] = STREAM_IDLE_TIMEOUT, + stream_idle_timeout: Optional[float] = None, ): username = user if username is None and self._envd_version < ENVD_DEFAULT_USER: @@ -229,10 +227,13 @@ async def read( raise err # The request timeout bounds only the initial handshake; httpx's - # per-chunk `read` timeout becomes the idle-read timeout for the - # body. 
The timeout dict is shared by reference with the transport - # and read again when body iteration starts. - request.extensions.get("timeout", {})["read"] = stream_idle_timeout or None + # per-chunk `read` timeout becomes the idle-read timeout for the body + # (defaults to the request timeout). The timeout dict is shared by + # reference with the transport and read again when iteration starts. + idle_timeout = ( + timeout if stream_idle_timeout is None else stream_idle_timeout + ) + request.extensions.get("timeout", {})["read"] = idle_timeout or None return AsyncFileStreamReader(r) @@ -352,19 +353,9 @@ async def write_files( # requesting gzip implies it when envd supports it. use_octet_stream = (use_octet_stream or gzip) and supports_octet_stream - request_timeout_value = self._connection_config.get_request_timeout( - request_timeout - ) - # A streamed body send can take far longer than the default request - # timeout, so give the write phase the file-transfer budget while - # keeping connection setup and the response read bounded. Matches the - # JS SDK's 1h streamed-upload timeout (httpx applies `write` per chunk - # rather than as a total deadline). - upload_timeout = ( - httpx.Timeout(request_timeout_value, write=FILE_TIMEOUT) - if has_streamable_data - else request_timeout_value - ) + # Each chunk send is bounded by the request timeout (httpx applies it + # per write); the total streamed upload is bounded server-side. + upload_timeout = self._connection_config.get_request_timeout(request_timeout) # Metadata is sent as request-scoped X-Metadata-* headers, so the same # metadata is applied to every file in a multi-file upload. 
diff --git a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py index 915a755374..70d4909d10 100644 --- a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py @@ -7,10 +7,8 @@ import e2b_connect from e2b.api.client_sync import get_envd_transport from e2b.connection_config import ( - FILE_TIMEOUT, KEEPALIVE_PING_HEADER, KEEPALIVE_PING_INTERVAL_SEC, - STREAM_IDLE_TIMEOUT, ConnectionConfig, Username, default_username, @@ -176,7 +174,7 @@ def read( user: Optional[Username] = None, request_timeout: Optional[float] = None, gzip: bool = False, - stream_idle_timeout: Optional[float] = STREAM_IDLE_TIMEOUT, + stream_idle_timeout: Optional[float] = None, ) -> FileStreamReader: """ Read file content as a `FileStreamReader` (an `Iterator[bytes]`). @@ -196,7 +194,7 @@ def read( :param stream_idle_timeout: Idle timeout in **seconds** for the streamed body—abort if no chunk arrives within this window. Resets on every chunk, so it bounds a stalled stream without limiting total transfer - time. Pass `0`/`None` to disable. + time. Defaults to the request timeout; pass `0` to disable. :return: File content as a `FileStreamReader` """ @@ -209,7 +207,7 @@ def read( user: Optional[Username] = None, request_timeout: Optional[float] = None, gzip: bool = False, - stream_idle_timeout: Optional[float] = STREAM_IDLE_TIMEOUT, + stream_idle_timeout: Optional[float] = None, ): username = user if username is None and self._envd_version < ENVD_DEFAULT_USER: @@ -245,10 +243,13 @@ def read( raise err # The request timeout bounds only the initial handshake; httpx's - # per-chunk `read` timeout becomes the idle-read timeout for the - # body. The timeout dict is shared by reference with the transport - # and read again when body iteration starts. 
- request.extensions.get("timeout", {})["read"] = stream_idle_timeout or None + # per-chunk `read` timeout becomes the idle-read timeout for the body + # (defaults to the request timeout). The timeout dict is shared by + # reference with the transport and read again when iteration starts. + idle_timeout = ( + timeout if stream_idle_timeout is None else stream_idle_timeout + ) + request.extensions.get("timeout", {})["read"] = idle_timeout or None return FileStreamReader(r) @@ -366,19 +367,9 @@ def write_files( # requesting gzip implies it when envd supports it. use_octet_stream = (use_octet_stream or gzip) and supports_octet_stream - request_timeout_value = self._connection_config.get_request_timeout( - request_timeout - ) - # A streamed body send can take far longer than the default request - # timeout, so give the write phase the file-transfer budget while - # keeping connection setup and the response read bounded. Matches the - # JS SDK's 1h streamed-upload timeout (httpx applies `write` per chunk - # rather than as a total deadline). - upload_timeout = ( - httpx.Timeout(request_timeout_value, write=FILE_TIMEOUT) - if has_streamable_data - else request_timeout_value - ) + # Each chunk send is bounded by the request timeout (httpx applies it + # per write); the total streamed upload is bounded server-side. + upload_timeout = self._connection_config.get_request_timeout(request_timeout) # Metadata is sent as request-scoped X-Metadata-* headers, so the same # metadata is applied to every file in a multi-file upload. 
From 2022d3a2684bc61293f181759524e72da03805e6 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 18 Jun 2026 21:13:12 +0200 Subject: [PATCH 15/20] fix(python-sdk): run streamed-upload reads and gzip off the event loop aiter_io_chunks / agzip_iter were async generators doing synchronous file reads and zlib compression inline, stalling the asyncio event loop for the duration of those operations on large AsyncSandbox uploads. Offload both to a worker thread via asyncio.to_thread. Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/cuddly-pots-stream.md | 2 +- packages/python-sdk/e2b/io_utils.py | 20 ++++-- packages/python-sdk/tests/test_io_utils.py | 74 ++++++++++++++++++++++ 3 files changed, 90 insertions(+), 6 deletions(-) create mode 100644 packages/python-sdk/tests/test_io_utils.py diff --git a/.changeset/cuddly-pots-stream.md b/.changeset/cuddly-pots-stream.md index 9ab27b73ef..3a7efb49fb 100644 --- a/.changeset/cuddly-pots-stream.md +++ b/.changeset/cuddly-pots-stream.md @@ -5,7 +5,7 @@ Stream uploads instead of buffering streaming input entirely in memory: -- `Sandbox.files.write()` / `write_files()`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are streamed to the sandbox, including when `gzip` is enabled (compression now happens chunk by chunk). `useOctetStream`/`use_octet_stream` now defaults to auto-detect — octet-stream is used when any entry is streamable (so streamed uploads aren't silently buffered) and `multipart/form-data` otherwise; browsers always use `multipart/form-data`. A streamed upload is bounded by a per-chunk idle timeout (`streamIdleTimeoutMs` in JS, default the request timeout, `0` to disable) that aborts a stalled upload — a producer that stops yielding or a server that stops reading — so a stall no longer holds the connection indefinitely. 
+- `Sandbox.files.write()` / `write_files()`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are streamed to the sandbox, including when `gzip` is enabled (compression now happens chunk by chunk). `useOctetStream`/`use_octet_stream` now defaults to auto-detect — octet-stream is used when any entry is streamable (so streamed uploads aren't silently buffered) and `multipart/form-data` otherwise; browsers always use `multipart/form-data`. A streamed upload is bounded by a per-chunk idle timeout (`streamIdleTimeoutMs` in JS, default the request timeout, `0` to disable) that aborts a stalled upload — a producer that stops yielding or a server that stops reading — so a stall no longer holds the connection indefinitely. On Python's `AsyncSandbox`, the blocking file reads and gzip compression of a streamed upload now run in a worker thread so a large upload doesn't stall the event loop. - `Sandbox.files.read(format="stream")`: the request timeout now bounds only the initial handshake instead of killing the stream while it's being consumed. The body is bounded by a per-chunk idle timeout (`streamIdleTimeoutMs` in JS, `stream_idle_timeout` in Python, default the request timeout — 60s — `0`/`None` to disable) that bounds a stalled stream without limiting an actively-flowing one. Use `signal` (JS) to cancel an in-flight stream. A dropped connection during the stream handshake now surfaces the same typed, health-checked error as non-stream reads. The stream holds a pooled connection until it is consumed to the end, cancelled/closed, errors, or the idle timeout fires — consume it fully, use the context manager, or close it. - Python `Sandbox.files.read(format="stream")`: the response body is now streamed from the sandbox instead of being downloaded into memory before iteration (sync and async). - JS `Sandbox.files.read()` with `blob` or `stream` format now returns an empty `Blob`/`ReadableStream` for empty files instead of `""`. 
diff --git a/packages/python-sdk/e2b/io_utils.py b/packages/python-sdk/e2b/io_utils.py index d9e6dea4f7..8e02449a7f 100644 --- a/packages/python-sdk/e2b/io_utils.py +++ b/packages/python-sdk/e2b/io_utils.py @@ -1,3 +1,4 @@ +import asyncio import zlib from typing import IO, AsyncIterable, AsyncIterator, Iterable, Iterator @@ -14,9 +15,13 @@ def iter_io_chunks(data: IO) -> Iterator[bytes]: async def aiter_io_chunks(data: IO) -> AsyncIterator[bytes]: - """Read a file-like object in chunks, encoding text chunks to UTF-8.""" + """Read a file-like object in chunks, encoding text chunks to UTF-8. + + `data.read` is a synchronous (potentially disk-blocking) call, so it runs in + a worker thread to avoid stalling the event loop during large uploads. + """ while True: - chunk = data.read(IO_CHUNK_SIZE) + chunk = await asyncio.to_thread(data.read, IO_CHUNK_SIZE) if not chunk: break yield chunk if isinstance(chunk, bytes) else chunk.encode("utf-8") @@ -38,10 +43,15 @@ def gzip_iter(chunks: Iterable[bytes]) -> Iterator[bytes]: async def agzip_iter(chunks: AsyncIterable[bytes]) -> AsyncIterator[bytes]: - """Gzip-compress a byte stream chunk by chunk.""" + """Gzip-compress a byte stream chunk by chunk. + + Compression is CPU-bound, so it runs in a worker thread to avoid stalling + the event loop during large uploads (zlib releases the GIL while + compressing, so the offload genuinely overlaps with the loop). 
+ """ compressor = _gzip_compressor() async for chunk in chunks: - compressed = compressor.compress(chunk) + compressed = await asyncio.to_thread(compressor.compress, chunk) if compressed: yield compressed - yield compressor.flush() + yield await asyncio.to_thread(compressor.flush) diff --git a/packages/python-sdk/tests/test_io_utils.py b/packages/python-sdk/tests/test_io_utils.py new file mode 100644 index 0000000000..bcb8e12dd0 --- /dev/null +++ b/packages/python-sdk/tests/test_io_utils.py @@ -0,0 +1,74 @@ +"""Unit tests for the streamed-upload IO helpers.""" + +import asyncio +import gzip +import threading +from typing import IO, cast + +from e2b.io_utils import agzip_iter, aiter_io_chunks, gzip_iter, iter_io_chunks + + +def test_iter_io_chunks_encodes_text(): + import io + + assert list(iter_io_chunks(io.BytesIO(b"abc"))) == [b"abc"] + assert list(iter_io_chunks(io.StringIO("abc"))) == [b"abc"] + + +def test_gzip_iter_roundtrip(): + compressed = b"".join(gzip_iter([b"hello ", b"world"])) + assert gzip.decompress(compressed) == b"hello world" + + +async def test_aiter_io_chunks_roundtrip(): + import io + + chunks = [chunk async for chunk in aiter_io_chunks(io.BytesIO(b"hello"))] + assert b"".join(chunks) == b"hello" + + +async def test_agzip_iter_roundtrip(): + async def source(): + yield b"hello " + yield b"world" + + compressed = b"".join([c async for c in agzip_iter(source())]) + assert gzip.decompress(compressed) == b"hello world" + + +async def test_aiter_io_chunks_offloads_reads_to_a_thread(): + """A blocking ``read`` must not stall the event loop. + + The reader's ``read`` blocks until a concurrent task releases it; that task + can only run if the read is off the loop. If the read ran on the loop, the + releaser would never run and ``release.wait`` would time out, failing the + ``released`` assertion. 
+ """ + started = threading.Event() + release = threading.Event() + result = {"released": None} + + class BlockingReader: + def __init__(self): + self._done = False + + def read(self, _n): + if self._done: + return b"" + started.set() + result["released"] = release.wait(2) + self._done = True + return b"data" + + async def releaser(): + while not started.is_set(): + await asyncio.sleep(0.01) + release.set() + + async def collect(): + reader = cast(IO, BlockingReader()) + return [chunk async for chunk in aiter_io_chunks(reader)] + + _, chunks = await asyncio.gather(releaser(), collect()) + assert result["released"] is True + assert chunks == [b"data"] From cc2cd5726fa38cafb42d773f62c2185619ed63d3 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 18 Jun 2026 22:17:16 +0200 Subject: [PATCH 16/20] refactor(sdks): make the JS read idle timeout bound only the wire Arm the JS read idle timer only around the network read and clear it the moment a chunk arrives, so a slow or paused consumer no longer trips it; it fires only when the server stops sending mid-stream (a held-but-unread stream is reclaimed server-side). Matches Python's httpx read timeout, which only counts during socket reads. Drop the JS upload idle wrapper: it bounded producer latency (local), not the upload wire (not observable through fetch). Stalled uploads are bounded server-side or via the caller's signal; Python keeps its per-write httpx timeout, which does bound the wire. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/cuddly-pots-stream.md | 4 +- packages/js-sdk/src/connectionConfig.ts | 66 ++------- .../js-sdk/src/sandbox/filesystem/index.ts | 127 +++++++----------- .../js-sdk/tests/connectionConfig.test.ts | 43 +++--- .../tests/test_file_stream_reader.py | 15 +++ 5 files changed, 97 insertions(+), 158 deletions(-) diff --git a/.changeset/cuddly-pots-stream.md b/.changeset/cuddly-pots-stream.md index 3a7efb49fb..895cea101f 100644 --- a/.changeset/cuddly-pots-stream.md +++ b/.changeset/cuddly-pots-stream.md @@ -5,7 +5,7 @@ Stream uploads instead of buffering streaming input entirely in memory: -- `Sandbox.files.write()` / `write_files()`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are streamed to the sandbox, including when `gzip` is enabled (compression now happens chunk by chunk). `useOctetStream`/`use_octet_stream` now defaults to auto-detect — octet-stream is used when any entry is streamable (so streamed uploads aren't silently buffered) and `multipart/form-data` otherwise; browsers always use `multipart/form-data`. A streamed upload is bounded by a per-chunk idle timeout (`streamIdleTimeoutMs` in JS, default the request timeout, `0` to disable) that aborts a stalled upload — a producer that stops yielding or a server that stops reading — so a stall no longer holds the connection indefinitely. On Python's `AsyncSandbox`, the blocking file reads and gzip compression of a streamed upload now run in a worker thread so a large upload doesn't stall the event loop. -- `Sandbox.files.read(format="stream")`: the request timeout now bounds only the initial handshake instead of killing the stream while it's being consumed. The body is bounded by a per-chunk idle timeout (`streamIdleTimeoutMs` in JS, `stream_idle_timeout` in Python, default the request timeout — 60s — `0`/`None` to disable) that bounds a stalled stream without limiting an actively-flowing one. 
Use `signal` (JS) to cancel an in-flight stream. A dropped connection during the stream handshake now surfaces the same typed, health-checked error as non-stream reads. The stream holds a pooled connection until it is consumed to the end, cancelled/closed, errors, or the idle timeout fires — consume it fully, use the context manager, or close it. +- `Sandbox.files.write()` / `write_files()`: `ReadableStream` data (JS, outside the browser) and file-like objects (Python) are streamed to the sandbox, including when `gzip` is enabled (compression now happens chunk by chunk). `useOctetStream`/`use_octet_stream` now defaults to auto-detect — octet-stream is used when any entry is streamable (so streamed uploads aren't silently buffered) and `multipart/form-data` otherwise; browsers always use `multipart/form-data`. A streamed upload is bounded by a per-chunk timeout on the wire (Python's per-write `httpx` timeout, default the request timeout); a stalled upload that this can't observe is bounded server-side. On Python's `AsyncSandbox`, the blocking file reads and gzip compression of a streamed upload now run in a worker thread so a large upload doesn't stall the event loop. +- `Sandbox.files.read(format="stream")`: the request timeout now bounds only the initial handshake instead of killing the stream while it's being consumed. The body is bounded by a per-chunk idle timeout on the wire (`streamIdleTimeoutMs` in JS, `stream_idle_timeout` in Python, default the request timeout — 60s — `0`/`None` to disable): it aborts only when the server stops sending mid-stream, and a slow or paused consumer never trips it (a held-but-unread stream is reclaimed server-side). Use `signal` (JS) to cancel an in-flight stream. A dropped connection during the stream handshake now surfaces the same typed, health-checked error as non-stream reads. 
The stream holds a pooled connection until it is consumed to the end, cancelled/closed, errors, or the idle timeout fires — consume it fully, use the context manager, or close it. - Python `Sandbox.files.read(format="stream")`: the response body is now streamed from the sandbox instead of being downloaded into memory before iteration (sync and async). - JS `Sandbox.files.read()` with `blob` or `stream` format now returns an empty `Blob`/`ReadableStream` for empty files instead of `""`. diff --git a/packages/js-sdk/src/connectionConfig.ts b/packages/js-sdk/src/connectionConfig.ts index 1dd9841613..a1f578d9ac 100644 --- a/packages/js-sdk/src/connectionConfig.ts +++ b/packages/js-sdk/src/connectionConfig.ts @@ -243,11 +243,11 @@ function createIdleAbort( * * Clears the handshake timeout from {@link setupRequestController} (so * consuming the body isn't killed by it) and replaces it with an idle-read - * timeout: if no chunk arrives within `idleTimeoutMs` it aborts `controller`, - * tearing down the fetch and releasing the connection. The timer resets on - * every chunk, so it bounds a stalled stream without limiting an - * actively-flowing one. Pass `0`/`undefined` to disable. Call once the - * handshake has succeeded. + * timeout that bounds only the wire: it's armed while waiting on a network + * read and cleared the moment a chunk arrives, so a slow or paused consumer + * never trips it (only a server that stops sending mid-stream does). On expiry + * it aborts `controller`, tearing down the fetch and releasing the connection. + * Pass `0`/`undefined` to disable. Call once the handshake has succeeded. * * @internal */ @@ -282,17 +282,20 @@ export function wrapStreamWithConnectionCleanup( } return new ReadableStream({ - start() { - idle.arm() - }, async pull(streamController) { + // Bound only the wire: arm before reading from the network and clear the + // moment a chunk (or EOF) arrives, so a slow or paused consumer never + // counts against the idle timeout. 
A consumer that holds the stream but + // stops reading is never pulled here, so nothing arms—that case is + // reclaimed server-side, not by this timer. + idle.arm() try { const { done, value } = await reader.read() + idle.clear() if (done) { release() streamController.close() } else { - idle.arm() streamController.enqueue(value) } } catch (err) { @@ -310,51 +313,6 @@ export function wrapStreamWithConnectionCleanup( }) } -/** - * Wrap an outgoing (upload) request body so the request is aborted if no chunk - * is sent within `idleTimeoutMs`. The timer resets on every chunk, bounding a - * stalled upload — a producer that stops yielding or a server that stops - * reading — without limiting an actively-flowing one. Pass `0`/`undefined` to - * disable, returning the body unwrapped. - * - * @internal - */ -export function wrapUploadStreamWithIdleTimeout( - body: ReadableStream, - controller: AbortController, - idleTimeoutMs?: number -): ReadableStream { - if (!idleTimeoutMs) return body - - const reader = body.getReader() - const idle = createIdleAbort(controller, idleTimeoutMs, 'Upload') - - return new ReadableStream({ - start() { - idle.arm() - }, - async pull(streamController) { - try { - const { done, value } = await reader.read() - if (done) { - idle.clear() - streamController.close() - } else { - idle.arm() - streamController.enqueue(value) - } - } catch (err) { - idle.clear() - streamController.error(err) - } - }, - async cancel(reason) { - idle.clear() - await reader.cancel(reason) - }, - }) -} - function buildUserAgent(integration?: string) { const userAgentParts = [`e2b-js-sdk/${version}`] diff --git a/packages/js-sdk/src/sandbox/filesystem/index.ts b/packages/js-sdk/src/sandbox/filesystem/index.ts index a9f8d68ee3..2748934c70 100644 --- a/packages/js-sdk/src/sandbox/filesystem/index.ts +++ b/packages/js-sdk/src/sandbox/filesystem/index.ts @@ -14,7 +14,6 @@ import { setupRequestController, Username, wrapStreamWithConnectionCleanup, - 
wrapUploadStreamWithIdleTimeout, } from '../../connectionConfig' import { @@ -292,15 +291,6 @@ export interface FilesystemWriteOpts extends FilesystemRequestOpts { * Requires envd 0.6.2 or later. */ metadata?: Record - /** - * Idle timeout for a streamed upload (`ReadableStream` data, outside the - * browser) in **milliseconds**: abort if no chunk is sent within this window. - * Resets on every chunk, so it bounds a stalled upload — a producer that - * stops yielding or a server that stops reading — without limiting an - * actively-flowing one. Defaults to the request timeout (60s); pass `0` to - * disable. - */ - streamIdleTimeoutMs?: number } /** @@ -313,9 +303,10 @@ export interface FilesystemReadOpts extends FilesystemRequestOpts { gzip?: boolean /** * Idle timeout for a streamed read (`format: 'stream'`) in **milliseconds**: - * abort if no chunk arrives within this window. Resets on every chunk, so it - * bounds a stalled stream without limiting an actively-flowing one. Defaults - * to the request timeout (60s); pass `0` to disable. + * abort if no chunk arrives from the server within this window *while + * reading*. It bounds only the wire — a slow or paused consumer never trips + * it (a consumer that holds the stream but stops reading is reclaimed + * server-side). Defaults to the request timeout (60s); pass `0` to disable. */ streamIdleTimeoutMs?: number } @@ -693,76 +684,56 @@ export class Filesystem { const filePath = path ?? (file as WriteEntry).path const body = await toUploadBody(file.data, useGzip) const isStream = body instanceof ReadableStream + // A streamed upload carries no client-side timeout: the socket-write + // "wire" isn't observable through fetch, and a stalled producer is + // the caller's own code, so a stuck streamed upload is bounded + // server-side (or via `writeOpts.signal`). Buffered uploads keep the + // normal request timeout. + const signal = isStream + ? 
writeOpts?.signal + : this.connectionConfig.getSignal( + writeOpts?.requestTimeoutMs, + writeOpts?.signal + ) - let uploadBody: BodyInit = body - let signal: AbortSignal | undefined - let cleanup: (() => void) | undefined - if (body instanceof ReadableStream) { - // No handshake timeout—the response only arrives once the whole - // body has been sent. Each chunk is bounded by the per-chunk idle - // timeout (default: the request timeout); the overall upload is - // bounded server-side. - const idleTimeoutMs = - writeOpts?.streamIdleTimeoutMs ?? - writeOpts?.requestTimeoutMs ?? - this.connectionConfig.requestTimeoutMs - const ctrl = setupRequestController(undefined, writeOpts?.signal) - uploadBody = wrapUploadStreamWithIdleTimeout( - body, - ctrl.controller, - idleTimeoutMs - ) - signal = ctrl.controller.signal - cleanup = ctrl.cleanup - } else { - signal = this.connectionConfig.getSignal( - writeOpts?.requestTimeoutMs, - writeOpts?.signal - ) - } - - try { - const res = await this.envdApi.api - .POST('/files', { - params: { - query: { - path: filePath, - username: user, - }, + const res = await this.envdApi.api + .POST('/files', { + params: { + query: { + path: filePath, + username: user, }, - bodySerializer: () => uploadBody, - headers, - signal, - body: {}, - // Streaming request bodies require half-duplex mode. - ...(isStream && { - duplex: 'half' as const, - }), - }) - .catch(async (err) => { - throw await handleEnvdApiFetchError(err, this.checkHealth) - }) - - const err = await handleFilesystemEnvdApiError(res) - if (err) { - throw err - } - - const files = res.data as WriteInfo[] - if (!files || files.length === 0) { - throw new Error( - 'Expected to receive information about written file' - ) - } + }, + bodySerializer: () => body, + headers, + signal, + body: {}, + // Streaming request bodies require half-duplex mode. 
+ ...(isStream && { + duplex: 'half' as const, + }), + }) + .catch(async (err) => { + throw await handleEnvdApiFetchError(err, this.checkHealth) + }) + + const err = await handleFilesystemEnvdApiError(res) + if (err) { + throw err + } - for (const f of files) { - f.metadata = mapMetadata(f.metadata) - } + const files = res.data as WriteInfo[] + if (!files || files.length === 0) { + throw new Error( + 'Expected to receive information about written file' + ) + } - return files - } finally { - cleanup?.() + for (const f of files) { + f.metadata = mapMetadata(f.metadata) } + + return files }) ) diff --git a/packages/js-sdk/tests/connectionConfig.test.ts b/packages/js-sdk/tests/connectionConfig.test.ts index 4c2e846255..c799572f2c 100644 --- a/packages/js-sdk/tests/connectionConfig.test.ts +++ b/packages/js-sdk/tests/connectionConfig.test.ts @@ -3,7 +3,6 @@ import { ConnectionConfig, setupRequestController, wrapStreamWithConnectionCleanup, - wrapUploadStreamWithIdleTimeout, } from '../src/connectionConfig' // Store original env vars to restore after tests @@ -478,30 +477,26 @@ test('wrapStreamWithConnectionCleanup with idle timeout 0 never auto-aborts', as assert.equal(cleanups, 1) }) -test('wrapUploadStreamWithIdleTimeout aborts an idle upload', async () => { +test('wrapStreamWithConnectionCleanup does not abort a slow consumer (wire-only)', async () => { + // Source has both chunks ready immediately; the consumer pauses far longer + // than the idle timeout between reads. Because the timer is armed only around + // the network read and cleared as soon as a chunk arrives, the consumer's + // pace must not trip it. 
const controller = new AbortController() - const { body } = stallingSource(controller.signal) - const stream = wrapUploadStreamWithIdleTimeout(body, controller, 20) - let error: unknown - try { - await readAll(stream) - } catch (err) { - error = err - } - assert.equal((error as DOMException)?.name, 'TimeoutError') - assert.equal(controller.signal.aborted, true) -}) - -test('wrapUploadStreamWithIdleTimeout passes chunks through and does not abort an active upload', async () => { const { body } = trackedSource() - const controller = new AbortController() - const stream = wrapUploadStreamWithIdleTimeout(body, controller, 1000) - assert.equal(await readAll(stream), 'ab') - assert.equal(controller.signal.aborted, false) -}) + const stream = wrapStreamWithConnectionCleanup(body, { + clearStartTimeout: () => {}, + cleanup: () => {}, + controller, + idleTimeoutMs: 20, + }) + const reader = stream.getReader() + const decoder = new TextDecoder() -test('wrapUploadStreamWithIdleTimeout with idle timeout 0 returns the body unwrapped', () => { - const { body } = trackedSource() - const controller = new AbortController() - assert.equal(wrapUploadStreamWithIdleTimeout(body, controller, 0), body) + const first = await reader.read() + assert.equal(decoder.decode(first.value), 'a') + await new Promise((resolve) => setTimeout(resolve, 50)) + const second = await reader.read() + assert.equal(decoder.decode(second.value), 'b') + assert.equal(controller.signal.aborted, false) }) diff --git a/packages/python-sdk/tests/test_file_stream_reader.py b/packages/python-sdk/tests/test_file_stream_reader.py index 9047e5f599..d499aec633 100644 --- a/packages/python-sdk/tests/test_file_stream_reader.py +++ b/packages/python-sdk/tests/test_file_stream_reader.py @@ -125,6 +125,21 @@ def test_sync_idle_timeout_releases_connection(): assert _active_connections(client) == 0 +def test_sync_slow_consumer_does_not_trip_idle_timeout(): + # The server sends every chunk promptly; the consumer then pauses far + 
# longer than the read (idle) timeout between iterations. httpx's read + # timeout only counts while it's waiting on the wire, so a slow consumer + # must not trip it (parity with the JS wire-only idle timeout). + with httpx.Client() as client: + port = _start_chunked_server() + reader = FileStreamReader(_open_stream(client, port, read_timeout=0.05)) + out = [] + for chunk in reader: + out.append(chunk) + time.sleep(0.2) + assert b"".join(out) == EXPECTED + + def test_sync_abandoned_reader_is_reclaimed_on_client_close(): client = httpx.Client() port = _start_chunked_server() From d2939b02fc05bb75ef5f8c64e9e9ea40a060b245 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 18 Jun 2026 22:26:18 +0200 Subject: [PATCH 17/20] docs(python-sdk): scope the FILE_TIMEOUT comment to volume transfers After the filesystem streaming revert, FILE_TIMEOUT is used only by the volume client; sandbox filesystem streaming bounds each chunk by the request timeout and leaves the total to the server. Reword the comment so it no longer reads as a general streaming-transfer timeout (addresses a Bugbot review note). Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/python-sdk/e2b/connection_config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/python-sdk/e2b/connection_config.py b/packages/python-sdk/e2b/connection_config.py index 5c287fe7fc..c658b4a4af 100644 --- a/packages/python-sdk/e2b/connection_config.py +++ b/packages/python-sdk/e2b/connection_config.py @@ -10,8 +10,9 @@ REQUEST_TIMEOUT: float = 60.0 # 60 seconds -# Timeout for streaming file transfers. A streamed body can take far longer -# than a regular request, so it must not inherit the short REQUEST_TIMEOUT. +# Timeout for volume file transfers, which stream large bodies and so must not +# inherit the short REQUEST_TIMEOUT. 
(Sandbox filesystem streaming instead +# bounds each chunk by the request timeout and leaves the total to the server.) FILE_TIMEOUT: float = 3600.0 # 1 hour KEEPALIVE_PING_INTERVAL_SEC = 50 # 50 seconds From 8c5b640b5c22015c75b76d6b5f9457a52c3f2208 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Fri, 19 Jun 2026 15:55:08 +0200 Subject: [PATCH 18/20] docs(python-sdk): note envd backstops the streamed-upload per-write timeout The per-write httpx timeout on streamed uploads guards a stuck socket write (server stops reading); it can't observe the opposite direction. Record that envd >= 0.6.7's per-read idle timeout backstops that case. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py | 3 ++- packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py index 08b557cdbe..eb6dd82c2e 100644 --- a/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_async/filesystem/filesystem.py @@ -354,7 +354,8 @@ async def write_files( use_octet_stream = (use_octet_stream or gzip) and supports_octet_stream # Each chunk send is bounded by the request timeout (httpx applies it - # per write); the total streamed upload is bounded server-side. + # per write); a stalled upload the per-write timeout can't observe is + # bounded server-side (envd's per-read idle timeout, envd >= 0.6.7). 
upload_timeout = self._connection_config.get_request_timeout(request_timeout) # Metadata is sent as request-scoped X-Metadata-* headers, so the same diff --git a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py index 70d4909d10..69ed1a0014 100644 --- a/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py +++ b/packages/python-sdk/e2b/sandbox_sync/filesystem/filesystem.py @@ -368,7 +368,8 @@ def write_files( use_octet_stream = (use_octet_stream or gzip) and supports_octet_stream # Each chunk send is bounded by the request timeout (httpx applies it - # per write); the total streamed upload is bounded server-side. + # per write); a stalled upload the per-write timeout can't observe is + # bounded server-side (envd's per-read idle timeout, envd >= 0.6.7). upload_timeout = self._connection_config.get_request_timeout(request_timeout) # Metadata is sent as request-scoped X-Metadata-* headers, so the same From b8db6b5fdb5055496b0a0a4c9873a56d51b84a9f Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Fri, 19 Jun 2026 16:22:32 +0200 Subject: [PATCH 19/20] test(sdks): drop redundant streamed-read test coverage Remove the JS 'empty file' read test now subsumed by 'read empty file in all formats', and trim the duplicate double-close idempotency assertions from the Python partial-then-close tests (idempotency is covered by the stream-reader unit tests). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/js-sdk/tests/sandbox/files/read.test.ts | 8 -------- .../tests/async/sandbox_async/files/test_read.py | 2 -- .../python-sdk/tests/sync/sandbox_sync/files/test_read.py | 2 -- 3 files changed, 12 deletions(-) diff --git a/packages/js-sdk/tests/sandbox/files/read.test.ts b/packages/js-sdk/tests/sandbox/files/read.test.ts index 045db4790b..b4f481c310 100644 --- a/packages/js-sdk/tests/sandbox/files/read.test.ts +++ b/packages/js-sdk/tests/sandbox/files/read.test.ts @@ -31,14 +31,6 @@ sandboxTest( } ) -sandboxTest('empty file', async ({ sandbox }) => { - const filename = 'empty-file.txt' - - await sandbox.commands.run(`touch ${filename}`) - const content = await sandbox.files.read(filename) - expect(content).toBe('') -}) - sandboxTest('read file as stream', async ({ sandbox }) => { const filename = 'test_read_stream.txt' const content = 'Streamed read content. '.repeat(10_000) diff --git a/packages/python-sdk/tests/async/sandbox_async/files/test_read.py b/packages/python-sdk/tests/async/sandbox_async/files/test_read.py index 643c7aab01..89db6886e4 100644 --- a/packages/python-sdk/tests/async/sandbox_async/files/test_read.py +++ b/packages/python-sdk/tests/async/sandbox_async/files/test_read.py @@ -93,5 +93,3 @@ async def test_read_file_as_stream_partial_then_close(async_sandbox: AsyncSandbo first = await stream.__anext__() assert len(first) > 0 await stream.aclose() - # aclose is idempotent - await stream.aclose() diff --git a/packages/python-sdk/tests/sync/sandbox_sync/files/test_read.py b/packages/python-sdk/tests/sync/sandbox_sync/files/test_read.py index a3e98fc09a..4464ce96e2 100644 --- a/packages/python-sdk/tests/sync/sandbox_sync/files/test_read.py +++ b/packages/python-sdk/tests/sync/sandbox_sync/files/test_read.py @@ -81,5 +81,3 @@ def test_read_file_as_stream_partial_then_close(sandbox): first = next(iter(stream)) assert len(first) > 0 stream.close() - # close is idempotent - stream.close() From 
4351aae42e63b879015b448279a7a7138f90789d Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Fri, 19 Jun 2026 17:19:01 +0200 Subject: [PATCH 20/20] refactor(python-sdk): drop the now-unused FILE_TIMEOUT constant The main connection_config FILE_TIMEOUT was never imported or re-exported; volume transfers use the constant in volume/connection_config.py and sandbox filesystem streaming now bounds each chunk by the request timeout. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/python-sdk/e2b/connection_config.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/packages/python-sdk/e2b/connection_config.py b/packages/python-sdk/e2b/connection_config.py index c658b4a4af..d67a9151e1 100644 --- a/packages/python-sdk/e2b/connection_config.py +++ b/packages/python-sdk/e2b/connection_config.py @@ -10,11 +10,6 @@ REQUEST_TIMEOUT: float = 60.0 # 60 seconds -# Timeout for volume file transfers, which stream large bodies and so must not -# inherit the short REQUEST_TIMEOUT. (Sandbox filesystem streaming instead -# bounds each chunk by the request timeout and leaves the total to the server.) -FILE_TIMEOUT: float = 3600.0 # 1 hour - KEEPALIVE_PING_INTERVAL_SEC = 50 # 50 seconds KEEPALIVE_PING_HEADER = "Keepalive-Ping-Interval"