From d4241088f6e9b0203c450b2dcae2264fc64160d7 Mon Sep 17 00:00:00 2001 From: "F.D.Castel" Date: Wed, 13 May 2026 03:42:36 -0300 Subject: [PATCH] Add Server.readlines_to_eof_timed for bulk trace-stream drain (#70) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `readline_timed` requests one line per services-API round-trip (`isc_info_svc_line`). Under sustained trace-stream throughput the per-call overhead caps a Python consumer at roughly 480 trace events/sec, while Firebird's own `fbtracemgr` reaches roughly 3000 trace events/sec because it uses `isc_info_svc_to_eof` (drain many lines per round-trip). The slow-drain path causes server-side trace buffer overruns and event loss — invisible to the client. This commit adds `readlines_to_eof_timed(timeout)` as a parallel method, mirroring fbtracemgr's request shape: - Sends `bytes([SrvInfoCode.TO_EOF])` as the request item. - Inspects the trailing indicator after the sized-string payload: isc_info_end (1) -> end of service, return None isc_info_svc_timeout (64) -> transient, return TIMEOUT isc_info_data_not_ready (4) -> transient, return data or TIMEOUT isc_info_truncated (2) -> more data coming, return data This explicitly distinguishes "service finished" from "no data right now". `readline_timed` only checks for `isc_info_svc_timeout` and treats every other empty payload as `None` (EOF), which prematurely terminates long-running consumers on transient idle rounds. - Returns the multi-line chunk verbatim; callers split with `.splitlines(keepends=True)`. `readline_timed` is left unchanged because `Server.readline`, stdin/STDOUT handlers, and any third-party callers depend on its one-line semantics. The new method is opt-in for consumers that need fbtracemgr-equivalent throughput. Tests: - `test_readlines_to_eof_timed_drain` — runs `database.get_statistics` twice (once via `readlines()`, once via the new method) and asserts identical normalised output, confirming the bulk path captures the full service response. - `test_readlines_to_eof_timed_returns_timeout_when_idle` — starts a trace session against an idle database; the very first read MUST return `TIMEOUT`, not `None`. Pins the "transient idle round != EOF" contract that the bug fix depends on. --- CHANGELOG.md | 6 ++++ src/firebird/driver/core.py | 43 ++++++++++++++++++++++++ tests/test_server.py | 67 +++++++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 57c5dea..6db34cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [Unreleased] + +### Added + +- #70: `Server.readlines_to_eof_timed(timeout)` — bulk-drain variant of `readline_timed` using the `isc_info_svc_to_eof` request mode. Required for long-running trace-stream consumers whose throughput exceeds what one-line-per-call polling can drain (the existing `readline_timed` caps a Python consumer at roughly 480 trace-events/sec while `fbtracemgr` reaches roughly 3000/sec on the same workload). The new method returns one chunk per services-API round-trip, distinguishes `isc_info_end` (end-of-service → `None`) from transient indicators (`isc_info_svc_timeout` / `isc_info_data_not_ready` / `isc_info_truncated` → `TIMEOUT` sentinel), and matches what `fbtracemgr` does at the protocol level. + ## [2.0.3] - 2026-04-20 ### Fixed diff --git a/src/firebird/driver/core.py b/src/firebird/driver/core.py index 211188c..2380dfa 100644 --- a/src/firebird/driver/core.py +++ b/src/firebird/driver/core.py @@ -5735,6 +5735,49 @@ def readline_timed(self, timeout: int) -> str | Sentinel | None: if data and data.endswith('\r '): data = data[:-1] # Remove space, keep '\r' return data if data else None + def readlines_to_eof_timed(self, timeout: int) -> str | Sentinel | None: + """Drain all currently-available textual output from the last service query in one round-trip. + + Unlike `.readline_timed`, which sends the `isc_info_svc_line` request and gets a single + line per services-API call, this method sends `isc_info_svc_to_eof` and lets the server + pack as many lines as fit into the response buffer. For long-running services like + Firebird trace sessions (whose stream rate routinely exceeds what one-line-per-call + polling can drain), the bulk variant matches what the `fbtracemgr` C utility does and + avoids server-side trace-buffer overruns under sustained load. + + Arguments: + timeout: Time in seconds to wait for output. The server enforces this per call; + transient empty rounds during a still-active service return `.TIMEOUT`, + not `None`. + + Returns: + A (possibly multi-line) chunk of service output, `None` when the service reports + end of stream (trailing `isc_info_end` with no transient indicator), or `.TIMEOUT` + when the server timed out or signalled `isc_info_data_not_ready` / `isc_info_truncated` + with no payload this round. + + The returned chunk preserves the server's line-ending convention as delivered; + callers that need per-line semantics can split with `.splitlines(keepends=True)`. + Whitespace artifacts of the `isc_info_svc_line` path (trailing `'\\r '`) do not apply + here because the server emits multi-line buffers verbatim under `isc_info_svc_to_eof`. + """ + self.response.clear() + self._svc.query(self._make_request(timeout), + bytes([SrvInfoCode.TO_EOF]), self.response.raw) + if (tag := self.response.get_tag()) != SrvInfoCode.TO_EOF: # pragma: no cover + raise InterfaceError(f"Service responded with error code: {tag}") + data = self.response.read_sized_string(encoding=self.encoding, + errors=self.encoding_errors) + trailing = self.response.get_tag() + # Per `src/jrd/svc.cpp` in the Firebird engine, the trailing indicator distinguishes: + # isc_info_svc_timeout (= SrvInfoCode.TIMEOUT, 64) — server-side timeout, session live + # isc_info_data_not_ready (4) — no data right now, session live + # isc_info_truncated (2) — more data on the next call, session live + # isc_info_end (1) — service has finished + if trailing == isc_info_end: + return data if data else None + # Transient indicator: keep going. Return the payload we did get, or TIMEOUT if none. + return data if data else TIMEOUT def readline(self) -> str | None: """Get next line of textual output from last service query. diff --git a/tests/test_server.py b/tests/test_server.py index 93e1e7f..4e12a3c 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -204,6 +204,73 @@ def test_trace(server_connection, db_file, fb_vars): # Finalize svcx.trace.stop(session_id=trace1_id) +def test_readlines_to_eof_timed_drain(server_connection, db_file, fb_vars): + """`readlines_to_eof_timed` should drain a finite service's output to completion, + returning bulk chunks while the service is running and `None` once it reports + end-of-stream. Equivalent payload to `readlines()` for the same service call. + + Uses `database.get_statistics(verbose=True)` because it produces a known multi-line + output and terminates on its own — no need for a long-running trace session in the test. + """ + # Materialize the reference output once via the existing readlines() path. + server_connection.database.get_statistics(database=db_file, flags=SrvStatFlag.DEFAULT) + expected_lines = server_connection.readlines() + expected_text = '\n'.join(expected_lines).strip() + + # Now drain the same call via readlines_to_eof_timed. + server_connection.database.get_statistics(database=db_file, flags=SrvStatFlag.DEFAULT) + chunks: list[str] = [] + while True: + chunk = server_connection.readlines_to_eof_timed(1) + if chunk is None: # service reported end-of-stream + break + if chunk is driver.TIMEOUT: # idle round — non-trace services shouldn't hit this + continue + assert isinstance(chunk, str) + chunks.append(chunk) + + drained_text = ''.join(chunks).strip() + # Sanity: the bulk drain must have captured the same content as the per-line readlines() did. + # We compare on the normalized text rather than the byte-for-byte chunks because the two paths + # carry slightly different line-ending whitespace artifacts (readline_timed strips a trailing + # '\r '; the bulk path emits the multi-line buffer verbatim). + drained_normalized = drained_text.replace('\r\n', '\n').replace('\r', '\n').strip() + expected_normalized = expected_text.replace('\r\n', '\n').replace('\r', '\n').strip() + assert drained_normalized == expected_normalized + assert not server_connection.is_running() + + +def test_readlines_to_eof_timed_returns_timeout_when_idle(server_connection, db_file, fb_vars): + """When no service action is in flight, the server returns a transient indicator + (TIMEOUT / data_not_ready) without ending the stream. `readlines_to_eof_timed` + surfaces that as the `TIMEOUT` sentinel — distinct from `None` (which would mean + "service has finished and won't produce more output"). + + Exercised against a live trace session: the session is alive (we started it), but + the workload that would emit events is idle, so the response is empty + transient. + """ + trace_config = """database = %s + { + enabled = true + log_statement_finish = true + time_threshold = 0 + } + """ % str(db_file) + with connect_server(fb_vars['host'], user='SYSDBA', password=fb_vars['password']) as svcx: + trace_id = svcx.trace.start(config=trace_config, name='test_readlines_to_eof_timed_idle') + try: + # Server is alive and the trace session is registered, but nothing is hitting + # `db_file` yet — the very first read should be transient, not EOF. + result = svcx.readlines_to_eof_timed(1) + assert result is driver.TIMEOUT, ( + f"Idle trace session must return TIMEOUT, not {result!r}. Returning None " + "(EOF) here is the F-side bug: a transient buffer-empty round was conflated " + "with end-of-service and prematurely terminated trace capture." + ) + finally: + svcx.trace.stop(session_id=trace_id) + + def test_get_users(server_connection): users = server_connection.user.get_all() assert isinstance(users, type(list()))