From 28f283d7737ba61b5c6ee0eaa0886f2a48603099 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C5=82eczek?= Date: Tue, 5 May 2026 06:26:38 +0200 Subject: [PATCH 1/2] fix: Start listening after schema cache load This change ensures PostgREST starts listening on a server socket only after it loaded the schema cache and is ready to handle requests. It is no longer going to return 503 errors during startup until the schema cache is loaded. --- CHANGELOG.md | 1 + src/PostgREST/Admin.hs | 16 +++++------ src/PostgREST/App.hs | 52 ++++++++++++++++++++---------------- src/PostgREST/AppState.hs | 23 +++++++++++----- src/PostgREST/SchemaCache.hs | 6 ++--- test/io/test_io.py | 40 ++++++++++++++++++--------- 6 files changed, 86 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 575c86f68d..f526d35a03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ All notable changes to this project will be documented in this file. From versio - Shutdown should wait for in flight requests by @mkleczek in #4702 - Remove automatic transaction retries on `40001 (serialization_failure)` errors to prevent replication lag by @laurenceisla in #3673 - Fix unexpected results when embedding and filtering the same table more than once by @laurenceisla in #4075 +- Stop reporting 503 errors unnecessarily while the schema cache is loading at startup by @mkleczek in #4880 ### Changed diff --git a/src/PostgREST/Admin.hs b/src/PostgREST/Admin.hs index 99733a6995..f8501417be 100644 --- a/src/PostgREST/Admin.hs +++ b/src/PostgREST/Admin.hs @@ -22,20 +22,20 @@ import qualified PostgREST.AppState as AppState import qualified Network.Socket as NS import Protolude -runAdmin :: AppState -> Maybe NS.Socket -> NS.Socket -> Warp.Settings -> IO () -runAdmin appState maybeAdminSocket socketREST settings = do +runAdmin :: AppState -> Maybe NS.Socket -> IO (Maybe NS.Socket) -> Warp.Settings -> IO () +runAdmin appState maybeAdminSocket getSocketREST settings = do whenJust 
maybeAdminSocket $ \adminSocket -> do address <- resolveSocketToAddress adminSocket observer $ AdminStartObs address void . forkIO $ Warp.runSettingsSocket settings adminSocket adminApp where - adminApp = admin appState socketREST + adminApp = admin appState getSocketREST observer = AppState.getObserver appState -- | PostgREST admin application -admin :: AppState.AppState -> NS.Socket -> Wai.Application -admin appState socketREST req respond = do - isMainAppReachable <- isRight <$> reachMainApp socketREST +admin :: AppState.AppState -> IO (Maybe NS.Socket) -> Wai.Application +admin appState getSocketREST req respond = do + isMainAppReachable <- getSocketREST >>= maybe (pure False) (fmap isRight . reachMainApp) isLoaded <- AppState.isLoaded appState isPending <- AppState.isPending appState @@ -44,8 +44,8 @@ admin appState socketREST req respond = do respond $ Wai.responseLBS (if isMainAppReachable then HTTP.status200 else HTTP.status500) [] mempty ["ready"] -> let - status | not isMainAppReachable = HTTP.status500 - | isPending = HTTP.status503 + status | isPending = HTTP.status503 + | not isMainAppReachable = HTTP.status500 | isLoaded = HTTP.status200 | otherwise = HTTP.status500 in diff --git a/src/PostgREST/App.hs b/src/PostgREST/App.hs index 0103625733..5ff0180e93 100644 --- a/src/PostgREST/App.hs +++ b/src/PostgREST/App.hs @@ -26,6 +26,8 @@ import System.IO.Error (ioeGetErrorType) import Control.Monad.Except (liftEither) import Control.Monad.Extra (whenJust) import Data.Either.Combinators (mapLeft, whenLeft) +import Data.IORef (atomicWriteIORef, newIORef, + readIORef) import Data.String (IsString (..)) import Network.Wai.Handler.Warp (defaultSettings, setHost, setOnException, setPort, @@ -68,7 +70,7 @@ import qualified Data.List as L import Data.Streaming.Network (bindPortTCP) import qualified Data.Text as T import qualified Network.HTTP.Types as HTTP -import qualified Network.HTTP.Types.Header as HTTP (hVary) +import qualified Network.HTTP.Types.Header as HTTP 
import qualified Network.Socket as NS import PostgREST.Unix (createAndBindDomainSocket) import Protolude hiding (Handler) @@ -77,22 +79,30 @@ run :: AppState -> IO () run appState = do conf <- AppState.getConfig appState - AppState.schemaCacheLoader appState -- Loads the initial SchemaCache - (mainSocket, adminSocket) <- initSockets conf + mainSocketRef <- newIORef Nothing + adminSocket <- initAdminServerSocket conf + let closeSockets = do whenJust adminSocket NS.close - NS.close mainSocket + readIORef mainSocketRef >>= foldMap NS.close Unix.installSignalHandlers observer closeSockets (AppState.schemaCacheLoader appState) (AppState.readInDbConfig False appState) + Admin.runAdmin appState adminSocket (readIORef mainSocketRef) (serverSettings conf) + Listener.runListener appState - Admin.runAdmin appState adminSocket mainSocket (serverSettings conf) + -- Kick off and wait for the initial SchemaCache load before creating the + -- main API socket. + AppState.schemaCacheLoader appState + AppState.waitForSchemaCacheInit appState + + mainSocket <- initServerSocket conf + atomicWriteIORef mainSocketRef $ Just mainSocket let app = postgrest appState (AppState.schemaCacheLoader appState) - do - address <- resolveSocketToAddress mainSocket - observer $ AppServerAddressObs address + address <- resolveSocketToAddress mainSocket + observer $ AppServerAddressObs address Warp.runSettingsSocket (serverSettings conf & setOnException onWarpException) mainSocket app where @@ -255,20 +265,16 @@ addRetryHint delay response = do isServiceUnavailable :: Wai.Response -> Bool isServiceUnavailable response = Wai.responseStatus response == HTTP.status503 -type AppSockets = (NS.Socket, Maybe NS.Socket) +initServerSocket :: AppConfig -> IO NS.Socket +initServerSocket AppConfig{..} = case configServerUnixSocket of + -- I'm not using `streaming-commons`' bindPath function here because it's not defined for Windows, + -- but we need to have runtime error if we try to use it in Windows, not compile 
time error + Just path -> createAndBindDomainSocket path configServerUnixSocketMode + Nothing -> bindPortTCP configServerPort (fromString $ T.unpack configServerHost) -initSockets :: AppConfig -> IO AppSockets -initSockets AppConfig{..} = do - sock <- case configServerUnixSocket of - -- I'm not using `streaming-commons`' bindPath function here because it's not defined for Windows, - -- but we need to have runtime error if we try to use it in Windows, not compile time error - Just path -> createAndBindDomainSocket path configServerUnixSocketMode - Nothing -> bindPortTCP configServerPort (fromString $ T.unpack configServerHost) - - adminSock <- case configAdminServerPort of - Just adminPort -> do - adminSock <- bindPortTCP adminPort (fromString $ T.unpack configAdminServerHost) - pure $ Just adminSock - Nothing -> pure Nothing +initAdminServerSocket :: AppConfig -> IO (Maybe NS.Socket) +initAdminServerSocket AppConfig{..} = + traverse (`bindPortTCP` adminHost) configAdminServerPort + where + adminHost = fromString $ T.unpack configAdminServerHost - pure (sock, adminSock) diff --git a/src/PostgREST/AppState.hs b/src/PostgREST/AppState.hs index d1f14280d9..c43495a496 100644 --- a/src/PostgREST/AppState.hs +++ b/src/PostgREST/AppState.hs @@ -27,6 +27,7 @@ module PostgREST.AppState , getObserver , isLoaded , isPending + , waitForSchemaCacheInit ) where import qualified Data.ByteString.Char8 as BS @@ -53,6 +54,9 @@ import Data.IORef (IORef, atomicWriteIORef, newIORef, readIORef) import Data.Time.Clock (UTCTime, getCurrentTime) +import Control.Concurrent.STM (TMVar, newEmptyTMVarIO, + putTMVar, readTMVar, + tryReadTMVar, tryTakeTMVar) import PostgREST.Auth.JwtCache (JwtCacheState, update) import PostgREST.Config (AppConfig (..), readAppConfig, @@ -102,9 +106,11 @@ data AppState = AppState } -- | Schema cache status. --- Empty means pending and full means loaded. +-- Empty means initial loading on startup, False means pending and True means loaded. 
+-- "Initial" state is needed so that we can wait with application socket listening +-- until after initial schema cache querying. newtype SchemaCacheStatus = SchemaCacheStatus - { getSCStatusMVar :: MVar () + { getSCStatusTMVar :: TMVar Bool } init :: AppConfig -> IO AppState @@ -380,16 +386,21 @@ retryingSchemaCacheLoad appState@AppState{stateObserver=observer, stateMainThrea oneSecondInUs = 1000000 -- one second in microseconds newSchemaCacheStatus :: IO SchemaCacheStatus -newSchemaCacheStatus = SchemaCacheStatus <$> newEmptyMVar +newSchemaCacheStatus = SchemaCacheStatus <$> newEmptyTMVarIO markSchemaCachePending :: AppState -> IO () -markSchemaCachePending = void . tryTakeMVar . getSCStatusMVar . stateSCacheStatus +markSchemaCachePending = atomically . liftA2 (*>) tryTakeTMVar (`putTMVar` False) . getSCStatusTMVar . stateSCacheStatus markSchemaCacheLoaded :: AppState -> IO () -markSchemaCacheLoaded = void . (`tryPutMVar` ()) . getSCStatusMVar . stateSCacheStatus +markSchemaCacheLoaded = atomically . liftA2 (*>) tryTakeTMVar (`putTMVar` True) . getSCStatusTMVar . stateSCacheStatus isSchemaCacheLoaded :: AppState -> IO Bool -isSchemaCacheLoaded = fmap not . isEmptyMVar . getSCStatusMVar . stateSCacheStatus +isSchemaCacheLoaded = atomically . (pure . fromMaybe False <=< tryReadTMVar) . getSCStatusTMVar . stateSCacheStatus + +-- | Wait for initial schema cache load to either finish or retry +-- | We wait until scStatusTMVar is not empty. +waitForSchemaCacheInit :: AppState -> IO () +waitForSchemaCacheInit = atomically . void . readTMVar . getSCStatusTMVar . stateSCacheStatus -- | Reads the in-db config and reads the config file again -- | We don't retry reading the in-db config after it fails immediately, because it could have user errors. We just report the error and continue. 
diff --git a/src/PostgREST/SchemaCache.hs b/src/PostgREST/SchemaCache.hs index e026dc1456..1f9d62fc0e 100644 --- a/src/PostgREST/SchemaCache.hs +++ b/src/PostgREST/SchemaCache.hs @@ -156,6 +156,9 @@ maxDbTablesForFuzzySearch = 500 querySchemaCache :: AppConfig -> SQL.Transaction (SchemaCache, Maybe QueryTimings) querySchemaCache conf@AppConfig{..} = do SQL.sql "set local schema ''" -- This voids the search path. The following queries need this for getting the fully qualified name(schema.name) of every db object + _ <- + let sleepCall = SQL.Statement "select pg_sleep($1 / 1000.0)" (param HE.int4) HD.noResult True in + for_ configInternalSCQuerySleep (`SQL.statement` sleepCall) -- only used for testing tabs <- sqlTimedStmt gucTbls conf allTables keyDeps <- sqlTimedStmt gucKDeps conf allViewsKeyDependencies m2oRels <- sqlTimedStmt gucRels mempty allM2OandO2ORels @@ -166,9 +169,6 @@ querySchemaCache conf@AppConfig{..} = do tzones <- if configDbTimezoneEnabled then sqlTimedStmt gucTzones mempty timezones else pure S.empty - _ <- - let sleepCall = SQL.Statement "select pg_sleep($1 / 1000.0)" (param HE.int4) HD.noResult True in - for_ configInternalSCQuerySleep (`SQL.statement` sleepCall) -- only used for testing qsTime <- if isLogDebug diff --git a/test/io/test_io.py b/test/io/test_io.py index aa3e923dcd..a506992587 100644 --- a/test/io/test_io.py +++ b/test/io/test_io.py @@ -6,6 +6,7 @@ import subprocess import time import pytest +import requests from config import CONFIGSDIR, FIXTURES, SECRET from util import ( @@ -1090,7 +1091,7 @@ def test_empty_schema_cache_log_contains_jwt_role(defaultenv): env = { **defaultenv, - "PGRST_INTERNAL_SCHEMA_CACHE_QUERY_SLEEP": "1000", + "PGRST_DB_SCHEMAS": "non_existent_schema_aaaa", "PGRST_JWT_SECRET": SECRET, } headers = jwtauthheader({"role": "postgrest_test_author"}, SECRET) @@ -1543,14 +1544,19 @@ def test_log_postgrest_host_and_port(host, defaultenv): with run( env=defaultenv, host=host, port=port, no_startup_stdout=False ) as 
postgrest: - output = postgrest.read_stdout(nlines=10) + output = postgrest.read_stdout(nlines=11) + # Cannot assume a particular log entry order + # Listening on a socket happens after schema querying + # but is concurrent to the schema loading process + # and might happen before or after writing of the + # "Schema cache loaded" log entry if is_unix: - re.match(r'API server listening on "/tmp/.*\.sock"', output[2]) + match_log(output, [r".*API server listening on .*/tmp/.*\.sock"]) elif is_ipv6(host): - assert f"API server listening on [{host}]:{port}" in output[2] + match_log(output, [r".*API server listening on \[.+]:\d+"]) else: # IPv4 - assert f"API server listening on {host}:{port}" in output[2] + match_log(output, [r".*API server listening on .+:\d+"]) def test_succeed_w_role_having_superuser_settings(defaultenv): @@ -1898,17 +1904,24 @@ def test_pgrst_log_503_client_error_to_stderr(defaultenv): assert any(log_message in line for line in output) -def test_log_error_when_empty_schema_cache_on_startup_to_stderr(defaultenv): - "Should log the 503 error message when there is an empty schema cache on startup" +def test_log_error_when_schema_cache_load_error_on_startup_to_stderr(defaultenv): + "Should log the 503 error message when there is an error loading schema cache on startup" env = { **defaultenv, - "PGRST_INTERNAL_SCHEMA_CACHE_QUERY_SLEEP": "300", + "PGRST_INTERNAL_SCHEMA_CACHE_QUERY_SLEEP": "1000", + "PGRST_DB_SCHEMAS": "non_existent_schema_aaaa", } with run(env=env, wait_for=None) as postgrest: postgrest.wait_until_scache_starts_loading() + # First call should fail with connection refused + with pytest.raises(requests.ConnectionError): + postgrest.session.get("/projects") + + # Next call should return 503 + time.sleep(1) response = postgrest.session.get("/projects") assert response.status_code == 503 @@ -1920,7 +1933,7 @@ def test_log_error_when_empty_schema_cache_on_startup_to_stderr(defaultenv): def 
test_no_double_schema_cache_reload_on_empty_schema(defaultenv): - "Should only load the schema cache once on a 503 error when there's an empty schema cache on startup" + "Should only load the schema cache once when there's an empty schema cache on startup" env = { **defaultenv, @@ -1930,12 +1943,15 @@ def test_no_double_schema_cache_reload_on_empty_schema(defaultenv): with run(env=env, port=freeport(), wait_for=None) as postgrest: postgrest.wait_until_scache_starts_loading() - response = postgrest.session.get("/projects") - assert response.status_code == 503 + with pytest.raises(requests.ConnectionError): + postgrest.session.get("/projects") # Should wait enough time to load the schema cache twice to guarantee that the test is valid time.sleep(1) + response = postgrest.session.get("/projects") + assert response.status_code == 200 + response = postgrest.admin.get("/metrics") assert response.status_code == 200 assert 'pgrst_schema_cache_loads_total{status="SUCCESS"} 1.0' in response.text @@ -2017,7 +2033,7 @@ def test_schema_cache_error_observation(defaultenv): output = postgrest.read_stdout(nlines=9) assert ( "Failed to load the schema cache using db-schemas=public and db-extra-search-path=x" - in output[7] + in output[6] ) From e9fee6a4efb99e69f09a16198d8fb26fc8be8777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20K=C5=82eczek?= Date: Tue, 10 Mar 2026 13:42:17 +0100 Subject: [PATCH 2/2] add: use SO_REUSEPORT on platform supporting it --- CHANGELOG.md | 1 + docs/how-tos/zero-downtime-upgrades.rst | 164 ++++++++++++++++++ docs/postgrest.dict | 4 + docs/references/admin_server.rst | 12 +- docs/references/configuration.rst | 49 ++++++ src/PostgREST/App.hs | 20 ++- src/PostgREST/AppState.hs | 4 + src/PostgREST/Config.hs | 4 + test/io/configs/expected/aliases.config | 1 + .../configs/expected/boolean-numeric.config | 1 + .../io/configs/expected/boolean-string.config | 1 + test/io/configs/expected/defaults.config | 1 + .../expected/jspath-str-op-dump1.config | 1 + 
.../expected/jspath-str-op-dump2.config | 1 + .../expected/jspath-str-op-dump3.config | 1 + .../expected/jspath-str-op-dump4.config | 1 + .../expected/jspath-str-op-dump5.config | 1 + ...efaults-with-db-other-authenticator.config | 1 + .../expected/no-defaults-with-db.config | 1 + test/io/configs/expected/no-defaults.config | 1 + test/io/configs/expected/types.config | 1 + test/io/configs/expected/utf-8.config | 1 + test/io/configs/no-defaults-env.yaml | 1 + test/io/configs/no-defaults.config | 1 + test/io/postgrest.py | 5 +- test/io/test_io.py | 33 +++- test/observability/ObsHelper.hs | 1 + test/spec/SpecHelper.hs | 1 + 28 files changed, 301 insertions(+), 13 deletions(-) create mode 100644 docs/how-tos/zero-downtime-upgrades.rst diff --git a/CHANGELOG.md b/CHANGELOG.md index f526d35a03..99d4aa4eeb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ All notable changes to this project will be documented in this file. From versio - Add config `db-timezone-enabled` for optional querying of timezones by @taimoorzaeem in #4751 - Log schema cache queries timings on `log-level=debug` by @steve-chavez in #4805 - Add GHC runtime metrics to the metrics endpoint by @mkleczek in #4862 +- Enable starting multiple PostgREST instances using the same ports on platforms supporting it by @mkleczek in #4703 #4694 ### Fixed diff --git a/docs/how-tos/zero-downtime-upgrades.rst b/docs/how-tos/zero-downtime-upgrades.rst new file mode 100644 index 0000000000..6f8cb31b91 --- /dev/null +++ b/docs/how-tos/zero-downtime-upgrades.rst @@ -0,0 +1,164 @@ +.. _zero_downtime_upgrades: + +Zero-Downtime Upgrades +====================== + +When :ref:`server-reuseport` is enabled on an operating system that supports +``SO_REUSEPORT``, PostgREST can start more than one process on the same +:ref:`server-host` and :ref:`server-port`. This allows a new PostgREST process +to start and become ready before the old process is stopped. 
+ +While both processes are running, the operating system distributes new +connections between them. After the old process exits, the new process receives +all new connections. + +This is useful for upgrades and restarts: + +1. Keep the old PostgREST process serving requests. +2. Start the new PostgREST process on the same host and port. +3. Wait for the new process to report ``/ready``. +4. Stop the old process. + +Configuration +------------- + +Both processes should use the same public host and port: + +.. code-block:: ini + + # /etc/postgrest/postgrest.conf + server-host = "127.0.0.1" + server-port = 3000 + server-reuseport = true + + admin-server-host = "127.0.0.1" + admin-server-port = 3001 + +The second process can use the same configuration file and override only the +admin server port: + +.. code-block:: bash + + PGRST_ADMIN_SERVER_PORT=3002 postgrest /etc/postgrest/postgrest.conf + +.. important:: + + Use a different :ref:`admin-server-port` for each PostgREST process during + the handover. Admin ports are not shared between processes. This keeps + readiness checks unambiguous: ``/ready`` on the new admin port can only be + answered by the new process. + +Before using this in production, keep these details in mind: + +- This works for host and port based servers. It does not apply when + :ref:`server-unix-socket` is used. +- If :ref:`server-reuseport` is disabled, the new process will fail to start + with an address-in-use error and the old process will keep serving requests. +- If :ref:`server-reuseport` is enabled on an operating system that does not + support ``SO_REUSEPORT``, PostgREST will fail to start because the + configuration is not supported on that platform. +- If the new process uses the same :ref:`admin-server-port` as the old process, + it will fail to start because that admin port is already in use. +- Each PostgREST process has its own :ref:`db-pool`. During the handover, the + total possible database connections can temporarily double. 
+- The old and new processes may both serve requests for a short time. Database + migrations should be compatible with both versions while they overlap. + +Manual Handover +--------------- + +Assuming the old process is already serving on ``127.0.0.1:3000`` and its PID +is stored in ``OLD_PID``: + +.. code-block:: bash + + PGRST_ADMIN_SERVER_PORT=3002 postgrest /etc/postgrest/postgrest.conf & + NEW_PID=$! + + curl --fail http://127.0.0.1:3002/ready + + kill -TERM "$OLD_PID" + +The ``curl`` request checks the new process through its own admin server port. +If the new process cannot load its configuration, connect to the database, or +load the schema cache, ``/ready`` will not return a successful response and the +old process can keep serving traffic. + +Example Script +-------------- + +The following script shows the full sequence for a setup that stores the old +process PID in a PID file. Adapt the start and stop commands to your process +manager. + +.. code-block:: bash + + #!/usr/bin/env bash + set -euo pipefail + + POSTGREST=${POSTGREST:-postgrest} + CONFIG=${CONFIG:-/etc/postgrest/postgrest.conf} + PID_FILE=${PID_FILE:-/run/postgrest.pid} + + ADMIN_HOST=${ADMIN_HOST:-127.0.0.1} + NEW_ADMIN_PORT=${NEW_ADMIN_PORT:-3002} + READY_TIMEOUT=${READY_TIMEOUT:-30} + STOP_TIMEOUT=${STOP_TIMEOUT:-30} + + if [[ ! -s "$PID_FILE" ]]; then + echo "PID file not found or empty: $PID_FILE" >&2 + exit 1 + fi + + OLD_PID=$(<"$PID_FILE") + + if ! kill -0 "$OLD_PID" 2>/dev/null; then + echo "Old PostgREST process is not running: $OLD_PID" >&2 + exit 1 + fi + + PGRST_ADMIN_SERVER_HOST="$ADMIN_HOST" \ + PGRST_ADMIN_SERVER_PORT="$NEW_ADMIN_PORT" \ + "$POSTGREST" "$CONFIG" & + NEW_PID=$! + + cleanup_new_process() { + kill "$NEW_PID" 2>/dev/null || true + } + trap cleanup_new_process EXIT INT TERM + + READY_URL="http://$ADMIN_HOST:$NEW_ADMIN_PORT/ready" + READY_DEADLINE=$((SECONDS + READY_TIMEOUT)) + + until curl --fail --silent --show-error --output /dev/null "$READY_URL"; do + if ! 
kill -0 "$NEW_PID" 2>/dev/null; then + echo "New PostgREST process exited before it became ready" >&2 + exit 1 + fi + + if (( SECONDS >= READY_DEADLINE )); then + echo "New PostgREST process did not become ready at $READY_URL" >&2 + exit 1 + fi + + sleep 1 + done + + printf '%s\n' "$NEW_PID" > "$PID_FILE" + + kill -TERM "$OLD_PID" 2>/dev/null || true + + STOP_DEADLINE=$((SECONDS + STOP_TIMEOUT)) + + while kill -0 "$OLD_PID" 2>/dev/null; do + if (( SECONDS >= STOP_DEADLINE )); then + echo "Old PostgREST process did not stop after SIGTERM; sending SIGKILL" >&2 + kill -KILL "$OLD_PID" + break + fi + + sleep 1 + done + + trap - EXIT INT TERM + echo "PostgREST handover complete: $OLD_PID -> $NEW_PID" diff --git a/docs/postgrest.dict b/docs/postgrest.dict index 324d51b888..ef5d548c8d 100644 --- a/docs/postgrest.dict +++ b/docs/postgrest.dict @@ -34,6 +34,7 @@ DSL DevOps Dramatiq dockerize +downtime enum Enums Entra @@ -59,6 +60,7 @@ HMAC htmx Htmx Homebrew +handover hstore HTTP HTTPS @@ -113,6 +115,7 @@ ov parametrized passphrase PBKDF +PID PgBouncer pgcrypto pgjwt @@ -144,6 +147,7 @@ Redux refactor reloadable Reloadable +reuseport requester's RESTful RLS diff --git a/docs/references/admin_server.rst b/docs/references/admin_server.rst index cafd18e2d4..5251e2b0fc 100644 --- a/docs/references/admin_server.rst +++ b/docs/references/admin_server.rst @@ -16,9 +16,15 @@ Two endpoints ``live`` and ``ready`` will then be available. Both these endpoint .. important:: - If you have a machine with multiple network interfaces and multiple PostgREST instances in the same port, you need to specify a unique :ref:`hostname ` - in the configuration of each PostgREST instance for the health check to work correctly. Don't use the special values(``!4``, ``*``, etc) in this case because the health check - could report a false positive. 
+ Multiple PostgREST instances can share the same public API host and port when + :ref:`server-reuseport` is enabled on operating systems that support + ``SO_REUSEPORT``. Admin ports are not shared: give each instance a different + :ref:`admin-server-port`, otherwise the new instance will fail to start. + + If the machine has multiple network interfaces, configure concrete + :ref:`server-host` and :ref:`admin-server-host` values when you need health + checks to target a specific process. Avoid special values (``!4``, ``*``, etc) + in this case because the health check could report a false positive. Live ---- diff --git a/docs/references/configuration.rst b/docs/references/configuration.rst index 48e1a92753..f50aca5491 100644 --- a/docs/references/configuration.rst +++ b/docs/references/configuration.rst @@ -176,6 +176,11 @@ admin-server-port Specifies the port for the :ref:`admin_server`. Cannot be equal to :ref:`server-port`. + When running multiple PostgREST instances on the same :ref:`server-port`, use + a different ``admin-server-port`` for each instance. Admin ports are not shared + between instances, so readiness checks always target one specific PostgREST + instance. See :ref:`zero_downtime_upgrades`. + .. _app.settings.*: app.settings.* @@ -899,6 +904,50 @@ server-port The TCP port to bind the web server. Use ``0`` to automatically assign a port. + When :ref:`server-reuseport` is enabled on an operating system that supports + ``SO_REUSEPORT``, you can start multiple PostgREST instances on the same + :ref:`server-host` and ``server-port``. For example, two PostgREST processes + can use the same configuration: + + .. code:: ini + + server-host = "127.0.0.1" + server-port = 3000 + server-reuseport = true + + New connections are then distributed by the operating system between the + running PostgREST processes. This can be used to start a replacement process + before stopping the old one, or to run several PostgREST processes behind one + port. 
+ + If ``server-reuseport`` is disabled, starting another PostgREST process on + the same host and port will fail with the usual address-in-use error. + + For a step-by-step example, see :ref:`zero_downtime_upgrades`. + +.. _server-reuseport: + +server-reuseport +---------------- + + =============== ================================= + **Type** Bool + **Default** false + **Reloadable** N + **Environment** PGRST_SERVER_REUSEPORT + **In-Database** `n/a` + =============== ================================= + + Enables ``SO_REUSEPORT`` on the TCP server socket. This allows multiple + PostgREST processes to bind to the same :ref:`server-host` and + :ref:`server-port` when the operating system supports it. + + Enabling this setting on an operating system that does not support + ``SO_REUSEPORT`` is a configuration error. PostgREST will fail to start + instead of falling back to a normal TCP socket. + + This setting does not apply when :ref:`server-unix-socket` is used. + .. _server-trace-header: server-trace-header diff --git a/src/PostgREST/App.hs b/src/PostgREST/App.hs index 5ff0180e93..a3dae06526 100644 --- a/src/PostgREST/App.hs +++ b/src/PostgREST/App.hs @@ -67,7 +67,9 @@ import PostgREST.Version (docsVersion, prettyVersion) import Control.Monad.Writer import qualified Data.ByteString.Char8 as BS import qualified Data.List as L -import Data.Streaming.Network (bindPortTCP) +import Data.Streaming.Network (HostPreference, + bindPortGenEx, + bindPortTCP) import qualified Data.Text as T import qualified Network.HTTP.Types as HTTP import qualified Network.HTTP.Types.Header as HTTP @@ -77,7 +79,7 @@ import Protolude hiding (Handler) run :: AppState -> IO () run appState = do - conf <- AppState.getConfig appState + conf@AppConfig{configServerReusePort} <- AppState.getConfig appState mainSocketRef <- newIORef Nothing adminSocket <- initAdminServerSocket conf @@ -94,7 +96,10 @@ run appState = do -- Kick off and wait for the initial SchemaCache load before creating the -- main 
API socket. AppState.schemaCacheLoader appState - AppState.waitForSchemaCacheInit appState + if configServerReusePort then + AppState.waitForSchemaCacheLoaded appState + else + AppState.waitForSchemaCacheInit appState mainSocket <- initServerSocket conf atomicWriteIORef mainSocketRef $ Just mainSocket @@ -270,7 +275,9 @@ initServerSocket AppConfig{..} = case configServerUnixSocket of -- I'm not using `streaming-commons`' bindPath function here because it's not defined for Windows, -- but we need to have runtime error if we try to use it in Windows, not compile time error Just path -> createAndBindDomainSocket path configServerUnixSocketMode - Nothing -> bindPortTCP configServerPort (fromString $ T.unpack configServerHost) + Nothing + | configServerReusePort -> bindPortTCPWithReusePort configServerPort (fromString $ T.unpack configServerHost) + | otherwise -> bindPortTCP configServerPort (fromString $ T.unpack configServerHost) initAdminServerSocket :: AppConfig -> IO (Maybe NS.Socket) initAdminServerSocket AppConfig{..} = @@ -278,3 +285,8 @@ initAdminServerSocket AppConfig{..} = where adminHost = fromString $ T.unpack configAdminServerHost +bindPortTCPWithReusePort :: Int -> HostPreference -> IO NS.Socket +bindPortTCPWithReusePort port hostPreference = + bindPortGenEx [(NS.ReusePort, 1)] NS.Stream port hostPreference >>= listenSocket + where + listenSocket sock = NS.listen sock (max 2048 NS.maxListenQueue) $> sock diff --git a/src/PostgREST/AppState.hs b/src/PostgREST/AppState.hs index c43495a496..8f3fe98cff 100644 --- a/src/PostgREST/AppState.hs +++ b/src/PostgREST/AppState.hs @@ -28,6 +28,7 @@ module PostgREST.AppState , isLoaded , isPending , waitForSchemaCacheInit + , waitForSchemaCacheLoaded ) where import qualified Data.ByteString.Char8 as BS @@ -402,6 +403,9 @@ isSchemaCacheLoaded = atomically . (pure . fromMaybe False <=< tryReadTMVar) . g waitForSchemaCacheInit :: AppState -> IO () waitForSchemaCacheInit = atomically . void . readTMVar . getSCStatusTMVar . 
stateSCacheStatus +waitForSchemaCacheLoaded :: AppState -> IO () +waitForSchemaCacheLoaded = atomically . (check <=< readTMVar) . getSCStatusTMVar . stateSCacheStatus + -- | Reads the in-db config and reads the config file again -- | We don't retry reading the in-db config after it fails immediately, because it could have user errors. We just report the error and continue. readInDbConfig :: Bool -> AppState -> IO () diff --git a/src/PostgREST/Config.hs b/src/PostgREST/Config.hs index 47065856a3..9fc41ac7b5 100644 --- a/src/PostgREST/Config.hs +++ b/src/PostgREST/Config.hs @@ -118,6 +118,7 @@ data AppConfig = AppConfig , configServerCorsAllowedOrigins :: Maybe [Text] , configServerHost :: Text , configServerPort :: Int + , configServerReusePort :: Bool , configServerTraceHeader :: Maybe (CI.CI BS.ByteString) , configServerTimingEnabled :: Bool , configServerUnixSocket :: Maybe FilePath @@ -201,6 +202,7 @@ toText conf = ,("server-cors-allowed-origins", q . maybe "" (T.intercalate ",") . configServerCorsAllowedOrigins) ,("server-host", q . configServerHost) ,("server-port", show . configServerPort) + ,("server-reuseport", T.toLower . show . configServerReusePort) ,("server-trace-header", q . T.decodeUtf8 . maybe mempty CI.original . configServerTraceHeader) ,("server-timing-enabled", T.toLower . show . configServerTimingEnabled) ,("server-unix-socket", q . maybe mempty T.pack . configServerUnixSocket) @@ -316,6 +318,7 @@ parser optPath env dbSettings roleSettings roleIsolationLvl = <*> parseCORSAllowedOrigins "server-cors-allowed-origins" <*> (defaultServerHost <$> optString "server-host") <*> parseServerPort "server-port" + <*> (fromMaybe False <$> optBool "server-reuseport") <*> (fmap (CI.mk . 
encodeUtf8) <$> optString "server-trace-header") <*> (fromMaybe False <$> optBool "server-timing-enabled") <*> (fmap T.unpack <$> optString "server-unix-socket") @@ -775,6 +778,7 @@ exampleConfigFile = S.unlines , "" , "server-host = \"!4\"" , "server-port = 3000" + , "server-reuseport = false" , "" , "## Allow getting the request-response timing information through the `Server-Timing` header" , "server-timing-enabled = false" diff --git a/test/io/configs/expected/aliases.config b/test/io/configs/expected/aliases.config index d280c6254c..b7d921e43a 100644 --- a/test/io/configs/expected/aliases.config +++ b/test/io/configs/expected/aliases.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "" server-cors-allowed-origins = "" server-host = "!4" server-port = 3000 +server-reuseport = false server-trace-header = "" server-timing-enabled = false server-unix-socket = "" diff --git a/test/io/configs/expected/boolean-numeric.config b/test/io/configs/expected/boolean-numeric.config index 7f1ac07a14..50c36efd47 100644 --- a/test/io/configs/expected/boolean-numeric.config +++ b/test/io/configs/expected/boolean-numeric.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "" server-cors-allowed-origins = "" server-host = "!4" server-port = 3000 +server-reuseport = false server-trace-header = "" server-timing-enabled = false server-unix-socket = "" diff --git a/test/io/configs/expected/boolean-string.config b/test/io/configs/expected/boolean-string.config index 7f1ac07a14..50c36efd47 100644 --- a/test/io/configs/expected/boolean-string.config +++ b/test/io/configs/expected/boolean-string.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "" server-cors-allowed-origins = "" server-host = "!4" server-port = 3000 +server-reuseport = false server-trace-header = "" server-timing-enabled = false server-unix-socket = "" diff --git a/test/io/configs/expected/defaults.config b/test/io/configs/expected/defaults.config index ddd9364c28..f7f8c5cfd5 100644 --- 
a/test/io/configs/expected/defaults.config +++ b/test/io/configs/expected/defaults.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "" server-cors-allowed-origins = "" server-host = "!4" server-port = 3000 +server-reuseport = false server-trace-header = "" server-timing-enabled = false server-unix-socket = "" diff --git a/test/io/configs/expected/jspath-str-op-dump1.config b/test/io/configs/expected/jspath-str-op-dump1.config index 25fd233463..15fc2b755a 100644 --- a/test/io/configs/expected/jspath-str-op-dump1.config +++ b/test/io/configs/expected/jspath-str-op-dump1.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "" server-cors-allowed-origins = "" server-host = "!4" server-port = 3000 +server-reuseport = false server-trace-header = "" server-timing-enabled = false server-unix-socket = "" diff --git a/test/io/configs/expected/jspath-str-op-dump2.config b/test/io/configs/expected/jspath-str-op-dump2.config index b53bf827ed..d3d41bc1be 100644 --- a/test/io/configs/expected/jspath-str-op-dump2.config +++ b/test/io/configs/expected/jspath-str-op-dump2.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "" server-cors-allowed-origins = "" server-host = "!4" server-port = 3000 +server-reuseport = false server-trace-header = "" server-timing-enabled = false server-unix-socket = "" diff --git a/test/io/configs/expected/jspath-str-op-dump3.config b/test/io/configs/expected/jspath-str-op-dump3.config index bd1bed9936..821c82d5df 100644 --- a/test/io/configs/expected/jspath-str-op-dump3.config +++ b/test/io/configs/expected/jspath-str-op-dump3.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "" server-cors-allowed-origins = "" server-host = "!4" server-port = 3000 +server-reuseport = false server-trace-header = "" server-timing-enabled = false server-unix-socket = "" diff --git a/test/io/configs/expected/jspath-str-op-dump4.config b/test/io/configs/expected/jspath-str-op-dump4.config index b169f03a22..79596b280b 100644 --- 
a/test/io/configs/expected/jspath-str-op-dump4.config +++ b/test/io/configs/expected/jspath-str-op-dump4.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "" server-cors-allowed-origins = "" server-host = "!4" server-port = 3000 +server-reuseport = false server-trace-header = "" server-timing-enabled = false server-unix-socket = "" diff --git a/test/io/configs/expected/jspath-str-op-dump5.config b/test/io/configs/expected/jspath-str-op-dump5.config index 13596cc813..3af953d1fe 100644 --- a/test/io/configs/expected/jspath-str-op-dump5.config +++ b/test/io/configs/expected/jspath-str-op-dump5.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "" server-cors-allowed-origins = "" server-host = "!4" server-port = 3000 +server-reuseport = false server-trace-header = "" server-timing-enabled = false server-unix-socket = "" diff --git a/test/io/configs/expected/no-defaults-with-db-other-authenticator.config b/test/io/configs/expected/no-defaults-with-db-other-authenticator.config index c47bb402ed..84a5ff3f15 100644 --- a/test/io/configs/expected/no-defaults-with-db-other-authenticator.config +++ b/test/io/configs/expected/no-defaults-with-db-other-authenticator.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "https://otherexample.org/api" server-cors-allowed-origins = "http://otherorigin.com" server-host = "0.0.0.0" server-port = 80 +server-reuseport = true server-trace-header = "traceparent" server-timing-enabled = true server-unix-socket = "/tmp/pgrst_io_test.sock" diff --git a/test/io/configs/expected/no-defaults-with-db.config b/test/io/configs/expected/no-defaults-with-db.config index 4363f3262c..a1d0437d0b 100644 --- a/test/io/configs/expected/no-defaults-with-db.config +++ b/test/io/configs/expected/no-defaults-with-db.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "https://example.org/api" server-cors-allowed-origins = "http://origin.com" server-host = "0.0.0.0" server-port = 80 +server-reuseport = true server-trace-header = "CF-Ray" 
server-timing-enabled = false server-unix-socket = "/tmp/pgrst_io_test.sock" diff --git a/test/io/configs/expected/no-defaults.config b/test/io/configs/expected/no-defaults.config index 3156287737..b1f52f8c65 100644 --- a/test/io/configs/expected/no-defaults.config +++ b/test/io/configs/expected/no-defaults.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "https://postgrest.org" server-cors-allowed-origins = "http://example.com" server-host = "0.0.0.0" server-port = 80 +server-reuseport = true server-trace-header = "X-Request-Id" server-timing-enabled = true server-unix-socket = "/tmp/pgrst_io_test.sock" diff --git a/test/io/configs/expected/types.config b/test/io/configs/expected/types.config index fac4d596df..8c35840ad3 100644 --- a/test/io/configs/expected/types.config +++ b/test/io/configs/expected/types.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "" server-cors-allowed-origins = "" server-host = "!4" server-port = 3000 +server-reuseport = false server-trace-header = "" server-timing-enabled = false server-unix-socket = "" diff --git a/test/io/configs/expected/utf-8.config b/test/io/configs/expected/utf-8.config index 7f29a498c9..0f97ed3a98 100644 --- a/test/io/configs/expected/utf-8.config +++ b/test/io/configs/expected/utf-8.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "" server-cors-allowed-origins = "" server-host = "!4" server-port = 3000 +server-reuseport = false server-trace-header = "" server-timing-enabled = false server-unix-socket = "" diff --git a/test/io/configs/no-defaults-env.yaml b/test/io/configs/no-defaults-env.yaml index ce9280e1e5..989e70b627 100644 --- a/test/io/configs/no-defaults-env.yaml +++ b/test/io/configs/no-defaults-env.yaml @@ -37,6 +37,7 @@ PGRST_OPENAPI_SERVER_PROXY_URI: 'https://postgrest.org' PGRST_SERVER_CORS_ALLOWED_ORIGINS: "http://example.com" PGRST_SERVER_HOST: 0.0.0.0 PGRST_SERVER_PORT: 80 +PGRST_SERVER_REUSEPORT: true PGRST_SERVER_TRACE_HEADER: X-Request-Id PGRST_SERVER_TIMING_ENABLED: true 
PGRST_SERVER_UNIX_SOCKET: /tmp/pgrst_io_test.sock diff --git a/test/io/configs/no-defaults.config b/test/io/configs/no-defaults.config index 6bb1cec158..62cd6c8bff 100644 --- a/test/io/configs/no-defaults.config +++ b/test/io/configs/no-defaults.config @@ -34,6 +34,7 @@ openapi-server-proxy-uri = "https://postgrest.org" server-cors-allowed-origins = "http://example.com" server-host = "0.0.0.0" server-port = 80 +server-reuseport = true server-trace-header = "X-Request-Id" server-timing-enabled = true server-unix-socket = "/tmp/pgrst_io_test.sock" diff --git a/test/io/postgrest.py b/test/io/postgrest.py index 65431035cd..afbb1ea83f 100644 --- a/test/io/postgrest.py +++ b/test/io/postgrest.py @@ -98,7 +98,7 @@ def run( admin_port=None, host=None, wait_for=Admin.ready, - wait_max_seconds=1, + wait_max_seconds=3, no_pool_connection_available=False, no_startup_stdout=True, ): @@ -249,6 +249,7 @@ def wait_until_exit(postgrest, timeout=1): def wait_until_status_code(url, max_seconds, status_code): "Wait for the given HTTP endpoint to return a status code" session = requests_unixsocket.Session() + response = None for _ in range(max_seconds * 10): try: @@ -260,7 +261,7 @@ def wait_until_status_code(url, max_seconds, status_code): time.sleep(0.1) - if response: + if response is not None: raise PostgrestTimedOut(f"{response.status_code}: {response.text}") else: raise PostgrestTimedOut() diff --git a/test/io/test_io.py b/test/io/test_io.py index a506992587..aeca657ec0 100644 --- a/test/io/test_io.py +++ b/test/io/test_io.py @@ -19,6 +19,7 @@ ) from postgrest import ( Admin, + PostgrestTimedOut, freeport, is_ipv6, reset_statement_timeout, @@ -176,7 +177,6 @@ def test_random_port_bound(defaultenv): assert True # liveness check is done by run(), so we just need to check that it doesn't fail -@pytest.mark.xfail(reason="PostgREST should not start on a used port", strict=True) def test_so_reuseport_zero_downtime_handover(defaultenv): "A second PostgREST instance should take over on 
the same main/admin ports without request failures." @@ -204,7 +204,7 @@ def test_so_reuseport_zero_downtime_handover(defaultenv): # 6. Stop second PostgREST instance # 7. Verify client did not get any errors with run( - env={**defaultenv}, + env={**defaultenv, "PGRST_SERVER_REUSEPORT": "true"}, port=port, host=host, admin_port=admin_port, @@ -226,10 +226,11 @@ def continuously_request(): try: time.sleep(1) with run( - env={**defaultenv}, + env={**defaultenv, "PGRST_SERVER_REUSEPORT": "true"}, port=port, host=host, - admin_port=admin_port, + # we do not set SO_REUSEPORT on admin socket + admin_port=freeport(used_ports=[port, admin_port]), ): time.sleep(1) first.process.terminate() @@ -243,6 +244,30 @@ def continuously_request(): assert failures == [] +def test_so_reuseport_defaults_to_false(defaultenv): + "A second PostgREST instance should not bind to the same port by default." + + host = "0.0.0.0" + port = freeport() + admin_port = freeport(used_ports=[port]) + + with run( + env={**defaultenv}, + port=port, + host=host, + admin_port=admin_port, + ): + with pytest.raises(PostgrestTimedOut): + with run( + env={**defaultenv}, + port=port, + host=host, + admin_port=freeport(used_ports=[port, admin_port]), + wait_max_seconds=1, + ): + pass + + def test_app_settings_reload(tmp_path, defaultenv): "App settings should be reloaded from file when PostgREST is sent SIGUSR2." 
config = (CONFIGSDIR / "sigusr2-settings.config").read_text() diff --git a/test/observability/ObsHelper.hs b/test/observability/ObsHelper.hs index 268bdbf67c..9ecf5ecf7d 100644 --- a/test/observability/ObsHelper.hs +++ b/test/observability/ObsHelper.hs @@ -108,6 +108,7 @@ baseCfg = let secret = encodeUtf8 "reallyreallyreallyreallyverysafe" in , configServerCorsAllowedOrigins = Nothing , configServerHost = "localhost" , configServerPort = 3000 + , configServerReusePort = False , configServerTraceHeader = Nothing , configServerUnixSocket = Nothing , configServerUnixSocketMode = 432 diff --git a/test/spec/SpecHelper.hs b/test/spec/SpecHelper.hs index 4e45a5a31b..de4f0bcca5 100644 --- a/test/spec/SpecHelper.hs +++ b/test/spec/SpecHelper.hs @@ -149,6 +149,7 @@ baseCfg = let secret = encodeUtf8 "reallyreallyreallyreallyverysafe" in , configServerCorsAllowedOrigins = Nothing , configServerHost = "localhost" , configServerPort = 3000 + , configServerReusePort = False , configServerTraceHeader = Nothing , configServerUnixSocket = Nothing , configServerUnixSocketMode = 432