diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f2017fb --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,43 @@ +name: CI + +on: + pull_request: + push: + branches: + - main + - master + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up GHC and Cabal + uses: haskell-actions/setup@v2 + with: + ghc-version: '9.10.3' + cabal-version: '3.16' + + - name: Cache Cabal artifacts + uses: actions/cache@v4 + with: + path: | + ~/.cabal/packages + ~/.cabal/store + dist-newstyle + key: ${{ runner.os }}-ghc-9.10.3-cabal-${{ hashFiles('**/*.cabal', 'cabal.project', 'cabal.project.local') }} + restore-keys: | + ${{ runner.os }}-ghc-9.10.3-cabal- + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y pkg-config zlib1g-dev + + - name: Run tests + run: | + cabal update + cabal test all --test-show-details=direct diff --git a/README.md b/README.md index 221be74..4c414f5 100644 --- a/README.md +++ b/README.md @@ -80,10 +80,50 @@ let ins = Insert.insert "events" ["id", "name"] rowEncoder mempty runInsert connection ins () [(1, "signup"), (2, "purchase")] ``` +For richer row types, the pattern is the same: define a record for one row, then compose a row encoder by projecting each field into the appropriate `Value`. + +```haskell +import Data.Functor.Contravariant (contramap) +import Data.HashMap.Strict qualified as HashMap +import Data.Text (Text) +import Data.Time (UTCTime) +import Data.Word (Word64) +import Database.ClickHouse +import Database.ClickHouse.Insert qualified as Insert +import Database.ClickHouse.Value qualified as Value + +data EventRow = EventRow + { eventId :: Word64 + , eventName :: Text + , userId :: Maybe Word64 + , tags :: [Text] + , attributes :: HashMap.HashMap Text Text + , createdAt :: UTCTime + } + +let rowEncoder = + contramap eventId Value.uint64 + <> contramap eventName Value.string + <> contramap userId (Value.nullable Value.uint64) + <> contramap tags (Value.array Value.string) + <> contramap attributes (Value.map Value.string Value.string) + <> contramap createdAt Value.dateTime + +let ins = + Insert.insert + "events" + ["event_id", "event_name", "user_id", "tags", "attributes", "created_at"] + rowEncoder + mempty +``` + +That composes one encoder for the whole row out of per-column encoders. See `examples/ComplexInsert.hs` for a full runnable example. + ## Examples - `examples/SimpleQuery.hs` - `examples/SimpleInsert.hs` +- `examples/ComplexInsert.hs` ## Supported value shapes diff --git a/clickhouse-client.cabal b/clickhouse-client.cabal index 353bde0..fd0e110 100644 --- a/clickhouse-client.cabal +++ b/clickhouse-client.cabal @@ -92,8 +92,10 @@ test-suite clickhouse-client-test , base >=4.19 && <4.21 , bytestring , clickhouse-client + , hashable , tasty , tasty-hunit , text , time , unordered-containers + , uuid diff --git a/examples/ComplexInsert.hs b/examples/ComplexInsert.hs new file mode 100644 index 0000000..ebfa093 --- /dev/null +++ b/examples/ComplexInsert.hs @@ -0,0 +1,71 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Main (main) where + +import Data.Functor.Contravariant (contramap) +import Data.HashMap.Strict (HashMap) +import Data.HashMap.Strict qualified as HashMap +import Data.Text (Text) +import Data.Time (UTCTime) +import Data.Word (Word64) +import Database.ClickHouse +import Database.ClickHouse.Insert qualified as Insert +import Database.ClickHouse.Value qualified as Value + +-- A more realistic row type than a 2-tuple. +data EventRow = EventRow + { eventId :: Word64, + eventName :: Text, + userId :: Maybe Word64, + tags :: [Text], + attributes :: HashMap Text Text, + createdAt :: UTCTime + } + +main :: IO () +main = do + connection <- + newConnection + ConnectionOptions + { url = "http://localhost:8123", + database = Nothing, + user = Nothing, + password = Nothing, + httpManager = Nothing + } + + let rowEncoder = + contramap eventId Value.uint64 + <> contramap eventName Value.string + <> contramap userId (Value.nullable Value.uint64) + <> contramap tags (Value.array Value.string) + <> contramap attributes (Value.map Value.string Value.string) + <> contramap createdAt Value.dateTime + + ins = + Insert.insert + "events" + ["event_id", "event_name", "user_id", "tags", "attributes", "created_at"] + rowEncoder + mempty + + rows = + [ EventRow + { eventId = 1, + eventName = "signup", + userId = Just 42, + tags = ["marketing", "trial"], + attributes = HashMap.fromList [("plan", "pro"), ("campaign", "spring-launch")], + createdAt = read "2026-03-30 01:23:45 UTC" + }, + EventRow + { eventId = 2, + eventName = "anonymous-page-view", + userId = Nothing, + tags = ["landing-page"], + attributes = HashMap.fromList [("path", "/pricing")], + createdAt = read "2026-03-30 01:25:00 UTC" + } + ] + + runInsert connection ins () rows diff --git a/test/Main.hs b/test/Main.hs index e9f80b9..143b6e3 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -3,17 +3,30 @@ module Main (main) where import Data.ByteString (ByteString) +import Data.ByteString qualified as BS import Data.ByteString.Builder qualified as Builder import Data.ByteString.Lazy qualified as LBS +import Data.Functor.Contravariant (contramap) +import Data.Hashable (Hashable) +import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict qualified as HashMap +import Data.IORef qualified as IORef +import Data.Int (Int16, Int32, Int64, Int8) +import Data.String (fromString) +import Data.Text (Text) import Data.Text qualified as Text import Data.Text.Lazy qualified as LText import Data.Text.Lazy.Builder qualified as TBuilder import Data.Time (UTCTime (..), fromGregorian, secondsToDiffTime) -import Data.Word (Word16, Word32, Word64) +import Data.Time.Calendar qualified +import Data.Time.Clock.POSIX qualified +import Data.Time.Clock.System qualified +import Data.UUID qualified +import Data.Word (Word16, Word32, Word64, Word8) import Database.ClickHouse.Insert qualified as Insert import Database.ClickHouse.Params qualified as Params import Database.ClickHouse.Parser qualified as Parser +import Database.ClickHouse.Stream qualified as Stream import Database.ClickHouse.Value qualified as Value import Test.Tasty qualified as Tasty import Test.Tasty.HUnit ((@=?)) @@ -25,8 +38,14 @@ main = Tasty.testGroup "clickhouse-client" [ paramsTests, - insertTests, - valueRoundtripTests + primitiveTests, + nullableTests, + arrayTests, + mapTests, + multiColumnTests, + valueInsertTests, + stressTests, + wideTableSmoketest ] paramsTests :: Tasty.TestTree @@ -42,81 +61,339 @@ paramsTests = @=? [("param_created_at", Just "2024-06-15T10:00:00")] ] -insertTests :: Tasty.TestTree -insertTests = +primitiveTests :: Tasty.TestTree +primitiveTests = Tasty.testGroup - "Insert" - [ HUnit.testCase "renderInsert includes columns and settings" $ do + "Primitive data types" + [ roundtripCase "uint8" Value.uint8 Parser.word8 (42 :: Word8), + roundtripCase "uint16" Value.uint16 Parser.word16le (65535 :: Word16), + roundtripCase "uint32" Value.uint32 Parser.word32le (100000 :: Word32), + roundtripCase "uint64" Value.uint64 Parser.word64le (1234567890123 :: Word64), + roundtripCase "int8" Value.int8 Parser.int8 (-42 :: Int8), + roundtripCase "int16" Value.int16 Parser.int16le (-32768 :: Int16), + roundtripCase "int32" Value.int32 Parser.int32le (-100000 :: Int32), + roundtripCase "int64" Value.int64 Parser.int64le (-1234567890123 :: Int64), + roundtripCase "float32" Value.float32 Parser.float32le (3.5 :: Float), + roundtripCase "float64" Value.float64 Parser.float64le (2.5 :: Double), + roundtripCase "string ascii" Value.string parseText ("hello world" :: Text), + roundtripCase "string unicode" Value.string parseText ("こんにちは 😀" :: Text), + roundtripCase "bool true" Value.bool parseBool True, + roundtripCase "bool false" Value.bool parseBool False, + roundtripCase "date" Value.date parseDate (read "2024-01-31"), + roundtripCase "date32" Value.date32 parseDate32 (read "2024-01-31"), + roundtripCase + "dateTime" + Value.dateTime + parseDateTime + (UTCTime (fromGregorian 2024 1 15) (secondsToDiffTime 43200)), + roundtripCase + "dateTime64" + Value.dateTime64 + parseDateTime64 + (UTCTime (fromGregorian 2024 6 15) (secondsToDiffTime 36000)), + roundtripCase + "uuid" + Value.uuid + parseUuid + (read "123e4567-e89b-12d3-a456-426614174000") + ] + +nullableTests :: Tasty.TestTree +nullableTests = + Tasty.testGroup + "Nullable data types" + [ roundtripCase "nullable uint32 just" (Value.nullable Value.uint32) parseNullableWord32 (Just 42), + roundtripCase "nullable uint32 nothing" (Value.nullable Value.uint32) parseNullableWord32 Nothing, + roundtripCase "nullable string just" (Value.nullable Value.string) parseNullableText (Just "hello"), + roundtripCase "nullable string nothing" (Value.nullable Value.string) parseNullableText Nothing, + roundtripCase "nullable date just" (Value.nullable Value.date) parseNullableDate (Just (read "2024-02-01")), + roundtripCase "nullable date nothing" (Value.nullable Value.date) parseNullableDate Nothing + ] + +arrayTests :: Tasty.TestTree +arrayTests = + Tasty.testGroup + "Array data types" + [ roundtripCase "array uint8" (Value.array Value.uint8) (parseArray Parser.word8) ([1, 2, 3, 255] :: [Word8]), + roundtripCase "array int32" (Value.array Value.int32) (parseArray Parser.int32le) ([-1, 0, 1, 2147483647] :: [Int32]), + roundtripCase "array string" (Value.array Value.string) (parseArray parseText) (["a", "こんにちは", "😀"] :: [Text]), + roundtripCase "array nullable uint32" (Value.array (Value.nullable Value.uint32)) (parseArray parseNullableWord32) ([Just 1, Nothing, Just 3] :: [Maybe Word32]), + roundtripCase + "nested arrays" + (Value.array (Value.array Value.uint32)) + (parseArray (parseArray Parser.word32le)) + ([[1, 2], [3, 4, 5], []] :: [[Word32]]) + ] + +mapTests :: Tasty.TestTree +mapTests = + Tasty.testGroup + "Map data types" + [ roundtripCase + "map string->uint32" + (Value.map Value.string Value.uint32) + (parseMap parseText Parser.word32le) + (HashMap.fromList [("a", 1), ("b", 2), ("c", 3)]), + roundtripCase + "map string->string" + (Value.map Value.string Value.string) + (parseMap parseText parseText) + (HashMap.fromList [("name", "alice"), ("city", "tokyo")]), + roundtripCase + "map uint32->string" + (Value.map Value.uint32 Value.string) + (parseMap Parser.word32le parseText) + (HashMap.fromList [(1, "one"), (2, "two")]), + roundtripCase + "map string->array(uint32)" + (Value.map Value.string (Value.array Value.uint32)) + (parseMap parseText (parseArray Parser.word32le)) + (HashMap.fromList [("x", [1, 2, 3]), ("y", [4, 5])]), + roundtripCase + "map string->nullable(uint32)" + (Value.map Value.string (Value.nullable Value.uint32)) + (parseMap parseText parseNullableWord32) + (HashMap.fromList [("present", Just 42), ("missing", Nothing)]) + ] + +multiColumnTests :: Tasty.TestTree +multiColumnTests = + Tasty.testGroup + "Multi column tests" + [ HUnit.testCase "composed row encoder matches composed row parser" $ do + let encoder = + contramap (\(a, _, _, _) -> a) Value.uint32 + <> contramap (\(_, b, _, _) -> b) Value.string + <> contramap (\(_, _, c, _) -> c) Value.date + <> contramap (\(_, _, _, d) -> d) Value.bool + parser = (,,,) <$> Parser.word32le <*> parseText <*> parseDate <*> parseBool + rows = + [ (42 :: Word32, "hello" :: Text, read "2024-01-31", True), + (7 :: Word32, "world" :: Text, read "2024-02-01", False), + (999 :: Word32, "multi" :: Text, read "2024-03-15", True) + ] + actual <- decodeRowsFromChunks [1, 2, 5, 3, 8] encoder parser rows + rows @=? actual + ] + +valueInsertTests :: Tasty.TestTree +valueInsertTests = + Tasty.testGroup + "Value insert" + [ HUnit.testCase "renderInsert includes columns/settings/format" $ do let ins = Insert.modifySettings (("async_insert", "1") :) (Insert.insert "events" ["id", "name"] (Value.tuple Value.uint32 Value.string) mempty) - rendered = - Text.unpack . LText.toStrict . TBuilder.toLazyText $ Insert.renderInsert ins + rendered = Text.unpack . LText.toStrict . TBuilder.toLazyText $ Insert.renderInsert ins rendered - @=? "INSERT INTO events (\"id\", \"name\") SETTINGS async_insert = 1 FORMAT RowBinary" + @=? "INSERT INTO events (\"id\", \"name\") SETTINGS async_insert = 1 FORMAT RowBinary", + HUnit.testCase "tuple payload encodes and decodes" $ do + let payload = (99 :: Word32, "event" :: Text) + bytes = encode (Value.tuple Value.uint32 Value.string) payload + (result, remaining) = Parser.runParser ((,) <$> Parser.word32le <*> parseText) bytes + "" @=? remaining + assertParseSuccess payload result ] -valueRoundtripTests :: Tasty.TestTree -valueRoundtripTests = +stressTests :: Tasty.TestTree +stressTests = Tasty.testGroup - "Value encoding" - [ roundtripCase "uint64" Value.uint64 Parser.word64le (123456789 :: Word64), - roundtripCase "string" Value.string parseText "hello clickhouse", - roundtripCase "nullable just" (Value.nullable Value.uint32) parseNullableWord32 (Just 7), - roundtripCase "nullable nothing" (Value.nullable Value.uint32) parseNullableWord32 Nothing, - roundtripCase "array uint16" (Value.array Value.uint16) parseWord16Array [1, 2, 3, 65535], - roundtripCase - "map string->uint32" - (Value.map Value.string Value.uint32) - parseStringUInt32Map - (HashMap.fromList [("a", 1), ("bbb", 42)]), - roundtripCase - "tuple string uint32" - (Value.tuple Value.string Value.uint32) - ((,) <$> parseText <*> Parser.word32le) - ("event", 99) + "Stress test roundtrip" + [ HUnit.testCase "repeated composed rows across chunk boundaries" $ do + let encoder = + contramap (\(a, _, _, _) -> a) Value.uint32 + <> contramap (\(_, b, _, _) -> b) Value.string + <> contramap (\(_, _, c, _) -> c) Value.bool + <> contramap (\(_, _, _, d) -> d) (Value.array Value.uint16) + parser = + (,,,) + <$> Parser.word32le + <*> parseText + <*> parseBool + <*> parseArray Parser.word16le + rows = + [ (fromIntegral i :: Word32, "row-" <> fromStringInt i, odd i, [fromIntegral i, fromIntegral (i + 1)] :: [Word16]) + | i <- [1 .. 2000] + ] + actual <- decodeRowsFromChunks [1, 1, 2, 3, 5, 8] encoder parser rows + rows @=? actual, + HUnit.testCase "repeated rows with nullable and map" $ do + let encoder = + contramap (\(a, _, _) -> a) (Value.nullable Value.string) + <> contramap (\(_, b, _) -> b) (Value.map Value.string Value.uint32) + <> contramap (\(_, _, c) -> c) Value.int64 + parser = + (,,) + <$> parseNullableText + <*> parseMap parseText Parser.word32le + <*> Parser.int64le + rows = + [ (if even i then Just ("v-" <> fromStringInt i) else Nothing, HashMap.fromList [("k", fromIntegral i), ("z", fromIntegral (i * 2))], fromIntegral (negate i) :: Int64) + | i <- [1 .. 1500] + ] + actual <- decodeRowsFromChunks [2, 7, 4, 1, 9] encoder parser rows + rows @=? actual + ] + +wideTableSmoketest :: Tasty.TestTree +wideTableSmoketest = + Tasty.testGroup + "Wide table smoketest" + [ HUnit.testCase "wide composed rows with uuid/map/nullable/nested arrays and long strings" $ do + let encoder = + contramap (\(a, _, _, _, _, _, _) -> a) Value.uuid + <> contramap (\(_, b, _, _, _, _, _) -> b) (Value.map Value.string Value.uint32) + <> contramap (\(_, _, c, _, _, _, _) -> c) (Value.nullable Value.string) + <> contramap (\(_, _, _, d, _, _, _) -> d) (Value.array (Value.array Value.uint16)) + <> contramap (\(_, _, _, _, e, _, _) -> e) Value.string + <> contramap (\(_, _, _, _, _, f, _) -> f) Value.bool + <> contramap (\(_, _, _, _, _, _, g) -> g) Value.uint64 + parser = + (,,,,,,) + <$> parseUuid + <*> parseMap parseText Parser.word32le + <*> parseNullableText + <*> parseArray (parseArray Parser.word16le) + <*> parseText + <*> parseBool + <*> Parser.word64le + rows = [mkWideRow i | i <- [1 .. 300]] + actual <- decodeRowsFromChunks [1, 3, 2, 8, 5, 13] encoder parser rows + rows @=? actual ] +type WideRow = + ( Data.UUID.UUID, + HashMap Text Word32, + Maybe Text, + [[Word16]], + Text, + Bool, + Word64 + ) + +mkWideRow :: Int -> WideRow +mkWideRow i = + ( Data.UUID.fromWords64 (fromIntegral i) (fromIntegral (i * 37)), + HashMap.fromList [("a", fromIntegral i), ("b", fromIntegral (i * 3)), ("c", fromIntegral (i * 5))], + if even i then Just ("note-" <> fromStringInt i) else Nothing, + [ [fromIntegral i, fromIntegral (i + 1)], + [fromIntegral (i + 2)], + [] + ], + Text.replicate ((i `mod` 5) + 1) "long-value-", + odd i, + fromIntegral (i * 1000) + ) + +decodeRowsFromChunks :: (Eq a, Show a) => [Int] -> Value.Value a -> Parser.Parser a -> [a] -> IO [a] +decodeRowsFromChunks chunkSizes encoder parser rows = do + let payload = BS.concat (fmap (encode encoder) rows) + chunks = chunkBytes chunkSizes payload + ref <- IORef.newIORef chunks + let source = do + xs <- IORef.readIORef ref + case xs of + [] -> pure BS.empty + (x : rest) -> IORef.writeIORef ref rest >> pure x + decoded <- Stream.foldStream (\acc x -> pure (x : acc)) [] (Parser.parseFromSource source parser) + let ordered = reverse decoded + case [err | Left err <- ordered] of + (err : _) -> HUnit.assertFailure err >> pure [] + [] -> pure [x | Right x <- ordered] + +chunkBytes :: [Int] -> ByteString -> [ByteString] +chunkBytes sizes bs = go effectiveSizes bs + where + effectiveSizes = if null sizes then [4096] else sizes + go _ remBs | BS.null remBs = [] + go (n : ns) remBs + | n <= 0 = go ns remBs + | otherwise = + let (h, t) = BS.splitAt n remBs + in h : go (if null ns then effectiveSizes else ns) t + go [] remBs = go effectiveSizes remBs + roundtripCase :: (Eq a, Show a) => String -> Value.Value a -> Parser.Parser a -> a -> Tasty.TestTree roundtripCase label encoder parser expected = HUnit.testCase label $ do let bs = encode encoder expected (result, remaining) = Parser.runParser parser bs "" @=? remaining - case result of - Parser.ParseSuccess _ actual -> expected @=? actual - Parser.ParseFailure err -> HUnit.assertFailure err - Parser.UnexpectedEndOfInput -> HUnit.assertFailure "unexpected end of input" + assertParseSuccess expected result + +assertParseSuccess :: (Eq a, Show a) => a -> Parser.ParseResult a -> IO () +assertParseSuccess expected result = + case result of + Parser.ParseSuccess _ actual -> expected @=? actual + Parser.ParseFailure err -> HUnit.assertFailure err + Parser.UnexpectedEndOfInput -> HUnit.assertFailure "unexpected end of input" encode :: Value.Value a -> a -> ByteString encode encoder = LBS.toStrict . Builder.toLazyByteString . Value.runValue encoder -parseText :: Parser.Parser Text.Text +parseText :: Parser.Parser Text parseText = do len <- Parser.uLEB128 Parser.text (fromIntegral len) -parseNullableWord32 :: Parser.Parser (Maybe Word32) -parseNullableWord32 = do +parseBool :: Parser.Parser Bool +parseBool = (> 0) <$> Parser.word8 + +parseNullableWith :: Parser.Parser a -> Parser.Parser (Maybe a) +parseNullableWith p = do tag <- Parser.word8 case tag of - 0 -> Just <$> Parser.word32le + 0 -> Just <$> p 1 -> pure Nothing _ -> failParser "invalid nullable tag" -parseWord16Array :: Parser.Parser [Word16] -parseWord16Array = do +parseNullableWord32 :: Parser.Parser (Maybe Word32) +parseNullableWord32 = parseNullableWith Parser.word32le + +parseNullableText :: Parser.Parser (Maybe Text) +parseNullableText = parseNullableWith parseText + +parseNullableDate :: Parser.Parser (Maybe Data.Time.Calendar.Day) +parseNullableDate = parseNullableWith parseDate + +parseArray :: Parser.Parser a -> Parser.Parser [a] +parseArray p = do len <- Parser.uLEB128 - let n = fromIntegral len :: Int - sequence (replicate n Parser.word16le) + sequence (replicate (fromIntegral len) p) -parseStringUInt32Map :: Parser.Parser (HashMap.HashMap Text.Text Word32) -parseStringUInt32Map = do +parseMap :: (Eq k, Hashable k) => Parser.Parser k -> Parser.Parser v -> Parser.Parser (HashMap k v) +parseMap parseK parseV = do len <- Parser.uLEB128 - let n = fromIntegral len :: Int - pairs <- sequence (replicate n ((,) <$> parseText <*> Parser.word32le)) + pairs <- sequence (replicate (fromIntegral len) ((,) <$> parseK <*> parseV)) pure (HashMap.fromList pairs) +parseDate :: Parser.Parser Data.Time.Calendar.Day +parseDate = do + days <- Parser.int16le + pure (Data.Time.Calendar.addDays (fromIntegral days) Data.Time.Clock.System.systemEpochDay) + +parseDate32 :: Parser.Parser Data.Time.Calendar.Day +parseDate32 = do + days <- Parser.int32le + pure (Data.Time.Calendar.addDays (fromIntegral days) Data.Time.Clock.System.systemEpochDay) + +parseDateTime :: Parser.Parser UTCTime +parseDateTime = do + t <- Parser.int32le + pure (Data.Time.Clock.POSIX.posixSecondsToUTCTime (fromIntegral t)) + +parseDateTime64 :: Parser.Parser UTCTime +parseDateTime64 = do + t <- Parser.int64le + pure (Data.Time.Clock.POSIX.posixSecondsToUTCTime (fromIntegral t / 1000)) + +parseUuid :: Parser.Parser Data.UUID.UUID +parseUuid = Data.UUID.fromWords64 <$> Parser.word64le <*> Parser.word64le + failParser :: String -> Parser.Parser a failParser msg = Parser.Parser $ \_ _ -> pure (Parser.ParseFailure msg) + +fromStringInt :: Int -> Text +fromStringInt = fromString . show