diff --git a/pkg/cnservice/server_task.go b/pkg/cnservice/server_task.go index 747b3bf46a5b5..d4e8ad63c8d5d 100644 --- a/pkg/cnservice/server_task.go +++ b/pkg/cnservice/server_task.go @@ -22,6 +22,7 @@ import ( "github.com/matrixorigin/matrixone/pkg/common/moerr" "github.com/matrixorigin/matrixone/pkg/common/runtime" + "github.com/matrixorigin/matrixone/pkg/datalink/casgc" "github.com/matrixorigin/matrixone/pkg/frontend" "github.com/matrixorigin/matrixone/pkg/iscp" "github.com/matrixorigin/matrixone/pkg/logutil" @@ -369,6 +370,16 @@ func (s *service) registerExecutorsLocked() { ), ) + s.task.runner.RegisterExecutor(task.TaskCode_DatalinkCASGCExecutor, + casgc.DatalinkCASGCExecutorFactory( + s.cfg.UUID, + s.storeEngine, + s._txnClient, + s.fileService, + common.ISCPAllocator, + ), + ) + s.task.runner.RegisterExecutor( task.TaskCode_SQLTask, taskservice.NewSQLTaskExecutor(ieFactory, ts, s.cfg.UUID).TaskExecutor(), diff --git a/pkg/datalink/cas.go b/pkg/datalink/cas.go new file mode 100644 index 0000000000000..a3b375790a53c --- /dev/null +++ b/pkg/datalink/cas.go @@ -0,0 +1,200 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package datalink + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "iter" + "strconv" + "strings" + + "github.com/matrixorigin/matrixone/pkg/common/moerr" + "github.com/matrixorigin/matrixone/pkg/fileservice" +) + +const ( + // ContentHashKey is the datalink URL query parameter that marks a pinned + // value. Its value is the sha256 hex digest of the referenced bytes. + ContentHashKey = "contenthash" + + // casPrefix is the reserved key prefix for content-addressed datalink blobs + // inside the SHARED file service. + casPrefix = "datalink_cas" +) + +// ValidateContentHash checks that hash is a well-formed sha256 hex digest, so +// that callers (e.g. CASKey) can safely assume a fixed-length lower-case hex. +func ValidateContentHash(hash string) error { + if len(hash) != hex.EncodedLen(sha256.Size) { + return moerr.NewInternalErrorNoCtxf( + "invalid datalink contenthash length %d, want %d", len(hash), hex.EncodedLen(sha256.Size)) + } + if _, err := hex.DecodeString(hash); err != nil { + return moerr.NewInternalErrorNoCtxf("invalid datalink contenthash %q: not hex", hash) + } + return nil +} + +// CASKey returns the storage key of a content-addressed blob within the SHARED +// file service (without the service-name prefix). Layout: +// datalink_cas//

/. +// +// The blob is namespaced by accountID so that a contenthash is not a global +// bearer capability: a hash known to one account cannot be used to read another +// account's pinned bytes, and there is no cross-account dedup visibility. The +// accountID must come from the trusted execution context (defines.GetAccountId), +// never from the datalink URL. +// +// hash must be a validated sha256 hex digest (see ValidateContentHash). +// +// The read path reads this key directly from the SHARED FileService rather than +// routing through GetForETL, because SHARED may be a plain FileService (e.g. +// LocalFS in standalone) that does not implement ETLFileService. +func CASKey(accountID uint32, hash string) string { + return casPrefix + "/" + strconv.FormatUint(uint64(accountID), 10) + "/" + hash[:2] + "/" + hash +} + +// casHashFromKey extracts the hash from a CAS key produced by CASKey, reporting +// whether p is such a key. Deriving ContentHash from an already-parsed MoPath +// (rather than re-parsing the URL) keeps ContentHash and MoPath consistent even +// for malformed input with duplicate/mixed-case contenthash params. The trailing +// path segment is always the hash regardless of the account/

prefix. +func casHashFromKey(p string) (string, bool) { + prefix := casPrefix + "/" + if !strings.HasPrefix(p, prefix) { + return "", false + } + rest := p[len(prefix):] // "/

/" + if idx := strings.LastIndex(rest, "/"); idx >= 0 { + return rest[idx+1:], true + } + return "", false +} + +// CASPut writes data into the calling account's content-addressed namespace and +// returns its sha256 hex digest. The store is write-once and immutable: if the +// blob already exists the write is skipped (natural per-account dedup), so +// repeated Puts of identical bytes succeed without error and never overwrite the +// existing object. +func CASPut(ctx context.Context, fs fileservice.FileService, accountID uint32, data []byte) (string, error) { + sum := sha256.Sum256(data) + hash := hex.EncodeToString(sum[:]) + + vec := fileservice.IOVector{ + FilePath: CASKey(accountID, hash), + Entries: []fileservice.IOEntry{ + { + Offset: 0, + Size: int64(len(data)), + Data: data, + }, + }, + } + if err := fs.Write(ctx, vec); err != nil { + if moerr.IsMoErrCode(err, moerr.ErrFileAlreadyExists) { + return hash, nil + } + return "", err + } + return hash, nil +} + +// CASExists reports whether a blob addressed by hash exists in the given +// account's namespace. +func CASExists(ctx context.Context, fs fileservice.FileService, accountID uint32, hash string) (bool, error) { + if _, err := fs.StatFile(ctx, CASKey(accountID, hash)); err != nil { + if moerr.IsMoErrCode(err, moerr.ErrFileNotFound) { + return false, nil + } + return false, err + } + return true, nil +} + +// CASEntry is a single blob enumerated from an account's CAS namespace. +type CASEntry struct { + Hash string + Key string + Size int64 +} + +// CASAccountPrefix returns the key prefix that holds all CAS blobs of an +// account: "datalink_cas//". +func CASAccountPrefix(accountID uint32) string { + return casPrefix + "/" + strconv.FormatUint(uint64(accountID), 10) + "/" +} + +// CASDelete removes a single content-addressed blob. A missing object is not an +// error: deletion is idempotent so a sweep can safely retry. 
+func CASDelete(ctx context.Context, fs fileservice.FileService, accountID uint32, hash string) error { + if err := fs.Delete(ctx, CASKey(accountID, hash)); err != nil { + if moerr.IsMoErrCode(err, moerr.ErrFileNotFound) { + return nil + } + return err + } + return nil +} + +// CASListAccount enumerates every blob in the account's namespace. The CAS +// layout is datalink_cas//

/, so it lists the account dir, +// descends one level into each

bucket, and yields a CASEntry per blob with +// its sha256 hash, full storage key, and size. +func CASListAccount(ctx context.Context, fs fileservice.FileService, accountID uint32) iter.Seq2[CASEntry, error] { + prefix := CASAccountPrefix(accountID) + return func(yield func(CASEntry, error) bool) { + for bucket, err := range fs.List(ctx, prefix) { + if err != nil { + yield(CASEntry{}, err) + return + } + if !bucket.IsDir { + continue // CAS layout always nests under an

bucket + } + bucketPath := prefix + bucket.Name + "/" + for ent, err := range fs.List(ctx, bucketPath) { + if err != nil { + yield(CASEntry{}, err) + return + } + if ent.IsDir { + continue + } + e := CASEntry{Hash: ent.Name, Key: bucketPath + ent.Name, Size: ent.Size} + if !yield(e, nil) { + return + } + } + } + } +} + +// CASDeleteAccountPrefix removes the entire CAS namespace of one account. Used +// by DROP ACCOUNT so a removed tenant leaves no pinned blobs behind. +func CASDeleteAccountPrefix(ctx context.Context, fs fileservice.FileService, accountID uint32) error { + keys := make([]string, 0, 16) + for e, err := range CASListAccount(ctx, fs, accountID) { + if err != nil { + return err + } + keys = append(keys, e.Key) + } + if len(keys) == 0 { + return nil + } + return fs.Delete(ctx, keys...) +} diff --git a/pkg/datalink/cas_test.go b/pkg/datalink/cas_test.go new file mode 100644 index 0000000000000..88c244db88977 --- /dev/null +++ b/pkg/datalink/cas_test.go @@ -0,0 +1,202 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package datalink + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "testing" + + "github.com/matrixorigin/matrixone/pkg/defines" + "github.com/matrixorigin/matrixone/pkg/fileservice" + "github.com/stretchr/testify/require" +) + +func newTestCASFS(t *testing.T) fileservice.FileService { + fs, err := fileservice.NewMemoryFS(defines.SharedFileServiceName, fileservice.DisabledCacheConfig, nil) + require.NoError(t, err) + return fs +} + +const testAccountID = uint32(7) + +func TestCASPutStoresAndReturnsSha256(t *testing.T) { + ctx := context.Background() + fs := newTestCASFS(t) + data := []byte("hello datalink") + + hash, err := CASPut(ctx, fs, testAccountID, data) + require.NoError(t, err) + + sum := sha256.Sum256(data) + require.Equal(t, hex.EncodeToString(sum[:]), hash) + + // the stored CAS object must be byte-identical to the input + vec := &fileservice.IOVector{ + FilePath: CASKey(testAccountID, hash), + Entries: []fileservice.IOEntry{{Offset: 0, Size: -1}}, + } + require.NoError(t, fs.Read(ctx, vec)) + require.Equal(t, data, vec.Entries[0].Data) +} + +func TestCASPutIsIdempotent(t *testing.T) { + ctx := context.Background() + fs := newTestCASFS(t) + data := []byte("same content") + + h1, err := CASPut(ctx, fs, testAccountID, data) + require.NoError(t, err) + // write-once: a second Put of identical bytes must not fail with ErrFileAlreadyExists + h2, err := CASPut(ctx, fs, testAccountID, data) + require.NoError(t, err) + require.Equal(t, h1, h2) +} + +func TestCASPutDifferentBytesDifferentHash(t *testing.T) { + ctx := context.Background() + fs := newTestCASFS(t) + + h1, err := CASPut(ctx, fs, testAccountID, []byte("alpha")) + require.NoError(t, err) + h2, err := CASPut(ctx, fs, testAccountID, []byte("beta")) + require.NoError(t, err) + require.NotEqual(t, h1, h2) +} + +func TestCASExists(t *testing.T) { + ctx := context.Background() + fs := newTestCASFS(t) + data := []byte("exists check") + sum := sha256.Sum256(data) + hash := 
hex.EncodeToString(sum[:]) + + ok, err := CASExists(ctx, fs, testAccountID, hash) + require.NoError(t, err) + require.False(t, ok) + + _, err = CASPut(ctx, fs, testAccountID, data) + require.NoError(t, err) + + ok, err = CASExists(ctx, fs, testAccountID, hash) + require.NoError(t, err) + require.True(t, ok) +} + +func TestCASKey(t *testing.T) { + require.Equal(t, "datalink_cas/7/ab/abcd1234ef", CASKey(7, "abcd1234ef")) +} + +func TestCASDeleteIdempotent(t *testing.T) { + ctx := context.Background() + fs := newTestCASFS(t) + data := []byte("to delete") + hash, err := CASPut(ctx, fs, testAccountID, data) + require.NoError(t, err) + + require.NoError(t, CASDelete(ctx, fs, testAccountID, hash)) + ok, err := CASExists(ctx, fs, testAccountID, hash) + require.NoError(t, err) + require.False(t, ok) + // deleting a non-existent object is a success (idempotent) + require.NoError(t, CASDelete(ctx, fs, testAccountID, hash)) +} + +func TestCASListAccount(t *testing.T) { + ctx := context.Background() + fs := newTestCASFS(t) + h1, err := CASPut(ctx, fs, testAccountID, []byte("a")) + require.NoError(t, err) + h2, err := CASPut(ctx, fs, testAccountID, []byte("bb")) + require.NoError(t, err) + // another account's object must not appear + _, err = CASPut(ctx, fs, uint32(999), []byte("other")) + require.NoError(t, err) + + got := map[string]bool{} + for e, err := range CASListAccount(ctx, fs, testAccountID) { + require.NoError(t, err) + got[e.Hash] = true + } + require.True(t, got[h1]) + require.True(t, got[h2]) + require.Len(t, got, 2) +} + +func TestCASDeleteAccountPrefix(t *testing.T) { + ctx := context.Background() + fs := newTestCASFS(t) + h1, err := CASPut(ctx, fs, testAccountID, []byte("a")) + require.NoError(t, err) + h2, err := CASPut(ctx, fs, testAccountID, []byte("bb")) + require.NoError(t, err) + keep, err := CASPut(ctx, fs, uint32(999), []byte("other")) + require.NoError(t, err) + + require.NoError(t, CASDeleteAccountPrefix(ctx, fs, testAccountID)) + for _, h := range 
[]string{h1, h2} { + ok, err := CASExists(ctx, fs, testAccountID, h) + require.NoError(t, err) + require.False(t, ok) + } + // other account is unaffected + ok, err := CASExists(ctx, fs, uint32(999), keep) + require.NoError(t, err) + require.True(t, ok) +} + +// An account that never pinned anything has an empty namespace: listing yields +// nothing and deleting the prefix is a no-op success. +func TestCASEmptyNamespace(t *testing.T) { + ctx := context.Background() + fs := newTestCASFS(t) + + count := 0 + for _, err := range CASListAccount(ctx, fs, testAccountID) { + require.NoError(t, err) + count++ + } + require.Equal(t, 0, count) + require.NoError(t, CASDeleteAccountPrefix(ctx, fs, testAccountID)) +} + +// CAS objects are namespaced per account: bytes pinned by one account are not +// visible to another account by hash alone. This is the core fix for the bearer +// capability concern — a contenthash is no longer a cross-account read token. +func TestCASAccountIsolation(t *testing.T) { + ctx := context.Background() + fs := newTestCASFS(t) + data := []byte("tenant private bytes") + + const accountA = uint32(100) + const accountB = uint32(200) + + hash, err := CASPut(ctx, fs, accountA, data) + require.NoError(t, err) + + // same hash, different account namespaces -> different keys + require.NotEqual(t, CASKey(accountA, hash), CASKey(accountB, hash)) + + // account A can see its own object + ok, err := CASExists(ctx, fs, accountA, hash) + require.NoError(t, err) + require.True(t, ok) + + // account B cannot read account A's object by hash alone + ok, err = CASExists(ctx, fs, accountB, hash) + require.NoError(t, err) + require.False(t, ok) +} diff --git a/pkg/datalink/casgc/config.go b/pkg/datalink/casgc/config.go new file mode 100644 index 0000000000000..23037ad8fee0f --- /dev/null +++ b/pkg/datalink/casgc/config.go @@ -0,0 +1,46 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file 
except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package casgc reclaims unreferenced datalink content-addressed store (CAS) +// blobs. It runs a periodic, reference-aware sweep that deletes per-account +// pinned blobs no longer reachable from any live datalink column or live +// snapshot, while a grace window protects blobs from a concurrent datalink_pin. +package casgc + +import "time" + +const ( + defaultInterval = time.Hour + defaultGraceWindow = 24 * time.Hour +) + +// Config controls the datalink CAS reclamation sweep. +type Config struct { + // Interval between full sweeps. Default 1h. + Interval time.Duration `toml:"interval"` + // GraceWindow protects freshly pinned blobs: only blobs whose object mtime + // is older than now-GraceWindow are eligible for deletion, so an in-flight + // datalink_pin() racing the sweep is never collected. Default 24h. + GraceWindow time.Duration `toml:"grace-window"` +} + +// Adjust fills in defaults for any unset (non-positive) field. +func (c *Config) Adjust() { + if c.Interval <= 0 { + c.Interval = defaultInterval + } + if c.GraceWindow <= 0 { + c.GraceWindow = defaultGraceWindow + } +} diff --git a/pkg/datalink/casgc/config_test.go b/pkg/datalink/casgc/config_test.go new file mode 100644 index 0000000000000..648c832bebf59 --- /dev/null +++ b/pkg/datalink/casgc/config_test.go @@ -0,0 +1,36 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package casgc + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestConfigAdjustDefaults(t *testing.T) { + var c Config + c.Adjust() + require.Equal(t, time.Hour, c.Interval) + require.Equal(t, 24*time.Hour, c.GraceWindow) +} + +func TestConfigAdjustKeepsExplicit(t *testing.T) { + c := Config{Interval: 5 * time.Minute, GraceWindow: 2 * time.Hour} + c.Adjust() + require.Equal(t, 5*time.Minute, c.Interval) + require.Equal(t, 2*time.Hour, c.GraceWindow) +} diff --git a/pkg/datalink/casgc/executor.go b/pkg/datalink/casgc/executor.go new file mode 100644 index 0000000000000..9ddc8eecd85ff --- /dev/null +++ b/pkg/datalink/casgc/executor.go @@ -0,0 +1,100 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
package casgc

import (
	"context"
	"sync"
	"sync/atomic"

	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/defines"
	"github.com/matrixorigin/matrixone/pkg/fileservice"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	"github.com/matrixorigin/matrixone/pkg/pb/task"
	"github.com/matrixorigin/matrixone/pkg/txn/client"
	"github.com/matrixorigin/matrixone/pkg/vm/engine"
)

// DatalinkCASGCCronExpr fires the sweep at the top of every hour (6-field cron:
// second, minute, hour, day-of-month, month, day-of-week).
var DatalinkCASGCCronExpr = "0 0 * * * *"

// DatalinkCASGCTaskMetadata builds the cron-task metadata for the CAS GC sweep.
// id is the executor task code the task is bound to; the task ID is fixed to
// "DatalinkCASGCTask" and its concurrency option is set to 1.
func DatalinkCASGCTaskMetadata(id task.TaskCode) task.TaskMetadata {
	return task.TaskMetadata{
		ID:       "DatalinkCASGCTask",
		Executor: id,
		Options:  task.TaskOptions{Concurrency: 1},
	}
}

// running guards against overlapping cron firings stacking up.
var running atomic.Bool

// sweeperOnce and sweeper make the Sweeper persist across cron firings.
//
// The Sweeper's two-pass grace window relies on in-memory `pending` state that
// records when each blob was first seen as an orphan. If a fresh Sweeper were
// built on every firing, that state would reset each hour and the grace window
// could never elapse, so nothing would ever be collected. We therefore build
// the Sweeper exactly once (lazily, on the first firing) and reuse it on every
// subsequent firing so `pending` survives between runs.
var (
	sweeperOnce sync.Once
	sweeper     *Sweeper
)

// DatalinkCASGCExecutorFactory returns the cron executor closure for the
// datalink CAS garbage-collection sweep. The shared file service holding the
// CAS is provided by the CN service at registration time.
+func DatalinkCASGCExecutorFactory( + cnUUID string, + txnEngine engine.Engine, + cnTxnClient client.TxnClient, + fs fileservice.FileService, + _ *mpool.MPool, +) func(ctx context.Context, t task.Task) error { + return func(ctx context.Context, _ task.Task) error { + if !running.CompareAndSwap(false, true) { + return nil + } + defer running.Store(false) + + // Build the persistent Sweeper exactly once so the grace-window state + // survives across firings. + sweeperOnce.Do(func() { + sharedFS, err := fileservice.Get[fileservice.FileService](fs, defines.SharedFileServiceName) + if err != nil { + logutil.Errorf("casgc: failed to get SHARED file service: %v", err) + return + } + env := &sqlEnv{uuid: cnUUID, engine: txnEngine, txnClient: cnTxnClient} + sweeper = NewSweeper(sharedFS, Config{}, env) + }) + + if sweeper == nil { + // SHARED fs acquisition failed; best-effort cron, skip this run. + return nil + } + + logutil.Infof("casgc: datalink CAS GC sweep START") + deleted, err := sweeper.SweepAll(ctx) + if err != nil { + logutil.Errorf("casgc: datalink CAS GC sweep error: %v", err) + } + logutil.Infof("casgc: datalink CAS GC sweep END, deleted %d blob(s)", deleted) + return nil + } +} diff --git a/pkg/datalink/casgc/production.go b/pkg/datalink/casgc/production.go new file mode 100644 index 0000000000000..256ad9b826eb8 --- /dev/null +++ b/pkg/datalink/casgc/production.go @@ -0,0 +1,198 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package casgc + +import ( + "context" + "time" + + "github.com/matrixorigin/matrixone/pkg/catalog" + "github.com/matrixorigin/matrixone/pkg/container/vector" + "github.com/matrixorigin/matrixone/pkg/txn/client" + "github.com/matrixorigin/matrixone/pkg/util/executor" + "github.com/matrixorigin/matrixone/pkg/vectorindex/sqlexec" + "github.com/matrixorigin/matrixone/pkg/vm/engine" +) + +// sqlTxnDuration is the per-query transaction budget for sweep reference reads. +const sqlTxnDuration = 5 * time.Minute + +// Package-level function variables so tests can stub the real SQL calls. +var ( + runTxnWithSqlContext = sqlexec.RunTxnWithSqlContext + runSql = func(sqlproc *sqlexec.SqlProcess, sql string) (executor.Result, error) { + return sqlexec.RunSql(sqlproc, sql) + } +) + +// sqlEnv implements sweepEnv over the real CN SQL executor. +type sqlEnv struct { + uuid string + engine engine.Engine + txnClient client.TxnClient +} + +// listAccountIDs reads all live account ids from the sys-account mo_account view. +func (e *sqlEnv) listAccountIDs(ctx context.Context) ([]uint32, error) { + var ids []uint32 + err := runTxnWithSqlContext(ctx, e.engine, e.txnClient, e.uuid, + catalog.System_Account, sqlTxnDuration, nil, nil, + func(sqlproc *sqlexec.SqlProcess, _ any) error { + res, err := runSql(sqlproc, "SELECT account_id FROM mo_catalog.mo_account") + if err != nil { + return err + } + defer res.Close() + + for _, bat := range res.Batches { + vec := bat.Vecs[0] + for i := 0; i < bat.RowCount(); i++ { + if vec.IsNull(uint64(i)) { + continue + } + // mo_account.account_id is INT SIGNED (T_int32). + ids = append(ids, uint32(vector.GetFixedAtWithTypeCheck[int32](vec, i))) + } + } + return nil + }) + if err != nil { + return nil, err + } + return ids, nil +} + +// refsForAccount returns the per-account reference source (no I/O). 
+func (e *sqlEnv) refsForAccount(_ context.Context, accountID uint32) (accountRefs, error) { + return &acctRefs{env: e, accountID: accountID}, nil +} + +// acctRefs implements accountRefs scoped to one account tenant context. +type acctRefs struct { + env *sqlEnv + accountID uint32 +} + +// datalinkColumns lists the account's datalink-typed, non-hidden, user-table columns. +func (r *acctRefs) datalinkColumns(ctx context.Context) ([]columnRef, error) { + var cols []columnRef + err := runTxnWithSqlContext(ctx, r.env.engine, r.env.txnClient, r.env.uuid, + r.accountID, sqlTxnDuration, nil, nil, + func(sqlproc *sqlexec.SqlProcess, _ any) error { + const sql = "SELECT att_database, att_relname, attname, atttyp FROM mo_catalog.mo_columns " + + "WHERE att_database NOT IN ('mo_catalog','information_schema','mysql','system','system_metrics') " + + "AND att_is_hidden = 0" + res, err := runSql(sqlproc, sql) + if err != nil { + return err + } + defer res.Close() + + for _, bat := range res.Batches { + dbVec := bat.Vecs[0] + tblVec := bat.Vecs[1] + colVec := bat.Vecs[2] + typVec := bat.Vecs[3] + for i := 0; i < bat.RowCount(); i++ { + if typVec.IsNull(uint64(i)) { + continue + } + atttyp := typVec.CloneBytesAt(i) + if !isDatalinkType(atttyp) { + continue + } + cols = append(cols, columnRef{ + DBName: dbVec.GetStringAt(i), + TableName: tblVec.GetStringAt(i), + ColName: colVec.GetStringAt(i), + }) + } + } + return nil + }) + if err != nil { + return nil, err + } + return cols, nil +} + +// scanColumn reads the non-null datalink URL strings of one column, optionally +// at a snapshot. snapshotHint is "" (live) or "{snapshot = 'name'}". 
+func (r *acctRefs) scanColumn(ctx context.Context, ref columnRef, snapshotHint string) ([]string, error) { + sql := "SELECT `" + ref.ColName + "` FROM `" + ref.DBName + "`.`" + ref.TableName + "`" + if snapshotHint != "" { + sql += " " + snapshotHint + } + + var values []string + err := runTxnWithSqlContext(ctx, r.env.engine, r.env.txnClient, r.env.uuid, + r.accountID, sqlTxnDuration, nil, nil, + func(sqlproc *sqlexec.SqlProcess, _ any) error { + res, err := runSql(sqlproc, sql) + if err != nil { + return err + } + defer res.Close() + + for _, bat := range res.Batches { + vec := bat.Vecs[0] + for i := 0; i < bat.RowCount(); i++ { + if vec.IsNull(uint64(i)) { + continue + } + values = append(values, vec.GetStringAt(i)) + } + } + return nil + }) + if err != nil { + return nil, err + } + return values, nil +} + +// liveSnapshots lists the account's currently-live snapshots. +func (r *acctRefs) liveSnapshots(ctx context.Context) ([]snapshotRef, error) { + var snaps []snapshotRef + err := runTxnWithSqlContext(ctx, r.env.engine, r.env.txnClient, r.env.uuid, + r.accountID, sqlTxnDuration, nil, nil, + func(sqlproc *sqlexec.SqlProcess, _ any) error { + res, err := runSql(sqlproc, "SELECT sname, ts FROM mo_catalog.mo_snapshots") + if err != nil { + return err + } + defer res.Close() + + for _, bat := range res.Batches { + nameVec := bat.Vecs[0] + tsVec := bat.Vecs[1] + for i := 0; i < bat.RowCount(); i++ { + if nameVec.IsNull(uint64(i)) { + continue + } + // mo_snapshots.ts is BIGINT (T_int64). 
+ snaps = append(snaps, snapshotRef{ + Name: nameVec.GetStringAt(i), + TS: vector.GetFixedAtWithTypeCheck[int64](tsVec, i), + }) + } + } + return nil + }) + if err != nil { + return nil, err + } + return snaps, nil +} diff --git a/pkg/datalink/casgc/production_test.go b/pkg/datalink/casgc/production_test.go new file mode 100644 index 0000000000000..cb42b8b00179e --- /dev/null +++ b/pkg/datalink/casgc/production_test.go @@ -0,0 +1,395 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package casgc + +import ( + "context" + "errors" + "strings" + "testing" + "time" + + "github.com/matrixorigin/matrixone/pkg/common/mpool" + "github.com/matrixorigin/matrixone/pkg/container/batch" + "github.com/matrixorigin/matrixone/pkg/container/types" + "github.com/matrixorigin/matrixone/pkg/container/vector" + "github.com/matrixorigin/matrixone/pkg/pb/task" + "github.com/matrixorigin/matrixone/pkg/txn/client" + "github.com/matrixorigin/matrixone/pkg/util/executor" + "github.com/matrixorigin/matrixone/pkg/vectorindex/sqlexec" + "github.com/matrixorigin/matrixone/pkg/vm/engine" + "github.com/prashantv/gostub" + "github.com/stretchr/testify/require" +) + +// makeSqlEnv returns a minimal *sqlEnv for testing (fields are nil — safe +// because runTxnWithSqlContext and runSql are both stubbed before use). 
+func makeSqlEnv() *sqlEnv { + return &sqlEnv{uuid: "test-uuid"} +} + +// stubTxnPassthrough stubs runTxnWithSqlContext to call f directly without +// touching the real engine or txn client, then returns the stub so the caller +// can defer stub.Reset(). +func stubTxnPassthrough() *gostub.Stubs { + return gostub.Stub(&runTxnWithSqlContext, + func( + _ context.Context, + _ engine.Engine, + _ client.TxnClient, + _ string, + _ uint32, + _ time.Duration, + _ func(string, bool, bool) (interface{}, error), + cbdata any, + f func(*sqlexec.SqlProcess, any) error, + ) error { + return f(&sqlexec.SqlProcess{}, cbdata) + }, + ) +} + +// makeInt32Batch creates an executor.Result with a single T_int32 column +// populated with vals, using nullMask to mark null rows. +func makeInt32Batch(mp *mpool.MPool, vals []int32, nullMask []bool) executor.Result { + bat := batch.NewWithSize(1) + bat.Vecs[0] = vector.NewVec(types.New(types.T_int32, 4, 0)) + for i, v := range vals { + isNull := len(nullMask) > i && nullMask[i] + _ = vector.AppendFixed[int32](bat.Vecs[0], v, isNull, mp) + } + bat.SetRowCount(len(vals)) + return executor.Result{Mp: mp, Batches: []*batch.Batch{bat}} +} + +// makeVarcharBatch creates an executor.Result with a single T_varchar column. +func makeVarcharBatch(mp *mpool.MPool, vals []string, nullMask []bool) executor.Result { + bat := batch.NewWithSize(1) + bat.Vecs[0] = vector.NewVec(types.New(types.T_varchar, types.MaxVarcharLen, 0)) + for i, v := range vals { + isNull := len(nullMask) > i && nullMask[i] + _ = vector.AppendBytes(bat.Vecs[0], []byte(v), isNull, mp) + } + bat.SetRowCount(len(vals)) + return executor.Result{Mp: mp, Batches: []*batch.Batch{bat}} +} + +// makeColumnsBatch creates a 4-column batch matching the mo_columns query: +// att_database (varchar), att_relname (varchar), attname (varchar), atttyp (varbinary). 
+func makeColumnsBatch( + mp *mpool.MPool, + dbs, tables, cols []string, + atttypes [][]byte, + typNullMask []bool, +) executor.Result { + bat := batch.NewWithSize(4) + bat.Vecs[0] = vector.NewVec(types.New(types.T_varchar, types.MaxVarcharLen, 0)) + bat.Vecs[1] = vector.NewVec(types.New(types.T_varchar, types.MaxVarcharLen, 0)) + bat.Vecs[2] = vector.NewVec(types.New(types.T_varchar, types.MaxVarcharLen, 0)) + bat.Vecs[3] = vector.NewVec(types.New(types.T_varbinary, types.MaxVarcharLen, 0)) + + n := len(dbs) + for i := 0; i < n; i++ { + _ = vector.AppendBytes(bat.Vecs[0], []byte(dbs[i]), false, mp) + _ = vector.AppendBytes(bat.Vecs[1], []byte(tables[i]), false, mp) + _ = vector.AppendBytes(bat.Vecs[2], []byte(cols[i]), false, mp) + isNull := len(typNullMask) > i && typNullMask[i] + _ = vector.AppendBytes(bat.Vecs[3], atttypes[i], isNull, mp) + } + bat.SetRowCount(n) + return executor.Result{Mp: mp, Batches: []*batch.Batch{bat}} +} + +// makeSnapshotsBatch creates a 2-column batch: sname (varchar), ts (T_int64). 
+func makeSnapshotsBatch(mp *mpool.MPool, names []string, ts []int64, nameNullMask []bool) executor.Result { + bat := batch.NewWithSize(2) + bat.Vecs[0] = vector.NewVec(types.New(types.T_varchar, types.MaxVarcharLen, 0)) + bat.Vecs[1] = vector.NewVec(types.New(types.T_int64, 8, 0)) + + n := len(names) + for i := 0; i < n; i++ { + isNull := len(nameNullMask) > i && nameNullMask[i] + _ = vector.AppendBytes(bat.Vecs[0], []byte(names[i]), isNull, mp) + _ = vector.AppendFixed[int64](bat.Vecs[1], ts[i], false, mp) + } + bat.SetRowCount(n) + return executor.Result{Mp: mp, Batches: []*batch.Batch{bat}} +} + +// --------------------------------------------------------------------------- +// TestListAccountIDs +// --------------------------------------------------------------------------- + +func TestListAccountIDs(t *testing.T) { + mp := mpool.MustNewZero() + + stubTxn := stubTxnPassthrough() + defer stubTxn.Reset() + + // rows: account_id values [1, 0 (sys), 5, null] + stubSQL := gostub.Stub(&runSql, + func(_ *sqlexec.SqlProcess, _ string) (executor.Result, error) { + return makeInt32Batch(mp, + []int32{1, 0, 5, 0}, + []bool{false, false, false, true}, // last row is null + ), nil + }, + ) + defer stubSQL.Reset() + + env := makeSqlEnv() + ids, err := env.listAccountIDs(context.Background()) + require.NoError(t, err) + require.Equal(t, []uint32{1, 0, 5}, ids) +} + +func TestListAccountIDs_Error(t *testing.T) { + stubTxn := stubTxnPassthrough() + defer stubTxn.Reset() + + wantErr := errors.New("sql error") + stubSQL := gostub.Stub(&runSql, + func(_ *sqlexec.SqlProcess, _ string) (executor.Result, error) { + return executor.Result{}, wantErr + }, + ) + defer stubSQL.Reset() + + env := makeSqlEnv() + _, err := env.listAccountIDs(context.Background()) + require.ErrorIs(t, err, wantErr) +} + +// --------------------------------------------------------------------------- +// TestDatalinkColumns +// --------------------------------------------------------------------------- + 
+// TestDatalinkColumns feeds datalinkColumns three catalog rows (a datalink
+// column, a varchar column and a row whose atttyp is NULL) and expects only the
+// datalink column back. It also captures the SQL text that was issued.
+func TestDatalinkColumns(t *testing.T) {
+	mp := mpool.MustNewZero()
+
+	stubTxn := stubTxnPassthrough()
+	defer stubTxn.Reset()
+
+	// Build atttyp bytes: one datalink, one varchar, one will be null.
+	dlType := types.New(types.T_datalink, 0, 0)
+	dlBytes := types.EncodeType(&dlType)
+	vcType := types.New(types.T_varchar, 255, 0)
+	vcBytes := types.EncodeType(&vcType)
+
+	var capturedSQL string
+	stubSQL := gostub.Stub(&runSql,
+		func(_ *sqlexec.SqlProcess, sql string) (executor.Result, error) {
+			capturedSQL = sql
+			return makeColumnsBatch(
+				mp,
+				[]string{"mydb", "otherdb", "anydb"},
+				[]string{"t1", "t2", "t3"},
+				[]string{"dl_col", "vc_col", "null_col"},
+				[][]byte{dlBytes, vcBytes, {}},
+				// third row has null atttyp
+				[]bool{false, false, true},
+			), nil
+		},
+	)
+	defer stubSQL.Reset()
+
+	env := makeSqlEnv()
+	refs := &acctRefs{env: env, accountID: 7}
+	cols, err := refs.datalinkColumns(context.Background())
+	require.NoError(t, err)
+
+	// Only the datalink-typed row should survive.
+	require.Len(t, cols, 1)
+	require.Equal(t, "mydb", cols[0].DBName)
+	require.Equal(t, "t1", cols[0].TableName)
+	require.Equal(t, "dl_col", cols[0].ColName)
+
+	// SQL must contain the mo_columns query with system-db exclusion and hidden filter.
+	// NOTE(review): the "mo_catalog" check below is already implied by the
+	// "mo_catalog.mo_columns" check, so it does not by itself prove that the
+	// query excludes system databases -- consider asserting on the actual
+	// exclusion predicate.
+	require.Contains(t, capturedSQL, "mo_catalog.mo_columns")
+	require.Contains(t, capturedSQL, "mo_catalog")
+	require.Contains(t, capturedSQL, "att_is_hidden = 0")
+}
+
+// TestDatalinkColumns_Error checks that a SQL failure is returned unchanged
+// (as seen by errors.Is) from datalinkColumns.
+func TestDatalinkColumns_Error(t *testing.T) {
+	stubTxn := stubTxnPassthrough()
+	defer stubTxn.Reset()
+
+	wantErr := errors.New("columns error")
+	stubSQL := gostub.Stub(&runSql,
+		func(_ *sqlexec.SqlProcess, _ string) (executor.Result, error) {
+			return executor.Result{}, wantErr
+		},
+	)
+	defer stubSQL.Reset()
+
+	env := makeSqlEnv()
+	refs := &acctRefs{env: env, accountID: 7}
+	_, err := refs.datalinkColumns(context.Background())
+	require.ErrorIs(t, err, wantErr)
+}
+
+// ---------------------------------------------------------------------------
+// TestScanColumn
+// ---------------------------------------------------------------------------
+
+// TestScanColumn_Live scans a column with an empty snapshot hint and expects
+// the two non-null values back, in order, from a backtick-quoted SELECT that
+// carries no snapshot hint.
+func TestScanColumn_Live(t *testing.T) {
+	mp := mpool.MustNewZero()
+
+	stubTxn := stubTxnPassthrough()
+	defer stubTxn.Reset()
+
+	var capturedSQL string
+	stubSQL := gostub.Stub(&runSql,
+		func(_ *sqlexec.SqlProcess, sql string) (executor.Result, error) {
+			capturedSQL = sql
+			return makeVarcharBatch(
+				mp,
+				[]string{"mo://a?x=1", "mo://b?x=2", ""},
+				[]bool{false, false, true}, // last row is null
+			), nil
+		},
+	)
+	defer stubSQL.Reset()
+
+	env := makeSqlEnv()
+	refs := &acctRefs{env: env, accountID: 3}
+	ref := columnRef{DBName: "mydb", TableName: "mytable", ColName: "dl"}
+	vals, err := refs.scanColumn(context.Background(), ref, "")
+	require.NoError(t, err)
+	require.Equal(t, []string{"mo://a?x=1", "mo://b?x=2"}, vals)
+
+	// SQL should be a backtick-quoted SELECT without snapshot hint.
+	require.True(t, strings.HasPrefix(capturedSQL, "SELECT `dl` FROM `mydb`.`mytable`"),
+		"unexpected SQL: %q", capturedSQL)
+	require.False(t, strings.Contains(capturedSQL, "{snapshot"), "live query must not have snapshot hint")
+}
+
+// TestScanColumn_WithSnapshot checks that a non-empty snapshot hint is carried
+// into the generated SQL verbatim.
+func TestScanColumn_WithSnapshot(t *testing.T) {
+	mp := mpool.MustNewZero()
+
+	stubTxn := stubTxnPassthrough()
+	defer stubTxn.Reset()
+
+	var capturedSQL string
+	stubSQL := gostub.Stub(&runSql,
+		func(_ *sqlexec.SqlProcess, sql string) (executor.Result, error) {
+			capturedSQL = sql
+			return makeVarcharBatch(mp, []string{"mo://x"}, nil), nil
+		},
+	)
+	defer stubSQL.Reset()
+
+	env := makeSqlEnv()
+	refs := &acctRefs{env: env, accountID: 3}
+	ref := columnRef{DBName: "db2", TableName: "tbl2", ColName: "c"}
+	vals, err := refs.scanColumn(context.Background(), ref, "{snapshot = 'snap1'}")
+	require.NoError(t, err)
+	require.Equal(t, []string{"mo://x"}, vals)
+
+	require.Contains(t, capturedSQL, "{snapshot = 'snap1'}")
+}
+
+// TestScanColumn_Error checks that a SQL failure is surfaced by scanColumn.
+func TestScanColumn_Error(t *testing.T) {
+	stubTxn := stubTxnPassthrough()
+	defer stubTxn.Reset()
+
+	wantErr := errors.New("scan error")
+	stubSQL := gostub.Stub(&runSql,
+		func(_ *sqlexec.SqlProcess, _ string) (executor.Result, error) {
+			return executor.Result{}, wantErr
+		},
+	)
+	defer stubSQL.Reset()
+
+	env := makeSqlEnv()
+	refs := &acctRefs{env: env, accountID: 3}
+	ref := columnRef{DBName: "db", TableName: "tbl", ColName: "c"}
+	_, err := refs.scanColumn(context.Background(), ref, "")
+	require.ErrorIs(t, err, wantErr)
+}
+
+// ---------------------------------------------------------------------------
+// TestLiveSnapshots
+// ---------------------------------------------------------------------------
+
+// TestLiveSnapshots checks that liveSnapshots returns name/ts pairs and drops
+// rows whose snapshot name is NULL.
+func TestLiveSnapshots(t *testing.T) {
+	mp := mpool.MustNewZero()
+
+	stubTxn := stubTxnPassthrough()
+	defer stubTxn.Reset()
+
+	// Two rows; second has null name (should be skipped).
+	stubSQL := gostub.Stub(&runSql,
+		func(_ *sqlexec.SqlProcess, _ string) (executor.Result, error) {
+			return makeSnapshotsBatch(
+				mp,
+				[]string{"snap_a", "snap_b"},
+				[]int64{1000, 2000},
+				[]bool{false, true}, // snap_b name is null → skipped
+			), nil
+		},
+	)
+	defer stubSQL.Reset()
+
+	env := makeSqlEnv()
+	refs := &acctRefs{env: env, accountID: 5}
+	snaps, err := refs.liveSnapshots(context.Background())
+	require.NoError(t, err)
+	require.Len(t, snaps, 1)
+	require.Equal(t, snapshotRef{Name: "snap_a", TS: 1000}, snaps[0])
+}
+
+// TestLiveSnapshots_Error checks that a SQL failure is surfaced by liveSnapshots.
+func TestLiveSnapshots_Error(t *testing.T) {
+	stubTxn := stubTxnPassthrough()
+	defer stubTxn.Reset()
+
+	wantErr := errors.New("snapshots error")
+	stubSQL := gostub.Stub(&runSql,
+		func(_ *sqlexec.SqlProcess, _ string) (executor.Result, error) {
+			return executor.Result{}, wantErr
+		},
+	)
+	defer stubSQL.Reset()
+
+	env := makeSqlEnv()
+	refs := &acctRefs{env: env, accountID: 5}
+	_, err := refs.liveSnapshots(context.Background())
+	require.ErrorIs(t, err, wantErr)
+}
+
+// ---------------------------------------------------------------------------
+// TestRefsForAccount
+// ---------------------------------------------------------------------------
+
+// TestRefsForAccount checks that refsForAccount returns an *acctRefs bound to
+// the requested account id and to the environment it was created from.
+func TestRefsForAccount(t *testing.T) {
+	env := makeSqlEnv()
+	refs, err := env.refsForAccount(context.Background(), 42)
+	require.NoError(t, err)
+	ar, ok := refs.(*acctRefs)
+	require.True(t, ok)
+	require.Equal(t, uint32(42), ar.accountID)
+	require.Equal(t, env, ar.env)
+}
+
+// ---------------------------------------------------------------------------
+// TestExecutorMetadata — covers executor.go cheaply
+// ---------------------------------------------------------------------------
+
+// TestDatalinkCASGCTaskMetadata pins the task id, executor code and
+// concurrency of the metadata built for the CAS GC task.
+func TestDatalinkCASGCTaskMetadata(t *testing.T) {
+	md := DatalinkCASGCTaskMetadata(task.TaskCode_DatalinkCASGCExecutor)
+	require.Equal(t, "DatalinkCASGCTask", md.ID)
+	require.Equal(t, task.TaskCode_DatalinkCASGCExecutor, md.Executor)
+	require.Equal(t, uint32(1), md.Options.Concurrency)
+}
diff --git a/pkg/datalink/casgc/reference.go b/pkg/datalink/casgc/reference.go
new file mode 100644
index 0000000000000..ed5196315f4c3
--- /dev/null
+++ b/pkg/datalink/casgc/reference.go
@@ -0,0 +1,119 @@
+// Copyright 2024 Matrix Origin
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package casgc
+
+import (
+	"context"
+	"net/url"
+	"slices"
+	"strings"
+
+	"github.com/matrixorigin/matrixone/pkg/container/types"
+	"github.com/matrixorigin/matrixone/pkg/datalink"
+)
+
+// columnRef identifies a single column within a database table.
+type columnRef struct {
+	DBName    string
+	TableName string
+	ColName   string
+}
+
+// rowScanner reads the non-null string values of one column, optionally at a
+// snapshot. snapshotHint is "" for live data, or "{snapshot = 'name'}".
+type rowScanner interface {
+	scanColumn(ctx context.Context, ref columnRef, snapshotHint string) ([]string, error)
+}
+
+// contentHashes returns every well-formed contenthash carried by the datalink
+// URL raw, lower-cased, sorted and de-duplicated. It returns nil when raw
+// cannot be parsed or carries no valid hash.
+//
+// The parameter name is matched case-insensitively and each value is
+// lower-cased before validation, mirroring the query normalization done by
+// datalink.ParseDatalink; ValidateContentHash alone also accepts upper-case
+// hex. Every value of every matching parameter is reported: over-reporting a
+// reference only delays reclamation, whereas missing one would let the
+// sweeper treat a still-referenced blob as an orphan.
+func contentHashes(raw string) []string {
+	u, err := url.Parse(raw)
+	if err != nil {
+		return nil
+	}
+	var hashes []string
+	for key, vals := range u.Query() {
+		if !strings.EqualFold(key, datalink.ContentHashKey) {
+			continue
+		}
+		for _, v := range vals {
+			h := strings.ToLower(v)
+			if datalink.ValidateContentHash(h) == nil {
+				hashes = append(hashes, h)
+			}
+		}
+	}
+	// Map iteration order is random; sort so the result is deterministic.
+	slices.Sort(hashes)
+	return slices.Compact(hashes)
+}
+
+// parseContentHash extracts a validated sha256 hex contenthash from a datalink
+// URL string. It returns ("", false) if the URL cannot be parsed, if no
+// contenthash query parameter is present, or if none of its values is a valid
+// sha256 hex digest. The parameter name is matched case-insensitively and the
+// returned hash is lower-case. If the URL carries several distinct valid
+// hashes, the lexicographically smallest one is returned.
+func parseContentHash(raw string) (string, bool) {
+	hashes := contentHashes(raw)
+	if len(hashes) == 0 {
+		return "", false
+	}
+	return hashes[0], true
+}
+
+// isDatalinkType reports whether the binary-encoded types.Type stored in
+// atttyp represents a T_datalink column. atttyp must be the raw bytes returned
+// by types.EncodeType. Returns false for nil or empty input.
+func isDatalinkType(atttyp []byte) bool {
+	if len(atttyp) == 0 {
+		return false
+	}
+	return types.DecodeType(atttyp).Oid == types.T_datalink
+}
+
+// collectHashesFromColumns scans each column in cols, parses contenthashes
+// from the returned datalink URL strings, and returns the deduplicated set of
+// all valid hashes. The returned map is never nil. The first scan error is
+// returned immediately.
+func collectHashesFromColumns(
+	ctx context.Context,
+	sc rowScanner,
+	cols []columnRef,
+	snapshotHint string,
+) (map[string]struct{}, error) {
+	result := make(map[string]struct{})
+	for _, col := range cols {
+		rows, err := sc.scanColumn(ctx, col, snapshotHint)
+		if err != nil {
+			return nil, err
+		}
+		for _, raw := range rows {
+			for _, h := range contentHashes(raw) {
+				result[h] = struct{}{}
+			}
+		}
+	}
+	return result, nil
+}
diff --git a/pkg/datalink/casgc/reference_test.go b/pkg/datalink/casgc/reference_test.go
new file mode 100644
index 0000000000000..46b470c901b94
--- /dev/null
+++ b/pkg/datalink/casgc/reference_test.go
@@ -0,0 +1,213 @@
+// Copyright 2024 Matrix Origin
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package casgc
+
+import (
+	"context"
+	"crypto/sha256"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"testing"
+
+	"github.com/matrixorigin/matrixone/pkg/container/types"
+)
+
+// realHash returns the sha256 hex digest of the given string, for use as a
+// valid contenthash in tests.
+func realHash(s string) string {
+	sum := sha256.Sum256([]byte(s))
+	return hex.EncodeToString(sum[:])
+}
+
+// TestParseContentHash is a table test over parseContentHash: URLs with a
+// valid hash, without the parameter, with a too-short or non-hex hash, and the
+// empty string.
+func TestParseContentHash(t *testing.T) {
+	hash1 := realHash("blob-content-1")
+	hash2 := realHash("blob-content-2")
+
+	tests := []struct {
+		name      string
+		raw       string
+		wantHash  string
+		wantFound bool
+	}{
+		{
+			name:      "valid URL with contenthash",
+			raw:       fmt.Sprintf("mo://bucket/path/file.txt?contenthash=%s", hash1),
+			wantHash:  hash1,
+			wantFound: true,
+		},
+		{
+			name:      "valid URL without contenthash param",
+			raw:       "mo://bucket/path/file.txt?other=value",
+			wantHash:  "",
+			wantFound: false,
+		},
+		{
+			name:      "valid URL with invalid contenthash (too short)",
+			raw:       "mo://bucket/path/file.txt?contenthash=ab12",
+			wantHash:  "",
+			wantFound: false,
+		},
+		{
+			name:      "valid URL with another valid contenthash",
+			raw:       fmt.Sprintf("file:///data/something?contenthash=%s", hash2),
+			wantHash:  hash2,
+			wantFound: true,
+		},
+		{
+			name:      "empty string",
+			raw:       "",
+			wantHash:  "",
+			wantFound: false,
+		},
+		{
+			// 64 characters, so the length check passes and only the hex check rejects it.
+			name:      "valid URL with non-hex contenthash",
+			raw:       "mo://bucket/path/file.txt?contenthash=gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg",
+			wantHash:  "",
+			wantFound: false,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			gotHash, gotFound := parseContentHash(tc.raw)
+			if gotFound != tc.wantFound {
+				t.Errorf("parseContentHash(%q) found=%v, want %v", tc.raw, gotFound, tc.wantFound)
+			}
+			if gotHash != tc.wantHash {
+				t.Errorf("parseContentHash(%q) hash=%q, want %q", tc.raw, gotHash, tc.wantHash)
+			}
+		})
+	}
+}
+
+// TestIsDatalinkType checks isDatalinkType against an encoded datalink type, an
+// encoded varchar type, and empty/nil input.
+func TestIsDatalinkType(t *testing.T) {
+	datalinkType := types.New(types.T_datalink, 0, 0)
+	datalinkBytes := types.EncodeType(&datalinkType)
+
+	varcharType := types.New(types.T_varchar, 255, 0)
+	varcharBytes := types.EncodeType(&varcharType)
+
+	tests := []struct {
+		name   string
+		atttyp []byte
+		want   bool
+	}{
+		{
+			name:   "datalink type",
+			atttyp: datalinkBytes,
+			want:   true,
+		},
+		{
+			name:   "varchar type",
+			atttyp: varcharBytes,
+			want:   false,
+		},
+		{
+			name:   "empty bytes",
+			atttyp: []byte{},
+			want:   false,
+		},
+		{
+			name:   "nil bytes",
+			atttyp: nil,
+			want:   false,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			got := isDatalinkType(tc.atttyp)
+			if got != tc.want {
+				t.Errorf("isDatalinkType(%v) = %v, want %v", tc.atttyp, got, tc.want)
+			}
+		})
+	}
+}
+
+// fakeScanner is a test implementation of rowScanner.
+// rows holds the values returned per column; err, when set for a column, takes
+// precedence and is returned instead. The snapshot hint is ignored.
+type fakeScanner struct {
+	rows map[columnRef][]string
+	err  map[columnRef]error
+}
+
+func (f *fakeScanner) scanColumn(_ context.Context, ref columnRef, _ string) ([]string, error) {
+	if err, ok := f.err[ref]; ok {
+		return nil, err
+	}
+	return f.rows[ref], nil
+}
+
+// TestCollectHashesFromColumns covers hash collection over mixed rows, the
+// empty-columns case (non-nil empty map), and scan-error propagation.
+func TestCollectHashesFromColumns(t *testing.T) {
+	hash1 := realHash("first-blob")
+	hash2 := realHash("second-blob")
+
+	col1 := columnRef{DBName: "mydb", TableName: "mytable", ColName: "dl_col"}
+
+	t.Run("two valid hashes, one missing param, one empty string", func(t *testing.T) {
+		sc := &fakeScanner{
+			rows: map[columnRef][]string{
+				col1: {
+					fmt.Sprintf("mo://bucket/a?contenthash=%s", hash1),
+					fmt.Sprintf("mo://bucket/b?contenthash=%s", hash2),
+					"mo://bucket/c?other=value", // no contenthash
+					"",                          // empty string
+				},
+			},
+		}
+
+		got, err := collectHashesFromColumns(context.Background(), sc, []columnRef{col1}, "")
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if len(got) != 2 {
+			t.Fatalf("expected 2 hashes, got %d: %v", len(got), got)
+		}
+		if _, ok := got[hash1]; !ok {
+			t.Errorf("expected hash1 %q in result", hash1)
+		}
+		if _, ok := got[hash2]; !ok {
+			t.Errorf("expected hash2 %q in result", hash2)
+		}
+	})
+
+	t.Run("empty columns list returns empty non-nil map", func(t *testing.T) {
+		sc := &fakeScanner{rows: map[columnRef][]string{}}
+		got, err := collectHashesFromColumns(context.Background(), sc, nil, "")
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		if got == nil {
+			t.Fatal("expected non-nil map")
+		}
+		if len(got) != 0 {
+			t.Fatalf("expected empty map, got %v", got)
+		}
+	})
+
+	t.Run("scan error propagates", func(t *testing.T) {
+		scanErr := errors.New("scan failed")
+		sc := &fakeScanner{
+			err: map[columnRef]error{col1: scanErr},
+		}
+		_, err := collectHashesFromColumns(context.Background(), sc, []columnRef{col1}, "")
+		if err == nil {
+			t.Fatal("expected error, got nil")
+		}
+		if !errors.Is(err, scanErr) {
+			t.Errorf("expected scanErr, got %v", err)
+		}
+	})
+}
diff --git a/pkg/datalink/casgc/sweep.go b/pkg/datalink/casgc/sweep.go
new file mode 100644
index 0000000000000..3812540b6a091
--- /dev/null
+++ b/pkg/datalink/casgc/sweep.go
@@ -0,0 +1,200 @@
+// Copyright 2024 Matrix Origin
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package casgc
+
+import (
+	"context"
+	"errors"
+	"sync"
+	"time"
+
+	"github.com/matrixorigin/matrixone/pkg/datalink"
+	"github.com/matrixorigin/matrixone/pkg/fileservice"
+	"github.com/matrixorigin/matrixone/pkg/logutil"
+)
+
+// snapshotRef identifies a live snapshot to re-scan.
+type snapshotRef struct {
+	Name string
+	TS   int64
+}
+
+// accountRefs provides the reference data needed to sweep one account.
+type accountRefs interface {
+	rowScanner // scanColumn(ctx, ref, snapshotHint) ([]string, error)
+	// datalinkColumns lists the account's datalink-typed columns (live schema).
+	datalinkColumns(ctx context.Context) ([]columnRef, error)
+	// liveSnapshots lists the account's currently-live snapshots.
+	liveSnapshots(ctx context.Context) ([]snapshotRef, error)
+}
+
+// sweepEnv is the data-access boundary the Sweeper depends on.
+type sweepEnv interface {
+	// listAccountIDs returns all live account ids (sys view).
+	listAccountIDs(ctx context.Context) ([]uint32, error)
+	// refsForAccount returns the reference source scoped to one account.
+	refsForAccount(ctx context.Context, accountID uint32) (accountRefs, error)
+}
+
+// Sweeper reclaims unreferenced datalink CAS blobs per account using a
+// reference-aware, two-pass mark-and-delete grace window. Because the file
+// service does not expose object mtime, the grace period is implemented with
+// in-process memory state: a blob is deleted only after it has stayed an orphan
+// across passes for at least cfg.GraceWindow.
+type Sweeper struct {
+	fs    fileservice.FileService // SHARED file service holding the CAS
+	cfg   Config
+	env   sweepEnv
+	nowFn func() time.Time // injectable clock for tests
+
+	mu      sync.Mutex
+	pending map[uint32]map[string]time.Time // accountID -> hash -> first-seen-as-orphan
+}
+
+// NewSweeper builds a Sweeper over the SHARED file service holding the CAS and
+// the given reference environment. cfg defaults are filled via Adjust.
+func NewSweeper(fs fileservice.FileService, cfg Config, env sweepEnv) *Sweeper {
+	cfg.Adjust()
+	return &Sweeper{
+		fs:      fs,
+		cfg:     cfg,
+		env:     env,
+		nowFn:   time.Now,
+		pending: make(map[uint32]map[string]time.Time),
+	}
+}
+
+// SweepAccount runs one sweep pass for a single account and returns the number
+// of blobs deleted in this pass. It is candidate-driven: snapshots are only
+// scanned when there is at least one orphan candidate. A blob is deleted only
+// after it has remained an orphan for at least cfg.GraceWindow across passes.
+//
+// On error the returned count is 0 for failures in the reference-gathering
+// phase, and the number of blobs already deleted in this pass when a delete
+// fails part-way through.
+func (s *Sweeper) SweepAccount(ctx context.Context, accountID uint32) (deleted int, err error) {
+	refs, err := s.env.refsForAccount(ctx, accountID)
+	if err != nil {
+		return 0, err
+	}
+	cols, err := refs.datalinkColumns(ctx)
+	if err != nil {
+		return 0, err
+	}
+	// Hashes referenced by the current (live) data.
+	currentLive, err := collectHashesFromColumns(ctx, refs, cols, "")
+	if err != nil {
+		return 0, err
+	}
+
+	// Candidates: blobs present in the CAS but not referenced by current data.
+	candidates := make(map[string]struct{})
+	for ent, listErr := range datalink.CASListAccount(ctx, s.fs, accountID) {
+		if listErr != nil {
+			return 0, listErr
+		}
+		if _, live := currentLive[ent.Hash]; !live {
+			candidates[ent.Hash] = struct{}{}
+		}
+	}
+
+	// Confirm candidates against live snapshots. Candidate-driven: when there
+	// are no candidates, snapshots are never scanned.
+	// orphans aliases candidates (same map), so the deletes below shrink both.
+	orphans := candidates
+	if len(candidates) > 0 {
+		snaps, snapErr := refs.liveSnapshots(ctx)
+		if snapErr != nil {
+			return 0, snapErr
+		}
+		for _, snap := range snaps {
+			// Every candidate is already accounted for; skip remaining snapshots.
+			if len(orphans) == 0 {
+				break
+			}
+			// NOTE(review): cols comes from the live schema, so a datalink column
+			// that exists only inside a snapshot is presumably never scanned here,
+			// and a live column absent from the snapshot may make the scan fail --
+			// confirm how scanColumn behaves in both cases.
+			// NOTE(review): snap.Name is concatenated into the hint unescaped;
+			// confirm snapshot names cannot contain a single quote.
+			snapHashes, scanErr := collectHashesFromColumns(
+				ctx, refs, cols, "{snapshot = '"+snap.Name+"'}")
+			if scanErr != nil {
+				return 0, scanErr
+			}
+			for h := range snapHashes {
+				delete(orphans, h)
+			}
+		}
+	}
+
+	// Two-pass grace: mark on first sighting, delete once the grace window has
+	// elapsed for a continuously-orphan blob.
+	// The lock is held for the rest of the call, including the CASDelete calls.
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	pacc := s.pending[accountID]
+	if pacc == nil {
+		pacc = make(map[string]time.Time)
+		s.pending[accountID] = pacc
+	}
+	now := s.nowFn()
+
+	for h := range orphans {
+		seen, ok := pacc[h]
+		if !ok {
+			// First sighting as an orphan: start its grace timer.
+			pacc[h] = now
+			continue
+		}
+		if now.Sub(seen) >= s.cfg.GraceWindow {
+			if delErr := datalink.CASDelete(ctx, s.fs, accountID, h); delErr != nil {
+				// Return partial progress along with the error so the caller can
+				// log it; pending state for h is left intact for a later retry.
+				s.prune(accountID, pacc, orphans)
+				return deleted, delErr
+			}
+			delete(pacc, h)
+			deleted++
+		}
+	}
+
+	// Prune entries that are no longer orphans (re-referenced or deleted) so the
+	// pending map cannot leak.
+	s.prune(accountID, pacc, orphans)
+	return deleted, nil
+}
+
+// prune removes pending entries for hashes that are no longer orphans, and
+// drops the account's pending map entirely once it is empty. Caller holds s.mu.
+func (s *Sweeper) prune(accountID uint32, pacc map[string]time.Time, orphans map[string]struct{}) {
+	// Forget every tracked hash that is not an orphan in the current pass.
+	for h := range pacc {
+		if _, stillOrphan := orphans[h]; !stillOrphan {
+			delete(pacc, h)
+		}
+	}
+	// Nothing left to track for this account: release its map as well.
+	if len(pacc) == 0 {
+		delete(s.pending, accountID)
+	}
+}
+
+// SweepAll sweeps every live account. A per-account failure is logged and does
+// not abort the run; remaining accounts are still swept. It returns the total
+// number of blobs deleted across all accounts and a joined error aggregating
+// every per-account failure (nil if all accounts succeeded).
+//
+// If ctx is done before an account is started, the run stops there and
+// ctx.Err() is included in the joined error, so errors.Is(err,
+// context.Canceled) and errors.Is(err, context.DeadlineExceeded) work on the
+// result.
+func (s *Sweeper) SweepAll(ctx context.Context) (deleted int, err error) {
+	ids, err := s.env.listAccountIDs(ctx)
+	if err != nil {
+		return 0, err
+	}
+	var errs []error
+	for _, id := range ids {
+		// A done context is not a per-account failure: stop instead of
+		// attempting (and logging) every remaining account with a dead ctx.
+		if ctxErr := ctx.Err(); ctxErr != nil {
+			errs = append(errs, ctxErr)
+			break
+		}
+		n, accErr := s.SweepAccount(ctx, id)
+		// SweepAccount reports partial progress even when it fails.
+		deleted += n
+		if accErr != nil {
+			logutil.Errorf("casgc: sweep account %d failed: %v", id, accErr)
+			errs = append(errs, accErr)
+		}
+	}
+	// errors.Join returns nil when errs is empty.
+	return deleted, errors.Join(errs...)
+}
diff --git a/pkg/datalink/casgc/sweep_test.go b/pkg/datalink/casgc/sweep_test.go
new file mode 100644
index 0000000000000..c7eab89fdab23
--- /dev/null
+++ b/pkg/datalink/casgc/sweep_test.go
@@ -0,0 +1,338 @@
+// Copyright 2024 Matrix Origin
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package casgc
+
+import (
+	"context"
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/matrixorigin/matrixone/pkg/datalink"
+	"github.com/matrixorigin/matrixone/pkg/defines"
+	"github.com/matrixorigin/matrixone/pkg/fileservice"
+	"github.com/stretchr/testify/require"
+)
+
+// testAccountID is the account used by the single-account sweep tests.
+const testAccountID = uint32(7)
+
+// newTestCASFS returns an in-memory file service registered under the SHARED
+// service name, with caching disabled.
+func newTestCASFS(t *testing.T) fileservice.FileService {
+	fs, err := fileservice.NewMemoryFS(defines.SharedFileServiceName, fileservice.DisabledCacheConfig, nil)
+	require.NoError(t, err)
+	return fs
+}
+
+// datalinkURL builds a pinned datalink URL string carrying the given contenthash.
+func datalinkURL(hash string) string {
+	return fmt.Sprintf("mo://bucket/path?contenthash=%s", hash)
+}
+
+// fakeRefs fakes accountRefs for one account. It records calls so tests can
+// assert candidate-driven behavior.
+type fakeRefs struct {
+	cols []columnRef
+	// liveValues are the raw column values returned for snapshotHint=="".
+	liveValues []string
+	// snapValues maps a snapshot name to the raw column values returned for it.
+	snapValues map[string][]string
+	snaps      []snapshotRef
+
+	// error injection
+	colsErr error
+	scanErr error
+	snapErr error
+
+	// call recording
+	scanCalls         int
+	liveSnapshotCalls int
+	snapScanHints     []string
+}
+
+// datalinkColumns returns colsErr when set, otherwise the configured columns.
+func (f *fakeRefs) datalinkColumns(ctx context.Context) ([]columnRef, error) {
+	if f.colsErr != nil {
+		return nil, f.colsErr
+	}
+	return f.cols, nil
+}
+
+// liveSnapshots counts the call, then returns snapErr when set, otherwise the
+// configured snapshots.
+func (f *fakeRefs) liveSnapshots(ctx context.Context) ([]snapshotRef, error) {
+	f.liveSnapshotCalls++
+	if f.snapErr != nil {
+		return nil, f.snapErr
+	}
+	return f.snaps, nil
+}
+
+// scanColumn counts the call and ignores ref: the same values are returned for
+// every column. An empty hint yields liveValues; any other hint is recorded and
+// resolved against snapValues (nil when no snapshot name matches).
+func (f *fakeRefs) scanColumn(ctx context.Context, ref columnRef, snapshotHint string) ([]string, error) {
+	f.scanCalls++
+	if f.scanErr != nil {
+		return nil, f.scanErr
+	}
+	if snapshotHint == "" {
+		return f.liveValues, nil
+	}
+	f.snapScanHints = append(f.snapScanHints, snapshotHint)
+	// snapValues is keyed by snapshot name; recover it from the hint.
+	for name, vals := range f.snapValues {
+		if snapshotHint == "{snapshot = '"+name+"'}" {
+			return vals, nil
+		}
+	}
+	return nil, nil
+}
+
+// fakeEnv fakes sweepEnv with a fixed set of accounts.
+type fakeEnv struct {
+	ids       []uint32
+	refs      map[uint32]*fakeRefs
+	refsErr   map[uint32]error
+	idsErr    error
+	refsCalls []uint32
+}
+
+// listAccountIDs returns idsErr when set, otherwise the configured ids.
+func (e *fakeEnv) listAccountIDs(ctx context.Context) ([]uint32, error) {
+	if e.idsErr != nil {
+		return nil, e.idsErr
+	}
+	return e.ids, nil
+}
+
+// refsForAccount records the requested account, then returns the injected
+// error for it when one is configured, otherwise its fakeRefs.
+func (e *fakeEnv) refsForAccount(ctx context.Context, accountID uint32) (accountRefs, error) {
+	e.refsCalls = append(e.refsCalls, accountID)
+	if err, ok := e.refsErr[accountID]; ok && err != nil {
+		return nil, err
+	}
+	return e.refs[accountID], nil
+}
+
+// newSweeper builds a Sweeper with a 24h grace window and replaces its clock
+// with a fakeClock that the test advances manually. t is currently unused.
+func newSweeper(t *testing.T, fs fileservice.FileService, env sweepEnv) (*Sweeper, *fakeClock) {
+	cfg := Config{Interval: time.Hour, GraceWindow: 24 * time.Hour}
+	s := NewSweeper(fs, cfg, env)
+	clk := &fakeClock{now: time.Unix(1_700_000_000, 0)}
+	s.nowFn = clk.Now
+	return s, clk
+}
+
+// fakeClock is a manually advanced clock for grace-window tests.
+type fakeClock struct{ now time.Time }
+
+func (c *fakeClock) Now() time.Time          { return c.now }
+func (c *fakeClock) advance(d time.Duration) { c.now = c.now.Add(d) }
+
+// 1. Unreferenced blob deleted only after grace.
+func TestSweepAccount_UnreferencedDeletedAfterGrace(t *testing.T) {
+	ctx := context.Background()
+	fs := newTestCASFS(t)
+	hash, err := datalink.CASPut(ctx, fs, testAccountID, []byte("orphan blob"))
+	require.NoError(t, err)
+
+	refs := &fakeRefs{cols: []columnRef{{DBName: "db", TableName: "t", ColName: "c"}}}
+	env := &fakeEnv{ids: []uint32{testAccountID}, refs: map[uint32]*fakeRefs{testAccountID: refs}}
+	s, clk := newSweeper(t, fs, env)
+
+	// First pass: only marks, deletes nothing.
+	deleted, err := s.SweepAccount(ctx, testAccountID)
+	require.NoError(t, err)
+	require.Equal(t, 0, deleted)
+	ok, err := datalink.CASExists(ctx, fs, testAccountID, hash)
+	require.NoError(t, err)
+	require.True(t, ok)
+
+	// Advance past grace and sweep again: now deleted.
+	// Exactly one grace window elapses; the sweeper's check is ">=".
+	clk.advance(24 * time.Hour)
+	deleted, err = s.SweepAccount(ctx, testAccountID)
+	require.NoError(t, err)
+	require.Equal(t, 1, deleted)
+	ok, err = datalink.CASExists(ctx, fs, testAccountID, hash)
+	require.NoError(t, err)
+	require.False(t, ok)
+}
+
+// 2. Referenced blob never deleted.
+func TestSweepAccount_ReferencedNeverDeleted(t *testing.T) {
+	ctx := context.Background()
+	fs := newTestCASFS(t)
+	hash, err := datalink.CASPut(ctx, fs, testAccountID, []byte("live blob"))
+	require.NoError(t, err)
+
+	refs := &fakeRefs{
+		cols:       []columnRef{{DBName: "db", TableName: "t", ColName: "c"}},
+		liveValues: []string{datalinkURL(hash)},
+	}
+	env := &fakeEnv{ids: []uint32{testAccountID}, refs: map[uint32]*fakeRefs{testAccountID: refs}}
+	s, clk := newSweeper(t, fs, env)
+
+	// Three passes, each two grace windows apart.
+	for i := 0; i < 3; i++ {
+		deleted, err := s.SweepAccount(ctx, testAccountID)
+		require.NoError(t, err)
+		require.Equal(t, 0, deleted)
+		clk.advance(48 * time.Hour)
+	}
+	ok, err := datalink.CASExists(ctx, fs, testAccountID, hash)
+	require.NoError(t, err)
+	require.True(t, ok)
+
+	// Referenced => never a candidate => snapshots never scanned.
+	require.Equal(t, 0, refs.liveSnapshotCalls)
+}
+
+// 3. Snapshot keeps blob; and with zero candidates snapshots are not scanned.
+func TestSweepAccount_SnapshotKeepsBlob(t *testing.T) {
+	ctx := context.Background()
+	fs := newTestCASFS(t)
+	hash, err := datalink.CASPut(ctx, fs, testAccountID, []byte("snapshotted blob"))
+	require.NoError(t, err)
+
+	refs := &fakeRefs{
+		cols:       []columnRef{{DBName: "db", TableName: "t", ColName: "c"}},
+		liveValues: nil, // absent from current data
+		snaps:      []snapshotRef{{Name: "snap1", TS: 1}},
+		snapValues: map[string][]string{"snap1": {datalinkURL(hash)}},
+	}
+	env := &fakeEnv{ids: []uint32{testAccountID}, refs: map[uint32]*fakeRefs{testAccountID: refs}}
+	s, clk := newSweeper(t, fs, env)
+
+	// Three passes, each two grace windows apart.
+	for i := 0; i < 3; i++ {
+		deleted, err := s.SweepAccount(ctx, testAccountID)
+		require.NoError(t, err)
+		require.Equal(t, 0, deleted)
+		clk.advance(48 * time.Hour)
+	}
+	ok, err := datalink.CASExists(ctx, fs, testAccountID, hash)
+	require.NoError(t, err)
+	require.True(t, ok)
+	// There WAS a candidate, so snapshots must have been consulted.
+	require.Greater(t, refs.liveSnapshotCalls, 0)
+	require.NotEmpty(t, refs.snapScanHints)
+}
+
+// 3b. Candidate-driven: zero candidates => snapshots never scanned.
+func TestSweepAccount_NoCandidatesSkipsSnapshots(t *testing.T) {
+	ctx := context.Background()
+	fs := newTestCASFS(t) // empty CAS namespace
+
+	refs := &fakeRefs{
+		cols:  []columnRef{{DBName: "db", TableName: "t", ColName: "c"}},
+		snaps: []snapshotRef{{Name: "snap1", TS: 1}},
+	}
+	env := &fakeEnv{ids: []uint32{testAccountID}, refs: map[uint32]*fakeRefs{testAccountID: refs}}
+	s, _ := newSweeper(t, fs, env)
+
+	deleted, err := s.SweepAccount(ctx, testAccountID)
+	require.NoError(t, err)
+	require.Equal(t, 0, deleted)
+	require.Equal(t, 0, refs.liveSnapshotCalls)
+	require.Empty(t, refs.snapScanHints)
+}
+
+// 4. Re-reference resets the grace timer.
+func TestSweepAccount_ReReferenceResetsGrace(t *testing.T) {
+	ctx := context.Background()
+	fs := newTestCASFS(t)
+	hash, err := datalink.CASPut(ctx, fs, testAccountID, []byte("flapping blob"))
+	require.NoError(t, err)
+
+	refs := &fakeRefs{cols: []columnRef{{DBName: "db", TableName: "t", ColName: "c"}}}
+	env := &fakeEnv{ids: []uint32{testAccountID}, refs: map[uint32]*fakeRefs{testAccountID: refs}}
+	s, clk := newSweeper(t, fs, env)
+
+	// Pass 1: orphan, marked.
+	deleted, err := s.SweepAccount(ctx, testAccountID)
+	require.NoError(t, err)
+	require.Equal(t, 0, deleted)
+
+	// Before grace elapses, the blob is referenced again -> pruned from pending.
+	clk.advance(1 * time.Hour)
+	refs.liveValues = []string{datalinkURL(hash)}
+	deleted, err = s.SweepAccount(ctx, testAccountID)
+	require.NoError(t, err)
+	require.Equal(t, 0, deleted)
+
+	// Pending must no longer track the hash (timer reset).
+	// Indexing a missing account entry yields a nil inner map, which is safe to read.
+	s.mu.Lock()
+	_, tracked := s.pending[testAccountID][hash]
+	s.mu.Unlock()
+	require.False(t, tracked)
+
+	// Even well past the original grace, a still-referenced blob survives.
+	clk.advance(48 * time.Hour)
+	deleted, err = s.SweepAccount(ctx, testAccountID)
+	require.NoError(t, err)
+	require.Equal(t, 0, deleted)
+	ok, err := datalink.CASExists(ctx, fs, testAccountID, hash)
+	require.NoError(t, err)
+	require.True(t, ok)
+
+	// Now it becomes orphan again: timer restarts from this pass.
+	refs.liveValues = nil
+	deleted, err = s.SweepAccount(ctx, testAccountID) // re-mark
+	require.NoError(t, err)
+	require.Equal(t, 0, deleted)
+
+	// Just under grace: still not deleted.
+	clk.advance(23 * time.Hour)
+	deleted, err = s.SweepAccount(ctx, testAccountID)
+	require.NoError(t, err)
+	require.Equal(t, 0, deleted)
+	ok, err = datalink.CASExists(ctx, fs, testAccountID, hash)
+	require.NoError(t, err)
+	require.True(t, ok)
+
+	// Cross grace from the restart: deleted.
+	// 23h + 2h = 25h since the re-mark, which exceeds the 24h grace window.
+	clk.advance(2 * time.Hour)
+	deleted, err = s.SweepAccount(ctx, testAccountID)
+	require.NoError(t, err)
+	require.Equal(t, 1, deleted)
+	ok, err = datalink.CASExists(ctx, fs, testAccountID, hash)
+	require.NoError(t, err)
+	require.False(t, ok)
+}
+
+// 5. SweepAll continues past a failing account.
+func TestSweepAll_ContinuesPastFailingAccount(t *testing.T) {
+	ctx := context.Background()
+	fs := newTestCASFS(t)
+
+	const acctBad = uint32(1)
+	const acctGood = uint32(2)
+
+	// acctGood has one orphan blob already past grace once we advance.
+	hash, err := datalink.CASPut(ctx, fs, acctGood, []byte("good orphan"))
+	require.NoError(t, err)
+
+	goodRefs := &fakeRefs{cols: []columnRef{{DBName: "db", TableName: "t", ColName: "c"}}}
+	// acctBad is listed first, so its failure is hit before acctGood is swept.
+	env := &fakeEnv{
+		ids:     []uint32{acctBad, acctGood},
+		refs:    map[uint32]*fakeRefs{acctGood: goodRefs},
+		refsErr: map[uint32]error{acctBad: fmt.Errorf("boom: refs unavailable")},
+	}
+	s, clk := newSweeper(t, fs, env)
+
+	// First SweepAll: bad account errors (logged), good account marks its orphan.
+	deleted, err := s.SweepAll(ctx)
+	require.Error(t, err) // joined error surfaces the bad account
+	require.Equal(t, 0, deleted)
+	// The good account was still visited.
+	require.Contains(t, env.refsCalls, acctGood)
+
+	// Advance past grace; the good account's blob should now be deleted despite
+	// the bad account continuing to fail.
+	clk.advance(24 * time.Hour)
+	deleted, err = s.SweepAll(ctx)
+	require.Error(t, err)
+	require.Equal(t, 1, deleted)
+	ok, err := datalink.CASExists(ctx, fs, acctGood, hash)
+	require.NoError(t, err)
+	require.False(t, ok)
+}
diff --git a/pkg/datalink/datalink.go b/pkg/datalink/datalink.go
index 7da5fbdda985d..5a7edc9706a1c 100644
--- a/pkg/datalink/datalink.go
+++ b/pkg/datalink/datalink.go
@@ -25,6 +25,7 @@ import (
 	"github.com/matrixorigin/matrixone/pkg/common/moerr"
 	"github.com/matrixorigin/matrixone/pkg/datalink/docx"
 	"github.com/matrixorigin/matrixone/pkg/datalink/pdf"
+	"github.com/matrixorigin/matrixone/pkg/defines"
 	"github.com/matrixorigin/matrixone/pkg/fileservice"
 	"github.com/matrixorigin/matrixone/pkg/stage"
 	"github.com/matrixorigin/matrixone/pkg/stage/stageutil"
@@ -36,6 +37,10 @@ type Datalink struct {
 	Offset int64
 	Size   int64
 	MoPath string
+
+	// ContentHash is non-empty when the datalink is pinned (carries ?contenthash=).
+	// In that case MoPath addresses an immutable CAS object in the SHARED service.
+	ContentHash string
 }
 
 func NewDatalink(aurl string, proc *process.Process) (Datalink, error) {
@@ -49,7 +54,15 @@ func NewDatalink(aurl string, proc *process.Process) (Datalink, error) {
 	if err != nil {
 		return Datalink{}, err
 	}
-	return Datalink{Url: u, Offset: int64(offsetSize[0]), Size: int64(offsetSize[1]), MoPath: moUrl}, nil
+
+	dl := Datalink{Url: u, Offset: offsetSize[0], Size: offsetSize[1], MoPath: moUrl}
+	// Derive ContentHash from the parsed MoPath so it always addresses the same
+	// CAS object that ParseDatalink resolved (no split-brain on duplicate or
+	// mixed-case contenthash params).
+ if hash, ok := casHashFromKey(moUrl); ok { + dl.ContentHash = hash + } + return dl, nil } func (d Datalink) GetBytes(proc *process.Process) ([]byte, error) { @@ -86,7 +99,14 @@ func (d Datalink) GetPlainText(proc *process.Process) ([]byte, error) { } func (d Datalink) NewWriter(proc *process.Process) (*fileservice.FileServiceWriter, error) { - + // A pinned (?contenthash=) datalink addresses an immutable CAS object, and its + // MoPath is the internal CAS key rather than the original external path. Writing + // through it would route to that CAS key and break the pinned-read contract, so + // reject writes outright: pinned content is immutable. + if d.ContentHash != "" { + return nil, moerr.NewInternalErrorf(proc.Ctx, + "cannot write to a pinned datalink (contenthash=%s): pinned content is immutable", d.ContentHash) + } return fileservice.NewFileServiceWriter(d.MoPath, proc.Ctx) } @@ -100,8 +120,53 @@ func ParseDatalink(fsPath string, proc *process.Process) (string, []int64, error return "", nil, err } + // 1. collect query params. Values are lower-cased; a sha256 hex digest is + // already lower-case so the contenthash value is unaffected. + urlParams := make(map[string]string) + for k, v := range u.Query() { + urlParams[strings.ToLower(k)] = strings.ToLower(v[0]) + } + + // 2. get size and offset from the query (apply to both live and pinned values) + offsetSize := []int64{0, -1} + if _, ok := urlParams["offset"]; ok { + if offsetSize[0], err = strconv.ParseInt(urlParams["offset"], 10, 64); err != nil { + return "", nil, err + } + } + if _, ok := urlParams["size"]; ok { + if offsetSize[1], err = strconv.ParseInt(urlParams["size"], 10, 64); err != nil { + return "", nil, err + } + } + if offsetSize[0] < 0 { + return "", nil, moerr.NewInternalErrorNoCtx("offset cannot be negative") + } + if offsetSize[1] < -1 { + return "", nil, moerr.NewInternalErrorNoCtx("size cannot be less than -1") + } + + // 3. 
a pinned datalink (?contenthash=) addresses an immutable CAS object and + // never resolves to the live external path, so historical snapshot bytes stay + // reproducible. A missing CAS object surfaces as a read error rather than a + // silent fall back to the live (possibly overwritten) file. + // + // The CAS key is namespaced by the calling account, resolved from the trusted + // execution context (never from the URL). This binds the read to the caller's + // account: a contenthash cannot be used to read another account's pinned bytes. + if hash, ok := urlParams[ContentHashKey]; ok { + if err = ValidateContentHash(hash); err != nil { + return "", nil, err + } + accountID, err := contentHashAccountID(proc) + if err != nil { + return "", nil, err + } + return CASKey(accountID, hash), offsetSize, nil + } + + // 4. live reference: resolve to the external file's current location var moUrl string - // 1. get moUrl from the path switch u.Scheme { case stage.FILE_PROTOCOL: moUrl = strings.Join([]string{u.Host, u.Path}, "") @@ -124,35 +189,54 @@ func ParseDatalink(fsPath string, proc *process.Process) (string, []int64, error return "", nil, moerr.NewNYINoCtxf("unsupported url scheme %s", u.Scheme) } - // 2. 
get size and offset from the query - urlParams := make(map[string]string) - for k, v := range u.Query() { - urlParams[strings.ToLower(k)] = strings.ToLower(v[0]) - } - offsetSize := []int64{0, -1} - if _, ok := urlParams["offset"]; ok { - if offsetSize[0], err = strconv.ParseInt(urlParams["offset"], 10, 64); err != nil { - return "", nil, err - } - } - if _, ok := urlParams["size"]; ok { - if offsetSize[1], err = strconv.ParseInt(urlParams["size"], 10, 64); err != nil { - return "", nil, err - } - } - - if offsetSize[0] < 0 { - return "", nil, moerr.NewInternalErrorNoCtx("offset cannot be negative") - } + return moUrl, offsetSize, nil +} - if offsetSize[1] < -1 { - return "", nil, moerr.NewInternalErrorNoCtx("size cannot be less than -1") +// contentHashAccountID resolves the account that owns a pinned datalink's CAS +// namespace from the trusted execution context. A pinned read/write requires a +// session account; resolving it from the URL would re-introduce the bearer-token +// problem the per-account namespace is meant to prevent. +func contentHashAccountID(proc *process.Process) (uint32, error) { + if proc == nil { + return 0, moerr.NewInternalErrorNoCtx("pinned datalink requires an execution context to resolve the account") } - - return moUrl, offsetSize, nil + return defines.GetAccountId(proc.Ctx) } func (d Datalink) NewReadCloser(proc *process.Process) (io.ReadCloser, error) { + if d.ContentHash != "" { + // pinned value: read the immutable CAS object directly from the SHARED + // service. SHARED may be a plain FileService (e.g. LocalFS in standalone) + // that does not implement ETLFileService, so we must not route through + // GetForETL here. A missing object surfaces as a read error rather than a + // silent fall back to the live (possibly overwritten) file. 
+ fs, err := fileservice.Get[fileservice.FileService](proc.GetFileService(), defines.SharedFileServiceName) + if err != nil { + return nil, err + } + var r io.ReadCloser + vec := fileservice.IOVector{ + FilePath: d.MoPath, + Entries: []fileservice.IOEntry{ + 0: { + Offset: d.Offset, + Size: d.Size, + ReadCloserForRead: &r, + }, + }, + } + if err = fs.Read(proc.Ctx, &vec); err != nil { + if moerr.IsMoErrCode(err, moerr.ErrFileNotFound) { + // Key the error on the content hash, not the internal CAS storage + // path: the path embeds the file-service layout and the account id + // (non-deterministic), while the hash is what the user supplied. + return nil, moerr.NewFileNotFound(proc.Ctx, d.ContentHash) + } + return nil, err + } + return r, nil + } + fs := proc.GetFileService() fs, readPath, err := fileservice.GetForETL(proc.Ctx, fs, d.MoPath) if fs == nil || err != nil { @@ -175,3 +259,32 @@ func (d Datalink) NewReadCloser(proc *process.Process) (io.ReadCloser, error) { } return r, nil } + +// StatSize returns the byte size of the referenced object, transparently +// handling both live references and pinned (content-addressed) values. 
+func (d Datalink) StatSize(proc *process.Process) (int64, error) { + if d.ContentHash != "" { + fs, err := fileservice.Get[fileservice.FileService](proc.GetFileService(), defines.SharedFileServiceName) + if err != nil { + return 0, err + } + entry, err := fs.StatFile(proc.Ctx, d.MoPath) + if err != nil { + if moerr.IsMoErrCode(err, moerr.ErrFileNotFound) { + return 0, moerr.NewFileNotFound(proc.Ctx, d.ContentHash) + } + return 0, err + } + return entry.Size, nil + } + + etlFS, readPath, err := fileservice.GetForETL(proc.Ctx, proc.GetFileService(), d.MoPath) + if err != nil { + return 0, err + } + entry, err := etlFS.StatFile(proc.Ctx, readPath) + if err != nil { + return 0, err + } + return entry.Size, nil +} diff --git a/pkg/datalink/datalink_contenthash_test.go b/pkg/datalink/datalink_contenthash_test.go new file mode 100644 index 0000000000000..58f5a90f4f2a9 --- /dev/null +++ b/pkg/datalink/datalink_contenthash_test.go @@ -0,0 +1,104 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package datalink + +import ( + "strings" + "testing" + + "github.com/matrixorigin/matrixone/pkg/defines" + "github.com/matrixorigin/matrixone/pkg/testutil" + "github.com/matrixorigin/matrixone/pkg/vm/process" + "github.com/stretchr/testify/require" +) + +// procWithAccount returns a process whose context carries an account id, plus +// that id. A pinned datalink resolves its CAS namespace from this account. 
+func procWithAccount(t *testing.T) (*process.Process, uint32) { + proc := testutil.NewProc(t) + accountID, err := defines.GetAccountId(proc.Ctx) + require.NoError(t, err) + return proc, accountID +} + +// A pinned datalink resolves to the immutable CAS object path, never the live path. +func TestParseDatalinkContentHashResolvesToCAS(t *testing.T) { + proc, accountID := procWithAccount(t) + hash := strings.Repeat("a", 64) + moUrl, offsetSize, err := ParseDatalink("file:///a/b/c/1.txt?contenthash="+hash, proc) + require.NoError(t, err) + require.Equal(t, CASKey(accountID, hash), moUrl) + require.NotEqual(t, "/a/b/c/1.txt", moUrl) // must not fall back to the live path + require.Equal(t, []int64{0, -1}, offsetSize) +} + +// An uppercase hash is normalized to lowercase so it matches the stored sha256 hex. +func TestParseDatalinkContentHashNormalizesCase(t *testing.T) { + proc, accountID := procWithAccount(t) + moUrl, _, err := ParseDatalink("file:///a/b/c/1.txt?contenthash="+strings.Repeat("A", 64), proc) + require.NoError(t, err) + require.Equal(t, CASKey(accountID, strings.Repeat("a", 64)), moUrl) +} + +// offset/size still apply on top of a pinned object. +func TestParseDatalinkContentHashWithOffsetSize(t *testing.T) { + proc, accountID := procWithAccount(t) + hash := strings.Repeat("a", 64) + moUrl, offsetSize, err := ParseDatalink("file:///a/b/c/1.txt?contenthash="+hash+"&offset=1&size=2", proc) + require.NoError(t, err) + require.Equal(t, CASKey(accountID, hash), moUrl) + require.Equal(t, []int64{1, 2}, offsetSize) +} + +// A pinned stage datalink does not need the live stage resolution. 
+func TestParseDatalinkContentHashStageNoLiveResolution(t *testing.T) { + proc, accountID := procWithAccount(t) + hash := strings.Repeat("b", 64) + moUrl, _, err := ParseDatalink("stage://st/doc.txt?contenthash="+hash, proc) + require.NoError(t, err) + require.Equal(t, CASKey(accountID, hash), moUrl) +} + +// A pinned datalink resolved in different account contexts maps to distinct CAS +// keys: the contenthash alone does not address a globally shared object. +func TestParseDatalinkContentHashIsAccountScoped(t *testing.T) { + proc, accountID := procWithAccount(t) + hash := strings.Repeat("a", 64) + + moUrl, _, err := ParseDatalink("file:///a/b/c/1.txt?contenthash="+hash, proc) + require.NoError(t, err) + require.Equal(t, CASKey(accountID, hash), moUrl) + + // a different account resolving the same URL gets a different CAS key + proc.Ctx = defines.AttachAccountId(proc.Ctx, accountID+1) + otherUrl, _, err := ParseDatalink("file:///a/b/c/1.txt?contenthash="+hash, proc) + require.NoError(t, err) + require.Equal(t, CASKey(accountID+1, hash), otherUrl) + require.NotEqual(t, moUrl, otherUrl) +} + +func TestParseDatalinkInvalidContentHash(t *testing.T) { + cases := []string{ + "file:///a/b/c/1.txt?contenthash=abc", // too short + "file:///a/b/c/1.txt?contenthash=" + strings.Repeat("a", 63), // 63 chars + "file:///a/b/c/1.txt?contenthash=" + strings.Repeat("a", 65), // 65 chars + "file:///a/b/c/1.txt?contenthash=" + strings.Repeat("z", 64), // non-hex + "file:///a/b/c/1.txt?contenthash=", // empty + } + for _, c := range cases { + _, _, err := ParseDatalink(c, nil) + require.Error(t, err, c) + } +} diff --git a/pkg/datalink/datalink_pin_read_test.go b/pkg/datalink/datalink_pin_read_test.go new file mode 100644 index 0000000000000..e19aba3524c03 --- /dev/null +++ b/pkg/datalink/datalink_pin_read_test.go @@ -0,0 +1,160 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance 
with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package datalink + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/matrixorigin/matrixone/pkg/defines" + "github.com/matrixorigin/matrixone/pkg/fileservice" + "github.com/matrixorigin/matrixone/pkg/testutil" + "github.com/stretchr/testify/require" +) + +// A live (un-pinned) datalink reads its bytes from the external file via the +// GetForETL path, and StatSize reports the live file size. +func TestNewReadCloserLive(t *testing.T) { + proc := testutil.NewProc(t) + dir := t.TempDir() + fp := filepath.Join(dir, "f.txt") + require.NoError(t, os.WriteFile(fp, []byte("live-content"), 0o600)) + + dl, err := NewDatalink("file://"+fp, proc) + require.NoError(t, err) + require.Empty(t, dl.ContentHash) + + got, err := dl.GetBytes(proc) + require.NoError(t, err) + require.Equal(t, []byte("live-content"), got) + + sz, err := dl.StatSize(proc) + require.NoError(t, err) + require.Equal(t, int64(len("live-content")), sz) +} + +// NewDatalink populates ContentHash and MoPath for a pinned URL, and leaves +// ContentHash empty for a live one. 
+func TestNewDatalinkContentHash(t *testing.T) { + proc, accountID := procWithAccount(t) + hash := strings.Repeat("a", 64) + dl, err := NewDatalink("file:///x.txt?contenthash="+hash, proc) + require.NoError(t, err) + require.Equal(t, hash, dl.ContentHash) + require.Equal(t, CASKey(accountID, hash), dl.MoPath) + + live, err := NewDatalink("file:///x.txt", proc) + require.NoError(t, err) + require.Empty(t, live.ContentHash) +} + +// ContentHash always addresses the same CAS object as MoPath, regardless of the +// case used in the contenthash query key or value. +func TestNewDatalinkContentHashConsistentWithMoPath(t *testing.T) { + proc, accountID := procWithAccount(t) + hash := strings.Repeat("a", 64) + for _, raw := range []string{ + "file:///x.txt?contenthash=" + hash, + "file:///x.txt?ContentHash=" + strings.ToUpper(hash), + } { + dl, err := NewDatalink(raw, proc) + require.NoError(t, err, raw) + require.Equal(t, hash, dl.ContentHash, raw) + require.Equal(t, CASKey(accountID, dl.ContentHash), dl.MoPath, raw) + } +} + +// A pinned datalink requires an execution context to resolve its account +// namespace; a nil process is rejected rather than silently using a global key. +func TestNewDatalinkContentHashRequiresAccount(t *testing.T) { + _, err := NewDatalink("file:///x.txt?contenthash="+strings.Repeat("a", 64), nil) + require.Error(t, err) +} + +// A pinned datalink reads its bytes from the CAS, decoupled from the original +// path. NewProc(t) backs SHARED with LocalFS (no ETLFileService), matching +// standalone, so this exercises the direct-Read path rather than GetForETL. 
+func TestNewReadCloserPinned(t *testing.T) { + proc, accountID := procWithAccount(t) + + casFS, err := fileservice.Get[fileservice.FileService](proc.Base.FileService, defines.SharedFileServiceName) + require.NoError(t, err) + content := []byte("frozen-bytes") + hash, err := CASPut(proc.Ctx, casFS, accountID, content) + require.NoError(t, err) + + // a bogus original path with a valid contenthash is still served from the CAS + dl, err := NewDatalink("file:///bogus/path.txt?contenthash="+hash, proc) + require.NoError(t, err) + + got, err := dl.GetBytes(proc) + require.NoError(t, err) + require.Equal(t, content, got) + + sz, err := dl.StatSize(proc) + require.NoError(t, err) + require.Equal(t, int64(len(content)), sz) +} + +// A pinned datalink read in a different account than the one that pinned it does +// not see the bytes: the CAS object is namespaced per account, so a contenthash +// is not a cross-account bearer capability. It errors (no live fallback). +func TestNewReadCloserPinnedCrossAccountIsolated(t *testing.T) { + proc, accountID := procWithAccount(t) + + casFS, err := fileservice.Get[fileservice.FileService](proc.Base.FileService, defines.SharedFileServiceName) + require.NoError(t, err) + content := []byte("tenant-A-only") + hash, err := CASPut(proc.Ctx, casFS, accountID, content) + require.NoError(t, err) + + // switch the execution context to a different account + proc.Ctx = defines.AttachAccountId(proc.Ctx, accountID+1) + dl, err := NewDatalink("file:///bogus/path.txt?contenthash="+hash, proc) + require.NoError(t, err) + + _, err = dl.GetBytes(proc) + require.Error(t, err) +} + +// A pinned (contenthash) datalink addresses an immutable CAS object, so writes +// must be refused: NewWriter would otherwise build a writer over the internal CAS +// key (MoPath), clobbering the content-addressed store at the wrong backing path. 
+func TestNewWriterRejectsPinned(t *testing.T) { + proc, _ := procWithAccount(t) + dl, err := NewDatalink("file:///bogus/path.txt?contenthash="+strings.Repeat("a", 64), proc) + require.NoError(t, err) + require.NotEmpty(t, dl.ContentHash) + + _, err = dl.NewWriter(proc) + require.Error(t, err) +} + +// Reading a pinned datalink whose CAS object is missing errors out: it never +// falls back to the live (bogus) path. +func TestNewReadCloserPinnedMissing(t *testing.T) { + proc := testutil.NewProc(t) + + dl, err := NewDatalink("file:///bogus/path.txt?contenthash="+strings.Repeat("b", 64), proc) + require.NoError(t, err) + + _, err = dl.GetBytes(proc) + require.Error(t, err) + + _, err = dl.StatSize(proc) + require.Error(t, err) +} diff --git a/pkg/frontend/authenticate.go b/pkg/frontend/authenticate.go index 91102ed3d8c5d..fb2667be4e688 100644 --- a/pkg/frontend/authenticate.go +++ b/pkg/frontend/authenticate.go @@ -40,6 +40,7 @@ import ( "github.com/matrixorigin/matrixone/pkg/common/pubsub" "github.com/matrixorigin/matrixone/pkg/config" "github.com/matrixorigin/matrixone/pkg/container/types" + "github.com/matrixorigin/matrixone/pkg/datalink" "github.com/matrixorigin/matrixone/pkg/defines" "github.com/matrixorigin/matrixone/pkg/fileservice" "github.com/matrixorigin/matrixone/pkg/logutil" @@ -4281,6 +4282,15 @@ func doDropAccount(ctx context.Context, bh BackgroundExec, ses *Session, da *dro if !hasAccount { return err } + // Reclaim the dropped tenant's pinned datalink CAS blobs. Best-effort: the + // account metadata is already committed and the periodic sweep is the backstop, + // so a failure here must not fail DROP ACCOUNT. 
+ if sharedFS, gerr := fileservice.Get[fileservice.FileService]( + getPu(ses.GetService()).FileService, defines.SharedFileServiceName); gerr != nil { + ses.Errorf(ctx, "dropAccount %s: get shared fs for CAS cleanup failed: %v", da.Name, gerr) + } else if derr := datalink.CASDeleteAccountPrefix(ctx, sharedFS, uint32(accountId)); derr != nil { + ses.Errorf(ctx, "dropAccount %s: clean datalink CAS prefix failed: %v", da.Name, derr) + } // if drop the account, add the account to kill queue ses.getRoutineManager().accountRoutine.EnKillQueue(accountId, version) diff --git a/pkg/pb/task/task.pb.go b/pkg/pb/task/task.pb.go index 5bb3177eed416..b77f49a8910a4 100644 --- a/pkg/pb/task/task.pb.go +++ b/pkg/pb/task/task.pb.go @@ -1,5 +1,5 @@ // Code generated by protoc-gen-gogo. DO NOT EDIT. -// source: proto/task.proto +// source: task.proto package task @@ -90,7 +90,7 @@ func (x TaskStatus) String() string { } func (TaskStatus) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{0} + return fileDescriptor_ce5d8dd45b4a91ff, []int{0} } // TaskCode task code @@ -121,6 +121,8 @@ const ( TaskCode_PublicationExecutor TaskCode = 11 // SQL task TaskCode_SQLTask TaskCode = 12 + // Datalink CAS garbage-collection sweep task + TaskCode_DatalinkCASGCExecutor TaskCode = 13 ) var TaskCode_name = map[int32]string{ @@ -136,6 +138,7 @@ var TaskCode_name = map[int32]string{ 10: "IndexUpdateTaskExecutor", 11: "PublicationExecutor", 12: "SQLTask", + 13: "DatalinkCASGCExecutor", } var TaskCode_value = map[string]int32{ @@ -151,6 +154,7 @@ var TaskCode_value = map[string]int32{ "IndexUpdateTaskExecutor": 10, "PublicationExecutor": 11, "SQLTask": 12, + "DatalinkCASGCExecutor": 13, } func (x TaskCode) String() string { @@ -158,7 +162,7 @@ func (x TaskCode) String() string { } func (TaskCode) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{1} + return fileDescriptor_ce5d8dd45b4a91ff, []int{1} } // ResultCode result code @@ -186,7 
+190,7 @@ func (x ResultCode) String() string { } func (ResultCode) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{2} + return fileDescriptor_ce5d8dd45b4a91ff, []int{2} } type TaskType int32 @@ -220,7 +224,7 @@ func (x TaskType) String() string { } func (TaskType) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{3} + return fileDescriptor_ce5d8dd45b4a91ff, []int{3} } // TaskMetadata is a task metadata abstraction that can be scheduled for execution at any CN node. @@ -242,7 +246,7 @@ func (m *TaskMetadata) Reset() { *m = TaskMetadata{} } func (m *TaskMetadata) String() string { return proto.CompactTextString(m) } func (*TaskMetadata) ProtoMessage() {} func (*TaskMetadata) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{0} + return fileDescriptor_ce5d8dd45b4a91ff, []int{0} } func (m *TaskMetadata) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -323,7 +327,7 @@ func (m *SQLTaskContext) Reset() { *m = SQLTaskContext{} } func (m *SQLTaskContext) String() string { return proto.CompactTextString(m) } func (*SQLTaskContext) ProtoMessage() {} func (*SQLTaskContext) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{1} + return fileDescriptor_ce5d8dd45b4a91ff, []int{1} } func (m *SQLTaskContext) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -474,7 +478,7 @@ func (m *TaskOptions) Reset() { *m = TaskOptions{} } func (m *TaskOptions) String() string { return proto.CompactTextString(m) } func (*TaskOptions) ProtoMessage() {} func (*TaskOptions) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{2} + return fileDescriptor_ce5d8dd45b4a91ff, []int{2} } func (m *TaskOptions) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -560,7 +564,7 @@ func (m *Resource) Reset() { *m = Resource{} } func (m *Resource) String() string { return proto.CompactTextString(m) } func (*Resource) ProtoMessage() {} func 
(*Resource) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{3} + return fileDescriptor_ce5d8dd45b4a91ff, []int{3} } func (m *Resource) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -618,7 +622,7 @@ func (m *ExecuteResult) Reset() { *m = ExecuteResult{} } func (m *ExecuteResult) String() string { return proto.CompactTextString(m) } func (*ExecuteResult) ProtoMessage() {} func (*ExecuteResult) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{4} + return fileDescriptor_ce5d8dd45b4a91ff, []int{4} } func (m *ExecuteResult) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -692,7 +696,7 @@ func (m *AsyncTask) Reset() { *m = AsyncTask{} } func (m *AsyncTask) String() string { return proto.CompactTextString(m) } func (*AsyncTask) ProtoMessage() {} func (*AsyncTask) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{5} + return fileDescriptor_ce5d8dd45b4a91ff, []int{5} } func (m *AsyncTask) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -815,7 +819,7 @@ func (m *CronTask) Reset() { *m = CronTask{} } func (m *CronTask) String() string { return proto.CompactTextString(m) } func (*CronTask) ProtoMessage() {} func (*CronTask) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{6} + return fileDescriptor_ce5d8dd45b4a91ff, []int{6} } func (m *CronTask) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -905,7 +909,7 @@ func (m *ConnectorDetails) Reset() { *m = ConnectorDetails{} } func (m *ConnectorDetails) String() string { return proto.CompactTextString(m) } func (*ConnectorDetails) ProtoMessage() {} func (*ConnectorDetails) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{7} + return fileDescriptor_ce5d8dd45b4a91ff, []int{7} } func (m *ConnectorDetails) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -960,7 +964,7 @@ func (m *Account) Reset() { *m = Account{} } func (m *Account) String() 
string { return proto.CompactTextString(m) } func (*Account) ProtoMessage() {} func (*Account) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{8} + return fileDescriptor_ce5d8dd45b4a91ff, []int{8} } func (m *Account) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1019,7 +1023,7 @@ func (m *CreateCdcDetails) Reset() { *m = CreateCdcDetails{} } func (m *CreateCdcDetails) String() string { return proto.CompactTextString(m) } func (*CreateCdcDetails) ProtoMessage() {} func (*CreateCdcDetails) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{9} + return fileDescriptor_ce5d8dd45b4a91ff, []int{9} } func (m *CreateCdcDetails) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1079,7 +1083,7 @@ func (m *RetentionDetails) Reset() { *m = RetentionDetails{} } func (m *RetentionDetails) String() string { return proto.CompactTextString(m) } func (*RetentionDetails) ProtoMessage() {} func (*RetentionDetails) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{10} + return fileDescriptor_ce5d8dd45b4a91ff, []int{10} } func (m *RetentionDetails) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1122,7 +1126,7 @@ func (m *ISCPDetails) Reset() { *m = ISCPDetails{} } func (m *ISCPDetails) String() string { return proto.CompactTextString(m) } func (*ISCPDetails) ProtoMessage() {} func (*ISCPDetails) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{11} + return fileDescriptor_ce5d8dd45b4a91ff, []int{11} } func (m *ISCPDetails) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1179,7 +1183,7 @@ func (m *PublicationDetails) Reset() { *m = PublicationDetails{} } func (m *PublicationDetails) String() string { return proto.CompactTextString(m) } func (*PublicationDetails) ProtoMessage() {} func (*PublicationDetails) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{12} + return fileDescriptor_ce5d8dd45b4a91ff, 
[]int{12} } func (m *PublicationDetails) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1244,7 +1248,7 @@ func (m *Details) Reset() { *m = Details{} } func (m *Details) String() string { return proto.CompactTextString(m) } func (*Details) ProtoMessage() {} func (*Details) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{13} + return fileDescriptor_ce5d8dd45b4a91ff, []int{13} } func (m *Details) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1400,7 +1404,7 @@ func (m *DaemonTask) Reset() { *m = DaemonTask{} } func (m *DaemonTask) String() string { return proto.CompactTextString(m) } func (*DaemonTask) ProtoMessage() {} func (*DaemonTask) Descriptor() ([]byte, []int) { - return fileDescriptor_5de11d747d3f16e6, []int{14} + return fileDescriptor_ce5d8dd45b4a91ff, []int{14} } func (m *DaemonTask) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -1544,114 +1548,115 @@ func init() { proto.RegisterType((*DaemonTask)(nil), "task.DaemonTask") } -func init() { proto.RegisterFile("proto/task.proto", fileDescriptor_5de11d747d3f16e6) } - -var fileDescriptor_5de11d747d3f16e6 = []byte{ - // 1659 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xac, 0x57, 0x4f, 0x73, 0x2b, 0x47, - 0x11, 0xd7, 0x4a, 0x2b, 0x69, 0x35, 0x2b, 0xe9, 0x0d, 0xf3, 0x52, 0x2f, 0x5b, 0x22, 0xf8, 0xa9, - 0x96, 0x40, 0x8c, 0xab, 0x22, 0x83, 0x08, 0x14, 0x79, 0x05, 0x54, 0x6c, 0xc9, 0x60, 0x25, 0x76, - 0x9e, 0x33, 0xb2, 0x2f, 0xdc, 0x46, 0xbb, 0x13, 0xbd, 0xc5, 0xd2, 0xae, 0xb2, 0x3b, 0x1b, 0xac, - 0xaf, 0xe0, 0x13, 0x37, 0x4e, 0xae, 0xe2, 0x4e, 0x15, 0x47, 0xae, 0x5c, 0x73, 0xcc, 0x07, 0xa0, - 0xf8, 0xf3, 0xe0, 0x23, 0x50, 0xc5, 0x29, 0x55, 0x54, 0xcf, 0xcc, 0xfe, 0x93, 0x81, 0x8a, 0xc9, - 0xbb, 0x6d, 0xff, 0xba, 0x7b, 0xfe, 0x74, 0xf7, 0xaf, 0x7b, 0x16, 0xe1, 0x4d, 0x1c, 0x89, 0xe8, - 0x50, 0xb0, 0xe4, 0x7a, 0x24, 0x3f, 0x89, 0x09, 0xdf, 0x83, 0xb7, 0x97, 0x81, 0x78, 0x91, 0x2e, - 0x46, 0x5e, 0xb4, 
0x3e, 0x5c, 0x46, 0xcb, 0xe8, 0x50, 0x2a, 0x17, 0xe9, 0xc7, 0x52, 0x52, 0x4e, - 0xf0, 0xa5, 0x9c, 0x06, 0x4f, 0x97, 0x51, 0xb4, 0x5c, 0xf1, 0xc2, 0x4a, 0x04, 0x6b, 0x9e, 0x08, - 0xb6, 0xde, 0x68, 0x83, 0xfe, 0x9a, 0x0b, 0xe6, 0x33, 0xc1, 0x94, 0xec, 0xfe, 0xc6, 0x40, 0xdd, - 0x4b, 0x96, 0x5c, 0x9f, 0x6b, 0x98, 0xf4, 0x51, 0x7d, 0x36, 0x75, 0x8c, 0xa1, 0xb1, 0xdf, 0xa1, - 0xf5, 0xd9, 0x94, 0x1c, 0x20, 0xeb, 0xe4, 0x86, 0x7b, 0xa9, 0x88, 0x62, 0xa7, 0x3e, 0x34, 0xf6, - 0xfb, 0xe3, 0xfe, 0x48, 0x9e, 0x12, 0xbc, 0x26, 0x91, 0xcf, 0x69, 0xae, 0x27, 0x0e, 0x6a, 0x4f, - 0xa2, 0x50, 0xf0, 0x1b, 0xe1, 0x34, 0x86, 0xc6, 0x7e, 0x97, 0x66, 0x22, 0xf9, 0x1e, 0x6a, 0x3f, - 0xdf, 0x88, 0x20, 0x0a, 0x13, 0xc7, 0x1c, 0x1a, 0xfb, 0xf6, 0xf8, 0x6b, 0xc5, 0x22, 0x5a, 0x71, - 0x6c, 0x7e, 0xf6, 0xe7, 0xa7, 0x35, 0x9a, 0xd9, 0xb9, 0x7f, 0x6a, 0xa0, 0xfe, 0xfc, 0xa3, 0x33, - 0xb5, 0x8d, 0x5a, 0xe5, 0x09, 0x6a, 0x81, 0x38, 0xf3, 0xe5, 0xf9, 0x4c, 0xaa, 0x25, 0x32, 0x40, - 0x16, 0x7c, 0x7d, 0xc8, 0xd6, 0x5c, 0x9e, 0xb1, 0x43, 0x73, 0x99, 0xbc, 0x81, 0x3a, 0x47, 0x9e, - 0x17, 0xa5, 0xa1, 0x98, 0xf9, 0xf2, 0x54, 0x3d, 0x5a, 0x00, 0xc4, 0x45, 0xdd, 0x29, 0x13, 0x6c, - 0xc1, 0x12, 0x2e, 0xbd, 0x4d, 0xe9, 0x5d, 0xc1, 0xe0, 0x56, 0xf3, 0x8f, 0xce, 0x8e, 0x23, 0x7f, - 0xeb, 0x34, 0xa5, 0x3a, 0x13, 0xc9, 0x9b, 0xa8, 0xf7, 0x73, 0x26, 0xf8, 0x24, 0x0a, 0xfd, 0x00, - 0x0e, 0xed, 0xb4, 0xa4, 0xbe, 0x0a, 0x92, 0x3d, 0x84, 0x28, 0x17, 0xf1, 0xf6, 0x2c, 0x58, 0x07, - 0xc2, 0x69, 0xcb, 0x23, 0x94, 0x10, 0xf2, 0x6d, 0xd4, 0xbf, 0x0c, 0xd6, 0x3c, 0x4a, 0xc5, 0x9c, - 0x7b, 0x51, 0xe8, 0x27, 0x8e, 0x25, 0x6d, 0x76, 0x50, 0x19, 0xdd, 0x98, 0x33, 0x48, 0x44, 0x47, - 0x9d, 0x43, 0x8b, 0x70, 0x0e, 0xfd, 0x79, 0x95, 0xf0, 0x78, 0xe6, 0x3b, 0x48, 0x2e, 0x50, 0x05, - 0x4b, 0x56, 0x34, 0x5a, 0xf1, 0x99, 0xef, 0xd8, 0x15, 0x2b, 0x05, 0x92, 0x21, 0xb2, 0x2f, 0xe3, - 0x60, 0xb9, 0xe4, 0xf1, 0xe5, 0x76, 0xc3, 0x9d, 0xae, 0xdc, 0xa9, 0x0c, 0x81, 0xc5, 0xdc, 0x7b, - 0xc1, 0xfd, 0x74, 0xc5, 0xfd, 0x23, 0xe1, 0xf4, 0x86, 
0xc6, 0x7e, 0x83, 0x96, 0x21, 0x88, 0xaa, - 0x76, 0x98, 0x40, 0x9c, 0x9d, 0xbe, 0xcc, 0x56, 0x05, 0x73, 0xff, 0x58, 0x47, 0x76, 0x29, 0xfb, - 0x70, 0xba, 0x73, 0x76, 0x23, 0xc3, 0x02, 0xf7, 0x4e, 0x64, 0x8a, 0x7b, 0xb4, 0x0a, 0x82, 0x95, - 0x94, 0x66, 0xa1, 0xe0, 0xf1, 0xa7, 0x6c, 0x25, 0xd3, 0xdd, 0xa0, 0x55, 0x10, 0xac, 0xa6, 0x7c, - 0xc5, 0xb6, 0xd3, 0x34, 0x66, 0x32, 0x2f, 0x0d, 0x65, 0x55, 0x01, 0xe1, 0x1e, 0x93, 0x28, 0xf4, - 0xd2, 0x38, 0xe6, 0xa1, 0xb7, 0x95, 0xa9, 0xef, 0xd1, 0x32, 0x44, 0x7e, 0x80, 0x5a, 0x67, 0x6c, - 0xc1, 0x57, 0x89, 0xd3, 0x1c, 0x36, 0xf6, 0xed, 0xf1, 0x37, 0xee, 0x15, 0xed, 0x48, 0xe9, 0x4f, - 0x42, 0x11, 0x6f, 0xa9, 0x36, 0x06, 0xca, 0x50, 0x9e, 0x44, 0x69, 0xec, 0x71, 0x59, 0x11, 0x76, - 0x46, 0x99, 0x0c, 0xa5, 0xb9, 0x7e, 0xf0, 0x2e, 0xb2, 0x4b, 0x4b, 0x10, 0x8c, 0x1a, 0xd7, 0x7c, - 0xab, 0xe9, 0x07, 0x9f, 0xe4, 0x35, 0xd4, 0xfc, 0x94, 0xad, 0xd2, 0xac, 0xb0, 0x95, 0xf0, 0xac, - 0xfe, 0x23, 0xc3, 0x7d, 0xa7, 0xd8, 0x06, 0xfc, 0x26, 0x17, 0x57, 0x9a, 0x16, 0xf0, 0x09, 0x5c, - 0x39, 0xe7, 0xeb, 0x28, 0xde, 0x4a, 0x47, 0x93, 0x6a, 0xc9, 0xfd, 0x00, 0xf5, 0x14, 0x5f, 0x39, - 0xe5, 0x49, 0xba, 0x12, 0xe4, 0x4d, 0x64, 0x02, 0x8d, 0xa5, 0x6f, 0x7f, 0x8c, 0xf3, 0x93, 0xa6, - 0x2b, 0x21, 0xe9, 0x2d, 0xb5, 0x70, 0x8c, 0x93, 0x38, 0xd6, 0x3d, 0xa0, 0x43, 0x95, 0xe0, 0xfe, - 0xb3, 0x8e, 0x3a, 0x47, 0xc9, 0x36, 0xf4, 0x20, 0x24, 0xa5, 0xd6, 0x61, 0xca, 0xd6, 0xf1, 0x0e, - 0xb2, 0xb2, 0xb6, 0x22, 0xdd, 0xec, 0x31, 0x29, 0x02, 0x98, 0x69, 0x34, 0xed, 0x73, 0x4b, 0x28, - 0x9e, 0x0b, 0x16, 0xf3, 0x50, 0x48, 0x72, 0x4f, 0x65, 0xee, 0x3a, 0xb4, 0x82, 0x91, 0x7d, 0xd4, - 0x9a, 0x0b, 0x26, 0x52, 0xd5, 0x4d, 0xf2, 0x53, 0x83, 0x56, 0xe1, 0x54, 0xeb, 0x81, 0x7c, 0x80, - 0xd2, 0x34, 0x0c, 0x79, 0xac, 0xf9, 0x5b, 0x42, 0xe4, 0xbd, 0x36, 0x91, 0xf7, 0x42, 0x26, 0xaa, - 0x47, 0x95, 0x00, 0x05, 0x74, 0xc6, 0x12, 0x71, 0xca, 0x59, 0x2c, 0x16, 0x9c, 0x29, 0xd6, 0x36, - 0x68, 0x15, 0x84, 0xb6, 0x23, 0xb9, 0xc3, 0x8f, 0x84, 0xa4, 0x6c, 0x83, 0xe6, 0xb2, 0x2a, 
0xae, - 0xf5, 0x66, 0xc5, 0x85, 0x24, 0x49, 0x47, 0x91, 0xa4, 0x04, 0x91, 0x77, 0x77, 0x12, 0x21, 0x49, - 0x6b, 0x8f, 0x1f, 0xab, 0xab, 0x54, 0x54, 0xb4, 0x6a, 0xe9, 0xfe, 0xc3, 0x80, 0x9d, 0xa3, 0xf0, - 0x15, 0x46, 0x7d, 0xa0, 0x56, 0x3c, 0xb9, 0xd9, 0xc4, 0x3a, 0xe2, 0xb9, 0x0c, 0xba, 0x0f, 0xf9, - 0x8d, 0x00, 0x06, 0xca, 0x78, 0x37, 0x68, 0x2e, 0x97, 0xa8, 0xae, 0x58, 0xdb, 0xac, 0x50, 0x5d, - 0x91, 0xb6, 0x1c, 0xa7, 0xd6, 0x4e, 0x9c, 0x06, 0xc8, 0xba, 0xda, 0xf8, 0x4a, 0xa7, 0x82, 0x9c, - 0xcb, 0xee, 0xef, 0x0c, 0x84, 0x27, 0x51, 0x18, 0x72, 0x4f, 0x44, 0xf1, 0x94, 0x0b, 0x16, 0xac, - 0x12, 0xe8, 0xe7, 0x97, 0x6c, 0xb1, 0x52, 0xed, 0x5a, 0xf1, 0xa4, 0x00, 0xc8, 0x4f, 0x8a, 0x39, - 0x53, 0x97, 0x94, 0xfd, 0xa6, 0xba, 0xfb, 0xee, 0x32, 0x23, 0x6d, 0xa5, 0x88, 0x9b, 0xf9, 0x0c, - 0x9e, 0xa1, 0x6e, 0x59, 0xf1, 0x20, 0x3a, 0xbe, 0x8d, 0xda, 0x7a, 0xae, 0xc8, 0x94, 0xf8, 0x79, - 0x4a, 0x7c, 0x42, 0x90, 0x59, 0x9a, 0x4d, 0xf2, 0xdb, 0xfd, 0x04, 0x61, 0x15, 0x84, 0x89, 0xef, - 0x65, 0x77, 0xab, 0xce, 0xb7, 0xce, 0x97, 0x9a, 0x6f, 0xdf, 0x41, 0x96, 0xde, 0x36, 0x71, 0x1a, - 0xf2, 0xca, 0x3d, 0x75, 0x65, 0x8d, 0xd2, 0x5c, 0xed, 0x12, 0x84, 0x29, 0x17, 0x3c, 0x84, 0x0b, - 0xea, 0x2d, 0xdd, 0x23, 0x64, 0xcf, 0xe6, 0x93, 0x8b, 0xaf, 0x70, 0x02, 0xf7, 0x14, 0x91, 0x8b, - 0x74, 0xb1, 0x0a, 0x3c, 0x56, 0x5a, 0xf8, 0xff, 0x5a, 0xe9, 0x8b, 0x3a, 0x6a, 0x67, 0xfe, 0x43, - 0x64, 0x4f, 0x79, 0xe2, 0xc5, 0x81, 0xcc, 0x87, 0x5e, 0xa4, 0x0c, 0x95, 0x27, 0xfb, 0x54, 0x2e, - 0x55, 0x9a, 0xec, 0x53, 0x98, 0x96, 0x5a, 0xd0, 0xf5, 0x9c, 0x67, 0x07, 0x4a, 0x2e, 0xe1, 0x71, - 0x58, 0xcc, 0xfb, 0x5c, 0x2e, 0xda, 0x5c, 0xb3, 0xd4, 0xe6, 0xc8, 0x0f, 0x51, 0x27, 0x2f, 0x20, - 0x4d, 0xd3, 0x27, 0xff, 0xb9, 0xae, 0x4e, 0x6b, 0xb4, 0x30, 0x95, 0x7e, 0x59, 0x8e, 0xe5, 0x24, - 0x2d, 0xfc, 0x76, 0x52, 0x2f, 0xfd, 0x32, 0x8c, 0xbc, 0x85, 0x4c, 0x48, 0x8a, 0x1c, 0xad, 0xf9, - 0x53, 0xa9, 0x94, 0xa6, 0xd3, 0x1a, 0x95, 0x06, 0xe4, 0xc7, 0xc8, 0x2e, 0x85, 0x5e, 0xce, 0x59, - 0x7b, 0xec, 0x28, 0xfb, 0xfb, 
0x39, 0x39, 0xad, 0xd1, 0xb2, 0xf9, 0x71, 0x27, 0x8f, 0xf6, 0xfb, - 0xa6, 0x65, 0xe3, 0xae, 0xfb, 0x85, 0x89, 0xd0, 0x94, 0xf1, 0xf5, 0x2b, 0xed, 0x2c, 0x95, 0x34, - 0x35, 0xfe, 0x47, 0x9a, 0xcc, 0x6a, 0x9a, 0x0e, 0x54, 0xa1, 0xc8, 0x57, 0x48, 0x73, 0xf7, 0xe1, - 0x09, 0x28, 0xcd, 0xf5, 0x3b, 0x5d, 0xbe, 0x75, 0xaf, 0xcb, 0x7f, 0x57, 0xe9, 0xf5, 0xcc, 0x68, - 0xff, 0x97, 0x99, 0x51, 0xb2, 0x21, 0xef, 0xef, 0x4e, 0x00, 0x4b, 0x5e, 0x78, 0x30, 0x52, 0x0f, - 0xec, 0x51, 0xf6, 0xc0, 0x1e, 0x5d, 0x66, 0x0f, 0xec, 0x63, 0x0b, 0x2e, 0xfe, 0xeb, 0xbf, 0x3c, - 0x35, 0x76, 0xe7, 0xc4, 0x5b, 0x79, 0x9c, 0xe5, 0x1c, 0xc8, 0x19, 0xaa, 0x41, 0x9a, 0xd7, 0xfc, - 0x7b, 0xa5, 0x46, 0x89, 0x1e, 0xb0, 0x5f, 0xd1, 0x4e, 0xdf, 0x2b, 0xb5, 0x53, 0xfb, 0x21, 0x2b, - 0x64, 0x5e, 0xe4, 0x19, 0x6a, 0x9e, 0x84, 0x30, 0xb2, 0xba, 0x0f, 0x70, 0x57, 0x2e, 0xe4, 0xa7, - 0xa8, 0x0d, 0x37, 0xa7, 0x69, 0xa8, 0x4b, 0xf7, 0xcb, 0x79, 0x67, 0x4e, 0x07, 0xbf, 0x37, 0xca, - 0x79, 0x22, 0xb6, 0x7e, 0xf0, 0x72, 0x1f, 0xd7, 0x40, 0x80, 0x64, 0x06, 0xe1, 0x12, 0x1b, 0xa4, - 0x07, 0x84, 0xd4, 0xa3, 0x14, 0xd7, 0x09, 0x42, 0xad, 0x0b, 0x96, 0x26, 0xdc, 0xc7, 0x0d, 0xd2, - 0xd1, 0x0c, 0xc6, 0x26, 0xe9, 0x22, 0x6b, 0xc2, 0x42, 0x8f, 0xaf, 0xb8, 0x8f, 0x9b, 0xe4, 0x31, - 0x7a, 0x04, 0xe3, 0x73, 0xcd, 0x29, 0xff, 0x24, 0xe5, 0x09, 0x78, 0xb6, 0x08, 0x41, 0x7d, 0xe9, - 0x59, 0x60, 0x6d, 0x30, 0x54, 0x6e, 0x05, 0x68, 0x91, 0xd7, 0xa0, 0x77, 0x26, 0x82, 0xc5, 0xa2, - 0x40, 0x3b, 0x07, 0xff, 0x32, 0x54, 0x91, 0xca, 0x27, 0x52, 0x17, 0x59, 0x97, 0x3c, 0x11, 0xcf, - 0xc3, 0xd5, 0x16, 0xd7, 0x48, 0x1f, 0xa1, 0xf9, 0x36, 0x11, 0x7c, 0x3d, 0x0b, 0x03, 0x81, 0x0d, - 0xd8, 0xe9, 0x9c, 0x8b, 0x38, 0xf0, 0xce, 0xa2, 0xe5, 0x39, 0x8f, 0x97, 0x1c, 0xd7, 0xc9, 0x13, - 0x44, 0x14, 0x36, 0x17, 0x51, 0xcc, 0x96, 0xfc, 0x2a, 0x61, 0x4b, 0x8e, 0x1b, 0x80, 0xe7, 0x4d, - 0xe4, 0x03, 0xf6, 0xf1, 0x35, 0x9b, 0x07, 0xe1, 0x35, 0x36, 0xc9, 0x23, 0x64, 0x4b, 0xd7, 0xe7, - 0x8b, 0x5f, 0x72, 0x4f, 0xe0, 0x26, 0x04, 0x05, 0x96, 0x9f, 0xf8, 
0x1e, 0x6e, 0x13, 0x8c, 0xba, - 0xe7, 0xcf, 0xe5, 0x28, 0x84, 0xf8, 0x25, 0xd8, 0x02, 0x04, 0xda, 0x44, 0xf6, 0x7f, 0x86, 0x3b, - 0xe4, 0xeb, 0xe8, 0xf5, 0x59, 0xe8, 0xf3, 0x1b, 0x95, 0x6e, 0x38, 0x7a, 0xae, 0x44, 0xe4, 0x75, - 0xf4, 0xb8, 0xd4, 0x1e, 0x72, 0x85, 0x0d, 0xdb, 0xe8, 0x3f, 0x31, 0xdc, 0x75, 0x4d, 0xab, 0x85, - 0x5b, 0x07, 0xdf, 0x82, 0x9f, 0x9a, 0xec, 0x8d, 0x28, 0x0d, 0x52, 0xcf, 0xe3, 0x49, 0x82, 0x6b, - 0x90, 0x8d, 0x9f, 0xb1, 0x00, 0x82, 0x6e, 0x1c, 0xfc, 0xc1, 0x28, 0x58, 0x4c, 0xde, 0x40, 0xed, - 0xab, 0xf0, 0x3a, 0x8c, 0x7e, 0x15, 0xe2, 0xda, 0xe0, 0xd1, 0xed, 0xdd, 0xd0, 0x06, 0x58, 0x43, - 0x64, 0x8c, 0x48, 0x7e, 0xd7, 0xfc, 0xf6, 0xd8, 0x18, 0x0c, 0x6e, 0xef, 0x86, 0x4f, 0xc0, 0xf0, - 0xbe, 0x16, 0x7a, 0x4b, 0xde, 0x35, 0x71, 0x63, 0xd0, 0xbb, 0xbd, 0x1b, 0x96, 0xda, 0x28, 0x51, - 0x6d, 0x14, 0x9b, 0x03, 0xeb, 0xf6, 0x6e, 0xa8, 0x3a, 0xe6, 0xb0, 0xd2, 0x31, 0x71, 0x53, 0x9d, - 0xa3, 0x04, 0xb9, 0xa6, 0x55, 0xc7, 0xf5, 0xe3, 0xc9, 0xe7, 0x7f, 0xdb, 0x33, 0x3e, 0x7b, 0xb9, - 0x67, 0x7c, 0xfe, 0x72, 0xcf, 0xf8, 0xeb, 0xcb, 0xbd, 0xda, 0x6f, 0xff, 0xbe, 0x67, 0xfc, 0xa2, - 0xfc, 0x37, 0xbe, 0x66, 0x22, 0x0e, 0x6e, 0xa2, 0x38, 0x58, 0x06, 0x61, 0x26, 0x84, 0xfc, 0x70, - 0x73, 0xbd, 0x3c, 0xdc, 0x2c, 0xe4, 0x8f, 0xfc, 0xa2, 0x25, 0xcb, 0xfe, 0xfb, 0xff, 0x0e, 0x00, - 0x00, 0xff, 0xff, 0x43, 0x88, 0xec, 0x35, 0xdd, 0x0f, 0x00, 0x00, +func init() { proto.RegisterFile("task.proto", fileDescriptor_ce5d8dd45b4a91ff) } + +var fileDescriptor_ce5d8dd45b4a91ff = []byte{ + // 1673 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xac, 0x57, 0xcd, 0x92, 0x23, 0x47, + 0x11, 0x56, 0x4b, 0xad, 0xbf, 0x6c, 0x49, 0xdb, 0xd4, 0x9a, 0x75, 0x23, 0xcc, 0xac, 0x42, 0x18, + 0x3c, 0x4c, 0x84, 0x67, 0x60, 0x30, 0x04, 0xde, 0x00, 0xc2, 0x33, 0xd2, 0xe0, 0x91, 0x3d, 0xe3, + 0x1d, 0x97, 0x66, 0x2e, 0xdc, 0x4a, 0xdd, 0x65, 0x6d, 0x33, 0x52, 0xb5, 0xdc, 0x5d, 0x6d, 0x46, + 0xaf, 0xb0, 0x27, 0x82, 0x0b, 0xa7, 0x8d, 0xe0, 
0x4e, 0x04, 0x47, 0xae, 0x5c, 0x7d, 0xf4, 0x03, + 0x10, 0xfc, 0x2c, 0x3c, 0x02, 0x57, 0x47, 0x10, 0x59, 0x55, 0xfd, 0xa7, 0x05, 0xc2, 0x0b, 0xbe, + 0x75, 0x7e, 0x99, 0x59, 0x95, 0x95, 0x59, 0x5f, 0x66, 0x35, 0x80, 0x64, 0xc9, 0xed, 0xe1, 0x26, + 0x8e, 0x64, 0x44, 0x6c, 0xfc, 0x1e, 0xbe, 0xb9, 0x0c, 0xe5, 0x93, 0x74, 0x71, 0xe8, 0x47, 0xeb, + 0xa3, 0x65, 0xb4, 0x8c, 0x8e, 0x94, 0x72, 0x91, 0x7e, 0xa4, 0x24, 0x25, 0xa8, 0x2f, 0xed, 0x34, + 0x7c, 0xb8, 0x8c, 0xa2, 0xe5, 0x8a, 0x17, 0x56, 0x32, 0x5c, 0xf3, 0x44, 0xb2, 0xf5, 0xc6, 0x18, + 0x0c, 0xd6, 0x5c, 0xb2, 0x80, 0x49, 0xa6, 0xe5, 0xf1, 0x6f, 0x2c, 0xe8, 0x5d, 0xb3, 0xe4, 0xf6, + 0xd2, 0xc0, 0x64, 0x00, 0xf5, 0xd9, 0xd4, 0xb3, 0x46, 0xd6, 0x7e, 0x97, 0xd6, 0x67, 0x53, 0x72, + 0x00, 0x9d, 0xb3, 0x3b, 0xee, 0xa7, 0x32, 0x8a, 0xbd, 0xfa, 0xc8, 0xda, 0x1f, 0x1c, 0x0f, 0x0e, + 0x55, 0x94, 0xe8, 0x35, 0x89, 0x02, 0x4e, 0x73, 0x3d, 0xf1, 0xa0, 0x3d, 0x89, 0x84, 0xe4, 0x77, + 0xd2, 0x6b, 0x8c, 0xac, 0xfd, 0x1e, 0xcd, 0x44, 0xf2, 0x3d, 0x68, 0x3f, 0xde, 0xc8, 0x30, 0x12, + 0x89, 0x67, 0x8f, 0xac, 0x7d, 0xe7, 0xf8, 0x2b, 0xc5, 0x22, 0x46, 0x71, 0x6a, 0x7f, 0xfa, 0xe7, + 0x87, 0x35, 0x9a, 0xd9, 0x8d, 0xff, 0xd4, 0x80, 0xc1, 0xfc, 0xc3, 0x0b, 0xbd, 0x8d, 0x5e, 0xe5, + 0x01, 0xb4, 0x50, 0x9c, 0x05, 0x2a, 0x3e, 0x9b, 0x1a, 0x89, 0x0c, 0xa1, 0x83, 0x5f, 0x1f, 0xb0, + 0x35, 0x57, 0x31, 0x76, 0x69, 0x2e, 0x93, 0xd7, 0xa0, 0x7b, 0xe2, 0xfb, 0x51, 0x2a, 0xe4, 0x2c, + 0x50, 0x51, 0xf5, 0x69, 0x01, 0x90, 0x31, 0xf4, 0xa6, 0x4c, 0xb2, 0x05, 0x4b, 0xb8, 0xf2, 0xb6, + 0x95, 0x77, 0x05, 0xc3, 0x53, 0xcd, 0x3f, 0xbc, 0x38, 0x8d, 0x82, 0xad, 0xd7, 0x54, 0xea, 0x4c, + 0x24, 0xaf, 0x43, 0xff, 0x5d, 0x26, 0xf9, 0x24, 0x12, 0x41, 0x88, 0x41, 0x7b, 0x2d, 0xa5, 0xaf, + 0x82, 0x64, 0x0f, 0x80, 0x72, 0x19, 0x6f, 0x2f, 0xc2, 0x75, 0x28, 0xbd, 0xb6, 0x0a, 0xa1, 0x84, + 0x90, 0x6f, 0xc3, 0xe0, 0x3a, 0x5c, 0xf3, 0x28, 0x95, 0x73, 0xee, 0x47, 0x22, 0x48, 0xbc, 0x8e, + 0xb2, 0xd9, 0x41, 0x55, 0x76, 0x63, 0xce, 0xb0, 0x10, 0x5d, 0x1d, 0x87, 0x11, 0x31, 
0x0e, 0xf3, + 0x79, 0x93, 0xf0, 0x78, 0x16, 0x78, 0xa0, 0x16, 0xa8, 0x82, 0x25, 0x2b, 0x1a, 0xad, 0xf8, 0x2c, + 0xf0, 0x9c, 0x8a, 0x95, 0x06, 0xc9, 0x08, 0x9c, 0xeb, 0x38, 0x5c, 0x2e, 0x79, 0x7c, 0xbd, 0xdd, + 0x70, 0xaf, 0xa7, 0x76, 0x2a, 0x43, 0x68, 0x31, 0xf7, 0x9f, 0xf0, 0x20, 0x5d, 0xf1, 0xe0, 0x44, + 0x7a, 0xfd, 0x91, 0xb5, 0xdf, 0xa0, 0x65, 0x08, 0xb3, 0x6a, 0x1c, 0x26, 0x98, 0x67, 0x6f, 0xa0, + 0xaa, 0x55, 0xc1, 0xc6, 0x7f, 0xac, 0x83, 0x53, 0xaa, 0x3e, 0x46, 0x77, 0xc9, 0xee, 0x54, 0x5a, + 0xf0, 0xdc, 0x89, 0x2a, 0x71, 0x9f, 0x56, 0x41, 0xb4, 0x52, 0xd2, 0x4c, 0x48, 0x1e, 0x7f, 0xc2, + 0x56, 0xaa, 0xdc, 0x0d, 0x5a, 0x05, 0xd1, 0x6a, 0xca, 0x57, 0x6c, 0x3b, 0x4d, 0x63, 0xa6, 0xea, + 0xd2, 0xd0, 0x56, 0x15, 0x10, 0xcf, 0x31, 0x89, 0x84, 0x9f, 0xc6, 0x31, 0x17, 0xfe, 0x56, 0x95, + 0xbe, 0x4f, 0xcb, 0x10, 0xf9, 0x01, 0xb4, 0x2e, 0xd8, 0x82, 0xaf, 0x12, 0xaf, 0x39, 0x6a, 0xec, + 0x3b, 0xc7, 0xdf, 0x78, 0xe1, 0xd2, 0x1e, 0x6a, 0xfd, 0x99, 0x90, 0xf1, 0x96, 0x1a, 0x63, 0xa4, + 0x0c, 0xe5, 0x49, 0x94, 0xc6, 0x3e, 0x57, 0x37, 0xc2, 0xc9, 0x28, 0x93, 0xa1, 0x34, 0xd7, 0x0f, + 0xdf, 0x06, 0xa7, 0xb4, 0x04, 0x71, 0xa1, 0x71, 0xcb, 0xb7, 0x86, 0x7e, 0xf8, 0x49, 0x5e, 0x81, + 0xe6, 0x27, 0x6c, 0x95, 0x66, 0x17, 0x5b, 0x0b, 0x8f, 0xea, 0x3f, 0xb2, 0xc6, 0x6f, 0x15, 0xdb, + 0xa0, 0xdf, 0xe4, 0xea, 0xc6, 0xd0, 0x02, 0x3f, 0x91, 0x2b, 0x97, 0x7c, 0x1d, 0xc5, 0x5b, 0xe5, + 0x68, 0x53, 0x23, 0x8d, 0xdf, 0x87, 0xbe, 0xe6, 0x2b, 0xa7, 0x3c, 0x49, 0x57, 0x92, 0xbc, 0x0e, + 0x36, 0xd2, 0x58, 0xf9, 0x0e, 0x8e, 0xdd, 0x3c, 0xd2, 0x74, 0x25, 0x15, 0xbd, 0x95, 0x16, 0xc3, + 0x38, 0x8b, 0x63, 0xd3, 0x03, 0xba, 0x54, 0x0b, 0xe3, 0x7f, 0xd6, 0xa1, 0x7b, 0x92, 0x6c, 0x85, + 0x8f, 0x29, 0x29, 0xb5, 0x0e, 0x5b, 0xb5, 0x8e, 0xb7, 0xa0, 0x93, 0xb5, 0x15, 0xe5, 0xe6, 0x1c, + 0x93, 0x22, 0x81, 0x99, 0xc6, 0xd0, 0x3e, 0xb7, 0xc4, 0xcb, 0x73, 0xc5, 0x62, 0x2e, 0xa4, 0x22, + 0xf7, 0x54, 0xd5, 0xae, 0x4b, 0x2b, 0x18, 0xd9, 0x87, 0xd6, 0x5c, 0x32, 0x99, 0xea, 0x6e, 0x92, + 0x47, 0x8d, 0x5a, 0x8d, 
0x53, 0xa3, 0x47, 0xf2, 0x21, 0x4a, 0x53, 0x21, 0x78, 0x6c, 0xf8, 0x5b, + 0x42, 0xd4, 0xb9, 0x36, 0x91, 0xff, 0x44, 0x15, 0xaa, 0x4f, 0xb5, 0x80, 0x17, 0xe8, 0x82, 0x25, + 0xf2, 0x9c, 0xb3, 0x58, 0x2e, 0x38, 0xd3, 0xac, 0x6d, 0xd0, 0x2a, 0x88, 0x6d, 0x47, 0x71, 0x87, + 0x9f, 0x48, 0x45, 0xd9, 0x06, 0xcd, 0x65, 0x7d, 0xb9, 0xd6, 0x9b, 0x15, 0x97, 0x8a, 0x24, 0x5d, + 0x4d, 0x92, 0x12, 0x44, 0xde, 0xde, 0x29, 0x84, 0x22, 0xad, 0x73, 0x7c, 0x5f, 0x1f, 0xa5, 0xa2, + 0xa2, 0x55, 0xcb, 0xf1, 0x3f, 0x2c, 0xdc, 0x39, 0x12, 0x5f, 0x62, 0xd6, 0x87, 0x7a, 0xc5, 0xb3, + 0xbb, 0x4d, 0x6c, 0x32, 0x9e, 0xcb, 0xa8, 0xfb, 0x80, 0xdf, 0x49, 0x64, 0xa0, 0xca, 0x77, 0x83, + 0xe6, 0x72, 0x89, 0xea, 0x9a, 0xb5, 0xcd, 0x0a, 0xd5, 0x35, 0x69, 0xcb, 0x79, 0x6a, 0xed, 0xe4, + 0x69, 0x08, 0x9d, 0x9b, 0x4d, 0xa0, 0x75, 0x3a, 0xc9, 0xb9, 0x3c, 0xfe, 0x9d, 0x05, 0xee, 0x24, + 0x12, 0x82, 0xfb, 0x32, 0x8a, 0xa7, 0x5c, 0xb2, 0x70, 0x95, 0x60, 0x3f, 0xbf, 0x66, 0x8b, 0x95, + 0x6e, 0xd7, 0x9a, 0x27, 0x05, 0x40, 0x7e, 0x52, 0xcc, 0x99, 0xba, 0xa2, 0xec, 0x37, 0xf5, 0xd9, + 0x77, 0x97, 0x39, 0x34, 0x56, 0x9a, 0xb8, 0x99, 0xcf, 0xf0, 0x11, 0xf4, 0xca, 0x8a, 0x97, 0xa2, + 0xe3, 0x9b, 0xd0, 0x36, 0x73, 0x45, 0x95, 0x24, 0xc8, 0x4b, 0x12, 0x10, 0x02, 0x76, 0x69, 0x36, + 0xa9, 0xef, 0xf1, 0xc7, 0xe0, 0xea, 0x24, 0x4c, 0x02, 0x3f, 0x3b, 0x5b, 0x75, 0xbe, 0x75, 0xbf, + 0xd0, 0x7c, 0xfb, 0x0e, 0x74, 0xcc, 0xb6, 0x89, 0xd7, 0x50, 0x47, 0xee, 0xeb, 0x23, 0x1b, 0x94, + 0xe6, 0xea, 0x31, 0x01, 0x97, 0x72, 0xc9, 0x05, 0x1e, 0xd0, 0x6c, 0x39, 0x3e, 0x01, 0x67, 0x36, + 0x9f, 0x5c, 0xfd, 0x1f, 0x11, 0x8c, 0xcf, 0x81, 0x5c, 0xa5, 0x8b, 0x55, 0xe8, 0xb3, 0xd2, 0xc2, + 0xff, 0xd3, 0x4a, 0x9f, 0xd7, 0xa1, 0x9d, 0xf9, 0x8f, 0xc0, 0x99, 0xf2, 0xc4, 0x8f, 0x43, 0x55, + 0x0f, 0xb3, 0x48, 0x19, 0x2a, 0x4f, 0xf6, 0xa9, 0x5a, 0xaa, 0x34, 0xd9, 0xa7, 0x38, 0x2d, 0x8d, + 0x60, 0xee, 0x73, 0x5e, 0x1d, 0xbc, 0x72, 0x09, 0x8f, 0x45, 0x31, 0xef, 0x73, 0xb9, 0x68, 0x73, + 0xcd, 0x52, 0x9b, 0x23, 0x3f, 0x84, 0x6e, 0x7e, 0x81, 0x0c, 
0x4d, 0x1f, 0xfc, 0xfb, 0x7b, 0x75, + 0x5e, 0xa3, 0x85, 0xa9, 0xf2, 0xcb, 0x6a, 0xac, 0x26, 0x69, 0xe1, 0xb7, 0x53, 0x7a, 0xe5, 0x97, + 0x61, 0xe4, 0x0d, 0xb0, 0xb1, 0x28, 0x6a, 0xb4, 0xe6, 0x4f, 0xa5, 0x52, 0x99, 0xce, 0x6b, 0x54, + 0x19, 0x90, 0x1f, 0x83, 0x53, 0x4a, 0xbd, 0x9a, 0xb3, 0xce, 0xb1, 0xa7, 0xed, 0x5f, 0xac, 0xc9, + 0x79, 0x8d, 0x96, 0xcd, 0x4f, 0xbb, 0x79, 0xb6, 0xdf, 0xb3, 0x3b, 0x8e, 0xdb, 0x1b, 0x7f, 0x6e, + 0x03, 0x4c, 0x19, 0x5f, 0x7f, 0xa9, 0x9d, 0xa5, 0x52, 0xa6, 0xc6, 0x7f, 0x29, 0x93, 0x5d, 0x2d, + 0xd3, 0x81, 0xbe, 0x28, 0xea, 0x15, 0xd2, 0xdc, 0x7d, 0x78, 0x22, 0x4a, 0x73, 0xfd, 0x4e, 0x97, + 0x6f, 0xbd, 0xd0, 0xe5, 0xbf, 0xab, 0xf5, 0x66, 0x66, 0xb4, 0xff, 0xc3, 0xcc, 0x28, 0xd9, 0x90, + 0xf7, 0x76, 0x27, 0x40, 0x47, 0x1d, 0x78, 0x78, 0xa8, 0x1f, 0xd8, 0x87, 0xd9, 0x03, 0xfb, 0xf0, + 0x3a, 0x7b, 0x60, 0x9f, 0x76, 0xf0, 0xe0, 0xbf, 0xfa, 0xcb, 0x43, 0x6b, 0x77, 0x4e, 0xbc, 0x91, + 0xe7, 0x59, 0xcd, 0x81, 0x9c, 0xa1, 0x06, 0xa4, 0xf9, 0x9d, 0x7f, 0xa7, 0xd4, 0x28, 0xe1, 0x25, + 0xf6, 0x2b, 0xda, 0xe9, 0x3b, 0xa5, 0x76, 0xea, 0xbc, 0xcc, 0x0a, 0x99, 0x17, 0x79, 0x04, 0xcd, + 0x33, 0x81, 0x23, 0xab, 0xf7, 0x12, 0xee, 0xda, 0x85, 0xfc, 0x14, 0xda, 0x78, 0x72, 0x9a, 0x0a, + 0x73, 0x75, 0xbf, 0x98, 0x77, 0xe6, 0x74, 0xf0, 0x7b, 0xab, 0x5c, 0x27, 0xe2, 0x98, 0x07, 0x2f, + 0x0f, 0xdc, 0x1a, 0x0a, 0x58, 0xcc, 0x50, 0x2c, 0x5d, 0x8b, 0xf4, 0x91, 0x90, 0x66, 0x94, 0xba, + 0x75, 0x02, 0xd0, 0xba, 0x62, 0x69, 0xc2, 0x03, 0xb7, 0x41, 0xba, 0x86, 0xc1, 0xae, 0x4d, 0x7a, + 0xd0, 0x99, 0x30, 0xe1, 0xf3, 0x15, 0x0f, 0xdc, 0x26, 0xb9, 0x0f, 0xf7, 0x70, 0x7c, 0xae, 0x39, + 0xe5, 0x1f, 0xa7, 0x3c, 0x41, 0xcf, 0x16, 0x21, 0x30, 0x50, 0x9e, 0x05, 0xd6, 0x46, 0x43, 0xed, + 0x56, 0x80, 0x1d, 0xf2, 0x0a, 0xf6, 0xce, 0x44, 0xb2, 0x58, 0x16, 0x68, 0xf7, 0xe0, 0xd7, 0x75, + 0x7d, 0x49, 0xd5, 0x13, 0xa9, 0x07, 0x9d, 0x6b, 0x9e, 0xc8, 0xc7, 0x62, 0xb5, 0x75, 0x6b, 0x64, + 0x00, 0x30, 0xdf, 0x26, 0x92, 0xaf, 0x67, 0x22, 0x94, 0xae, 0x85, 0x3b, 0x5d, 0x72, 0x19, 0x87, + 
0xfe, 0x45, 0xb4, 0xbc, 0xe4, 0xf1, 0x92, 0xbb, 0x75, 0xf2, 0x00, 0x88, 0xc6, 0xe6, 0x32, 0x8a, + 0xd9, 0x92, 0xdf, 0x24, 0x6c, 0xc9, 0xdd, 0x06, 0xe2, 0x79, 0x13, 0x79, 0x9f, 0x7d, 0x74, 0xcb, + 0xe6, 0xa1, 0xb8, 0x75, 0x6d, 0x72, 0x0f, 0x1c, 0xe5, 0xfa, 0x78, 0xf1, 0x0b, 0xee, 0x4b, 0xb7, + 0x89, 0x49, 0xc1, 0xe5, 0x27, 0x81, 0xef, 0xb6, 0x89, 0x0b, 0xbd, 0xcb, 0xc7, 0x6a, 0x14, 0x62, + 0xfe, 0x12, 0xb7, 0x83, 0x08, 0xb6, 0x89, 0xec, 0xff, 0xcc, 0xed, 0x92, 0xaf, 0xc3, 0xab, 0x33, + 0x11, 0xf0, 0x3b, 0x5d, 0x6e, 0x0c, 0x3d, 0x57, 0x02, 0x79, 0x15, 0xee, 0x97, 0xda, 0x43, 0xae, + 0x70, 0x70, 0x1b, 0xf3, 0x27, 0xe6, 0xf6, 0xc8, 0xd7, 0xe0, 0xab, 0xf8, 0x7b, 0xb4, 0x0a, 0xc5, + 0xed, 0xe4, 0x64, 0xfe, 0xee, 0x24, 0xb7, 0xeb, 0x8f, 0xed, 0x4e, 0xcb, 0x6d, 0x1d, 0x7c, 0x0b, + 0xff, 0x77, 0xb2, 0xe7, 0xa3, 0xf2, 0x4d, 0x7d, 0x9f, 0x27, 0x89, 0x5b, 0xc3, 0x42, 0xfd, 0x8c, + 0x85, 0x58, 0x0f, 0xeb, 0xe0, 0x0f, 0x56, 0x41, 0x70, 0xf2, 0x1a, 0xb4, 0x6f, 0xc4, 0xad, 0x88, + 0x7e, 0x29, 0xdc, 0xda, 0xf0, 0xde, 0xd3, 0x67, 0x23, 0x07, 0x61, 0x03, 0x91, 0x63, 0x20, 0x79, + 0x1a, 0xf2, 0xc4, 0xb8, 0xd6, 0x70, 0xf8, 0xf4, 0xd9, 0xe8, 0x01, 0x1a, 0xbe, 0xa8, 0xc5, 0xb6, + 0x93, 0x37, 0x54, 0xb7, 0x31, 0xec, 0x3f, 0x7d, 0x36, 0x2a, 0x75, 0x58, 0xa2, 0x3b, 0xac, 0x6b, + 0x0f, 0x3b, 0x4f, 0x9f, 0x8d, 0x74, 0x33, 0x1d, 0x55, 0x9a, 0xa9, 0xdb, 0xd4, 0x71, 0x94, 0xa0, + 0xb1, 0xdd, 0xa9, 0xbb, 0xf5, 0xd3, 0xc9, 0x67, 0x7f, 0xdb, 0xb3, 0x3e, 0x7d, 0xbe, 0x67, 0x7d, + 0xf6, 0x7c, 0xcf, 0xfa, 0xeb, 0xf3, 0xbd, 0xda, 0x6f, 0xff, 0xbe, 0x67, 0xfd, 0xbc, 0xfc, 0xa3, + 0xbe, 0x66, 0x32, 0x0e, 0xef, 0xa2, 0x38, 0x5c, 0x86, 0x22, 0x13, 0x04, 0x3f, 0xda, 0xdc, 0x2e, + 0x8f, 0x36, 0x8b, 0x23, 0xe4, 0xfe, 0xa2, 0xa5, 0x18, 0xf1, 0xfd, 0x7f, 0x05, 0x00, 0x00, 0xff, + 0xff, 0x8d, 0xd5, 0x5a, 0x45, 0xf2, 0x0f, 0x00, 0x00, } func (m *TaskMetadata) Marshal() (dAtA []byte, err error) { diff --git a/pkg/predefine/predefine.go b/pkg/predefine/predefine.go index 8cd7d37f763b1..5ff2a0043a388 100644 --- 
a/pkg/predefine/predefine.go +++ b/pkg/predefine/predefine.go @@ -22,6 +22,7 @@ import ( "github.com/google/uuid" "github.com/matrixorigin/matrixone/pkg/catalog" + "github.com/matrixorigin/matrixone/pkg/datalink/casgc" "github.com/matrixorigin/matrixone/pkg/pb/task" "github.com/matrixorigin/matrixone/pkg/util/export" "github.com/matrixorigin/matrixone/pkg/util/metric/mometric" @@ -96,6 +97,12 @@ func GenInitCronTaskSQL(codes ...int32) (string, error) { } cronTasks = append(cronTasks, task5) + task6, err := createCronTask(casgc.DatalinkCASGCTaskMetadata(task.TaskCode_DatalinkCASGCExecutor), casgc.DatalinkCASGCCronExpr) + if err != nil { + return "", err + } + cronTasks = append(cronTasks, task6) + sql := fmt.Sprintf(`insert into %s.sys_cron_task ( task_metadata_id, task_metadata_executor, diff --git a/pkg/sql/plan/function/datalink_pin_test.go b/pkg/sql/plan/function/datalink_pin_test.go new file mode 100644 index 0000000000000..fbb55d49a2d11 --- /dev/null +++ b/pkg/sql/plan/function/datalink_pin_test.go @@ -0,0 +1,341 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package function + +import ( + "crypto/sha256" + "encoding/hex" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/matrixorigin/matrixone/pkg/container/types" + "github.com/matrixorigin/matrixone/pkg/datalink" + "github.com/matrixorigin/matrixone/pkg/testutil" + "github.com/stretchr/testify/require" +) + +// pinnedURLOf returns the datalink URL that datalink_pin is expected to emit for +// a live file:// reference: the live path plus ?contenthash=. +func pinnedURLOf(liveURL string, content []byte) string { + sum := sha256.Sum256(content) + return liveURL + "?contenthash=" + hex.EncodeToString(sum[:]) +} + +// datalink_pin freezes the referenced bytes and returns a datalink carrying the +// content hash of those bytes. +func TestDatalinkPinReturnsContentHash(t *testing.T) { + dir := t.TempDir() + filePath := filepath.Join(dir, "doc.txt") + require.NoError(t, os.WriteFile(filePath, []byte("v1"), 0o600)) + + proc := testutil.NewProc(t) // NewProc(t) backs SHARED with LocalFS, matching standalone + liveURL := "file://" + filePath + + tc := NewFunctionTestCase(proc, + []FunctionTestInput{NewFunctionTestInput(types.T_datalink.ToType(), + []string{liveURL}, []bool{false})}, + NewFunctionTestResult(types.T_datalink.ToType(), false, + []string{pinnedURLOf(liveURL, []byte("v1"))}, []bool{false}), + DatalinkPin) + s, info := tc.Run() + require.True(t, s, info) +} + +// The core issue scenario: after an out-of-band overwrite, reading the pinned +// datalink still returns the original bytes, while the live datalink returns new. 
+func TestDatalinkPinReproducesBytesAfterOverwrite(t *testing.T) { + dir := t.TempDir() + filePath := filepath.Join(dir, "doc.txt") + require.NoError(t, os.WriteFile(filePath, []byte("v1"), 0o600)) + + proc := testutil.NewProc(t) + liveURL := "file://" + filePath + pinnedURL := pinnedURLOf(liveURL, []byte("v1")) + + // pin: performs the CAS write into the SHARED store + pinTC := NewFunctionTestCase(proc, + []FunctionTestInput{NewFunctionTestInput(types.T_datalink.ToType(), + []string{liveURL}, []bool{false})}, + NewFunctionTestResult(types.T_datalink.ToType(), false, + []string{pinnedURL}, []bool{false}), + DatalinkPin) + s, info := pinTC.Run() + require.True(t, s, info) + + // out-of-band overwrite of the external file + require.NoError(t, os.WriteFile(filePath, []byte("v2-overwritten"), 0o600)) + + // reading the pinned datalink returns the original frozen bytes + pinnedReadTC := NewFunctionTestCase(proc, + []FunctionTestInput{NewFunctionTestInput(types.T_datalink.ToType(), + []string{pinnedURL}, []bool{false})}, + NewFunctionTestResult(types.T_text.ToType(), false, + []string{"v1"}, []bool{false}), + LoadFileDatalink) + s, info = pinnedReadTC.Run() + require.True(t, s, info) + + // reading the live datalink returns the new bytes (pin did not freeze live) + liveReadTC := NewFunctionTestCase(proc, + []FunctionTestInput{NewFunctionTestInput(types.T_datalink.ToType(), + []string{liveURL}, []bool{false})}, + NewFunctionTestResult(types.T_text.ToType(), false, + []string{"v2-overwritten"}, []bool{false}), + LoadFileDatalink) + s, info = liveReadTC.Run() + require.True(t, s, info) +} + +// An already-pinned datalink whose CAS blob exists for the calling account is +// returned unchanged (idempotent): no live read, no second CAS write. 
+func TestDatalinkPinIdempotentWhenBlobExists(t *testing.T) { + dir := t.TempDir() + filePath := filepath.Join(dir, "doc.txt") + require.NoError(t, os.WriteFile(filePath, []byte("v1"), 0o600)) + + proc := testutil.NewProc(t) + liveURL := "file://" + filePath + pinned := pinnedURLOf(liveURL, []byte("v1")) + + // first pin materializes the CAS blob for this account + first := NewFunctionTestCase(proc, + []FunctionTestInput{NewFunctionTestInput(types.T_datalink.ToType(), + []string{liveURL}, []bool{false})}, + NewFunctionTestResult(types.T_datalink.ToType(), false, + []string{pinned}, []bool{false}), + DatalinkPin) + s, info := first.Run() + require.True(t, s, info) + + // re-pinning the already-pinned URL returns it unchanged: the blob is present. + again := NewFunctionTestCase(proc, + []FunctionTestInput{NewFunctionTestInput(types.T_datalink.ToType(), + []string{pinned}, []bool{false})}, + NewFunctionTestResult(types.T_datalink.ToType(), false, + []string{pinned}, []bool{false}), + DatalinkPin) + s, info = again.Run() + require.True(t, s, info) +} + +// Re-pinning an already-pinned URL whose CAS blob is absent for the calling +// account re-materializes it from the live source when the live bytes still hash +// to the declared contenthash. The blob is then readable for this account (under +// the old unconditional early-return it was never stored, so a read would fail). 
+func TestDatalinkPinRepinsAbsentBlobFromLive(t *testing.T) { + dir := t.TempDir() + filePath := filepath.Join(dir, "doc.txt") + content := []byte("frozen") + require.NoError(t, os.WriteFile(filePath, content, 0o600)) + + proc := testutil.NewProc(t) + liveURL := "file://" + filePath + pinned := pinnedURLOf(liveURL, content) // contenthash = sha256(content); blob not yet stored + + tc := NewFunctionTestCase(proc, + []FunctionTestInput{NewFunctionTestInput(types.T_datalink.ToType(), + []string{pinned}, []bool{false})}, + NewFunctionTestResult(types.T_datalink.ToType(), false, + []string{pinned}, []bool{false}), + DatalinkPin) + s, info := tc.Run() + require.True(t, s, info) + + dl, err := datalink.NewDatalink(pinned, proc) + require.NoError(t, err) + got, err := dl.GetBytes(proc) + require.NoError(t, err) + require.Equal(t, content, got) +} + +// Re-pinning an already-pinned URL whose CAS blob is absent for the calling +// account re-reads the live source. If the live bytes no longer hash to the +// declared contenthash, the requested version is unavailable, so pin errors out +// rather than silently pinning different bytes under the old hash. +func TestDatalinkPinRepinRejectsChangedLiveContent(t *testing.T) { + dir := t.TempDir() + filePath := filepath.Join(dir, "doc.txt") + require.NoError(t, os.WriteFile(filePath, []byte("actual-bytes"), 0o600)) + + proc := testutil.NewProc(t) + // a valid-format hash that does not match the live file; no CAS blob exists. 
+ mismatch := strings.Repeat("a", 64) + pinned := "file://" + filePath + "?contenthash=" + mismatch + + tc := NewFunctionTestCase(proc, + []FunctionTestInput{NewFunctionTestInput(types.T_datalink.ToType(), + []string{pinned}, []bool{false})}, + NewFunctionTestResult(types.T_datalink.ToType(), true, + []string{""}, []bool{false}), + DatalinkPin) + s, info := tc.Run() + require.True(t, s, info) +} + +func TestDatalinkPinRejectsInvalidExistingContentHash(t *testing.T) { + proc := testutil.NewProc(t) + + cases := []struct { + name string + url string + }{ + { + name: "short", + url: "file:///does/not/matter.txt?contenthash=abc", + }, + { + name: "non-hex", + url: "file:///does/not/matter.txt?contenthash=" + strings.Repeat("g", 64), + }, + { + name: "empty", + url: "file:///does/not/matter.txt?contenthash=", + }, + { + name: "mixed-case-key", + url: "file:///does/not/matter.txt?ContentHash=bad", + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + tc := NewFunctionTestCase(proc, + []FunctionTestInput{NewFunctionTestInput(types.T_datalink.ToType(), + []string{c.url}, []bool{false})}, + NewFunctionTestResult(types.T_datalink.ToType(), true, + []string{""}, []bool{false}), + DatalinkPin) + s, info := tc.Run() + require.True(t, s, info) + }) + } +} + +// NULL in, NULL out. +func TestDatalinkPinNull(t *testing.T) { + proc := testutil.NewProc(t) + + tc := NewFunctionTestCase(proc, + []FunctionTestInput{NewFunctionTestInput(types.T_datalink.ToType(), + []string{""}, []bool{true})}, + NewFunctionTestResult(types.T_datalink.ToType(), false, + []string{""}, []bool{true}), + DatalinkPin) + s, info := tc.Run() + require.True(t, s, info) +} + +// When the external file cannot be read, pin must error out rather than silently +// producing an un-pinned or empty value. 
+func TestDatalinkPinMissingLiveFileErrors(t *testing.T) { + proc := testutil.NewProc(t) + liveURL := "file:///nonexistent/datalink/pin/xyz.txt" + + tc := NewFunctionTestCase(proc, + []FunctionTestInput{NewFunctionTestInput(types.T_datalink.ToType(), + []string{liveURL}, []bool{false})}, + NewFunctionTestResult(types.T_datalink.ToType(), true, + []string{""}, []bool{false}), + DatalinkPin) + s, info := tc.Run() + require.True(t, s, info) +} + +// Mixed-case offset/size params must be stripped after the sliced bytes are +// frozen, so the pinned URL carries only the contenthash. Otherwise a later read +// would slice the (already-sliced) CAS object again and return wrong bytes. +func TestDatalinkPinStripsMixedCaseSlice(t *testing.T) { + dir := t.TempDir() + filePath := filepath.Join(dir, "doc.txt") + require.NoError(t, os.WriteFile(filePath, []byte("Hello world!"), 0o600)) + + proc := testutil.NewProc(t) + liveURL := "file://" + filePath + // ?Offset=6&Size=5 selects "world"; the pinned URL must address only that + // content by hash, with no residual Offset/Size. + input := liveURL + "?Offset=6&Size=5" + expected := pinnedURLOf(liveURL, []byte("world")) + + tc := NewFunctionTestCase(proc, + []FunctionTestInput{NewFunctionTestInput(types.T_datalink.ToType(), + []string{input}, []bool{false})}, + NewFunctionTestResult(types.T_datalink.ToType(), false, + []string{expected}, []bool{false}), + DatalinkPin) + s, info := tc.Run() + require.True(t, s, info) +} + +// pin refuses to copy more than one blob's worth of bytes into memory, mirroring +// load_file's MaxBlobLen guard, rather than risking OOM on a huge object. +func TestDatalinkPinRejectsOversizedFile(t *testing.T) { + dir := t.TempDir() + filePath := filepath.Join(dir, "big.bin") + f, err := os.Create(filePath) + require.NoError(t, err) + // sparse file just over MaxBlobLen: StatFile reports the full size without + // allocating it, so the guard must reject before reading anything. 
+ require.NoError(t, f.Truncate(int64(types.MaxBlobLen)+1)) + require.NoError(t, f.Close()) + + proc := testutil.NewProc(t) + tc := NewFunctionTestCase(proc, + []FunctionTestInput{NewFunctionTestInput(types.T_datalink.ToType(), + []string{"file://" + filePath}, []bool{false})}, + NewFunctionTestResult(types.T_datalink.ToType(), true, + []string{""}, []bool{false}), + DatalinkPin) + s, info := tc.Run() + require.True(t, s, info) +} + +// save_file() must reject writes to a pinned (contenthash) datalink: the pinned +// value addresses an immutable CAS object, so writing through it would target the +// internal CAS key rather than any real external path. +func TestWriteFileDatalinkRejectsPinned(t *testing.T) { + proc := testutil.NewProc(t) + pinned := "file:///does/not/matter.txt?contenthash=" + strings.Repeat("a", 64) + + tc := NewFunctionTestCase(proc, + []FunctionTestInput{ + NewFunctionTestInput(types.T_datalink.ToType(), []string{pinned}, []bool{false}), + NewFunctionTestInput(types.T_varchar.ToType(), []string{"new content"}, []bool{false}), + }, + NewFunctionTestResult(types.T_int64.ToType(), true, []int64{0}, []bool{false}), + WriteFileDatalink) + s, info := tc.Run() + require.True(t, s, info) +} + +// pin accepts a plain varchar URL too (implicitly treated as a datalink). 
+func TestDatalinkPinAcceptsVarchar(t *testing.T) { + dir := t.TempDir() + filePath := filepath.Join(dir, "doc.txt") + require.NoError(t, os.WriteFile(filePath, []byte("hello"), 0o600)) + + proc := testutil.NewProc(t) + liveURL := "file://" + filePath + + tc := NewFunctionTestCase(proc, + []FunctionTestInput{NewFunctionTestInput(types.T_varchar.ToType(), + []string{liveURL}, []bool{false})}, + NewFunctionTestResult(types.T_datalink.ToType(), false, + []string{pinnedURLOf(liveURL, []byte("hello"))}, []bool{false}), + DatalinkPin) + s, info := tc.Run() + require.True(t, s, info) +} diff --git a/pkg/sql/plan/function/func_unary.go b/pkg/sql/plan/function/func_unary.go index eb508d33a7623..c555bc15ac644 100644 --- a/pkg/sql/plan/function/func_unary.go +++ b/pkg/sql/plan/function/func_unary.go @@ -33,6 +33,7 @@ import ( "io" "math" "net" + "net/url" "runtime" "sort" "strconv" @@ -52,6 +53,7 @@ import ( "github.com/matrixorigin/matrixone/pkg/container/types" "github.com/matrixorigin/matrixone/pkg/container/vector" "github.com/matrixorigin/matrixone/pkg/datalink" + "github.com/matrixorigin/matrixone/pkg/defines" "github.com/matrixorigin/matrixone/pkg/fileservice" "github.com/matrixorigin/matrixone/pkg/logutil" "github.com/matrixorigin/matrixone/pkg/sql/plan/function/functionUtil" @@ -3364,18 +3366,14 @@ func LoadFileDatalink(ivecs []*vector.Vector, result vector.FunctionResultWrappe } size := dl.Size if size < 0 { - etlFS, readPath, err := fileservice.GetForETL(proc.Ctx, proc.GetFileService(), dl.MoPath) + fileSize, err := dl.StatSize(proc) if err != nil { return err } - entry, err := etlFS.StatFile(proc.Ctx, readPath) - if err != nil { - return err - } - if dl.Offset > entry.Size { + if dl.Offset > fileSize { return moerr.NewInternalError(proc.Ctx, "offset exceeds file size") } - size = entry.Size - dl.Offset + size = fileSize - dl.Offset } if size > int64(types.MaxBlobLen) { return moerr.NewInternalError(proc.Ctx, "Data too long for blob") @@ -3457,6 +3455,167 @@ func 
WriteFileDatalink(ivecs []*vector.Vector, result vector.FunctionResultWrapp return nil } +// DatalinkPin freezes the bytes currently referenced by a datalink into the +// immutable content-addressed store (CAS) and returns a datalink carrying +// ?contenthash= of those bytes. Reading the pinned datalink later always +// returns the frozen bytes, so historical snapshots stay reproducible even if +// the external object is overwritten out of band. +// +// - Inputs that already carry a valid contenthash are returned unchanged +// (idempotent) only when the CAS blob exists in the caller's account; if it is +// absent (e.g. minted by another account), they are re-pinned from the live +// source and the live bytes must still reproduce the declared hash. +// - If the external file cannot be read, pin errors out (never silently falls +// back to an un-pinned value). +// - The default, un-pinned datalink behavior is unaffected: only values passed +// through datalink_pin participate in content freezing. +func DatalinkPin(ivecs []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int, selectList *FunctionSelectList) error { + rs := vector.MustFunctionResult[types.Varlena](result) + urlVec := vector.GenerateFunctionStrParameter(ivecs[0]) + + casFS, err := fileservice.Get[fileservice.FileService](proc.Base.FileService, defines.SharedFileServiceName) + if err != nil { + return err + } + + for i := uint64(0); i < uint64(length); i++ { + _url, null := urlVec.GetStrValue(i) + if null { + if err = rs.AppendBytes(nil, true); err != nil { + return err + } + continue + } + + pinned, err := pinDatalink(util.UnsafeBytesToString(_url), casFS, proc) + if err != nil { + return err + } + if err = rs.AppendBytes([]byte(pinned), false); err != nil { + return err + } + } + return nil +} + +// pinDatalink reads the live bytes of rawURL, stores them in the CAS, and returns +// rawURL rewritten to address that immutable copy via ?contenthash=. 
+// +// An already-pinned URL is idempotent only when its CAS blob exists in the calling +// account's namespace; then it is validated and returned unchanged (no live read, +// no second CAS write). If the blob is absent for this account (e.g. a contenthash +// minted by another account), the URL is re-pinned from the live source for the +// current account, and the live bytes must still reproduce the declared hash — +// otherwise the requested version is unavailable and pin errors out rather than +// silently pinning different bytes. +func pinDatalink(rawURL string, casFS fileservice.FileService, proc *process.Process) (string, error) { + u, err := url.Parse(rawURL) + if err != nil { + return "", err + } + + q := u.Query() + // Detect an existing contenthash (case-insensitively, consistent with + // ParseDatalink which lower-cases query keys). + var pinnedKey, pinnedHash string + for k := range q { + if strings.EqualFold(k, datalink.ContentHashKey) { + pinnedKey = k + pinnedHash = strings.ToLower(q.Get(k)) + break + } + } + + if pinnedKey != "" { + if err := datalink.ValidateContentHash(pinnedHash); err != nil { + return "", err + } + accountID, err := defines.GetAccountId(proc.Ctx) + if err != nil { + return "", err + } + // Idempotent only when the blob actually exists in THIS account's CAS + // namespace. A contenthash present only in another account's namespace would + // otherwise be returned unchanged and fail at read time; re-pin it from the + // live source for the current account instead. + exists, err := datalink.CASExists(proc.Ctx, casFS, accountID, pinnedHash) + if err != nil { + return "", err + } + if exists { + return rawURL, nil + } + // Strip the contenthash so the live external path is read below; otherwise + // NewDatalink would resolve to the (missing) CAS key instead of the file. 
+ q.Del(pinnedKey) + u.RawQuery = q.Encode() + rawURL = u.String() + } + + // read the live bytes this datalink currently references + dl, err := datalink.NewDatalink(rawURL, proc) + if err != nil { + return "", err + } + + // Size guard, mirroring load_file: refuse to pin more than one blob's worth of + // bytes. Pinning copies the whole (sliced) object into memory, so without this + // a huge object would OOM the CN. The effective size accounts for offset/size, + // so a small slice of a large file is still allowed. Streaming hash/copy of + // arbitrarily large objects is a follow-up (see issue #24555). + size := dl.Size + if size < 0 { + fileSize, err := dl.StatSize(proc) + if err != nil { + return "", err + } + size = fileSize - dl.Offset + } + if size > int64(types.MaxBlobLen) { + return "", moerr.NewInternalError(proc.Ctx, "Data too long for blob") + } + + fileBytes, err := dl.GetBytes(proc) + if err != nil { + return "", err + } + + // Re-pin of an already-pinned input: the live bytes must still reproduce the + // declared version. If they no longer match, that version is unavailable to the + // current account, so error out rather than silently pinning different bytes. + if pinnedHash != "" { + sum := sha256.Sum256(fileBytes) + if got := hex.EncodeToString(sum[:]); got != pinnedHash { + return "", moerr.NewInternalErrorf(proc.Ctx, + "datalink_pin: live content no longer matches pinned contenthash %s (now %s)", pinnedHash, got) + } + } + + // Namespace the CAS object by the calling account (resolved from the trusted + // context, never from the URL) so a contenthash is not a cross-account bearer + // capability. + accountID, err := defines.GetAccountId(proc.Ctx) + if err != nil { + return "", err + } + hash, err := datalink.CASPut(proc.Ctx, casFS, accountID, fileBytes) + if err != nil { + return "", err + } + + // Rewrite the URL to address the immutable copy. 
The sliced bytes are already + // baked into the CAS object, so drop offset/size (case-insensitively, matching + // ParseDatalink which lower-cases query keys) to avoid re-slicing on read. + for k := range q { + if strings.EqualFold(k, "offset") || strings.EqualFold(k, "size") { + q.Del(k) + } + } + q.Set(datalink.ContentHashKey, hash) + u.RawQuery = q.Encode() + return u.String(), nil +} + func MoMemUsage(ivecs []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int, selectList *FunctionSelectList) error { if len(ivecs) != 1 { return moerr.NewInvalidInput(proc.Ctx, "no mpool name") diff --git a/pkg/sql/plan/function/function_id.go b/pkg/sql/plan/function/function_id.go index a7e855295222a..610ec79c0ad7c 100644 --- a/pkg/sql/plan/function/function_id.go +++ b/pkg/sql/plan/function/function_id.go @@ -640,9 +640,11 @@ const ( HLL_MERGE_AGG = 454 HLL_CARDINALITY = 455 + DATALINK_PIN = 456 + // FUNCTION_END_NUMBER is not a function, just a flag to record the max number of function. // TODO: every one should put the new function id in front of this one if you want to make a new function. - FUNCTION_END_NUMBER = 456 + FUNCTION_END_NUMBER = 457 ) // functionIdRegister is what function we have registered already. 
@@ -933,6 +935,7 @@ var functionIdRegister = map[string]int32{ "uuid": UUID, "load_file": LOAD_FILE, "save_file": SAVE_FILE, + "datalink_pin": DATALINK_PIN, "hex": HEX, "unhex": UNHEX, "md5": MD5, diff --git a/pkg/sql/plan/function/function_id_test.go b/pkg/sql/plan/function/function_id_test.go index a9b1dd5b8ee68..4958a00ac39d2 100644 --- a/pkg/sql/plan/function/function_id_test.go +++ b/pkg/sql/plan/function/function_id_test.go @@ -509,7 +509,8 @@ var predefinedFunids = map[int]int{ HLL_ADD_AGG: 453, HLL_MERGE_AGG: 454, HLL_CARDINALITY: 455, - FUNCTION_END_NUMBER: 456, + DATALINK_PIN: 456, + FUNCTION_END_NUMBER: 457, } func Test_funids(t *testing.T) { diff --git a/pkg/sql/plan/function/list_builtIn.go b/pkg/sql/plan/function/list_builtIn.go index 3d1bdd3a76786..b3b5ea0493676 100644 --- a/pkg/sql/plan/function/list_builtIn.go +++ b/pkg/sql/plan/function/list_builtIn.go @@ -10520,6 +10520,63 @@ var supportedOthersBuiltIns = []FuncNew{ }, }, + // function `datalink_pin` + // freezes the referenced bytes into the content-addressed store and returns a + // datalink carrying ?contenthash=, so historical snapshots are reproducible. 
+ { + functionId: DATALINK_PIN, + class: plan.Function_STRICT, + layout: STANDARD_FUNCTION, + checkFn: fixedTypeMatch, + + Overloads: []overload{ + { + overloadId: 0, + volatile: true, + args: []types.T{types.T_datalink}, + retType: func(parameters []types.Type) types.Type { + return types.T_datalink.ToType() + }, + newOp: func() executeLogicOfOverload { + return DatalinkPin + }, + }, + { + overloadId: 1, + volatile: true, + args: []types.T{types.T_varchar}, + retType: func(parameters []types.Type) types.Type { + return types.T_datalink.ToType() + }, + newOp: func() executeLogicOfOverload { + return DatalinkPin + }, + }, + { + overloadId: 2, + volatile: true, + args: []types.T{types.T_char}, + retType: func(parameters []types.Type) types.Type { + return types.T_datalink.ToType() + }, + newOp: func() executeLogicOfOverload { + return DatalinkPin + }, + }, + { + overloadId: 3, + volatile: true, + args: []types.T{types.T_text}, + retType: func(parameters []types.Type) types.Type { + return types.T_datalink.ToType() + }, + newOp: func() executeLogicOfOverload { + return DatalinkPin + }, + }, + }, + }, + // function `mo_memory_usage` { functionId: MO_MEMORY_USAGE, diff --git a/proto/task.proto b/proto/task.proto index eeb9e749364ed..c6c7d2d680e6e 100644 --- a/proto/task.proto +++ b/proto/task.proto @@ -82,6 +82,8 @@ option (gogoproto.protosizer_all) = true; PublicationExecutor = 11; // SQL task SQLTask = 12; + // Datalink CAS garbage-collection sweep task + DatalinkCASGCExecutor = 13; } // TaskMetadata is a task metadata abstraction that can be scheduled for execution at any CN node. 
-- datalink CAS garbage collection: dropping an account is meant to reclaim the
-- account's pinned content-addressed store namespace (its per-account key prefix
-- under datalink_cas/). This script checks three things:
--   1. an account can pin content into its per-account CAS and read it back;
--   2. DROP ACCOUNT succeeds while pinned blobs exist (the per-account prefix
--      cleanup runs best-effort during the drop);
--   3. a same-named account created afterwards can pin and read again.
-- NOTE(review): no statement here inspects the store itself, so the reclamation
-- is not asserted directly; only the drop succeeding and the later re-pin are.

-- an account pins content into a table (whole file, then a 5-byte slice), reads
-- both back from its CAS, then is dropped
create account cas_gc_acc ADMIN_NAME 'admin' IDENTIFIED BY '123456';
-- @session:id=2&user=cas_gc_acc:admin:accountadmin&password=123456
create database cas_gc_db;
use cas_gc_db;
create table t(id int, dl datalink);
insert into t values(1, datalink_pin(cast('file://$resources/file_test/normal.txt' as datalink)));
insert into t values(2, datalink_pin(cast('file://$resources/file_test/normal.txt?offset=0&size=5' as datalink)));
select id, load_file(dl) as content from t order by id;
-- @session

-- dropping the account with pinned blobs present must succeed
drop account cas_gc_acc;

-- a same-named account recreated afterwards has its own clean CAS namespace and
-- can pin again without interference from the dropped account
create account cas_gc_acc ADMIN_NAME 'admin' IDENTIFIED BY '123456';
-- @session:id=3&user=cas_gc_acc:admin:accountadmin&password=123456
select load_file(datalink_pin('file://$resources/file_test/normal.txt')) as repin;
-- @session
drop account cas_gc_acc;
+select load_file(datalink_pin(cast('file://$resources/file_test/normal.txt?offset=0&size=5' as datalink))) as pin_slice; +➤ pin_slice[12,0,0] 𝄀 +Hello +create table pin_t(id int, dl datalink); +insert into pin_t values(1, datalink_pin(cast('file://$resources/file_test/normal.txt' as datalink))); +insert into pin_t values(2, datalink_pin(cast('file://$resources/file_test/normal.txt?offset=0&size=5' as datalink))); +select id, load_file(dl) as content from pin_t order by id; +➤ id[4,32,0] ¦ content[12,0,0] 𝄀 +1 ¦ Hello world! 𝄀 +2 ¦ Hello +select load_file(cast('file:///bogus/nonexistent/path.txt?contenthash=c0535e4be2b79ffd93291305436bf889314e4a3faec05ecffcbb7df31ad9e51a' as datalink)) as cas_decoupled; +➤ cas_decoupled[12,0,0] 𝄀 +Hello world! +select load_file(cast('file://$resources/file_test/normal.txt?contenthash=0000000000000000000000000000000000000000000000000000000000000000' as datalink)) as missing_cas; +file 0000000000000000000000000000000000000000000000000000000000000000 is not found +select load_file(cast('file://$resources/file_test/normal.txt?contenthash=notavalidhash' as datalink)) as bad_hash; +internal error: invalid datalink contenthash length 13, want 64 +select datalink_pin(cast('unknownscheme://x/y' as datalink)) as pin_bad_scheme; +unsupported url scheme unknownscheme is not yet implemented +select datalink_pin(cast(null as datalink)) as pin_null; +➤ pin_null[12,0,0] 𝄀 +null +drop table pin_t; +create stage pin_ow_st URL='file://$resources/into_outfile/pin_ow_a/'; +select save_file(cast('stage://pin_ow_st/f.txt' as datalink), 'version-ONE') as ow_setup_v1; +➤ ow_setup_v1[-5,64,0] 𝄀 +11 +create table pin_ow(id int, dl datalink); +insert into pin_ow values(1, datalink_pin(cast('stage://pin_ow_st/f.txt' as datalink))); +drop stage pin_ow_st; +create stage pin_ow_st URL='file://$resources/into_outfile/pin_ow_b/'; +select save_file(cast('stage://pin_ow_st/f.txt' as datalink), 'version-TWO') as ow_setup_v2; +➤ ow_setup_v2[-5,64,0] 𝄀 +11 +select 
-- datalink_pin: freeze the bytes referenced by a datalink into the immutable
-- content-addressed store (CAS), so the value stays reproducible by its content
-- hash even if the external object is later changed.
-- The cases are order-dependent: later cases read blobs pinned by earlier ones.

-- 1. pin then read returns the referenced content; the argument may be a
--    datalink, a varchar literal, or text
select load_file(datalink_pin(cast('file://$resources/file_test/normal.txt' as datalink))) as pin_datalink;
select load_file(datalink_pin('file://$resources/file_test/normal.txt')) as pin_varchar;
select load_file(datalink_pin(cast('file://$resources/file_test/normal.txt' as text))) as pin_text;

-- 2. pinning an already-pinned value is idempotent: the doubly-pinned value
--    still reads back the same content
select load_file(datalink_pin(datalink_pin(cast('file://$resources/file_test/normal.txt' as datalink)))) as pin_idempotent;

-- 3. offset/size: pin freezes only the sliced bytes (here the first 5 bytes)
select load_file(datalink_pin(cast('file://$resources/file_test/normal.txt?offset=0&size=5' as datalink))) as pin_slice;

-- 4. store pinned datalinks in a table and read them back from the CAS
create table pin_t(id int, dl datalink);
insert into pin_t values(1, datalink_pin(cast('file://$resources/file_test/normal.txt' as datalink)));
insert into pin_t values(2, datalink_pin(cast('file://$resources/file_test/normal.txt?offset=0&size=5' as datalink)));
select id, load_file(dl) as content from pin_t order by id;

-- 5. a pinned value is decoupled from its original path: reading by contenthash is
--    served from the CAS even when the original path no longer resolves.
--    The hash below is that of normal.txt ('Hello world!'), which the cases above
--    already pinned for the current account, so this case must run after them.
select load_file(cast('file:///bogus/nonexistent/path.txt?contenthash=c0535e4be2b79ffd93291305436bf889314e4a3faec05ecffcbb7df31ad9e51a' as datalink)) as cas_decoupled;

-- 6. a well-formed contenthash with no stored object errors out (never falls back
--    to the live file, even though the path itself is readable)
select load_file(cast('file://$resources/file_test/normal.txt?contenthash=0000000000000000000000000000000000000000000000000000000000000000' as datalink)) as missing_cas;

-- 7. an ill-formed contenthash (wrong length for a sha256 hex digest) is rejected
select load_file(cast('file://$resources/file_test/normal.txt?contenthash=notavalidhash' as datalink)) as bad_hash;

-- 8. pinning an unsupported url scheme errors out
select datalink_pin(cast('unknownscheme://x/y' as datalink)) as pin_bad_scheme;

-- 9. NULL in -> NULL out
select datalink_pin(cast(null as datalink)) as pin_null;

drop table pin_t;

-- 10. core regression: pin, then change what the reference resolves to, and
--     verify the pinned value still reads the original bytes while the live
--     reference reads the new bytes. The external object is "overwritten" out of
--     band by repointing the stage to a different directory (the file service is
--     write-once, so the same path cannot be overwritten in place).
--     Expected: ow_live_read = 'version-TWO', ow_pinned_read = 'version-ONE'.
create stage pin_ow_st URL='file://$resources/into_outfile/pin_ow_a/';
select save_file(cast('stage://pin_ow_st/f.txt' as datalink), 'version-ONE') as ow_setup_v1;
create table pin_ow(id int, dl datalink);
insert into pin_ow values(1, datalink_pin(cast('stage://pin_ow_st/f.txt' as datalink)));
drop stage pin_ow_st;
create stage pin_ow_st URL='file://$resources/into_outfile/pin_ow_b/';
select save_file(cast('stage://pin_ow_st/f.txt' as datalink), 'version-TWO') as ow_setup_v2;
select load_file(cast('stage://pin_ow_st/f.txt' as datalink)) as ow_live_read;
select id, load_file(dl) as ow_pinned_read from pin_ow where id = 1;
drop table pin_ow;
drop stage pin_ow_st;

-- 11. cross-account isolation: a pinned datalink's CAS object is namespaced by
--     account, so a contenthash is not a global bearer token. The sys account
--     pinned normal.txt ('Hello world!', hash c0535e..) in the cases above; a
--     separate account reading the same contenthash is served from its own
--     (empty) namespace and errors out, never reaching the sys account's bytes.
create account pin_acc ADMIN_NAME 'admin' IDENTIFIED BY '123456';
-- @session:id=2&user=pin_acc:admin:accountadmin&password=123456
select load_file(cast('file:///x.txt?contenthash=c0535e4be2b79ffd93291305436bf889314e4a3faec05ecffcbb7df31ad9e51a' as datalink)) as cross_account_blocked;
-- @session
drop account pin_acc;

-- 12. writes to a pinned (contenthash) datalink are rejected: the pinned value
--     addresses an immutable CAS object whose internal key is not a writable
--     external path, so save_file must error rather than clobber the wrong path.
select save_file(cast('file:///bogus/path.txt?contenthash=c0535e4be2b79ffd93291305436bf889314e4a3faec05ecffcbb7df31ad9e51a' as datalink), 'should-fail') as pin_write_rejected;