diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle new file mode 100644 index 0000000000..93bbe2aeb4 --- /dev/null +++ b/buildSrc/build.gradle @@ -0,0 +1,28 @@ +/* + * Copyright 2013-2026, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +plugins { + id 'groovy' +} + +repositories { + mavenCentral() +} + +dependencies { + implementation gradleApi() + implementation localGroovy() + implementation 'com.github.victools:jsonschema-generator:4.38.0' +} diff --git a/buildSrc/src/main/groovy/nextflow/gradle/GenerateLineageSchemaTask.groovy b/buildSrc/src/main/groovy/nextflow/gradle/GenerateLineageSchemaTask.groovy new file mode 100644 index 0000000000..aa961065de --- /dev/null +++ b/buildSrc/src/main/groovy/nextflow/gradle/GenerateLineageSchemaTask.groovy @@ -0,0 +1,223 @@ +/* + * Copyright 2013-2026, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package nextflow.gradle + +import java.nio.file.Path +import java.time.OffsetDateTime + +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.databind.node.ArrayNode +import com.fasterxml.jackson.databind.node.ObjectNode +import com.github.victools.jsonschema.generator.CustomDefinition +import com.github.victools.jsonschema.generator.Option +import com.github.victools.jsonschema.generator.OptionPreset +import com.github.victools.jsonschema.generator.SchemaBuilder +import com.github.victools.jsonschema.generator.SchemaGenerator +import com.github.victools.jsonschema.generator.SchemaGeneratorConfigBuilder +import com.github.victools.jsonschema.generator.SchemaVersion +import groovy.transform.CompileStatic +import org.gradle.api.DefaultTask +import org.gradle.api.file.FileCollection +import org.gradle.api.tasks.Input +import org.gradle.api.tasks.InputDirectory +import org.gradle.api.tasks.InputFiles +import org.gradle.api.tasks.OutputFile +import org.gradle.api.tasks.TaskAction + +/** + * Generates a JSON Schema (draft 2020-12) describing the JSON documents emitted + * by {@code nextflow.lineage.serde.LinEncoder} for v1beta1 model classes. + * + * The task does not depend on the nf-lineage runtime; it loads compiled model + * classes via a URLClassLoader and uses victools jsonschema-generator to derive + * the per-subtype schemas. A single SchemaBuilder collects $defs across all + * subtypes so shared types (Checksum, DataPath, Parameter) are emitted once and + * referenced via $ref. Each subtype is wrapped in a {version, kind, spec} + * envelope matching {@code LinTypeAdapterFactory.write(...)}. 
+ */ +@CompileStatic +class GenerateLineageSchemaTask extends DefaultTask { + + @InputFiles + FileCollection classpath + + @Input + List subtypes + + @InputDirectory + File modelSourceDir + + @OutputFile + File outputFile + + private static final String MODEL_PACKAGE = 'nextflow.lineage.model.v1beta1.' + + @TaskAction + void generate() { + final loader = buildClassLoader() + final version = readLineageVersion(loader) + final descriptions = parseClassDocs(modelSourceDir) + final mapper = new ObjectMapper() + final root = mapper.createObjectNode() + root.put('$schema', 'https://json-schema.org/draft/2020-12/schema') + root.put('title', 'Nextflow Lineage Model v1beta1') + + final schemaBuilder = newSchemaBuilder(mapper, descriptions) + final oneOf = mapper.createArrayNode() + + subtypes.each { String fqn -> + final cls = loader.loadClass(fqn) + final specRef = schemaBuilder.createSchemaReference(cls) as ObjectNode + oneOf.add(wrapEnvelope(cls.simpleName, version, specRef, mapper)) + } + + // Collect consolidated $defs from the builder; emit at root so $ref + // targets like "#/$defs/Checksum" resolve correctly. + final defs = schemaBuilder.collectDefinitions('$defs') + root.set('$defs', defs) + root.set('oneOf', oneOf) + + // Victools, when combining NULLABLE_FIELDS_BY_DEFAULT with + // DEFINITIONS_FOR_ALL_OBJECTS, emits "-nullable" wrapper defs. + // Inline those wrappers at the call site and drop the def entries. 
+ inlineNullableWrappers(root) + + outputFile.parentFile.mkdirs() + mapper.writerWithDefaultPrettyPrinter().writeValue(outputFile, root) + logger.lifecycle("Wrote lineage schema with ${subtypes.size()} subtypes to ${outputFile}") + } + + private URLClassLoader buildClassLoader() { + final urls = classpath.files.collect { it.toURI().toURL() } as URL[] + return new URLClassLoader(urls, getClass().classLoader) + } + + private static String readLineageVersion(ClassLoader loader) { + final linModel = loader.loadClass('nextflow.lineage.model.v1beta1.LinModel') + return linModel.getField('VERSION').get(null) as String + } + + private static SchemaBuilder newSchemaBuilder(ObjectMapper mapper, Map descriptions) { + // LinEncoder serializes nulls (withSerializeNulls(true)) so every field + // can appear as null in the emitted JSON — schema marks them nullable. + final config = new SchemaGeneratorConfigBuilder(SchemaVersion.DRAFT_2020_12, OptionPreset.PLAIN_JSON) + .with(Option.NULLABLE_FIELDS_BY_DEFAULT) + .with(Option.NULLABLE_ARRAY_ITEMS_ALLOWED) + .with(Option.DEFINITIONS_FOR_ALL_OBJECTS) + config.forTypesInGeneral().withCustomDefinitionProvider({ javaType, context -> + final erased = javaType.erasedType + if (erased == OffsetDateTime) { + final node = mapper.createObjectNode() + node.put('type', 'string') + node.put('format', 'date-time') + return new CustomDefinition(node) + } + if (Path.isAssignableFrom(erased)) { + final node = mapper.createObjectNode() + node.put('type', 'string') + return new CustomDefinition(node) + } + return null + }) + config.forTypesInGeneral().withTitleResolver({ scope -> + final cls = scope.type.erasedType + cls.name.startsWith(MODEL_PACKAGE) ? 
cls.simpleName : null + }) + config.forTypesInGeneral().withDescriptionResolver({ scope -> + descriptions[scope.type.erasedType.name] + }) + return new SchemaGenerator(config.build()).buildMultipleSchemaDefinitions() + } + + private static Map parseClassDocs(File dir) { + final result = [:] as Map + if (dir == null || !dir.isDirectory()) return result + final pattern = ~/(?s)\/\*\*(.*?)\*\/\s*(?:@\w+(?:\([^)]*\))?\s*)*(?:abstract\s+|final\s+|public\s+|static\s+)*(?:class|interface)\s+(\w+)/ + dir.eachFileRecurse { File file -> + if (!file.name.endsWith('.groovy')) return + final text = file.text + final pkgMatcher = text =~ /(?m)^\s*package\s+([\w.]+)/ + if (!pkgMatcher.find()) return + final pkg = pkgMatcher.group(1) as String + final m = text =~ pattern + while (m.find()) { + final raw = m.group(1) as String + final clsName = m.group(2) as String + final cleaned = cleanDoc(raw) + if (cleaned) result["${pkg}.${clsName}".toString()] = cleaned + } + } + return result + } + + private static String cleanDoc(String raw) { + // Strip the `*` line prefix and drop everything from the first @tag onwards. + final lines = raw.readLines().collect { line -> line.replaceFirst(/^\s*\*\s?/, '') } + final stopIdx = lines.findIndexOf { (it =~ /^\s*@\w+/).find() } + final kept = stopIdx >= 0 ? lines[0.. 
+ defs.fieldNames().each { String name -> + if (name.endsWith('-nullable')) + wrappers[name] = defs.get(name) as ObjectNode + } + if (wrappers.isEmpty()) return + replaceNullableRefs(root, wrappers) + wrappers.keySet().each { defs.remove(it) } + } + + private static void replaceNullableRefs(JsonNode node, Map wrappers) { + if (node instanceof ObjectNode) { + final on = node as ObjectNode + final ref = on.get('$ref') + if (ref != null && on.size() == 1) { + final target = ref.asText() - '#/$defs/' + final body = wrappers[target] + if (body != null) { + on.removeAll() + on.setAll(body.deepCopy() as ObjectNode) + return + } + } + on.fields().each { replaceNullableRefs(it.value, wrappers) } + } + else if (node instanceof ArrayNode) { + (node as ArrayNode).each { replaceNullableRefs(it as JsonNode, wrappers) } + } + } + + private static ObjectNode wrapEnvelope(String kind, String version, JsonNode spec, ObjectMapper mapper) { + final env = mapper.createObjectNode() + env.put('type', 'object') + final props = env.putObject('properties') + props.putObject('version').put('const', version) + props.putObject('kind').put('const', kind) + props.set('spec', spec) + final required = env.putArray('required') + required.add('version') + required.add('kind') + required.add('spec') + env.put('additionalProperties', false) + return env + } +} diff --git a/modules/nf-lineage/build.gradle b/modules/nf-lineage/build.gradle index e2c8ae5b80..5eae420338 100644 --- a/modules/nf-lineage/build.gradle +++ b/modules/nf-lineage/build.gradle @@ -13,6 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +import nextflow.gradle.GenerateLineageSchemaTask + apply plugin: 'groovy' sourceSets { @@ -37,3 +39,24 @@ dependencies { testImplementation "org.apache.groovy:groovy-nio:4.0.31" } +tasks.register('generateLineageSchema', GenerateLineageSchemaTask) { + description = 'Generate JSON Schema for the lineage model v1beta1' + group = 'documentation' + dependsOn compileGroovy + classpath = sourceSets.main.runtimeClasspath + // Keep this list in sync with LinTypeAdapterFactory.registerSubtype(...) calls + // in src/main/nextflow/lineage/serde/LinTypeAdapterFactory.groovy. + // After editing, re-run this task to refresh + // src/resources/schema/lineage-v1beta1.schema.json. + subtypes = [ + 'nextflow.lineage.model.v1beta1.WorkflowRun', + 'nextflow.lineage.model.v1beta1.WorkflowOutput', + 'nextflow.lineage.model.v1beta1.Workflow', + 'nextflow.lineage.model.v1beta1.TaskRun', + 'nextflow.lineage.model.v1beta1.TaskOutput', + 'nextflow.lineage.model.v1beta1.FileOutput', + ] + modelSourceDir = file('src/main/nextflow/lineage/model/v1beta1') + outputFile = file('src/resources/schema/lineage-v1beta1.schema.json') +} + diff --git a/modules/nf-lineage/src/main/nextflow/lineage/model/v1beta1/Checksum.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/v1beta1/Checksum.groovy index 0feaa5e695..0d5c06c650 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/model/v1beta1/Checksum.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/v1beta1/Checksum.groovy @@ -22,7 +22,7 @@ import groovy.transform.Canonical import groovy.transform.CompileStatic import nextflow.util.CacheHelper /** - * Models a checksum including the value as well as the algortihm and mode used to compute it. + * Models a checksum, with the value, algorithm and mode used to compute it. 
* * @author Jorge Ejarque */ diff --git a/modules/nf-lineage/src/main/nextflow/lineage/model/v1beta1/Parameter.groovy b/modules/nf-lineage/src/main/nextflow/lineage/model/v1beta1/Parameter.groovy index c311bda9cf..bffd009c66 100644 --- a/modules/nf-lineage/src/main/nextflow/lineage/model/v1beta1/Parameter.groovy +++ b/modules/nf-lineage/src/main/nextflow/lineage/model/v1beta1/Parameter.groovy @@ -20,7 +20,7 @@ import groovy.transform.Canonical import groovy.transform.CompileStatic /** - * Model Workflow and Task Parameters. + * Models a workflow or task parameter. * * @author Jorge Ejarque extends RuntimeTypeAdapterFactory { LinTypeAdapterFactory() { super(LinSerializable.class, "kind", false) + // When adding or removing a subtype here, also update the `subtypes` list in + // modules/nf-lineage/build.gradle (task `generateLineageSchema`) and re-run + // `./gradlew :nf-lineage:generateLineageSchema` to refresh + // src/resources/schema/lineage-v1beta1.schema.json. this.registerSubtype(WorkflowRun, WorkflowRun.simpleName) .registerSubtype(WorkflowOutput, WorkflowOutput.simpleName) .registerSubtype(Workflow, Workflow.simpleName) diff --git a/modules/nf-lineage/src/resources/schema/lineage-v1beta1.schema.json b/modules/nf-lineage/src/resources/schema/lineage-v1beta1.schema.json new file mode 100644 index 0000000000..5fd3459e5c --- /dev/null +++ b/modules/nf-lineage/src/resources/schema/lineage-v1beta1.schema.json @@ -0,0 +1,371 @@ +{ + "$schema" : "https://json-schema.org/draft/2020-12/schema", + "title" : "Nextflow Lineage Model v1beta1", + "$defs" : { + "Checksum" : { + "type" : "object", + "properties" : { + "algorithm" : { + "type" : [ "string", "null" ] + }, + "mode" : { + "type" : [ "string", "null" ] + }, + "value" : { + "type" : [ "string", "null" ] + } + }, + "title" : "Checksum", + "description" : "Models a checksum, with the value, algorithm and mode used to compute it." 
+ }, + "DataPath" : { + "type" : "object", + "properties" : { + "checksum" : { + "anyOf" : [ { + "type" : "null" + }, { + "$ref" : "#/$defs/Checksum" + } ] + }, + "path" : { + "type" : [ "string", "null" ] + } + }, + "title" : "DataPath", + "description" : "Models a data path with a checksum to validate its contents." + }, + "FileOutput" : { + "type" : "object", + "properties" : { + "checksum" : { + "anyOf" : [ { + "type" : "null" + }, { + "$ref" : "#/$defs/Checksum" + } ] + }, + "createdAt" : { + "anyOf" : [ { + "type" : "null" + }, { + "$ref" : "#/$defs/OffsetDateTime" + } ] + }, + "labels" : { + "type" : [ "array", "null" ], + "items" : { + "type" : "string" + } + }, + "modifiedAt" : { + "anyOf" : [ { + "type" : "null" + }, { + "$ref" : "#/$defs/OffsetDateTime" + } ] + }, + "path" : { + "type" : [ "string", "null" ] + }, + "size" : { + "type" : "integer" + }, + "source" : { + "type" : [ "string", "null" ] + }, + "taskRun" : { + "type" : [ "string", "null" ] + }, + "workflowRun" : { + "type" : [ "string", "null" ] + } + }, + "title" : "FileOutput", + "description" : "Models a file produced by a workflow or task." + }, + "Map" : { + "type" : "object" + }, + "OffsetDateTime" : { + "type" : "string", + "format" : "date-time" + }, + "Parameter" : { + "type" : "object", + "properties" : { + "name" : { + "type" : [ "string", "null" ] + }, + "type" : { + "type" : [ "string", "null" ] + }, + "value" : { } + }, + "title" : "Parameter", + "description" : "Models a workflow or task parameter." 
+ }, + "TaskOutput" : { + "type" : "object", + "properties" : { + "createdAt" : { + "anyOf" : [ { + "type" : "null" + }, { + "$ref" : "#/$defs/OffsetDateTime" + } ] + }, + "labels" : { + "type" : [ "array", "null" ], + "items" : { + "type" : "string" + } + }, + "output" : { + "type" : [ "array", "null" ], + "items" : { + "$ref" : "#/$defs/Parameter" + } + }, + "taskRun" : { + "type" : [ "string", "null" ] + }, + "workflowRun" : { + "type" : [ "string", "null" ] + } + }, + "title" : "TaskOutput", + "description" : "Models task results." + }, + "TaskRun" : { + "type" : "object", + "properties" : { + "architecture" : { + "type" : [ "string", "null" ] + }, + "binEntries" : { + "type" : [ "array", "null" ], + "items" : { + "$ref" : "#/$defs/DataPath" + } + }, + "codeChecksum" : { + "anyOf" : [ { + "type" : "null" + }, { + "$ref" : "#/$defs/Checksum" + } ] + }, + "conda" : { + "type" : [ "string", "null" ] + }, + "container" : { + "type" : [ "string", "null" ] + }, + "globalVars" : { + "anyOf" : [ { + "type" : "null" + }, { + "$ref" : "#/$defs/Map" + } ] + }, + "input" : { + "type" : [ "array", "null" ], + "items" : { + "$ref" : "#/$defs/Parameter" + } + }, + "name" : { + "type" : [ "string", "null" ] + }, + "script" : { + "type" : [ "string", "null" ] + }, + "sessionId" : { + "type" : [ "string", "null" ] + }, + "spack" : { + "type" : [ "string", "null" ] + }, + "workflowRun" : { + "type" : [ "string", "null" ] + } + }, + "title" : "TaskRun", + "description" : "Models a task execution." + }, + "Workflow" : { + "type" : "object", + "properties" : { + "commitId" : { + "type" : [ "string", "null" ] + }, + "repository" : { + "type" : [ "string", "null" ] + }, + "scriptFiles" : { + "type" : [ "array", "null" ], + "items" : { + "$ref" : "#/$defs/DataPath" + } + } + }, + "title" : "Workflow", + "description" : "Models a workflow definition." 
+ }, + "WorkflowOutput" : { + "type" : "object", + "properties" : { + "createdAt" : { + "anyOf" : [ { + "type" : "null" + }, { + "$ref" : "#/$defs/OffsetDateTime" + } ] + }, + "output" : { + "type" : [ "array", "null" ], + "items" : { + "$ref" : "#/$defs/Parameter" + } + }, + "workflowRun" : { + "type" : [ "string", "null" ] + } + }, + "title" : "WorkflowOutput", + "description" : "Models the results of a workflow execution." + }, + "WorkflowRun" : { + "type" : "object", + "properties" : { + "config" : { + "anyOf" : [ { + "type" : "null" + }, { + "$ref" : "#/$defs/Map" + } ] + }, + "metadata" : { + "anyOf" : [ { + "type" : "null" + }, { + "$ref" : "#/$defs/Map" + } ] + }, + "name" : { + "type" : [ "string", "null" ] + }, + "params" : { + "type" : [ "array", "null" ], + "items" : { + "$ref" : "#/$defs/Parameter" + } + }, + "sessionId" : { + "type" : [ "string", "null" ] + }, + "workflow" : { + "anyOf" : [ { + "type" : "null" + }, { + "$ref" : "#/$defs/Workflow" + } ] + } + }, + "title" : "WorkflowRun", + "description" : "Models a workflow execution." 
+ } + }, + "oneOf" : [ { + "type" : "object", + "properties" : { + "version" : { + "const" : "lineage/v1beta1" + }, + "kind" : { + "const" : "WorkflowRun" + }, + "spec" : { + "$ref" : "#/$defs/WorkflowRun" + } + }, + "required" : [ "version", "kind", "spec" ], + "additionalProperties" : false + }, { + "type" : "object", + "properties" : { + "version" : { + "const" : "lineage/v1beta1" + }, + "kind" : { + "const" : "WorkflowOutput" + }, + "spec" : { + "$ref" : "#/$defs/WorkflowOutput" + } + }, + "required" : [ "version", "kind", "spec" ], + "additionalProperties" : false + }, { + "type" : "object", + "properties" : { + "version" : { + "const" : "lineage/v1beta1" + }, + "kind" : { + "const" : "Workflow" + }, + "spec" : { + "$ref" : "#/$defs/Workflow" + } + }, + "required" : [ "version", "kind", "spec" ], + "additionalProperties" : false + }, { + "type" : "object", + "properties" : { + "version" : { + "const" : "lineage/v1beta1" + }, + "kind" : { + "const" : "TaskRun" + }, + "spec" : { + "$ref" : "#/$defs/TaskRun" + } + }, + "required" : [ "version", "kind", "spec" ], + "additionalProperties" : false + }, { + "type" : "object", + "properties" : { + "version" : { + "const" : "lineage/v1beta1" + }, + "kind" : { + "const" : "TaskOutput" + }, + "spec" : { + "$ref" : "#/$defs/TaskOutput" + } + }, + "required" : [ "version", "kind", "spec" ], + "additionalProperties" : false + }, { + "type" : "object", + "properties" : { + "version" : { + "const" : "lineage/v1beta1" + }, + "kind" : { + "const" : "FileOutput" + }, + "spec" : { + "$ref" : "#/$defs/FileOutput" + } + }, + "required" : [ "version", "kind", "spec" ], + "additionalProperties" : false + } ] +} \ No newline at end of file diff --git a/specs/260519-lineage-json-schema/plan.md b/specs/260519-lineage-json-schema/plan.md new file mode 100644 index 0000000000..267b67c00d --- /dev/null +++ b/specs/260519-lineage-json-schema/plan.md @@ -0,0 +1,517 @@ +# Lineage v1beta1 JSON Schema — Implementation Plan + +> **For agentic 
workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Produce a JSON Schema (draft 2020-12) describing the lineage v1beta1 wire format emitted by `LinEncoder`, generated from the compiled Groovy classes by a Gradle task, and check the resulting `lineage-v1beta1.schema.json` into `modules/nf-lineage/src/resources/schema/`. + +**Architecture:** A new top-level `buildSrc/` Groovy build module hosts a custom `DefaultTask` (`GenerateLineageSchemaTask`) that depends only on `com.github.victools:jsonschema-generator` on the build-script classpath. The task takes a classpath, a hardcoded list of subtype FQNs, and an output file; it loads each subtype via a `URLClassLoader` and runs victools to produce per-subtype schemas, then wraps each in a `{version, kind, spec}` envelope and emits a single `oneOf` root document. `modules/nf-lineage/build.gradle` registers a `generateLineageSchema` task instance; no runtime dep is added to `nf-lineage`. The only production-source change is a three-line maintainer comment in `LinTypeAdapterFactory`. + +**Tech Stack:** Gradle (Groovy DSL) + buildSrc, Groovy 4, victools jsonschema-generator 4.38.0 (Jackson-databind transitively). Module under work: `modules/nf-lineage` and new `buildSrc/`. + +--- + +## File Structure + +**Created:** +- `buildSrc/build.gradle` — declares groovy plugin, mavenCentral, and the victools dep on the build-script classpath only. +- `buildSrc/src/main/groovy/nextflow/gradle/GenerateLineageSchemaTask.groovy` — the `DefaultTask` subclass. +- `modules/nf-lineage/src/resources/schema/lineage-v1beta1.schema.json` — generated artifact, committed. + +**Modified:** +- `modules/nf-lineage/build.gradle` — register the `generateLineageSchema` task with hardcoded `subtypes` list and sync-warning comment. 
+- `modules/nf-lineage/src/main/nextflow/lineage/serde/LinTypeAdapterFactory.groovy` — three-line maintainer comment above the `registerSubtype` chain. No behavior change. + +--- + +## Task 1: Scaffold `buildSrc` with the victools dependency + +**Files:** +- Create: `buildSrc/build.gradle` + +- [ ] **Step 1: Create `buildSrc/build.gradle`** + +Create the file with this exact content: + +```groovy +/* + * Copyright 2013-2026, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +plugins { + id 'groovy' +} + +repositories { + mavenCentral() +} + +dependencies { + implementation gradleApi() + implementation localGroovy() + implementation 'com.github.victools:jsonschema-generator:4.38.0' +} +``` + +Note: `buildSrc/` already has a `build/` output directory, left over from the empty default buildSrc build that Gradle ran previously. Only the `build.gradle` file is new; do not delete the `build/` directory. + +- [ ] **Step 2: Verify buildSrc compiles cleanly** + +Run: +``` +./gradlew help +``` + +Expected: succeeds with no warnings about buildSrc. Gradle compiles buildSrc once at the start of the build; if there's a syntax error in `buildSrc/build.gradle` or a missing repository, this command will fail with a buildSrc compilation error. Confirm it does not. 
+ +- [ ] **Step 3: Commit** + +``` +git add buildSrc/build.gradle +git commit -s -m "Add buildSrc with victools jsonschema-generator dep" +``` + +--- + +## Task 2: Implement `GenerateLineageSchemaTask` — class loading + subtype enumeration + +This task establishes the skeleton (inputs, classloader, subtype loading, write empty oneOf). Schema generation follows in Task 3. + +**Files:** +- Create: `buildSrc/src/main/groovy/nextflow/gradle/GenerateLineageSchemaTask.groovy` + +- [ ] **Step 1: Create the task class** + +Create the file with this exact content: + +```groovy +/* + * Copyright 2013-2026, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nextflow.gradle + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.databind.node.ObjectNode +import groovy.transform.CompileStatic +import org.gradle.api.DefaultTask +import org.gradle.api.file.FileCollection +import org.gradle.api.tasks.Input +import org.gradle.api.tasks.InputFiles +import org.gradle.api.tasks.OutputFile +import org.gradle.api.tasks.TaskAction + +/** + * Generates a JSON Schema (draft 2020-12) describing the JSON documents emitted + * by {@code nextflow.lineage.serde.LinEncoder} for v1beta1 model classes. + * + * The task does not depend on the nf-lineage runtime; it loads compiled model + * classes via a URLClassLoader and uses victools jsonschema-generator to derive + * the per-subtype schemas. 
Each subtype is wrapped in a {version, kind, spec} + * envelope matching {@code LinTypeAdapterFactory.write(...)}. + */ +@CompileStatic +class GenerateLineageSchemaTask extends DefaultTask { + + @InputFiles + FileCollection classpath + + @Input + List subtypes + + @OutputFile + File outputFile + + @TaskAction + void generate() { + final loader = buildClassLoader() + final version = readLineageVersion(loader) + final mapper = new ObjectMapper() + final root = mapper.createObjectNode() + root.put('$schema', 'https://json-schema.org/draft/2020-12/schema') + root.put('title', 'Lineage v1beta1') + final oneOf = root.putArray('oneOf') + + subtypes.each { String fqn -> + final cls = loader.loadClass(fqn) + final spec = generateSubtypeSchema(cls, mapper) + oneOf.add(wrapEnvelope(cls.simpleName, version, spec, mapper)) + } + + outputFile.parentFile.mkdirs() + mapper.writerWithDefaultPrettyPrinter().writeValue(outputFile, root) + logger.lifecycle("Wrote lineage schema with ${subtypes.size()} subtypes to ${outputFile}") + } + + private URLClassLoader buildClassLoader() { + final urls = classpath.files.collect { it.toURI().toURL() } as URL[] + return new URLClassLoader(urls, getClass().classLoader) + } + + private String readLineageVersion(ClassLoader loader) { + final linModel = loader.loadClass('nextflow.lineage.model.v1beta1.LinModel') + return linModel.getField('VERSION').get(null) as String + } + + // Filled in by Task 3 + private ObjectNode generateSubtypeSchema(Class cls, ObjectMapper mapper) { + return mapper.createObjectNode() + } + + private ObjectNode wrapEnvelope(String kind, String version, ObjectNode spec, ObjectMapper mapper) { + final env = mapper.createObjectNode() + env.put('type', 'object') + final props = env.putObject('properties') + props.putObject('version').put('const', version) + props.putObject('kind').put('const', kind) + props.set('spec', spec) + env.putArray('required').with { it.add('version'); it.add('kind'); it.add('spec'); return it } + 
env.put('additionalProperties', false) + return env + } +} +``` + +- [ ] **Step 2: Verify buildSrc still compiles** + +Run: +``` +./gradlew help +``` + +Expected: succeeds. The task isn't wired to any project yet, so it just has to compile. + +- [ ] **Step 3: Commit** + +``` +git add buildSrc/src/main/groovy/nextflow/gradle/GenerateLineageSchemaTask.groovy +git commit -s -m "Add GenerateLineageSchemaTask skeleton in buildSrc" +``` + +--- + +## Task 3: Implement victools subtype schema generation with custom type defs + +Replace the placeholder `generateSubtypeSchema` from Task 2 with the real implementation that uses victools and registers custom type definitions for `OffsetDateTime` and `java.nio.file.Path`. + +**Files:** +- Modify: `buildSrc/src/main/groovy/nextflow/gradle/GenerateLineageSchemaTask.groovy` + +- [ ] **Step 1: Add imports for victools** + +Add these imports near the top of the file, alongside the existing imports: + +```groovy +import com.github.victools.jsonschema.generator.CustomDefinition +import com.github.victools.jsonschema.generator.OptionPreset +import com.github.victools.jsonschema.generator.SchemaGenerator +import com.github.victools.jsonschema.generator.SchemaGeneratorConfigBuilder +import com.github.victools.jsonschema.generator.SchemaVersion +import java.nio.file.Path +import java.time.OffsetDateTime +``` + +- [ ] **Step 2: Replace `generateSubtypeSchema` with the real implementation** + +Find this placeholder in the file: + +```groovy + // Filled in by Task 3 + private ObjectNode generateSubtypeSchema(Class cls, ObjectMapper mapper) { + return mapper.createObjectNode() + } +``` + +Replace it with: + +```groovy + private ObjectNode generateSubtypeSchema(Class cls, ObjectMapper mapper) { + final builder = new SchemaGeneratorConfigBuilder(SchemaVersion.DRAFT_2020_12, OptionPreset.PLAIN_JSON) + builder.forTypesInGeneral().withCustomDefinitionProvider { javaType, context -> + final erased = javaType.erasedType + if (erased == 
OffsetDateTime) { + final node = mapper.createObjectNode() + node.put('type', 'string') + node.put('format', 'date-time') + return new CustomDefinition(node) + } + if (Path.isAssignableFrom(erased)) { + final node = mapper.createObjectNode() + node.put('type', 'string') + return new CustomDefinition(node) + } + return null + } + final generator = new SchemaGenerator(builder.build()) + return generator.generateSchema(cls) as ObjectNode + } +``` + +- [ ] **Step 3: Verify buildSrc compiles** + +Run: +``` +./gradlew help +``` + +Expected: succeeds. Confirms the victools imports resolve from the dep declared in Task 1 and the closure-based `CustomDefinitionProviderV2` SAM conversion compiles under Groovy. + +- [ ] **Step 4: Commit** + +``` +git add buildSrc/src/main/groovy/nextflow/gradle/GenerateLineageSchemaTask.groovy +git commit -s -m "Implement victools-based subtype schema generation" +``` + +--- + +## Task 4: Register `generateLineageSchema` in `modules/nf-lineage/build.gradle` + +**Files:** +- Modify: `modules/nf-lineage/build.gradle` + +- [ ] **Step 1: Append task registration** + +Open `modules/nf-lineage/build.gradle`. Currently the file ends with the `dependencies { ... }` block. Append a blank line, then the following block to the end of the file: + +```groovy + +import nextflow.gradle.GenerateLineageSchemaTask + +tasks.register('generateLineageSchema', GenerateLineageSchemaTask) { + description = 'Generate JSON Schema for the lineage model v1beta1' + group = 'documentation' + dependsOn compileGroovy + classpath = sourceSets.main.runtimeClasspath + // Keep this list in sync with LinTypeAdapterFactory.registerSubtype(...) calls + // in src/main/nextflow/lineage/serde/LinTypeAdapterFactory.groovy. + // After editing, re-run this task to refresh + // src/resources/schema/lineage-v1beta1.schema.json. 
+ subtypes = [ + 'nextflow.lineage.model.v1beta1.WorkflowRun', + 'nextflow.lineage.model.v1beta1.WorkflowOutput', + 'nextflow.lineage.model.v1beta1.Workflow', + 'nextflow.lineage.model.v1beta1.TaskRun', + 'nextflow.lineage.model.v1beta1.TaskOutput', + 'nextflow.lineage.model.v1beta1.FileOutput', + ] + outputFile = file('src/resources/schema/lineage-v1beta1.schema.json') +} +``` + +Note: in Groovy Gradle scripts, top-level `import` statements must appear *before* the `plugins { ... }` and any other code. If Gradle complains about the import position, move the `import nextflow.gradle.GenerateLineageSchemaTask` line to the very top of the file (above `apply plugin: 'groovy'`) and leave the `tasks.register(...)` block at the end. + +- [ ] **Step 2: Verify the task is discoverable** + +Run: +``` +./gradlew :nf-lineage:tasks --group documentation +``` + +Expected: output lists `generateLineageSchema` under the `Documentation tasks` group with the description "Generate JSON Schema for the lineage model v1beta1". + +- [ ] **Step 3: Commit** + +``` +git add modules/nf-lineage/build.gradle +git commit -s -m "Register generateLineageSchema task in nf-lineage build" +``` + +--- + +## Task 5: Run the task and verify the generated schema + +This is the manual verification gate mandated by the spec. No code is written; the goal is to prove the pipeline end-to-end produces a correct schema. + +**Files:** +- Create (via task run): `modules/nf-lineage/src/resources/schema/lineage-v1beta1.schema.json` + +- [ ] **Step 1: Run the task** + +``` +./gradlew :nf-lineage:generateLineageSchema +``` + +Expected: `BUILD SUCCESSFUL`, lifecycle log line `Wrote lineage schema with 6 subtypes to .../lineage-v1beta1.schema.json`. If the build fails, debug and fix in `GenerateLineageSchemaTask.groovy` before continuing. 
+ +- [ ] **Step 2: Confirm the file exists and is valid JSON** + +``` +ls -la modules/nf-lineage/src/resources/schema/lineage-v1beta1.schema.json +python3 -c "import json; json.load(open('modules/nf-lineage/src/resources/schema/lineage-v1beta1.schema.json'))" +``` + +Expected: file exists and python's `json.load` runs without exception. + +- [ ] **Step 3: Confirm structural correctness** + +``` +python3 -c " +import json +s = json.load(open('modules/nf-lineage/src/resources/schema/lineage-v1beta1.schema.json')) +assert s.get('\$schema') == 'https://json-schema.org/draft/2020-12/schema', s.get('\$schema') +assert s.get('title') == 'Lineage v1beta1' +oneof = s.get('oneOf') +assert isinstance(oneof, list) and len(oneof) == 6, len(oneof) if isinstance(oneof, list) else type(oneof) +kinds = [b['properties']['kind']['const'] for b in oneof] +assert set(kinds) == {'WorkflowRun', 'WorkflowOutput', 'Workflow', 'TaskRun', 'TaskOutput', 'FileOutput'}, kinds +versions = {b['properties']['version']['const'] for b in oneof} +assert versions == {'lineage/v1beta1'}, versions +print('schema structure OK') +" +``` + +Expected: prints `schema structure OK` and exits 0. + +- [ ] **Step 4: Validate a known-good lineage JSON document against the schema** + +Locate an existing lineage encoder fixture or unit-test sample. Search for one: + +``` +grep -rln "lineage/v1beta1" modules/nf-lineage/src/test | head -3 +``` + +Pick one of the listed files, find a JSON literal inside it, save it to `/tmp/sample-lineage.json`. 
If no fixture is convenient, hand-craft a minimal `FileOutput` document: + +```json +{ + "version": "lineage/v1beta1", + "kind": "FileOutput", + "spec": { + "path": "/tmp/out.txt", + "size": 42 + } +} +``` + +Save to `/tmp/sample-lineage.json`, then validate: + +``` +pip install --user jsonschema 2>/dev/null +python3 -c " +import json +from jsonschema import Draft202012Validator +schema = json.load(open('modules/nf-lineage/src/resources/schema/lineage-v1beta1.schema.json')) +doc = json.load(open('/tmp/sample-lineage.json')) +Draft202012Validator(schema).validate(doc) +print('document valid') +" +``` + +Expected: prints `document valid`. If validation fails, inspect the schema, identify the mismatch (likely an over-strict `required` or an unsupported type mapping), adjust `GenerateLineageSchemaTask.generateSubtypeSchema`, regenerate, and re-validate. + +- [ ] **Step 5: Commit the generated schema** + +``` +git add modules/nf-lineage/src/resources/schema/lineage-v1beta1.schema.json +git commit -s -m "Add generated lineage v1beta1 JSON Schema" +``` + +--- + +## Task 6: Add maintainer sync comment to `LinTypeAdapterFactory` + +The only production-code touch. No behavior change. + +**Files:** +- Modify: `modules/nf-lineage/src/main/nextflow/lineage/serde/LinTypeAdapterFactory.groovy` + +- [ ] **Step 1: Insert the comment** + +Open the file. 
Find the constructor (currently lines 47-55): + +```groovy + LinTypeAdapterFactory() { + super(LinSerializable.class, "kind", false) + this.registerSubtype(WorkflowRun, WorkflowRun.simpleName) + .registerSubtype(WorkflowOutput, WorkflowOutput.simpleName) + .registerSubtype(Workflow, Workflow.simpleName) + .registerSubtype(TaskRun, TaskRun.simpleName) + .registerSubtype(TaskOutput, TaskOutput.simpleName) + .registerSubtype(FileOutput, FileOutput.simpleName) + } +``` + +Replace it with: + +```groovy + LinTypeAdapterFactory() { + super(LinSerializable.class, "kind", false) + // When adding or removing a subtype here, also update the `subtypes` list in + // modules/nf-lineage/build.gradle (task `generateLineageSchema`) and re-run + // `./gradlew :nf-lineage:generateLineageSchema` to refresh + // src/resources/schema/lineage-v1beta1.schema.json. + this.registerSubtype(WorkflowRun, WorkflowRun.simpleName) + .registerSubtype(WorkflowOutput, WorkflowOutput.simpleName) + .registerSubtype(Workflow, Workflow.simpleName) + .registerSubtype(TaskRun, TaskRun.simpleName) + .registerSubtype(TaskOutput, TaskOutput.simpleName) + .registerSubtype(FileOutput, FileOutput.simpleName) + } +``` + +- [ ] **Step 2: Confirm no behavior change — run the module's existing tests** + +``` +./gradlew :nf-lineage:test +``` + +Expected: all tests pass. The change is comment-only; this is a sanity check. + +- [ ] **Step 3: Re-run the schema task and confirm the output is unchanged** + +``` +./gradlew :nf-lineage:generateLineageSchema +git diff modules/nf-lineage/src/resources/schema/lineage-v1beta1.schema.json +``` + +Expected: `git diff` shows no changes. (The comment doesn't affect generation, so the schema must be byte-identical.) 
+ +- [ ] **Step 4: Commit** + +``` +git add modules/nf-lineage/src/main/nextflow/lineage/serde/LinTypeAdapterFactory.groovy +git commit -s -m "Add sync note pointing maintainers to generateLineageSchema task" +``` + +--- + +## Self-Review + +Spec coverage: +- Goal 1 (single JSON Schema document, six `oneOf` branches): Tasks 2–5. +- Goal 2 (reproducible Gradle task): Task 4. +- Goal 3 (checked-in artifact): Task 5 commit. +- Goal 4 (no runtime deps on nf-lineage): Task 1 (dep lives in `buildSrc/build.gradle` only); verified implicitly by Task 6, step 2 running nf-lineage tests without any new classpath entries. +- Non-Goals respected: no CI enforcement task, no runtime validation, no auto-discovery. +- US1 acceptance (six branches, correct consts, valid JSON): Task 5, step 3. +- US1 acceptance (third-party validator passes): Task 5, step 4. +- US2 (refresh after model change): manual workflow documented via the comment added in Task 6; no separate task needed since the regeneration path is identical to Task 5. +- US3 (new subtype): the sync comment in Task 6 + the build.gradle comment in Task 4 cover the maintainer workflow. + +Placeholder scan: none — all steps have concrete commands, code blocks, or assertions. + +Type consistency: `classpath` (FileCollection), `subtypes` (List), `outputFile` (File) are declared in Task 2 and referenced consistently in Tasks 4 and 5. `generateSubtypeSchema(Class, ObjectMapper) → ObjectNode` defined in Task 2, replaced in Task 3 with the same signature. + +No issues found. 
diff --git a/specs/260519-lineage-json-schema/spec.md b/specs/260519-lineage-json-schema/spec.md new file mode 100644 index 0000000000..41805c8cc7 --- /dev/null +++ b/specs/260519-lineage-json-schema/spec.md @@ -0,0 +1,198 @@ +# Feature Specification: JSON Schema generation for lineage model v1beta1 + +**Feature Branch**: `lineage-json-schema` +**Created**: 2026-05-19 +**Status**: Draft +**Input**: User request — provide a way to produce a JSON Schema describing the JSON documents emitted by `LinEncoder` for the `nextflow.lineage.model.v1beta1` classes, suitable for checking into the repository as a versioned artifact. + +## Motivation + +`modules/nf-lineage/src/main/nextflow/lineage/serde/LinEncoder` serializes `LinSerializable` objects to JSON via Gson with a `RuntimeTypeAdapterFactory`. The on-the-wire shape is an envelope: + +```json +{ "version": "lineage/v1beta1", "kind": "<SubtypeName>", "spec": { /* subtype fields */ } } +``` + +There is no machine-readable description of that shape today. Consumers (validators, external tooling, future contributors editing the model) have to read the Groovy classes to understand the schema. A JSON Schema would: + +- Provide a contract for the `seqera://`-style lineage JSON documents. +- Make breaking changes to the model visible in PR diffs once the generated schema is committed. +- Enable third-party validation without depending on the JVM. + +## Goals + +- Produce a single JSON Schema document covering all six `LinTypeAdapterFactory`-registered subtypes (`WorkflowRun`, `WorkflowOutput`, `Workflow`, `TaskRun`, `TaskOutput`, `FileOutput`) as a top-level `oneOf` over the `{version, kind, spec}` envelope. +- Generate the schema reproducibly from the compiled Groovy classes via a Gradle task. +- Check the resulting schema into `modules/nf-lineage/src/resources/schema/` so diffs surface in PRs. +- Keep all schema-generation dependencies off `nf-lineage`'s runtime/compile classpath.
+ +## Non-Goals + +- No automatic CI enforcement that the checked-in schema matches the current model (manual regeneration). +- No runtime validation of lineage JSON against the schema inside Nextflow. +- No schema for non-`LinSerializable` model classes (`Checksum`, `DataPath`, `Parameter`, `LinModel`) as top-level branches — they appear only as nested schemas referenced from the registered subtypes. +- No changes to the JSON wire format produced by `LinEncoder`. +- No changes to how `LinTypeAdapterFactory` works at runtime (single optional comment only). + +## User Scenarios & Testing + +### User Story 1 — Generate and commit the v1beta1 schema (Priority: P1) + +A maintainer wants the first version of the schema produced and added to the repo. + +**Why this priority**: Without this, the feature has zero user-visible output. + +**Independent Test**: Run `./gradlew :nf-lineage:generateLineageSchema`. Confirm `modules/nf-lineage/src/resources/schema/lineage-v1beta1.schema.json` exists, parses as valid JSON, declares `$schema: https://json-schema.org/draft/2020-12/schema`, and has a top-level `oneOf` of length 6 — one branch per registered subtype. + +**Acceptance Scenarios**: + +1. **Given** a clean checkout, **When** the task is run, **Then** the schema file is created with exactly six `oneOf` branches, each having `properties.version.const = "lineage/v1beta1"`, `properties.kind.const = "<SubtypeName>"` (the subtype's simple class name), and a `properties.spec` schema describing the subtype's fields. +2. **Given** the schema file already exists, **When** the task is run again with no model changes, **Then** the file content is byte-identical (or differs only in stable, deterministic ordering). +3. **Given** a sample lineage JSON document produced by `LinEncoder.encode(...)` for any subtype, **When** validated against the generated schema using a third-party validator, **Then** validation succeeds.
+ +### User Story 2 — Refresh schema after a model change (Priority: P2) + +A developer adds a new field to `WorkflowRun` (or any subtype) and needs to update the committed schema. + +**Why this priority**: The on-going maintenance workflow; defines the manual-only update story chosen during brainstorming. + +**Independent Test**: Add a field to one of the model classes, run the task, observe the schema file diff includes the new property under the corresponding `spec` schema. + +**Acceptance Scenarios**: + +1. **Given** a new `String` field on a registered subtype, **When** the task is run, **Then** the corresponding `oneOf` branch's `spec.properties` includes that field with `type: string`. +2. **Given** a new field of type `OffsetDateTime`, **When** the task is run, **Then** the field appears with `{type: string, format: date-time}`. +3. **Given** a new field of type `java.nio.file.Path`, **When** the task is run, **Then** the field appears with `{type: string}`. + +### User Story 3 — Add a new `LinSerializable` subtype (Priority: P3) + +A developer adds a new model class and registers it in `LinTypeAdapterFactory`. + +**Why this priority**: The maintenance path most likely to drift. + +**Acceptance Scenarios**: + +1. **Given** a new `LinSerializable` class registered via `registerSubtype(...)`, **When** the developer also appends its fully-qualified name to the `subtypes` list in `modules/nf-lineage/build.gradle` and re-runs the task, **Then** the schema's `oneOf` grows to include a new branch for that subtype. +2. **Given** a new subtype registered in the factory but NOT added to the `subtypes` list in `build.gradle`, **When** the task is run, **Then** the schema is silently incomplete (this is the known drift trade-off, mitigated by an in-source comment — not enforced by CI). 
+ +## Design + +### Component 1 — `buildSrc` module hosting the task class + +New top-level `buildSrc/` directory (currently absent from the repo; the existing `buildSrc/build/` tree is just Gradle's empty-buildSrc output dir). + +**`buildSrc/build.gradle`** (new): + +```groovy +plugins { id 'groovy' } +repositories { mavenCentral() } +dependencies { + implementation gradleApi() + implementation localGroovy() + implementation 'com.github.victools:jsonschema-generator:4.38.0' +} +``` + +**`buildSrc/src/main/groovy/nextflow/gradle/GenerateLineageSchemaTask.groovy`** (new): a `DefaultTask` with two inputs, one output, and one `@TaskAction`: + +- `@InputFiles FileCollection classpath` — the compiled `nf-lineage` classes plus its runtime classpath +- `@Input List<String> subtypes` — fully-qualified class names to expose as top-level `oneOf` branches +- `@OutputFile File outputFile` — destination path for the schema JSON + +`@TaskAction generate()` performs: + +1. Build a `URLClassLoader` over `classpath`, parented to the task's own class loader. This lets the loaded model classes resolve `LinSerializable` (and JDK types) consistently. +2. Construct a `SchemaGenerator` from victools with `SchemaVersion.DRAFT_2020_12` and `OptionPreset.PLAIN_JSON`. Register two custom type definitions: + - `OffsetDateTime` → `{"type": "string", "format": "date-time"}` + - `java.nio.file.Path` → `{"type": "string"}` +3. Load `nextflow.lineage.model.v1beta1.LinModel` via the URLClassLoader and read its `VERSION` static field reflectively — single source of truth for the envelope's `version` const, no duplication of the literal `"lineage/v1beta1"`. +4. For each FQN in `subtypes`: load the class via the URLClassLoader, call `generator.generateSchema(cls)`, and wrap the result in an envelope object: + ```json + { + "type": "object", + "properties": { + "version": { "const": "<LinModel.VERSION>" }, + "kind": { "const": "<subtype simple class name>" }, + "spec": <generated subtype schema> + }, + "required": ["version", "kind", "spec"], + "additionalProperties": false + } + ``` +5.
Compose the root document: + ```json + { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Lineage v1beta1", + "oneOf": [ /* one envelope per subtype, in input order */ ] + } + ``` +6. Serialize with a deterministic pretty-printer (victools returns a Jackson `ObjectNode`; use Jackson's `writerWithDefaultPrettyPrinter()` to render). Write bytes to `outputFile`, creating parent dirs. + +### Component 2 — Task registration in `modules/nf-lineage/build.gradle` + +Append: + +```groovy +import nextflow.gradle.GenerateLineageSchemaTask + +tasks.register('generateLineageSchema', GenerateLineageSchemaTask) { + description = 'Generate JSON Schema for the lineage model v1beta1' + group = 'documentation' + dependsOn compileGroovy + classpath = sourceSets.main.runtimeClasspath + sourceSets.main.output + // Keep this list in sync with LinTypeAdapterFactory.registerSubtype(...) calls + // and re-run this task whenever the model changes. + subtypes = [ + 'nextflow.lineage.model.v1beta1.WorkflowRun', + 'nextflow.lineage.model.v1beta1.WorkflowOutput', + 'nextflow.lineage.model.v1beta1.Workflow', + 'nextflow.lineage.model.v1beta1.TaskRun', + 'nextflow.lineage.model.v1beta1.TaskOutput', + 'nextflow.lineage.model.v1beta1.FileOutput', + ] + outputFile = file('src/resources/schema/lineage-v1beta1.schema.json') +} +``` + +### Component 3 — In-source sync comment + +`modules/nf-lineage/src/main/nextflow/lineage/serde/LinTypeAdapterFactory.groovy`, immediately above the `registerSubtype` chain in the constructor: + +```groovy +// When adding or removing a subtype, also update the `subtypes` list in +// modules/nf-lineage/build.gradle (task `generateLineageSchema`) and re-run +// `./gradlew :nf-lineage:generateLineageSchema` to refresh +// src/resources/schema/lineage-v1beta1.schema.json. +``` + +This is the only modification to production source code. No behavior change. 
+ +### Component 4 — First generated schema artifact + +`modules/nf-lineage/src/resources/schema/lineage-v1beta1.schema.json` (new). Produced by running the task once and committed alongside the other changes. + +### Testing + +This is a tooling change with no runtime behavior. Coverage comes from running the task and inspecting the artifact: + +- **Manual verification during implementation** (mandatory): run `./gradlew :nf-lineage:generateLineageSchema`, confirm the file is created, parse it as JSON, verify it has six `oneOf` branches each with the expected `kind` const value, and validate at least one known-good lineage JSON document (taken from an existing `LinEncoder` test fixture) against the generated schema using a standard JSON Schema 2020-12 validator. +- **No buildSrc unit test**: the task is mostly orchestration around victools. Setting up a Gradle `ProjectBuilder` harness with the correct classpath inside `buildSrc` would be more code than the task itself, and the manual verification above covers the same surface. Deferred unless the task accrues real branching logic. +- **No production-code test changes** — `LinTypeAdapterFactory` is only gaining a comment. + +## Risks + +- **victools schema fidelity for Groovy classes**: `@CompileStatic` + `@Canonical` produces plain JavaBean-style properties, which victools reads via the default field-introspection mode. If a field type isn't covered by the two custom definitions and isn't a primitive, `String`, `List`, `Map`, or a `LinSerializable`-related class, victools may produce a generic `{type: object}`. The current model only uses `String`, `Long`/`long`, `Map`, `List`, `OffsetDateTime`, `Path`, `Checksum`, `DataPath`, `Parameter`, and nested `LinSerializable`s. Verified covered during implementation; new exotic types in the future may need new custom definitions. +- **Drift between `subtypes` list and `LinTypeAdapterFactory`**: deliberately accepted, mitigated by in-source comment. 
No CI enforcement per the brainstorming decision. +- **`$schema` URI choice**: draft 2020-12 is the current stable. `const` keyword used for discriminator values is supported from draft-06 onward, well within 2020-12. +- **Untyped `Map config` / `Map metadata` in `WorkflowRun`**: schema will describe them as `{type: object}` with no `properties`/`additionalProperties` constraints. This matches the intentional opacity; documented here so it's not flagged later as a generator bug. +- **Determinism**: victools and Jackson both produce stable output for the same input, but property ordering inside generated subtype schemas relies on victools' field-order behavior. If non-determinism shows up across JVMs, address by post-processing (sort `properties` keys) — not anticipated for now. + +## Out of Scope + +- A `checkSchema` task that fails CI on drift. +- Wiring `generateLineageSchema` into `check`, `build`, or `publish`. +- Schema generation for any model package other than `v1beta1`. +- Auto-discovery of subtypes (considered and rejected during brainstorming in favor of an explicit list with a sync comment). +- Validation of incoming JSON against the schema at runtime inside Nextflow. +- A `module-spec`–style ADR; if the v1beta1 schema becomes part of an external contract, that's a separate ADR follow-up. \ No newline at end of file diff --git a/specs/260519-lineage-json-schema/validate.py b/specs/260519-lineage-json-schema/validate.py new file mode 100644 index 0000000000..047ddce9e6 --- /dev/null +++ b/specs/260519-lineage-json-schema/validate.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +"""Validate every .data.json file under a directory against a JSON Schema. 
+ +Usage: + validate.py --schema PATH --root DIR [--errors-per-file N] [--summary-only] +""" +import argparse +import json +import sys +from pathlib import Path + +from jsonschema import Draft202012Validator + + +def main() -> int: + ap = argparse.ArgumentParser(description=__doc__) + ap.add_argument('--schema', required=True, type=Path, help='Path to JSON Schema file') + ap.add_argument('--root', required=True, type=Path, help='Directory to scan for .data.json') + ap.add_argument('--errors-per-file', type=int, default=3, help='Max errors to print per failing file') + ap.add_argument('--summary-only', action='store_true', help='Skip per-file failure detail') + args = ap.parse_args() + + schema = json.load(open(args.schema)) + validator = Draft202012Validator(schema) + + files = sorted(args.root.rglob('.data.json')) + print(f'Schema: {args.schema}') + print(f'Root: {args.root}') + print(f'Found {len(files)} .data.json file(s)\n') + + total_fail = 0 + total_parse = 0 + for f in files: + try: + doc = json.load(open(f)) + except json.JSONDecodeError as e: + total_parse += 1 + if not args.summary_only: + print(f'PARSE {f.relative_to(args.root)}: {e}') + continue + errors = list(validator.iter_errors(doc)) + if errors: + total_fail += 1 + if not args.summary_only: + print(f'FAIL {f.relative_to(args.root)} ({len(errors)} errors)') + for e in errors[:args.errors_per_file]: + path = '/'.join(str(p) for p in e.absolute_path) or '(root)' + print(f' at {path}: {e.message[:160]}') + + print(f'\n--- summary ---') + print(f' files: {len(files)}') + print(f' passed: {len(files) - total_fail - total_parse}') + print(f' failed: {total_fail}') + print(f' parse errors: {total_parse}') + return 1 if (total_fail or total_parse) else 0 + + +if __name__ == '__main__': + sys.exit(main())