Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions internal/dao/file2document.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,8 @@ func (dao *File2DocumentDAO) GetByDocumentID(docID string) ([]*entity.File2Docum
func (dao *File2DocumentDAO) DeleteByDocumentID(docID string) error {
	// Unscoped turns off GORM's soft-delete scope, so matching rows are removed
	// permanently even if the model declares a soft-delete column.
	matching := DB.Unscoped().Where("document_id = ?", docID)
	result := matching.Delete(&entity.File2Document{})
	return result.Error
}

// Create persists mapping as a new file2document record and returns whatever
// error the database call reports (nil on success).
func (dao *File2DocumentDAO) Create(mapping *entity.File2Document) error {
	result := DB.Create(mapping)
	return result.Error
}
78 changes: 74 additions & 4 deletions internal/handler/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package handler

import (
"errors"
"fmt"
"net/http"
"net/url"
"ragflow/internal/common"
Expand All @@ -32,15 +34,17 @@ import (

// FileHandler groups the HTTP handlers for the file endpoints together with
// the services those handlers delegate to.
type FileHandler struct {
	fileService          *service.FileService
	userService          *service.UserService
	file2DocumentService *service.File2DocumentService
}

// NewFileHandler builds a FileHandler from the supplied file and user services.
// The File2Document service is not injected by the caller; it is constructed
// here via service.NewFile2DocumentService so the constructor signature stays
// unchanged for existing call sites.
func NewFileHandler(fileService *service.FileService, userService *service.UserService) *FileHandler {
	return &FileHandler{
		fileService:          fileService,
		userService:          userService,
		file2DocumentService: service.NewFile2DocumentService(),
	}
}

Expand Down Expand Up @@ -552,3 +556,69 @@ func (h *FileHandler) Download(c *gin.Context) {
// Send file data
c.Data(http.StatusOK, contentType, blob)
}

// LinkToDatasets handles the request that links files (or folder trees) to one
// or more datasets. It is the Go counterpart of Python's
// POST /api/v1/files/link-to-datasets (convert).
// @Summary Link files to datasets
// @Description Associate files with target knowledge-base datasets, re-indexing
// as needed. Folder inputs are expanded to their innermost files.
// The heavy DB work runs in a goroutine; the endpoint returns immediately.
// @Tags file
// @Accept json
// @Produce json
// @Param request body service.LinkToDatasetsRequest true "file_ids and kb_ids"
// @Success 200 {object} map[string]interface{}
// @Router /api/v1/files/link-to-datasets [post]
func (h *FileHandler) LinkToDatasets(c *gin.Context) {
	user, errorCode, errorMessage := GetUser(c)
	if errorCode != common.CodeSuccess {
		jsonError(c, errorCode, errorMessage)
		return
	}

	// The bind error is discarded on purpose: a malformed or empty body just
	// leaves the request fields empty, and the required-argument check below
	// then reports them as missing, which is how Python's @validate_request
	// responds to the same input.
	var req service.LinkToDatasetsRequest
	_ = c.ShouldBindJSON(&req)

	// Equivalent of Python's @validate_request("file_ids", "kb_ids"): collect
	// every absent argument, in this order, and report them in one
	// CodeArgumentError response.
	required := []struct {
		name  string
		count int
	}{
		{"file_ids", len(req.FileIDs)},
		{"kb_ids", len(req.KbIDs)},
	}
	var absent []string
	for _, arg := range required {
		if arg.count == 0 {
			absent = append(absent, arg.name)
		}
	}
	if len(absent) > 0 {
		// The wording (including the trailing "; ") is kept identical to the
		// Python message so clients see the same text.
		message := fmt.Sprintf("required argument are missing: %s; ", strings.Join(absent, ","))
		jsonError(c, common.CodeArgumentError, message)
		return
	}

	// NOTE(review): the @Description above says the heavy work runs in a
	// goroutine, but from this handler's point of view the call is synchronous
	// and its error decides the response — confirm where (or whether) the
	// service hands the work off asynchronously.
	err := h.file2DocumentService.LinkToDatasets(user.ID, &req)
	if err != nil {
		jsonError(c, linkToDatasetsErrorCode(err), err.Error())
		return
	}

	c.JSON(http.StatusOK, gin.H{
		"code":    common.CodeSuccess,
		"data":    true,
		"message": "success",
	})
}

// linkToDatasetsErrorCode chooses the response code for an error returned by
// File2DocumentService.LinkToDatasets. Errors wrapping the file-not-found,
// dataset-not-found or no-authorization sentinels map to common.CodeDataError
// (the code Python's convert() reports through get_data_error_result);
// everything else is treated as an internal failure and maps to
// common.CodeServerError.
func linkToDatasetsErrorCode(err error) common.ErrorCode {
	dataErrors := []error{
		service.ErrLinkFileNotFound,
		service.ErrLinkDatasetNotFound,
		service.ErrLinkNoAuthorization,
	}
	for _, sentinel := range dataErrors {
		if errors.Is(err, sentinel) {
			return common.CodeDataError
		}
	}
	return common.CodeServerError
}
1 change: 1 addition & 0 deletions internal/router/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ func (r *Router) Setup(engine *gin.Engine) {
file.GET("", r.fileHandler.ListFiles)
file.DELETE("", r.fileHandler.DeleteFiles)
file.POST("/move", r.fileHandler.MoveFiles)
file.POST("/link-to-datasets", r.fileHandler.LinkToDatasets)
file.GET("/:id/ancestors", r.fileHandler.GetFileAncestors)
file.GET("/:id/parent", r.fileHandler.GetParentFolder)
file.GET("/:id", r.fileHandler.Download)
Expand Down
35 changes: 35 additions & 0 deletions internal/service/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,41 @@ func (s *DocumentService) deleteDocumentFull(docID string) error {
return nil
}

// RemoveDocumentKeepFile drops a document while leaving its source file alone.
// It resolves the document and its knowledgebase, deletes the document's
// tasks, clears its doc-engine data and finally removes the document row
// through deleteDocRecordWithCounters (which, per the original description,
// also decrements the KB's doc_num/chunk_num/token_num counters).
//
// This method does not itself touch the file record, its storage blob or the
// file2document mappings; as with Python's DocumentService.remove_document,
// the caller is expected to clean up those mappings separately.
func (s *DocumentService) RemoveDocumentKeepFile(docID string) error {
	document, knowledgebase, err := s.resolveDocAndKB(docID)
	if err != nil {
		return err
	}

	// Task cleanup is best-effort: a failure is logged as a warning and the
	// removal carries on.
	_, taskErr := s.taskDAO.DeleteByDocIDs([]string{docID})
	if taskErr != nil {
		warning := fmt.Sprintf("RemoveDocumentKeepFile: failed to delete tasks for %s: %v", docID, taskErr)
		common.Logger.Warn(warning)
	}

	s.deleteDocEngineData(docID, knowledgebase.TenantID, document.KbID)
	return s.deleteDocRecordWithCounters(document, knowledgebase.ID)
}

// InsertDocument creates a document row and increments the owning KB's doc_num
// counter in a single transaction. Mirrors Python DocumentService.insert, which
// updates dataset/document counters on insert. The document's ID and timestamps
// are populated by the caller / model hooks before insertion.
func (s *DocumentService) InsertDocument(doc *entity.Document) error {
return dao.DB.Transaction(func(tx *gorm.DB) error {
if err := tx.Create(doc).Error; err != nil {
return fmt.Errorf("failed to create document: %w", err)
}
if err := tx.Model(&entity.Knowledgebase{}).
Where("id = ?", doc.KbID).
Update("doc_num", gorm.Expr("doc_num + 1")).Error; err != nil {
return fmt.Errorf("failed to increment doc_num for KB %s: %w", doc.KbID, err)
}
Comment on lines +288 to +296
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "== Document / Knowledgebase model definitions =="
rg -nC2 'type Document struct|type Knowledgebase struct|KbID|foreignKey|constraint' --type=go

echo
echo "== Migrations / schema references for documents.kb_id =="
rg -nC2 'documents|knowledgebase|kb_id|FOREIGN KEY|foreign key|constraint' --type=go --type=sql

Repository: infiniflow/ragflow

Length of output: 50375


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "== 1) Inspect Document entity KbID gorm tags =="
sed -n '1,120p' internal/entity/document.go | cat -n

echo
echo "== 2) Inspect knowledgebase entity (for delete behavior / tags) =="
sed -n '1,140p' internal/entity/kb.go | cat -n

echo
echo "== 3) Inspect the function doing tx.Create + KB doc_num increment =="
sed -n '240,340p' internal/service/document.go | cat -n

echo
echo "== 4) Inspect the test that mentions inserting a doc with a missing KB row =="
rg -n "orphan-doc|no-such-kb|kb_id that has no KB" internal/service/document_test.go
# Print around the referenced region without dumping whole file
start=680
end=780
sed -n "${start},${end}p" internal/service/document_test.go | cat -n

echo
echo "== 5) Search repo for actual FK constraints referencing kb_id -> knowledgebase/id =="
rg -n --hidden --glob '*.{sql,psql,postgres,mysql,sqlite,duckdb,migration,ddl}' "FOREIGN KEY" .
rg -n --hidden --glob '*.sql' "kb_id[^\\n]*REFERENCES|REFERENCES[^\\n]*kb_id|kb_id[^\\n]*references|references[^\\n]*kb_id" .
rg -n --hidden --glob '*.go' "foreignKey|constraint:|Constraints|OnDelete|OnUpdate|references|REFERENCES" internal/entity internal/dao

Repository: infiniflow/ragflow

Length of output: 21725


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "== 1) Find setupServiceTestDB definition (DB type + FK enforcement) =="
rg -n "func setupServiceTestDB|setupServiceTestDB\\(" internal/service/document_test.go internal/service -S
rg -n "PRAGMA foreign_keys|foreign_keys=on|ForeignKey|constraint" internal/service -S

echo
echo "== 2) Inspect the test DB setup file(s) =="
# Prefer the file that contains setupServiceTestDB
file="$(rg -n "func setupServiceTestDB" -S internal/service | head -n1 | awk -F: '{print $1}')"
if [ -n "${file:-}" ]; then
  echo "Using: $file"
  sed -n '1,220p' "$file" | cat -n
fi

echo
echo "== 3) Search for migrations/DDL mentioning document.kb_id foreign keys (kb_id -> knowledgebase.id) =="
rg -n --hidden --glob '*.{sql,psql,postgres,mysql,sqlite,migration,ddl}' "document.*kb_id|documents.*kb_id|kb_id.*REFERENCES|REFERENCES.*knowledgebase|knowledgebase.*id" .
rg -n --hidden --glob '*.go' "AutoMigrate|Migrate|schema|foreignKey|constraint|OnDelete|OnUpdate" internal -S

Repository: infiniflow/ragflow

Length of output: 13752


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "== A) Check if any SQL/DDL/migrations exist in repo =="
fd -t f -i -d 6 -e sql -e ddl -e psql -e postgres -e sqlite -e mysql -e migration -e go -a --max-results 2000 . > /tmp/ddl_files.txt || true
wc -l /tmp/ddl_files.txt || true
head -n 50 /tmp/ddl_files.txt || true

echo
echo "== B) Search repo for FK definitions involving kb_id -> knowledgebase/id =="
rg -n --hidden --glob '*.{sql,ddl,migration,psql,postgres,mysql,sqlite}' "kb_id.*REFERENCES|REFERENCES.*kb_id|document.*kb_id|documents.*kb_id|knowledgebase\\.id" . || true

echo
echo "== C) Inspect deleteDocRecordWithCounters for RowsAffected handling =="
rg -n "deleteDocRecordWithCounters\\(" internal/service/document.go
sed -n '90,170p' internal/service/document.go | cat -n

Repository: infiniflow/ragflow

Length of output: 6259


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "== 1) Show deleteDocRecordWithCounters function body =="
ln=$(rg -n "func \\(s \\*DocumentService\\) deleteDocRecordWithCounters" -n internal/service/document.go | head -n1 | cut -d: -f1)
echo "function starts at line: $ln"
start=$((ln-20))
end=$((ln+80))
sed -n "${start},${end}p" internal/service/document.go | cat -n

echo
echo "== 2) Inspect docker init.sql for any FK/constraint on document/kb =="
# Only show relevant lines to keep output small
rg -n "document|documents|knowledgebase|knowledgebase\\.id|kb_id|FOREIGN KEY|REFERENCES" docker/init.sql docker/oceanbase/init.d/vec_memory.sql 2>/dev/null || true
# broaden to all docker init scripts
rg -n "FOREIGN KEY|REFERENCES|documents|knowledgebase|kb_id" docker -S || true

Repository: infiniflow/ragflow

Length of output: 4922


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "== 1) Search docker/init.sql (and all docker init scripts) for FK/REFERENCES =="
rg -n "FOREIGN KEY|REFERENCES" docker -S || true

echo
echo "== 2) Search for document table DDL + kb_id inside init scripts =="
rg -n "CREATE TABLE.*document|CREATE TABLE.*documents|\\bdocument\\b" docker -S || true
rg -n "kb_id" docker -S || true

echo
echo "== 3) Search repo-wide for DDL that mentions knowledgebase/doc_num/documents.kb_id =="
rg -n --hidden --glob '*.{sql,ddl,psql,postgres,mysql,sqlite}' "knowledgebase|doc_num|CREATE TABLE.*document|\\bdocuments\\b|\\bkb_id\\b" . || true

Repository: infiniflow/ragflow

Length of output: 420


Guard the KB doc_num increment with RowsAffected in DocumentService.InsertDocument.
entity.Document.KbID has no gorm FK/constraint tags, and the test suite inserts an “orphan” document (KbID: "no-such-kb") successfully, so the DB-level FK enforcement isn’t reliably present. As a result, the current Update(...).Error can be nil even when the KB row doesn’t exist (0 rows updated), leaving the counters inconsistent. Capture the update result and roll back when RowsAffected == 0 (similar to the deleteDocRecordWithCounters pattern).

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@internal/service/document.go` around lines 288 - 296, In
DocumentService.InsertDocument's transaction block, guard the KB counter update
by capturing the update result instead of only checking Error: call
tx.Model(&entity.Knowledgebase{}).Where("id = ?", doc.KbID).Update("doc_num",
gorm.Expr("doc_num + 1")) into a result variable, return an error and rollback
if result.Error != nil, and also return an error when result.RowsAffected == 0
(indicating no KB row was updated) so the transaction fails when the KB is
missing (follow the same pattern used in deleteDocRecordWithCounters); reference
the tx variable, doc.KbID, and the Update("doc_num", gorm.Expr("doc_num + 1"))
call when making the change.

return nil
})
}

// resolveDocAndKB loads the document and its knowledgebase, returning both or
// an error.
func (s *DocumentService) resolveDocAndKB(docID string) (*entity.Document, *entity.Knowledgebase, error) {
Expand Down
Loading
Loading