Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions internal/handler/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"path/filepath"
"ragflow/internal/common"
"ragflow/internal/entity"
"ragflow/internal/utility"
"strconv"
"strings"
"time"
Expand Down Expand Up @@ -55,6 +56,9 @@ type documentServiceIface interface {
DeleteDocumentMetadata(docID string, keys []string) error
DeleteDocumentAllMetadata(docID string) error
GetDocumentMetadataByID(docID string) (map[string]interface{}, error)
GetDocumentArtifact(filename string) (*service.ArtifactResponse, error)
GetDocumentPreview(docID string) (*service.DocumentPreview, error)
DownloadDocument(datasetID, docID string) (*service.DownloadDocumentResp, error)
}

// DocumentHandler document handler
Expand Down Expand Up @@ -198,6 +202,68 @@ func (h *DocumentHandler) GetDocumentImage(c *gin.Context) {
c.Data(http.StatusOK, contentType, data)
}

func (h *DocumentHandler) GetDocumentArtifact(c *gin.Context) {
filename := c.Param("filename")
artifact, err := h.documentService.GetDocumentArtifact(filename)
if err != nil {
switch {
case errors.Is(err, service.ErrArtifactInvalidFilename),
errors.Is(err, service.ErrArtifactInvalidFileType),
errors.Is(err, service.ErrArtifactNotFound):
c.JSON(http.StatusOK, gin.H{
"code": common.CodeDataError,
"message": err.Error(),
})
default:
c.JSON(http.StatusOK, gin.H{
"code": common.CodeExceptionError,
"data": nil,
"message": err.Error(),
})
}
return
}

c.Header("Content-Type", artifact.ContentType)
if artifact.ForceAttachment {
c.Header("X-Content-Type-Options", "nosniff")
c.Header("Content-Disposition", "attachment")
} else {
c.Header("Content-Disposition", fmt.Sprintf(`inline; filename="%s"`, artifact.SafeFilename))
}
c.Data(http.StatusOK, artifact.ContentType, artifact.Data)
}

func (h *DocumentHandler) GetDocumentPreview(c *gin.Context) {
docID := c.Param("id")

if docID == "" {
jsonError(c, common.CodeParamError, "id is required")
return
}

preview, err := h.documentService.GetDocumentPreview(docID)
if err != nil {
c.JSON(http.StatusOK, gin.H{
"code": common.CodeDataError,
"message": "Document not found!",
})
return
}

ext := utility.GetFileExtension(preview.FileName)
if preview.ContentType != "" {
c.Header("Content-Type", preview.ContentType)
}

if utility.ShouldForceAttachment(ext, preview.ContentType) {
c.Header("X-Content-Type-Options", "nosniff")
c.Header("Content-Disposition", "attachment")
}

c.Data(http.StatusOK, preview.ContentType, preview.Data)
}

// UpdateDocument update document
// @Summary Update Document
// @Description Update document info
Expand Down Expand Up @@ -382,6 +448,40 @@ func (h *DocumentHandler) ListDocuments(c *gin.Context) {
})
}

func (h *DocumentHandler) DownloadDocument(c *gin.Context) {
datasetID := c.Param("dataset_id")
docID := c.Param("document_id")

if docID == "" {
c.JSON(http.StatusOK, gin.H{
"code": common.CodeDataError,
"message": "Specify document_id please.",
})
return
}
if datasetID == "" {
c.JSON(http.StatusOK, gin.H{
"code": common.CodeDataError,
"message": fmt.Sprintf("The dataset not own the document %s.", docID),
})
return
}

res, err := h.documentService.DownloadDocument(datasetID, docID)

if err != nil {
c.JSON(http.StatusOK, gin.H{
"code": common.CodeDataError,
"message": err.Error(),
})
return
}

c.Header("Content-Type", res.ContentType)
c.Header("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, res.FileName))
c.Data(http.StatusOK, res.ContentType, res.Data)
Comment on lines +480 to +482
Copy link
Copy Markdown
Contributor

@coderabbitai coderabbitai Bot Jun 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Sanitize filename in Content-Disposition header to prevent header injection.

res.FileName originates from doc.Name and is used directly in the header without sanitization. If the document name contains characters like ", \r, or \n, this could result in malformed headers or header injection.

The GetDocumentArtifact handler correctly uses artifact.SafeFilename (which is sanitized via sanitizeArtifactFilename). Apply similar sanitization here for consistency and safety.

Proposed fix
+func sanitizeDownloadFilename(filename string) string {
+	// Remove or replace characters that could cause issues in Content-Disposition headers
+	replacer := strings.NewReplacer(`"`, `_`, "\r", "", "\n", "", "\\", "_")
+	return replacer.Replace(filename)
+}
+
 func (h *DocumentHandler) DownloadDocument(c *gin.Context) {
 	// ... existing code ...

 	c.Header("Content-Type", res.ContentType)
-	c.Header("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, res.FileName))
+	c.Header("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, sanitizeDownloadFilename(res.FileName)))
 	c.Data(http.StatusOK, res.ContentType, res.Data)
 }
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@internal/handler/document.go` around lines 480 - 482, The Content-Disposition
header is using res.FileName directly, risking header injection; update the
download handler to sanitize the filename before use by calling the same
sanitizer used elsewhere (sanitizeArtifactFilename) or by using the
already-sanitized artifact.SafeFilename pattern like in GetDocumentArtifact;
replace fmt.Sprintf(`attachment; filename="%s"`, res.FileName) with a safe value
(e.g., safeName := sanitizeArtifactFilename(res.FileName) and use safeName) so
quotes/newlines are removed/escaped before setting the header and keep
c.Data(...) unchanged.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Hz-186 pls check this.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like the humans are having a chat. I'll hop back into my burrow for now. If you need me again, just tag @coderabbitai in a new comment, and I'll come hopping out!

}

func mapDocumentListItem(doc *entity.DocumentListItem, metaFields map[string]interface{}) map[string]interface{} {
item := map[string]interface{}{
"id": doc.ID,
Expand Down
177 changes: 177 additions & 0 deletions internal/handler/document_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,40 @@ type fakeDocumentService struct {
stopErr error
}

func (f *fakeDocumentService) GetDocumentArtifact(filename string) (*service.ArtifactResponse, error) {
if filename == "error.txt" {
return nil, service.ErrArtifactNotFound
}
if filename == "unexpected.txt" {
return nil, fmt.Errorf("unexpected error")
}
return &service.ArtifactResponse{
Data: []byte("artifact content"),
ContentType: "text/plain",
SafeFilename: "safe.txt",
ForceAttachment: false,
}, nil
}
func (f *fakeDocumentService) GetDocumentPreview(docID string) (*service.DocumentPreview, error) {
if docID == "not-found" {
return nil, fmt.Errorf("not found")
}
return &service.DocumentPreview{
Data: []byte("preview content"),
ContentType: "text/plain",
FileName: "preview.txt",
}, nil
}
func (f *fakeDocumentService) DownloadDocument(datasetID, docID string) (*service.DownloadDocumentResp, error) {
if docID == "not-found" {
return nil, fmt.Errorf("not found")
}
return &service.DownloadDocumentResp{
Data: []byte("document data"),
ContentType: "application/pdf",
FileName: "doc.pdf",
}, nil
}
func (f *fakeDocumentService) CreateDocument(req *service.CreateDocumentRequest) (*entity.Document, error) {
return nil, nil
}
Expand Down Expand Up @@ -442,3 +476,146 @@ func TestStopParseDocumentsHandler_NotAccessible(t *testing.T) {
t.Fatal("expected error for no authorization")
}
}

func TestGetDocumentArtifact_Success(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/artifact/test.txt", "")
c.Params = gin.Params{{Key: "filename", Value: "test.txt"}}

h.GetDocumentArtifact(c)

if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
if w.Header().Get("Content-Type") != "text/plain" {
t.Fatalf("unexpected content type: %s", w.Header().Get("Content-Type"))
}
if w.Body.String() != "artifact content" {
t.Fatalf("unexpected body: %s", w.Body.String())
}
}

func TestGetDocumentArtifact_NotFound(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/artifact/error.txt", "")
c.Params = gin.Params{{Key: "filename", Value: "error.txt"}}

h.GetDocumentArtifact(c)

if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeDataError) {
t.Fatalf("expected code %d, got %v", common.CodeDataError, resp["code"])
}
}

func TestGetDocumentArtifact_UnexpectedError(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/artifact/unexpected.txt", "")
c.Params = gin.Params{{Key: "filename", Value: "unexpected.txt"}}

h.GetDocumentArtifact(c)

if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeExceptionError) {
t.Fatalf("expected code %d, got %v", common.CodeExceptionError, resp["code"])
}
}

func TestGetDocumentPreview_Success(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/doc-1/preview", "")
c.Params = gin.Params{{Key: "id", Value: "doc-1"}}

h.GetDocumentPreview(c)

if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
if w.Header().Get("Content-Type") != "text/plain" {
t.Fatalf("unexpected content type: %s", w.Header().Get("Content-Type"))
}
if w.Body.String() != "preview content" {
t.Fatalf("unexpected body: %s", w.Body.String())
}
}

func TestGetDocumentPreview_NotFound(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/documents/not-found/preview", "")
c.Params = gin.Params{{Key: "id", Value: "not-found"}}

h.GetDocumentPreview(c)

if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeDataError) {
t.Fatalf("expected code %d, got %v", common.CodeDataError, resp["code"])
}
}

func TestDownloadDocument_Success(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/datasets/ds-1/documents/doc-1", "")
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}, {Key: "document_id", Value: "doc-1"}}

h.DownloadDocument(c)

if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
if w.Header().Get("Content-Type") != "application/pdf" {
t.Fatalf("unexpected content type: %s", w.Header().Get("Content-Type"))
}
if w.Body.String() != "document data" {
t.Fatalf("unexpected body: %s", w.Body.String())
}
}

func TestDownloadDocument_NotFound(t *testing.T) {
gin.SetMode(gin.TestMode)
h := &DocumentHandler{
documentService: &fakeDocumentService{},
}
c, w := setupGinContextWithUser("GET", "/api/v1/datasets/ds-1/documents/not-found", "")
c.Params = gin.Params{{Key: "dataset_id", Value: "ds-1"}, {Key: "document_id", Value: "not-found"}}

h.DownloadDocument(c)

if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["code"] != float64(common.CodeDataError) {
t.Fatalf("expected code %d, got %v", common.CodeDataError, resp["code"])
}
}
3 changes: 3 additions & 0 deletions internal/router/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,8 @@ func (r *Router) Setup(engine *gin.Engine) {
{
documents.POST("", r.documentHandler.CreateDocument)
documents.GET("", r.documentHandler.ListDocuments)
documents.GET("/artifact/:filename", r.documentHandler.GetDocumentArtifact)
documents.GET("/:id/preview", r.documentHandler.GetDocumentPreview)
documents.GET("/:id", r.documentHandler.GetDocumentByID)
documents.PUT("/:id", r.documentHandler.UpdateDocument)
documents.DELETE("/:id", r.documentHandler.DeleteDocument)
Expand Down Expand Up @@ -244,6 +246,7 @@ func (r *Router) Setup(engine *gin.Engine) {

// Dataset documents
datasets.GET("/:dataset_id/documents", r.documentHandler.ListDocuments)
datasets.GET("/:dataset_id/documents/:document_id", r.documentHandler.DownloadDocument)
datasets.DELETE("/:dataset_id/documents", r.documentHandler.DeleteDocuments)

// Dataset document chunk
Expand Down
Loading
Loading