tencent_ocr/.history/pkg/handler/upload_20250115172229.go
2025-01-15 18:05:16 +08:00

158 lines
3.7 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Package handler provides HTTP handlers for uploading files to Cloudflare R2.
package handler
import (
	"encoding/base64"
	"io"
	"mime/multipart"
	"net/http"
	"strconv"
	"strings"

	"github.com/gin-gonic/gin"

	"tencent_ocr/pkg/service"
)
// UploadHandler groups the services the upload endpoint depends on.
type UploadHandler struct {
	// uploadService validates the detected content type and stores files.
	uploadService *service.UploadService

	// ocrService extracts text from a base64-encoded image.
	ocrService *service.OCRService

	// geminiService post-processes text from a prompt.
	geminiService *service.GeminiService
}
// NewUploadHandler returns an UploadHandler wired to the given upload, OCR
// and Gemini services. The services are stored as-is; no validation is done.
func NewUploadHandler(
	uploadService *service.UploadService,
	ocrService *service.OCRService,
	geminiService *service.GeminiService,
) *UploadHandler {
	handler := new(UploadHandler)
	handler.uploadService = uploadService
	handler.ocrService = ocrService
	handler.geminiService = geminiService
	return handler
}
// MultiUploadResponse is the JSON body returned by the multi-file upload
// endpoint, for both successful and failed requests.
type MultiUploadResponse struct {
	// ImageURLs lists the URLs of the stored images.
	ImageURLs []string `json:"image_urls"`

	// Text is the text extracted from the uploaded images.
	Text string `json:"text"`

	// Success reports whether the whole request succeeded.
	Success bool `json:"success"`

	// Error is a human-readable failure message; omitted from JSON when empty.
	Error string `json:"error,omitempty"`
}
func (h *UploadHandler) HandleUpload(c *gin.Context) {
form, err := c.MultipartForm()
if err != nil {
c.JSON(http.StatusBadRequest, MultiUploadResponse{
Success: false,
Error: "Failed to parse form",
})
return
}
files := form.File["files"]
if len(files) == 0 {
c.JSON(http.StatusBadRequest, MultiUploadResponse{
Success: false,
Error: "No files uploaded",
})
return
}
if len(files) > 5 {
c.JSON(http.StatusBadRequest, MultiUploadResponse{
Success: false,
Error: "Maximum 5 files allowed",
})
return
}
var imageURLs []string
var ocrTexts []string
for _, fileHeader := range files {
if fileHeader.Size > 10<<20 { // 10MB
c.JSON(http.StatusBadRequest, MultiUploadResponse{
Success: false,
Error: "File size exceeds the limit of 10MB",
})
return
}
file, err := fileHeader.Open()
if err != nil {
c.JSON(http.StatusInternalServerError, MultiUploadResponse{
Success: false,
Error: "Failed to open file",
})
return
}
defer file.Close()
// Read file content for content type detection
fileBytes, err := io.ReadAll(file)
if err != nil {
c.JSON(http.StatusInternalServerError, MultiUploadResponse{
Success: false,
Error: "Failed to read file",
})
return
}
// Verify file type
contentType := http.DetectContentType(fileBytes)
if !h.uploadService.IsValidFileType(contentType) {
c.JSON(http.StatusBadRequest, MultiUploadResponse{
Success: false,
Error: "Invalid file type. Only images are allowed",
})
return
}
// Convert to base64 for OCR
base64Str := base64.StdEncoding.EncodeToString(fileBytes)
// Process OCR
ocrText, err := h.ocrService.ProcessImage(c.Request.Context(), base64Str)
if err != nil {
c.JSON(http.StatusInternalServerError, MultiUploadResponse{
Success: false,
Error: "OCR processing failed",
})
return
}
ocrTexts = append(ocrTexts, ocrText)
// Upload to R2
imageURL, err := h.uploadService.UploadFile(file, fileHeader.Filename, contentType)
if err != nil {
c.JSON(http.StatusInternalServerError, MultiUploadResponse{
Success: false,
Error: "Failed to upload file",
})
return
}
imageURLs = append(imageURLs, imageURL)
}
// Process combined text with Gemini if multiple images
finalText := strings.Join(ocrTexts, "\n")
if len(ocrTexts) > 1 {
for _, ocrText := range ocrTexts {
// prompt中在每个ocrText前加上"第" + 序号 + "段:"
prompt := "根据以下多段文字的语意,重新拼接成一段通顺的文字,保持原来的所有文字不改变同时,确保拼接的正确:\n\n"
processedText, err := h.geminiService.ProcessText(c.Request.Context(), prompt)
if err != nil {
c.JSON(http.StatusInternalServerError, MultiUploadResponse{
Success: false,
Error: "Text processing failed",
})
return
}
finalText = processedText
}
c.JSON(http.StatusOK, MultiUploadResponse{
ImageURLs: imageURLs,
Text: finalText,
Success: true,
})
}