tencent_ocr/pkg/handler/ocr.go
2025-01-15 16:09:25 +08:00

127 lines
3.2 KiB
Go

package handler
import (
"context"
"encoding/base64"
"net/http"
"strings"
"github.com/gin-gonic/gin"
"github.com/google/generative-ai-go/genai"
"github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common"
"github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common/profile"
ocr "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/ocr/v20181119"
"google.golang.org/api/option"
"git.disbaidu.com/maxwell/tencent_ocr/pkg/service"
)
// OCRService bundles the Tencent Cloud credentials used for OCR requests with
// the Gemini service used to post-process the recognized text.
type OCRService struct {
	// tencentSecretID is the secret ID half of the Tencent Cloud credential.
	tencentSecretID string
	// tencentSecretKey is the secret key half of the Tencent Cloud credential.
	tencentSecretKey string
	// geminiService post-processes OCR text; its APIKey is also what incoming
	// requests are authenticated against.
	geminiService *service.GeminiService
}
// NewOCRService returns an OCRService that stores the given Tencent Cloud
// credential pair and Gemini service. No validation is performed and no
// network connection is opened here.
func NewOCRService(tencentSecretID, tencentSecretKey string, geminiService *service.GeminiService) *OCRService {
	svc := new(OCRService)
	svc.tencentSecretID = tencentSecretID
	svc.tencentSecretKey = tencentSecretKey
	svc.geminiService = geminiService
	return svc
}
// ProcessImage sends a base64-encoded image to the Tencent Cloud
// GeneralHandwritingOCR API and returns the recognized text.
//
// Each detection reported by the API contributes one line to the result; every
// line, including the last, is terminated by "\n". An image with no detections
// yields an empty string.
//
// ctx is forwarded to the SDK call so that cancelling the caller's context
// (for example, when the HTTP client disconnects) also aborts the OCR request.
//
// Errors from constructing the client or from the OCR call are returned
// unchanged.
func (s *OCRService) ProcessImage(ctx context.Context, imageBase64 string) (string, error) {
	// A client is built per call from the stored credential; no region is set.
	credential := common.NewCredential(s.tencentSecretID, s.tencentSecretKey)
	cpf := profile.NewClientProfile()
	cpf.HttpProfile.Endpoint = "ocr.tencentcloudapi.com"
	client, err := ocr.NewClient(credential, "", cpf)
	if err != nil {
		return "", err
	}

	request := ocr.NewGeneralHandwritingOCRRequest()
	request.ImageBase64 = common.StringPtr(imageBase64)

	response, err := client.GeneralHandwritingOCRWithContext(ctx, request)
	if err != nil {
		return "", err
	}
	// Defensive: a response without a payload is treated as "no detections"
	// rather than dereferenced.
	if response == nil || response.Response == nil {
		return "", nil
	}

	var text strings.Builder
	for _, detection := range response.Response.TextDetections {
		// The SDK models these as pointers; skip entries without text instead
		// of panicking on a nil dereference.
		if detection == nil || detection.DetectedText == nil {
			continue
		}
		text.WriteString(*detection.DetectedText)
		text.WriteByte('\n')
	}
	return text.String(), nil
}
// OCRRequest is the JSON body accepted by the OCR endpoint.
type OCRRequest struct {
	// ImageBase64 is the image content encoded as base64.
	ImageBase64 string `json:"image_base64"`
	// ImageURL is an image location, accepted as an alternative to ImageBase64.
	ImageURL string `json:"image_url"`
	// Scene is an optional scene hint.
	// NOTE(review): nothing visible in this file reads Scene — confirm its
	// intended use.
	Scene string `json:"scene"`
	// APIKey authenticates the caller; binding rejects requests that omit it.
	APIKey string `json:"apikey" binding:"required"`
}
// OCRResponse is the JSON body returned by the OCR endpoint for both successful
// and failed requests.
type OCRResponse struct {
	// OriginalText is the raw text recognized by OCR; set only on success.
	OriginalText string `json:"original_text"`
	// Result is the post-processed text on success, or a short error message
	// when Success is false.
	Result string `json:"result"`
	// Success reports whether the request was fully processed.
	Success bool `json:"success"`
}
// HandleOCR is the gin handler for the OCR endpoint.
//
// It binds the JSON body into an OCRRequest, checks the supplied API key
// against the Gemini service's key, runs handwriting OCR on the image, and
// passes the recognized text to the Gemini service for post-processing.
//
// The image is taken from image_base64 when present; otherwise image_url is
// forwarded to the OCR API. Previously image_url passed validation but was
// never used, so URL-only requests always failed.
//
// Responses (always an OCRResponse body):
//   - 400 when the body cannot be bound or no image is supplied
//   - 401 when the API key does not match
//   - 500 when OCR or text post-processing fails
//   - 200 with OriginalText and Result on success
func (s *OCRService) HandleOCR(c *gin.Context) {
	var req OCRRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, OCRResponse{
			Success: false,
			Result:  "Invalid request format",
		})
		return
	}

	// NOTE(review): this is a plain string comparison; consider
	// crypto/subtle.ConstantTimeCompare so response timing does not leak
	// information about the key.
	if req.APIKey != s.geminiService.APIKey {
		c.JSON(http.StatusUnauthorized, OCRResponse{
			Success: false,
			Result:  "Invalid API key",
		})
		return
	}

	if req.ImageURL == "" && req.ImageBase64 == "" {
		c.JSON(http.StatusBadRequest, OCRResponse{
			Success: false,
			Result:  "Either image_url or image_base64 must be provided",
		})
		return
	}

	ctx := c.Request.Context()

	// Prefer inline image data (the pre-existing behavior); fall back to the
	// URL only when no base64 payload was sent.
	var (
		ocrText string
		err     error
	)
	if req.ImageBase64 != "" {
		ocrText, err = s.ProcessImage(ctx, req.ImageBase64)
	} else {
		ocrText, err = s.processImageURL(ctx, req.ImageURL)
	}
	if err != nil {
		// Record the cause on the gin context so error-collecting middleware
		// can see it; the client only receives a generic message.
		_ = c.Error(err)
		c.JSON(http.StatusInternalServerError, OCRResponse{
			Success: false,
			Result:  "OCR processing failed",
		})
		return
	}

	processedText, err := s.geminiService.ProcessText(ctx, ocrText)
	if err != nil {
		_ = c.Error(err)
		c.JSON(http.StatusInternalServerError, OCRResponse{
			Success: false,
			Result:  "Text processing failed",
		})
		return
	}

	c.JSON(http.StatusOK, OCRResponse{
		Success:      true,
		OriginalText: ocrText,
		Result:       processedText,
	})
}

// processImageURL runs Tencent Cloud handwriting OCR on the image at imageURL
// and returns the recognized text, one detection per line, each terminated by
// "\n". Errors from client construction or the OCR call are returned unchanged.
func (s *OCRService) processImageURL(ctx context.Context, imageURL string) (string, error) {
	credential := common.NewCredential(s.tencentSecretID, s.tencentSecretKey)
	cpf := profile.NewClientProfile()
	cpf.HttpProfile.Endpoint = "ocr.tencentcloudapi.com"
	client, err := ocr.NewClient(credential, "", cpf)
	if err != nil {
		return "", err
	}

	request := ocr.NewGeneralHandwritingOCRRequest()
	request.ImageUrl = common.StringPtr(imageURL)

	response, err := client.GeneralHandwritingOCRWithContext(ctx, request)
	if err != nil {
		return "", err
	}
	// Defensive: treat a response without a payload as "no detections".
	if response == nil || response.Response == nil {
		return "", nil
	}

	var text strings.Builder
	for _, detection := range response.Response.TextDetections {
		if detection == nil || detection.DetectedText == nil {
			continue
		}
		text.WriteString(*detection.DetectedText)
		text.WriteByte('\n')
	}
	return text.String(), nil
}