124 lines
3.1 KiB
Go
124 lines
3.1 KiB
Go
package handler
|
|
|
|
import (
|
|
"context"
|
|
"net/http"
|
|
|
|
"github.com/gin-gonic/gin"
|
|
"github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common"
|
|
"github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common/profile"
|
|
ocr "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/ocr/v20181119"
|
|
"google.golang.org/api/option"
|
|
"tencent_ocr/pkg/service"
|
|
)
|
|
|
|
type OCRService struct {
|
|
tencentSecretID string
|
|
tencentSecretKey string
|
|
geminiService *service.GeminiService
|
|
}
|
|
|
|
func NewOCRService(tencentSecretID, tencentSecretKey string, geminiService *service.GeminiService) *OCRService {
|
|
return &OCRService{
|
|
tencentSecretID: tencentSecretID,
|
|
tencentSecretKey: tencentSecretKey,
|
|
geminiService: geminiService,
|
|
}
|
|
}
|
|
|
|
func (s *OCRService) ProcessImage(ctx context.Context, imageBase64 string) (string, error) {
|
|
// Initialize Tencent Cloud client
|
|
credential := common.NewCredential(s.tencentSecretID, s.tencentSecretKey)
|
|
cpf := profile.NewClientProfile()
|
|
cpf.HttpProfile.Endpoint = "ocr.tencentcloudapi.com"
|
|
client, err := ocr.NewClient(credential, "", cpf)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
// Create OCR request
|
|
request := ocr.NewGeneralHandwritingOCRRequest()
|
|
request.ImageBase64 = common.StringPtr(imageBase64)
|
|
|
|
// Perform OCR
|
|
response, err := client.GeneralHandwritingOCR(request)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
// Extract text from OCR response
|
|
var ocrText string
|
|
for _, textDetection := range response.Response.TextDetections {
|
|
ocrText += *textDetection.DetectedText + "\n"
|
|
}
|
|
|
|
return ocrText, nil
|
|
}
|
|
|
|
type OCRRequest struct {
|
|
ImageBase64 string `json:"image_base64"`
|
|
ImageURL string `json:"image_url"`
|
|
Scene string `json:"scene"`
|
|
APIKey string `json:"apikey" binding:"required"`
|
|
}
|
|
|
|
type OCRResponse struct {
|
|
OriginalText string `json:"original_text"`
|
|
Result string `json:"result"`
|
|
Success bool `json:"success"`
|
|
}
|
|
|
|
func (h *OCRService) HandleOCR(c *gin.Context) {
|
|
var req OCRRequest
|
|
if err := c.ShouldBindJSON(&req); err != nil {
|
|
c.JSON(http.StatusBadRequest, OCRResponse{
|
|
Success: false,
|
|
Result: "Invalid request format",
|
|
})
|
|
return
|
|
}
|
|
|
|
// Validate API key
|
|
if req.APIKey != h.geminiService.APIKey {
|
|
c.JSON(http.StatusUnauthorized, OCRResponse{
|
|
Success: false,
|
|
Result: "Invalid API key",
|
|
})
|
|
return
|
|
}
|
|
|
|
// Validate that at least one of ImageURL or ImageBase64 is provided
|
|
if req.ImageURL == "" && req.ImageBase64 == "" {
|
|
c.JSON(http.StatusBadRequest, OCRResponse{
|
|
Success: false,
|
|
Result: "Either image_url or image_base64 must be provided",
|
|
})
|
|
return
|
|
}
|
|
|
|
// Process image
|
|
ocrText, err := h.ProcessImage(c.Request.Context(), req.ImageBase64)
|
|
if err != nil {
|
|
c.JSON(http.StatusInternalServerError, OCRResponse{
|
|
Success: false,
|
|
Result: "OCR processing failed",
|
|
})
|
|
return
|
|
}
|
|
|
|
// Process with Gemini
|
|
processedText, err := h.geminiService.ProcessText(c.Request.Context(), ocrText)
|
|
if err != nil {
|
|
c.JSON(http.StatusInternalServerError, OCRResponse{
|
|
Success: false,
|
|
Result: "Text processing failed",
|
|
})
|
|
return
|
|
}
|
|
|
|
c.JSON(http.StatusOK, OCRResponse{
|
|
Success: true,
|
|
OriginalText: ocrText,
|
|
Result: processedText,
|
|
})
|
|
} |