// Package handler exposes the OCR HTTP endpoint together with the service
// that performs text recognition and post-processing.
package handler

import (
	"context"
	"crypto/subtle"
	"errors"
	"fmt"
	"net/http"
	"strings"

	"github.com/gin-gonic/gin"
	"github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common"
	"github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common/profile"
	ocr "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/ocr/v20181119"
	// NOTE(review): "option" is not referenced by any code visible in this
	// file; remove the import if nothing else in the file needs it.
	"google.golang.org/api/option"

	"tencent_ocr/pkg/service"
)

// tencentOCREndpoint is the API host configured on every OCR client profile.
const tencentOCREndpoint = "ocr.tencentcloudapi.com"

// errNoImage is returned when neither a base64 payload nor a URL is supplied.
var errNoImage = errors.New("ocr: no image provided")

// OCRService recognizes text in images through the Tencent Cloud OCR API and
// hands the recognized text to a Gemini service for further processing.
type OCRService struct {
	tencentSecretID  string
	tencentSecretKey string
	geminiService    *service.GeminiService
}

// NewOCRService returns an OCRService that authenticates against Tencent
// Cloud with the given secret ID/key and uses geminiService for text
// post-processing and API-key validation.
func NewOCRService(tencentSecretID, tencentSecretKey string, geminiService *service.GeminiService) *OCRService {
	return &OCRService{
		tencentSecretID:  tencentSecretID,
		tencentSecretKey: tencentSecretKey,
		geminiService:    geminiService,
	}
}

// ProcessImage runs handwriting OCR on a base64-encoded image and returns the
// detected text, one detection per line (each terminated by "\n").
//
// ctx bounds the remote call. An error is returned when imageBase64 is empty,
// when the client cannot be created, or when the OCR request fails.
func (s *OCRService) ProcessImage(ctx context.Context, imageBase64 string) (string, error) {
	return s.recognize(ctx, imageBase64, "")
}

// recognize performs the handwriting OCR call for an image given as base64
// data, as a URL, or both, and joins the detected text line by line.
func (s *OCRService) recognize(ctx context.Context, imageBase64, imageURL string) (string, error) {
	if imageBase64 == "" && imageURL == "" {
		return "", errNoImage
	}
	// Earlier versions ignored ctx entirely, so tolerate a nil context from
	// existing callers instead of panicking inside the SDK.
	if ctx == nil {
		ctx = context.Background()
	}

	credential := common.NewCredential(s.tencentSecretID, s.tencentSecretKey)
	cpf := profile.NewClientProfile()
	cpf.HttpProfile.Endpoint = tencentOCREndpoint
	client, err := ocr.NewClient(credential, "", cpf)
	if err != nil {
		return "", fmt.Errorf("creating tencent ocr client: %w", err)
	}

	request := ocr.NewGeneralHandwritingOCRRequest()
	if imageBase64 != "" {
		request.ImageBase64 = common.StringPtr(imageBase64)
	}
	// NOTE(review): when both fields are set the Tencent API is expected to
	// prefer the URL — confirm against the API reference.
	if imageURL != "" {
		request.ImageUrl = common.StringPtr(imageURL)
	}

	response, err := client.GeneralHandwritingOCRWithContext(ctx, request)
	if err != nil {
		return "", fmt.Errorf("running handwriting ocr: %w", err)
	}
	if response == nil || response.Response == nil {
		return "", errors.New("ocr: empty response from tencent cloud")
	}

	var text strings.Builder
	for _, detection := range response.Response.TextDetections {
		// The SDK models these as pointers; skip entries without text rather
		// than dereferencing nil.
		if detection == nil || detection.DetectedText == nil {
			continue
		}
		text.WriteString(*detection.DetectedText)
		text.WriteByte('\n')
	}
	return text.String(), nil
}

// OCRRequest is the JSON body accepted by HandleOCR.
type OCRRequest struct {
	// ImageBase64 is the base64-encoded image; optional if ImageURL is set.
	ImageBase64 string `json:"image_base64"`
	// ImageURL is the location of the image; optional if ImageBase64 is set.
	ImageURL string `json:"image_url"`
	// Scene is accepted in the payload.
	// NOTE(review): it is not currently forwarded to the OCR request.
	Scene string `json:"scene"`
	// APIKey authenticates the caller and is required.
	APIKey string `json:"apikey" binding:"required"`
}

// OCRResponse is the JSON body written by HandleOCR. On failure Success is
// false and Result carries a short error message.
type OCRResponse struct {
	OriginalText string `json:"original_text"`
	Result       string `json:"result"`
	Success      bool   `json:"success"`
}

// HandleOCR is the gin handler for OCR requests. It validates the payload and
// API key, runs OCR on the supplied image (base64 and/or URL), passes the
// recognized text to the Gemini service, and writes an OCRResponse.
//
// Status codes: 400 for a malformed body or missing image, 401 for a wrong
// API key, 500 when OCR or text processing fails, 200 on success.
func (s *OCRService) HandleOCR(c *gin.Context) {
	var req OCRRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, OCRResponse{
			Success: false,
			Result:  "Invalid request format",
		})
		return
	}

	// Constant-time comparison avoids leaking key contents through timing.
	if subtle.ConstantTimeCompare([]byte(req.APIKey), []byte(s.geminiService.APIKey)) != 1 {
		c.JSON(http.StatusUnauthorized, OCRResponse{
			Success: false,
			Result:  "Invalid API key",
		})
		return
	}

	if req.ImageURL == "" && req.ImageBase64 == "" {
		c.JSON(http.StatusBadRequest, OCRResponse{
			Success: false,
			Result:  "Either image_url or image_base64 must be provided",
		})
		return
	}

	ctx := c.Request.Context()

	// Forward both image sources so URL-only requests are actually processed.
	ocrText, err := s.recognize(ctx, req.ImageBase64, req.ImageURL)
	if err != nil {
		// Record the cause on the gin context so it is not silently lost.
		_ = c.Error(err)
		c.JSON(http.StatusInternalServerError, OCRResponse{
			Success: false,
			Result:  "OCR processing failed",
		})
		return
	}

	processedText, err := s.geminiService.ProcessText(ctx, ocrText)
	if err != nil {
		_ = c.Error(err)
		c.JSON(http.StatusInternalServerError, OCRResponse{
			Success: false,
			Result:  "Text processing failed",
		})
		return
	}

	c.JSON(http.StatusOK, OCRResponse{
		Success:      true,
		OriginalText: ocrText,
		Result:       processedText,
	})
}