1111
1212#include " plugin-support.h"
1313
14- CGImageRef convertBinarytoCgImage (const cv::Mat &imageBGRA)
/**
 * Enlarges an image and surrounds it with a constant-valued margin.
 *
 * The input is resized to four times its width and height. A border of one
 * quarter of the enlarged height is then added above and below, and one
 * quarter of the enlarged width on the left and right. Border pixels are
 * filled with cv::Scalar(255).
 *
 * @param src Image to enlarge and pad.
 * @return The enlarged, padded image.
 */
cv::Mat preprocessCvMat(const cv::Mat &src)
{
	const cv::Size enlargedSize(src.cols * 4, src.rows * 4);
	cv::Mat enlarged;
	cv::resize(src, enlarged, enlargedSize);

	// Margins are derived from the enlarged image, not from the source.
	const int verticalMargin = enlarged.rows / 4;
	const int horizontalMargin = enlarged.cols / 4;

	cv::Mat bordered;
	cv::copyMakeBorder(enlarged, bordered, verticalMargin, verticalMargin,
			   horizontalMargin, horizontalMargin,
			   cv::BORDER_CONSTANT, cv::Scalar(255));
	return bordered;
}
26+
/**
 * Wraps the pixels of a cv::Mat in a CVPixelBuffer and converts that buffer
 * into a CGImage.
 *
 * @param cvMat           Source image. Its pixel layout must match
 *                        pixelFormatType.
 * @param pixelFormatType Core Video pixel format describing cvMat's pixels.
 * @return A CGImage that the caller must release with CGImageRelease(), or
 *         NULL if cvMat is empty or either conversion step fails.
 *
 * NOTE(review): CVPixelBufferCreateWithBytes does not copy cvMat.data.
 * Whether the returned CGImage still refers to that memory is not visible
 * here, so callers should keep cvMat alive while the image is in use.
 */
CGImageRef convertCvMatToCGImage(const cv::Mat &cvMat, OSType pixelFormatType)
{
	if (cvMat.empty()) {
		obs_log(LOG_ERROR,
			"Cannot convert an empty cv::Mat to CGImage!");
		return NULL;
	}

	// Start from NULL so the failure path never inspects an indeterminate
	// pointer.
	CVPixelBufferRef pixelBuffer = NULL;

	// Use the Mat's real row stride instead of its column count: the two
	// differ for non-continuous Mats and for any format wider than one
	// byte per pixel.
	const size_t bytesPerRow = cvMat.step[0];

	CVReturn retPixelBuffer = CVPixelBufferCreateWithBytes(
		NULL, cvMat.cols, cvMat.rows, pixelFormatType, cvMat.data,
		bytesPerRow, NULL, NULL, NULL, &pixelBuffer);
	if (retPixelBuffer != kCVReturnSuccess) {
		obs_log(LOG_ERROR, " CVPixelBuffer creation failed! %d",
			retPixelBuffer);
		// CVPixelBufferRelease is NULL-safe.
		CVPixelBufferRelease(pixelBuffer);
		return NULL;
	}

	CGImageRef image = NULL;
	OSStatus retImage =
		VTCreateCGImageFromCVPixelBuffer(pixelBuffer, NULL, &image);

	// The pixel buffer is released on both paths; releasing it only on
	// success leaked it whenever the CGImage conversion failed.
	CVPixelBufferRelease(pixelBuffer);

	if (retImage != noErr) {
		obs_log(LOG_ERROR, " CGImage creation failed!");
		// CGImageRelease is NULL-safe.
		CGImageRelease(image);
		return NULL;
	}

	return image;
}
4456
/**
 * Runs a Vision text-recognition request on an image and returns the text.
 *
 * The top candidate string of each text observation is appended to the
 * result in the order Vision reports the observations, with no separator.
 *
 * @param image    Image to recognize. It is not released by this function.
 * @param langCode Recognition language handed to Vision as the only entry of
 *                 recognitionLanguages.
 * @return The concatenated recognized text. Empty if image is NULL, langCode
 *         is nil, the request fails, or nothing is recognized.
 */
std::string recognizeByVision(CGImageRef image, NSString *langCode)
{
	__block std::string resultText;

	// @[langCode] throws on nil, and there is nothing to recognize without
	// an image, so bail out early.
	if (image == NULL || langCode == nil) {
		obs_log(LOG_ERROR,
			"recognizeByVision needs an image and a language code!");
		return resultText;
	}

	// Scope the Vision temporaries so they are freed promptly when this
	// runs on a background queue.
	@autoreleasepool {
		VNImageRequestHandler *requestHandler =
			[[VNImageRequestHandler alloc] initWithCGImage:image
							       options:@{}];

		VNRequestCompletionHandler completionHandler = ^(
			VNRequest *req, NSError *err) {
			if (err) {
				obs_log(LOG_ERROR,
					"Text recognition request failed: %s",
					err.localizedDescription.UTF8String);
				return;
			}
			for (VNRecognizedTextObservation *observation in req
				     .results) {
				VNRecognizedText *recognizedText =
					[observation topCandidates:1]
						.firstObject;
				// With no candidate this chain yields NULL;
				// appending NULL to std::string is undefined.
				const char *utf8 =
					recognizedText.string.UTF8String;
				if (utf8 != NULL) {
					resultText += utf8;
				}
			}
		};

		VNRecognizeTextRequest *request =
			[[VNRecognizeTextRequest alloc]
				initWithCompletionHandler:completionHandler];
		// NOTE(review): minimumTextHeight is relative to the image
		// height. After preprocessCvMat's padding the text can span
		// at most about two thirds of the image, so 0.8 looks high.
		// Confirm this value is intended.
		request.minimumTextHeight = 0.8;
		request.recognitionLevel =
			VNRequestTextRecognitionLevelAccurate;
		request.recognitionLanguages = @[langCode];

		// Judge success by the return value, not by the error pointer.
		NSError *error = nil;
		if (![requestHandler performRequests:@[request]
					       error:&error]) {
			obs_log(LOG_ERROR,
				"Could not perform text recognition: %s",
				error.localizedDescription.UTF8String);
		}
	}

	return resultText;
}
8092
/**
 * Recognizes Japanese text in an image on a background dispatch queue.
 *
 * The image is enlarged and padded with preprocessCvMat, converted to a
 * CGImage as kCVPixelFormatType_OneComponent8, and passed to
 * recognizeByVision with the "ja-JP" language code.
 *
 * @param imageBinary Image to recognize; its pixels are interpreted as
 *                    kCVPixelFormatType_OneComponent8.
 * @param callback    Invoked exactly once, on a global dispatch queue, with
 *                    the recognized text. It receives an empty string when
 *                    preprocessing or conversion fails. If callback is empty
 *                    no work is scheduled.
 */
void recognizeText(const cv::Mat imageBinary,
		   std::function<void(std::string)> callback)
{
	// Calling an empty std::function throws, and an exception escaping a
	// dispatch block terminates the process. With nobody to deliver the
	// result to, skip the work entirely.
	if (!callback) {
		return;
	}

	// Copying a cv::Mat shares the pixel storage with the original. Take
	// a deep copy so the background work does not race with, or outlive,
	// a buffer the caller may reuse after this function returns.
	const cv::Mat ownedImage = imageBinary.clone();

	dispatch_queue_t queue =
		dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);

	dispatch_async(queue, ^{
		std::string result;
		try {
			cv::Mat padded = preprocessCvMat(ownedImage);
			CGImageRef image = convertCvMatToCGImage(
				padded, kCVPixelFormatType_OneComponent8);
			if (image == NULL) {
				obs_log(LOG_INFO,
					"Couldn't convert cv::Mat to CGImage!");
			} else {
				result = recognizeByVision(image, @"ja-JP");
				CGImageRelease(image);
			}
		} catch (const cv::Exception &e) {
			// OpenCV reports bad input (e.g. an empty image) by
			// throwing; report an empty result instead of
			// crashing.
			obs_log(LOG_ERROR, "Image preprocessing failed: %s",
				e.what());
		}
		callback(result);
	});
}
0 commit comments