Skip to content

Commit 90be052

Browse files
authored
文字列認識のコードが不安定になるバグを修正 (#164)
* Fix leak
* Update buildspec.json
* Update VisionTextRecognizer.mm
1 parent 5e480c9 commit 90be052

File tree

5 files changed

+64
-66
lines changed

5 files changed

+64
-66
lines changed

buildspec.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@
3939
}
4040
},
4141
"name": "obs-pokemon-sv-screen-builder",
42-
"version": "0.6.7",
42+
"version": "0.6.8",
4343
"author": "Kaito Udagawa",
4444
"website": "https://github.com/umireon/obs-pokemon-sv-screen-builder",
4545
"email": "umireon@gmail.com",

src/TextRecognizer/TesseractTextRecognizer/TesseractTextRecognizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
#include <obs.h>
66

7-
void recognizeText(const cv::Mat &imageBinary,
7+
void recognizeText(const cv::Mat imageBinary,
88
std::function<void(std::string)> callback)
99
{
1010
UNUSED_PARAMETER(imageBinary);

src/TextRecognizer/TextRecognizer.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,5 +5,5 @@
55

66
#include <opencv2/opencv.hpp>
77

8-
void recognizeText(const cv::Mat &imageBinary,
8+
void recognizeText(const cv::Mat imageBinary,
99
std::function<void(std::string)> callback);

src/TextRecognizer/VisionTextRecognizer/VisionTextRecognizer.mm

Lines changed: 60 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -11,18 +11,30 @@
1111

1212
#include "plugin-support.h"
1313

14-
CGImageRef convertBinarytoCgImage(const cv::Mat &imageBGRA)
14+
cv::Mat preprocessCvMat(const cv::Mat &src)
15+
{
16+
cv::Mat resized;
17+
cv::resize(src, resized, cv::Size(src.cols * 4, src.rows * 4));
18+
19+
cv::Mat padded;
20+
cv::copyMakeBorder(resized, padded, resized.rows / 4, resized.rows / 4,
21+
resized.cols / 4, resized.cols / 4,
22+
cv::BORDER_CONSTANT, cv::Scalar(255));
23+
24+
return padded;
25+
}
26+
27+
CGImageRef convertCvMatToCGImage(const cv::Mat &cvMat, OSType pixelFormatType)
1528
{
1629
CVPixelBufferRef pixelBuffer;
1730
CVReturn retPixelBuffer = CVPixelBufferCreateWithBytes(
18-
kCFAllocatorDefault, imageBGRA.cols, imageBGRA.rows,
19-
kCVPixelFormatType_OneComponent8, imageBGRA.data,
20-
imageBGRA.cols, NULL, NULL, NULL, &pixelBuffer);
31+
NULL, cvMat.cols, cvMat.rows, pixelFormatType, cvMat.data,
32+
cvMat.cols, NULL, NULL, NULL, &pixelBuffer);
2133
if (retPixelBuffer != kCVReturnSuccess) {
22-
blog(LOG_ERROR, "CVPixelBuffer creation failed! %d",
23-
retPixelBuffer);
34+
obs_log(LOG_ERROR, "CVPixelBuffer creation failed! %d",
35+
retPixelBuffer);
2436
if (pixelBuffer != NULL) {
25-
CFRelease(pixelBuffer);
37+
CVPixelBufferRelease(pixelBuffer);
2638
}
2739
return NULL;
2840
}
@@ -31,81 +43,67 @@ CGImageRef convertBinarytoCgImage(const cv::Mat &imageBGRA)
3143
OSStatus retImage =
3244
VTCreateCGImageFromCVPixelBuffer(pixelBuffer, NULL, &image);
3345
if (retImage != noErr) {
34-
blog(LOG_ERROR, "CGImage creation failed!");
46+
obs_log(LOG_ERROR, "CGImage creation failed!");
3547
if (image != NULL) {
36-
CFRelease(image);
48+
CGImageRelease(image);
3749
}
3850
return NULL;
3951
}
40-
CFRelease(pixelBuffer);
52+
CVPixelBufferRelease(pixelBuffer);
4153

4254
return image;
4355
}
4456

45-
class VisionTextRecognizer {
46-
public:
47-
std::string recognizeByVision(CGImageRef image);
48-
49-
private:
50-
std::string resultText;
51-
};
52-
53-
std::string VisionTextRecognizer::recognizeByVision(CGImageRef image)
57+
std::string recognizeByVision(CGImageRef image, NSString *langCode)
5458
{
59+
__block std::string resultText;
60+
5561
VNImageRequestHandler *requestHandler = [[VNImageRequestHandler alloc]
5662
initWithCGImage:image
5763
options:@{}];
64+
65+
VNRequestCompletionHandler completionHandler = ^(VNRequest *req,
66+
NSError *err) {
67+
if (err) {
68+
NSLog(@"%@", err);
69+
return;
70+
}
71+
for (VNRecognizedTextObservation *observation in req.results) {
72+
NSArray<VNRecognizedText *> *candidates =
73+
[observation topCandidates:1];
74+
VNRecognizedText *recognizedText =
75+
[candidates firstObject];
76+
NSString *nsString = recognizedText.string;
77+
resultText += [nsString UTF8String];
78+
}
79+
};
80+
5881
VNRecognizeTextRequest *request = [[VNRecognizeTextRequest alloc]
59-
initWithCompletionHandler:^(VNRequest *req, NSError *err) {
60-
if (err) {
61-
NSLog(@"%@", err);
62-
return;
63-
}
64-
for (VNRecognizedTextObservation *observation in req
65-
.results) {
66-
NSArray<VNRecognizedText *> *candidates =
67-
[observation topCandidates:1];
68-
VNRecognizedText *recognizedText =
69-
[candidates firstObject];
70-
NSString *nsString = recognizedText.string;
71-
resultText += [nsString UTF8String];
72-
}
73-
}];
74-
request.usesCPUOnly = true;
75-
request.recognitionLanguages = @[@"ja-JP"];
82+
initWithCompletionHandler:completionHandler];
83+
request.minimumTextHeight = 0.8;
84+
request.recognitionLevel = VNRequestTextRecognitionLevelAccurate;
85+
request.recognitionLanguages = @[langCode];
86+
7687
NSError *_Nullable error;
7788
[requestHandler performRequests:@[request] error:&error];
89+
7890
return resultText;
7991
}
8092

81-
void recognizeText(const cv::Mat &imageBinary,
93+
void recognizeText(const cv::Mat imageBinary,
8294
std::function<void(std::string)> callback)
8395
{
84-
cv::Size destSize(imageBinary.cols * 4, imageBinary.rows * 4);
85-
cv::Mat resizedBinary;
86-
cv::resize(imageBinary, resizedBinary, destSize);
87-
88-
cv::Mat paddedImageBinary;
89-
cv::copyMakeBorder(resizedBinary, paddedImageBinary,
90-
resizedBinary.rows / 4, resizedBinary.rows / 4,
91-
resizedBinary.cols / 4, resizedBinary.cols / 4,
92-
cv::BORDER_CONSTANT, cv::Scalar(255));
93-
94-
CGImageRef image = convertBinarytoCgImage(paddedImageBinary);
95-
if (image == NULL) {
96-
obs_log(LOG_INFO, "Couldn't convert cv::Mat to CGImage!");
97-
callback("");
98-
return;
99-
}
96+
dispatch_queue_t queue =
97+
dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
10098

101-
__block VisionTextRecognizer recognizer;
99+
dispatch_block_t block = ^{
100+
cv::Mat padded = preprocessCvMat(imageBinary);
101+
CGImageRef image = convertCvMatToCGImage(
102+
padded, kCVPixelFormatType_OneComponent8);
103+
std::string result = recognizeByVision(image, @"ja-JP");
104+
CGImageRelease(image);
105+
callback(result);
106+
};
102107

103-
dispatch_async(
104-
dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
105-
^{
106-
std::string result =
107-
recognizer.recognizeByVision(image);
108-
CFRelease(image);
109-
callback(result);
110-
});
108+
dispatch_async(queue, block);
111109
}

src/TextRecognizer/WinRTTextRecognizer/WinRTTextRecognizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ using winrt::Windows::Foundation::MemoryBuffer;
2323
using winrt::Windows::Foundation::IMemoryBufferReference;
2424
using Windows::Foundation::IMemoryBufferByteAccess;
2525

26-
void recognizeText(const cv::Mat &imageBinary,
26+
void recognizeText(const cv::Mat imageBinary,
2727
std::function<void(std::string)> callback)
2828
{
2929
cv::Mat padImage;

0 commit comments

Comments
 (0)