11package com .luooqi .ocr .utils ;
22
33import cn .hutool .core .codec .Base64 ;
4+ import cn .hutool .core .lang .UUID ;
45import cn .hutool .core .util .CharsetUtil ;
56import cn .hutool .core .util .StrUtil ;
7+ import cn .hutool .core .util .URLUtil ;
68import cn .hutool .crypto .SecureUtil ;
79import cn .hutool .http .HttpRequest ;
810import cn .hutool .http .HttpResponse ;
1214import cn .hutool .json .JSONUtil ;
1315import com .luooqi .ocr .model .TextBlock ;
1416
17+ import java .awt .*;
1518import java .util .*;
19+ import java .util .List ;
1620
1721/**
1822 * tools-ocr
1923 * Created by 何志龙 on 2019-03-22.
2024 */
2125public class OcrUtils {
2226
27+ public static String ocrImg (byte [] imgData ) {
28+ int i = Math .abs (UUID .randomUUID ().hashCode ()) % 4 ;
29+ switch (i ){
30+ case 0 :
31+ return bdGeneralOcr (imgData );
32+ case 1 :
33+ return bdAccurateOcr (imgData );
34+ case 2 :
35+ return sogouMobileOcr (imgData );
36+ default :
37+ return sogouWebOcr (imgData );
38+ }
39+ }
40+
41+ private static String bdGeneralOcr (byte [] imgData ){
42+ return bdBaseOcr (imgData , "general_location" );
43+ }
44+
45+ private static String bdAccurateOcr (byte [] imgData ){
46+ return bdBaseOcr (imgData , "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate" );
47+ }
48+
49+ private static String bdBaseOcr (byte [] imgData , String type ){
50+ String [] urlArr = new String []{"http://ai.baidu.com/tech/ocr/general" , "http://ai.baidu.com/index/seccode?action=show" };
51+ StringBuilder cookie = new StringBuilder ();
52+ for (String url : urlArr ) {
53+ HttpResponse cookieResp = WebUtils .get (url );
54+ List <String > ckList = cookieResp .headerList ("Set-Cookie" );
55+ for (String s : ckList ) {
56+ cookie .append (s .replaceAll ("expires[\\ S\\ s]+" , "" ));
57+ }
58+ }
59+ HashMap <String , String > header = new HashMap <>();
60+ header .put ("Referer" , "http://ai.baidu.com/tech/ocr/general" );
61+ header .put ("Cookie" , cookie .toString ());
62+ String data = "type=" +URLUtil .encodeQuery (type )+"&detect_direction=false&image_url&image=" + URLUtil .encodeQuery ("data:image/jpeg;base64," + Base64 .encode (imgData )) + "&language_type=CHN_ENG" ;
63+ HttpResponse response = WebUtils .postRaw ("http://ai.baidu.com/aidemo" , data , 0 , header );
64+ return extractBdResult (WebUtils .getSafeHtml (response ));
65+ }
66+
2367 public static String sogouMobileOcr (byte [] imgData ) {
2468 String boundary = "------WebKitFormBoundary8orYTmcj8BHvQpVU" ;
2569 String url = "http://ocr.shouji.sogou.com/v2/ocr/json" ;
@@ -59,7 +103,7 @@ private static String extractSogouResult(String html) {
59103 }
60104 JSONArray jsonArray = jsonObject .getJSONArray ("result" );
61105 List <TextBlock > textBlocks = new ArrayList <>();
62- boolean isEng = false ;
106+ boolean isEng ;
63107 for (int i = 0 ; i < jsonArray .size (); i ++) {
64108 JSONObject jObj = jsonArray .getJSONObject (i );
65109 TextBlock textBlock = new TextBlock ();
@@ -76,4 +120,34 @@ private static String extractSogouResult(String html) {
76120 return CommUtils .combineTextBlocks (textBlocks , isEng );
77121 }
78122
123+ private static String extractBdResult (String html ) {
124+ if (StrUtil .isBlank (html )) {
125+ return "" ;
126+ }
127+ JSONObject jsonObject = JSONUtil .parseObj (html );
128+ if (jsonObject .getInt ("errno" , 0 ) != 0 ) {
129+ return "" ;
130+ }
131+ JSONArray jsonArray = jsonObject .getJSONObject ("data" ).getJSONArray ("words_result" );
132+ List <TextBlock > textBlocks = new ArrayList <>();
133+ boolean isEng = false ;
134+ for (int i = 0 ; i < jsonArray .size (); i ++) {
135+ JSONObject jObj = jsonArray .getJSONObject (i );
136+ TextBlock textBlock = new TextBlock ();
137+ textBlock .setText (jObj .getStr ("words" ).trim ());
138+ //noinspection SuspiciousToArrayCall
139+ JSONObject location = jObj .getJSONObject ("location" );
140+ int top = location .getInt ("top" );
141+ int left = location .getInt ("left" );
142+ int width = location .getInt ("width" );
143+ int height = location .getInt ("height" );
144+ textBlock .setTopLeft (new Point (top , left ));
145+ textBlock .setTopRight (new Point (top , left + width ));
146+ textBlock .setBottomLeft (new Point (top + height , left ));
147+ textBlock .setBottomRight (new Point (top + height , left + width ));
148+ textBlocks .add (textBlock );
149+ }
150+ return CommUtils .combineTextBlocks (textBlocks , isEng );
151+ }
152+
79153}
0 commit comments