一、Tess-two 概述
-
Tess-two 是 Tesseract OCR 引擎在 Android 平臺上的一個封裝庫,用於實現離線文字識別
-
Tess-two 的 GitHub 官網:
https://github.com/rmtheis/tess-two
二、Tess-two 文字識別
1、演示
(1)Dependencies
- 模塊級 build.gradle
implementation 'com.rmtheis:tess-two:9.1.0'
(2)Tessdata
- 從 Tessdata 倉庫 https://github.com/tesseract-ocr/tessdata 下載所需語言包
- 例如,eng.traineddata 用於英文、chi_sim.traineddata 用於簡體中文
- 將下載的 .traineddata 文件放在項目的 src/main/assets 目錄下
(3)Manifest
- AndroidManifest.xml
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
(4)Test
- MainActivity.java
public class MainActivity extends AppCompatActivity {public static final String TAG = MainActivity.class.getSimpleName();@Overrideprotected void onCreate(Bundle savedInstanceState) {super.onCreate(savedInstanceState);EdgeToEdge.enable(this);setContentView(R.layout.activity_main);ViewCompat.setOnApplyWindowInsetsListener(findViewById(R.id.main), (v, insets) -> {Insets systemBars = insets.getInsets(WindowInsetsCompat.Type.systemBars());v.setPadding(systemBars.left, systemBars.top, systemBars.right, systemBars.bottom);return insets;});if (checkSelfPermission(Manifest.permission.READ_EXTERNAL_STORAGE) != PackageManager.PERMISSION_GRANTED|| checkSelfPermission(Manifest.permission.WRITE_EXTERNAL_STORAGE) != PackageManager.PERMISSION_GRANTED) {registerForActivityResult(new ActivityResultContracts.RequestMultiplePermissions(),o -> {for (Map.Entry<String, Boolean> entry : o.entrySet()) {Log.i(TAG, entry.getKey() + " : " + entry.getValue());}boolean allGranted = true;for (Map.Entry<String, Boolean> entry : o.entrySet()) {if (!entry.getValue()) {allGranted = false;break;}}if (allGranted) {test();} else {Log.i(TAG, "權限未全部授予");}}).launch(new String[]{Manifest.permission.READ_EXTERNAL_STORAGE,Manifest.permission.WRITE_EXTERNAL_STORAGE});} else {test();}}private void test() {copyTessDataToStorage("chi_sim.traineddata", "eng.traineddata");TessBaseAPI tessBaseAPI = new TessBaseAPI();String tesseractDirPath = getExternalFilesDir(null) + "/tesseract/";boolean initResult = tessBaseAPI.init(tesseractDirPath, "chi_sim+eng");if (!initResult) {Log.i(TAG, "初始化 Tesseract 失敗");return;}Bitmap bitmap = BitmapFactory.decodeResource(getResources(), R.drawable.test_img);tessBaseAPI.setImage(bitmap);String result = tessBaseAPI.getUTF8Text();Log.i(TAG, "result: " + result);}public void copyTessDataToStorage(String... 
tessDataFiles) {String tessDataDirPath = getExternalFilesDir(null) + "/tesseract/tessdata/";File tessDataDir = new File(tessDataDirPath);if (!tessDataDir.exists()) {tessDataDir.mkdirs();}AssetManager assetManager = getAssets();for (String fileName : tessDataFiles) {File outFile = new File(tessDataDirPath + fileName);if (outFile.exists()) continue;try (InputStream in = assetManager.open(fileName);OutputStream out = new FileOutputStream(outFile)) {byte[] buffer = new byte[1024];int read;while ((read = in.read(buffer)) != -1) {out.write(buffer, 0, read);}} catch (IOException e) {e.printStackTrace();}}}
}

# 輸出結果
result: 張 三
2、解讀
(1)請求權限
-
通過 checkSelfPermission 方法檢查是否已有權限,如果已有權限,執行測試代碼
-
如果沒有權限,則使用 Activity Result API 請求權限
-
請求完成后,檢查所有權限是否都被授予,如果都被授予,執行測試代碼
// Check whether both storage permissions have already been granted.
if (checkSelfPermission(Manifest.permission.READ_EXTERNAL_STORAGE) != PackageManager.PERMISSION_GRANTED
        || checkSelfPermission(Manifest.permission.WRITE_EXTERNAL_STORAGE) != PackageManager.PERMISSION_GRANTED) {
    // Not granted yet: request them through the Activity Result API.
    registerForActivityResult(new ActivityResultContracts.RequestMultiplePermissions(), o -> {
        for (Map.Entry<String, Boolean> entry : o.entrySet()) {
            Log.i(TAG, entry.getKey() + " : " + entry.getValue());
        }
        boolean allGranted = true;
        for (Map.Entry<String, Boolean> entry : o.entrySet()) {
            if (!entry.getValue()) {
                allGranted = false;
                break;
            }
        }
        // Run the test only when every requested permission was granted.
        if (allGranted) {
            test();
        } else {
            Log.i(TAG, "權限未全部授予");
        }
    }).launch(new String[]{
            Manifest.permission.READ_EXTERNAL_STORAGE,
            Manifest.permission.WRITE_EXTERNAL_STORAGE
    });
} else {
    // Already granted: run the test straight away.
    test();
}
(2)復制 Tessdata
- 從 src/main/assets 目錄複製 .traineddata 文件到應用私有存儲外部目錄的 files/tesseract/tessdata/ 目錄
/**
 * Copies the named asset files into {@code <external files dir>/tesseract/tessdata/}.
 * Files that already exist are skipped; a file whose copy fails is deleted again so a
 * truncated file is not mistaken for a complete one on the next run.
 *
 * @param tessDataFiles asset file names, e.g. {@code "eng.traineddata"}
 */
public void copyTessDataToStorage(String... tessDataFiles) {
    if (tessDataFiles == null) {
        return;
    }
    File externalFilesDir = getExternalFilesDir(null);
    if (externalFilesDir == null) {
        // Without this guard the target path would silently become "null/tesseract/tessdata/".
        Log.i(TAG, "外部存儲不可用");
        return;
    }

    // Create the target directory.
    String tessDataDirPath = externalFilesDir + "/tesseract/tessdata/";
    File tessDataDir = new File(tessDataDirPath);
    if (!tessDataDir.isDirectory() && !tessDataDir.mkdirs()) {
        Log.e(TAG, "無法創建目錄: " + tessDataDirPath);
        return;
    }

    AssetManager assetManager = getAssets();
    for (String fileName : tessDataFiles) {
        File outFile = new File(tessDataDirPath + fileName);
        if (outFile.exists()) {
            continue; // Skip files that were already copied.
        }
        try (InputStream in = assetManager.open(fileName);
             OutputStream out = new FileOutputStream(outFile)) {
            byte[] buffer = new byte[1024];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        } catch (IOException e) {
            Log.e(TAG, "複製失敗: " + fileName, e);
            // Streams are closed by now; drop the partial file so the next run retries.
            if (outFile.exists() && !outFile.delete()) {
                Log.e(TAG, "無法刪除不完整的文件: " + outFile);
            }
        }
    }
}
(3)初始化與識別
- 調用 init 方法初始化 Tesseract
- 第一個參數是包含 Tessdata 目錄的父目錄,Tessdata 在 files/tesseract/tessdata/ 目錄,那麼這裡就是 files/tesseract/
- 第二個參數是語言代碼,多個可以用加號 + 連接,chi_sim+eng 表示識別中文和英文
// Create the engine and build the path of the directory that contains tessdata/.
TessBaseAPI tessBaseAPI = new TessBaseAPI();
String tesseractDirPath = getExternalFilesDir(null) + "/tesseract/";

// Initialize for Simplified Chinese plus English; stop when init reports failure.
if (!tessBaseAPI.init(tesseractDirPath, "chi_sim+eng")) {
    Log.i(TAG, "初始化 Tesseract 失敗");
    return;
}
- 調用 setImage 方法識別,調用 getUTF8Text 獲取識別結果
// Decode the bundled test image and hand it to the engine.
Bitmap bitmap = BitmapFactory.decodeResource(getResources(), R.drawable.test_img);
tessBaseAPI.setImage(bitmap);

// Fetch the recognized text and log it.
Log.i(TAG, "result: " + tessBaseAPI.getUTF8Text());
三、補充情況
1、Bitmap 獲取失敗的情況
- 這里從一個不存在的資源文件獲取 Bitmap
// 1001 is not a valid resource id here, so the decoded bitmap is logged as null.
Bitmap bitmap = BitmapFactory.decodeResource(getResources(), 1001);
Log.i(TAG, "bitmap: " + bitmap);

// Deliberately pass the bitmap on unchecked to demonstrate the resulting failure.
tessBaseAPI.setImage(bitmap);
Log.i(TAG, "result: " + tessBaseAPI.getUTF8Text());
# 輸出結果
bitmap: null
...
FATAL EXCEPTION: main
Process: com.my.ocr_tesseract, PID: 25149
java.lang.RuntimeException: Unable to start activity ComponentInfo{com.my.ocr_tesseract/com.my.ocr_tesseract.MainActivity}: java.lang.RuntimeException: Failed to read bitmap
2、識別連筆字
- Tess-two 文字識別,識別連筆字的能力有限,推薦使用 ML Kit 數字墨水識別

# 輸出結果result:

# 輸出結果result: 鋤
3、使用應用私有存儲內部目錄
- 也可以使用應用私有存儲內部目錄(getFilesDir),這樣也不需要請求權限;事實上自 Android 4.4(API 19)起,getExternalFilesDir 返回的應用專屬外部目錄同樣不需要存儲權限
/**
 * Copies the language data into internal storage, initializes Tesseract and logs the text
 * recognized in {@code R.drawable.test_img}. The native engine is always released via
 * {@code end()}.
 */
private void test() {
    copyTessDataToStorage("chi_sim.traineddata", "eng.traineddata");

    // init() expects the PARENT of the tessdata/ directory.
    String tesseractDirPath = getFilesDir() + "/tesseract/";
    TessBaseAPI tessBaseAPI = new TessBaseAPI();
    try {
        boolean initResult = tessBaseAPI.init(tesseractDirPath, "chi_sim+eng");
        if (!initResult) {
            Log.i(TAG, "初始化 Tesseract 失敗");
            return;
        }

        Bitmap bitmap = BitmapFactory.decodeResource(getResources(), R.drawable.test_img);
        Log.i(TAG, "bitmap: " + bitmap);
        if (bitmap == null) {
            // setImage(null) throws "Failed to read bitmap"; bail out instead of crashing.
            Log.i(TAG, "Bitmap 獲取失敗");
            return;
        }
        tessBaseAPI.setImage(bitmap);
        String result = tessBaseAPI.getUTF8Text();
        Log.i(TAG, "result: " + result);
    } finally {
        // Release the native Tesseract instance on every exit path.
        tessBaseAPI.end();
    }
}

/**
 * Copies the named asset files into {@code <internal files dir>/tesseract/tessdata/}.
 * Files that already exist are skipped; a file whose copy fails is deleted again so a
 * truncated file is not mistaken for a complete one on the next run.
 *
 * @param tessDataFiles asset file names, e.g. {@code "eng.traineddata"}
 */
public void copyTessDataToStorage(String... tessDataFiles) {
    if (tessDataFiles == null) {
        return;
    }
    String tessDataDirPath = getFilesDir() + "/tesseract/tessdata/";
    File tessDataDir = new File(tessDataDirPath);
    if (!tessDataDir.isDirectory() && !tessDataDir.mkdirs()) {
        Log.e(TAG, "無法創建目錄: " + tessDataDirPath);
        return;
    }

    AssetManager assetManager = getAssets();
    for (String fileName : tessDataFiles) {
        File outFile = new File(tessDataDirPath + fileName);
        if (outFile.exists()) {
            continue;
        }
        try (InputStream in = assetManager.open(fileName);
             OutputStream out = new FileOutputStream(outFile)) {
            byte[] buffer = new byte[1024];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        } catch (IOException e) {
            Log.e(TAG, "複製失敗: " + fileName, e);
            // Streams are closed by now; drop the partial file so the next run retries.
            if (outFile.exists() && !outFile.delete()) {
                Log.e(TAG, "無法刪除不完整的文件: " + outFile);
            }
        }
    }
}