/*
 * Purpose: guess a text file's encoding from its leading bytes.
 *
 * The first three bytes of a file are enough to tell these formats apart:
 *   ANSI:               no marker; file content starts at the first byte
 *   Unicode:            first two bytes are FF FE
 *                       (UTF-16 little endian byte-order mark)
 *   Unicode big endian: first two bytes are FE FF
 *   UTF-8:              first two bytes are EF BB, third byte is BF
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read up to the first n bytes of a file and print each byte in hexadecimal.
 *
 * Output is one line on stdout: "<fileName>:\t" followed by every byte that
 * was actually read as "%x\t", then a newline. Files shorter than n bytes
 * print only the bytes they contain.
 *
 * fileName  path of the file to inspect
 * n         maximum number of leading bytes to dump; values <= 0 dump nothing
 *
 * On failure to open the file or to allocate the buffer, a message is
 * printed to stdout and the function returns without dumping anything.
 */
void readNBytes(const char *fileName, int n)
{
    /* A negative n would wrap to a huge size_t; clamp it to zero instead. */
    size_t want = n > 0 ? (size_t)n : 0;

    /* Binary mode: the raw bytes matter, so no newline/EOF translation. */
    FILE *fp = fopen(fileName, "rb");
    if (fp == NULL) {
        printf("open file [%s] failed.\n", fileName);
        return;
    }

    /* Allocate only after the open succeeded so no path leaks the buffer.
     * malloc(0) may legally return NULL, so always request at least 1 byte. */
    unsigned char *buf = malloc(want > 0 ? want : 1);
    if (buf == NULL) {
        fclose(fp);
        printf("allocate %d bytes for [%s] failed.\n", n, fileName);
        return;
    }

    /* fread may return fewer items than requested (short file or error);
     * only that many bytes of buf are initialized. */
    size_t got = fread(buf, sizeof *buf, want, fp);
    fclose(fp);

    printf("%s:\t", fileName);
    for (size_t i = 0; i < got; i++) {
        printf("%x\t", buf[i]);
    }
    printf("\n");

    free(buf);
}

/* Dump the first three bytes of each sample file. */
int main(void)
{
    /* Each test file contains the same text: 你what123456 */
    char fileName[][50] = { "ansi.txt", "unicode.txt", "ubigendian.txt", "utf8.txt" };
    size_t count = sizeof fileName / sizeof fileName[0];

    for (size_t i = 0; i < count; i++) {
        readNBytes(fileName[i], 3);
    }
    return 0;
}
每個測試文件中的內容都是:你what123456
運行結果為:
ansi.txt:       c4      e3      77
unicode.txt:    ff      fe      60
ubigendian.txt: fe      ff      4f
utf8.txt:       ef      bb      bf
分類: C/C++, Linux_C/C++