背景：項目需要從 HTML 中提取說明書的目錄。
實現：由於該 HTML 包含說明書的全部內容，因此先將其中的目錄部分手動另存為一個單獨的 HTML 文件 dir26.html，再用腳本解析。
python
import requests
from bs4 import BeautifulSoup
import json

# Path of the hand-extracted table-of-contents HTML; replace with the actual file path.
SOURCE_HTML = "dir26.html"
# JSON file consumed by the Android side (which decodes it as UTF-8).
OUTPUT_JSON = "manualdir.json"


def parse_entry(text: str):
    """Split one table-of-contents line into its title and page number.

    The line is split on single spaces; the first token is taken as the
    directory title and the third token as the page number.

    Args:
        text: Text content of one ``<div>`` from the table-of-contents HTML.

    Returns:
        ``{'directory': <title>, 'page': <page>}``, or ``None`` when the text
        has fewer than three space-separated tokens (e.g. an empty or
        wrapper ``<div>``), so that such elements can be skipped instead of
        raising ``IndexError``.
    """
    parts = text.split(" ")
    if len(parts) < 3:
        return None
    return {'directory': parts[0], 'page': parts[2]}


def main():
    """Read the table-of-contents HTML and write its entries to a JSON file."""
    # NOTE(review): assumes the HTML file is UTF-8 encoded; adjust if it is not.
    with open(SOURCE_HTML, "r", encoding="utf-8") as file:
        html = file.read()

    soup = BeautifulSoup(html, "html.parser")
    results = soup.find_all('div')  # adjust the selector as needed

    # Extract the data and convert it into a JSON-serialisable list.
    data = []
    for result in results:
        entry = parse_entry(result.text)
        if entry is None:
            continue
        print(entry['directory'])
        print(entry['page'])
        data.append(entry)

    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be pinned to UTF-8 rather than left to the platform default.
    with open(OUTPUT_JSON, "w", encoding="utf-8") as file:
        json.dump(data, file, ensure_ascii=False)


if __name__ == "__main__":
    main()
Android 中讀取該 JSON 文件內容的代碼：
// Holds the information loaded from the JSON file: each table-of-contents title
// mapped to its page anchor string ("#pf" followed by the page number in hex).
// A LinkedHashMap is used, so entries iterate in the order they were inserted.
private LinkedHashMap<String, String> mDirMap = new LinkedHashMap<String, String>();
private void initDirectory() {byte[] buffer;try {//將json文件讀取到buffer數組中InputStream is = getContext().getResources().getAssets().open("manualdir.json");buffer = new byte[is.available()];is.read(buffer);} catch (IOException e) {Log.w(TAG, "manual dir json IOException e:" + e);return;}// 將字符數組轉換為UTF-8編碼的字符串String json;try {json = new String(buffer, "UTF-8");} catch (UnsupportedEncodingException e) {Log.w(TAG, "manual dir json UnsupportedEncodingException e" + e);return;}//將字符串json轉換為json對象,以便于取出數據try {JSONArray jsa = new JSONArray(json);for (int i = 0; i < jsa.length();i++){JSONObject jso = jsa.getJSONObject(i);Log.w(TAG, " jsa.length():" + jsa.length());String title = jso.optString("directory");//標題Log.w(TAG, "title:"+title);int page = jso.optInt("page");//目錄頁數mDirMap.put(title, "#pf"+Integer.toHexString(page));}} catch (JSONException e) {Log.w(TAG, "manual dir json exception e:"+e);}}