# This script exports one MySQL table to a CSV file and then uploads it to HDFS.
# It was written as a complete replacement for sqoop, which kept failing, e.g.:
#   https://blog.csdn.net/weixin_45357522/article/details/149498030
#   https://blog.csdn.net/weixin_45357522/article/details/149449413
# In particular the "QueryResult" problem has no clean fix, hence this script.
# Steps whose success matters are followed by checkRst, which aborts the whole
# script on failure; otherwise dolphinscheduler would report the task as
# successful and keep running downstream tasks, misleading the administrator
# into thinking the load worked. Best-effort steps are deliberately unchecked.
# This script is intended to be used as a dolphinscheduler data-loading task.
# NOTE: passwordless ssh must be configured for the "hive" user between the
# dolphinscheduler worker and the database server (used to download the data
# file), and the database export directory must be writable so that leftover
# files can be removed from the server.
# NOTE(review): the shebang below should be the very first line of the file.
#!/bin/bash
# Require the five positional parameters; print usage and abort otherwise.
# Use an arithmetic comparison: inside [[ ]], "<" compares *strings*
# lexicographically (e.g. "10" < "5" is true), which mis-handles arg counts.
if (( $# < 5 )); then
  echo "usage: $0 mysqlConnect sql hdfsRoot srcSystem bizDate"
  echo "e.g.: $0 \"-h 10.6.2.9 -P 13306 -u root -ppasswd123\" \"SELECT id,is_set,zl_office_id FROM loan.project_info\" \"/dmp/biz\" \"yecai\" \"20250727\""
  exit 1
fi
# checkRst: abort the whole script when the immediately preceding command
# failed. Must be called right after the command to verify, since it examines
# $? first. Without this, dolphinscheduler would treat a failed step as
# success and keep executing downstream tasks.
checkRst() {
  local rc=$?
  if (( rc != 0 )); then
    echo "--- check failed"
    exit 1
  else
    echo "--- check success"
  fi
}
# parse_params: read the five script arguments and derive dbHost / tableName.
# Globals written: mysqlConnect, sql, dmpRoot, srcSystem, bizDate, dbHost,
# tableName (intentionally global — the rest of the script uses them).
parse_params() {
  mysqlConnect=$1
  sql=$2
  dmpRoot=$3
  srcSystem=$4
  bizDate=$5
  echo "===== got input params:"
  echo "mysqlConnect: $mysqlConnect"
  echo "sql: $sql"
  echo "dmpRoot: $dmpRoot"
  echo "srcSystem: $srcSystem"
  echo "bizDate: $bizDate"
  # Extract the host that follows "-h " in the mysql connect string.
  dbHost=$(echo "$mysqlConnect" | awk -F '-h ' '{print $2}' | awk -F ' ' '{print $1}')
  echo "dbHost: $dbHost"  # was a duplicated "bizDate:" echo — label/value bug fixed
  echo "===== parsed params:"
  # Table name is the first word after " from "; retry with upper-case " FROM ".
  tableName=$(echo "$sql" | awk -F ' from ' '{print $2}' | awk -F ' ' '{print $1}')
  if [ -z "$tableName" ]; then
    tableName=$(echo "$sql" | awk -F ' FROM ' '{print $2}' | awk -F ' ' '{print $1}')
  fi
  # Strip an optional "database." qualifier, keeping only the table part.
  if [[ $tableName == *.* ]]; then
    tableName=$(echo "$tableName" | awk -F '.' '{print $2}')
  fi
  echo "tableName: $tableName"
}
parse_params "$@"
echo "===== end of params"

# Step 1 (best effort): remove any leftover export file on the DB server.
echo "1.嘗試刪除數據庫服務器殘留文件"
ssh "hive@${dbHost}" rm -f "/tmp/${tableName}.csv"

# Step 2: export the query result to a CSV on the DB server.
# ${mysqlConnect} is intentionally unquoted: it holds multiple options
# ("-h host -P port ...") that must word-split into separate arguments.
echo "2.導出數據到數據庫服務器csv文件"
mysql ${mysqlConnect} -e "$sql INTO OUTFILE '/tmp/$tableName.csv' FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'"
checkRst

# Step 3: download the CSV from the DB server to this worker's home dir.
echo "3.從數據庫服務器下載文件到本地"
scp "hive@${dbHost}:/tmp/${tableName}.csv" ~/
checkRst

# Step 4: clean up the export file on the DB server.
echo "4.刪除數據庫服務器殘留文件"
ssh "hive@${dbHost}" rm -f "/tmp/${tableName}.csv"

# Step 5 (best effort): remove a previous copy of this partition on HDFS.
echo "5.嘗試清除hdfs舊文件"
hdfs dfs -rm -r "${dmpRoot}/tmp/${srcSystem}/${tableName}/${bizDate}"

# Step 6 (best effort): make sure the target HDFS directory exists.
echo "6.嘗試創建hdfs文件目錄"
hdfs dfs -mkdir -p "${dmpRoot}/tmp/${srcSystem}/${tableName}/${bizDate}"

# Step 7: upload the CSV to HDFS; this one must succeed.
echo "7.上傳本地文件到hdfs"
hdfs dfs -put ~/"${tableName}.csv" "${dmpRoot}/tmp/${srcSystem}/${tableName}/${bizDate}"
checkRst

# Step 8: remove the local temporary file.
echo "8.清除本地臨時文件"
rm -f ~/"${tableName}.csv"