最近由于老項目頻繁掛掉,而項目又經過多批人之手,短時間內難以定位問題,所以只好寫一個監控程序。時間比較緊(只有半天時間),而且水平有限,大神勿噴,有好的方法還請賜教。 1、問題描述:分兩種情況:1.1、tomcat 徹底掛掉,端口不再被進程占用;1.2、并沒有徹底掛掉,端口仍被進程占用,但是接口訪問異常; 2、解決思路:啟動一個 java 服務輪詢(10秒鐘一次)程序的一個接口(比如獲取當前時間),如果請求不到則查看該端口是否被進程占用,如果被占用則殺死進程,然后重啟 tomcat;如果沒有被進程占用則直接重啟 tomcat; 本來考慮的是監控多個程序的,但是由于時間問題就先監控一個吧。
3.1 輪詢接口
@Service
public class SchedulerService {
private static final Logger logger = LoggerFactory.getLogger(SchedulerService.class);
@Autowired
private KillService killService;
@Value("#{system['jiankong.ports']}")
private String portsStr;
@Value("#{system['url']}")
private String url;
/**
* 監控接口是否通 如果接口不通 或者返回結果不對則重啟服務 并發送郵件 每10秒執行一次掃描
* @author gaozemin
* @date 2017年10月18日
* @throws Exception
* @return
*/
public void watch() throws Exception {
String[] ports = portsStr.split(",");
for (String port : ports) {
// 調用測試接口
String ret = HttpUtil.sendPost(url, null);
if (ret == null) {// 如果返回結果為空重啟服務
logger.info("返回結果為null ");
killService.start(Integer.valueOf(port));
} else {
try {
Map retMap = JSONObject.parseObject(ret, Map.class);
String retFlag = String.valueOf(retMap.get("result"));
if (!"200".equals(retFlag)) {// 如果返回結果異常 重啟服務
killService.start(Integer.valueOf(port));
} else {
logger.info("系統運行正常....");
}
} catch (Exception e) {
logger.info("返回值解析異常....");
killService.start(Integer.valueOf(port));
}
}
logger.info("監控執行中..");
}
}
3.2 如果監控到異常則重啟服務
/**
 * Restarts the Tomcat instance bound to a port: kills the process holding the port (if any),
 * launches the configured start command and e-mails the configured recipients.
 *
 * <p>Restarts of the same port are throttled: a new restart is skipped while less than
 * {@code waitTime} milliseconds have passed since the previous one, so the service gets time
 * to come up.
 */
@Service
public class KillService {
    private static final Logger logger = LoggerFactory.getLogger(KillService.class);

    /** Matches the leading "protocol local-address" columns of a netstat output line. */
    private static final Pattern PROTOCOL_AND_ADDRESS = Pattern.compile("^ *[a-zA-Z]+ +\\S+");

    /** Minimum interval between two restarts of the same port, in milliseconds. */
    @Value("#{system['waitTime']}")
    private Long waitTime;

    /** Command used to start Tomcat on the non-Linux branch. */
    @Value("#{system['startTomcatExec']}")
    private String startTomcatExec;

    /** Command used to start Tomcat on the Linux branch. */
    @Value("#{system['startLinuxTomcatExec']}")
    private String startLinuxTomcatExec;

    /** Command prefix that lists the processes using a port; the port number is appended. */
    @Value("#{system['findPid']}")
    private String findPid;

    @Value("#{system['isLinux']}")
    private boolean isLinux;

    /** Comma-separated list of notification recipients. */
    @Value("#{system['send.emails']}")
    private String emails;

    @Autowired
    private SendMail sendMail;

    /** Time of the last restart attempt, keyed by port. */
    private final Map<Integer, Date> lastRestartTimes = new HashMap<>();

    /**
     * Restarts the service on {@code port} unless a restart was already triggered less than
     * {@code waitTime} milliseconds ago.
     *
     * @param port port of the service to restart
     */
    public void start(int port) {
        Date lastExecTime = lastRestartTimes.get(port);
        if (lastExecTime != null) {
            // A restart was recorded before: make sure enough time has passed.
            long elapsed = new Date().getTime() - lastExecTime.getTime();
            logger.info("間隔時間:{}", elapsed);
            if (elapsed < waitTime) {
                logger.info("間隔時間過短 等待程序啟動!");
                return;
            }
        }
        lastRestartTimes.put(port, new Date());
        restartTomcat(port, isLinux);
    }

    /**
     * Kills the process on the port, starts Tomcat again and sends the notification mails.
     * Failures are logged and not propagated to the caller.
     */
    private void restartTomcat(int port, boolean isLinux) {
        try {
            if (isLinux) {
                linuxRestart(port);
                StartTomcatThread starter = new StartTomcatThread(startLinuxTomcatExec);
                starter.start();
            } else {
                windowsRestart(port);
            }
            String dataStr = "admin 服務宕機 現已自動重啟 請及時查看日志 修改錯誤!";
            for (String email : emails.split(",")) {
                sendMail.sendMsg(email, dataStr);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the calling thread can react to it.
            Thread.currentThread().interrupt();
            logger.error("Interrupted while restarting tomcat on port {}", port, e);
        } catch (Exception e) {
            logger.error("Failed to restart tomcat on port {}", port, e);
        }
    }

    /**
     * Non-Linux branch: looks up the processes on the port, force-kills them if any were found,
     * then starts Tomcat.
     *
     * <p>NOTE(review): {@code findPid + port} is passed to {@code Runtime.exec(String)}, which
     * does not interpret shell pipes itself; presumably the configured command starts with a
     * shell such as {@code cmd /c} - confirm against the configuration.
     */
    private void windowsRestart(int port) throws IOException {
        Process lookup = Runtime.getRuntime().exec(findPid + port);
        List<String> lines = read(port, lookup.getInputStream(), "UTF-8");
        if (lines.isEmpty()) {
            logger.info("找不到端口:{}的進程", port);
            StartTomcatThread starter = new StartTomcatThread(startTomcatExec);
            starter.start();
            logger.info("tomcat已重啟");
        } else {
            logger.info("找到{}個進程,正在準備清理", lines.size());
            kill(lines);
            StartTomcatThread starter = new StartTomcatThread(startTomcatExec);
            starter.start();
        }
    }

    /**
     * Verifies that a netstat line really belongs to the given port. This is needed because
     * findstr returns every line that contains the text: searching for port 80 also returns
     * lines for port 8099.
     *
     * @param port expected local port
     * @param str  one line of netstat output
     * @return {@code true} if the local address of the line ends with exactly {@code port};
     *         {@code false} for any other line, including lines that do not look like a
     *         netstat entry
     */
    private boolean validPort(int port, String str) {
        Matcher matcher = PROTOCOL_AND_ADDRESS.matcher(str);
        if (!matcher.find()) {
            // Blank or header-like line: group() would throw without a successful match.
            return false;
        }
        String address = matcher.group();
        String portText = address.substring(address.lastIndexOf(":") + 1);
        try {
            return port == Integer.parseInt(portText);
        } catch (NumberFormatException e) {
            logger.warn("查找到錯誤的端口:{}", portText);
            return false;
        }
    }

    /**
     * Linux branch: force-kills the process listening on the port.
     *
     * <p>The grep pattern is anchored as {@code ":<port> "} so that only the local-address
     * column with exactly this port matches; a bare port number would also match longer ports
     * (8080 vs 18080) or PIDs containing the same digits.
     *
     * @param port port whose listening process is killed
     * @throws IOException          if the shell cannot be started
     * @throws InterruptedException if interrupted while waiting for the command to finish
     */
    public void linuxRestart(int port) throws IOException, InterruptedException {
        String cmd = "kill -9 $(netstat -tlnp|grep ':" + port
                + " '|awk '{print $7}'|awk -F '/' '{print $1}')";
        String[] command = { "sh", "-c", cmd };
        Process pro = Runtime.getRuntime().exec(command);
        pro.waitFor();
    }

    /**
     * Extracts the PID (last column) from each netstat line, de-duplicates the PIDs with a set
     * and kills them.
     *
     * @param data netstat lines that belong to the monitored port
     */
    public void kill(List<String> data) {
        Set<Integer> pids = new HashSet<>();
        for (String line : data) {
            String trimmed = line.trim();
            String spid = trimmed.substring(trimmed.lastIndexOf(' ') + 1);
            try {
                int pid = Integer.parseInt(spid);
                // PID 0 is the Windows idle process (shown e.g. for TIME_WAIT rows); never kill it.
                if (pid > 0) {
                    pids.add(pid);
                }
            } catch (NumberFormatException e) {
                logger.warn("獲取的進程號錯誤:{}", spid);
            }
        }
        logger.info("列表:{}", pids);
        killWithPid(pids);
    }

    /**
     * Force-kills every given process with {@code taskkill} and logs the command output.
     *
     * @param pids process ids to kill
     */
    public void killWithPid(Set<Integer> pids) {
        for (Integer pid : pids) {
            try {
                Process process = Runtime.getRuntime().exec("taskkill /F /pid " + pid);
                String txt = readTxt(process.getInputStream(), "UTF-8");
                logger.info(txt);
            } catch (IOException e) {
                logger.error("taskkill failed for pid {}", pid, e);
            }
        }
    }

    /**
     * Reads the stream line by line and keeps only the lines accepted by
     * {@link #validPort(int, String)}. The stream is closed when done.
     */
    private List<String> read(int port, InputStream in, String charset) throws IOException {
        List<String> data = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (validPort(port, line)) {
                    data.add(line);
                }
            }
        }
        return data;
    }

    /**
     * Reads the whole stream as text; line terminators are dropped. The stream is closed when
     * done.
     *
     * @param in      stream to read
     * @param charset name of the charset used for decoding
     * @return the concatenated lines
     * @throws IOException if reading fails or the charset is unsupported
     */
    public String readTxt(InputStream in, String charset) throws IOException {
        StringBuilder sb = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset))) {
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line);
            }
        }
        return sb.toString();
    }
}