數據挖掘—K-Means算法(Java實現)

算法描述

(1)任意選擇k個數據對象作為初始聚類中心
(2)根據簇中對象的平均值,將每個對象賦給最類似的簇
(3)更新簇的平均值,即計算每個簇中所有對象的平均值
(4)計算聚類準則函數E
(5)重複步驟(2)-(4),直到準則函數E的值不再發生變化

代碼


public class Cluster {public String clusterName; // 類簇名private Medoid medoid; // 類簇的質點private ArrayList<DataPoint> dataPoints; // 類簇中各樣本點public Cluster(String clusterName) {this.clusterName = clusterName;this.medoid = null; // will be set by calling setCentroid()dataPoints = new ArrayList<DataPoint>();}public void setMedoid(Medoid c) {medoid = c;}public Medoid getMedoid() {return medoid;}public void addDataPoint(DataPoint dp) { // called from CAInstancedp.setCluster(this);// 標注該類簇屬于某點,計算歐式距離this.dataPoints.add(dp);}public void removeDataPoint(DataPoint dp) {this.dataPoints.remove(dp);}public int getNumDataPoints() {return this.dataPoints.size();}public DataPoint getDataPoint(int pos) {return (DataPoint) this.dataPoints.get(pos);}public String getName() {return this.clusterName;}public ArrayList<DataPoint> getDataPoints() {return this.dataPoints;}
}

public class ClusterAnalysis {public Cluster[] clusters;// 所有類簇private int miter;// 迭代次數private ArrayList<DataPoint> dataPoints = new ArrayList<DataPoint>();// 所有樣本點private int dimNum;//維度public ClusterAnalysis(int k, int iter, ArrayList<DataPoint> dataPoints,int dimNum) {clusters = new Cluster[k];// 類簇種類數for (int i = 0; i < k; i++) {clusters[i] = new Cluster(i+"");}this.miter = iter;this.dataPoints = dataPoints;this.dimNum=dimNum;}public int getIterations() {return miter;}public ArrayList<DataPoint>[] getClusterOutput() {ArrayList<DataPoint> v[] = new ArrayList[clusters.length];for (int i = 0; i < clusters.length; i++) {v[i] = clusters[i].getDataPoints();}return v;}public void startAnalysis(double[][] medoids) {setInitialMedoids(medoids);double[][] newMedoids=medoids;double[][] oldMedoids=new double[medoids.length][this.dimNum];while(!isEqual(oldMedoids,newMedoids)){for(int m = 0; m < clusters.length; m++){//每次迭代開始情況各類簇的點clusters[m].getDataPoints().clear();}for (int j = 0; j < dataPoints.size(); j++) {int clusterIndex=0;double minDistance=Double.MAX_VALUE;for (int k = 0; k < clusters.length; k++) {//判斷樣本點屬于哪個類簇double eucDistance=dataPoints.get(j).testEuclideanDistance(clusters[k].getMedoid());if(eucDistance<minDistance){minDistance=eucDistance;clusterIndex=k;}}//將該樣本點添加到該類簇clusters[clusterIndex].addDataPoint(dataPoints.get(j));}for(int m = 0; m < clusters.length; m++){clusters[m].getMedoid().calcMedoid();//重新計算各類簇的質點}for(int i=0;i<medoids.length;i++){for(int j=0;j<this.dimNum;j++){oldMedoids[i][j]=newMedoids[i][j];}}for(int n=0;n<clusters.length;n++){newMedoids[n]=clusters[n].getMedoid().getDimensioin();}this.miter++;}}private void setInitialMedoids(double[][] medoids) {for (int n = 0; n < clusters.length; n++) {Medoid medoid = new Medoid(medoids[n]);clusters[n].setMedoid(medoid);medoid.setCluster(clusters[n]);}}private boolean isEqual(double[][] oldMedoids,double[][] newMedoids){boolean flag=false;for(int i=0;i<oldMedoids.length;i++){for(int 
j=0;j<oldMedoids[i].length;j++){if(oldMedoids[i][j]!=newMedoids[i][j]){return flag;}}}flag=true;return flag;}
}

public class DataPoint {private double dimension[]; //樣本點的維度private String pointName; //樣本點名字private Cluster cluster; //類簇private double euDt;//樣本點到質點的距離public DataPoint(double dimension[], String pointName) {this.dimension = dimension;this.pointName = pointName;this.cluster = null;}@Overridepublic String toString() {String result = "Point_id=" + pointName + "  [";for (int i = 0; i < dimension.length; i++) {result += String.format("%.2f",dimension[i]) + " ";}return result.trim()+"] clusterId: "+cluster.clusterName;}public void setCluster(Cluster cluster) {this.cluster = cluster;}public double calEuclideanDistanceSum() {double sum=0.0;Cluster cluster=this.getCluster();ArrayList<DataPoint> dataPoints=cluster.getDataPoints();for(int i=0;i<dataPoints.size();i++){double[] dims=dataPoints.get(i).getDimensioin();for(int j=0;j<dims.length;j++){double temp=Math.pow((dims[j]-this.dimension[j]),2);sum=sum+temp;}}return Math.sqrt(sum);}public double testEuclideanDistance(Medoid c) {double sum=0.0;double[] cDim=c.getDimensioin();for(int i=0;i<dimension.length;i++){double temp=Math.pow((dimension[i]-cDim[i]),2);sum=sum+temp;}return Math.sqrt(sum);}public double[] getDimensioin() {return this.dimension;}public Cluster getCluster() {return this.cluster;}public double getCurrentEuDt() {return this.euDt;}public String getPointName() {return this.pointName;}
}
public class Medoid{private double dimension[]; // 質點的維度private Cluster cluster; //所屬類簇private double etdDisSum;//Medoid到本類簇中所有的歐式距離之和public String toString() {String result ="  [";DecimalFormat decimalFormat=new DecimalFormat("0.000000");for (int i = 0; i < dimension.length; i++) {result += decimalFormat.format(dimension[i]) + " ";}return result.trim()+"] clusterId: "+cluster.clusterName;}public Medoid(double dimension[]) {this.dimension = dimension;}public void setCluster(Cluster c) {this.cluster = c;}public double[] getDimensioin() {return this.dimension;}public Cluster getCluster() {return this.cluster;}public void calcMedoid() {// 取代價最小的點calcEtdDisSum();double minEucDisSum = this.etdDisSum;ArrayList<DataPoint> dps = this.cluster.getDataPoints();for (int i = 0; i < dps.size(); i++) {double tempeucDisSum = dps.get(i).calEuclideanDistanceSum();if (tempeucDisSum < minEucDisSum) {dimension = dps.get(i).getDimensioin();minEucDisSum=tempeucDisSum;}}}// 計算該Medoid到同類簇所有樣本點的歐斯距離和private void calcEtdDisSum() {double sum=0.0;Cluster cluster=this.getCluster();ArrayList<DataPoint> dataPoints=cluster.getDataPoints();for(int i=0;i<dataPoints.size();i++){double[] dims=dataPoints.get(i).getDimensioin();for(int j=0;j<dims.length;j++){double temp=Math.abs(dims[j]-this.dimension[j]);sum=sum+temp;}}etdDisSum= sum;}
}

public class TestMain {public static List<double[]> readTxt(String fileName){List<double[]> list=new ArrayList<>();try {File filename = new File(fileName); // 讀取input.txt文件InputStreamReader reader = new InputStreamReader(new FileInputStream(filename)); // 建立一個輸入流對象readerBufferedReader br = new BufferedReader(reader);String line = "";line = br.readLine();while (true) {line = br.readLine();if(line==null) break;String[] temp=line.split(",");double[] c=new double[temp.length];for(int i=0;i<temp.length;i++){c[i]=Float.parseFloat(temp[i]);}list.add(c);}} catch (Exception e) {e.printStackTrace();}return list;}public static void writeTxt(String content){try { // 防止文件建立或讀取失敗,用catch捕捉錯誤并打印,也可以throw/* 讀入TXT文件 */File writename = new File("src/km/output.txt"); // 相對路徑,如果沒有則要建立一個新的output。txt文件BufferedWriter out = new BufferedWriter(new FileWriter(writename));out.write(content); // \r\n即為換行out.flush(); // 把緩存區內容壓入文件out.close(); // 最后記得關閉文件} catch (Exception e) {e.printStackTrace();}}public static void main (String args[]){ArrayList<DataPoint> dataPoints = new ArrayList<DataPoint>();List<double[]> list=readTxt("src/km/t2.txt");for(int i=0;i<list.size();i++){dataPoints.add(new DataPoint(list.get(i),String.valueOf(i)));}long s=System.currentTimeMillis();ClusterAnalysis ca=new ClusterAnalysis(5,200,dataPoints,5);double[][] cen={list.get(22),list.get(3),list.get(45),list.get(156),list.get(96)};ca.startAnalysis(cen);StringBuilder stringBuilder=new StringBuilder();ArrayList<DataPoint>[] v = ca.getClusterOutput();System.out.println("K-中心點聚類算法運行時間"+(System.currentTimeMillis()-s)+"ms");for (int ii=0; ii<v.length; ii++){ArrayList tempV = v[ii];stringBuilder.append("\n").append("-----------Cluster").append(ii).append("---------").append("\n");stringBuilder.append("Mid_Point:  ").append(ca.clusters[ii].getMedoid()).append("  Points_num:  "+ca.clusters[ii].getDataPoints().size()).append("\n");System.out.println(ca.clusters[ii].getMedoid()+"  Points_num:  
"+ca.clusters[ii].getDataPoints().size());Iterator iter = tempV.iterator();while(iter.hasNext()){DataPoint dpTemp = (DataPoint)iter.next();stringBuilder.append(dpTemp).append("\n");}}writeTxt(stringBuilder.toString());}}

本文來自互聯網用戶投稿,該文觀點僅代表作者本人,不代表本站立場。本站僅提供信息存儲空間服務,不擁有所有權,不承擔相關法律責任。
如若轉載,請注明出處:http://www.pswp.cn/news/392088.shtml
繁體地址,請注明出處:http://hk.pswp.cn/news/392088.shtml
英文地址,請注明出處:http://en.pswp.cn/news/392088.shtml

如若內容造成侵權/違法違規/事實不符,請聯系多彩編程網進行投訴反饋email:809451989@qq.com,一經查實,立即刪除!

相關文章

自我價值感缺失的表現_不同類型的缺失價值觀和應對方法

自我價值感缺失的表現Before handling the missing values, we must know what all possible types of it exists in the data science world. Basically there are 3 types to be found everywhere on the web, but in some of the core research papers there is one more ty…

[收藏轉載]C# GDI+ 簡單繪圖(一)

最近對GDI+這個東西接觸的比較多,也做了些簡單的實例,比如繪圖板,仿QQ截圖等. 廢話不多說了,我們先來認識一下這個GDI+,看看它到底長什么樣. GDI+:Graphics Device Interface Plus也就是圖形設備接…

mybaties總結+hibernate總結

一、對原生態jdbc程序中問題總結 1.1 jdbc程序 需求:使用jdbc查詢mysql數據庫中用戶表的記錄 statement:向數據庫中發送一個sql語句 預編譯statement:好處:提高數據庫性能。 預編譯statement向數據庫中發送一個sql語句,數據庫編譯…

客戶旅程_我如何充分利用freeCodeCamp的旅程

客戶旅程by Catherine Vassant (aka Codingk8)由凱瑟琳瓦森(Catherine Vassant)(又名Codingk8) 我如何充分利用freeCodeCamp的旅程 (How I made the most out of my freeCodeCamp journey) 我的路線圖:超越課程范圍的freeCodeCamp (My road map to freeCode…

Python14 函數

函數 面向對象編程: 類----class 面向過程編程:過程---def 函數式編程:函數---def def test(x):描述x 1return x#def是定義函數的關鍵字#test是函數名稱#(x)是參數#x1是 函數體,是一段邏輯代碼#return 定義…

學習sql注入:猜測數據庫_面向數據科學家SQL:學習簡單方法

學習sql注入:猜測數據庫We don’t pick a hammer and look for nails — that would be an unusual way of solving problems. The usual way of doing business is to identify the problem first, then look for appropriate tools.我們不用錘子找釘子,那是解決問…

android 百度地圖3.0,android 百度地圖3.0

一:為地圖設置事件注意新版本中要有一個getMap mMapView.getMap().setOnMapStatusChangeListener(listener);OnMapStatusChangeListener listener = new OnMapStatusChangeListener() {/*** 手勢操作地圖,設置地圖狀態等操作導致地圖狀態開始改變。* @param s…

(摘錄)sockaddr與sockaddr_in,sockaddr_un結構體詳細講解

struct sockaddr { unsigned short sa_family; /* address family, AF_xxx */ char sa_data[14]; /* 14 bytes of protocol address */ }; sa_family是地址家族,一般都是“AF_xxx”的形式。通常大多用的都是AF_INET。 sa_data是14字節協議…

數據挖掘—K-中心點聚類算法(Java實現)

K-中心點聚類算法 (1)任意選擇k個對象作為初始的簇中心點 (2)指派每個剩余對象給離他最近的中心點所表示的簇 (3)選擇一個未被選擇的中心點直到所有的中心點都被選擇過 (4)選擇一個…

使用akka構建高并發程序_如何使用Akka Cluster創建簡單的應用程序

使用akka構建高并發程序If you read my previous story about Scalachain, you probably noticed that it is far from being a distributed system. It lacks all the features to properly work with other nodes. Add to it that a blockchain composed by a single node is…

pandas之數值計算與統計

數值計算與統計 對于DataFrame來說,求和、最大、最小、平均等統計方法,默認是按列進行統計,即axis=0,如果添加參數axis=1則會按照行進行統計。 如果存在空值,在統計時默認會忽略空值,如果添加參數skipna …

python自動化數據報告_如何:使用Python將實時數據自動化到您的網站

python自動化數據報告This tutorial will be helpful for people who have a website that hosts live data on a cloud service but are unsure how to completely automate the updating of the live data so the website becomes hassle free. For example: I host a websit…

一顆站在技術邊緣的土豆

2012年開始上專業課,2013年打了一年游戲,年底專業課忘光了,但是蒙混過關沒掛科,2014年7月份畢業,對這個社會充滿向往。2014年9月份——方正代理商做網絡安全公司。2015年3月份跳槽到一家vmware代理商公司。2016年6月…

leetcode 839. 相似字符串組(并查集)

如果交換字符串 X 中的兩個不同位置的字母,使得它和字符串 Y 相等,那么稱 X 和 Y 兩個字符串相似。如果這兩個字符串本身是相等的,那它們也是相似的。 例如,“tars” 和 “rats” 是相似的 (交換 0 與 2 的位置); “r…

android intent參數是上次的結果,【Android】7.0 Intent向下一個活動傳遞數據、返回數據給上一個活動...

1.0 可以利用Intent把數據傳遞給上一個活動,新建一個叫“hellotest01”的項目。新建活動FirstActivity,勾選“Generate Layout File”和“Launcher Activity”。image修改AndroidManifest.xml中的內容:android:name=".FirstActivity"…

實習一年算工作一年嗎?_經過一年的努力,我如何找到軟件工程工作

實習一年算工作一年嗎?by Andrew Ngo 作者:安德魯·恩戈 經過一年的努力,我如何找到軟件工程工作 (How I landed a software engineering job after a year of hard work) Many of us think the path to becoming a software engineer requires years of education an…

學習深度學習需要哪些知識_您想了解的有關深度學習的所有知識

學習深度學習需要哪些知識有關深層學習的FAU講義 (FAU LECTURE NOTES ON DEEP LEARNING) Corona was a huge challenge for many of us and affected our lives in a variety of ways. I have been teaching a class on Deep Learning at Friedrich-Alexander-University Erlan…

參加開發競賽遇到的問題【總結】

等比賽完就寫。 轉載于:https://www.cnblogs.com/jiangyuanjia/p/11261978.html

html5--3.16 button元素

html5--3.16 button元素 學習要點 掌握button元素的使用button元素 用來建立一個按鈕從功能上來說,與input元素建立的按鈕相同button元素是雙標簽,其內部可以配置圖片與文字,進行更復雜的樣式設計不僅可以在表單中使用,還可以在其…

如何注冊鴻蒙id,鴻蒙系統真機調試證書 和 設備ID獲取

鴻蒙系統真機調試創建項目創建項目創建應用創建鴻蒙應用(注意,測試階段需要發郵件申請即可)關聯應用項目進入關聯 添加引用準備調試使用的 p12 和證書請求 csr使用以下命令// 別名"test"可以修改,但必須前后一致,密碼請自行修改key…