【例 8.7】實現FP 樹算法,并對模擬數據集 simpDat挖掘頻繁項集,最小支持度為2,繪制 FP樹并輸出頻繁項集。?
運行結果:
?
聲明:著作權歸作者所有。商業轉載請聯系作者獲得授權,非商業轉載請注明出處。
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 31 10:14:48 2025@author: 破無差
"""
# - * - coding: utf-8 - * -class treeNode: # FP 樹的類定義def __init__(self, nameValue, numOccur, parentNode):self.name = nameValueself.count = numOccurself.nodeLink = None # 不同項集的相同項通過 nodeLink 連接self.parent = parentNodeself.children = {}# 存儲葉子節點def inc(self, numOccur): # 節點出現次數累加self.count += numOccurdef disp(self, ind=1): # 將樹以文本形式顯示print(' ' * ind, self.name, ' ', self.count)for child in self.children.values(): # 繪制子節點child.disp(ind + 1) # 縮進處理def createTree(dataSet, minSup=1): # 構建 FP 樹headerTable = {}for trans in dataSet: # 遍歷數據表中的每一行數據# 遍歷每一行的每一個數據元素,統計每一項出現的次數,將次數保存在 headerTable 中for item in trans:# get 函數返回指定鍵的值,如果值不在字典中返回 0,其中 dataSet[trans]=1headerTable[item] = headerTable.get(item, 0) + dataSet[trans]lessThanMinsup = list(filter(lambda k:headerTable[k]<minSup, headerTable .keys()))# 遍歷 headerTable 中的每一項,若一項出現的次數小于 minSup,則把該項刪除for k in lessThanMinsup:del(headerTable[k])for k in list(headerTable):if headerTable[k]<minSup:del(headerTable[k])# 將出現次數在 minSup 次以上的項保存在 freqItemSet 中freqItemSet = set(headerTable.keys())if len(freqItemSet) == 0: # 如果 freqItemSet 為空,則返回 Nonereturn None, Nonefor k in headerTable:# 保存計數值及指向每種類型第一個元素的指針headerTable[k] = [headerTable[k], None]retTree = treeNode('Null Set', 1, None) # 初始化 FP 樹for tranSet, count in dataSet.items(): # 遍歷 dataSet 的數據,累計出現次數localD = {}for item in tranSet: #遍歷一組數據中的每一項if item in freqItemSet:localD[item] = headerTable[item][0]if len(localD)>0:ordereItems = [v[0] for v in sorted(localD.items(), key=lambda p: (p[1],p[0]), reverse=True)]updateTree(ordereItems, retTree, headerTable, count)return retTree, headerTable #對 FP 樹進行更新def updateTree(items, infree, headerTable, count): #返回 FP 樹和頭指針表if items[0] in infree.children: #更新 FP 樹infree.children[items[0]].inc(count) #檢查是否存在該節點else: #存在則計數增加infree.children[items[0]] = treeNode(items[0], count, infree)#創建新節點if headerTable[items[0]][1] == None: #若不存在該類別,則更新頭指針列表headerTable[items[0]][1] = infree.children[items[0]]else:updateHeader(headerTable[items[0]][1], infree.children[items[0]])if len(items)>1: #仍有未分配的項updateTree(items[1:], infree.children[items[0]], headerTable, count)def updateHeader(nodeToTest, targetNode): #更新 FP 樹while(nodeToTest.nodeLink !=None):nodeToTest = nodeToTest.nodeLinknodeToTest.nodeLink = targetNodedef loadSimpDat(): #創建數據集simpDat = [['11', '12', '15'],['12', '14'], ['12', '13'], ['11', '12', '14'], ['11', '13'], ['12', '13'], ['11', '13'], ['11', '12', '13', '15'],['11', '12', '13']]return simpDatdef createInitSet(dataSet):#將數據集中的數據項轉換為 frozenset 并保存在字典中,其值均為 1retDict = {}for trans in dataSet:fset = frozenset(trans)retDict.setdefault(fset, 0)retDict[fset] += 1# retDict[frozenset(trans)] = 1return retDictdef ascendTree(leafNode, prefixPath): #尋找當前非空節點的前綴if leafNode.parent != None:prefixPath.append(leafNode.name) #將當前節點添加到前綴列表中ascendTree(leafNode.parent, prefixPath) #遞歸遍歷所有前綴路徑中的節點def findPrefixPath(basePat, treeNode): #返回條件模式基condPats = {}while treeNode != None:prefixPath = []ascendTree(treeNode, prefixPath) #尋找當前非空節點的前綴if len(prefixPath)>1:condPats[frozenset(prefixPath[1:])] = treeNode.count#將前綴路徑保存入字典treeNode = treeNode.nodeLink #到下一個頻繁項集出現的位置return condPats #返回條件模式基def mineTree(inTree, headerTable, minSup, prefix, freqItemList):#從頭指針表的底端開始,遞歸查找頻繁項集bigL = [v[0] for v in sorted(headerTable.items(), key=lambda p: str(p[1]))]for basePat in bigL:newFreqSet = prefix.copy() #加入頻繁項表newFreqSet.add(basePat)freqItemList.append(newFreqSet)condPattBases = findPrefixPath(basePat, headerTable[basePat][1])#創造條件基myContTree, myHead = createTree(condPattBases, minSup)#構建條件 FP 樹if myHead != None: #挖掘條件 FP 樹,直到其中沒有元素為止print('conditional tree for: ', newFreqSet)myContTree.disp(1)mineTree(myContTree, myHead, minSup, newFreqSet, freqItemList)if __name__ == '__main__': simpDat = loadSimpDat()initSet = createInitSet(simpDat)myFptree, myHeaderTab = createTree(initSet, 2)freqItems = []mineTree(myFptree, myHeaderTab, 2, set([]), freqItems)print(freqItems)