數據結構——堆的實現

堆的實現-----C語言版

目錄：
一、堆的實現
- 1.1堆的定義
- 1.2堆的實現
- - 1.2.1堆的各個接口
  - 1.2.2堆的向上調整
  - 1.2.3堆的向下調整
  - 1.2.4堆的定義聲明和初始化
  - 1.2.5堆的數據處理
  - 1.2.6堆的判空和堆的數據個數以及堆銷毀
  - 1.2.7堆的代碼實現
二、TOP—K問題

一、堆的實現

1.1堆的定義

堆（heap）是特殊的數據結構。堆通常是一個可以被看做一棵完全二叉樹(邏輯層面上)的數組對象(物理層面上),常用來在一組變化頻繁(發生增刪查改的頻率較高)的數據中尋找最值.將根結點最大的堆叫做最大堆或大根堆，這樣可以找到堆中的最大值(根節點的值);根結點最小的堆叫做最小堆或小根堆,這樣可以找到堆中的最小值。

其中堆不一定是完全二叉樹，只是為了方便存儲和索引，我們通常用完全二叉樹的形式來表示堆而已。
二叉堆：是一個數組，它可以被看成是一個近似的完全二叉樹。

最大堆，最小堆如圖：
在這里插入圖片描述

最大堆：根結點大于左右子樹結點的值，左右子樹結點的值大于它自己左右子樹結點的值，一種重復下去；最小堆：根結點小于左右子樹結點的值，左右子樹結點的值小于它自己左右子樹結點的值，一種重復下去。

1.2堆的實現

用數組實現一個堆

1.2.1堆的各個接口

#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
typedef int HPDataType;
typedef struct Heap
{HPDataType* _a;//動態數組int _size;//存儲數據的下標int _capacity;//動態數組的容量
}Heap;
//堆的初始化
void HeapInit(Heap* hp);
// 堆的銷毀
void HeapDestory(Heap* hp);
// 堆的插入
void HeapPush(Heap* hp, HPDataType x);
// 堆的刪除
void HeapPop(Heap* hp);
// 取堆頂的數據
HPDataTypeHeapTop(Heap* hp);
// 堆的數據個數
int HeapSize(Heap* hp);
// 堆的判空
int HeapEmpty(Heap* hp);

1.2.2堆的向上調整

//向上調整算法
void HeapJustUp(HPDataType a[], HPDataType child)
{int parsent;parsent = (child - 1) / 2;//找到孩子的父親while (child > 0){int tmp = 0;if (a[parsent] < a[child])//孩子比父親的值大，{tmp = a[child];a[child] = a[parsent];a[parsent] = tmp;}elsebreak;child = parsent;parsent = (parsent - 1) / 2;//找到孩子的父親}
}

對于向上調整，我們把它看做是數組結構，邏輯上看做一顆完全二叉樹。我們只要將要插入堆的數據通過向上調整就可以把它調整成一個大堆。向上調整算法有一個前提：除了要插入的數據，其它的數據已經構成了一個大堆，這樣才能調整。

1.2.3堆的向下調整

void HeapJustDown(Heap* hp)
{//先假設當前待調整結點的左孩子結點存在//并且是待調整結點的左右孩子結點（不管右孩子結點存不存在，都這樣假設）中值最大的int parent = 0;//根節點int child = parent * 2 + 1;//孩子結點while (child < hp->_size){//child+1 < hp->_size說明右孩子結點確實存在//如果hp->_a[child] < hp->_a[child+1]也成立，那說明左右孩子結點中值最大的是右孩子結點if ((child + 1 < hp->_size) && hp->_a[child] < hp->_a[child + 1]){child = child + 1;}//如果a[child]>a[parent],則說明父節點比比左右孩子節點的值都要小，要置換if (hp->_a[child] > hp->_a[parent]){int tmp = hp->_a[parent];hp->_a[parent] = hp->_a[child];hp->_a[child] = tmp;parent = child;child = child * 2 + 1;}//如果a[child] <= a[parent],那就不需要進行調整else{break;}}
}

對于向下調整，我們把它看成是一個數組結構，邏輯上看做一顆完全二叉樹。我們通過從根節點開始的向下調整算法可以把它調整成一個大堆。向下調整算法有一個前提：左右子樹必須是一個堆，才能調整。

1.2.4堆的定義聲明和初始化

1.堆的聲明

typedef int HPDataType;
typedef struct Heap
{HPDataType* _a;//動態數組int _size;//存儲數據的下標int _capacity;//動態數組的容量
}Heap;

創建一個構成動態數組的結構體

2.堆的初始化

// 堆的初始化
void HeapInit(Heap* hp)
{hp->_a = (HPDataType*)malloc(sizeof(HPDataType) * 4);if (hp->_a == 0){printf("malloc is error\n");exit(-1);}hp->_capacity = 4;hp->_size = 0;
}

開空間，進行初始化

1.2.5堆的數據處理

1.堆的插入

// 堆的插入
void HeapPush(Heap* hp, HPDataType x)
{//數據滿了，需要擴容if (hp->_capacity == hp->_size){HPDataType* tmp = (HPDataType*)realloc(hp->_a, sizeof(HPDataType)*hp->_capacity * 2);if (tmp == NULL){printf("realloc is error");exit(-1);}hp->_a = tmp;hp->_capacity = hp->_capacity * 2;}//不需要擴容hp->_a[hp->_size++] = x;//插入數據，然后_size+1//一般數據都是放到數組尾得，建堆，向上調整，這里我們建大堆HeapJustUp(hp->_a, hp->_size - 1);
}

1.容量不夠就擴容
2.擴容足夠就插入數據
3.然后向上調整建大堆，直到滿足堆

2.堆的刪除

// 堆的刪除，從堆頂開始刪
void HeapPop(Heap* hp)
{
assert(hp);//斷言為空為假的話就報錯
assert(!HeapEmpty(hp));//斷言如果不是空為真就執行
//首元素的的值與尾元素交換，然后刪除尾元素
int tmp = hp->_a[0];
hp->_a[0] = hp->_a[hp->_size - 1];
hp->_a[hp->_size - 1] = tmp;
hp->_size--;
//堆頂元素進行向下調整
HeapJustDown(hp);
}

1.挪動覆蓋刪除堆頂元素，重新建堆
2.盡量保證關系不變（首尾數據交換，再刪除尾部數據，向下調整建堆）

3.獲取堆頂數據

// 取堆頂的數據
HPDataTypeHeapTop(Heap* hp)
{assert(hp->_a);assert(!HeapEmpty(hp));//斷言如果不是空為真就執行return hp->_a[0];
}

堆頂數據就是第一個元素

1.2.6堆的判空和堆的數據個數以及堆銷毀

1.堆的數據個數

// 堆的數據個數
int HeapSize(Heap* hp)
{assert(hp);return hp->_size;
}

堆的數據個數就是_size的個數

2.堆的判空

// 堆的判空
int HeapEmpty(Heap* hp)
{assert(hp);return hp->_size == 0;
}

_size為0，說明堆為空

3.堆銷毀

// 堆的銷毀
void HeapDestory(Heap* hp)
{assert(hp);free(hp->_a);hp->_a = NULL;hp->_capacity = hp->_size = 0;
}

開一塊空間（malloc），程序結束之前要釋放空間（free）

1.2.7堆的代碼實現

.h頭文件（聲明）

#pragma once
#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
typedef int HPDataType;
typedef struct Heap
{HPDataType* _a;//動態數組int _size;//存儲數據的下標int _capacity;//動態數組的容量
}Heap;
//堆的初始化
void HeapInit(Heap* hp);
// 堆的銷毀
void HeapDestory(Heap* hp);
// 堆的插入
void HeapPush(Heap* hp, HPDataType x);
// 堆的刪除
void HeapPop(Heap* hp);
// 取堆頂的數據
HPDataTypeHeapTop(Heap* hp);
// 堆的數據個數
int HeapSize(Heap* hp);
// 堆的判空
int HeapEmpty(Heap* hp);

.c源文件（定義）

#include "Heap.h"
// 堆的構建
void HeapInit(Heap* hp)
{hp->_a = (HPDataType*)malloc(sizeof(HPDataType) * 4);if (hp->_a == 0){printf("malloc is error\n");exit(-1);}hp->_capacity = 4;hp->_size = 0;
}
//向上調整算法
HeapJustUp(HPDataType a[], HPDataType child)
{int parsent;parsent = (child - 1) / 2;//找到孩子的父親while (child > 0){int tmp = 0;if (a[parsent] < a[child])//孩子比父親的值大，{tmp = a[child];a[child] = a[parsent];a[parsent] = tmp;}elsebreak;child = parsent;parsent = (parsent - 1) / 2;//找到孩子的父親}
}
// 堆的插入
void HeapPush(Heap* hp, HPDataType x)
{//數據滿了，需要擴容if (hp->_capacity == hp->_size){HPDataType* tmp = (HPDataType*)realloc(hp->_a, sizeof(HPDataType)*hp->_capacity * 2);if (tmp == NULL){printf("realloc is error");exit(-1);}hp->_a = tmp;hp->_capacity = hp->_capacity * 2;}//不需要擴容hp->_a[hp->_size++] = x;//插入數據，然后_size+1//一般數據都是放到數組尾得，建堆，向上調整，這里我們建大堆HeapJustUp(hp->_a, hp->_size - 1);
}
// 堆的判空
int HeapEmpty(Heap* hp)
{assert(hp);return hp->_size == 0;
}
//堆頂元素進行向下調整
void HeapJustDown(Heap* hp)
{//先假設當前待調整結點的左孩子結點存在//并且是待調整結點的左右孩子結點（不管右孩子結點存不存在，都這樣假設）中值最大的int parent = 0;//根節點int child = parent * 2 + 1;//孩子結點while (child < hp->_size){//child+1 < hp->_size說明右孩子結點確實存在//如果hp->_a[child] < hp->_a[child+1]也成立，那說明左右孩子結點中值最大的是右孩子結點if ((child + 1 < hp->_size) && hp->_a[child] < hp->_a[child + 1]){child = child + 1;}//如果a[child]>a[parent],則說明父節點比比左右孩子節點的值都要小，要置換if (hp->_a[child] > hp->_a[parent]){int tmp = hp->_a[parent];hp->_a[parent] = hp->_a[child];hp->_a[child] = tmp;parent = child;child = child * 2 + 1;}//如果a[child] <= a[parent],那就不需要進行調整else{break;}}
}
// 堆的刪除，從堆頂開始刪
void HeapPop(Heap* hp)
{
assert(hp);//斷言為空為假的話就報錯
assert(!HeapEmpty(hp));//斷言如果不是空為真就執行
//首元素的的值與尾元素交換，然后刪除尾元素
int tmp = hp->_a[0];
hp->_a[0] = hp->_a[hp->_size - 1];
hp->_a[hp->_size - 1] = tmp;
hp->_size--;
//堆頂元素進行向下調整
HeapJustDown(hp);
}
// 取堆頂的數據
HPDataTypeHeapTop(Heap* hp)
{assert(hp->_a);assert(!HeapEmpty(hp));//斷言如果不是空為真就執行return hp->_a[0];
}
// 堆的數據個數
int HeapSize(Heap* hp)
{assert(hp);return hp->_size;
}
// 堆的銷毀
void HeapDestory(Heap* hp)
{assert(hp);free(hp->_a);hp->_a = NULL;hp->_capacity = hp->_size = 0;
}

.c源文件（測試）

#include "Heap.h"
int main()
{Heap hp;HeapInit(&hp);//初始化HeapPush(&hp, 2);//插入數據HeapPush(&hp, 3);HeapPush(&hp, 4);HeapPush(&hp, 5);HeapPush(&hp, 6);HeapPush(&hp, 1);HeapPush(&hp, 66);HeapPush(&hp, 62);HeapPush(&hp, 4);HeapPush(&hp, 6);HeapPop(&hp);//刪除數據,從堆頂開始刪int tmp= HPDataTypeHeapTop(&hp);//取堆頂元素// 堆的數據個數int num = HeapSize(&hp);printf("建大堆，棧頂元素為：%d,堆的數據個數：%d\n", tmp,num);for (int i = 0; i < num; i++)printf("%d ", hp._a[i]);HeapDestory(&hp);// 堆的銷毀return 0;
}

二、TOP—K問題

TOP—K問題：求數據集合中前k個最大的元素和最小的元素，一般情況數據非常大。
如：專業前10，世界500強，游戲中前100的活躍玩家，各種榜單等等。

1.用數據集合中前k個元素來建堆
求前k個最大的元素，建小堆
求前k個最小的元素，建大堆
2.用剩余的N—K個元素依次與堆頂元素來比較，根據規則替換堆頂元素，N—K個元素依次與堆頂元素比較完成后，堆里的K個元素就是所求的最小或者最大的元素。

例子：

問題：假設1億個數，內存存不下，數據在文件中找出最大的前k個數。
1.讀取文件的前10個數據，在內存數組中建立一個小堆。
2.在依次讀取剩下數據，跟堆頂元素比較，大于堆頂的數據就替換它，然后向下調整。
3.所有數據讀完，堆里面數據就是最大的前10個數。

#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define n 100000000
void  WeinteData()//寫入1億數據
{FILE* fp = fopen("top.txt", "w");//打開文件，只寫
if (fp == NULL)
{perror("fopen error");exit(-1);
}
srand((unsigned)time(0));
int arr[100] = { 0 };
for (int i = 0; i < n; i++)
{int x = rand() % 10000 + 1;fprintf(fp, "%d\n", x);
}
fclose(fp);//關閉文件
}
//兩個數交換
void Swap(int* p, int* q)
{int tmp;tmp = *q;*q = *p;*p = tmp;
}
//向下調整算法
void JustDown(int* arr,int k,int parent)
{int child = parent * 2 + 1;//左孩子結點while (child < k){if ((child + 1 < k) && arr[child] > arr[child + 1])//找到最小值的孩子結點child += 1;//如果arr[child]<arr[parent],則說明父節點比比左右孩子節點的值都要大，要置換if (arr[child] < arr[parent]){Swap(&arr[child], &arr[parent]);//讓孩子結點為父節點，并且更新它的兒子結點parent = child;child = child * 2 + 1;}//如果a[child] <= a[parent],那就不需要進行調整else{break;}}
}
//建小堆
void HeapCreate(int* arr,int k)
{//最后一個結點的父親結點開始向下調整for (int i = (k - 2) / 2; i >= 0; --i){//向下調整算法JustDown(arr, k, i);}
}
void  FileTakeK()
{int k = 10;//10個數int* a = (int*)malloc(sizeof(int) * k);//開辟一塊空間用來建堆if (a == NULL){perror("malloc error:");exit(-1);}FILE* file = fopen("top.txt", "r");//打開top.txt文件,只讀模式if (file == NULL){perror("fopen error:");exit(-1);}for (int i = 0; i < k; i++){fscanf(file, "%d", &a[i]);}printf("前10個數:\n");for (int i = 0; i < k; i++)printf("%d ", a[i]);//建小堆HeapCreate(a, k);printf("\n建完小堆里面的數:\n");for (int i = 0; i < k; i++)printf("%d ", a[i]);//把剩余的n-k個數與小堆的堆頂比較，比較完成后，堆里的數就是文件里最大的10個數int x = 0;while (fscanf(file, "%d", &x) != EOF){//比堆頂數大，把這個數賦值給堆頂，然后向下調整if (x > a[0])a[0] = x;JustDown(a, k, 0);}printf("\n取最大的10個數:\n");for (int i = 0; i < k; i++)printf("%d ", a[i]);free(a);//釋放內存fclose(file);//關閉文件
}
int main()
{//寫入1億數據WeinteData();//從文件中取出k個數,建小堆FileTakeK();return 0;
}