幾個有趣的算法題目

本文首發 http://svtter.cn

最接近的數字

題目

一個K位的數N

$$ (K\leq2000，N\leq10^{20}) $$

找出一個比N大且最接近的數，這個數的每位之和與N相同，用代碼實現之。

例如：0050 所求書數字為0104；112 所求數為121；

算法分析算法思想

直接暴力求這個數字是不可以的，數字的量級太大，有K位的數字，不可能直接用int，或者float來表示，使用數組來存儲。應該分析這個數字，step1，從右邊開始的最小位數開始，分解最后一位數字，分解出1來拿給前面的一位。9和0比較特殊，因此從左往右掃描的開始，遇到0就跳過，遇到第一個非0的數字，就把這個數字-1，然后移到最后面去，然后，step2，開始找第一個非9的數字，如果遇到9，就把9放到最后面去，遇到非9，就+1，結束運算。

一個般的例子：

1999000 -> 1990008-> 2000899

要注意一個問題，就是如果是 999000 這種情況，在數字的最開頭補1，結果是1000899

幾個刁蠻的數據：29399 -> 29489

偽代碼

array = get_array() # number to char array
array.reverse()
step1 = true
step2 = false
zero = 0, cnt = 0;
for i : 1 - lengthof(array)if step1:if array[i] is 0:zero ++else:array[i] = array[i] - 1if zero > 0:array[0] = array[i]array[i] = 0step1 = falsestep2 = trueelse if step2:if array[i] is 9:if zero == 0:array[cnt+1] = array[cnt]array[cnt] = 9cnt++if (i != cnt):array[i] = array[i-1]else:array[cnt + 1] = array[cnt]array[cnt] = 9cnt++array[i] = 0else:i = i+1step2 = falsebreakif not step2:array[lengthof(array)] = 1array.reverse()
disp(array)

分析時間復雜度O

因為reverse操作，2K，加上最后整理最小數到最前面，最壞情況接近K，3K，在循環中的操作看運氣，但是最糟糕的情況也只有5K，所以時間復雜度為

$$ O(3K) \approx O(K) $$

源代碼

#include <stdio.h>
#include <string.h>const int MAXN = 3000;
char array[MAXN];
int length_of_number;
void get_array()
{int i;char null;scanf("%d", &length_of_number);scanf("%c", &null);for (i = 0; i < length_of_number; i++){scanf("%c", &array[i]);}scanf("%c", &null);
}void reverse()
{int i ;char temp;for (i = 0; i < length_of_number/2; i++){// _swaptemp = array[i];array[i] = array[length_of_number - 1 - i];array[length_of_number-1-i] = temp;}
}void run()
{reverse();int step1 = 1,step2 = 0,i = 0,zero = 0,cnt = 0;for (i = 0; i < length_of_number; i++){if (step1){if (array[i] == '0'){zero++;}else{array[i] = array[i] - 1;if (zero > 0){array[cnt] = array[i];array[i] = '0';}step1 = 0, step2 = 1;}}else if (step2){if (array[i] == '9'){if (zero == 0){array[cnt + 1] = array[cnt];array[cnt] = '9';cnt++;if (i != cnt){array[i] = array[i-1];}}else{array[cnt + 1] = array[cnt];array[cnt] = '9';cnt++;array[i] = '0';}}else{array[i] ++;step2 = 0;break;}}}if (step2){array[length_of_number] = '1';length_of_number ++;}
}void output()
{int i;reverse();for(i = 0; i < length_of_number; i++){printf("%c", array[i]);}printf("\n");
}int main()
{memset(array, 0, sizeof(array));freopen("input", "r", stdin);get_array();run();output();return 0;
}

測試結果

使用python生成測試數據進行測試：

"""
最接近的數字
"""
import random
import osdef test():"""sample test"""num = random.randint(0, 10000000)sum_of_num = 0for i in str(num):sum_of_num += int(i)length = len(str(num))temp_num = num + 1while(True):sum_temp = 0for i in str(temp_num):sum_temp += int(i)if sum_temp == sum_of_num:breaktemp_num += 1with open('input', 'w') as f:f.write(str(length) + '\n')f.write(str(num))res = os.popen('./ex2').read()if temp_num == int(res):return [True]else:return [False, num, temp_num, int(res)]all = True
for i in range(1000):res = test()if res[0] is False:all = Falseprint(res)if all:print('Pass testing!')

存在錯誤的情況：

通過：

后期改善優化的地方

reverse 是為了編程方便進行的處理，但是如果數字太大，速度肯定會受影響，這個時候就不要使用reverse了。
用鏈表來做可以簡化代碼，減少分析的，更加節省時間
處理移位的時候考慮幾個問題

尋找發帖水王

題目

如果“水王”沒有了，但有三個發帖很多的ID，發帖的數目都超過了帖子做數的1/4，又如何快速找出他們的ID。

算法分析算法思想

從0-n掃描ID數組，記錄3個數字的個數，如果出現第四個數字，就把三個數字的個數減少1，如果有一個數字的個數減少到0，那么把新來的數字作為原本三個數字之一進行記錄。

如此一來，掃描完ID數組之后，剩下記錄的3個數字的個數便是需要求的三個數字。

偽代碼

array = get_array()
count = empty_set()
for i in array:if count.full:if i in count:count.i.num ++else:for j in count:count.j.num--elsecount.add(i)
disp(count)

分析時間復雜度O

數列的大小為N，記錄數字的數組大小為3，每次判斷記錄數組count是否存在0，以及找到已存在的數字++，都會花費3個單位時間，因此其時間復雜度為

$$ O(3n) \approx O(n) $$

源代碼

#include <stdio.h>
#include <string.h>#define MAXN 5000
int idarray[MAXN];int cur[3]; // 記錄當前元素
int pos[3]; // 記錄當前元素個數// 檢查是否在數組內，如果不在數組內，添加進入數組
void checkin(int no)
{int i;// 檢查是否有空位置for (i = 0; i < 3; i++){if (pos[i] == 0){cur[i] = no;pos[i] ++;return;}}// 尋找指定數字++for (i = 0; i < 3; i++){if (cur[i] == no){pos[i] ++;return;}}// 沒有找到重復數字，全部--for (i = 0; i < 3; i++)pos[i] --;
}// 輸出最后結果
void output()
{printf("%d %d %d\n", cur[0], cur[1], cur[2]);
}// 主程序
int numberOfArray;
void run()
{int i;for (i = 0; i < numberOfArray; i++){checkin(idarray[i]);}output();
}void input()
{int i;scanf("%d", &numberOfArray);for(i = 0; i < numberOfArray; i++){scanf("%d", &idarray[i]);}}int main()
{freopen("input", "r", stdin);int groupOfTest;scanf("%d", &groupOfTest);while(groupOfTest--){memset(cur, 0, sizeof(cur));memset(pos, 0, sizeof(pos));memset(idarray, 0, sizeof(idarray));input();puts("Test running...");run();}return 0;
}

測試結果

本測試數據采用Python自動生成。

"""
尋找發帖水王
"""import randomN = 4000
a, b = (int(N/4), int(N/3))
three_id = random.sample(range(1, 100), 3)
three_id_num = {}
sum_rand = 0
for i in three_id:temp = random.randint(a, b)sum_rand += tempthree_id_num[i] = three_id_num.get(i, 0) + tempid_array = [random.randint(1, 100) for i in range(N-sum_rand)]
for i in three_id:id_array = id_array + [i for j in range(three_id_num[i])]random.shuffle(id_array)print('Most three id:', three_id)
print('Three id num: ', three_id_num)
print('Sum of three_id num: ', sum_rand)
print('---------------')
# print(id_array)with open('input', 'w') as f:f.write('1\n')f.write(str(N) + '\n')for i in id_array:f.write(str(i) + ' ')

后期改善優化的地方

對于N比較小的情況可以在內存中進行查找，但是一旦涉及到更大的數據，這個方法可能就沒有那么簡單了，不能在內部建立數組，需要一部分一部分的從磁盤中讀數；
如果需要查找的id數量變多，那么需要的臨時保存的數列可能更大；
這個實現沒有使用STL中的map，如果使用map，還能進一步使得代碼見解易懂，map使用hash來做內部實現，可以使得面對數據量更大的數據的時候，加快查找數據的速度。

山西煤老板

題目

你是山西的一個煤老板，你在礦區開采了有3000噸煤需要運送到市場上去賣，從你的礦區到市場有1000公里，你手里有一列燒煤的火車，這個火車只能裝1000噸煤，且能耗比較大——每一公里需要耗一噸煤。請問，作為一個懂編程的煤老板，你會怎么運送才能運最多的煤到集市？

算法分析算法思想

從動態規劃的角度求最優解：
假設起始運送貨物量為t，終點路程為s，火車容量為c，可以運抵終點的最多貨物量為函數 F(t, s)。
3種基本情況：
（1）t < s：貨物量不足以運送到此距離，所以F(t, s) = 0;
（2）s < t < c：火車一次就可以裝完貨物，所以F(t, s) = t - s;
（3）2s < c 使得火車一次無法運完，但可以采用往返的方式多次運輸，這種情況下最有的方式就是減少總共往返的次數，也就是直接運到終點而不在中間卸貨，所以

$$ F(t, s) = (t / c - 1) * (c - 2s) + (c - s) $$

可得遞歸式：

$$ F(t, s) = max\{ F( F(t, i), s - i)\} (1 <= i < s) $$

分析了一下這個方程是有問題的，比如F(1750, 250)會計算出1125；

所以正確的結果應該對t/c進行處理，也就是說，起點剩余的燃料不足運輸到終點，直接舍棄。第三階段的方程式應該是

$$ F(t, s) = (t // c - 1) * (c - 2s) + (c - s) + (t \% c - 2 s), if (t\%c > 2s) $$

偽代碼

begin:if t < s:f[t][s] = 0elif s < t < c:f[t][s] = t - selif 2*s < c:f[t][s] = int((t//c-1)*(c-2*s) + (c-s))if t % c > 2*s:f[t][s] += int(t % c-2*s)else:pre = -2for i in range(1, s):pre = int(max(F(F(t, i), s-i), pre))f[t][s] = pre
end
disp(f[3000][1000])

分析時間復雜度O

時間復雜度為

$$ O(3000*3000) $$

因為每個數字都要計算一遍。

源代碼

"""
山西煤老板
"""
c = 1000
f = [[-1 for k in range(4000)] for j in range(4000)]
for j in range(4000):for k in range(4000):if j < k:f[j][k] = 0
count = 1000
cnt = 0def F(t, s):"""dp"""global countglobal cglobal f# count -= 1# if count == 0:# count = int(input())t = int(t)s = int(s)if f[t][s] != -1:return f[t][s]if t < s:f[t][s] = 0elif s < t < c:f[t][s] = t - selif 2*s < c:f[t][s] = int((t//c-1)*(c-2*s) + (c-s))if t % c > 2*s:f[t][s] += int(t % c-2*s)else:pre = -2for i in range(1, s):pre = int(max(F(F(t, i), s-i), pre))f[t][s] = preprint(t, s, f[t][s])return f[t][s]print(F(3000, 500))

測試結果

后期改善優化的地方

去除了一下數據進行加速
保存f減少重復運算值
應該有更加簡單的方法，類似這種，但是不好解釋。
$$ 3y=1000\\ 5x=1000\\ 解得x+y=200+333=533，因此使得最后一輛火車抵達時節省了533噸煤\\ $$

Facebook

題目

Given a list of words, L, that are all the same length, and a string, S, find the starting position of the substring of S that is concatenation of each word in L exactly once and without intervening characters. This substring will occur exactly once in S.

算法分析算法思想

使用hashmap來保存word的hash值，來加快查找速度。（舊）

直接用hash函數求字符串的hash值，最后求得結果。

依據公式

$$ hash(w_1) + hash(w_2) = hash(w_2) + hash(w_1) $$

偽代碼

hash_word_list = list(map(hash, words))
hash_sum = reduce(lambda x, y: x + y, hash_word_list)for i in range(len(sentence)):wl = word_lenwlist = [sentence[i+j*wl:i+j*wl+wl] for j in range(words_len)]temp_sum = 0for k in wlist:temp_sum += hash(k)if temp_sum == hash_sum:print(i)break

分析時間復雜度O

就是字符串長度

$$ O(lengthOfS) $$

源代碼

#!/usr/bin/env python3
"""
facebook"""
from functools import reducewhile True:words = input()# words = "fooo barr wing ding wing"words = words.split(' ')word_len = len(words[0])words_len = len(words)hash_word_list = list(map(hash, words))hash_sum = reduce(lambda x, y: x + y, hash_word_list)sentence = input()# sentence = """lingmindraboofooowingdin\# gbarrwingfooomonkeypoundcakewingdingbarrwingfooowing"""# print(words, words_len, word_len, sentence)for i in range(len(sentence)):wl = word_lenwlist = [sentence[i+j*wl:i+j*wl+wl] for j in range(words_len)]# print(wlist)temp_sum = 0for k in wlist:temp_sum += hash(k)if temp_sum == hash_sum:print(i)break

測試結果

測試數據生成意義不是很大，

后期改善優化的地方

hash盡管在速度上非常優秀，但是在準確度方面，如果出現hash沖突，那么值可能不準確。此時可以利用hashmap來解決這個問題，不過會多出重置hashmap的相關時間。

For n -m - problems

Problemset

Assume we have a sequence that contains N numbers of type long. And we know for sure that among this sequence each number does occur exactly n times except for the one number that occurs exactly m times (0 < m < n). How do we find that number with O(N) operations and O(1) additional memory?

Algorithm

^ is the add operation without carry.
默認one,two都是0, 即任何數字都不存在
數字a第一次來的時候, one標記a存在, two不變
數字a第二次來的時候, one標記a不存在, two標記a存在
數字a第三次來的時候, one不變, two標記a不存在

構造這樣一種運算，通過異或將數據保存在one和two里面。

Pseudocode

def solve2(array):one = 0, two = 0for i in range(array):one = (one ^ array[i]) & ~twotwo = (two ^ array[i]) & ~onereturn one, twoarray = input()
_, res = solve2(array)

### Source code

#!/usr/bin/env pythondef solve(array):one, two = 0, 0for i in array:one = (one ^ i) & ~twotwo = (two ^ i) & ~onereturn one, twoif __name__ == '__main__':array = input()array = array.split(' ')array = list(map(lambda x: int(x), array))# print(array)_, res = solve(array)print(res)

Test

#!/usr/bin/env python3
import randomdef test():"""測試"""array = []n, m = 3, 2numberofNum = random.randint(100, 1000)record = {}for _ in range(numberofNum):temp = random.randint(10, 10000)while temp in record:temp = random.randint(10, 10000)record[temp] = 1for _ in range(3):array.append(temp)temp = random.randint(10, 1000)while temp in record:temp = random.randint(10, 1000)array.append(temp)array.append(temp)from run import solve_, res = solve(array)if res != temp:print('ERROR')print(array, temp)input()else:print('Pass: res: ', res, 'temp:', temp)for i in range(50):test()

Use python generate data to test.

Discussion and improve

如果n不是3，那么需要構造更多的臨時變量。

很長的數組

題目

一個很長很長的short型數組A，將它分成m個長為L的子數組B1，B2，…，Bm，其中每個數組排序后都是遞增的等差數列，求最大的L值。

$$ 例如，A = \{-1, 3, 6, 1, 8, 10\} 可以分成B_1 = \{-1, 1, 3\}, B_2 = \{6, 8, 10\},\; L = 3 即為所求。 $$

算法分析

首先進行排序，然后開始分三步走。

統計元素個數 O(n)
排序 O(nlog(n))

?

第一步用來枚舉L和m的大小，由題目可知，L * m = 數組的長度。從m為1開始枚舉，保證得到的L為最大值。

第二步搜索為深搜，確定當前子數組的起點和初始步長，使用pos記錄當前數組選定的元素。

第三步枚舉，根據起點給定的初始步長，開始枚舉步長，如果枚舉的步長可以在數組中找到足夠的元素，即數字為L，那么記錄這種分法，開始枚舉下一個起點。如果枚舉的步長和起點無法滿足條件，回溯到上一個節點，把上一個節點記錄的步長+1再一次搜索。當枚舉的起點數達到m，即滿足要求輸出。

大白話來講，就是從頭開始分原始數組到m個數組中去，排序過后，在前面的每一個節點未被分配的元素，都是子數組起點。如果使用廣度優先搜索，即每次都給一個子數組分配一個滿足子數組步長要求的數，會導致在最后才發現分配的元素數不滿足要求，從而浪費大量時間。

其中，深度優先搜索還有幾個剪枝的技巧：

當前步長*(L-1)如果超過了數組的最大元素，可以不繼續搜索
如果在給定步長的情況下，下一個數字的大小超過之前的數字+步長，那么可以不必繼續搜索。

因為數組已經排好序。
還有其他的剪枝技巧，體現在代碼中了。

時間復雜度

n為數組長度，排序的時間為 O(nlogn)，枚舉m時間為n，枚舉step時間為65536【short跨度】，枚舉全部元素時間為n，因此算法的時間上界為

$$ O(65536n^2) $$

實際情況下，由于剪枝等操作的存在，應優于這個時間。

偽代碼

leng = len(Array)
for m=1 to n:if n % m != 0:continueL = n // m# deep searchres, record = findArray(L, m)def findArray(L, m):group = 0pos = np.ones(leng)record = []record_start = []while group != m:step = 0start = getStart(pos)res, step = 尋找合適的步長(start, step, pos, record, L)if res:找到了計數while res is False:沒找到彈出棧，往回找if 彈出棧為空:不用找了找不到了return False, None

源代碼

#!/usr/bin/env python3
# coding: utf-8
"""
arrays
"""from __future__ import print_function
import numpy as nparray = [-1, 3, 6, 1, 8, 10]
# array = [1, 5, 9, 2, 6, 10]
# array = [1, 2, 4, 5, 8, 9, 13, 14]
# array = [1, 2, 4, 7, 11]
array = sorted(array)
print(array)
leng = len(array)
maxn = array[leng-1]
enable = 1
disable = 0def findJ(j, step, pos, record, L):"""尋找以J為開始，以步長step為開始的數列"""class StepError(Exception):passclass MaxException(Exception):passif pos[j] == disable:return Falsestart = array[j]pre = startrecord_temp = []# remember zerotry:for step in range(step, 40000):# 把第一個數字記錄record_temp.append(j)pos[j] = disablepre = startif start + step * (L - 1) > maxn:raise MaxExceptiontry:cnt = 1if cnt == L:record.append(record_temp)return True, stepfor k in range(j, leng):if pos[k] == disable:continueelif pos[k] == enable and array[k] == pre + step:record_temp.append(k)pre = array[k]cnt += 1pos[k] = disableelif pos[k] == enable and array[k] > pre + step:raise StepErrorif cnt == L:record.append(record_temp)return True, stepexcept StepError:# 重置標記for r in record_temp:pos[r] = enablerecord_temp = []except MaxException:# 沒有合適的stepreturn False, Nonedef findArray(L, m):"""尋找數組"""pos = np.ones(leng)record = []record_start = []group = 0while group != m:start = 0while pos[start] == disable:start += 1step = 0res, step = findJ(start, step, pos, record, L)if res:group += 1record_start.append((start, step))while res is False:try:start, step = record_start.pop()for r in record.pop():pos[r] = enablegroup -= 1res, step = findJ(start, step+1, pos, record, L)except IndexError:return False, Nonereturn True, recorddef divideArray():"""分離數組m 是分離的數組的個數L 是分離的數組的長度"""for m in range(1, leng+1):if leng % m != 0:continueL = leng // mres, record = findArray(L, m)def trans(x):return array[x]if res:print('lenth: ', L)for r in record:temp = map(trans, r)print(list(temp))returnprint('No result.')if __name__ == '__main__':divideArray()