import csv,re
def search(req, line):
    """Return the first capture group of ``req`` found in ``line``.

    Args:
        req: Regular-expression pattern. It must define at least one
            capture group, because group 1 is what gets returned.
        line: Text scanned with ``re.search``.

    Returns:
        The text captured by group 1 of the first match, or the
        placeholder string ``'no'`` when the pattern matches nowhere.
    """
    match = re.search(req, line)
    if match:
        return match.group(1)
    # Placeholder so callers always get something printable per column.
    return 'no'
# Written for Python 2, where the csv module expects files opened in
# binary mode ('rb').
#
# Prints Baidu search-result data, one comma-separated line per input row:
# current keyword, rank, ranking site, Baidu url (it has to be unescaped
# to obtain the real url), title.
with open('serp_html.csv', 'rb') as csvfile:
    reader = csv.reader(csvfile)
    for line in reader:
        # Column 0 holds the keyword, column 1 the raw result HTML.
        word = line[0]
        html = line[1]
        number = search(r'id="(\d+)"', html)
        domain = search(r'(.*?)/.*', html)
        bdurl = search(r'href="(http://www.baidu.com/link\?url=[^"]*?)"', html)
        title = search(r'"title":"([^"]*?)"', html)
        # Fields the regexes could not find come back as 'no'.
        print('%s,%s,%s,%s,%s' % (word, number, domain, bdurl, title))
以上是一個繼承程序,運行後能print出正確結果,但是我希望能生成csv報表文件,嘗試修改for為函數失敗。
小菜鳥一枚,不知道怎麼搞了,求大神指點
可以這樣:
import csv,re
def search(req, line):
    """Return the first capture group of ``req`` found in ``line``.

    Args:
        req: Regular-expression pattern. It must define at least one
            capture group, because group 1 is what gets returned.
        line: Text scanned with ``re.search``.

    Returns:
        The text captured by group 1 of the first match, or the
        placeholder string ``'no'`` when the pattern matches nowhere.
    """
    match = re.search(req, line)
    if match:
        return match.group(1)
    # Placeholder so every output column always has a value.
    return 'no'
# Written for Python 2, where the csv module expects files opened in
# binary mode ('rb' / 'wb').
#
# Exports Baidu search-result data to new_file.csv, one row per input row:
# current keyword, rank, ranking site, Baidu url (it has to be unescaped
# to obtain the real url), title.
#
# The input reader is created here because the loop below needs a fresh
# one; the `with` blocks close both files even if a row fails to parse.
with open('serp_html.csv', 'rb') as csvfile:
    reader = csv.reader(csvfile)
    with open('new_file.csv', 'wb') as result_csv:
        result_csv_writer = csv.writer(result_csv)
        # Header row.
        result_csv_writer.writerow(['關鍵詞', '排名', '排名網站', '百度url', '標題'])
        for line in reader:
            # Column 0 holds the keyword, column 1 the raw result HTML.
            word = line[0]
            html = line[1]
            number = search(r'id="(\d+)"', html)
            domain = search(r'(.*?)/.*', html)
            bdurl = search(r'href="(http://www.baidu.com/link\?url=[^"]*?)"', html)
            title = search(r'"title":"([^"]*?)"', html)
            # Write each data row as soon as it is parsed instead of
            # buffering the whole result set in memory first.
            result_csv_writer.writerow((word, number, domain, bdurl, title))
代碼未測試,有問題請簡單修改
玩蛇網文章,轉載請注明出處和文章網址:https://www.iplaypy.com/wenda/wd13784.html
相關文章 Recommend