摘要:先貼鏈接,讓各位觀眾老爺看看,對不對你們的胃口:工控行業系統漏洞。可以看到,這個網頁是 HTML 靜態的,所以問題變得非常簡單,只需要用 requests 請求網頁就可以了。話不多說,直接貼代碼。抓取的欄位包括:標題、公開日期、危害級別、影響產品、漏洞描述、漏洞……
先貼鏈接,讓各位觀眾老爺看看,對不對你們的胃口
工控行業系統漏洞
可以看到,這個網頁是 HTML 靜態的,所以問題變得非常簡單
只需要用 requests 請求網頁就可以了
話不多說,直接貼代碼
"""Scrape ICS vulnerability records from ics.cnvd.org.cn into an .xls workbook.

The script walks the paginated listing, follows every detail link that is not
already present in the output workbook, extracts the labelled table cells of
the detail page and appends one spreadsheet row per vulnerability.
"""
import time
from urllib.parse import urlencode

import pymysql  # noqa: F401 -- not used below; kept from the original import list
import requests
import xlrd
import xlwt
from lxml import etree

# Seconds to sleep before each detail-page request.
TIMESLEEP = 0
# Number of listing pages to walk (20 entries per page).
MAX_PAGE = 96
# Output workbook; also read at start-up to resume a previous run.
FILENAME = "工程控制系統漏洞.xls"
# Column headers, in the same order in which parse_page() fills its dict.
HEADS = ["url", "標題", "CNVD_ID", "公開日期", "危害級別", "影響產品",
         "BUGTRAQ_ID", "CVE_ID", "漏洞描述", "漏洞類型", "參考鏈接",
         "漏洞解決方案", "廠商補丁", "驗證信息", "報送時間", "收錄時間",
         "更新時間", "漏洞附件"]
# Seconds before an HTTP request is abandoned instead of blocking forever.
REQUEST_TIMEOUT = 30

_USER_AGENT = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
               "AppleWebKit/537.36 (KHTML, like Gecko) "
               "Chrome/70.0.3538.110 Safari/537.36")
# Selects, for the table cell whose text equals {label}, either the text
# nodes or the href attributes found inside the cell that follows it.
_CELL_XPATH = '//tbody/tr/td[text()="{label}"]/following-sibling::*[1]//{what}'


def makeurl(max_page=MAX_PAGE):
    """Yield the listing-page URLs for pages ``0 .. max_page - 1``.

    Example URL:
    http://ics.cnvd.org.cn/?tdsourcetag=s_pctim_aiomsg&max=20&offset=0

    Args:
        max_page: number of listing pages to generate (defaults to MAX_PAGE).
    """
    baseurl = "http://ics.cnvd.org.cn/?"
    params = {
        "tdsourcetag": "s_pctim_aiomsg",
        "max": "20",
    }
    for page in range(max_page):
        params["offset"] = page * 20
        url = baseurl + urlencode(params)
        print("url is ", url)
        yield url


def get_page_urllist(url, timeout=REQUEST_TIMEOUT):
    """Download one listing page and return its body as text.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    headers = {
        "Host": "ics.cnvd.org.cn",
        "Referer": "http://ics.cnvd.org.cn/?tdsourcetag=s_pctim_aiomsg&max=20&offset=40",
        "User-Agent": _USER_AGENT,
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text


def parse_urllist(content):
    """Yield the first link of every row in the listing table.

    Rows without a link are skipped instead of raising IndexError.
    """
    html = etree.HTML(content)
    if html is None:  # lxml could not build a tree from the content
        return
    for row in html.xpath('//tbody[@id="tr"]/tr'):
        hrefs = row.xpath("td/a/@href")
        if hrefs:
            yield hrefs[0]


def get_page(url, timeout=REQUEST_TIMEOUT):
    """Download one vulnerability detail page and return its body as text.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    headers = {
        "Host": "www.cnvd.org.cn",
        "User-Agent": _USER_AGENT,
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text


def _cell_values(html, label, what="text()"):
    """Return the stripped, concatenated values of the cell next to *label*.

    ``what`` is the final XPath step: ``text()`` for the cell text or
    ``@href`` for the link targets inside the cell.  A label that is not on
    the page yields an empty string.
    """
    nodes = html.xpath(_CELL_XPATH.format(label=label, what=what))
    return "".join(node.strip() for node in nodes)


def parse_page(content, url):
    """Extract one vulnerability record from a detail page.

    Args:
        content: HTML text of the detail page.
        url: address the page was fetched from; stored in the record.

    Returns:
        dict whose keys are inserted in the same order as HEADS, because
        save_data() writes the values positionally.

    Raises:
        IndexError: if the page has no title heading, i.e. it is not a
            regular detail page.
    """
    # NOTE(review): the cell labels below are Traditional Chinese exactly as
    # they appear in this copy of the script -- confirm they match the text
    # the site really serves, otherwise every lookup silently returns "".
    html = etree.HTML(content)
    item = {"url": url}
    item["標題"] = str(html.xpath('//div[@class="blkContainerSblk"]/h1/text()')[0])
    item["CNVD_ID"] = _cell_values(html, "CNVD-ID")
    item["公開日期"] = _cell_values(html, "公開日期")
    # The original chained four replace() calls whose escape sequences were
    # lost in the paste (presumably space, \r, \n, \t); drop all whitespace.
    item["危害級別"] = "".join(_cell_values(html, "危害級別").split())
    item["影響產品"] = _cell_values(html, "影響產品")
    item["BUGTRAQ_ID"] = _cell_values(html, "BUGTRAQ ID")
    item["CVE_ID"] = (_cell_values(html, "CVE ID") + " "
                      + _cell_values(html, "CVE ID", "@href"))
    item["漏洞描述"] = _cell_values(html, "漏洞描述")
    item["漏洞類型"] = _cell_values(html, "漏洞類型")
    item["參考鏈接"] = _cell_values(html, "參考鏈接")
    item["漏洞解決方案"] = _cell_values(html, "漏洞解決方案")
    # The site prefix is appended unconditionally, as in the original.
    item["廠商補丁"] = (_cell_values(html, "廠商補丁") + " http://www.cnvd.org.cn"
                    + _cell_values(html, "廠商補丁", "@href"))
    item["驗證信息"] = _cell_values(html, "驗證信息")
    item["報送時間"] = _cell_values(html, "報送時間")
    item["收錄時間"] = _cell_values(html, "收錄時間")
    item["更新時間"] = _cell_values(html, "更新時間")
    item["漏洞附件"] = _cell_values(html, "漏洞附件")
    return item


def _write_row(workbook, index, values):
    """Write *values* into row *index* of "sheet1" without saving."""
    sheet = workbook.get_sheet("sheet1")
    for col, value in enumerate(values):
        sheet.write(index, col, value)


def save_data(index, item, workbook, filename=FILENAME):
    """Write one scraped record to row *index* and save the workbook.

    Saving after every record keeps the file usable if a later request fails.
    """
    _write_row(workbook, index, item.values())
    workbook.save(filename)
    print("保存成功")


def excel_prepare(heads):
    """Create a workbook with a single "sheet1" whose row 0 holds *heads*."""
    workbook = xlwt.Workbook()
    # cell_overwrite_ok lets restored rows overwrite the header row later.
    sheet = workbook.add_sheet("sheet1", cell_overwrite_ok=True)
    for col, value in enumerate(heads):
        sheet.write(0, col, value)
    return workbook


def urlisexist(url, urlset):
    """Return True when *url* is already contained in *urlset*."""
    return url in urlset


def getallurl(filename):
    """Return the URL column (column 0, header row skipped) of *filename*."""
    workbook = xlrd.open_workbook(filename)
    sheet1 = workbook.sheet_by_name("sheet1")
    return sheet1.col_values(0, 1)


def read_old(filename):
    """Read every row of "sheet1" from an existing workbook.

    Returns:
        (rows, nrows): list of row-value lists (header included) and the
        number of rows in the sheet.
    """
    workbook = xlrd.open_workbook(filename)
    sheet1 = workbook.sheet_by_name("sheet1")
    alloldset = [sheet1.row_values(index) for index in range(sheet1.nrows)]
    return alloldset, sheet1.nrows


def save_old(index, olditem, workbook=None, filename=FILENAME):
    """Write one previously saved row back to row *index* and save.

    The original read ``workbook`` and ``filename`` from module globals that
    only existed under the main guard; they are explicit parameters now.

    Raises:
        ValueError: if no workbook is supplied.
    """
    if workbook is None:
        raise ValueError("save_old() needs the workbook to write into")
    _write_row(workbook, index, olditem)
    workbook.save(filename)


def main():
    """Resume from the existing workbook (if any) and scrape new entries."""
    try:
        alloldset, length = read_old(FILENAME)
    except (OSError, xlrd.XLRDError):
        # No readable previous workbook: start below the header row.
        alloldset, length = [], 1

    workbook = excel_prepare(HEADS)
    # Restore the old rows in memory and save once, rather than re-saving
    # the whole workbook for every restored row.
    for index, olditem in enumerate(alloldset):
        _write_row(workbook, index, olditem)
    if alloldset:
        workbook.save(FILENAME)

    # Column 0 of the old data rows holds the URLs already scraped.  A set
    # gives O(1) membership tests and lets this run's URLs be added too.
    seen = {row[0] for row in alloldset[1:] if row}

    # Never start on row 0: that row holds the header.
    index = max(length, 1)
    for urlofpage in makeurl(MAX_PAGE):
        pagelistcontent = get_page_urllist(urlofpage)
        for url in parse_urllist(pagelistcontent):
            print("url is >>>", url)
            if urlisexist(url, seen):
                continue
            time.sleep(TIMESLEEP)
            result = get_page(url)
            item = parse_page(result, url)
            print("item is >>>", item)
            save_data(index, item, workbook, FILENAME)
            seen.add(url)
            index += 1
    workbook.save(FILENAME)


if __name__ == "__main__":
    main()
不懂的地方,下方評論提問
文章版權歸作者所有,未經允許請勿轉載,若此文章存在違規行為,您可以聯繫管理員刪除。
轉載請註明本文地址:http://specialneedsforspecialkids.com/yun/43933.html
摘要:作為在國產自主創新領域沉淀十余載的操作系統排頭兵,麒麟信安將攜手,立根鑄魂,共推操作系統產業新發展。 2021年11月9日-10日,主題為立根鑄魂,逐夢數字時代星辰...
摘要:本次活動的主旨就是和大家共同討論信息系統中如何有效地構建安全防護體系,如何規範安全制度,如何讓制定的安全制度落地,形成有效的規範制度來保護系統安全,並保證系統一旦出現安全事故後還有應急的解決方案。 安全漏洞頻出,信息系統中安全防護如何有效構建?(字數 3685 · 閱讀 1029 · 評論 0 · 贊 3)前言:隨著信息化的高速發展,越來越多...
閱讀 2975·2021-11-24 10:22
閱讀 3044·2021-11-23 10:10
閱讀 1353·2021-09-28 09:35
閱讀 1752·2019-08-29 13:16
閱讀 1395·2019-08-26 13:29
閱讀 2782·2019-08-26 10:27
閱讀 678·2019-08-26 10:09
閱讀 1436·2019-08-23 18:05