摘要:用 Excel 查重效率太低,文件太大的話又比較吃電腦的配置;思路是用 pandas 實現查重,並用 Python 自帶的 TK 做成 GUI,更加普遍好用。
基於 Python 3.x 與 pandas 實現大疫情的查重功能。系統使用 win7 32 位或 64 位簡單測試沒有問題,XP 及 win10 未測試;如果感興趣的小伙伴可以參考源碼,自行使用 Python 運行!查重的文件請使用 CSV 格式的文件,最好為從大疫情下載之後、僅刪除頭兩行空白的 csv 文件。
初衷
每個月要對疫情卡片進行查重,而服務器只提供一段時間的查重功能,無法對指定時間的卡片進行查重!
用Excel查重,效率太低。如果文件太大的話比較吃電腦的配置!
思路及代碼用Python自帶的TK做成GUI更加普遍好用。
用pandas包來實現數據的查重功能,具體見代碼注釋!源代碼見文后:
查重的相關規則默認為名字拼音+疾病名稱 與 身份證號+疾病名稱的查重的交集(身份證查重的優先級高于名字拼音),如勾選了 性別 現住址國標的話,則判斷重卡的依據就是 姓名拼音+性別+現住址+疾病名稱為相同則判定為重卡!!其它類似!
因為為python打包而來,所以雙擊之后請稍等(解析速度較慢)!
如果殺毒軟件提示,請點擊「允許」!
如果雙擊之後出現找不到某個動態庫的提示(如下界面),請前往 https://www.microsoft.com/zh-... 下載並安裝 VC2015 之後再運行。
如果安裝以上下載文件出錯時,請用殺毒軟件下載系統更新補丁后重試!
如果在使用過程中有什麼疑問或好的建議,可以發送郵件到 ztwenxing@dingtalk.com(有時間的話會回覆)。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Tkinter GUI that finds duplicate report cards in a CSV export.

The user picks an input CSV and an output path, optionally ticks extra
matching columns, and presses OK.  Two duplicate searches are run -- one
keyed on the ID number, one keyed on the pinyin of the patient name -- each
combined with the disease name and the ticked optional columns.  The two
results are merged (ID matches take priority) and written to the output CSV.

NOTE(review): the column names below must match the CSV headers exactly;
confirm them against a real export before use.
"""
from pinyin import pinyin
from tkinter import Tk, filedialog, Canvas, messagebox, StringVar, IntVar
from tkinter import Label, Entry, Button, Checkbutton
from pandas import DataFrame, read_csv, Series, to_datetime, concat

# Candidate key columns, in the same order as the eight check boxes.
# "name" is the pinyin column derived from "患者姓名" at run time.
KEY_COLUMNS = ["name", "有效證件號", "性別", "聯系電話", "現住地址國標",
               "疾病名稱", "病例分類", "病例分類2"]


def hanzi2pinyin(sr, *args, **kwargs):
    """Return a list with the tone-less, undelimited pinyin of each item in *sr*."""
    return [pinyin.get(item, format="strip", delimiter="") for item in sr]


def shengcheng_chongka(data, checklist, aeslist="報告卡錄入時間"):
    """Pair up duplicated records of *data*.

    Args:
        data: DataFrame of report cards.
        checklist: list of column names whose concatenated values form the
            duplicate key (missing values count as "_").
        aeslist: column used to order records inside one duplicate group.

    Returns:
        A DataFrame with one row per duplicate group: the last record of the
        group (columns suffixed ``_last``), the group size in ``重復卡片數``,
        and the first record of the group (columns suffixed ``_f``).
    """
    # Reset the index BEFORE building the key: the caller may pass a frame
    # with gaps in its index (after dropna), and index-aligned concatenation
    # would otherwise attach keys to the wrong rows.
    records = data.reset_index(drop=True)

    # Stringify every key part so numeric columns can be concatenated too.
    key_parts = records[checklist].astype(object)
    key_parts = key_parts.where(key_parts.notna(), "_").astype(str)
    key = Series("", index=records.index, dtype=object)
    for column in checklist:
        key = key + key_parts[column]
    records["chachong"] = key

    records = records.sort_values(by=["chachong", aeslist])
    dupes = records[records.duplicated(subset="chachong", keep=False)]
    dupes = dupes.reset_index(drop=True)

    # Group sizes, named explicitly so the result does not depend on how a
    # given pandas version names the output of value_counts().
    counts = dupes["chachong"].value_counts().rename("重復卡片數")

    earliest = dupes.drop_duplicates(subset="chachong", keep="first")
    latest = dupes.drop_duplicates(subset="chachong", keep="last")
    latest = latest.join(counts, on="chachong")

    paired = latest.join(earliest.set_index("chachong"), on="chachong",
                         lsuffix="_last", rsuffix="_f")
    # The helper columns are not part of the report.
    return paired.drop(columns=["name_last", "chachong", "name_f"],
                       errors="ignore")


def _selected_columns(flags):
    """Return the KEY_COLUMNS entries whose matching flag equals 1."""
    return [column for column, flag in zip(KEY_COLUMNS, flags) if flag == 1]


# ---------------------------------------------------------------- window ---
window = Tk()
window.title("大疫情查重用")
window.geometry("700x395")

width = 700
height = 395
canvas = Canvas(window, width=width, height=height, bd=0, highlightthickness=0)
# Placeholder item at the canvas centre; no background image is loaded.
canvas.create_image(width / 2, height / 2)
canvas.pack()

# Height of one layout row, in pixels (evaluates to 39.5).
height_x = height / (height / 39.5)


def _place(widget, col, row, **options):
    """Put *widget* on the canvas at column *col* (of 18) and layout row *row*."""
    canvas.create_window(width * (col / 18), height_x * row,
                         window=widget, **options)


label_text = Label(window, text="此小程序主要用大疫情網絡的疫情卡片查重!!!")
_place(label_text, 9, 1)


# ------------------------------------------------------- input selection ---
def choiceFileCallBack():
    """Ask for the input CSV and enable the OK button when one is chosen."""
    filenames = filedialog.askopenfilenames(filetypes=[("csv文件", "*.csv")])
    if not filenames:
        en_text.set("")
        messagebox.showinfo("未選擇", "未選擇文件,請選擇")
        button3.configure(state="disabled")
        return

    chosen = str(filenames[0])
    # Test the extension, not a substring anywhere in the path.
    if not chosen.lower().endswith(".csv"):
        en_text.set("")
        messagebox.showinfo("請選擇csv文件格式", "未選擇csv格式文件,請重新選擇!!")
        button3.configure(state="disabled")
        return

    en_text.set(chosen)
    button3.configure(state="active")


label1 = Label(window, text="需要讀取的文件路徑:", )
_place(label1, 2, 2, width=170)
en_text = StringVar()
en_text.set("")
entry = Entry(window, textvariable=en_text)
_place(entry, 9, 2, width=360)
button1 = Button(window, text="選擇", command=choiceFileCallBack)
_place(button1, 16, 2, width=80, height=30)


# ------------------------------------------------------ output selection ---
def choiceSaveCallBack():
    """Ask for the output path and enable the OK button when one is chosen."""
    filename = filedialog.asksaveasfilename(filetypes=[("csv文件", "*.csv")])
    # A cancelled dialog may return "" or an empty tuple.
    if not filename:
        en1_text.set("")
        messagebox.showinfo("未選擇", "未選擇保存路徑請選擇!")
        button3.configure(state="disabled")
        return

    save_path = str(filename)
    # Only add the extension when the user did not type it already.
    if not save_path.lower().endswith(".csv"):
        save_path += ".csv"
    en1_text.set(save_path)
    button3.configure(state="active")


label2 = Label(window, text="查重的保存路徑及文件名:", )
_place(label2, 2, 3, width=170)
en1_text = StringVar()
en1_text.set("")
entry1 = Entry(window, textvariable=en1_text)
_place(entry1, 9, 3, width=360)
button2 = Button(window, text="選擇", command=choiceSaveCallBack)
_place(button2, 16, 3, width=80, height=30)


# ----------------------------------------------------------------- logic ---
def hellook():
    """Run the duplicate search and save the merged result (OK button)."""
    try:
        # Read ID and phone numbers as text: parsed as floats, 18-digit IDs
        # would lose precision and compare equal when they are not.
        rc_data = read_csv(filepath_or_buffer=entry.get(), encoding="GB18030",
                           dtype={"有效證件號": str, "聯系電話": str})
    except (OSError, ValueError):
        # ValueError also covers decoding and parsing errors.
        messagebox.showinfo(title="unfortunately ", message="打開文件出錯,請檢查!")
        return

    rc_data1 = rc_data.copy()
    rc_data1["name"] = hanzi2pinyin(rc_data1["患者姓名"])
    rc_data1["現住地址國標"] = rc_data1["現住地址國標"].map(str)
    rc_data1["報告卡錄入時間"] = to_datetime(rc_data1["報告卡錄入時間"])

    optional_flags = [CheckVar3.get(), CheckVar4.get(), CheckVar5.get(),
                      CheckVar6.get(), CheckVar7.get(), CheckVar8.get()]

    # Pass 1: key on the ID number; records without an ID cannot match.
    with_id = rc_data1.dropna(subset=["有效證件號"])
    zong_sf = shengcheng_chongka(
        data=with_id, checklist=_selected_columns([0, 1] + optional_flags))

    # Pass 2: key on the pinyin of the name.
    zong_nm = shengcheng_chongka(
        data=rc_data1, checklist=_selected_columns([1, 0] + optional_flags))

    # ID matches come first so that keep="first" gives them priority.
    zong = concat([zong_sf, zong_nm])
    zong = zong.drop_duplicates(subset="卡片編號_last", keep="first")
    zong = zong.sort_values(by=["報告單位地區編碼_last", "報告卡錄入時間_last"],
                            ascending=False)

    try:
        zong.to_csv(entry1.get(), index=False, encoding="GB18030")
    except (OSError, ValueError):
        # ValueError also covers characters GB18030 cannot encode.
        messagebox.showinfo(title="unfortunately ", message="保存文件出錯,請檢查!")
    else:
        infomessage = "查重完畢!文件保存在{}".format(entry1.get())
        messagebox.showinfo(title="success ", message=infomessage)


button3 = Button(window, text="OK", command=hellook)
_place(button3, 9, 4, width=80, height=30)

# ------------------------------------------------------- key check boxes ---
label3 = Label(window, text="基礎信息:", )
_place(label3, 2, 5, width=170)

CheckVar1 = IntVar()
CheckVar2 = IntVar()
CheckVar3 = IntVar()
CheckVar4 = IntVar()
CheckVar5 = IntVar()
# Name and ID are always part of the search, so their boxes are locked on.
C1 = Checkbutton(window, text="姓 名", variable=CheckVar1, onvalue=1, offvalue=0,
                 height=5, width=20, state="disabled")
C1.select()
C2 = Checkbutton(window, text="有效證件號", variable=CheckVar2, onvalue=1, offvalue=0,
                 height=5, width=20, state="disabled")
C2.select()
C3 = Checkbutton(window, text="性 別", variable=CheckVar3, onvalue=1, offvalue=0,
                 height=5, width=20)
C3.select()
C4 = Checkbutton(window, text="聯系電話", variable=CheckVar4, onvalue=1, offvalue=0,
                 height=5, width=20)
C4.select()
C5 = Checkbutton(window, text="現住地址國標", variable=CheckVar5, onvalue=1, offvalue=0,
                 height=5, width=20)
C5.select()
_place(C1, 2, 5.8, width=80, height=30)
_place(C2, 5, 5.8, width=80, height=30)
_place(C3, 8, 5.8, width=80, height=30)
_place(C4, 11, 5.8, width=80, height=30)
_place(C5, 14, 5.8, width=100, height=30)

label4 = Label(window, text="疾病信息:", )
_place(label4, 2, 6.4, width=170)

CheckVar6 = IntVar()
CheckVar7 = IntVar()
CheckVar8 = IntVar()
# The disease name is always part of the key, so its box is locked on.
C6 = Checkbutton(window, text="疾病名稱", variable=CheckVar6, onvalue=1, offvalue=0,
                 height=5, width=20, state="disabled")
C6.select()
C7 = Checkbutton(window, text="病例分類", variable=CheckVar7, onvalue=1, offvalue=0,
                 height=5, width=20)
C8 = Checkbutton(window, text="病例分類2", variable=CheckVar8, onvalue=1, offvalue=0,
                 height=5, width=20)
_place(C6, 2, 7.2, width=80, height=30)
_place(C7, 5, 7.2, width=80, height=30)
_place(C8, 8, 7.2, width=80, height=30)

label5 = Label(window, text="問題反饋:ztwenxing@dingtalk.com 源碼及說明:https://segmentfault.com/a/1190000018570381", )
_place(label5, 9, 8)

window.mainloop()
文章版權歸作者所有,未經允許請勿轉載,若此文章存在違規行為,您可以聯系管理員刪除。
轉載請注明本文地址:http://specialneedsforspecialkids.com/yun/43414.html
U事記1. UCloud優刻得數字力量助力老年人跨越數字鴻溝。2.楊浦區委副書記、區長薛侃走訪調研UCloud優刻得疫情防控和復產復工工作。3.UCloud優刻得中標特變電工德陽、新疆兩地電纜公司無人值守地磅采購項目。4.UCloud優刻得聯合中國移動,打造山東棗莊高新區智慧城市云平臺。5.紫鳥瀏覽器使用GlobalSSH暢聯全球,安全高效管理云端跨境店鋪。6. UCloud優刻得數字哨兵護衛陸家...
摘要:完成可視化熱搜榜和國內疫情新增圖,提高學生的編程能力和分析問題解決問題的能力。下圖為百度微博知乎三大平臺的熱搜詞頻統計圖。后續我會補上薄弱項,為爭取做一名全棧技術人員而奮斗。 ...
摘要:互聯網行業薪資普遍高于其他行業拉勾網數據顯示,年以來,互聯網行業的年度平均薪資穩步增長,年度薪資較年同比增長,開年薪資同比去年增長。 智聯招聘的一組數據顯示,20...
閱讀 3400·2021-11-24 10:30
閱讀 3269·2021-11-22 15:29
閱讀 3706·2021-10-28 09:32
閱讀 1254·2021-09-07 10:22
閱讀 3336·2019-08-30 15:55
閱讀 3619·2019-08-30 15:54
閱讀 3494·2019-08-30 15:54
閱讀 2833·2019-08-30 15:44