登录  /  注册

Python实时数据采集-新型冠状病毒

angryTom
发布: 2020-02-03 17:51:00
转载
3691人浏览过

Python实时数据采集-新型冠状病毒

Python实时数据采集-新型冠状病毒

源代码 来源:https://github.com/programming-with-love/2019-ncov

疫情数据时间为:2020.2.1

项目相关截图:

全国数据展示

国内数据展示

国外数据展示

查看指定区域详细数据

源代码如下。运行前请先安装所需的第三方模块:pip install requests beautifulsoup4 lxml prettytable

import requests
import re
from bs4 import BeautifulSoup
from time import sleep
import json
from prettytable import ALL
from prettytable import PrettyTable

# One dict per region. main() fills each with
# {city name: [confirmed, dead, cured]} and query() prints it.
hubei = {}
guangdong = {}
zhejiang = {}
beijing = {}
shanghai = {}
hunan = {}
anhui = {}
chongqing = {}
sichuan = {}
shandong = {}
guangxi = {}
fujian = {}
jiangsu = {}
henan = {}
hainan = {}
tianjin = {}
jiangxi = {}
shanxi1 = {}  # Shaanxi
guizhou = {}
liaoning = {}
xianggang = {}
heilongjiang = {}
aomen = {}
xinjiang = {}
gansu = {}
yunnan = {}
taiwan = {}
shanxi2 = {}  # Shanxi
jilin = {}
hebei = {}
ningxia = {}
neimenggu = {}
qinghai = {}  # marked "none" by the original author (presumably no data yet)
xizang = {}  # marked "none" by the original author (presumably no data yet)

# Single source of truth pairing each display name with its dict. Both
# lookup tables below are derived from it so they can never disagree (the
# hand-written index table used to point '北京' at the chongqing slot, etc.).
_PROVINCE_SLOTS = [
    ('湖北', hubei), ('广东', guangdong), ('浙江', zhejiang),
    ('重庆', chongqing), ('湖南', hunan), ('安徽', anhui),
    ('北京', beijing), ('上海', shanghai), ('河南', henan),
    ('广西', guangxi), ('山东', shandong), ('江西', jiangxi),
    ('江苏', jiangsu), ('四川', sichuan), ('辽宁', liaoning),
    ('福建', fujian), ('黑龙江', heilongjiang), ('海南', hainan),
    ('天津', tianjin), ('河北', hebei), ('山西', shanxi2),
    ('云南', yunnan), ('香港', xianggang), ('陕西', shanxi1),
    ('贵州', guizhou), ('吉林', jilin), ('甘肃', gansu),
    ('台湾', taiwan), ('新疆', xinjiang), ('宁夏', ningxia),
    ('澳门', aomen), ('内蒙古', neimenggu), ('青海', qinghai),
    ('西藏', xizang),
]

# Positional list of the per-region dicts (same order as before).
provinces_idx = [slot for _, slot in _PROVINCE_SLOTS]

# Region short name -> index into provinces_idx.
# NOTE: the name shadows the builtin map(); it is kept because the rest of
# the script refers to this table by that name.
map = {name: index for index, (name, _) in enumerate(_PROVINCE_SLOTS)}


def getTime(text):
    """Return the contents of the first ``<span>...</span>`` pair in *text*.

    Args:
        text: Any object; it is converted with ``str()`` before searching.

    Returns:
        The text between the first ``<span>`` and its closing ``</span>``.

    Raises:
        IndexError: If *text* contains no ``<span>...</span>`` pair.
    """
    # The quotes below were HTML-escaped (&#39;) in the published listing,
    # which made the line a syntax error.
    spans = re.findall('<span>(.*?)</span>', str(text))
    return spans[0]

def getAllCountry(text):
    """Strip the known wrapper markup from *text* and return what is left.

    *text* is converted with ``str()``; every occurrence of each fragment
    below is then deleted, in this exact order (order matters because
    ``</p>]`` must go before the bare ``</p>``).
    """
    markup_fragments = (
        '[<p class="confirmedNumber___3WrF5"><span class="content___2hIPS">',
        '<span style="color: #4169e2">',
        '</span>',
        '</p>]',
        '<span style="color: rgb(65, 105, 226);">',
        '<span>',
        '<p>',
        '</p>',
    )
    cleaned = str(text)
    for fragment in markup_fragments:
        cleaned = cleaned.replace(fragment, "")
    return cleaned

def query(province):
    """Print the per-city figures of one region as a table.

    Args:
        province: Mapping of city name to a sequence whose first three
            items are the confirmed, dead and cured counts (this is the
            shape main() stores). Zero counts are shown as ``-``.

    Prints "暂无" instead of a table when the mapping is empty.
    """
    # Guard first so the empty case does not build a table it never prints.
    if not province:
        print("暂无")
        return

    # The quotes in this function were HTML-escaped (&#39;) in the published
    # listing, which made it a syntax error.
    table = PrettyTable(['地区', '确诊', '死亡', '治愈'])
    for name, counts in province.items():
        confirmed, dead, cured = counts[0], counts[1], counts[2]
        table.add_row([
            name,
            confirmed if confirmed != 0 else '-',
            dead if dead != 0 else '-',
            cured if cured != 0 else '-',
        ])
    print(table)
def getInfo(text):
    """Return ``str(text)`` with the description paragraph tags removed.

    Deletes every ``<p class="descText___Ui3tV">`` opening tag and every
    ``</p>`` closing tag; all other characters are left untouched.
    """
    stripped = str(text)
    for tag in ('<p class="descText___Ui3tV">', '</p>'):
        stripped = stripped.replace(tag, "")
    return stripped

def is_json(json_str):
    """Report whether *json_str* parses as a JSON document.

    Args:
        json_str: Candidate text (``str``, ``bytes`` or ``bytearray``).

    Returns:
        True if ``json.loads`` accepts the value, False otherwise. Values of
        an unsupported type (e.g. ``None``) also yield False.
    """
    try:
        json.loads(json_str)
    except (ValueError, TypeError):
        # ValueError covers malformed JSON (JSONDecodeError subclasses it);
        # TypeError is what json.loads raises for non-text input, which
        # previously escaped this predicate.
        return False
    return True

def ff(str, num):
    """Return *str* with the item at position *num* sliced out.

    Built purely from slices, so an out-of-range *num* leaves the value
    unchanged rather than raising.
    """
    before = str[:num]
    after = str[num + 1:]
    return before + after
        

def _dash_if_zero(value):
    """Return '-' for a zero value so empty table cells read cleanly."""
    return value if value != 0 else '-'


def main():
    """Fetch the outbreak figures, print them, then answer one region query.

    Steps, in order:
      1. Download the DXY pneumonia page and keep its markup as text.
      2. Poll the statistics API until it reports ``error == 0`` (bounded
         number of attempts).
      3. Print the national totals and the background notes.
      4. Print a domestic table built from the ``{"provinceName": ...}``
         fragments embedded in the page, storing each region's per-city
         figures in the module-level tables.
      5. Print an overseas table built from the ``"id":949`` fragment.
      6. Print the first five timeline entries.
      7. Prompt for a region name and print its per-city table.

    Any failure along the way is reported as "连接失败".
    """
    url = "https://3g.dxy.cn/newh5/view/pneumonia"
    api_url = "https://service-f9fjwngp-1252021671.bj.apigw.tencentcs.com/release/pneumonia"
    timeout_seconds = 10
    max_attempts = 5

    try:
        # The quotes throughout this function were HTML-escaped (&#39;) in the
        # published listing, which made it a syntax error.
        headers = {
            # HTTP header names are case-insensitive.
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        }

        # A timeout keeps a stalled connection from hanging the script.
        r = requests.get(url, headers=headers, timeout=timeout_seconds)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        soup = BeautifulSoup(r.text, 'lxml')
        page_text = str(soup)  # searched twice below; convert once
        table = PrettyTable(['地区', '确诊', '死亡', '治愈'])
        table.hrules = ALL

        print()

        # The API signals readiness with error == 0. Retry a bounded number
        # of times with a short pause instead of spinning forever.
        for _ in range(max_attempts):
            r = requests.get(api_url, timeout=timeout_seconds)
            api_data = json.loads(r.text)
            if api_data['error'] == 0:
                break
            sleep(1)
        else:
            raise RuntimeError("statistics API kept reporting an error")

        stats = api_data['data']['statistics']

        print("==================================全国数据==================================")
        print()

        print("     确诊 " + str(stats['confirmedCount']) + " 例"
              + "       " + "疑似 " + str(stats['suspectedCount']) + " 例"
              + "       " + "死亡" + str(stats['deadCount']) + " 例"
              + "       " + "治愈" + str(stats['curedCount']) + " 例\n")

        print("==================================相关情况==================================")
        print()

        print("传染源:" + stats['infectSource'])
        print("病毒:" + stats['virus'])
        print("传播途径:" + stats['passWay'])
        print(stats['remark1'])
        print(stats['remark2'] + "\n")

        print("==================================国内情况==================================")
        print()

        for fragment in re.findall('{"provinceName":(.*?)]}', page_text):
            # The regex captures only the inside of each object, so the
            # wrapper it consumed is always put back before parsing.
            province = json.loads('{"provinceName":' + fragment + ']}')

            province_name = _dash_if_zero(province['provinceShortName'])
            confirmed = _dash_if_zero(province['confirmedCount'])
            cured = _dash_if_zero(province['curedCount'])
            dead = _dash_if_zero(province['deadCount'])
            table.add_row([province_name, confirmed, dead, cured])

            # Reuse the region's existing slot; add a new one for names the
            # table does not know yet. This avoids running past the end of
            # provinces_idx and keeps two names from sharing one dict.
            slot = map.get(province_name)
            if slot is None:
                provinces_idx.append({})
                slot = len(provinces_idx) - 1
                map[province_name] = slot

            city_figures = provinces_idx[slot]
            for city in province['cities']:
                city_figures[city['cityName']] = [
                    city['confirmedCount'],
                    city['deadCount'],
                    city['curedCount'],
                ]

        print(table)

        print()
        print("==================================国外情况==================================")
        print()

        # Rebuild the JSON array around the captured fragment directly.
        # (Slicing the repr of the findall() list, as was done before,
        # corrupts the text whenever repr() escapes a character.)
        overseas_matches = re.findall('"id":949(.*?)]}', page_text)
        if overseas_matches:
            provinces = json.loads('[{"id":949' + overseas_matches[0] + ']')
        else:
            provinces = []

        table = PrettyTable(['地区', '确诊', '死亡', '治愈'])
        for province in provinces:
            table.add_row([
                province['provinceName'],
                _dash_if_zero(province['confirmedCount']),
                _dash_if_zero(province['deadCount']),
                _dash_if_zero(province['curedCount']),
            ])

        print(table)
        print()

        print("==================================最新消息==================================")
        print()

        # Only the first five timeline entries are shown.
        for news in api_data['data']['timeline'][:5]:
            print(news['pubDateStr'] + "  " + news['title'])

        print()
        key = input("请输入您想查询详细信息的省份,例如 湖北\n")
        print()
        if key in map:
            query(provinces_idx[map[key]])
        else:
            print("暂无相关信息")

        print("\n欢迎提出各种意见")
    except Exception:
        # Best-effort script: report any failure the same way, but no longer
        # swallow KeyboardInterrupt / SystemExit as the bare except did.
        print("连接失败")

# Script entry point. The quotes here were HTML-escaped (&#39;) in the
# published listing, which made the guard a syntax error.
if __name__ == '__main__':
    main()
    # Pause for 30 seconds after the report before the process exits.
    sleep(30)
登录后复制

 最后,祝大家百毒不侵,中国加油!!一定能够度过难关!!

以上就是Python实时数据采集-新型冠状病毒的详细内容,更多请关注php中文网其它相关文章!

智能AI问答
PHP中文网智能助手能迅速回答你的编程问题,提供实时的代码和解决方案,帮助你解决各种难题。不仅如此,它还能提供编程资源和学习指导,帮助你快速提升编程技能。无论你是初学者还是专业人士,AI智能助手都能成为你的可靠助手,助力你在编程领域取得更大的成就。
相关标签:
来源:CSDN网
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系admin@php.cn
作者最新文章
最新问题
热门教程
更多>
最新下载
更多>
网站特效
网站源码
网站素材
前端模板
关于我们 免责申明 意见反馈 讲师合作 广告合作 最新更新
php中文网:公益在线php培训,帮助PHP学习者快速成长!
关注服务号 技术交流群
PHP中文网订阅号
每天精选资源文章推送
PHP中文网APP
随时随地碎片化学习
PHP中文网抖音号
发现有趣的

Copyright 2014-2024 https://www.php.cn/ All Rights Reserved | php.cn | 湘ICP备2023035733号