Python3: scrape local weather and news and write them to an HTML file

pip install --upgrade pip;
pip install feedparser requests beautifulsoup4 selenium;
#!/usr/bin/env python3
# coding=utf-8

import json
import time
import requests
from bs4 import BeautifulSoup
import re
import socket
socket.setdefaulttimeout(10)

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0','Accept':'*/*'}
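# these headers are sent with every requests.get call below so the requests look like a normal browser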

def radar():
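    # build links to radar and precipitation images scraped from the m.nmc.cn mobile pages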
    s2 = ' '
    # page ids on m.nmc.cn: p-280 = Xi'an, p-94 = North China, p-92 = nationwide
    req = requests.get('http://m.nmc.cn/f/wap/p-94', headers=headers, timeout=10)
    req.encoding = 'utf-8'
    soup = BeautifulSoup(req.text, "html.parser")
    s1 = soup.find_all(attrs={'id':'imgpath'})
    s2 = s2+ '<a href='+ s1[0]['src'].replace('medium','small') +'>RadarXA</a>'

    req = requests.get('http://m.nmc.cn/f/wap/p-92', headers=headers, timeout=10)
    req.encoding = 'utf-8'
    soup = BeautifulSoup(req.text, "html.parser")
    s1 = soup.find_all(attrs={'id':'imgpath'})
    s2 = s2+ ' <a href='+ s1[0]['src'] +'>RadarCN</a>'

    req = requests.get('http://m.nmc.cn/f/wap/p-337', headers=headers, timeout=10)
    req.encoding = 'utf-8'
    soup = BeautifulSoup(req.text, "html.parser")
    s1 = soup.find_all(attrs={'id':'imgpath'})
    s2 = s2+ ' <a href='+ s1[0]['src'].replace('medium','small') +'>Rain24</a><br>\n'
    print('radar done')
    return s2 

def weather():
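    # 57036 is the weather station id for the local (Xi'an) observation; both endpoints return JSON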
    req = requests.get('http://www.nmc.cn/f/rest/real/57036', headers=headers, timeout=10)
    html = req.text
    d = json.loads(html)
    req = requests.get('http://www.nmc.cn/f/rest/aqi/57036', headers=headers, timeout=10)
    html = req.text
    d2 = json.loads(html)
    s = '{0} {1}C Air.{2} {3}{4}m/s'.format(d['weather']['info'], d['weather']['temperature'], d2['aqi'], d['wind']['direct'], d['wind']['speed'])
    print('weather done')
    return s

def cnbeta():
    s = '<h3><a href=http://m.cnbeta.com/wap>CnBeta</a></h3>\n'
    req = requests.get('https://m.cnbeta.com/wap', headers=headers, timeout=20)
    req.encoding = 'utf-8'
    soup = BeautifulSoup(req.text, "html.parser")
    s2 = soup.find_all(attrs={'class':'list'}, limit=36)
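    # each matched node wraps one headline <a>; rebuild a short mobile URL from the tail of its href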
    for i in s2:
        s += '<a href=https://m.cnbeta.com'+i.a.get('href')[-16:]+' target=_blank>'+i.a.string+'</a><br>\n'
    print('CB done')
    return s

def hsw():
    s = '<h3>HSW NEWS XIAN</h3>\n'
    req = requests.get('http://news.hsw.cn/sx08/xaxw08/', headers=headers, timeout=30)
    req.encoding = 'utf-8'
    soup = BeautifulSoup(req.text, "html.parser")
    st = soup.find_all('h3', limit=26)
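    # st[0] is skipped; indices 1-17 give 17 headline links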
    for i in range(1,18):
        s += ('<a href='+st[i].a.get('href')+' target=_blank>'+st[i].string+'</a><br>\n')
    print('HSW done')
    return s


sout = '<head><meta charset="utf-8"/><style type="text/css"> <!-- A { text-decoration: none; line-height:1.5;} --> </style><title>新闻Swds</title></head><body bgcolor=#eeeeee>'
sout = sout + time.strftime(" %y-%m-%d %H:%M:%S ", time.localtime())
try: sout = sout + radar() + time.strftime(" %H:%M:%S ", time.localtime())
except Exception: sout = sout + 'radar err<br>'
try: sout = sout + weather() + time.strftime(" %H:%M:%S ", time.localtime())
except Exception: sout = sout + '<br>weather err'
try: sout = sout + hsw() + time.strftime(" %H:%M:%S ", time.localtime())
except Exception: sout = sout + '<br>hsw err'
try: sout = sout + cnbeta() + time.strftime(" %H:%M:%S ", time.localtime())
except Exception: sout = sout + 'cnbeta err'

sout = sout + time.strftime(" %H:%M:%S ", time.localtime()) +'</body>'
with open('snews.html', 'w', encoding='UTF-8') as f: f.write(sout)
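
To keep the page fresh you can simply rerun the script from cron, but a pure-Python option works too. The sketch below is optional and makes two assumptions of mine: the scraper above is saved as snews.py, and a 30-minute refresh is acceptable. It reruns the scraper in a child process and serves the current folder (including snews.html) on http://localhost:8000 with only the standard library.

#!/usr/bin/env python3
# coding=utf-8
# refresh_snews.py -- optional helper; "snews.py" and the 30-minute interval are assumptions
import subprocess
import threading
import time
from http.server import HTTPServer, SimpleHTTPRequestHandler

def regenerate(interval=1800):
    # rerun the scraper in a child process so one failed run cannot take down the server
    while True:
        try:
            subprocess.run(['python3', 'snews.py'], timeout=300)
        except Exception as e:
            print('refresh failed:', e)
        time.sleep(interval)

threading.Thread(target=regenerate, daemon=True).start()
# serve the current directory, including snews.html, on port 8000
HTTPServer(('', 8000), SimpleHTTPRequestHandler).serve_forever()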
