# 爬虫1 - BOC/PAB

# -*- coding:utf-8 -*-
import re
import smtplib
import sys
import time
from email.header import Header
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formataddr

import mysql.connector
import requests
from lxml import etree
# Python 2 only: make UTF-8 the implicit str<->unicode codec so that the
# byte-string literals below can be concatenated with decoded page text.
# `reload` is not a builtin on Python 3 (and the default encoding there is
# already UTF-8), so the hack is skipped instead of raising NameError.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - Python 2 builtin
    sys.setdefaultencoding('utf8')
# Download the two exchange-rate pages and pull out the US dollar row cells.
#
# Produces the module-level names used by the rest of the script:
#   result1 / result2 -- lists of <td> cell texts for the USD row of each page
#                        (index 0 is the currency name, 1..5 are read later)
#   date / date_time  -- scrape date and timestamp strings
url1 = 'http://www.boc.cn/sourcedb/whpj/index.html'
url2 = 'https://bank.pingan.com.cn/ibp/portal/exchange/qryExchangeList.do'

# Without a timeout requests waits indefinitely on an unresponsive server.
_REQUEST_TIMEOUT = 10  # seconds

response1 = requests.get(url1, timeout=_REQUEST_TIMEOUT)
response1.raise_for_status()  # fail on HTTP errors instead of parsing an error page
html1 = response1.content.decode('utf8')

response2 = requests.get(url2, timeout=_REQUEST_TIMEOUT)
response2.raise_for_status()
html2 = response2.content.decode('utf8')

# Locate the cell naming the US dollar on each page; str.index raises
# ValueError when the marker is missing.
a1 = html1.index('<td>美元</td>')
a2 = html2.index('<td class="tac">美元</td>')

# Fixed-size windows starting at the USD cell.
# NOTE(review): the 300 / 203 character widths are presumably sized to cover
# the remaining cells of the row on the current page layout -- verify if the
# parse starts returning too few cells.
s1 = html1[a1:a1 + 300]
s2 = html2[a2:a2 + 203]

result1 = re.findall('<td>(.*?)</td', s1)
result2 = re.findall('<td class="tac">(.*?)</td', s2)

# The rest of the script reads indexes 1..5 of both lists; report a clear
# error here rather than an IndexError further down.
if len(result1) < 6 or len(result2) < 6:
    raise ValueError(
        'unexpected page layout: found %d BOC cells and %d PAB cells, '
        'need at least 6 of each' % (len(result1), len(result2))
    )

# Take a single clock reading so the date and the timestamp always agree.
_now = time.localtime()
date = time.strftime("%Y-%m-%d", _now)
date_time = time.strftime("%Y-%m-%d %H:%M:%S", _now)
# Write the scraped rates to hl.txt (the file is later attached to the mail).
# The report is assembled first so the file is written in a single call; an
# empty entry produces the blank line separating the two banks.
report_lines = [
    "中国银行外汇牌价: " + "美元 -> 人民币",
    '现汇买入价格: ' + result1[1],
    '现钞买入价格: ' + result1[2],
    '现汇卖出价格: ' + result1[3],
    '现钞卖出价格: ' + result1[4],
    '中国银行转换价: ' + result1[5],
    '爬取时间: ' + date,
    '',
    "中国平安银行外汇牌价: " + "美元 -> 人民币",
    '中国平安银行中间价: ' + result2[1],
    '现钞买入价格: ' + result2[2],
    '现汇买入价格: ' + result2[3],
    '现汇/现钞卖出价格: ' + result2[4],
    '中国人民银行中间价: ' + result2[5],
    '爬取时间: ' + date,
]
with open('hl.txt', 'w+') as f:
    f.write(''.join(line + '\n' for line in report_lines))
# Third-party SMTP service settings.
mail_host = "smtp.163.com"  # SMTP server
mail_user = "[email protected]"  # login user name
mail_pass = ""  # login password

# NOTE(review): `sender` and `receivers` are used for the headers below and
# by the later sendmail call, but the script never defined them (NameError).
# Both default to the SMTP login account so the script can run; replace
# `receivers` with the real recipient list.
sender = mail_user
receivers = [mail_user]

# Build a multipart message: a plain-text body plus hl.txt as an attachment.
message = MIMEMultipart()
message['From'] = formataddr(("Bro Yao", sender))
# formataddr formats one address at a time, so format each recipient.
message['To'] = ', '.join(formataddr(("Vip", addr)) for addr in receivers)
subject = '美元汇率'
message['Subject'] = Header(subject, 'utf-8')

# Mail body: a greeting followed by the same report that is written to hl.txt.
# The empty entry produces the blank line between the two banks.
body_lines = [
    '早上好,直接查看或下载附件查看美元汇率',
    '中国银行外汇牌价: ' + '美元 -> 人民币',
    '现汇买入价格: ' + result1[1],
    '现钞买入价格: ' + result1[2],
    '现汇卖出价格: ' + result1[3],
    '现钞卖出价格: ' + result1[4],
    '中国银行转换价: ' + result1[5],
    '爬取时间: ' + date,
    '',
    '中国平安银行外汇牌价: ' + '美元 -> 人民币',
    '中国平安银行中间价: ' + result2[1],
    '现钞买入价格: ' + result2[2],
    '现汇买入价格: ' + result2[3],
    '现汇/现钞卖出价格: ' + result2[4],
    '中国人民银行中间价: ' + result2[5],
    '爬取时间: ' + date,
]
message.attach(MIMEText(
    ''.join(line + '\n' for line in body_lines),
    'plain',
    'utf-8',
))

# Attach hl.txt as application/octet-stream (base64-encoded).
# MIMEApplication emits a single Content-Type header; assigning
# att["Content-Type"] on a MIMEText part would append a second one instead of
# replacing the first. The `with` block closes the file handle.
with open('hl.txt', 'rb') as attachment_file:
    att1 = MIMEApplication(attachment_file.read())
# The filename given here is the name shown in the mail client.
att1.add_header('Content-Disposition', 'attachment', filename='hl.txt')
message.attach(att1)
def _to_float(cell):
    """Convert a scraped table cell to float; blank cells become 0.0."""
    return float(cell) if cell.strip() else 0.0


# Convert the Bank of China cells before opening the database connection, so
# a malformed cell cannot leave an open connection behind.
data = {
    'BOCSBP': _to_float(result1[1]),  # spot exchange buying price
    'BOCPP': _to_float(result1[2]),   # cash buying price
    'BOCSSP': _to_float(result1[3]),  # spot exchange selling price
    'BOCSCP': _to_float(result1[4]),  # cash selling price
}
# Middle price: mean of the cash buying and cash selling prices.
data['BOCMP'] = (data['BOCPP'] + data['BOCSCP']) / 2
# BOC conversion price; guarded against an empty cell like the other columns.
data['BOCCP'] = _to_float(result1[5])
data['scrap_date'] = date_time

# MySQL storage.
# NOTE(review): this connects as root with an empty password to a hard-coded
# IP address -- move the credentials out of the source.
cnx = mysql.connector.connect(
    user='root',
    password='',
    host='111.231.225.172',
    database='finance'
)
mycursor = cnx.cursor()
# Step 1: store the Bank of China rates.
# Values are passed as query parameters (the connector uses %s placeholders
# for every type) rather than being interpolated into the SQL text.
insert_sql = (
    'INSERT INTO exr_dollar (spot_buying_price, purchase_price, spot_selling_price, selling_price, middle_price, boc_discounted_price, scrap_time)'
    ' VALUES (%s, %s, %s, %s, %s, %s, %s)'
)
insert_params = (
    data['BOCSBP'],
    data['BOCPP'],
    data['BOCSSP'],
    data['BOCSCP'],
    data['BOCMP'],
    data['BOCCP'],
    data['scrap_date'],
)
try:
    mycursor.execute(insert_sql, insert_params)
    cnx.commit()  # the INSERT only takes effect once committed
finally:
    # Database errors still propagate as before, but the cursor and the
    # connection are always released.
    mycursor.close()
    cnx.close()

# Step 2: send the report mail. `sender` and `receivers` must be defined
# earlier in the script.
smtpObj = None
try:
    smtpObj = smtplib.SMTP_SSL(mail_host, 465)
    smtpObj.login(mail_user, mail_pass)
    smtpObj.sendmail(sender, receivers, message.as_string())
    print("邮件发送成功")
except smtplib.SMTPException as exc:
    print("Error: 无法发送邮件")
    print(repr(exc))  # keep the cause visible for troubleshooting
finally:
    if smtpObj is not None:
        try:
            smtpObj.quit()
        except smtplib.SMTPServerDisconnected:
            # The server already dropped the connection; nothing to close.
            pass