# Palace Museum (故宫) website: decoding the page bytes and downloading images

import requests
import re
# Script: fetch the Palace Museum "royal" page, print its decoded HTML, list
# the <img> tags it contains, and save every titled image to the current
# directory as "<alt text>.jpg".
url = "https://www.dpm.org.cn/lights/royal.html"
headers = {"user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"}

# Seconds to wait for the server. requests has no default timeout, so without
# this a stalled connection would block the script forever.
REQUEST_TIMEOUT = 10

# <img> tags that carry only alt/src, captured as (alt, src).
IMG_PATTERN = re.compile(r"""<img alt="(.*?)" src="(.+?)"></a>""")
# <img> tags that also carry a title attribute, captured as (alt, src).
IMG_WITH_TITLE_PATTERN = re.compile(
    r"""<img alt="(.+?)" title=".*?" src="(.+?)"></a>"""
)


def _safe_filename(name):
    """Return *name* with characters that are unsafe in file names replaced.

    Path separators and characters reserved on common file systems become
    "_", so text scraped from the page cannot break ``open()`` or point
    outside the current directory. Falls back to "image" if nothing is left.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\r\n\t]', "_", name).strip()
    return cleaned or "image"


# .content is the raw response body as bytes. If .text shows garbled
# characters, take .content and decode it explicitly instead.
# bytes.decode() uses UTF-8 when no encoding is given.
page_response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of running the regexes over an error page.
page_response.raise_for_status()
html = page_response.content.decode()
print(html)

# Run the search once and reuse the result for both the list and its length.
plain_matches = IMG_PATTERN.findall(html)
print(plain_matches)
print(len(plain_matches))

cc = IMG_WITH_TITLE_PATTERN.findall(html)
print(cc)
for alt_text, image_url in cc:
    try:
        image_response = requests.get(
            url=image_url, headers=headers, timeout=REQUEST_TIMEOUT
        )
        image_response.raise_for_status()
    except requests.RequestException as err:
        # One broken image should not abort the remaining downloads, and an
        # error page must not be saved under a .jpg name.
        print("skipping {}: {}".format(image_url, err))
        continue
    # Images must be handled as raw bytes, never as decoded text.
    image_bytes = image_response.content
    print(image_bytes)
    print("***" * 50)
    with open("{}.jpg".format(_safe_filename(alt_text)), "wb") as f:
        f.write(image_bytes)