1. Approach

The script mainly uses XPath (via lxml) to parse the data, the urllib library to fetch the pages, and the openpyxl library to write the results into an Excel spreadsheet.
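
To make the parsing step concrete, here is a minimal sketch of the same kind of XPath query run against a simplified, made-up HTML snippet; the div class="content" / p structure is only an assumption standing in for Weibo's real search-result markup, which the full script below targets.

from lxml import etree

# Hypothetical, simplified stand-in for two search-result cards
html = '''
<div class="content"><p nick-name="user_a">first post text</p></div>
<div class="content"><p nick-name="user_b">second post text</p></div>
'''

tree = etree.HTML(html)
print(tree.xpath('//div[@class="content"]/p/text()'))      # ['first post text', 'second post text']
print(tree.xpath('//div[@class="content"]/p/@nick-name'))  # ['user_a', 'user_b']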

2. Code

Here is the complete code.

import urllib.request
import urllib.parse
from lxml import etree
from openpyxl import Workbook

# Example of a rendered search URL: https://s.weibo.com/weibo/%23山西暴雨%23&page=1
def topic_create_request(page):
    """Build a Request object for one page of the topic search results."""
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36 Edg/103.0.1264.49',
        'cookie': 'PC_TOKEN=fc6bbdffa0; login_sid_t=ea94e0e4632a5bd78f1e7c8d0bac86ca; cross_origin_proto=SSL; _s_tentry=cn.bing.com; UOR=cn.bing.com,weibo.com,cn.bing.com; Apache=1531283071038.816.1657941322931; SINAGLOBAL=1531283071038.816.1657941322931; ULV=1657941322934:1:1:1:1531283071038.816.1657941322931:; WBtopGlobal_register_version=2022071611; SSOLoginState=1657941525; SUB=_2A25P1l5FDeRhGedG61cZ9yrEzjuIHXVtOWINrDV8PUJbkNAKLXDdkW1NUZD1z40aAVyb8FaxE84OO2us1ukOJcAu; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WFOJa89QcLwxaa-11YEbBHM5NHD95Qp1h5f1hMX1h-NWs4DqcjVi--ciKn4iKyFi--ciKLhi-iWi--NiK.Xi-2Ri--ciKnRi-zNeKn7SKnNShnfS7tt',
    }
    base_data = {
        'q': '#山西暴雨#',
        'page': str(page),
    }
    base_url = 'https://s.weibo.com/weibo?'
    # URL-encode the query parameters ('#' becomes %23, Chinese characters are percent-encoded)
    data = urllib.parse.urlencode(base_data)
    url = base_url + data
    request = urllib.request.Request(url=url, headers=headers)
    return request


def topic_get_content(request):
    """Send the request and return the decoded HTML of the result page."""
    response = urllib.request.urlopen(request)
    content = response.read().decode('utf-8')
    return content


def download(content, page):
    """Extract the post text with XPath and write it into an Excel file for this page."""
    tree = etree.HTML(content)
    # nickname_list = tree.xpath('//div[@class="content"]/p/@nick-name')
    content_list = tree.xpath('//div[@class="content"]/p/text()')
    book = Workbook()
    sheet = book.active
    filename = str(page) + "weibo.xlsx"
    # openpyxl cells are 1-indexed, so the first post goes into A1
    for i, text in enumerate(content_list, start=1):
        sheet['A' + str(i)] = text
    book.save(filename)


if __name__ == '__main__':
    start_page = int(input('please enter the start page:'))
    end_page = int(input('please enter the end page:'))
    for page in range(start_page, end_page + 1):
        request = topic_create_request(page)   # build the request for this page
        content = topic_get_content(request)   # fetch the HTML
        download(content, page)                # parse it and save to "{page}weibo.xlsx"
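
Running the script prompts for a start page and an end page; each page in that range is fetched, parsed, and saved to its own file named "{page}weibo.xlsx" in the working directory (pages 1 to 3, for example, produce 1weibo.xlsx, 2weibo.xlsx, and 3weibo.xlsx). Note that the cookie in the headers comes from a logged-in Weibo session and will eventually expire, so it will likely need to be replaced with a fresh cookie copied from your own browser before the requests return the expected results.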