资讯专栏INFORMATION COLUMN

惊!淘宝还可以这样爬!源码奉上!如有不懂点赞加关注,私信必回!赶紧收藏!

番茄西红柿 / 6219人阅读

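摘要:本文给出一个爬取淘宝商品搜索结果的 Python 脚本:先请求接口获取 cookie,从中取出 token,再获取时间戳,用 token、时间戳、appKey 和请求数据运算出 sign(MD5 加密),sign 计算成功后请求接口,打印每件商品的产品名称、店名、卖家小名、促销价格、原价、月销量(件)、产地和网址(另一种返回格式包含产品名称、副标题、卖家小名、促销价格、原价、月销量、网址)。示例以"手机""笔记本电脑"为关键词,可设置页数和商品名字;需要过滑块的私信我。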

"""Search Taobao's union H5 endpoint and print the items found.

Flow: fetch anonymous cookies, take the ``_m_h5_tk`` token from them, sign
the request payload with MD5, call the endpoint page by page and print one
line per item.

Third-party packages (``requests``, ``json5``, ``loguru``) are imported inside
the functions that use them, so the pure helpers (e.g. ``get_sign``) can be
imported without those packages being installed.
"""
import hashlib
import re
import time

appKey = '12574478'

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36'
API_URL = 'https://h5api.m.taobao.com/h5/mtop.alimama.union.xt.en.api.entry/1.0/'
JSONP_CALLBACK = 'mtopjsonp2'
REQUEST_TIMEOUT = 30  # seconds, applied to every HTTP call

# Request whose only purpose is to make the server hand out cookies.
_COOKIE_URL = 'https://h5api.m.taobao.com/h5/mtop.alimama.union.xt.en.api.entry/1.0/?jsv=2.5.1&appKey=12574478&t=1622426487791&sign=7771a311a65bbb533c3f3d4534d50f5e&api=mtop.alimama.union.xt.en.api.entry&v=1.0&AntiCreep=true&timeout=20000&AntiFlood=true&type=jsonp&dataType=jsonp&callback=mtopjsonp2&data=%7B%22floorId%22%3A195%2C%22count%22%3A10%2C%22p4pPid%22%3A%22430748_1006%22%2C%22spm%22%3A%22a2e1u.19484427.29996459%22%2C%22app_pvid%22%3A%22201_11.11.62.22_407060_1622426486844%22%2C%22ctm%22%3A%22spm-url%3Aa231o.13503973.search.1%3Bpage_url%3Ahttps%253A%252F%252Fai.taobao.com%252Fsearch%252Findex.htm%253Fspm%253Da231o.13503973.search.1%2526key%253D%2525E4%2525B8%2525B8%2525E7%2525BE%25258E%2526pid%253Dmm_110807073_1262350149_109959000489%2526union_lens%253Drecoveryid%25253A201_11.11.62.22_399283_1622423612735%25253Bprepvid%25253A201_11.11.62.22_399283_1622423612735%22%2C%22variableMap%22%3A%22%7B%5C%22union_lens%5C%22%3A%5C%22recoveryid%3A201_11.11.62.22_399283_1622423612735%3Bprepvid%3A201_11.11.62.22_399283_1622423612735%5C%22%2C%5C%22recoveryId%5C%22%3A%5C%22201_11.11.62.22_407060_1622426486844%5C%22%7D%22%7D'

# Payload head; ``%s`` receives the page number.
_DATA_HEAD = '{"pNum":%s,"pSize":"60","refpid":"mm_26632258_3504122_32538762",'

# Payload tail; the placeholder keyword '手机' is replaced by the search word.
# NOTE(review): the `/"` sequences are reproduced exactly as published; they
# may be an extraction artifact of `\"` - confirm against a live request.
_DATA_TAIL = r'"variableMap":"{/"q/":/"手机/",/"navigator/":false,/"clk1/":/"25b8f4d10e5d19d5a1b4c05c0cda428b/",/"union_lens/":/"recoveryid:201_11.20.200.89_16797742_1632624677134;prepvid:201_11.20.207.176_16801805_1632625096923/",/"recoveryId/":/"201_11.175.96.106_16839576_1632632392630/"}","qieId":"36308","spm":"a2e0b.20350158.31919782","app_pvid":"201_11.175.96.106_16839576_1632632392630","ctm":"spm-url:a2e0b.20350158.31919782.1;page_url:https%3A%2F%2Fuland.taobao.com%2Fsem%2Ftbsearch%3Frefpid%3Dmm_26632258_3504122_32538762%26keyword%3Dadidas%2520yeezy%2520boost%26clk1%3D25b8f4d10e5d19d5a1b4c05c0cda428b%26upsId%3D25b8f4d10e5d19d5a1b4c05c0cda428b%26spm%3Da2e0b.20350158.31919782.1%26pid%3Dmm_26632258_3504122_32538762%26union_lens%3Drecoveryid%253A201_11.20.200.89_16797742_1632624677134%253Bprepvid%253A201_11.20.207.176_16801805_1632625096923%26pnum%3D1"}'


def get_sign(token, t, ___):
    """Return the request signature.

    The signature is the hex MD5 digest of ``token&t&appKey&___`` encoded as
    UTF-8.

    Args:
        token: Token string taken from the ``_m_h5_tk`` cookie.
        t: Timestamp as a string (the caller passes milliseconds).
        ___: The request ``data`` payload string being signed.

    Returns:
        A 32-character lowercase hexadecimal digest.
    """
    pre_sign = '&'.join((token, t, appKey, ___))
    return hashlib.md5(pre_sign.encode('utf-8')).hexdigest()


def get_cookies():
    """Request the endpoint once and return its cookies as a header string.

    Returns:
        The received cookies formatted as ``name=value;name=value;`` (each
        pair is followed by a semicolon, including the last one).
    """
    import requests
    from requests.utils import dict_from_cookiejar

    headers = {
        'referer': 'https://ai.taobao.com/',
        'User-Agent': USER_AGENT,
    }
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(_COOKIE_URL, headers=headers, timeout=REQUEST_TIMEOUT)
    cookies = dict_from_cookiejar(response.cookies)
    return ''.join(f'{name}={value};' for name, value in cookies.items())


def _extract_token(cookie_header):
    """Return the part of each ``_m_h5_tk`` cookie value before its first ``_``.

    Returns an empty string when the cookie is absent.
    """
    values = re.findall(r"_m_h5_tk=(.*?);", cookie_header)
    return ''.join(value.split("_")[0] for value in values)


def data(num, x):
    """Build the headers and query parameters for one search request.

    Fresh cookies are fetched on every call.

    Args:
        num: Page number placed in the payload's ``pNum`` field.
        x: Search keyword substituted into the payload.

    Returns:
        A ``(headers, params)`` tuple ready to pass to ``requests.get``.
    """
    from loguru import logger

    ck = get_cookies()
    data_s = _DATA_HEAD % num + _DATA_TAIL.replace('手机', x)

    token = _extract_token(ck)
    date = int(time.time() * 1000)  # millisecond timestamp
    cx = get_sign(token, str(date), data_s)
    logger.debug("|sign计算成功>>>>%s|" % cx)

    headers = {
        "User-Agent": USER_AGENT,
        "cookie": ck,
    }
    data_a = {
        'jsv': '2.5.1',
        'appKey': appKey,
        't': date,
        'sign': cx,
        'api': 'mtop.alimama.union.xt.en.api.entry',
        'v': '1.0',
        'AntiCreep': 'true',
        'timeout': '20000',
        'AntiFlood': 'true',
        'type': 'jsonp',
        'dataType': 'jsonp',
        'callback': JSONP_CALLBACK,
        'data': data_s,
    }
    return headers, data_a


def _strip_jsonp(text, callback=JSONP_CALLBACK):
    """Return the payload inside a ``callback(...)`` JSONP wrapper.

    Only the outer wrapper is removed; parentheses inside the payload are
    left intact. Text without the wrapper is returned stripped but otherwise
    unchanged.
    """
    body = text.strip().rstrip(';').rstrip()
    if body.startswith(callback):
        body = body[len(callback):].lstrip()
    if body.startswith('(') and body.endswith(')'):
        body = body[1:-1]
    return body


def _fetch_items(page, word):
    """Request one result page and return its list of item dicts.

    Returns an empty list (and logs a warning) when the response does not
    contain ``data.recommend.resultList``.
    """
    import json5
    import requests
    from loguru import logger

    headers, params = data(page, word)
    response = requests.get(
        API_URL, headers=headers, params=params, timeout=REQUEST_TIMEOUT
    )
    text = response.content.decode('utf-8')
    payload = json5.loads(_strip_jsonp(text))
    try:
        return payload['data']['recommend']['resultList']
    except (KeyError, TypeError):
        logger.warning("no resultList in response for page %s: %s" % (page, text[:200]))
        return []


def run(num, word):
    """Print the search results for ``word`` across ``num`` pages.

    Args:
        num: Number of pages to request; pages ``0`` to ``num - 1`` are used.
        word: Search keyword.
    """
    for page in range(num):
        for item in _fetch_items(page, word):
            print(
                f"产品名称:{item['itemName']},店名:{item['shopTitle']},卖家小名:{item['sellerNickName']},促销价格:{item['promotionPrice']},原价:{item['price']},月销量:{item['monthSellCount']}件,产地:{item['provcity']},网址:{item['url']}")


if __name__ == '__main__':
    # Page count and search keyword are configurable here.
    run(5, '笔记本电脑')

需要过滑块的私信我!

文章版权归作者所有,未经允许请勿转载,若此文章存在违规行为,您可以联系管理员删除。

转载请注明本文地址:https://www.ucloud.cn/yun/121289.html

相关文章

  • 资源集 - 收藏集 - 掘金

    摘要:行爬取顶点全网任意小说掘金之前连续多篇文章介绍客户端爬取平台,今天我们从零开始,实现爬取顶点小说网任意一本小说的功能。文件标记所有文件我的后端书架后端掘金我的后端书架月前本书架主要针对后端开发与架构。 30行js爬取顶点全网任意小说 - 掘金之前连续多篇文章介绍客户端爬取平台(dspider),今天我们从零开始,实现爬取顶点小说网任意一本小说的功能。 如果你还不知道客户端爬取,可以先看...

    stdying 评论0 收藏0
  • 资源集 - 收藏集 - 掘金

    摘要:行爬取顶点全网任意小说掘金之前连续多篇文章介绍客户端爬取平台,今天我们从零开始,实现爬取顶点小说网任意一本小说的功能。文件标记所有文件我的后端书架后端掘金我的后端书架月前本书架主要针对后端开发与架构。 30行js爬取顶点全网任意小说 - 掘金之前连续多篇文章介绍客户端爬取平台(dspider),今天我们从零开始,实现爬取顶点小说网任意一本小说的功能。 如果你还不知道客户端爬取,可以先看...

    马忠志 评论0 收藏0
  • 保姆级教程HTML两万字笔记大总结【建议收藏】(上篇)

    摘要:标签不区分大小写,但推荐小写。标签可以嵌套,但不能交叉嵌套。标签也称为元素。比如行内标签亦可成行内元素。 ❤️HTML必备知识详解❤️ 第一部分:HTML框架简介...

    paulli3 评论0 收藏0
  • Java 类文章 - 收藏集 - 掘金

    摘要:而调用后端服务就应用了的高级特分布式配置管理平台后端掘金轻量的分布式配置管理平台。关于网络深度解读后端掘金什么是网络呢总的来说,网络中的容器们可以相互通信,网络外的又访问不了这些容器。 在 Java 路上,我看过的一些书、源码和框架(持续更新) - 后端 - 掘金简书 占小狼转载请注明原创出处,谢谢!如果读完觉得有收获的话,欢迎点赞加关注 物有本末,事有终始,知所先后,则近道矣 ......

    RayKr 评论0 收藏0

发表评论

0条评论

最新活动
阅读需要支付1元查看
<