爬虫实战-爬取兴盛优选首页所有商品数据并写入excel

爬虫实战-爬取兴盛优选首页所有商品数据并写入 Excel

兴盛优选这个小程序很火，小编今天就教大家如何爬取兴盛优选的所有商品数据信息吧。

1. 首先获取 windowId

这个请求用于获取所有 window 的 id。它实际返回 windows、brandHouseWindows、classifyWindows 三个字段，三者都带有 windowId；通过观察可知，windows 里面包含了全部窗口，所以我们只需要对它进行遍历即可。

2. 查看获取商品信息的请求。这里其实有三个接口用于获取商品数据，分别如下【查看下方可知，我们一开始获取的 windowId 就是用于这里获取商品数据的】

相对于前面两个接口，第三个可能会稍微麻烦一点，因为它有 pageIndex、pageSize 参数。为了避免翻页，小编将 pageSize 修改成 1000 后发送请求，结果发现他们做了控制，好像必须传入 10 的大小。好吧，不能投机取巧了，我们还是老实地写个翻页吧。小编的思路是直接来个很大的循环，然后当响应数据中 records 的数据是空的时候就跳出循环。

好了，看到这里我们大致可以知道，获取商品信息总共分三个接口，它们的请求地址以及请求参数都不一样！

3.确定思路

好了，分析到这里，小编就分享一波源码给大家。当然，self.userkey 需要替换成你自己的；万一你用我的，作死地访问导致小编被拉黑，就得不偿失了。

# author:Administrator

# datetime:2019/5/28 12:32

# project_name:python_code_warehouse

# file_name:xingshengyouxuan

# email:1163739580@qq

import datetime
import os
import time

import pandas as pd
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# The HTTP calls in this script are made with verify=False; silence the
# InsecureRequestWarning urllib3 would otherwise emit for each of them.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

def myprint(*con):
    """Print a progress message prefixed with ``RunInfo----`` and a timestamp.

    :param con: message fragments; each one is converted with ``str`` and the
        fragments are concatenated without a separator.
    """
    # Imported locally so this helper does not depend on the module-level
    # import list.
    import datetime

    # Drop microseconds so the timestamp reads "YYYY-MM-DD HH:MM:SS".
    stamp = datetime.datetime.now().replace(microsecond=0)
    print("RunInfo----" + str(stamp) + ":" + "".join(str(part) for part in con))

class XingShengYouXuna(object):
    """Collect the home-page products of Xingsheng Youxuan and export them to Excel.

    Workflow: ``get_windows_id`` lists the home-page windows,
    ``load_index_sku`` downloads the products of every window (de-duplicated
    by product name) and ``to_excel_data`` writes the result to an ``.xlsx``
    file in the current working directory.
    """

    # Area and store sent with every request (values from the original listing).
    AREA_ID = '101'
    STORE_ID = '66880000082975'
    # Seconds to wait for the server before a request is abandoned.
    REQUEST_TIMEOUT = 10
    # Exclusive upper bound for the page index when walking a paged window.
    MAX_PAGE_INDEX = 10000
    # Fields copied from every product record returned by the API.
    SKU_FIELDS = ("prName", "saleAmt", "marketAmt", "tmBuyStart", "tmBuyEnd")
    # (Excel column header, product field) pairs, in output column order.
    EXCEL_COLUMNS = (
        ("商品名称", "prName"),
        ("⽬前售价", "saleAmt"),
        ("原始价格", "marketAmt"),
        ("开始购买时间", "tmBuyStart"),
        ("结束购买时间", "tmBuyEnd"),
    )

    def __init__(self):
        # Placeholder value: replace it with your own userKey before running.
        self.userkey = "填你⾃⼰的"
        self.base_param = {'userKey': self.userkey}
        # NOTE(review): the User-Agent string was cut off in the published
        # listing after "MicroMessenger/7.0.4"; the remainder is unknown.
        # This header dict is stored but not attached to any request here.
        self.header = {
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_4 like Mac OS X) '
                          'AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15F79 '
                          'MicroMessenger/7.0.4',
        }
        # NOTE(review): the listing showed only "mall.xsyxsc"; the scheme and
        # ".com" were restored because requests rejects a URL without a
        # scheme - confirm the host before use.
        self.url = "https://mall.xsyxsc.com"
        self.sku_info_list = []
        self.sku_name = {}  # product names already collected (de-duplication)

    def _post(self, path, body):
        """POST ``body`` plus the shared user key to ``path``; return the decoded JSON."""
        payload = dict(body)
        payload.update(self.base_param)
        # NOTE(review): verify=False disables TLS certificate verification,
        # as in the original script.
        response = requests.post(url=self.url + path, data=payload, verify=False,
                                 timeout=self.REQUEST_TIMEOUT)
        return response.json()

    def _store_products(self, products, window_title):
        """Append not-yet-seen products to ``sku_info_list`` and log each decision."""
        for com_info in products:
            name = com_info.get("prName")
            if name in self.sku_name:
                myprint("活动标题'{}'下到商品'{} 此商品已经存在直接过滤'".format(window_title, name))
                continue
            self.sku_name[name] = ""
            myprint("活动标题'{}'下到商品'{}'".format(window_title, name))
            self.sku_info_list.append(
                {field: com_info.get(field) for field in self.SKU_FIELDS})

    def get_windows_id(self):
        """Fetch the windows shown on the home page.

        The response also carries ``brandHouseWindows`` and
        ``classifyWindows``; only ``windows`` is read here.

        :return: dict mapping ``"<windowId>&<windowType>"`` to the window title
        """
        index_window_info = {}
        body = {'areaId': self.AREA_ID, 'storeId': self.STORE_ID}
        data = self._post("/user/product/indexSortWindows", body).get("data") or {}
        for window in data.get("windows") or []:
            myprint("搜索到标题为'{}' windows_id为'{}' 类型为'{}'".format(
                window.get("windowName"), window.get("windowId"), window.get("windowType")))
            key = str(window.get("windowId")) + "&" + window.get("windowType")
            index_window_info[key] = window.get("windowName")
        myprint("合计搜索到的windows个数为{}".format(len(index_window_info)))
        return index_window_info

    def load_index_sku(self, **kwargs):
        """Download the products of every window into ``sku_info_list``.

        :param kwargs: the mapping returned by ``get_windows_id``
            (``"<windowId>&<windowType>"`` -> window title)
        """
        for id_type, window_title in kwargs.items():
            windowid, windowtype = id_type.split("&")
            if windowtype in ("ACTIVITY", "CLASSIFY"):
                path = "/user/product/{}Products".format(windowtype.lower())
                if windowtype == "ACTIVITY":
                    body = {'windowId': windowid, 'openBrandHouse': 'OPEN',
                            'storeId': self.STORE_ID, 'areaId': self.AREA_ID}
                else:
                    body = {'windowId': windowid, 'areaId': self.AREA_ID,
                            'storeId': self.STORE_ID, 'excludeAct': 'N'}
            else:
                path = "/user/brandhouse/window/getProducts"
                # The accompanying article reports that the server only
                # accepts a page size of 10, hence the paging loop below.
                body = {'windowId': windowid, 'areaId': self.AREA_ID,
                        'storeId': self.STORE_ID, 'excludeAct': 'N',
                        'pageIndex': '1', 'pageSize': '10'}

            if windowtype == "BRAND_HOUSE":
                for page_index in range(1, self.MAX_PAGE_INDEX):
                    body['pageIndex'] = page_index
                    data = self._post(path, body).get("data") or {}
                    records = data.get("records")
                    # Stop on a missing *or* empty page so the loop cannot
                    # keep requesting pages past the last one.
                    if not records:
                        break
                    self._store_products(records, window_title)
            else:
                products = self._post(path, body).get("data") or []
                self._store_products(products, window_title)
        myprint("累计搜索到{}个商品.".format(len(self.sku_info_list)))

    def to_excel_data(self):
        """Collect all home-page products and write them to a dated ``.xlsx`` file."""
        self.load_index_sku(**self.get_windows_id())
        headers = [header for header, _ in self.EXCEL_COLUMNS]
        rows = [
            {header: sku.get(field) for header, field in self.EXCEL_COLUMNS}
            for sku in self.sku_info_list
        ]
        df = pd.DataFrame(rows, columns=headers)
        file_path = r'兴盛优选商品数据_%s.xlsx' % time.strftime('%Y-%m-%d')
        # The context manager saves and closes the workbook; ExcelWriter.save()
        # and the to_excel ``encoding`` argument no longer exist in pandas 2.x.
        with pd.ExcelWriter(file_path) as writer:
            df.to_excel(writer, index=False, sheet_name='兴盛优选商品数据')
        myprint("写⼊excel成功写⼊路径为{}".format(os.path.join(os.getcwd(), file_path)))

if __name__ == "__main__":
    # Script entry point: scrape every home-page window and export to Excel.
    spider = XingShengYouXuna()
    spider.to_excel_data()

最后分享上运行截图

本文发布于:2024-09-22 20:29:30，感谢您对本站的认可！

本文链接：https://www.17tex.com/xueshu/523474.html

上一篇：DeviceNet协议

下一篇：3 屋顶光伏系统阴影计算和模拟-Sketchup分析法 V1

标签：商品请求数据优选

留言与评论（共有 0 条评论）