网络营销推广,微商引流48招技能
', 'iKfHVlWDz0U': '微商技巧:微信群的精准引流技巧教程', 'f8W68cYP1R0': '微商从零做起,55法开发粉丝群渠道', 'gwhOE3qIFtQ': '微商营销精准引流技巧分享!', } title = title_dict[videoId] print('start to '*10, title) url = 'https://www.bilibili.com/video/{}?p={}' page_num = page_num or 1 # 有些视频有20页以上评论,就不要去爬取了 max_page_num = 20 if page_num > max_page_num: print(' end to '*10, title, ' 截止页数') return complete_url = url.format(videoId, page_num) html = self._get_html(complete_url) dom_tree = etree.HTML(html) comment_dom_list = dom_tree.xpath('//div[@class="lzl_single_post"]/div[@class="lzl_content_main"]') if len(comment_dom_list) == 0: # 说明就直接爬取的最新的评论 return comments = [] for comment_dom in comment_dom_list: comment_content = comment_dom.xpath('string(.)') comment_like_count_str = comment_dom.xpath('div[@class="lzl_cnt"]//span[2]/text()') # print(' comment_like_count_str ', comment_like_count_str) if len(comment_like_count_str) == 0 or len(comment_like_count_str[0]) == 0: comment_like_count = 0 else: comment_like_count = int(comment_like_count_str[0]) # print(' comment_like_count ', comment_like_count) comments.append({ 'comment_content': comment_content.replace('\n', '').strip(), 'comment_like_count': int(comment_like_count), }) for comment in comments: print(json.dumps(comment, ensure_ascii=False, indent=4)) item = BiliCommentItem() item['comment_content'] = comment['comment_content'] item['comment_like_count'] = comment['comment_like_count'] item['title'] = title item['url'] = complete_url item['created_at'] = dt.now() yield item page_num += 1 time.sleep(50) yield from self._parse_detail(videoId, page_num)