"""Print the highly rated articles found in ``new_index.html``.

The page is parsed with BeautifulSoup (lxml parser). Every ``<li>`` under
``body > div.main-content > ul`` is treated as one article entry, and the
title, description, rating and image URL of each entry whose rating is
strictly greater than ``MIN_RATE`` are printed.
"""
from bs4 import BeautifulSoup

# Entries are printed only when their numeric rating is strictly above this.
MIN_RATE = 3

# The document is parsed eagerly, so the file can be closed right after.
with open('new_index.html') as page:
    soup = BeautifulSoup(page, 'lxml')

for item in soup.select('body > div.main-content > ul > li'):
    # Each lookup takes the first match; a missing element raises IndexError,
    # which surfaces malformed entries instead of silently skipping them.
    image = item.select('li > img')[0].get('src')
    title = item.select('li > div.article-info > h3 > a')[0].get_text()
    desc = item.select('li > div.article-info > p.description')[0].get_text()
    rate = item.select('li > div.rate > span')[0].get_text()

    # The rating is kept as the page's text for printing and converted to a
    # number only for the comparison.
    if float(rate) > MIN_RATE:
        print(title, desc, rate, image)