# Locate the elements: collect the href value of every <a> tag together with
# its corresponding name.
from urllib.parse import urljoin

tree = etree.HTML(resp.content)
node_list = tree.xpath('/html/body/div[2]/div[2]/div[3]/ul/li')
sub_url_list = []
for node in node_list:
    # Evaluate each XPath once per node instead of re-running it for the
    # length check and again for the indexing.
    hrefs = node.xpath('./a/@href')
    titles = node.xpath('./a/b/text()')
    # Skip <li> nodes that lack either a link or a title. Checking only the
    # href (as before) would raise IndexError on a missing <b> text, or pair
    # the node with a value left over from a previous iteration.
    if not hrefs or not titles:
        continue
    sub_url_list.append((hrefs[0], titles[0]))

# 4.3 Visit the detail pages
base_url = 'http://www.netbian.com/'
for sub_url, title in sub_url_list:
    # urljoin copes with hrefs that start with '/' (no doubled slash) and
    # with hrefs that are already absolute URLs; plain string concatenation
    # produces a malformed address in both cases.
    s_page = urljoin(base_url, sub_url)
    s_resp = requests.get(s_page)
    # Every iteration rewrites the same file, so 's.html' ends up holding the
    # most recently fetched detail page.
    with open('s.html', 'wb') as f:
        f.write(s_resp.content)