问题描述
- 为什么我用 Scrapy 抓取页面时,<strong> 标签里面的文字抓不到?代码如下:
-
from scrapy.contrib.spiders import CrawlSpider
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import Selector
from scrapy.selector import HtmlXPathSelector
from kkk1.items import Kkk1Item
from scrapy.http import Request
from scrapy.http import HtmlResponse
import re
import math
class DmozSpider(CrawlSpider):
    """Scrape the fields of a single JD product page into a ``Kkk1Item``.

    NOTE(review): this class overrides ``parse`` on a ``CrawlSpider``.  The
    Scrapy documentation warns against doing that because ``CrawlSpider``
    uses ``parse`` internally to apply its rules.  No ``rules`` are defined
    here, so confirm whether a plain spider base class was intended.
    """

    name = "kkk1"
    allowed_domains = ['item.jd.com']
    start_urls = [
        "http://item.jd.com/1130480.html",
    ]

    # Location of the product-detail list; the product id and brand are read
    # from its <li> children below.
    _DETAIL_LIST_XPATH = (
        "//div[@class='w']/div[@class='right']"
        "/div[@id='product-detail']/div[@id='product-detail-1']/ul"
    )

    def parse(self, response):
        """Extract product id, name, brand and price from the response.

        Every field is stored as the list returned by ``extract()``, so a
        field whose XPath matches nothing ends up as an empty list.

        Args:
            response: the downloaded product page.

        Returns:
            A ``Kkk1Item`` with ``gid``, ``name``, ``brand`` and ``price``
            filled in.
        """
        item = Kkk1Item()
        # All queries go through response.xpath(); the older
        # Selector.select() spelling is deprecated in Scrapy.
        item['gid'] = response.xpath(
            self._DETAIL_LIST_XPATH + "/li[2]/text()").extract()
        item['name'] = response.xpath("id('name')/h1/text()").extract()
        item['brand'] = response.xpath(
            self._DETAIL_LIST_XPATH + "/li[3]/a/text()").extract()
        # NOTE(review): the console output for this page shows the matched
        # element as an empty <strong id="jd-price"></strong>, i.e. the
        # downloaded HTML carries no price text (so appending /text() to the
        # XPath would give an empty list).  Presumably the page fills the
        # price in with JavaScript after load; if so it has to be fetched
        # with a separate request -- TODO confirm.
        item['price'] = response.xpath("//*[@id='jd-price']").extract()
        return item

# Console output:
{
'brand': [u'\u5c0f\u7c73\uff08MI\uff09'],
'gid': [u'\u5546\u54c1\u7f16\u53f7\uff1a1130480'],
'name': [u'\u5c0f\u7c73 \u7ea2\u7c731s \u79fb\u52a83G\u624b\u673a\uff08\u91d1\u5c5e\u7070\uff09 TD-SCDMA/GSM \u53cc\u5361\u53cc\u5f85 \u79fb\u52a8\u5408\u7ea6\u7248\uff08\u4e0d\u542b\u5408\u7ea6\u8ba1\u5212\uff09'],
'price': [u'<strong class="p-price" id="jd-price"></strong>'],
'salereminder': []}
u'<strong class="p-price" id="jd-price"></strong>' 里面的价格怎么没有?求各位大神指点,感激不尽。我是新手,这个问题已经困扰我好几天了,一直没有办法解决。
时间: 2024-09-14 20:19:08