Hello everyone, I'm trying to fetch the content inside a script tag.
http://www.teknosa.com/urunler/145051447/samsung-hm1500-bluetooth-kulaklik
This is the website.
This is the script tag whose content I want to extract:
$.Teknosa.ProductDetail = {"ProductComputedIndex":145051447,"ProductName":"SAMSUNG HM1500 BLUETOOTH KULAKLIK","ProductSeoName":"samsung-hm1500-bluetooth-kulaklik","ProductBarcode":"8808993790425","ProductPriceInclTax":79.9,"ProductDiscountedPriceInclTax":null,"ProductStockQuantity":1,"ProductMinStockQuantity":null,"ProductShortDescription":null,"ProductFullDescription":null,"ProductModelName":"HM1500","ProductAdminComment":null,"ProductMetaTitle":null,"ProductMetaKeywords":null,"ProductMetaDescription":null,"ProductBrandId":299,"ProductBrandName":"SAMSUNG","ProductBrandImageName":"//img-teknosa.mncdn.com/StaticContent/images/Brand/SAMSUNG-medium.png","ProductCommentCout":29,"ProductQuestionAnswerCout":0,"ProductRatingStar":4,"ProductType":1,"ProductOriginalComputedIndex":null,"ProductIsSolo":false,"ProductIsClickCollect":true,"ProductStoreStockAmount":1,"ProductGroupDisplayName":null,"ProductOrigin":"PRC","ProductIsTss":false,"ProductIsKit":false,"AddBasketButtonType":0,"ProductViewType":0,"ProductDetailDefaultPicture":"145051447-1-samsung-hm1500-bluetooth-kulaklik.jpg","ProductRatingStarText":"Çok İyi","ProductPrice":"79,9","IsThereOutletProduct":false,"ProductIsActiveProductOriginal":false,"ProductErpCatalogCode":"_TELEKOM","ProductErpCategoryCode":"_BLUETOOTH_KULAKLIKLAR1636","ProductCategory":{"CategoryName":"Bluetooth Kulaklık ve Kit","CategorySeoName":"bluetooth-kulaklik-ve-kit","CategoryDescription":null,"CategoryParentId":134,"CategoryLevel":2,"CategoryMetaTitle":null,"CategoryMetaKeywords":null,"CategoryMetaDescription":null,"Parent":{"CategoryName":"Telefon 
Aksesuarları","CategorySeoName":"telefon-aksesuarlari","CategoryDescription":null,"CategoryParentId":108,"CategoryLevel":1,"CategoryMetaTitle":null,"CategoryMetaKeywords":null,"CategoryMetaDescription":null,"Parent":{"CategoryName":"Telefon","CategorySeoName":"telefon","CategoryDescription":null,"CategoryParentId":null,"CategoryLevel":0,"CategoryMetaTitle":null,"CategoryMetaKeywords":null,"CategoryMetaDescription":null,"Parent":null,"DisplayOrder":6,"StatusId":100110,"StartDate":"\/Date(1434351061000)\/","EndDate":null,"Id":108},"DisplayOrder":3,"StatusId":100110,"StartDate":"\/Date(1434351245000)\/","EndDate":null,"Id":134},"DisplayOrder":3,"StatusId":100110,"StartDate":"\/Date(1434351367000)\/","EndDate":null,"Id":173},"ProductDetailPictures":[{"ProductPictureName":"145051447-1-samsung-hm1500-bluetooth-kulaklik.jpg","ProductPictureOrder":1,"ProductPictureIsDefault":true},{"ProductPictureName":"145051447-2-samsung-hm1500-bluetooth-kulaklik.jpg","ProductPictureOrder":2,"ProductPictureIsDefault":false}],"ProductDetailAttributes":[{"Key":"Ağırlık","Value":"18.1","UnitItemName":"gr","ProductAttributeDisplayOrder":0,"DisplayOrder":2,"Description":null},{"Key":"Model","Value":"HM1500","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":4,"Description":null},{"Key":"Şarj Kullanım Süresi","Value":"2 Saat","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":80,"Description":null},{"Key":"Bekleme Süresi (Saat)","Value":"250 Saat (Maks.)","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":116,"Description":null},{"Key":"Kullanım Mesafesi","Value":"10 m. 
(Maks.)","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":145,"Description":null},{"Key":"Bluetooth Profili","Value":"HSP (Kulaklık), HFP (Ahizesiz)","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":149,"Description":null}],"ProductSuggestions":[],"ProductContents":[],"ProductKitItems":[],"ProductVideos":[],"ProductGroups":[],"ProductBadges":[{"BadgeItemBadgeId":7,"BadgeItemApplicationId":1,"BadgeItemText":null,"BadgeItemImageName":"//img-teknosa.mncdn.com/StaticContent/images/Badge/ucretsiz-kargo.png","BadgeItemDescription":null,"BadgeItemPagePosition":"ImageBottom","BadgeItemImagePosition":null,"BadgeItemDisplayView":"ProductDetail","BadgeItemType":"Image","BadgeItemDynamicType":"WebStock","BadgeItemDynamicTypeText1":null,"BadgeItemDynamicTypeText2":null,"BadgeItemDynamicTypeCalculationType":null,"BadgeItemDynamicTypeDisplayType":null,"BadgeItemEvaluationExpression":null,"BadgeItemClassName":null,"DisplayOrder":0,"StatusId":100110,"StartDate":"\/Date(1474440397000)\/","EndDate":null,"Id":5}],"DisplayOrder":1000,"StatusId":100110,"StartDate":"\/Date(1429000863000)\/","EndDate":null,"Id":4715};
This is what I tried:
# Request the product page and route the response to self.parseProduct.
# The 'splash' entry in meta names the 'render.html' endpoint with a wait
# of 0.09 -- presumably consumed by a Splash middleware (e.g. scrapy-splash)
# so the page's JavaScript is rendered first; confirm against the settings.
yield scrapy.Request(response.urljoin(url), callback = self.parseProduct, meta={
'splash': {
'endpoint': 'render.html',
'args': {'wait': 0.09}},
'url': url  # the URL is also stored in meta alongside the Splash options
})
def parseProduct(self, response):
    """Scan the product page's inline script for ProductDetail assignments.

    Extracts the text of the script element at a fixed position in the
    page, parses it with js2xml, and records the first string property of
    every matching assignment as a key of ``data_bundles`` (each mapped to
    an empty dict).

    :param response: response object exposing ``xpath(...).extract_first()``.
    :returns: None. ``data_bundles`` is only built locally, as in the
        original snippet.
    """
    data_bundles = {}
    script = response.xpath('/html/body/div[1]/div[2]/script[2]/text()').extract_first()
    print(script)
    if not script:
        # extract_first() returns None when the XPath matches nothing;
        # there is no JavaScript source to hand to js2xml in that case.
        return
    jstree = js2xml.parse(script)
    # NOTE(review): this compares an identifier's @name with the full dotted
    # path "$.Teknosa.ProductDetail"; js2xml appears to split dotted access
    # into nested accessor nodes, so this may never match -- confirm against
    # js2xml.pretty_print(jstree) before relying on it.
    for a in jstree.xpath('//assign[left//property/identifier/@name="$.Teknosa.ProductDetail" and right/object]'):
        bundle_prop = a.xpath('./left/bracketaccessor/property/string/text()')
        print(bundle_prop)
        # xpath() returns a (possibly empty) list, never None, so test for
        # emptiness before indexing to avoid an IndexError.
        if bundle_prop:
            curr_prop = bundle_prop[0]
            data_bundles[curr_prop] = {}
Thanks for your help.
This should do it:
You can select the
script
tag that contains "Teknosa.ProductDetail =" in its text. Edit: If you want to load the JavaScript object from the script, extract the text from the script tag, keep only the object literal after the "=" (dropping the trailing ";"), and load it with Python's
json
module.