Swift: get the HTML value of an Amazon product image

480 views Asked by At

I am trying to get the image of an Amazon product inside my app. I inspected the image in my browser and found that its class name is gc-design-img-preview. There are actually several elements with the same class, so I tried getting only the first one.

This is what I tried:

/// Attempts to read a product image URL from a parsed SwiftSoup document.
///
/// - Parameter doc: The parsed page to search.
/// - Returns: The text of the first element with class `gc-design-img-preview`,
///   `"nope"` when no such element is found, or `"nope2"` when the text lookup
///   yields nil.
func getAmazonImage(doc: Document) -> String {
    // `try?` turns a thrown lookup error into nil instead of propagating it.
    let images: Elements? = try? doc.getElementsByClass("gc-design-img-preview")
    
    // Bail out when the lookup failed or produced no first element.
    guard (images?.first()) != nil else { return "nope" }
        
    // NOTE(review): this reads the element's text(), not one of its attributes.
    // Presumably an image element has no text content, which would explain the
    // empty string described in the question; the URL is likely held in an
    // attribute such as `src` — confirm against the SwiftSoup documentation.
    // `try!` crashes if text() throws.
    guard  let imageUrl : String = try! images?.first()!.text() else { return "nope2" }
    
    print("image: " + imageUrl)
    
    return imageUrl
}

However, this does not return the image URL; it just returns an empty String. What am I missing here? I am using SwiftSoup; maybe there is another way to do this?

Update:

I think this is what I need but in Swift:

// Locate the <img> inside the currently selected gallery item, then read its src attribute.
const selectedImage = document.querySelector('li.image.item.itemNo0.maintain-height.selected img')
const imgSrc = selectedImage.getAttribute('src')

console.log(imgSrc) // https://images-na.ssl-images-amazon.com/images/I/71y%2BUGuJl5L._SX522_.jpg
4

There are 4 answers

1
ManuelMB On

For instance, in this url: https://www.amazon.com/DualSense-Wireless-Controller-PlayStation-5/dp/B08FC6C75Y/ref=sr_1_1?dchild=1&fst=as%3Aoff&pf_rd_i=16225016011&pf_rd_m=ATVPDKIKX0DER&pf_rd_p=03b28c2c-71e9-4947-aa06-f8b5dc8bf880&pf_rd_r=CSWVBS40MDKKJYXEJ0AH&pf_rd_s=merchandised-search-3&pf_rd_t=101&qid=1489016289&rnid=16225016011&s=videogames-intl-ship&sr=1-1

// Locate the <img> inside the currently selected gallery item, then read its src attribute.
const selectedImage = document.querySelector('li.image.item.itemNo0.maintain-height.selected img')
const imgSrc = selectedImage.getAttribute('src')

console.log(imgSrc) // https://images-na.ssl-images-amazon.com/images/I/71y%2BUGuJl5L._SX522_.jpg
18
ManuelMB On

To do it with SwiftSoup:

import UIKit
import SwiftSoup

class ViewController: UIViewController {

    /// Downloads the product page at `url` and extracts an image URL from the
    /// first `<img>` inside the selected gallery item.
    ///
    /// This performs a blocking download, so call it off the main thread.
    ///
    /// - Parameter url: Address of the product page to fetch.
    /// - Returns: The extracted URL string, or an empty string when the page
    ///   cannot be loaded or parsed, when no matching element exists, or when
    ///   the element's markup contains fewer quoted values than expected.
    func getProductImage(url: URL) -> String {
        var result = ""
        do {
            let html = try String(contentsOf: url, encoding: .utf8)
            let doc: Document = try SwiftSoup.parseBodyFragment(html)

            // The selector can match nothing (different page layout, blocked
            // request); return the empty default instead of force-unwrapping.
            guard let img = try doc.select("li.image.item.itemNo0.maintain-height.selected img").first() else {
                return result
            }

            let imgOuterHtml = try img.outerHtml()

            // Splitting the tag on double quotes leaves the quoted values at the
            // odd indices; index 5 is the third quoted value in the tag.
            // NOTE(review): which attribute that is depends on the attribute
            // order of the page markup — confirm against a live page.
            let chunks = imgOuterHtml.components(separatedBy: "\"")
            guard chunks.count > 5 else { return result }

            result = chunks[5]
        } catch {
            print(error)
        }

        return result
    }

    override func viewDidLoad() {
        super.viewDidLoad()
        guard let url = URL(string: "https://www.amazon.com/dp/B08FC6C75Y") else {
            fatalError("Can not get url")
        }

        // getProductImage(url:) downloads synchronously; run it on a background
        // queue so the main thread is not blocked while the page loads.
        DispatchQueue.global(qos: .userInitiated).async { [weak self] in
            guard let self = self else { return }
            let imgUrl = self.getProductImage(url: url)
            print(imgUrl)  // https://images-na.ssl-images-amazon.com/images/I/61o7ai%2BYDoL._SL1441_.jpg
        }
    }
}
0
ManuelMB On

After updating Xcode I can debug again:

This code works for both URLs:

import UIKit
import SwiftSoup

class ViewController: UIViewController {

    /// Downloads the product page at `url` and extracts an image URL from the
    /// first `<img>` inside the selected gallery item.
    ///
    /// This performs a blocking download, so call it off the main thread.
    ///
    /// - Parameter url: Address of the product page to fetch.
    /// - Returns: The extracted URL string, or an empty string when the page
    ///   cannot be loaded or parsed, when no matching element exists, or when
    ///   no link is found in the element's markup.
    func getProductImage(url: URL) -> String {
        var result = ""
        do {
            let html = try String(contentsOf: url, encoding: .utf8)
            let doc: Document = try SwiftSoup.parseBodyFragment(html)

            // The selector can match nothing (different page layout, blocked
            // request); return the empty default instead of force-unwrapping.
            guard let img = try doc.select("li.image.item.itemNo0.maintain-height.selected img").first() else {
                return result
            }

            let imgOuterHtml = try img.outerHtml()
            result = getImageUrl(imgOuterHtml)
        } catch {
            print(error)
        }
        return result
    }

    /// Returns the first link that `NSDataDetector` finds in `input`, cut off
    /// at the first `&`.
    ///
    /// - Parameter input: Text to scan, here the outer HTML of an `<img>` tag.
    /// - Returns: The detected link up to (not including) the first `&`, or an
    ///   empty string when no link can be detected.
    func getImageUrl(_ input: String) -> String {
        guard let detector = try? NSDataDetector(types: NSTextCheckingResult.CheckingType.link.rawValue) else {
            return ""
        }

        let fullRange = NSRange(location: 0, length: input.utf16.count)

        // Take the first match only if there is one; indexing matches[0] on an
        // input without any link would crash.
        guard let firstMatch = detector.matches(in: input, options: [], range: fullRange).first,
              let range = Range(firstMatch.range, in: input) else {
            return ""
        }

        let link = String(input[range])

        // Keep only the part before the first "&".
        // NOTE(review): presumably this trims escaped markup that the detector
        // includes after the URL — confirm against real page output.
        return link.components(separatedBy: "&").first ?? link
    }

    override func viewDidLoad() {
        super.viewDidLoad()

        //guard let url = URL(string: "https://www.amazon.com/dp/B08FC6C75Y") else {
        guard let url = URL(string: "https://www.amazon.com/dp/B0084DS9EE") else {
            fatalError("Can not get url")
        }

        // getProductImage(url:) downloads synchronously; run it on a background
        // queue so the main thread is not blocked while the page loads.
        DispatchQueue.global(qos: .userInitiated).async { [weak self] in
            guard let self = self else { return }
            let imgUrl = self.getProductImage(url: url)
            //print(imgUrl)// https://images-na.ssl-images-amazon.com/images/I/61o7ai%2BYDoL._SL1441_.jpg
            print(imgUrl)  // https://images-na.ssl-images-amazon.com/images/I/41ZmuuKMtmL._SY450_.jpg
        }
    }
}
0
ManuelMB On

For the second URL the image can be obtained in different resolutions. You can get it from different attributes: data-midres-replacement, data-zoom-hires or data-a-hires. All of these options are in the code, but some of them are commented out.

import UIKit
import SwiftSoup

class ViewController: UIViewController {

    /// Downloads the product page at `url` and returns an image URL taken from
    /// the first `<img>` inside the main image wrapper.
    ///
    /// When the element's `src` is an inline base64 GIF, the value of the
    /// `data-midres-replacement` attribute is returned instead.
    ///
    /// This performs a blocking download, so call it off the main thread.
    ///
    /// - Parameter url: Address of the product page to fetch.
    /// - Returns: The image URL string, or an empty string when the page cannot
    ///   be loaded or parsed, or when no matching element exists.
    func getProductImage(url: URL) -> String {
        var result = ""
        do {
            let html = try String(contentsOf: url, encoding: .utf8)
            let doc: Document = try SwiftSoup.parseBodyFragment(html)

            // The selector can match nothing (different page layout, blocked
            // request); return the empty default instead of force-unwrapping.
            guard let img = try doc.select(".image-size-wrapper.fp-image-wrapper.image-block-display-flex img").first() else {
                return result
            }
            let src = try img.attr("src")

            if src.contains("data:image/gif;base64") {
                // src holds inline image data rather than a URL, so read the
                // URL from one of the data attributes instead.
                let dataMidresReplacement = try img.attr("data-midres-replacement")
                print("dataMidresReplacement: \(dataMidresReplacement)") // dataMidresReplacement: https://images-na.ssl-images-amazon.com/images/I/41ZmuuKMtmL._AC_SY350_.jpg
                result = dataMidresReplacement
                /*
                let dataZoomHires  = try img.attr("data-zoom-hires")
                print("dataZoomHires: \(dataZoomHires)") // dataZoomHires: https://images-na.ssl-images-amazon.com/images/I/41ZmuuKMtmL._AC_SL1500_.jpg
                result = dataZoomHires
                */
                /*
                let dataHires  = try img.attr("data-a-hires")
                print("dataHires: \(dataHires)") // dataHires: https://images-na.ssl-images-amazon.com/images/I/41ZmuuKMtmL._AC_SY1000_.jpg
                result = dataHires
                */
            } else {
                result = src
            }
        } catch {
            print(error)
        }

        return result
    }

    override func viewDidLoad() {
        super.viewDidLoad()

        //guard let url = URL(string: "https://www.amazon.com/dp/B08FC6C75Y") else {
        guard let url = URL(string: "https://www.amazon.com/dp/B0084DS9EE") else {
            fatalError("Can not get url")
        }

        // getProductImage(url:) downloads synchronously; run it on a background
        // queue so the main thread is not blocked while the page loads.
        DispatchQueue.global(qos: .userInitiated).async { [weak self] in
            guard let self = self else { return }
            let imgUrl = self.getProductImage(url: url)
            //print(imgUrl) // https://images-na.ssl-images-amazon.com/images/I/61o7ai%2BYDoL._AC_SY350_QL15_.jpg
            // For this page the raw src is a data:image/gif;base64 placeholder,
            // so the data-midres-replacement value is what gets returned.
            print(imgUrl)  // https://images-na.ssl-images-amazon.com/images/I/41ZmuuKMtmL._AC_SY350_.jpg
        }
    }
}