I am new to coding. This code works in two steps. In the first step, the user types the name of the article they want to find and clicks the "ResearchGate" button; that website gives us the DOI number. Then, in the second step, we search for this DOI number by clicking the "Sci-Hub" button. That's it.
What I am trying to do is extract this DOI number from the parsed page text and then search for that DOI on Sci-Hub. I presume this can be done with only one textbox and one button, so that when the user types the name of the article, the program opens the found result on Sci-Hub.
I wrote a little bit of web parsing below.
from PyQt5.QtWidgets import *
from PyQt5.QtWidgets import QApplication, QMainWindow, QMessageBox
from bs4 import BeautifulSoup
import sys
import webbrowser
def doi(event):
    """Open the Sci-Hub page for the DOI typed into the text box.

    Used as the slot for the "SCI-HUB" button's ``clicked`` signal. If the
    text box is empty or contains only whitespace, a notification dialog is
    shown and no browser window is opened.

    Args:
        event: Value delivered by the ``clicked`` signal (Qt passes the
            button's ``checked`` flag). It is not used.
    """
    # Strip surrounding whitespace (common when a DOI is pasted) so that a
    # blank-looking entry is rejected and no stray spaces end up in the URL.
    identifier = window.textbox.text().strip()
    if not identifier:
        QMessageBox.about(window, "Notification", "Please type the DOI of the article you want to find")
        return
    webbrowser.open_new("https://sci-hub.se/" + identifier)
def research():
    """Open a ResearchGate publication search for the typed article title.

    Used as the slot for the "ResearchGate" button's ``clicked`` signal. If
    the text box is empty or contains only whitespace, a notification dialog
    is shown and no browser window is opened.
    """
    # Imported locally so this function stays self-contained.
    from urllib.parse import quote_plus

    title = window.textbox.text().strip()
    if not title:
        QMessageBox.about(window, "Notification", "Please type the name of the article")
        return
    # Titles routinely contain spaces, '?', '&', '#' or '+'; left unescaped
    # they would truncate or alter the ``q`` parameter, so encode the value.
    search_url = "https://www.researchgate.net/search/publication?q=" + quote_plus(title)
    webbrowser.open_new(search_url)
def quit_window():
    """Close the main window; used as the slot for the "Exit" button."""
    window.close()
# --- Application and main window -------------------------------------------
app = QApplication(sys.argv)

window = QMainWindow()
window.setWindowTitle("Publication Search")
# Position (500, 300) on screen, size 300x300 pixels.
window.setGeometry(500, 300, 300, 300)

# --- Input field ------------------------------------------------------------
# Stored as an attribute of the window because the button handlers read the
# query through ``window.textbox``.
window.textbox = QLineEdit(window)
window.textbox.setGeometry(20, 70, 270, 30)
window.textbox.setPlaceholderText("Please type the name of the article you want to find")

# --- Buttons, stacked vertically below the input field ----------------------
button1 = QPushButton("ResearchGate", window)
button1.move(100, 130)
button1.clicked.connect(research)

button2 = QPushButton("SCI-HUB", window)
button2.move(100, 170)
button2.clicked.connect(doi)

button3 = QPushButton("Exit", window)
button3.move(100, 210)
button3.clicked.connect(quit_window)

# --- Run ----------------------------------------------------------------------
window.show()
exit_code = app.exec_()  # blocks until the event loop finishes
sys.exit(exit_code)
Here is what I did to get the parsed text from the website.
import requests
import bs4
from bs4 import BeautifulSoup
# Fetch the ResearchGate search results for one article title and print the
# visible text of the returned page.
text = "Mobile TV: a new form of entertainment?"
url = "https://www.researchgate.net/search/publication"

# Pass the title through ``params`` so requests percent-encodes spaces and
# reserved characters instead of relying on a hand-built query string.
# The timeout keeps the script from hanging forever on an unresponsive server.
result = requests.get(url, params={"q": text}, timeout=10)
# Fail loudly on an HTTP error status rather than parsing an error page as
# if it were search results.
result.raise_for_status()

soup = bs4.BeautifulSoup(result.text, "html.parser")
print(soup.get_text())
# To inspect the markup itself rather than the text, print soup.prettify().
I see `DOI` in `<span class="">`, so you can try `soup.find_all('span')` and then check whether each span's text starts with `DOI:`.
I use `get(url, params={'q': text})` instead of appending `?q=text` to the URL by hand; requests then uses special codes instead of the spaces in the text. Some servers may need it.

Code for many pages with results on researchgate.net: