import lxml
import urllib.request
from bs4 import BeautifulSoup
# Request headers passed to every urllib.request.Request built in this file;
# the only entry is a desktop-Chrome User-Agent string.
headerss = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"
    ),
}
def data_requests(main_data):
    """Download every linked article and save its content as an HTML file.

    For each element of ``main_data`` the URL stored in its ``href``
    attribute is fetched with the module-level ``headerss``. The first match
    of ``article[class='hentry'] h1[class='entry-title']`` supplies the
    title and the first match of
    ``div[id='primary'] div[class='entry-content']`` supplies the content.
    The content markup is written to ``<title>.html`` in the current working
    directory, encoded as UTF-8.

    Entries without an ``href``, and pages where either selector matches
    nothing (or the title is blank), are skipped instead of raising.

    Args:
        main_data: Iterable of link elements exposing ``.get("href")``
            (``mian`` passes the result of ``soup.select``).

    Returns:
        None.

    Raises:
        urllib.error.URLError: If a page cannot be fetched.
        OSError: If an output file cannot be written.
    """
    # Characters that cannot appear in file names on common platforms are
    # replaced so that a title such as "a/b: c?" still yields a usable name.
    unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

    for link in main_data:
        url = link.get("href")
        if not url:
            continue  # anchor without a target: nothing to download

        request = urllib.request.Request(url, headers=headerss)
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode()

        soup = BeautifulSoup(html, "lxml")
        titles = soup.select("article[class='hentry'] h1[class='entry-title']")
        bodies = soup.select("div[id='primary'] div[class='entry-content']")
        if not titles or not bodies:
            continue  # page does not have the expected article layout

        file_stem = titles[0].text.strip().translate(unsafe_chars)
        if not file_stem:
            continue  # no usable title to name the file after

        # Each page is written as soon as it is parsed, so every article is
        # saved (the previous index loop stopped one short of the last one).
        with open(file_stem + ".html", "w", encoding="utf-8") as out_file:
            out_file.write(str(bodies[0]))
def mian():
    """Fetch the site index page and download every article it links to.

    Requests ``https://www.idle.fit/`` with the module-level ``headerss``,
    collects the anchors matched by
    ``main[class='site-main indexMain'] h1[class='entry-title'] a`` and
    hands them to ``data_requests``.

    Returns:
        Whatever ``data_requests`` returns for the collected anchors.

    Raises:
        urllib.error.URLError: If the index page cannot be fetched.
    """
    url = "https://www.idle.fit/"
    request = urllib.request.Request(url, headers=headerss)
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode()

    soup = BeautifulSoup(html, "lxml")
    main_data = soup.select("main[class='site-main indexMain'] h1[class='entry-title'] a")
    return data_requests(main_data)
if __name__ == '__main__':
    # Run the scraper only when this file is executed as a script.
    # NOTE(review): the original body here was the non-code text
    # "Comments | NOTHING"; calling mian() is assumed to be the intent.
    mian()