from urllib.request import urlopen
from urllib.parse import quote_plus
from bs4 import BeautifulSoup
import pandas as pd

# Scrape Naver news search results for a user-supplied query and save each
# article's title and URL to 'naver.csv'.
#
# Flow: read the query and the last page number from stdin, request one
# search-result page per loop iteration, collect the `title` / `href`
# attributes of every element with class `_sp_each_title`, then write all
# collected rows to CSV once at the end.

pageNum = 1
search = input('검색어: ')

page = input('끝 페이지 입력')
# The loop below advances the result offset in steps of 10 (1, 11, 21, ...),
# so the user-facing page number N maps to offset N * 10 - 9.
# int() raises ValueError if the input is not an integer.
lastpage = int(page) * 10 - 9

searchlist = []
for pageNum in range(1, lastpage + 1, 10):
    # The original URL hard-coded `start=1` and only fed the offset into
    # `refresh_start`, so every iteration requested the same `start` value.
    # NOTE(review): assumes `start` is the parameter Naver uses for the result
    # offset -- confirm against the live site.
    url = f'https://search.naver.com/search.naver?&where=news&query={quote_plus(search)}&sm=tab_pge&sort=0&photo=0&field=0&reporter_article=&pd=0&ds=&de=&docid=&nso=so:r,p:all,a:all&mynews=0&cluster_rank=37&start={pageNum}&refresh_start={pageNum}'

    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')

    for i in soup.find_all(class_='_sp_each_title'):
        article_title = i.attrs['title']
        article_url = i.attrs['href']

        # Progress output: title, link, and the offset it was found at.
        print(article_title)
        print(article_url)
        print(pageNum)

        searchlist.append([article_title, article_url])

# Build the frame once, after scraping. Naming the columns here (rather than
# passing `header=` to to_csv) keeps the header row identical while also
# working when nothing was scraped: the result is a header-only CSV instead of
# a NameError on an undefined `searchfile`.
searchfile = pd.DataFrame(searchlist, columns=['제목', '기사 url'])
searchfile.to_csv('naver.csv', encoding='CP949', index=False)

# 'Python' 카테고리의 다른 글

# [python] Beautifulsoup 네이버 뉴스 크롤링  (0) 2020.05.27