Donate. I desperately need donations to survive due to my health

Get paid by answering surveys Click here

Click here to donate

Remote/Work from Home jobs

Showing error on python code but they are correct

the problem I am facing is this news scraper is not working and I can't figure out why. If it is possible can someone help me and amend the code. Thank you very much. I have put the code and the error that occurs when I run it. I have tried many possible solutions I just can't figure it out it shows where the errors are however I can't figure out what to change them to.

import requests
from bs4 import BeautifulSoup

from selenium import webdriver
import time

import pandas as pd
import numpy as np
from datetime import datetime

page = requests.get('https://qz.com/africa/latest')

soup = BeautifulSoup(page.content, 'html.parser')
weblinks = soup.find_all('article')

pagelinks = []
for link in weblinks[5:]:
    url = link.contents[0].find_all('a')[0]
    pagelinks.append('http://qz.com'+url.get('href'))

authorname = []
title = []
thearticle = []
for link in pagelinks:    
    # store the text for each article
    paragraphtext = []    
    # get url
    url = link
    # get page text
    page = requests.get(url)
    # parse with BFS
    soup = BeautifulSoup(page.text, 'html.parser')    
    # get author name, if there's a named author
    try:
        abody = soup.find(class_='d3284 africa').find('a')
        aname = abody.get_text() 
    except:
        aname = 'Anonymous'    

    # get article title
    atitle = soup.find(class_="_21349 africa none _4ca8e")
    thetitle = atitle.get_text() 
    # get main article page
    articlebody = soup.find(class_='_61c55')
    # get text
    articletext = soup.find_all('p')[8:]
    # print text
    for paragraph in articletext[:-1]:
        # get the text only
        text = paragraph.get_text()
        paragraphtext.append(text)        
    # combine all paragraphs into an article
    thearticle.append(paragraphtext)
    authorname.append(aname)
    title.append(thetitle)

# join paragraphs to re-create the article
myarticle = [' '.join(article) for article in thearticle]

# save article data to file
data = {'Title':title, 
        'Author':authorname, 
        'PageLink':pagelinks, 
        'Article':myarticle, 
        'Date':datetime.now()}

oldnews = pd.read_excel('quartz\\news.xlsx')
news = pd.DataFrame(data=data)
cols = ['Title', 'Author', 'PageLink', 'Article', 'Date']
news = news[cols]

afronews = oldnews.append(news)
afronews.drop_duplicates(subset='Title', keep='last', inplace=True)
afronews.reset_index(inplace=True)
afronews.drop(labels='index', axis=1, inplace=True)

filename = 'quartz\\news.xlsx'
wks_name = 'Data'

writer = pd.ExcelWriter(filename)
afronews.to_excel(writer, wks_name, index=False)

writer.save()

The error that comes out is:

FileNotFoundError                         Traceback (most recent call last)
<ipython-input-3-dddb080986df> in <module>()
     65         'Date':datetime.now()}
     66 
---> 67 oldnews = pd.read_excel('quartz\\news.xlsx')
     68 news = pd.DataFrame(data=data)
     69 cols = ['Title', 'Author', 'PageLink', 'Article', 'Date']

~/anaconda3/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    176                 else:
    177                     kwargs[new_arg_name] = new_arg_value
--> 178             return func(*args, **kwargs)
    179         return wrapper
    180     return _deprecate_kwarg

~/anaconda3/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    176                 else:
    177                     kwargs[new_arg_name] = new_arg_value
--> 178             return func(*args, **kwargs)
    179         return wrapper
    180     return _deprecate_kwarg

~/anaconda3/lib/python3.7/site-packages/pandas/io/excel.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, **kwds)
    305 
    306     if not isinstance(io, ExcelFile):
--> 307         io = ExcelFile(io, engine=engine)
    308 
    309     return io.parse(

~/anaconda3/lib/python3.7/site-packages/pandas/io/excel.py in __init__(self, io, **kwds)
    392             self.book = xlrd.open_workbook(file_contents=data)
    393         elif isinstance(self._io, compat.string_types):
--> 394             self.book = xlrd.open_workbook(self._io)
    395         else:
    396             raise ValueError('Must explicitly set engine if not passing in'

~/anaconda3/lib/python3.7/site-packages/xlrd/__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
    114         peek = file_contents[:peeksz]
    115     else:
--> 116         with open(filename, "rb") as f:
    117             peek = f.read(peeksz)
    118     if peek == b"PK\x03\x04": # a ZIP file

FileNotFoundError: [Errno 2] No such file or directory: 'quartz\\news.xlsx'

Comments