0

I am trying to using async and await, I am still new to it I cannot figure out what I am doing wrong

import requests
import bs4
import colorama
from colorama import Fore
import time
import datetime
import asyncio



async def get_html(episode_number: int) -> str:
    print(Fore.YELLOW + f"Getting HTML for episode {episode_number}", flush=True)

    url = f'https://talkpython.fm/{episode_number}'
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            resp.raise_for_status()
            return await resp.text
    await resp.raise_for_status()
   # return await resp.text


def get_title(html: str, episode_number: int) -> str:
    print(colorama.Fore.CYAN + f"Getting TITLE for episode {episode_number}", flush=True)
    soup = bs4.BeautifulSoup(html, 'html.parser')
    header = soup.select_one('h1')
    if not header:
        return "MISSING"

    return header.text.strip()


def main():

    t0 = datetime.datetime.now()
    print(colorama.Fore.WHITE + ' App started.', flush=True )

    loop = asyncio.get_event_loop()
    final_task = asyncio.gather(loop)
    #get_title_range()
    dt = datetime.datetime.now() - t0
    loop.run_until_complete(final_task)
    print(colorama.Fore.CYAN + "Done. " + ' App exiting total time: {:,.2f} sec.'.format(dt.total_seconds()), flush=True)


def get_title_range():
  
    for n in range(150, 170):
        html = get_html(n)
        title = get_title(html, n)
        print(Fore.CYAN + f"Title found: {title}", flush=True)


if __name__ == '__main__':
    main()

1

1 Answer 1

1

It looks like you're not initializing tasks for your event loop to run on. I typically follow this pattern:

async def main():
    headers = {'Connection': 'keep-alive', 'Content-Type': 'application/json', 'Authorization': auth}
    url = 'some-api.com/post-request-something'

    # We use a session to take advantage of tcp keep-alive
    timeout = aiohttp.ClientTimeout(total=10000)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        tasks = [async_wrap(session, q, url, headers) for q in queue]
        # gather literally 'gathers' all the tasks and schedules them in the event loop
        await asyncio.gather(*tasks, return_exceptions=True)

if __name__ == '__main__':
    ts = time()
    # Create the asyncio event loop - from the main function
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(main())
    finally:
        # Lets avoid an unclosed loop running a DDoS attack on ourselves
        loop.close()
    logger.info('Took %s seconds to complete', time() - ts)

note the line containing, takes those tasks gathered to schedule as coroutines in the main event loop:

loop.run_until_complete(main())

and then this, which calls my function, async_wrap() for each record I wanted to send in the http client (which I had stored in a list), but in your case it would call your asynchronous function get_html() using each record from get_title_range():

tasks = [async_wrap(session, q, url, headers) for q in queue] # -> mine
await asyncio.gather(*tasks, return_exceptions=True) # -> gather those tasks!

tasks = [get_html(episode_number=episode) for episode in list_of_episode_nums] # -> yours
await asyncio.gather(*tasks, return_exceptions=True) # -> gather those tasks!

Hope this helps you shore some details up, but unfortunately, asynchronous code can be quite a headache, requiring lots of trial-and-error.

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.