Blogs / How to make a bot with AWS Lambda
Jesse Torres June 5, 2022 8:15am
Creating your first bot is simple. In this guide, I will walk you through how to create a bot using AWS Lambda, the serverless compute service from AWS. If you are looking to get into the world of cloud, this guide is for you!
In this example, we are going to use Lambda, API Gateway, Python, JSON, HTML, CSS, and JavaScript. It’s important to note that in this example I will be hosting our website on HostGator, but you can host your files with your own hosting provider. At the end of this guide, you will have a graphical user interface (GUI) that displays SEO data for any website that you submit.
Here is a link to the finished product: SEOChecker
Each service and programming language will play an important role.
import json
from urllib.request import urlopen

import boto3
import requests
from botocore.exceptions import ClientError
from bs4 import BeautifulSoup

GET_RAW_PATH = "/seocheck"

# Upper bound (seconds) for each outbound HTTP request so a slow or
# unresponsive site cannot keep the function running until Lambda kills it.
REQUEST_TIMEOUT = 10

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0',
}


def lambda_handler(event, context):
    """Collect basic SEO data for the page named in the request.

    The target address is read from
    ``event['queryStringParameters']['websiteurl']``. The page is downloaded
    once, parsed with BeautifulSoup, and the site's ``/robots.txt`` is
    fetched with a second request.

    Args:
        event: Invocation payload; must contain
            ``queryStringParameters.websiteurl``.
        context: Lambda context object (unused).

    Returns:
        A JSON string with the keys ``Website``, ``Images``, ``Header_Tags``,
        ``all_web_links`` and ``total_links``.

    Raises:
        KeyError: If ``websiteurl`` is missing from the query string.
        requests.exceptions.RequestException: If a request fails or times out.
    """
    # NOTE(security): the URL is caller-supplied and fetched server-side.
    # Consider restricting the hosts this function may reach before exposing
    # the endpoint publicly.
    URL = event['queryStringParameters']['websiteurl']

    # One request serves both purposes: `elapsed` is the response time of the
    # very response whose HTML is analysed below.
    page = requests.get(URL, headers=headers, timeout=REQUEST_TIMEOUT)
    response_time = page.elapsed.total_seconds()
    soup = BeautifulSoup(page.text, 'lxml')

    # Header tags as [tag name, text] pairs, in document order.
    header_tags = [
        [heading.name, heading.text.strip()]
        for heading in soup.find_all(["h1", "h2", "h3"])
    ]

    # Title: tolerate pages with no <title> or a <title> without plain text.
    title_tag = soup.find('title')
    title = ""
    if title_tag is not None and title_tag.string is not None:
        title = str(title_tag.string).strip()

    # Meta description: the last matching tag wins; a tag without a
    # `content` attribute counts as an empty description.
    desc = ""
    for meta in soup.find_all('meta'):
        if meta.get('name') == 'description':
            desc = meta.get('content') or ""

    meta_status = "PASS" if title and desc else "FAIL"

    # Links: only anchors that carry an href, so the list never holds nulls.
    all_links = [anchor.get('href') for anchor in soup.find_all('a', href=True)]
    total_links = len(all_links)

    # Image information. `img_src` and `alt_string` stay index-aligned
    # because the page script renders them side by side.
    alt_string = []
    img_src = []
    img_no_alt = []
    image_data = []
    media = soup.find_all('img')
    for image in media:
        alt = image.get("alt")
        src = image.get("src") or ""
        img_src.append(src)
        image_data.append([src, alt])
        if alt:
            alt_string.append(alt)
        else:
            # Covers both a missing alt attribute (None) and an empty one.
            alt_string.append("No alt attribute provided")
            img_no_alt.append(src)
    imagecount = len(media)

    # robots.txt lives at the site root: keep "scheme://host" and drop the rest.
    site_root = "/".join(URL.split("/", 3)[:3])
    robots_response = requests.get(
        site_root + "/robots.txt", headers=headers, timeout=REQUEST_TIMEOUT
    )
    # An error page (e.g. a 404) is not a robots file; report it as empty.
    robotsFile = robots_response.text if robots_response.ok else ""

    data_set = {
        "Website": {
            "Url": URL,
            "Title": title,
            "Desc": desc,
            "Meta_status": meta_status,
            "Response_time": response_time,
            "robotsFile": robotsFile,
        },
        "Images": {
            "Image_src_alt": image_data,
            "img_with_no_alt": img_no_alt,
            "img_src": img_src,
            "alt_string": alt_string,
            "Total_images": imagecount,
        },
        "Header_Tags": header_tags,
        "all_web_links": all_links,
        "total_links": total_links,
    }
    return json.dumps(data_set)
<!DOCTYPE html>
<!-- This site was created in Webflow. http://www.webflow.com -->
<!-- Last Published: Thu May 26 2022 23:23:07 GMT+0000 (Coordinated Universal Time) -->
<html data-wf-page="628cf609a2de32bf3f27026c" data-wf-site="628437b9344b3822e5ad404f">
<head>
  <meta charset="utf-8">
  <title>Seocheck</title>
  <link href="https://jessehtorres.com/resume/css/normalize.css" rel="stylesheet" type="text/css">
  <link href="https://jessehtorres.com/resume/css/webflow.css" rel="stylesheet" type="text/css">
  <link href="https://jessehtorres.com/resume/css/resume-707eba.webflow.css" rel="stylesheet" type="text/css">
  <script src="https://ajax.googleapis.com/ajax/libs/webfont/1.6.26/webfont.js" type="text/javascript"></script>
  <script type="text/javascript">WebFont.load({ google: { families: ["Montserrat:100,100italic,200,200italic,300,300italic,400,400italic,500,500italic,600,600italic,700,700italic,800,800italic,900,900italic"] }});</script>
  <!--[if lt IE 9]><script src="https://cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv.min.js" type="text/javascript"></script><![endif]-->
  <script type="text/javascript">!function(o,c){var n=c.documentElement,t=" w-mod-";n.className+=t+"js",("ontouchstart"in o||o.DocumentTouch&&c instanceof DocumentTouch)&&(n.className+=t+"touch")}(window,document);</script>
  <script src="https://code.jquery.com/jquery-3.6.0.js" integrity="sha256-H+K7U5CnXl1h5ywQfKtSj8PCmoN9aaq30gDh27Xc0jk=" crossorigin="anonymous"></script>
  <link href="https://jessehtorres.com/resume/images/favicon.png" rel="shortcut icon" type="image/x-icon">
  <link href="https://jessehtorres.com/resume/images/webclip.png" rel="apple-touch-icon">
</head>
<body>
  <!-- Hero: URL entry form -->
  <div class="hero_seocheck wf-section">
    <div class="container-2 w-container">
      <h2 class="heading-7">Free SEO Checker</h2>
      <img src="https://jessehtorres.com/resume/images/jesse_portfolio-min.png" loading="lazy" width="150" alt="A portrait picture of Jesse Torres, a web designer and IT expert in the rockford IL area" class="image-5">
      <h1 class="heading-6"><strong class="bold-text-4">Test your Website with our SEO Checker!</strong></h1>
      <p class="paragraph-5">Compliance with search engine guidelines is an important factor for the success of your website. The SEO Checker analyzes your website and gives you tips on optimizing your site for a long-term success in search engine optimization!</p>
      <div class="form-block w-form">
        <!-- preventDefault stops the browser's own GET submission, which would
             reload the page and cancel the API request started by logSubmit. -->
        <form id="seoform" name="seoform" data-name="seoform" method="get" class="form-2" onsubmit="event.preventDefault(); logSubmit(event);">
          <input type="url" class="text-field-2 w-input" maxlength="256" name="websiteurl" data-name="websiteurl" placeholder="https://jessehtorres.com/" id="websiteurl" required="">
          <input type="submit" value="Analyze Website" data-wait="Please wait..." class="submit-button-2 w-button">
        </form>
      </div>
    </div>
  </div>

  <!-- Results: the elements with ids below are filled in by logSubmit;
       the values written here are sample data. -->
  <section id='seo_section'>
    <!-- Summary figures -->
    <div class="section-9 wf-section">
      <div class="container-3 w-container">
        <div class="div-block-7">
          <h2> <strong class="bold-text-6">Response time</strong></h2>
          <p class="paragraph-14" id="response_time">1.17 S</p>
        </div>
        <div class="div-block-7">
          <h2><strong class="bold-text-6">Media files</strong></h2>
          <p class="paragraph-15" id="media_files">8</p>
        </div>
        <div class="div-block-7">
          <h2><strong class="bold-text-6">Number of links</strong></h2>
          <p class="paragraph-16" id="total_links">4</p>
        </div>
        <div class="div-block-7">
          <h2><strong class="bold-text-6">Meta Data</strong></h2>
          <p class="paragraph-17" id="meta_data">OK</p>
        </div>
      </div>
    </div>

    <!-- Title and meta description -->
    <div class="section-3 wf-section">
      <h2 class="heading-11">Meta specifications</h2>
      <div class="columns-3 w-row">
        <div class="column-4 w-col w-col-3">
          <div class="div-block-8">
            <h3 class="heading-12"><strong class="bold-text-7">Title</strong></h3>
          </div>
        </div>
        <div class="w-col w-col-9">
          <p class="paragraph-6"><strong id="title">Rockford Website Design | JesseHTorres</strong></p>
        </div>
      </div>
      <div class="columns-3 w-row">
        <div class="column-4 w-col w-col-3">
          <div class="div-block-8">
            <h3 class="heading-13"><strong class="bold-text-8">Meta description</strong></h3>
          </div>
        </div>
        <div class="w-col w-col-9">
          <p class="paragraph-6"><strong id="desc">Looking for a Rockford Website Designer? We offer custom website designs, analytic reporting, and automated technology to increase your bottom line.</strong></p>
        </div>
      </div>
    </div>

    <!-- Image URLs and their alt text, rendered as two parallel columns -->
    <div class="section-4 wf-section">
      <h2 class="heading-11">Media list</h2>
      <div class="columns-4 w-row">
        <div class="column-5 w-col w-col-7">
          <h3 class="heading-8"><strong>URL</strong></h3>
          <div id='media_container'> </div>
        </div>
        <div class="w-col w-col-5">
          <h3 class="heading-8"><strong>Alt attribute</strong></h3>
          <div id='alt_container'> </div>
        </div>
      </div>
    </div>

    <!-- Heading tags (tag name / text content) -->
    <div class="section-5 wf-section">
      <h2 class="heading-11">Meta specifications</h2>
      <div class="w-row">
        <div class="column-6 w-col w-col-3 w-col-small-small-stack">
          <h3 class="heading-8 meta_heading"><strong>Tag</strong></h3>
          <div id='tag_container'> </div>
        </div>
        <div class="w-col w-col-9 w-col-small-small-stack">
          <h4 class="heading-8"><strong>Content</strong></h4>
          <div id='content_container'> </div>
        </div>
      </div>
    </div>

    <!-- Links found on the analysed page -->
    <div class="section-6 wf-section">
      <h2 class="heading-11">Found links on this page</h2>
      <div id='links_container'> </div>
    </div>

    <!-- Thumbnails of images that have no alt text -->
    <div class="section-8 wf-section">
      <h2 class="heading-11">Images with no Alt</h2>
      <div id='img_no_alt_container'> </div>
    </div>

    <!-- robots.txt contents and search-result preview -->
    <div class="section-7 wf-section">
      <div class="w-row">
        <div class="w-col w-col-6 w-col-stack">
          <h2 class="heading-10 heading-11">Robots.txt</h2>
          <p class="paragraph-9" id='robots_container'></p>
        </div>
        <div class="w-col w-col-6 w-col-stack">
          <h2 class="heading-9 heading-11">Search preview<br></h2>
          <div id='preview_container'> </div>
        </div>
      </div>
    </div>

    <!-- Footer -->
    <div class="section">
      <div class="container">
        <div class="footer-wrap">
          <p class="paragraph-4 copyright copyright">Jesse Torres</p>
          <p class="paragraph-4">© Jessehtorres. All rights reserved.</p>
        </div>
      </div>
    </div>
  </section>

  <script src="https://d3e54v103j8qbb.cloudfront.net/js/jquery-3.5.1.min.dc5e7f18c8.js?site=628437b9344b3822e5ad404f" type="text/javascript" integrity="sha256-9/aliU8dGd2tb6OSsuzixeV4y/faTqgFtohetphbbj0=" crossorigin="anonymous"></script>
  <script src="resume/js/webflow.js" type="text/javascript"></script>
</script> <!-- API CALL --> <script type="text/javascript"> function logSubmit(event) { api_call = document.getElementById('websiteurl').value; const params = new URLSearchParams({ websiteurl: api_call }) const url = `REPLACE_WITH_YOUR_API_ENDPOINT/seocheck?${ params.toString() }` fetch(url).then((data)=>{ return data.json(); }).then((completedata)=>{ let websiteUrl = completedata.Website.Url; let responseTime = parseFloat(completedata.Website.Response_time).toFixed(3) + " S"; let mediaFiles = completedata.Images.Total_images; let totalLinks = completedata.total_links; let metaStatus = completedata.Website.Meta_status; let title = completedata.Website.Title; let desc = completedata.Website.Desc; let h_tag = completedata.Header_Tags; let temp_img_src = completedata.Images.img_src; let img_alt = completedata.Images.alt_string; let temp_all_web_links = completedata.all_web_links; let temp_img_no_alt = completedata.Images.img_with_no_alt; img_no_alt=[] for (var i=0;i < temp_img_no_alt.length; i++) { var statement = temp_img_no_alt[i].includes('https') if (statement == true) { img_no_alt.push(temp_img_no_alt[i]) } else { img_no_alt.push(websiteUrl+temp_img_no_alt[i]) } } img_src=[] for (var i=0;i < temp_img_src.length; i++) { var statement = temp_img_src[i].includes('https') if (statement == true) { img_src.push(temp_img_src[i]) } else { img_src.push(websiteUrl+temp_img_src[i]) } } all_web_links=[] for (var i=0;i < temp_all_web_links.length; i++) { var statement = temp_all_web_links[i].includes('https') if (statement == true) { all_web_links.push(temp_all_web_links[i]) } else { all_web_links.push(websiteUrl+temp_all_web_links[i]) } } let robots_file = completedata.Website.robotsFile; document.getElementsByName('websiteurl')[0].placeholder=websiteUrl; document.getElementById('response_time').innerHTML=responseTime; document.getElementById('media_files').innerHTML=mediaFiles; document.getElementById('total_links').innerHTML=totalLinks; 
document.getElementById('meta_data').innerHTML=metaStatus; document.getElementById('title').innerHTML=title; document.getElementById('desc').innerHTML=desc; // -------------- Media list -------------- src_text = ""; alt_text = ""; for (var i=0;i < img_src.length; i++) { src_text += `<p class="paragraph-7"> <a href="${img_src[i]}" target="_blank" class="link-4">${img_src[i]}</a> </p>` alt_text += `<p><strong class="bold-text-5">${img_alt[i]}</strong></p>` } document.getElementById('media_container').innerHTML=src_text; document.getElementById('alt_container').innerHTML=alt_text; // -------------- Meta specifications -------------- setTimeout(function(){ tag_text = ""; content_text = ""; for (var i=0;i < h_tag.length; i++) { tag_text += `<div> <p class="paragraph-8"><strong>${h_tag[i][0].toUpperCase()}</strong></p> </div>` content_text += `<p>${h_tag[i][1]}</p>` } document.getElementById('tag_container').innerHTML=tag_text; document.getElementById('content_container').innerHTML=content_text; }, 500); // -------------- Found links on this page -------------- link_text = ""; for (var i=0;i < all_web_links.length; i++) { link_text += `<p class="paragraph-7"> <a href="${all_web_links[i]}" target="_blank" class="link">${all_web_links[i]}</a> </p>` } document.getElementById('links_container').innerHTML=link_text; // -------------- Images with no Alt -------------- img_src_text = ""; for (var i=0;i < img_no_alt.length; i++) { img_src_text += `<div class="div-block-9"><img src="${img_no_alt[i]}" loading="lazy" width="70" alt="" class="image-6"></div>` } document.getElementById('img_no_alt_container').innerHTML=img_src_text; // -------------- Robots.txt -------------- robots_text = `${robots_file.replaceAll('\n', '<br>')}` document.getElementById('robots_container').innerHTML=robots_text; // -------------- Search preview -------------- preview_text = `<p class="paragraph-13">${websiteUrl}</p> <p class="paragraph-12"><strong>${title}</strong></p> <p>${desc}</p>` 
document.getElementById('preview_container').innerHTML=preview_text; // -------------- Display All Results -------------- document.getElementById('seo_section').style.display="block" document.getElementById("seo_section").scrollIntoView({behavior: "smooth"}); }) } const form = document.getElementById('seoform'); form.addEventListener('apicall', logSubmit); </script>