// src/scraper/scrape.js
//
// Reconstructed from a two-commit patch series by o5pxels:
//   1d52b0f2853ab4f275c8c0de8b73a8e7f9cb614b (Fri, 9 Apr 2021 20:10:46 -0500)
//     "update README, initial commit for scrape.js"
//   41909f9189810df0d005b7c8ecac419d0118037f (Fri, 9 Apr 2021 21:58:01 -0500)
//     "Add MongoDB dependency, create basic file layout"

const puppeteer = require('puppeteer');
// Not used yet: reserved for the database-storage step marked TODO below.
const mongodb = require('mongodb');

// Absolute XPath to the search input on the Google home page.
// NOTE(review): this is tied to the page's exact DOM layout and will stop
// matching if that layout changes; performSearch throws a descriptive error
// when it does.
const SEARCH_BOX_XPATH =
  '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input';

// CSS classes read from the results page. The names are generated by the site;
// presumably they belong to the streaming-availability widget -- confirm
// against a live results page before relying on them.
const RESULTS_CONTAINER_SELECTOR = '.r0VsPb';
const PLATFORM_SELECTOR = '.i3LlFf';
const PRICE_SELECTOR = '.V8xno';

/**
 * Specifically for scraping: searches Google for "<query> streaming" in a
 * headless browser and collects the text of the platform and price elements
 * found in every results container.
 *
 * The collected lists are logged to the console and returned. Storing them in
 * the database is still to be implemented.
 *
 * @param {string} query - The search string (title of the movie or show).
 * @param {string} [type] - The kind of media (music, show, movie). Accepted
 *   for the planned storage step; not used by the scraping logic yet.
 * @returns {Promise<{streamingPlatforms: string[], streamingPrices: string[]}>}
 *   Text of every platform / price element found; both arrays are empty when
 *   the page has no results container.
 * @throws {Error} If the search box cannot be located, or if Puppeteer fails
 *   to launch, navigate, or evaluate the page.
 *
 * @example
 * performSearch('spongebob', 'tv');
 */
async function performSearch(query, type) {
  const browser = await puppeteer.launch({
    headless: true,
  });

  // try/finally guarantees the browser process is closed even when a step
  // below throws; otherwise each failure would leak a Chromium instance.
  try {
    const pages = await browser.pages();
    const page = pages[0];
    await page.goto('https://google.com');

    const [searchBox] = await page.$x(SEARCH_BOX_XPATH);
    if (!searchBox) {
      throw new Error(
        `Search box not found at XPath ${SEARCH_BOX_XPATH}; the page layout may have changed`,
      );
    }
    await searchBox.type(`${query} streaming`);

    // Register the navigation wait BEFORE pressing Enter. Waiting afterwards
    // races with the page load and can hang until the timeout if navigation
    // finishes first.
    await Promise.all([
      page.waitForNavigation(),
      page.keyboard.press('Enter'),
    ]);

    const resultContainers = await page.$$(RESULTS_CONTAINER_SELECTOR);

    // Accumulate across all containers; assigning inside the loop would keep
    // only the last container's values.
    const streamingPlatforms = [];
    const streamingPrices = [];
    for (const container of resultContainers) {
      const platforms = await container.$$eval(PLATFORM_SELECTOR, (nodes) =>
        nodes.map((n) => n.innerText),
      );
      const prices = await container.$$eval(PRICE_SELECTOR, (nodes) =>
        nodes.map((n) => n.innerText),
      );
      streamingPlatforms.push(...platforms);
      streamingPrices.push(...prices);
    }

    console.log(streamingPlatforms);
    console.log(streamingPrices);

    // TODO: do the database storage

    return { streamingPlatforms, streamingPrices };
  } finally {
    await browser.close();
  }
}