From 034720cdad9e7344db4177a6fef89c6b29a5829a Mon Sep 17 00:00:00 2001 From: o5pxels Date: Sat, 10 Apr 2021 14:20:15 -0500 Subject: check.js incomplete implementation, move modules to src, export scrape function --- src/scraper/scrape.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/scraper/scrape.js') diff --git a/src/scraper/scrape.js b/src/scraper/scrape.js index 131ae40..a24d1e8 100644 --- a/src/scraper/scrape.js +++ b/src/scraper/scrape.js @@ -1,12 +1,13 @@ const puppeteer = require('puppeteer'); const mongodb = require('mongodb') + // Specifically for scraping // Stores in database once finished scraping // query is the search string (title of movie or show) and the type (music, show, movie) // will be used -async function performSearch(query, type) { +exports.performSearch = async function(query, type) { const browser = await puppeteer.launch({ headless: true }); @@ -29,5 +30,3 @@ async function performSearch(query, type) { // do the database storage await browser.close(); } - -// performSearch('spongebob', 'tv'); \ No newline at end of file -- cgit v1.2.3 From 74598733bcc7b39b8a9b6521195545c543e38850 Mon Sep 17 00:00:00 2001 From: o5pxels Date: Sat, 10 Apr 2021 22:22:51 -0500 Subject: Complete implementation for check.js, complete scrape.js --- src/scraper/scrape.js | 72 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 25 deletions(-) (limited to 'src/scraper/scrape.js') diff --git a/src/scraper/scrape.js b/src/scraper/scrape.js index a24d1e8..605c234 100644 --- a/src/scraper/scrape.js +++ b/src/scraper/scrape.js @@ -1,32 +1,54 @@ const puppeteer = require('puppeteer'); -const mongodb = require('mongodb') - +const MongoClient = require('mongodb').MongoClient; // Specifically for scraping // Stores in database once finished scraping -// query is the search string (title of movie or show) and the type (music, show, movie) -// will be used +// query is the movie or show name +// type is the 
media type ("tv" for tv show or "movie") -exports.performSearch = async function(query, type) { - const browser = await puppeteer.launch({ - headless: true - }); - const pages = await browser.pages(); - const page = pages[0]; - await page.goto('https://google.com'); - const searchBox = await page.$x("/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input"); - await searchBox[0].type(`${query} streaming`); - await page.keyboard.press('Enter'); - await page.waitForNavigation(); - const resultsContainer = await page.$$('.r0VsPb') - var streamingPlatforms; - var streamingPrices; - for (let i = 0; i < resultsContainer.length; i++) { - streamingPlatforms = await resultsContainer[i].$$eval('.i3LlFf', nodes => nodes.map(n => n.innerText)); - streamingPrices = await resultsContainer[i].$$eval('.V8xno', nodes => nodes.map(n => n.innerText)) +exports.performSearch = async function (id, query, type) { + try { + const uri = "mongodb+srv://user0:8HL0NBINt6B8mIYF@cluster0.kfyrm.mongodb.net/StreamFinder?retryWrites=true&w=majority"; + const client = new MongoClient(uri, { useNewUrlParser: true, useUnifiedTopology: true }); + await client.connect(); + const database = client.db("db"); + const tv = database.collection("tv"); + const movie = database.collection('movie') + const browser = await puppeteer.launch({ + headless: true + }); + const pages = await browser.pages(); + const page = pages[0]; + await page.goto('https://google.com'); + const searchBox = await page.$x("/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input"); + await searchBox[0].type(`${query} ${type} streaming`); + await page.keyboard.press('Enter'); + await page.waitForNavigation(); + const resultsContainer = await page.$$('.r0VsPb') + var streamingPlatforms; + var streamingPrices; + for (let i = 0; i < resultsContainer.length; i++) { + streamingPlatforms = await resultsContainer[i].$$eval('.i3LlFf', nodes => nodes.map(n => n.innerText)); + streamingPrices = await 
resultsContainer[i].$$eval('.V8xno', nodes => nodes.map(n => n.innerText)) + } + if (type == "movie") { + await movie.insertOne({ + title: query, + id: id, + service: streamingPlatforms, + price: streamingPrices + }) + } else if (type == "tv") { + await tv.insertOne({ + title: query, + id: id, + service: streamingPlatforms, + price: streamingPrices + }) + } + await client.close() + await browser.close(); + } catch (error) { + console.log(error) } - console.log(streamingPlatforms); - console.log(streamingPrices); - // do the database storage - await browser.close(); } -- cgit v1.2.3