I am trying to scrape the website referenced in the code below. My main issue is logging in successfully. From what I've read online, the usual technique in Google Chrome is to open DevTools, go to the Network tab, log in, and then inspect the login request to find the "Form Data" payload. Unfortunately, no such request appears for this site. What can I do without that information?
import scrapy
class QuotesSpider(scrapy.Spider):
    """Log in to app.nominations.hospimedia.fr via its login form, then
    scrape the logged-in user's first name.

    ``FormRequest.from_response`` reads the <form> on the landing page and
    pre-fills its hidden inputs (e.g. CSRF tokens), which is why no
    hand-captured "Form Data" from DevTools is required.
    """

    name = "quotes"

    # BUG FIX: Scrapy only auto-requests the `start_urls` attribute;
    # a plain `urls` attribute is silently ignored, so `parse` never ran.
    start_urls = [
        'https://app.nominations.hospimedia.fr',
    ]

    def parse(self, response):
        """Submit the login form found on the landing page.

        The response of the login POST is handed to ``start_scraping``.
        """
        return scrapy.FormRequest.from_response(
            response,
            # NOTE(review): these keys must match the form's <input name=...>
            # attributes exactly — inspect the page HTML to confirm that the
            # fields really are named 'email' and 'pwd'.
            formdata={'email': 'XXX', 'pwd': 'XXXX'},
            # BUG FIX: was `self.starts_scraping` (typo) — the method defined
            # below is `start_scraping`, so the old code raised AttributeError.
            callback=self.start_scraping,
        )

    def start_scraping(self, response):
        """Extract the user's first name from the post-login page."""
        # BUG FIX: the original XPath '//span[@class"name-first-name]/text()"'
        # was malformed: the '=' after @class was missing and the closing
        # quote enclosed '/text()' inside the attribute value.
        name = response.xpath('//span[@class="name-first-name"]/text()').extract()
        yield {'user_name': name}
Alternatively, I have also tried using a plain Request, but this doesn't work either:
import scrapy
import json
class QuotesSpider(scrapy.Spider):
    """Attempt to log in to app.nominations.hospimedia.fr by POSTing a JSON
    body directly, then scrape the logged-in user's first name.
    """

    name = "quotes"

    # BUG FIX: Scrapy only auto-requests the `start_urls` attribute;
    # a plain `urls` attribute is silently ignored, so `parse` never ran.
    start_urls = [
        'https://app.nominations.hospimedia.fr',
    ]

    def parse(self, response):
        """POST the credentials as JSON; the login response is handed to
        ``start_scraping``.
        """
        # NOTE(review): the nested {'payload': {...}} wrapper and the field
        # names 'email'/'pwd' are guesses — confirm the exact JSON schema the
        # login endpoint expects (e.g. via the site's JS or an API doc).
        payload = {
            'payload': {
                'email': 'XXX',
                'pwd': 'XX',
            }
        }
        yield scrapy.Request(
            url='https://app.nominations.hospimedia.fr',
            method='POST',
            body=json.dumps(payload),
            # IMPROVEMENT: without this header the server receives the JSON
            # body with Scrapy's default content type and may reject it.
            headers={'Content-Type': 'application/json'},
            # BUG FIX: was `self.starts_scraping` (typo) — the method defined
            # below is `start_scraping`, so the old code raised AttributeError.
            callback=self.start_scraping,
        )

    def start_scraping(self, response):
        """Extract the user's first name from the post-login page."""
        # BUG FIX: the original XPath '//span[@class"name-first-name]/text()"'
        # was malformed: the '=' after @class was missing and the closing
        # quote enclosed '/text()' inside the attribute value.
        name = response.xpath('//span[@class="name-first-name"]/text()').extract()
        yield {'user_name': name}