I've built a scraper that gets product data from different shopping websites.
When I run `python scraper.py`, the program prints a JSON object containing all the data, like this:
{ 'ebay': [ { 'advertiser': 'ebay',
'advertiser_url': 'https://rover.ebay.com/rover/1/711-53200-19255-0/1?ff3=2&toolid=10041&campid=5338482617&customid=&lgeo=1&vectorid=229466&item=302847614914',
'description': '30-Day Warranty - Free Charger & Cable - '
'Easy Returns!',
'main_image': 'https://thumbs1.ebaystatic.com/pict/04040_0.jpg',
'price': '290.0',
'title': 'Apple iPhone 8 Plus Smartphone AT&T Sprint '
'T-Mobile Verizon or Unlocked 4G LTE'}
]}
I want this data to be added to the database automatically every time I run the scraper.
Here's my database structure:
models.py
class Product(models.Model):
    """Database model for a single product record.

    Several fields share their names with keys of the scraper output
    (``advertiser``, ``advertiser_url``, ``description``, ``main_image``,
    ``price``).  The scraper's ``title`` key presumably maps to ``name`` --
    TODO confirm against the import code.
    """

    # --- Identity -----------------------------------------------------------
    similarity_id = models.CharField(max_length=255, blank=True, null=True)
    name = models.CharField(max_length=255, blank=True, null=True)
    # NOTE(review): ``unique=True`` together with ``blank=True`` means at most
    # one row can hold an empty slug; whatever creates rows should supply a
    # distinct value here -- confirm against the import code.
    url = models.SlugField(blank=True, unique=True, allow_unicode=True)
    advertiser_url = models.TextField(blank=True, null=True)

    # --- Images (stored as text) --------------------------------------------
    main_image = models.TextField(blank=True, null=True)
    second_image = models.TextField(blank=True, null=True)
    third_image = models.TextField(blank=True, null=True)

    # --- Pricing ------------------------------------------------------------
    old_price = models.FloatField(default=0.00)
    price = models.FloatField(default=0.00)
    discount = models.FloatField(default=0.00)
    currency = models.CharField(max_length=255, default="$")

    # --- Descriptive data and external identifiers --------------------------
    description = models.TextField(blank=True, null=True)
    keywords = models.CharField(max_length=255, blank=True, null=True)
    asin = models.CharField(max_length=80, blank=True, null=True)
    iban = models.CharField(max_length=255, blank=True, null=True)
    sku = models.CharField(max_length=255, blank=True, null=True)
    seller = models.CharField(max_length=255, blank=True, null=True)

    # --- Availability and popularity ----------------------------------------
    free_shipping = models.BooleanField(default=False)
    in_stock = models.BooleanField(default=True)
    sold_items = models.IntegerField(default=0)
    likes_count = models.IntegerField(default=0)

    # --- Classification and reviews -----------------------------------------
    category = models.CharField(max_length=255, blank=True, null=True)
    sub_category = models.CharField(max_length=255, blank=True, null=True)
    reviews_count = models.IntegerField(default=0)
    rating = models.FloatField(default=0)

    # --- Status and bookkeeping ---------------------------------------------
    active = models.BooleanField(default=True)
    is_prime = models.BooleanField(default=False)
    # Set automatically when the row is first created (``auto_now_add``).
    created_on = models.DateTimeField(auto_now_add=True)
    advertiser = models.CharField(max_length=255, blank=True, null=True)

    # Custom manager; ``ProductManager`` is defined outside this block.
    objects = ProductManager()

    class Meta:
        verbose_name_plural = "products"

    def __str__(self):
        """Return the product name, or an empty string when it is unset.

        ``name`` is nullable, and ``__str__`` must return a ``str``;
        returning ``None`` would raise ``TypeError`` wherever the instance
        is converted to text.
        """
        return self.name or ""