Source code for scrapereads.meta

"""
Baseline classes for `Good Reads` objects.
The root class, ``GoodReadsMeta``, handles the connection to the `Good Reads` server.
"""

import string
from abc import ABC, abstractmethod

from .connect import connect
from .utils import *
from scrapereads import scrape


class GoodReadsMeta(ABC):
    """Root of all `Good Reads` objects that scrape and extract online data.

    * :attr:`base`: base page of `Good Reads`.

    * :attr:`href`: href of a page, appended to :attr:`base`.

    * :attr:`url`: url page of a `Good Reads` element (read-only).

    """

    def __init__(self):
        self._soup = None
        self.base = 'https://www.goodreads.com'
        self.href = '/'

    @property
    def url(self):
        """Concatenation of ``base`` and ``href``, or ``None`` when ``href`` is empty."""
        return self.base + self.href if self.href else None

    @url.setter
    def url(self, value):
        # The url is derived state: assigning to it is always rejected.
        raise AttributeError('Cannot modify an URL directly. Please modify it through `base` and `href` attributes.')

    def _next_page(self, npage=1):
        """Build the page suffix to append to an address.

        Args:
            npage (int, optional): page number.

        Returns:
            string: ``'?page=<npage>'`` when ``npage`` is greater than 1, else an empty string.

        """
        if npage > 1:
            return f'?page={npage}'
        return ''

    def connect(self, href=None):
        """Connect to a `Good Reads` page.

        Args:
            href (string, optional): if provided, connect to this page reference,
                else connect to the object's own ``href``.

        Returns:
            bs4.element.Tag

        """
        # Inside a method body the bare name ``connect`` resolves to the
        # module-level helper imported from ``.connect``, not to this method.
        target = href or self.href
        return connect(self.base + target)
class AuthorMeta(GoodReadsMeta):
    """Abstract author, built from the page info of ``https://www.goodreads.com/``.

    * :attr:`author_name`: name of the author.

    * :attr:`author_id`: key id of the author.

    * :attr:`base`: base page of `Good Reads`.

    * :attr:`href`: href page of the author.

    * :attr:`url`: url page of the author.

    """

    def __init__(self, author_id, author_name=None):
        """Store the author identity, scraping the name when it is not given.

        Args:
            author_id: key id of the author, used to build the page href.
            author_name (string, optional): name of the author. When empty,
                the author page is fetched and the name is scraped from it.

        """
        super().__init__()

        if not author_name:
            # No name supplied: connect to the id-only author page and read the name there.
            self._soup = self.connect(href=f'/author/show/{author_id}')
            author_name = scrape.get_author_name(self._soup)

        self.author_id = author_id
        # Underscores become spaces and the result is title-cased.
        self.author_name = author_name.replace('_', ' ').title()
        self.href = f'/author/show/{author_id}.{name_to_goodreads(self.author_name)}'

    # TODO: finish and add nested JSON option
    @abstractmethod
    def to_json(self):
        """Encode the author to a JSON format.

        The result holds the author name under ``'author'``, merged with
        whatever ``self.get_info()`` returns. ``get_info`` is not defined in
        this class; it is expected to come from the concrete subclass.

        Returns:
            dict

        """
        return {
            'author': self.author_name,
            **self.get_info()
        }

    def __repr__(self):
        return f'Author: {self.author_name}'
class BookMeta(AuthorMeta):
    """Abstract Book class, used as baseline.

    * :attr:`author_name`: name of the author.

    * :attr:`author_id`: key id of the author.

    * :attr:`book_name`: name of the book.

    * :attr:`book_id`: key id of the book.

    * :attr:`year`: year of publication of the book.

    * :attr:`edition`: edition of the book.

    * :attr:`base`: base page of `Good Reads`.

    * :attr:`href`: href page of the book.

    * :attr:`url`: url page of the book.

    """

    def __init__(self, author_id, book_id, book_name=None, author_name=None, edition=None, year=None):
        """Store the book identity on top of the author identity.

        Args:
            author_id: key id of the author, forwarded to the parent initializer.
            book_id: key id of the book; a falsy value is stored as ``0``.
            book_name (string, optional): name of the book; stored through
                ``string.capwords``, or as ``'Unknown'`` when empty.
            author_name (string, optional): name of the author, forwarded to the parent initializer.
            edition (optional): edition of the book, stored as given.
            year (optional): year of publication, stored as given.

        """
        super().__init__(author_id, author_name=author_name)
        self.book_id = book_id or 0
        if book_name:
            self.book_name = string.capwords(book_name, sep=None)
        else:
            self.book_name = 'Unknown'
        self.edition = edition
        self.year = year
        # Replaces the author href set by the parent initializer with the book page href.
        self.href = f'/book/show/{self.book_id}.{name_to_goodreads(self.book_name)}'
        self._author = None

    def get_author(self):
        """Get the author registered on the book.

        Returns:
            Author: the object given to ``register_author``, or ``None`` if none was registered.

        """
        return self._author

    def register_author(self, author):
        """Point the book to an Author.

        Args:
            author (Author): author to link to the book.

        """
        self._author = author

    @abstractmethod
    def to_json(self, encode='ascii'):
        """Encode the book to a JSON format.

        This abstract stub always raises; ``encode`` is not used here.

        Args:
            encode (string, optional): encoding name, left to the concrete implementation.

        Returns:
            dict

        Raises:
            NotImplementedError: always, in this base implementation.

        """
        raise NotImplementedError

    def __repr__(self):
        # Shape: Author Name: "Book Name", edition (year)
        # The edition and year parts are only present when set.
        pieces = [f'{self.author_name}: "{self.book_name}"']
        if self.edition:
            pieces.append(f', {self.edition}')
        if self.year:
            pieces.append(f' ({self.year})')
        return ''.join(pieces)
class QuoteMeta(AuthorMeta):
    """Abstract quote, from the quote page ``https://www.goodreads.com/author/quotes/``.

    * :attr:`quote_id`: key id of the quote.

    * :attr:`quote_name`: name of the quote.

    * :attr:`text`: text of the quote (empty string when not provided).

    * :attr:`tags`: tags of the quote (empty list when not provided).

    * :attr:`likes`: likes of the quote.

    * :attr:`author_name`: name of the author.

    * :attr:`author_id`: key id of the author.

    """

    def __init__(self, author_id, quote_id, quote_name=None, text=None, author_name=None, tags=None, likes=None):
        """Store the quote data on top of the author identity.

        Args:
            author_id: key id of the author, forwarded to the parent initializer.
            quote_id: key id of the quote.
            quote_name (optional): name of the quote, stored as given.
            text (string, optional): text of the quote; a falsy value is stored as ``''``.
            author_name (string, optional): name of the author, forwarded to the parent initializer.
            tags (list, optional): tags of the quote; a falsy value is stored as ``[]``.
            likes (optional): likes of the quote, stored as given.

        """
        super().__init__(author_id, author_name=author_name)
        self.quote_id = quote_id
        self.quote_name = quote_name
        self.text = text or ''
        self.tags = tags or []
        self.likes = likes
        # Links to related objects, filled in later through the register_* methods.
        self._author = None
        self._book = None

    def get_author(self):
        """Get the author registered on the quote.

        Returns:
            Author: the object given to ``register_author``, or ``None`` if none was registered.

        """
        return self._author

    def get_book(self):
        """Get the book registered on the quote.

        Returns:
            Book: the object given to ``register_book``, or ``None`` if none was registered.

        """
        return self._book

    def register_author(self, author):
        """Point the quote to an Author.

        Args:
            author (Author): author to link to the quote.

        """
        self._author = author

    def register_book(self, book):
        """Point the quote to a Book.

        Args:
            book (Book): book to link to the quote.

        """
        self._book = book

    @abstractmethod
    def to_json(self, encode='ascii'):
        """Encode the quote to a JSON format.

        This abstract stub always raises; ``encode`` is not used here.

        Args:
            encode (string, optional): encoding name, left to the concrete implementation.

        Returns:
            dict

        Raises:
            NotImplementedError: always, in this base implementation.

        """
        raise NotImplementedError

    def __repr__(self):
        # Intended template:
        # “quote here”
        #   ― Author Name, from "Book Name" (year)
        #   Likes: n, Tags, some, tags, here
        # NOTE(review): only the opening quote mark is emitted below; no closing
        # mark or dash is added before the author name. Presumably ``text``
        # already carries them - confirm against the scraper.
        book = self.get_book()
        origin = ''
        published = ''
        if book:
            if book.book_name:
                origin = f', from "{book.book_name}"'
            if book.year:
                published = f' ({book.year})'

        tag_part = ''
        if len(self.tags) > 0:
            joined = ', '.join(self.tags)
            tag_part = f", Tags: {joined}"

        footer = f'\n {f"Likes: {self.likes}"}{tag_part}'
        return f'“{self.text}\n{self.author_name}{origin}{published}{footer}'