Generating epubs.
This commit is contained in:
@@ -2,6 +2,7 @@ import logging
|
|||||||
import urllib
|
import urllib
|
||||||
import os
|
import os
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
import cssutils
|
||||||
|
|
||||||
from book import Item, Book
|
from book import Item, Book
|
||||||
|
|
||||||
@@ -31,6 +32,8 @@ class EnrichHtml(object):
|
|||||||
item.title = item.soup.title.string
|
item.title = item.soup.title.string
|
||||||
else:
|
else:
|
||||||
logger.info("No title for {}".format(item.url))
|
logger.info("No title for {}".format(item.url))
|
||||||
|
elif item.content_type == 'text/css':
|
||||||
|
item.css = cssutils.parseString(item.payload, None, item.url)
|
||||||
else:
|
else:
|
||||||
logger.info("Skipping {} {}".format(item.content_type, item.url))
|
logger.info("Skipping {} {}".format(item.content_type, item.url))
|
||||||
|
|
||||||
@@ -80,6 +83,19 @@ class EnrichHtml(object):
|
|||||||
else:
|
else:
|
||||||
logger.warn(" No content found: {}".format(item.url))
|
logger.warn(" No content found: {}".format(item.url))
|
||||||
remove.append(item)
|
remove.append(item)
|
||||||
|
# Remove annotator
|
||||||
|
remove_content = []
|
||||||
|
remove_content.extend(item.soup.find_all('div', attrs={"class": "annotator-outer"}))
|
||||||
|
remove_content.extend(item.soup.find_all('div', attrs={"class": "annotator-modal-wrapper"}))
|
||||||
|
remove_content.extend(item.soup.find_all('div', attrs={"class": "annotator-adder"}))
|
||||||
|
for elem in remove_content:
|
||||||
|
elem.decompose()
|
||||||
|
# Unwrap
|
||||||
|
remove_content = []
|
||||||
|
remove_content.extend(item.soup.find_all('div', attrs={"class": "annotator-wrapper"}))
|
||||||
|
remove_content.extend(item.soup.find_all('div', attrs={"id": "sbo-rt-content"}))
|
||||||
|
for elem in remove_content:
|
||||||
|
elem.unwrap()
|
||||||
|
|
||||||
for item in remove:
|
for item in remove:
|
||||||
book.remove(item)
|
book.remove(item)
|
||||||
@@ -95,14 +111,14 @@ class EnrichHtml(object):
|
|||||||
links = item.soup.find_all('a', attrs={"class": "prev nav-link"})
|
links = item.soup.find_all('a', attrs={"class": "prev nav-link"})
|
||||||
if len(links):
|
if len(links):
|
||||||
item.prev = book.content[
|
item.prev = book.content[
|
||||||
item.getAbsoluteUrl(links[0]['href'])]
|
item.getAbsoluteUrl(links[0].get('href'))]
|
||||||
logger.info(" prev = {}:".format(item.prev.url))
|
logger.info(" prev = {}:".format(item.prev.url))
|
||||||
|
|
||||||
# Try to get next chapter.
|
# Try to get next chapter.
|
||||||
links = item.soup.find_all('a', attrs={"class": "next nav-link"})
|
links = item.soup.find_all('a', attrs={"class": "next nav-link"})
|
||||||
if len(links):
|
if len(links):
|
||||||
item.next = book.content[
|
item.next = book.content[
|
||||||
item.getAbsoluteUrl(links[0]['href'])]
|
item.getAbsoluteUrl(links[0].get('href'))]
|
||||||
logger.info(" next = {}:".format(item.next.url))
|
logger.info(" next = {}:".format(item.next.url))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import os
|
|||||||
import logging
|
import logging
|
||||||
import urllib
|
import urllib
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from tidylib import tidy_document
|
||||||
|
|
||||||
from book import Item, Book
|
from book import Item, Book
|
||||||
|
|
||||||
@@ -33,6 +34,13 @@ class PrepareEpub(object):
|
|||||||
|
|
||||||
elif mime == 'text/html':
|
elif mime == 'text/html':
|
||||||
local_url = cls._createLocalName(book,item,'')
|
local_url = cls._createLocalName(book,item,'')
|
||||||
|
local_url_split = local_url.split('.')
|
||||||
|
logger.info('{}'.format(local_url_split))
|
||||||
|
if local_url_split[-1] == 'htm' \
|
||||||
|
or local_url_split[-1] == 'html':
|
||||||
|
local_url_split[-1] = 'xhtml'
|
||||||
|
local_url = '.'.join(local_url_split)
|
||||||
|
logger.info('converting to xhtml {}'.format(local_url_split))
|
||||||
else:
|
else:
|
||||||
local_url = cls._createLocalName(book,item,mime.split("/")[0])
|
local_url = cls._createLocalName(book,item,mime.split("/")[0])
|
||||||
|
|
||||||
@@ -105,4 +113,7 @@ class PrepareEpub(object):
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
logger.info("Createing XML for {}".format(item.url))
|
logger.info("Createing XML for {}".format(item.url))
|
||||||
|
|
||||||
item.payload = item.soup.prettify("utf-8")
|
item.payload, err = tidy_document( item.soup.prettify("utf-8"),
|
||||||
|
options={ 'output-xhtml' : 1, 'tidy-mark' : 1})
|
||||||
|
item.content_type = 'text/xhtml'
|
||||||
|
logger.info("Errors: {}".format(err))
|
||||||
@@ -14,6 +14,8 @@ class SaveEpub(object):
|
|||||||
|
|
||||||
ebook = ebooklib.EpubBook()
|
ebook = ebooklib.EpubBook()
|
||||||
cls.fillEpubBook(ebook, book)
|
cls.fillEpubBook(ebook, book)
|
||||||
|
cls.fillSpine(ebook, book)
|
||||||
|
cls.fillGuide(ebook, book)
|
||||||
cls.writeEpubBook(book.file_name, ebook)
|
cls.writeEpubBook(book.file_name, ebook)
|
||||||
|
|
||||||
|
|
||||||
@@ -31,6 +33,8 @@ class SaveEpub(object):
|
|||||||
"""
|
"""
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
options = {'plugins': []}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
epub = ebooklib.EpubWriter(name, ebook, options)
|
epub = ebooklib.EpubWriter(name, ebook, options)
|
||||||
epub.process()
|
epub.process()
|
||||||
@@ -54,7 +58,7 @@ class SaveEpub(object):
|
|||||||
or mime == 'application/font-woff2':
|
or mime == 'application/font-woff2':
|
||||||
return 'font'
|
return 'font'
|
||||||
|
|
||||||
elif mime == 'text/html':
|
elif mime == 'text/html' or mime == 'text/xhtml':
|
||||||
return 'html'
|
return 'html'
|
||||||
|
|
||||||
else:
|
else:
|
||||||
@@ -93,9 +97,39 @@ class SaveEpub(object):
|
|||||||
eitem.content = item.payload
|
eitem.content = item.payload
|
||||||
|
|
||||||
ebook.add_item(eitem)
|
ebook.add_item(eitem)
|
||||||
item.id = eitem.get_id()
|
item.idref = eitem.get_id()
|
||||||
|
|
||||||
# class EpubNav(EpubHtml):
|
@classmethod
|
||||||
# class EpubNcx(EpubItem):
|
def fillSpine(cls, ebook, book):
|
||||||
# class Link(object):
|
logger = logging.getLogger(__name__)
|
||||||
# class Section(object):
|
|
||||||
|
ebook.add_item(ebooklib.EpubNcx())
|
||||||
|
ebook.add_item(ebooklib.EpubNav())
|
||||||
|
|
||||||
|
# define CSS style
|
||||||
|
style = 'BODY {color: white;}'
|
||||||
|
nav_css = ebooklib.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
|
||||||
|
ebook.add_item(nav_css)
|
||||||
|
ebook.spine = ['nav']
|
||||||
|
|
||||||
|
# ebook.toc = (ebooklib.Link(item.url, item.title, 'intro'),
|
||||||
|
# (epub.Section(item.title),
|
||||||
|
# (c1, ))
|
||||||
|
# )
|
||||||
|
|
||||||
|
item = book.first
|
||||||
|
while item:
|
||||||
|
logger.info("EPUB: Adding to spin {}".format(item.url))
|
||||||
|
ebook.spine.append(item.idref)
|
||||||
|
|
||||||
|
if hasattr(item, 'next'):
|
||||||
|
item = item.next
|
||||||
|
else:
|
||||||
|
item = None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def fillGuide(cls, ebook, book):
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
item = book.first
|
||||||
|
ebook.guide.append({ 'href' : item.url, 'title' : item.title, 'type' : 'text'})
|
||||||
|
|||||||
Reference in New Issue
Block a user