diff --git a/gragir/enrich_html.py b/gragir/enrich_html.py
index 988b37f..7173409 100644
--- a/gragir/enrich_html.py
+++ b/gragir/enrich_html.py
@@ -2,6 +2,7 @@ import logging
import urllib
import os
from bs4 import BeautifulSoup
+import cssutils
from book import Item, Book
@@ -31,6 +32,8 @@ class EnrichHtml(object):
item.title = item.soup.title.string
else:
logger.info("No title for {}".format(item.url))
+ elif item.content_type == 'text/css':
+ item.css = cssutils.parseString(item.payload, None, item.url)
else:
logger.info("Skipping {} {}".format(item.content_type, item.url))
@@ -80,6 +83,19 @@ class EnrichHtml(object):
else:
logger.warn(" No content found: {}".format(item.url))
remove.append(item)
+ # Remove annotator
+ remove_content = []
+ remove_content.extend(item.soup.find_all('div', attrs={"class": "annotator-outer"}))
+ remove_content.extend(item.soup.find_all('div', attrs={"class": "annotator-modal-wrapper"}))
+ remove_content.extend(item.soup.find_all('div', attrs={"class": "annotator-adder"}))
+ for elem in remove_content:
+ elem.decompose()
+ # Unwrap
+ remove_content = []
+ remove_content.extend(item.soup.find_all('div', attrs={"class": "annotator-wrapper"}))
+ remove_content.extend(item.soup.find_all('div', attrs={"id": "sbo-rt-content"}))
+ for elem in remove_content:
+ elem.unwrap()
for item in remove:
book.remove(item)
@@ -95,14 +111,14 @@ class EnrichHtml(object):
links = item.soup.find_all('a', attrs={"class": "prev nav-link"})
if len(links):
item.prev = book.content[
- item.getAbsoluteUrl(links[0]['href'])]
+ item.getAbsoluteUrl(links[0].get('href'))]
logger.info(" prev = {}:".format(item.prev.url))
# Try to get next chapter.
links = item.soup.find_all('a', attrs={"class": "next nav-link"})
if len(links):
item.next = book.content[
- item.getAbsoluteUrl(links[0]['href'])]
+ item.getAbsoluteUrl(links[0].get('href'))]
logger.info(" next = {}:".format(item.next.url))
@classmethod
diff --git a/gragir/prepare_epub.py b/gragir/prepare_epub.py
index e751554..0b16180 100644
--- a/gragir/prepare_epub.py
+++ b/gragir/prepare_epub.py
@@ -2,6 +2,7 @@ import os
import logging
import urllib
from bs4 import BeautifulSoup
+from tidylib import tidy_document
from book import Item, Book
@@ -33,6 +34,13 @@ class PrepareEpub(object):
elif mime == 'text/html':
local_url = cls._createLocalName(book,item,'')
+ local_url_split = local_url.split('.')
+ logger.info('{}'.format(local_url_split))
+ if local_url_split[-1] == 'htm' \
+ or local_url_split[-1] == 'html':
+ local_url_split[-1] = 'xhtml'
+ local_url = '.'.join(local_url_split)
+ logger.info('converting to xhtml {}'.format(local_url_split))
else:
local_url = cls._createLocalName(book,item,mime.split("/")[0])
@@ -105,4 +113,7 @@ class PrepareEpub(object):
logger = logging.getLogger(__name__)
logger.info("Createing XML for {}".format(item.url))
- item.payload = item.soup.prettify("utf-8")
+ item.payload, err = tidy_document( item.soup.prettify("utf-8"),
+ options={ 'output-xhtml' : 1, 'tidy-mark' : 1})
+ item.content_type = 'text/xhtml'
+ logger.info("Errors: {}".format(err))
\ No newline at end of file
diff --git a/gragir/save_epub.py b/gragir/save_epub.py
index 3045721..8adfa97 100644
--- a/gragir/save_epub.py
+++ b/gragir/save_epub.py
@@ -14,6 +14,8 @@ class SaveEpub(object):
ebook = ebooklib.EpubBook()
cls.fillEpubBook(ebook, book)
+ cls.fillSpine(ebook, book)
+ cls.fillGuide(ebook, book)
cls.writeEpubBook(book.file_name, ebook)
@@ -31,6 +33,8 @@ class SaveEpub(object):
"""
logger = logging.getLogger(__name__)
+ options = {'plugins': []}
+
try:
epub = ebooklib.EpubWriter(name, ebook, options)
epub.process()
@@ -54,7 +58,7 @@ class SaveEpub(object):
or mime == 'application/font-woff2':
return 'font'
- elif mime == 'text/html':
+ elif mime == 'text/html' or mime == 'text/xhtml':
return 'html'
else:
@@ -93,9 +97,39 @@ class SaveEpub(object):
eitem.content = item.payload
ebook.add_item(eitem)
- item.id = eitem.get_id()
+ item.idref = eitem.get_id()
- # class EpubNav(EpubHtml):
- # class EpubNcx(EpubItem):
- # class Link(object):
- # class Section(object):
\ No newline at end of file
+    @classmethod
+    def fillSpine(cls, ebook, book):
+        """Add the NCX/nav items and a nav stylesheet to ebook, then build its spine.
+
+        The spine starts with 'nav' and is extended with item.idref for each
+        item reached from book.first by following the optional `next` attribute.
+        """
+        logger = logging.getLogger(__name__)
+
+        ebook.add_item(ebooklib.EpubNcx())
+        ebook.add_item(ebooklib.EpubNav())
+
+        # Stylesheet registered under style/nav.css.
+        style = 'BODY {color: white;}'
+        nav_css = ebooklib.EpubItem(uid="style_nav", file_name="style/nav.css",
+                                    media_type="text/css", content=style)
+        ebook.add_item(nav_css)
+        ebook.spine = ['nav']
+
+        seen = set()  # ids of visited items; stops an endless loop on a cyclic chain
+        item = book.first
+        while item and id(item) not in seen:
+            seen.add(id(item))
+            logger.info("EPUB: Adding to spine {}".format(item.url))
+            ebook.spine.append(item.idref)
+            # The last chapter may have no `next` attribute at all.
+            item = getattr(item, 'next', None)
+
+    @classmethod
+    def fillGuide(cls, ebook, book):
+        """Append a 'text' guide entry for book.first to ebook.guide, if there is one."""
+        item = book.first
+        if item:  # an empty book has no first item to reference
+            ebook.guide.append({'href': item.url, 'title': item.title, 'type': 'text'})