Files
gragir/gragir/prepare_epub.py
2018-10-21 21:22:56 +01:00

119 lines
3.9 KiB
Python

import logging
import os
import urllib
import urllib.parse

from bs4 import BeautifulSoup
from tidylib import tidy_document

from book import Item, Book
class PrepareEpub(object):
    """Rewrite the URLs of a book's items into local, relative names.

    Every method is a classmethod; the class itself holds no state.
    """

    @classmethod
    def prepare(cls, book):
        """Localize all item URLs of *book* and then print the book.

        Args:
            book: object exposing ``content`` (a mapping whose values are
                the items to process) and a ``print()`` method.
        """
        logger = logging.getLogger(__name__)
        logger.info("BEGIN Prepare EPUB.")
        cls.localize_url(book)
        book.print()
        logger.info("END Prepare EPUB.")

    @classmethod
    def localize_url(cls, book):
        """Assign a local name to every item and convert HTML items to XHTML.

        The first pass picks a ``category/filename`` style name from each
        item's content type and moves the item to it via ``_moveTo``. The
        second pass runs every ``text/html`` item through
        ``_setXmlContent``.

        Args:
            book: object whose ``content`` mapping holds items exposing
                ``content_type``, ``url`` and ``needed_by_elem``.
        """
        logger = logging.getLogger(__name__)
        for item in book.content.values():
            #
            # Create local name. It will have dir/filename structure.
            #
            mime = item.content_type
            if mime == 'text/css':
                local_url = cls._createLocalName(book, item, 'css')
            elif mime in ('application/font-woff', 'application/font-woff2'):
                local_url = cls._createLocalName(book, item, 'font')
            elif mime == 'text/html':
                # HTML documents stay at the top level (no category dir)
                # and get their .htm/.html extension replaced by .xhtml.
                local_url = cls._createLocalName(book, item, '')
                local_url_split = local_url.split('.')
                logger.info('{}'.format(local_url_split))
                if local_url_split[-1] in ('htm', 'html'):
                    local_url_split[-1] = 'xhtml'
                    local_url = '.'.join(local_url_split)
                    logger.info(
                        'converting to xhtml {}'.format(local_url_split))
            else:
                # Any other type is filed under its MIME top-level type,
                # e.g. 'image/png' -> 'image/<file name>'.
                local_url = cls._createLocalName(
                    book, item, mime.split("/")[0])
            cls._moveTo(item, local_url)

        # Serialize only after every item has been moved: _moveTo rewrites
        # attributes on referencing elements, and _setXmlContent reads
        # item.soup (presumably the tree holding those elements -- confirm).
        for item in book.content.values():
            if item.content_type == 'text/html':
                cls._setXmlContent(item)

    @classmethod
    def _createLocalName(cls, book, item, category):
        """Return a local name for *item* of the form ``category/filename``.

        Args:
            book: object whose ``content`` mapping is used for the
                collision check (presumably keyed by URL -- confirm).
            item: object with a ``url`` attribute.
            category: directory prefix; an empty string means no prefix.

        Returns:
            The last path segment of ``item.url``, prefixed by *category*
            when given. If that name differs from ``item.url`` and is
            already a key of ``book.content``, a numbered unique variant
            from ``_findUniqueName`` is returned instead.
        """
        #
        # Get file name.
        #
        parsed_url = urllib.parse.urlsplit(item.url)
        file_name = parsed_url.path.split('/')[-1]
        #
        # Append category
        #
        if category:
            new_url = category + "/" + file_name
        else:
            new_url = file_name
        #
        # If file name already exist then generate a unique one.
        #
        if item.url != new_url and new_url in book.content:
            new_url = cls._findUniqueName(book, category, file_name)
        return new_url

    @classmethod
    def _findUniqueName(cls, book, category, filename):
        """Return the first ``<base>_<n><ext>`` name not in ``book.content``.

        Args:
            book: object whose ``content`` supports ``in`` membership tests.
            category: directory prefix; an empty string means no prefix.
            filename: file name whose extension is kept after the counter.

        Returns:
            The candidate name with the smallest ``n >= 1`` that is not
            already present in ``book.content``.
        """
        file_name_base, file_ext = os.path.splitext(filename)
        prefix = category + '/' if category else ''
        i = 0
        while True:
            i += 1
            # Format the counter into the name; concatenating the int
            # directly to a str raises TypeError.
            new_url = '{}{}_{}{}'.format(prefix, file_name_base, i, file_ext)
            if new_url not in book.content:
                return new_url

    @classmethod
    def _moveTo(cls, item, local_url):
        """Point *item* and the elements that reference it at *local_url*.

        Args:
            item: object with ``url`` and ``needed_by_elem`` (an iterable of
                elements exposing ``name`` and item-style attribute access).
            local_url: the new URL to store on the item and its referrers.
        """
        logger = logging.getLogger(__name__)
        logger.info("Renaming {} -> {}".format(item.url, local_url))
        for ref_elem in item.needed_by_elem:
            if ref_elem.name == 'a':
                # Keep the original fragment so in-page anchors still work.
                _, fragment = urllib.parse.urldefrag(ref_elem['href'])
                if fragment:
                    ref_elem['href'] = local_url + "#" + fragment
                else:
                    ref_elem['href'] = local_url
            elif ref_elem.name == 'img':
                ref_elem['src'] = local_url
                ref_elem['data-mfp-src'] = local_url
            else:
                # NOTE(review): elements other than <a>/<img> are only
                # logged here; their attributes are left untouched.
                logger.info("Renaming {} -> {}".format(item.url, local_url))
        item.url = local_url

    @classmethod
    def _setXmlContent(cls, item):
        """Store a tidied XHTML rendering of ``item.soup`` on the item.

        Sets ``item.payload`` to the document returned by ``tidy_document``
        and ``item.content_type`` to ``'text/xhtml'``; the second value
        returned by ``tidy_document`` is logged.

        Args:
            item: object with ``url`` and ``soup`` attributes.
        """
        logger = logging.getLogger(__name__)
        logger.info("Createing XML for {}".format(item.url))
        item.payload, err = tidy_document(
            item.soup.prettify("utf-8"),
            options={'output-xhtml': 1, 'tidy-mark': 1})
        # NOTE(review): the standard XHTML media type is
        # 'application/xhtml+xml'; kept as-is because other code may
        # compare against this exact value -- confirm before changing.
        item.content_type = 'text/xhtml'
        logger.info("Errors: {}".format(err))