119 lines
3.9 KiB
Python
119 lines
3.9 KiB
Python
import logging
import os
import urllib
import urllib.parse

from bs4 import BeautifulSoup
from tidylib import tidy_document

from book import Item, Book
|
|
|
|
|
|
class PrepareEpub(object):
    """Rewrite the items of a book so they use local, EPUB-friendly names.

    All methods are classmethods; the class holds no state of its own.
    The only public entry points are :meth:`prepare` and
    :meth:`localize_url`.
    """

    @classmethod
    def prepare(cls, book):
        """Localize every item URL of *book*, then print the book.

        Args:
            book: object exposing ``content`` (a mapping whose values are
                items) and a ``print()`` method.
        """
        logger = logging.getLogger(__name__)
        logger.info("BEGIN Prepare EPUB.")
        cls.localize_url(book)
        book.print()
        logger.info("END Prepare EPUB.")

    @classmethod
    def localize_url(cls, book):
        """Give every item in ``book.content`` a local name and convert HTML.

        First pass: choose a local ``dir/filename`` for each item based on
        its ``content_type`` and rename the item through :meth:`_moveTo`.
        Second pass: run :meth:`_setXmlContent` on every item that is still
        ``text/html``.

        Args:
            book: object whose ``content`` mapping holds the items.
        """
        logger = logging.getLogger(__name__)
        for item in book.content.values():
            #
            # Create local name. It will have dir/filename structure.
            #
            mime = item.content_type
            if mime == 'text/css':
                local_url = cls._createLocalName(book, item, 'css')

            elif mime in ('application/font-woff', 'application/font-woff2'):
                local_url = cls._createLocalName(book, item, 'font')

            elif mime == 'text/html':
                # HTML documents stay at the top level (no category dir).
                local_url = cls._createLocalName(book, item, '')
                local_url_split = local_url.split('.')
                logger.info('%s', local_url_split)
                if local_url_split[-1] in ('htm', 'html'):
                    # Swap the extension so the local name ends in .xhtml.
                    local_url_split[-1] = 'xhtml'
                    local_url = '.'.join(local_url_split)
                    logger.info('converting to xhtml %s', local_url_split)
            else:
                # Any other type is filed under its MIME major type
                # (the part before the "/").
                local_url = cls._createLocalName(book, item, mime.split("/")[0])

            cls._moveTo(item, local_url)

        # The conversion runs only after every item has been renamed.
        # Presumably this is so the serialized markup already carries the
        # rewritten references -- confirm against how needed_by_elem is built.
        for item in book.content.values():
            if item.content_type == 'text/html':
                cls._setXmlContent(item)

    @classmethod
    def _createLocalName(cls, book, item, category):
        """Return a local name for *item*, optionally under *category*.

        Args:
            book: object whose ``content`` mapping is used for the
                collision check.
            item: object with a ``url`` attribute.
            category: directory prefix (e.g. ``'css'``); an empty string
                means no prefix.

        Returns:
            ``category/filename`` or ``filename``. If that name is already
            a key of ``book.content`` (and is not the item's own URL), a
            numbered variant from :meth:`_findUniqueName` is returned.
        """
        #
        # Get file name.
        #
        parsed_url = urllib.parse.urlsplit(item.url)
        # NOTE(review): a URL whose path ends in "/" yields an empty
        # file name here; no fallback is applied.
        file_name = parsed_url.path.split('/')[-1]
        #
        # Append category
        #
        if category:
            new_url = category + "/" + file_name
        else:
            new_url = file_name
        #
        # If file name already exist then generate a unique one.
        #
        # NOTE(review): assumes book.content keys are comparable to the
        # generated local names -- verify against the Book class.
        if item.url != new_url and new_url in book.content:
            new_url = cls._findUniqueName(book, category, file_name)
        return new_url

    @classmethod
    def _findUniqueName(cls, book, category, filename):
        """Return the first ``<base>_<n><ext>`` name not in ``book.content``.

        Args:
            book: object whose ``content`` mapping is checked for collisions.
            category: directory prefix; an empty string means no prefix.
            filename: file name to derive the numbered candidates from.

        Returns:
            The first free candidate, counting ``n`` up from 1.
        """
        i = 0
        file_name_base, file_ext = os.path.splitext(filename)
        while True:
            i += 1
            # The counter must be converted explicitly: concatenating an
            # int to a str raises TypeError.
            candidate = file_name_base + '_' + str(i) + file_ext
            if category:
                new_url = category + '/' + candidate
            else:
                new_url = candidate
            if new_url not in book.content:
                break
        return new_url

    @classmethod
    def _moveTo(cls, item, local_url):
        """Point every element referencing *item* at *local_url* and rename it.

        Args:
            item: object with ``url`` and ``needed_by_elem`` (an iterable of
                elements exposing ``name`` and item-style attribute access).
            local_url: the new local name for the item.
        """
        logger = logging.getLogger(__name__)
        logger.info("Renaming %s -> %s", item.url, local_url)

        for ref_elem in item.needed_by_elem:
            if ref_elem.name == 'a':
                # Keep the link's fragment (the "#..." part), if any.
                _, fragment = urllib.parse.urldefrag(ref_elem['href'])
                if fragment:
                    ref_elem['href'] = local_url + "#" + fragment
                else:
                    ref_elem['href'] = local_url
            elif ref_elem.name == 'img':
                ref_elem['src'] = local_url
                ref_elem['data-mfp-src'] = local_url
            else:
                # NOTE(review): other element kinds are only logged, not
                # rewritten; the message repeats the one above.
                logger.info("Renaming %s -> %s", item.url, local_url)

        # Rename after the loop so items without any referencing element
        # still receive their local name.
        item.url = local_url

    @classmethod
    def _setXmlContent(cls, item):
        """Convert the item's soup to XHTML with tidy and store the result.

        Sets ``item.payload`` to the tidy output and ``item.content_type``
        to ``'text/xhtml'``, then logs whatever tidy reported.

        Args:
            item: object with ``url`` and ``soup`` attributes.
        """
        logger = logging.getLogger(__name__)
        logger.info("Createing XML for %s", item.url)

        item.payload, err = tidy_document(
            item.soup.prettify("utf-8"),
            options={'output-xhtml': 1, 'tidy-mark': 1})
        # NOTE(review): the registered XHTML media type is
        # 'application/xhtml+xml'; confirm what consumers of content_type
        # expect before changing this value.
        item.content_type = 'text/xhtml'
        logger.info("Errors: %s", err)