save_epub

This commit is contained in:
2018-10-15 07:58:12 +01:00
parent db2ae46989
commit 8a310a72f3
2 changed files with 217 additions and 157 deletions

View File

@@ -14,12 +14,11 @@ import sys
import logging
import argparse
import ebooklib.epub as ebooklib
from book import Book, Item
from parse_mhtml import parseMhtmlZipFile
from enrich_html import EnrichHtml
from prepare_epub import PrepareEpub
from save_epub import SaveEpub
def parseArguments():
"""
@@ -58,156 +57,6 @@ def configLogger(args):
logger.addHandler(fh)
# for name in content.keys():
def createEpubHtml(item):
    """
    Return a new, empty ebooklib EpubHtml object.

    :Args:
      - item: accepted for the caller's convenience; not read here
    """
    return ebooklib.EpubHtml()
def createEpubBook(book):
# Creates an ebooklib EpubBook, adds an EpubHtml (built by createEpubHtml)
# for entries reached from book.first, hands the EpubBook to
# writeEpubBook(book.file_name, ...), and returns the input `book`.
# NOTE(review): indentation is not visible in this view, so which statements
# belong to the loop body must be confirmed against the real file.
# NOTE(review): `logger` is assigned but not used in the visible lines.
logger = logging.getLogger(__name__)
ebook = ebooklib.EpubBook()
it = book.first
# NOTE(review): `it` is never reassigned in the visible loop, so the loop
# cannot terminate on its own when book.first is truthy — confirm how the
# item chain is meant to be advanced.
while it:
if it.content_type == 'text/html':
html = createEpubHtml(it)
ebook.add_item(html)
# NOTE(review): 'image/html' is compared literally and this branch does the
# same work as the 'text/html' branch — confirm the intended content type.
elif it.content_type == 'image/html':
html = createEpubHtml(it)
ebook.add_item(html)
writeEpubBook(book.file_name, ebook)
# The commented lines below look like reference notes listing ebooklib
# classes and EpubBook methods (presumably copied from the library).
# class EpubImage(EpubItem):
# class EpubNav(EpubHtml):
# class EpubCoverHtml(EpubHtml):
# class EpubHtml(EpubItem):
# class EpubCover(EpubItem):
# class EpubNcx(EpubItem):
# class EpubItem(object):
# class EpubException(Exception):
# class Link(object):
# class Section(object):
# def set_identifier(self, uid)
# def set_title(self, title)
# def set_language(self, lang)
# def set_cover(self, file_name, content, create_page=True):
# """
# Set cover and create cover document if needed.
# :Args:
# - file_name: file name of the cover page
# - content: Content for the cover image
# - create_page: Should cover page be defined. Defined as bool value (optional). Default value is True.
# """
# def add_author(self, author, file_as=None, role=None, uid='creator'):
# def add_metadata(self, namespace, name, value, others=None):
# def set_unique_metadata(self, namespace, name, value, others=None):
# "Add metadata if metadata with this identifier does not already exist, otherwise update existing metadata."
# def add_item(self, item):
# def get_metadata(self, namespace, name):
# def get_item_with_id(self, uid):
# """
# Returns item for defined UID.
# >>> book.get_item_with_id('image_001')
# :Args:
# - uid: UID for the item
# :Returns:
# Returns item object. Returns None if nothing was found.
# """
# def get_item_with_href(self, href):
# """
# Returns item for defined HREF.
# >>> book.get_item_with_href('EPUB/document.xhtml')
# :Args:
# - href: HREF for the item we are searching for
# :Returns:
# Returns item object. Returns None if nothing was found.
# """
# def get_items(self):
# def get_items_of_media_type(self, media_type):
# def get_items_of_type(self, item_type):
# """
# Returns all items of specified type.
# >>> book.get_items_of_type(epub.ITEM_IMAGE)
# :Args:
# - item_type: Type for items we are searching for
# :Returns:
# Returns found items as tuple.
# """
# return (item for item in self.items if item.get_type() == item_type)
# def get_template(self, name):
# def set_template(self, name, value):
# """
# Defines templates which are used to generate certain types of pages. When defining new value for the template
# we have to use content of type 'str' (Python 2) or 'bytes' (Python 3).
# At the moment we use these templates:
# - ncx
# - nav
# - chapter
# - cover
# :Args:
# - name: Name for the template
# - value: Content for the template
# """
# def add_prefix(self, name, uri):
# """
# Appends custom prefix to be added to the content.opf document
# >>> epub_book.add_prefix('bkterms', 'http://booktype.org/')
# :Args:
# - name: namespave name
# - uri: URI for the namespace
# """
# Note: the value returned is the input `book`, not the EpubBook built above.
return book
def writeEpubBook(name, book, options=None):
    """
    Create an epub file with the content defined in an EpubBook.

    >>> writeEpubBook('book.epub', book)  # doctest: +SKIP

    :Args:
      - name: file name for the output file
      - book: instance of EpubBook
      - options: extra options as dictionary (optional)

    Any exception raised while building or writing the file is logged and
    not re-raised, so the function always returns None.
    """
    logger = logging.getLogger(__name__)
    try:
        writer = ebooklib.EpubWriter(name, book, options)
        writer.process()
        writer.write()
    except Exception as e:
        # Best-effort write: the error is still swallowed as before, but
        # logger.exception records the traceback so failures can be diagnosed.
        logger.exception("Exception {}.".format(e))
def main():
"""
"""
@@ -222,9 +71,8 @@ def main():
parseMhtmlZipFile(args.zip, book)
EnrichHtml.enrich(book)
PrepareEpub.prepare(book)
book.save_in_dir('test_out/test_save')
#createDAG(book)
#createEpubBook(book)
#book.save_in_dir('test_out/test_save')
SaveEpub.save(book)
if __name__ == "__main__":

212
gragir/save_epub.py Normal file
View File

@@ -0,0 +1,212 @@
import logging
import ebooklib.epub as ebooklib
from book import Book, Item
class SaveEpub(object):
def __init__(self):
    """Create an instance; there is no state to set up."""
@classmethod
def save(cls, book):
    """
    Write an epub file named after ``book.file_name``.

    :Args:
      - book: object providing the ``file_name`` attribute used as output name

    Failures while writing are handled inside ``writeEpubBook``.
    """
    ebook = ebooklib.EpubBook()
    # NOTE(review): no items from `book` are added to `ebook` here, so the
    # EpubBook handed to the writer is empty — confirm whether the content
    # is meant to be filled in before writing.
    cls.writeEpubBook(book.file_name, ebook)
@classmethod
def writeEpubBook(cls, name, ebook, options=None):
    """
    Create an epub file with the content defined in an EpubBook.

    >>> SaveEpub.writeEpubBook('book.epub', ebook)  # doctest: +SKIP

    :Args:
      - name: file name for the output file
      - ebook: instance of EpubBook
      - options: extra options as dictionary (optional)

    Any exception raised while building or writing the file is logged and
    not re-raised, so the method always returns None.
    """
    logger = logging.getLogger(__name__)
    try:
        writer = ebooklib.EpubWriter(name, ebook, options)
        writer.process()
        writer.write()
    except Exception as e:
        # Best-effort write: the error is still swallowed as before, but
        # logger.exception records the traceback so failures can be diagnosed.
        logger.exception("Exception {}.".format(e))
@classmethod
def createEpubHtml(cls, ebook, book):
    """
    Return a new, empty ebooklib EpubHtml object.

    :Args:
      - ebook: not read here
      - book: not read here
    """
    return ebooklib.EpubHtml()
@classmethod
def getType(cls, item):
    """
    Classify an item by its MIME type.

    :Args:
      - item: object whose ``content_type`` attribute holds a MIME type string

    :Returns:
      'css' for 'text/css', 'font' for the two WOFF font types, 'html' for
      'text/html'; for anything else the top-level MIME type, i.e. the part
      before the slash (for example 'image' for 'image/png').
    """
    mime = item.content_type
    if mime == 'text/css':
        return 'css'
    if mime in ('application/font-woff', 'application/font-woff2'):
        return 'font'
    if mime == 'text/html':
        return 'html'
    # The fallback used to reference an undefined `book` and returned nothing;
    # callers compare the result to names such as 'image', so return the
    # top-level MIME type instead.
    return mime.split("/")[0]
@classmethod
def createContent(cls, ebook, book):
# Unfinished stub: starts from book.first and calls cls.getType() in a loop.
# NOTE(review): `ebook` is not used in the visible lines.
it = book.first
# NOTE(review): `it` is never reassigned inside the loop, and `item` below is
# not defined in this method (possibly `it` was meant) — confirm the intended
# traversal before relying on this method.
while it:
item_type = cls.getType(item)
@classmethod
def fillEpubBook(cls, ebook, book):
# Work in progress: iterates book.content, classifies each item with
# cls.getType(), creates an ebooklib item object and sets its content, then
# returns the input `book`.
# NOTE(review): indentation is not visible in this view, so the nesting of
# the branches below must be confirmed against the real file.
# NOTE(review): `logger` is assigned but not used in the visible lines.
logger = logging.getLogger(__name__)
for item in book.content:
item_type = cls.getType(item)
if item_type == 'image':
eitem = ebooklib.EpubImage()
# NOTE(review): same condition as the test just above; this assignment would
# replace the EpubImage with a plain EpubItem — confirm which test was meant.
if item_type == 'image':
eitem = ebooklib.EpubItem()
# NOTE(review): cls._createLocalName is not defined in the visible part of
# the class, and `local_url` is not read afterwards in the visible lines.
local_url = cls._createLocalName(book,item,'css')
# NOTE(review): `mime` is not assigned anywhere in this method.
elif mime == 'application/font-woff' \
or mime == 'application/font-woff2':
local_url = cls._createLocalName(book,item,'font')
elif mime == 'text/html':
local_url = cls._createLocalName(book,item,'')
else:
local_url = cls._createLocalName(book,item,mime.split("/")[0])
# NOTE(review): `it` is not assigned in this method, and createEpubHtml is
# called unqualified with one argument, while the classmethod visible in this
# class takes (ebook, book) — confirm what was intended here.
if it.content_type == 'text/html':
html = createEpubHtml(it)
ebook.add_item(html)
elif it.content_type == 'image/html':
html = createEpubHtml(it)
ebook.add_item(html)
# HTML items take their content from item.soup.prettify("utf-8"); every other
# type uses item.payload unchanged.
if item_type == 'html':
eitem.set_content(item.soup.prettify("utf-8"))
else:
eitem.set_content(item.payload)
# NOTE(review): `eitem` is never passed to ebook.add_item() in the visible lines.
# The commented lines below look like reference notes listing ebooklib
# classes and EpubBook methods (presumably copied from the library).
# class EpubImage(EpubItem):
# class EpubNav(EpubHtml):
# class EpubCoverHtml(EpubHtml):
# class EpubHtml(EpubItem):
# class EpubCover(EpubItem):
# class EpubNcx(EpubItem):
# class EpubItem(object):
# class EpubException(Exception):
# class Link(object):
# class Section(object):
# def set_identifier(self, uid)
# def set_title(self, title)
# def set_language(self, lang)
# def set_cover(self, file_name, content, create_page=True):
# """
# Set cover and create cover document if needed.
# :Args:
# - file_name: file name of the cover page
# - content: Content for the cover image
# - create_page: Should cover page be defined. Defined as bool value (optional). Default value is True.
# """
# def add_author(self, author, file_as=None, role=None, uid='creator'):
# def add_metadata(self, namespace, name, value, others=None):
# def set_unique_metadata(self, namespace, name, value, others=None):
# "Add metadata if metadata with this identifier does not already exist, otherwise update existing metadata."
# def add_item(self, item):
# def get_metadata(self, namespace, name):
# def get_item_with_id(self, uid):
# """
# Returns item for defined UID.
# >>> book.get_item_with_id('image_001')
# :Args:
# - uid: UID for the item
# :Returns:
# Returns item object. Returns None if nothing was found.
# """
# def get_item_with_href(self, href):
# """
# Returns item for defined HREF.
# >>> book.get_item_with_href('EPUB/document.xhtml')
# :Args:
# - href: HREF for the item we are searching for
# :Returns:
# Returns item object. Returns None if nothing was found.
# """
# def get_items(self):
# def get_items_of_media_type(self, media_type):
# def get_items_of_type(self, item_type):
# """
# Returns all items of specified type.
# >>> book.get_items_of_type(epub.ITEM_IMAGE)
# :Args:
# - item_type: Type for items we are searching for
# :Returns:
# Returns found items as tuple.
# """
# return (item for item in self.items if item.get_type() == item_type)
# def get_template(self, name):
# def set_template(self, name, value):
# """
# Defines templates which are used to generate certain types of pages. When defining new value for the template
# we have to use content of type 'str' (Python 2) or 'bytes' (Python 3).
# At the moment we use these templates:
# - ncx
# - nav
# - chapter
# - cover
# :Args:
# - name: Name for the template
# - value: Content for the template
# """
# def add_prefix(self, name, uri):
# """
# Appends custom prefix to be added to the content.opf document
# >>> epub_book.add_prefix('bkterms', 'http://booktype.org/')
# :Args:
# - name: namespave name
# - uri: URI for the namespace
# """
# Note: the value returned is the input `book`, not `ebook`.
return book