save_epub
This commit is contained in:
@@ -14,12 +14,11 @@ import sys
|
|||||||
import logging
|
import logging
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
import ebooklib.epub as ebooklib
|
|
||||||
|
|
||||||
from book import Book, Item
|
from book import Book, Item
|
||||||
from parse_mhtml import parseMhtmlZipFile
|
from parse_mhtml import parseMhtmlZipFile
|
||||||
from enrich_html import EnrichHtml
|
from enrich_html import EnrichHtml
|
||||||
from prepare_epub import PrepareEpub
|
from prepare_epub import PrepareEpub
|
||||||
|
from save_epub import SaveEpub
|
||||||
|
|
||||||
def parseArguments():
|
def parseArguments():
|
||||||
"""
|
"""
|
||||||
@@ -47,7 +46,7 @@ def configLogger(args):
|
|||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
format='%(message)s',
|
format='%(message)s',
|
||||||
level=loggingLevel)
|
level=loggingLevel)
|
||||||
|
|
||||||
|
|
||||||
fh = logging.FileHandler('gragir.log', mode='w')
|
fh = logging.FileHandler('gragir.log', mode='w')
|
||||||
fh.setLevel(logging.DEBUG)
|
fh.setLevel(logging.DEBUG)
|
||||||
@@ -58,162 +57,12 @@ def configLogger(args):
|
|||||||
logger.addHandler(fh)
|
logger.addHandler(fh)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# for name in content.keys():
|
|
||||||
|
|
||||||
def createEpubHtml(item):
|
|
||||||
html = ebooklib.EpubHtml()
|
|
||||||
return html
|
|
||||||
|
|
||||||
def createEpubBook(book):
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
ebook = ebooklib.EpubBook()
|
|
||||||
|
|
||||||
it = book.first
|
|
||||||
while it:
|
|
||||||
if it.content_type == 'text/html':
|
|
||||||
html = createEpubHtml(it)
|
|
||||||
ebook.add_item(html)
|
|
||||||
elif it.content_type == 'image/html':
|
|
||||||
html = createEpubHtml(it)
|
|
||||||
ebook.add_item(html)
|
|
||||||
|
|
||||||
writeEpubBook(book.file_name, ebook)
|
|
||||||
|
|
||||||
# class EpubImage(EpubItem):
|
|
||||||
# class EpubNav(EpubHtml):
|
|
||||||
# class EpubCoverHtml(EpubHtml):
|
|
||||||
# class EpubHtml(EpubItem):
|
|
||||||
# class EpubCover(EpubItem):
|
|
||||||
# class EpubNcx(EpubItem):
|
|
||||||
# class EpubItem(object):
|
|
||||||
# class EpubException(Exception):
|
|
||||||
# class Link(object):
|
|
||||||
# class Section(object):
|
|
||||||
|
|
||||||
|
|
||||||
# def set_identifier(self, uid)
|
|
||||||
# def set_title(self, title)
|
|
||||||
# def set_language(self, lang)
|
|
||||||
# def set_cover(self, file_name, content, create_page=True):
|
|
||||||
# """
|
|
||||||
# Set cover and create cover document if needed.
|
|
||||||
|
|
||||||
# :Args:
|
|
||||||
# - file_name: file name of the cover page
|
|
||||||
# - content: Content for the cover image
|
|
||||||
# - create_page: Should cover page be defined. Defined as bool value (optional). Default value is True.
|
|
||||||
# """
|
|
||||||
|
|
||||||
# def add_author(self, author, file_as=None, role=None, uid='creator'):
|
|
||||||
# def add_metadata(self, namespace, name, value, others=None):
|
|
||||||
# def set_unique_metadata(self, namespace, name, value, others=None):
|
|
||||||
# "Add metadata if metadata with this identifier does not already exist, otherwise update existing metadata."
|
|
||||||
# def add_item(self, item):
|
|
||||||
|
|
||||||
# def get_metadata(self, namespace, name):
|
|
||||||
# def get_item_with_id(self, uid):
|
|
||||||
# """
|
|
||||||
# Returns item for defined UID.
|
|
||||||
|
|
||||||
# >>> book.get_item_with_id('image_001')
|
|
||||||
|
|
||||||
# :Args:
|
|
||||||
# - uid: UID for the item
|
|
||||||
|
|
||||||
# :Returns:
|
|
||||||
# Returns item object. Returns None if nothing was found.
|
|
||||||
# """
|
|
||||||
|
|
||||||
# def get_item_with_href(self, href):
|
|
||||||
# """
|
|
||||||
# Returns item for defined HREF.
|
|
||||||
|
|
||||||
# >>> book.get_item_with_href('EPUB/document.xhtml')
|
|
||||||
|
|
||||||
# :Args:
|
|
||||||
# - href: HREF for the item we are searching for
|
|
||||||
|
|
||||||
# :Returns:
|
|
||||||
# Returns item object. Returns None if nothing was found.
|
|
||||||
# """
|
|
||||||
|
|
||||||
# def get_items(self):
|
|
||||||
# def get_items_of_media_type(self, media_type):
|
|
||||||
# def get_items_of_type(self, item_type):
|
|
||||||
# """
|
|
||||||
# Returns all items of specified type.
|
|
||||||
|
|
||||||
# >>> book.get_items_of_type(epub.ITEM_IMAGE)
|
|
||||||
|
|
||||||
# :Args:
|
|
||||||
# - item_type: Type for items we are searching for
|
|
||||||
|
|
||||||
# :Returns:
|
|
||||||
# Returns found items as tuple.
|
|
||||||
# """
|
|
||||||
# return (item for item in self.items if item.get_type() == item_type)
|
|
||||||
|
|
||||||
|
|
||||||
# def get_template(self, name):
|
|
||||||
# def set_template(self, name, value):
|
|
||||||
# """
|
|
||||||
# Defines templates which are used to generate certain types of pages. When defining new value for the template
|
|
||||||
# we have to use content of type 'str' (Python 2) or 'bytes' (Python 3).
|
|
||||||
|
|
||||||
# At the moment we use these templates:
|
|
||||||
# - ncx
|
|
||||||
# - nav
|
|
||||||
# - chapter
|
|
||||||
# - cover
|
|
||||||
|
|
||||||
# :Args:
|
|
||||||
# - name: Name for the template
|
|
||||||
# - value: Content for the template
|
|
||||||
# """
|
|
||||||
|
|
||||||
|
|
||||||
# def add_prefix(self, name, uri):
|
|
||||||
# """
|
|
||||||
# Appends custom prefix to be added to the content.opf document
|
|
||||||
|
|
||||||
# >>> epub_book.add_prefix('bkterms', 'http://booktype.org/')
|
|
||||||
|
|
||||||
# :Args:
|
|
||||||
# - name: namespave name
|
|
||||||
# - uri: URI for the namespace
|
|
||||||
# """
|
|
||||||
|
|
||||||
return book
|
|
||||||
|
|
||||||
def writeEpubBook(name, book, options=None):
|
|
||||||
"""
|
|
||||||
Creates epub file with the content defined in EpubBook.
|
|
||||||
|
|
||||||
>>> makeEpub('book.epub', book)
|
|
||||||
|
|
||||||
:Args:
|
|
||||||
- name: file name for the output file
|
|
||||||
- book: instance of EpubBook
|
|
||||||
- options: extra opions as dictionary (optional)
|
|
||||||
"""
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
try:
|
|
||||||
epub = ebooklib.EpubWriter(name, book, options)
|
|
||||||
epub.process()
|
|
||||||
epub.write()
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("Exception {}.".format(e))
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
args = parseArguments()
|
args = parseArguments()
|
||||||
configLogger(args)
|
configLogger(args)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
logger.info("Parsing {}.".format(args.zip))
|
logger.info("Parsing {}.".format(args.zip))
|
||||||
|
|
||||||
@@ -222,9 +71,8 @@ def main():
|
|||||||
parseMhtmlZipFile(args.zip, book)
|
parseMhtmlZipFile(args.zip, book)
|
||||||
EnrichHtml.enrich(book)
|
EnrichHtml.enrich(book)
|
||||||
PrepareEpub.prepare(book)
|
PrepareEpub.prepare(book)
|
||||||
book.save_in_dir('test_out/test_save')
|
#book.save_in_dir('test_out/test_save')
|
||||||
#createDAG(book)
|
SaveEpub.save(book)
|
||||||
#createEpubBook(book)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
212
gragir/save_epub.py
Normal file
212
gragir/save_epub.py
Normal file
@@ -0,0 +1,212 @@
|
|||||||
|
import logging
|
||||||
|
import ebooklib.epub as ebooklib
|
||||||
|
|
||||||
|
from book import Book, Item
|
||||||
|
|
||||||
|
class SaveEpub(object):
|
||||||
|
def __init__(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def save(cls, book):
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
ebook = ebooklib.EpubBook()
|
||||||
|
cls.writeEpubBook(book.file_name, ebook)
|
||||||
|
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def writeEpubBook(cls, name, ebook, options=None):
|
||||||
|
"""
|
||||||
|
Creates epub file with the content defined in EpubBook.
|
||||||
|
|
||||||
|
>>> makeEpub('book.epub', book)
|
||||||
|
|
||||||
|
:Args:
|
||||||
|
- name: file name for the output file
|
||||||
|
- book: instance of EpubBook
|
||||||
|
- options: extra opions as dictionary (optional)
|
||||||
|
"""
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
try:
|
||||||
|
epub = ebooklib.EpubWriter(name, ebook, options)
|
||||||
|
epub.process()
|
||||||
|
epub.write()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Exception {}.".format(e))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def createEpubHtml(cls, ebook, book):
|
||||||
|
html = ebooklib.EpubHtml()
|
||||||
|
return html
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def getType(cls, item):
|
||||||
|
mime = item.content_type
|
||||||
|
|
||||||
|
if mime == 'text/css':
|
||||||
|
return 'css'
|
||||||
|
|
||||||
|
elif mime == 'application/font-woff' \
|
||||||
|
or mime == 'application/font-woff2':
|
||||||
|
return 'font'
|
||||||
|
|
||||||
|
elif mime == 'text/html':
|
||||||
|
return 'html'
|
||||||
|
|
||||||
|
else:
|
||||||
|
local_url = cls._createLocalName(book,item,mime.split("/")[0])
|
||||||
|
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def createContent(cls, ebook, book):
|
||||||
|
it = book.first
|
||||||
|
while it:
|
||||||
|
item_type = cls.getType(item)
|
||||||
|
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def fillEpubBook(cls, ebook, book):
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
for item in book.content:
|
||||||
|
item_type = cls.getType(item)
|
||||||
|
|
||||||
|
if item_type == 'image':
|
||||||
|
eitem = ebooklib.EpubImage()
|
||||||
|
if item_type == 'image':
|
||||||
|
eitem = ebooklib.EpubItem()
|
||||||
|
|
||||||
|
|
||||||
|
local_url = cls._createLocalName(book,item,'css')
|
||||||
|
|
||||||
|
elif mime == 'application/font-woff' \
|
||||||
|
or mime == 'application/font-woff2':
|
||||||
|
local_url = cls._createLocalName(book,item,'font')
|
||||||
|
|
||||||
|
elif mime == 'text/html':
|
||||||
|
local_url = cls._createLocalName(book,item,'')
|
||||||
|
|
||||||
|
else:
|
||||||
|
local_url = cls._createLocalName(book,item,mime.split("/")[0])
|
||||||
|
|
||||||
|
if it.content_type == 'text/html':
|
||||||
|
html = createEpubHtml(it)
|
||||||
|
ebook.add_item(html)
|
||||||
|
elif it.content_type == 'image/html':
|
||||||
|
html = createEpubHtml(it)
|
||||||
|
ebook.add_item(html)
|
||||||
|
|
||||||
|
if item_type == 'html':
|
||||||
|
eitem.set_content(item.soup.prettify("utf-8"))
|
||||||
|
else:
|
||||||
|
eitem.set_content(item.payload)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# class EpubImage(EpubItem):
|
||||||
|
# class EpubNav(EpubHtml):
|
||||||
|
# class EpubCoverHtml(EpubHtml):
|
||||||
|
# class EpubHtml(EpubItem):
|
||||||
|
# class EpubCover(EpubItem):
|
||||||
|
# class EpubNcx(EpubItem):
|
||||||
|
# class EpubItem(object):
|
||||||
|
# class EpubException(Exception):
|
||||||
|
# class Link(object):
|
||||||
|
# class Section(object):
|
||||||
|
|
||||||
|
|
||||||
|
# def set_identifier(self, uid)
|
||||||
|
# def set_title(self, title)
|
||||||
|
# def set_language(self, lang)
|
||||||
|
# def set_cover(self, file_name, content, create_page=True):
|
||||||
|
# """
|
||||||
|
# Set cover and create cover document if needed.
|
||||||
|
|
||||||
|
# :Args:
|
||||||
|
# - file_name: file name of the cover page
|
||||||
|
# - content: Content for the cover image
|
||||||
|
# - create_page: Should cover page be defined. Defined as bool value (optional). Default value is True.
|
||||||
|
# """
|
||||||
|
|
||||||
|
# def add_author(self, author, file_as=None, role=None, uid='creator'):
|
||||||
|
# def add_metadata(self, namespace, name, value, others=None):
|
||||||
|
# def set_unique_metadata(self, namespace, name, value, others=None):
|
||||||
|
# "Add metadata if metadata with this identifier does not already exist, otherwise update existing metadata."
|
||||||
|
# def add_item(self, item):
|
||||||
|
|
||||||
|
# def get_metadata(self, namespace, name):
|
||||||
|
# def get_item_with_id(self, uid):
|
||||||
|
# """
|
||||||
|
# Returns item for defined UID.
|
||||||
|
|
||||||
|
# >>> book.get_item_with_id('image_001')
|
||||||
|
|
||||||
|
# :Args:
|
||||||
|
# - uid: UID for the item
|
||||||
|
|
||||||
|
# :Returns:
|
||||||
|
# Returns item object. Returns None if nothing was found.
|
||||||
|
# """
|
||||||
|
|
||||||
|
# def get_item_with_href(self, href):
|
||||||
|
# """
|
||||||
|
# Returns item for defined HREF.
|
||||||
|
|
||||||
|
# >>> book.get_item_with_href('EPUB/document.xhtml')
|
||||||
|
|
||||||
|
# :Args:
|
||||||
|
# - href: HREF for the item we are searching for
|
||||||
|
|
||||||
|
# :Returns:
|
||||||
|
# Returns item object. Returns None if nothing was found.
|
||||||
|
# """
|
||||||
|
|
||||||
|
# def get_items(self):
|
||||||
|
# def get_items_of_media_type(self, media_type):
|
||||||
|
# def get_items_of_type(self, item_type):
|
||||||
|
# """
|
||||||
|
# Returns all items of specified type.
|
||||||
|
|
||||||
|
# >>> book.get_items_of_type(epub.ITEM_IMAGE)
|
||||||
|
|
||||||
|
# :Args:
|
||||||
|
# - item_type: Type for items we are searching for
|
||||||
|
|
||||||
|
# :Returns:
|
||||||
|
# Returns found items as tuple.
|
||||||
|
# """
|
||||||
|
# return (item for item in self.items if item.get_type() == item_type)
|
||||||
|
|
||||||
|
|
||||||
|
# def get_template(self, name):
|
||||||
|
# def set_template(self, name, value):
|
||||||
|
# """
|
||||||
|
# Defines templates which are used to generate certain types of pages. When defining new value for the template
|
||||||
|
# we have to use content of type 'str' (Python 2) or 'bytes' (Python 3).
|
||||||
|
|
||||||
|
# At the moment we use these templates:
|
||||||
|
# - ncx
|
||||||
|
# - nav
|
||||||
|
# - chapter
|
||||||
|
# - cover
|
||||||
|
|
||||||
|
# :Args:
|
||||||
|
# - name: Name for the template
|
||||||
|
# - value: Content for the template
|
||||||
|
# """
|
||||||
|
|
||||||
|
|
||||||
|
# def add_prefix(self, name, uri):
|
||||||
|
# """
|
||||||
|
# Appends custom prefix to be added to the content.opf document
|
||||||
|
|
||||||
|
# >>> epub_book.add_prefix('bkterms', 'http://booktype.org/')
|
||||||
|
|
||||||
|
# :Args:
|
||||||
|
# - name: namespave name
|
||||||
|
# - uri: URI for the namespace
|
||||||
|
# """
|
||||||
|
|
||||||
|
return book
|
||||||
Reference in New Issue
Block a user