Localization works.
This commit is contained in:
@@ -9,7 +9,7 @@ class Item(object):
|
|||||||
self.content_type = content_type
|
self.content_type = content_type
|
||||||
self.payload = payload
|
self.payload = payload
|
||||||
self.needed_by = set()
|
self.needed_by = set()
|
||||||
self.needed_by_elem = set()
|
self.needed_by_elem = []
|
||||||
self.needs = set()
|
self.needs = set()
|
||||||
self.soup = None
|
self.soup = None
|
||||||
|
|
||||||
@@ -23,7 +23,7 @@ class Item(object):
|
|||||||
if directory[-1] != '/':
|
if directory[-1] != '/':
|
||||||
directory += '/'
|
directory += '/'
|
||||||
file_name = directory + self.url
|
file_name = directory + self.url
|
||||||
logger.info("Saved {}".format(file_name))
|
logger.info("Saveing {}".format(file_name))
|
||||||
#
|
#
|
||||||
# Ensure directory exists.
|
# Ensure directory exists.
|
||||||
#
|
#
|
||||||
@@ -44,6 +44,13 @@ class Item(object):
|
|||||||
defrag,_ =urllib.parse.urldefrag(link)
|
defrag,_ =urllib.parse.urldefrag(link)
|
||||||
return urllib.parse.urljoin(self.url, defrag)
|
return urllib.parse.urljoin(self.url, defrag)
|
||||||
|
|
||||||
|
def refersTo(self, ref_item):
|
||||||
|
self.needs.add(ref_item)
|
||||||
|
|
||||||
|
def referencedBy(self, item, element):
|
||||||
|
self.needed_by.add(item)
|
||||||
|
self.needed_by_elem.append(element)
|
||||||
|
|
||||||
|
|
||||||
class Book(object):
|
class Book(object):
|
||||||
|
|
||||||
@@ -61,15 +68,6 @@ class Book(object):
|
|||||||
for item in self.content.values():
|
for item in self.content.values():
|
||||||
item.save_file(directory)
|
item.save_file(directory)
|
||||||
|
|
||||||
def insertDependency(self, item, element, url):
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
if url in self.content:
|
|
||||||
item.needs.add(self.content[url])
|
|
||||||
self.content[url].needed_by.add(item)
|
|
||||||
self.content[url].needed_by_elem.add(element)
|
|
||||||
elif url:
|
|
||||||
logger.info(" refered but no item exist: {}".format(url))
|
|
||||||
|
|
||||||
def print(self):
|
def print(self):
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
logger.info("Book Structure:")
|
logger.info("Book Structure:")
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ class EnrichHtml(object):
|
|||||||
if item.soup is not None:
|
if item.soup is not None:
|
||||||
logger.info("Create DAG {}".format(item.url))
|
logger.info("Create DAG {}".format(item.url))
|
||||||
cls.normalizeUrlAndSetDependecy(book, item, 'a', 'href')
|
cls.normalizeUrlAndSetDependecy(book, item, 'a', 'href')
|
||||||
|
cls.normalizeUrlAndSetDependecy(book, item, 'link', 'href')
|
||||||
cls.normalizeUrlAndSetDependecy(book, item, 'img', 'src')
|
cls.normalizeUrlAndSetDependecy(book, item, 'img', 'src')
|
||||||
cls.normalizeUrlAndSetDependecy(book, item, 'img', 'data-mfp-src')
|
cls.normalizeUrlAndSetDependecy(book, item, 'img', 'data-mfp-src')
|
||||||
|
|
||||||
@@ -53,8 +54,14 @@ class EnrichHtml(object):
|
|||||||
if not url:
|
if not url:
|
||||||
continue
|
continue
|
||||||
normal_url = item.getAbsoluteUrl(url)
|
normal_url = item.getAbsoluteUrl(url)
|
||||||
logger.info(" depends on: {}".format(normal_url))
|
logger.info(" refers to: {}".format(normal_url))
|
||||||
book.insertDependency(item, element, normal_url)
|
if normal_url in book.content:
|
||||||
|
ref_item = book.content[normal_url]
|
||||||
|
item.refersTo(ref_item)
|
||||||
|
ref_item.referencedBy(item, element)
|
||||||
|
else:
|
||||||
|
logger.info(" refered but no item exist: {}".format(url))
|
||||||
|
element[attr] = ''
|
||||||
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -71,7 +78,7 @@ class EnrichHtml(object):
|
|||||||
body.clear()
|
body.clear()
|
||||||
body.append(content)
|
body.append(content)
|
||||||
else:
|
else:
|
||||||
logger.error(" No content found: {}".format(item.url))
|
logger.warn(" No content found: {}".format(item.url))
|
||||||
remove.append(item)
|
remove.append(item)
|
||||||
|
|
||||||
for item in remove:
|
for item in remove:
|
||||||
|
|||||||
@@ -14,16 +14,16 @@ def parseMht(mht, book):
|
|||||||
|
|
||||||
parts = mhtContent.get_payload()
|
parts = mhtContent.get_payload()
|
||||||
# Multiple parts, usually? If single 'str' part, then convert to a list.
|
# Multiple parts, usually? If single 'str' part, then convert to a list.
|
||||||
if not type(parts) is list:
|
if not type(parts) is list:
|
||||||
parts = [mhtContent]
|
parts = [mhtContent]
|
||||||
|
|
||||||
logger.info(' Number of parts: {}'.format(len(parts)))
|
logger.info(' Number of parts: {}'.format(len(parts)))
|
||||||
|
|
||||||
# Save all parts to files.
|
# Save all parts to files.
|
||||||
for p in parts: # walk() for a tree, but I'm guessing MHT is never nested?
|
for p in parts: # walk() for a tree, but I'm guessing MHT is never nested?
|
||||||
#??? cs = p.get_charset() # Expecting "utf-8" for root HTML, None for all other parts.
|
#??? cs = p.get_charset() # Expecting "utf-8" for root HTML, None for all other parts.
|
||||||
ct = p.get_content_type()
|
ct = p.get_content_type()
|
||||||
# String coerced to lower case of the form maintype/subtype, else get_default_type().
|
# String coerced to lower case of the form maintype/subtype, else get_default_type().
|
||||||
fp = p.get("content-location") or "index.html" # File path. Expecting root HTML is only part with no location.
|
fp = p.get("content-location") or "index.html" # File path. Expecting root HTML is only part with no location.
|
||||||
|
|
||||||
logger.info(' Content type: {}, Location: {}, Size: {}'
|
logger.info(' Content type: {}, Location: {}, Size: {}'
|
||||||
@@ -34,9 +34,9 @@ def parseMht(mht, book):
|
|||||||
|
|
||||||
def parseMhtFile(zip, mhtInfo, book):
|
def parseMhtFile(zip, mhtInfo, book):
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
logger.info('Parsing {}, size: {}, csize: {} '
|
logger.info('Reading {}, size: {}, csize: {} '
|
||||||
.format(mhtInfo.filename,
|
.format(mhtInfo.filename,
|
||||||
mhtInfo.file_size,
|
mhtInfo.file_size,
|
||||||
mhtInfo.compress_size))
|
mhtInfo.compress_size))
|
||||||
|
|
||||||
with zip.open(mhtInfo) as mht:
|
with zip.open(mhtInfo) as mht:
|
||||||
|
|||||||
@@ -86,9 +86,13 @@ class PrepareEpub(object):
|
|||||||
for ref_elem in item.needed_by_elem:
|
for ref_elem in item.needed_by_elem:
|
||||||
if ref_elem.name == 'a':
|
if ref_elem.name == 'a':
|
||||||
_,fragment = urllib.parse.urldefrag(ref_elem['href'])
|
_,fragment = urllib.parse.urldefrag(ref_elem['href'])
|
||||||
ref_elem['href'] = local_url + "#" + fragment
|
if fragment:
|
||||||
if ref_elem.name == 'img':
|
ref_elem['href'] = local_url + "#" + fragment
|
||||||
|
else:
|
||||||
|
ref_elem['href'] = local_url
|
||||||
|
elif ref_elem.name == 'img':
|
||||||
ref_elem['src'] = local_url
|
ref_elem['src'] = local_url
|
||||||
if ref_elem.name == 'img':
|
|
||||||
ref_elem['data-mfp-src'] = local_url
|
ref_elem['data-mfp-src'] = local_url
|
||||||
|
else:
|
||||||
|
logger.info("Renaming {} -> {}".format(item.url, local_url))
|
||||||
item.url = local_url
|
item.url = local_url
|
||||||
|
|||||||
Reference in New Issue
Block a user