91 lines
2.6 KiB
Python
91 lines
2.6 KiB
Python
import os
|
|
import logging
|
|
import urllib
|
|
|
|
class Item(object):
    """A single resource of a book, identified by its URL.

    Attributes:
        url: Location of the resource; also used as the relative output
            path in ``save_file`` and as the base in ``getAbsoluteUrl``.
        content_type: Content type given by the creator (stored only).
        payload: Raw content, written in binary mode by ``save_file``
            (so it must be bytes-like).
        needs: Items this item depends on (filled by
            ``Book.insertDependency``).
        needed_by: Items that depend on this item.
        needed_by_elem: Elements that reference this item.
        soup: Optional parsed document; when truthy it is written instead
            of ``payload``. It must offer ``prettify(encoding)`` --
            presumably a BeautifulSoup object, confirm against the caller.
    """

    def __init__(self, url, content_type, payload):
        self.url = url
        self.content_type = content_type
        self.payload = payload
        self.needed_by = set()
        self.needed_by_elem = set()
        self.needs = set()
        self.soup = None

    def save_file(self, directory):
        """Write this item below *directory*, using ``self.url`` as the path.

        Nothing is written when the instance carries a ``remove`` attribute.
        Missing parent directories are created. An empty *directory* means
        the path is taken relative to the current working directory.

        Args:
            directory: Target directory, with or without a trailing ``/``.

        Raises:
            OSError: If a directory cannot be created or the file cannot
                be written.
        """
        logger = logging.getLogger(__name__)
        if hasattr(self, 'remove'):
            return

        # Build the file name. The emptiness check avoids the IndexError
        # that indexing directory[-1] raises for "".
        if directory and not directory.endswith('/'):
            directory += '/'
        file_name = directory + self.url

        # Ensure the parent directory exists. exist_ok avoids the race
        # between an existence check and the creation; the guard skips
        # makedirs('') for files without a directory component.
        parent = os.path.dirname(file_name)
        if parent:
            os.makedirs(parent, exist_ok=True)

        # Save the content: the prettified document if present, otherwise
        # the raw payload.
        if self.soup:
            data = self.soup.prettify("utf-8")
        else:
            data = self.payload
        with open(file_name, 'wb') as out:
            out.write(data)

        # Log only after the write succeeded.
        logger.info("Saved %s", file_name)

    def getAbsoluteUrl(self, link):
        """Return *link* without its fragment, resolved against ``self.url``.

        Args:
            link: Absolute or relative URL, optionally with ``#fragment``.

        Returns:
            The joined URL as produced by ``urllib.parse.urljoin``.
        """
        # A plain ``import urllib`` does not guarantee that the ``parse``
        # submodule is loaded, so import it explicitly here.
        import urllib.parse

        defragged, _ = urllib.parse.urldefrag(link)
        return urllib.parse.urljoin(self.url, defragged)
|
|
|
|
|
|
class Book(object):
    """A collection of items keyed by URL, with an optional reading chain.

    Attributes:
        file_name: Name given by the creator (stored only).
        content: Mapping from item URL to item.
        first: First item of the chain walked by ``print``, or ``None``.
    """

    def __init__(self, file_name):
        self.file_name = file_name
        self.content = {}
        self.first = None

    def remove(self, item):
        """Remove *item* from ``content``, looked up by ``item.url``.

        Raises:
            KeyError: If no item is registered under ``item.url``.
        """
        del self.content[item.url]

    def save_in_dir(self, directory):
        """Create *directory* if needed and save every item into it.

        Args:
            directory: Target directory, passed on to each item's
                ``save_file``.
        """
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(directory, exist_ok=True)
        for item in self.content.values():
            item.save_file(directory)

    def insertDependency(self, item, element, url):
        """Record that *item* (through *element*) depends on the item at *url*.

        When *url* is registered in ``content``, the target is added to
        ``item.needs`` and *item* / *element* are added to the target's
        ``needed_by`` / ``needed_by_elem`` sets. A non-empty unknown *url*
        is only logged; an empty one is ignored.
        """
        logger = logging.getLogger(__name__)
        target = self.content.get(url) if url in self.content else None
        if target is not None or url in self.content:
            target = self.content[url]
            item.needs.add(target)
            target.needed_by.add(item)
            target.needed_by_elem.add(element)
        elif url:
            logger.info(" refered but no item exist: {}".format(url))

    def print(self):
        """Log the book structure by following ``next`` links from ``first``.

        For each item its title, previous/next URLs and dependencies are
        logged. A missing or ``None`` ``prev``/``next`` attribute is treated
        as "no link", so a ``None`` ``next`` ends the walk. Items without a
        ``title`` attribute are logged by URL.
        """
        logger = logging.getLogger(__name__)
        logger.info("Book Structure:")
        item = self.first
        while item is not None:
            # Item.__init__ does not set a title, so fall back to the URL.
            logger.info("Item: {}".format(getattr(item, 'title', item.url)))

            # getattr with a None default covers both an absent attribute
            # and an explicit None end-of-chain marker.
            previous = getattr(item, 'prev', None)
            following = getattr(item, 'next', None)
            if previous is not None:
                logger.info(" Prev: {}".format(previous.url))
            if following is not None:
                logger.info(" Next: {}".format(following.url))
            for ref_item in item.needs:
                logger.info(" Needs: {}".format(ref_item.url))
            logger.info("")

            item = following
|