Generating epub.

This commit is contained in:
Vahagn Khachatryan
2019-01-10 22:58:31 +00:00
parent d050d282ba
commit 510d8c083a
4 changed files with 366 additions and 309 deletions

View File

@@ -1,13 +1,12 @@
var isNode=new Function("try {return this===global;}catch(e){return false;}"); // var JSZip = require('jszip');
if (isNode()){ // var StringDecoder = require('string_decoder').StringDecoder;
var JSZip = require('jszip'); // let html = new StringDecoder("utf-8").write(content)
var StringDecoder = require('string_decoder').StringDecoder; var textDecoder = new TextDecoder("utf-8");
var XMLSerializer = require('xmlserializer') // var xmlSerializer = require('xmlserializer')
var DOMParser = require('xmldom').DOMParser var xmlSerializer = new XMLSerializer();
var generatedBufferType = 'uint8array' // var DOMParser = require('xmldom').DOMParser
} else { var domParser = new DOMParser();
var generatedBufferType = 'uint8array' var generatedBufferType = 'blob'
}
class EpubXhtml{ class EpubXhtml{
constructor(filename) { constructor(filename) {
@@ -18,9 +17,9 @@ class EpubXhtml{
} }
convert(content){ convert(content){
let html = new StringDecoder("utf-8").write(content) let html = textDecoder.decode(content);
let dom = new DOMParser().parseFromString(html, 'text/html'); let dom = domParser.parseFromString(html, 'text/html');
let xml = XMLSerializer.serializeToString(dom); let xml = xmlSerializer.serializeToString(dom);
// let cnt = `<?xml version="1.0" encoding="utf-8" standalone="no"?> // let cnt = `<?xml version="1.0" encoding="utf-8" standalone="no"?>
// <!DOCTYPE html> // <!DOCTYPE html>
let cnt = `<?xml version="1.0" encoding="utf-8" standalone="no"?> let cnt = `<?xml version="1.0" encoding="utf-8" standalone="no"?>
@@ -159,8 +158,6 @@ class EpubNav{
class EpubMeta{ class EpubMeta{
constructor() { constructor() {
this.zip = new JSZip();
this.metadata = { this.metadata = {
book_id: 'book_id', book_id: 'book_id',
title: 'title', title: 'title',
@@ -201,7 +198,7 @@ class EpubMeta{
addManifest(filename, mime){ addManifest(filename, mime){
let key = filename let key = filename
if (mime.startsWith('text/html')){ if (mime && mime.startsWith('text/html')){
let o = new EpubXhtml(filename) let o = new EpubXhtml(filename)
this.override[key] = o this.override[key] = o
filename = o.filename filename = o.filename
@@ -253,7 +250,7 @@ class EpubMeta{
} }
class EpubWriter{ class EpubWriter{
constructor(book_id) { constructor() {
this.zip = new JSZip(); this.zip = new JSZip();
this.meta = new EpubMeta(); this.meta = new EpubMeta();
this.override = {} this.override = {}
@@ -272,6 +269,10 @@ class EpubWriter{
` ); ` );
} }
addMetaInfoFile(filename, content){
this.zip.file("META-INF/"+filename, content)
}
addFile(filename, content){ addFile(filename, content){
console.log(`epub ${filename}:`) console.log(`epub ${filename}:`)
@@ -294,6 +295,4 @@ class EpubWriter{
} }
} }
if (isNode()){ // module.exports = EpubWriter
module.exports = EpubWriter
}

View File

@@ -16,7 +16,8 @@
"activeTab", "activeTab",
"tabs", "tabs",
"downloads", "downloads",
"*://*.safaribooksonline.com/*" "*://*.safaribooksonline.com/*",
"*://*.oreilly.com/*"
], ],
"browser_action": { "browser_action": {

View File

@@ -1,285 +1,338 @@
function getCurrentTab() { function getCurrentTab() {
console.debug("Querying active tab."); console.debug("Querying active tab.");
var queryInfo = { var queryInfo = {
active: true, active: true,
currentWindow: true currentWindow: true
}; };
return browser.tabs.query(queryInfo) return browser.tabs.query(queryInfo)
.then(function(tabs) { .then(function(tabs) {
if (tabs.length == 1){ if (tabs.length == 1){
let url = tabs[0].url; let url = tabs[0].url;
console.info(`Active URL: ${url}`); console.info(`Active URL: ${url}`);
return url; return url;
} else { } else {
console.error(`Expected 1 active tab, received: ${tabs}`); console.error(`Expected 1 active tab, received: ${tabs}`);
throw 'Failed to get active tab.'; throw 'Failed to get active tab.';
} }
}, onError); }, onError);
} }
function extractBookId(url){ function extractBookId(url){
console.debug(`Extracting book id from ${url}`); console.debug(`Extracting book id from ${url}`);
// match a url like: // match a url like:
// https://www.safaribooksonline.com/library/view/startup-opportunities-2nd/9781119378181/ // https://www.safaribooksonline.com/library/view/startup-opportunities-2nd/9781119378181/
// https://www.safaribooksonline.com/library/view/startup-growth-engines/77961SEM00001/ // https://www.safaribooksonline.com/library/view/startup-growth-engines/77961SEM00001/
let match = url.match(/\/library\/view\/[^\/]+\/(\w+)\//); let match = url.match(/\/library\/view\/[^\/]+\/(\w+)\//);
let bookId = match && match[1]; let bookId = match && match[1];
if (bookId) { if (bookId) {
console.debug(`Extracted book id: ${bookId}`); console.debug(`Extracted book id: ${bookId}`);
return bookId; return bookId;
}else{ }else{
console.error('Could not extract book id from url, only ' console.error('Could not extract book id from url, only '
+'domain "www.safaribooksonline.com“ is supported.'); +'domain "www.safaribooksonline.com“ is supported.');
throw 'Failed to extract book id.'; throw 'Failed to extract book id.';
} }
} }
class Book{ class Book{
constructor(book_id, epub) { constructor(book_id, epub) {
this.book_id = book_id this.book_id = book_id
this.raw_book = {} this.raw_book = {}
this.chapter_list = [] this.chapter_list = []
this.chapter_info = {} this.chapter_info = {}
this.book_files = {} this.book_files = {}
this.book_info = null this.book_info = null
this.book_toc = null this.book_toc = null
this.book_flattoc = null this.book_flattoc = null
} }
downloadResource(url){ downloadResource(url){
console.info(`Downloading ${url}`) console.info(`Downloading ${url}`)
return fetch(url, { return fetch(url, {
credentials: 'include' credentials: 'include',
}).then((res) => { mode: "no-cors" // no-cors, cors, *same-origin
// console.log(`Downloaded.`) }).then((res) => {
return res; // console.log(`Downloaded.`)
}, onError) return res;
} }, onError)
}
downloadJson(url){
return this.downloadResource(url) downloadJson(url){
.then((res) => { return this.downloadResource(url)
return res.json(); .then((res) => {
}, onError); return res.json();
} }, onError);
}
downloadBookInfo(){
console.info(`Downloading book info for ${this.book_id}`); downloadBookInfo(){
let url = `https://www.safaribooksonline.com/api/v1/book/${this.book_id}/`; console.info(`Downloading book info for ${this.book_id}`);
return this.downloadJson(url) let url = `https://learning.oreilly.com/api/v1/book/${this.book_id}/`;
.then((book_info) => { return this.downloadJson(url)
this.book_info = book_info; .then((book_info) => {
}, onError); this.book_info = book_info;
} }, onError);
}
downloadChapterList(){
function helper(book, url){ downloadMetaContent(){
console.info(`Downloading chapter list ${url}`); let downloads = []
return book.downloadJson(url)
.then((chapter_list) => { this.insertBookFile(
book.chapter_list this.book_info.cover,
= book.chapter_list.concat(chapter_list.results); "cover.img")
if (chapter_list.next != null){ downloads.push(
return helper(book, chapter_list.next); this.downloadJson(this.book_info.toc)
} .then((json) => {
}, onError); this.book_toc=json
} }))
downloads.push(
return helper(this, this.book_info.chapter_list) this.downloadJson(this.book_info.flat_toc)
.then(() => { .then((json) => {
console.info(`Chapter List Downloaded.`); this.book_flattoc=json
}, onError); }))
} downloads.push(
Promise.map(this.book_info.chapters, (chapter) => {
downloadMetaContent(){ renderProgress(`${chapter}`)
let downloads = [] return this.downloadJson(chapter)
.then((json) => {
this.insertBookFile( this.chapter_info[chapter] = json
this.book_info.cover, return this.extractChapterAssets(json)
"cover.img") })
},{concurrency: 10}))
downloads.push(
this.downloadJson(this.book_info.toc) return Promise.all(downloads)
.then((json) => { }
this.book_toc=json
}))
downloads.push( extractChapterAssets(json){
this.downloadJson(this.book_info.flat_toc) if (!json.asset_base_url || !json.content){
.then((json) => { throw "Missing data."
this.book_flattoc=json }
})) // Html
downloads.push( this.insertBookFile(
Promise.map(this.book_info.chapters, (chapter) => { json.content,
renderProgress(`${chapter}`) json.full_path)
return this.downloadJson(chapter) // List of images.
.then((json) => { for (let idx in json.images){
this.chapter_info[chapter] = json this.insertBookFile(
return this.extractChapterAssets(json) json.asset_base_url + json.images[idx],
}) json.images[idx])
},{concurrency: 10})) }
// List of stylesheets.
return Promise.all(downloads) for (let idx in json.stylesheets){
} this.insertBookFile(
json.stylesheets[idx].original_url,
json.stylesheets[idx].full_path)
extractChapterAssets(json){ }
if (!json.asset_base_url || !json.content){ }
throw "Missing data."
} downloadContent(){
// Html return Promise.map(Object.keys(this.book_files), (url) => {
this.insertBookFile( renderProgress(`${this.book_files[url].filename}`)
json.content, return this.downloadResource(url)
json.full_path) .then((res) => {
// List of images. if (res.ok){
for (let idx in json.images){ this.book_files[url].headers = res.headers
this.insertBookFile( this.book_files[url].mime = res.headers.get('Content-Type')
json.asset_base_url + json.images[idx], return res.arrayBuffer().then((arrBuffer)=>{
json.images[idx]) this.book_files[url].body = arrBuffer
} })
// List of stylesheets. }
for (let idx in json.stylesheets){ })
this.insertBookFile( },{concurrency: 10})
json.stylesheets[idx].original_url, }
json.stylesheets[idx].full_path)
} insertBookFile(url, filename){
} this.book_files[url] = {
url: url,
downloadContent(){ filename: filename,
return Promise.map(Object.keys(this.book_files), (url) => { headers: null,
renderProgress(`${this.book_files[url].filename}`) mime: null,
return this.downloadResource(url) body: null
.then((res) => { }
if (res.ok){ }
this.book_files[url].headers = res.headers }
this.book_files[url].mime = res.headers.get('Content-Type')
this.book_files[url].body = res.blob() class SidebarPage{
}
}) constructor() {
},{concurrency: 10}) $('#loading').show();
} $('#error-message').hide();
$('#book-info').hide();
insertBookFile(url, filename){ $("#book-file-list").empty();
this.book_files[url] = { }
url: url,
filename: filename, renderInfo(book){
headers: null, $("#book-name").text(book.book_info.title);
mime: null, $("#book-cover").attr("src", book.book_info.cover);
body: null $('#book-info').show();
} }
}
} renderDone(){
$('#loading').hide();
class SidebarPage{ }
constructor() { renderChapterList(book){
$('#loading').show(); // Add chapters to UI
$('#error-message').hide(); for (let chapter_idx in book.chapter_list) {
$('#book-info').hide(); let chapter = book.chapter_list[chapter_idx];
$("#book-file-list").empty(); var chapter_dom = $("<li></li>")
} .addClass("list-group-item")
.html(chapter.title)
renderInfo(book){ .attr("chapterIndex", chapter_idx);
$("#book-name").text(book.book_info.title); $("#book-chapter-list").append(chapter_dom);
$("#book-cover").attr("src", book.book_info.cover); }
$('#book-info').show(); $('#loading').hide();
} }
}
renderDone(){
$('#loading').hide(); function onError(error) {
} console.error(`Error: ${error}`);
$('#error-message').text(`Error: ${error}`);
renderChapterList(book){ $('#error-message').show();
// Add chapters to UI $('#loading').hide();
for (let chapter_idx in book.chapter_list) { $('#book-info').hide();
let chapter = book.chapter_list[chapter_idx]; }
var chapter_dom = $("<li></li>")
.addClass("list-group-item") function renderProgress(txt){
.html(chapter.title) // Add chapters to UI
.attr("chapterIndex", chapter_idx); var progress_dom = $("<li></li>")
$("#book-chapter-list").append(chapter_dom); .addClass("list-group-item")
} .html(txt)
$('#loading').hide(); $("#book-file-list").prepend(progress_dom);
} }
}
function onError(error) { function fillMetadata(epub, book)
console.error(`Error: ${error}`); {
$('#error-message').text(`Error: ${error}`); epub.meta.addTitle(book.book_info.title)
$('#error-message').show(); epub.meta.addLanguage(book.book_info.language)
$('#loading').hide(); epub.meta.addBookId(book.book_info.isbn)
$('#book-info').hide(); for (let i in book.book_info.authors){
} let author = book.book_info.authors[i]
epub.meta.addAuthor(author)
function renderProgress(txt){ }
// Add chapters to UI for (let i in book.book_info.publishers){
var progress_dom = $("<li></li>") let publisher = book.book_info.publishers[i]
.addClass("list-group-item") epub.meta.addPublisher(publisher.name)
.html(txt) }
$("#book-file-list").prepend(progress_dom); // # The metadata element or deprecated dc-metadata element contains
} // # at least one identifier element, at least one title element,
// # and at least one language element drawn from the Dublin Core tag
// # set.
// epub.set_title('Test Title')
function createEpub(book, epub){ // epub.set_language('en')
epub.addFile("book.json", JSON.stringify(book, null, '\t')) // epub.set_direction('ltr')
for (let url in book.book_files){ // # epub.set_cover(file_name, content, create_page=True):
file = book.book_files[url] // # epub.add_author(author, file_as=None, role=None, uid='creator'):
epub.addFile(file.filename, file.body) // # epub.add_metadata(namespace, name, value, others=None):
} // # epub.set_unique_metadata(namespace, name, value, others=None):
} }
function onDownloadBookClicked(){ function fillManifest(epub, book)
console.info("Begin book download."); {
page = new SidebarPage() for (let key in book.book_files){
let f = book.book_files[key]
getCurrentTab() epub.meta.addManifest(f.filename, f.mime)
.then(extractBookId, onError) }
.then((book_id) => {
epub = new EpubWriter(); let f = book.book_files[book.book_info.cover]
book = new Book(book_id); epub.meta.addCover(f.filename)
book.downloadBookInfo() }
.then(() => { page.renderInfo(book); })
// .then(() => { return book.downloadChapterList(); }) function fillToc(epub, book)
// .then(() => { return page.renderChapterList(book); }) {
.then(() => { return book.downloadMetaContent(); }) for (let i in book.book_toc){
.then(() => { return book.downloadContent(); }) let toc_item = book.book_toc[i]
.then(() => { return createEpub(book, epub); }) epub.meta.addToc(
.then(() => { return epub.generateAsync(); }) toc_item.filename,
.then((file) => { toc_item.label,
let title = book.book_info.title toc_item.depth)
let filename = "books/" }
+ title.replace(/[^a-z0-9]/gi, '_').toLowerCase() }
+ ".zip"
console.log(`Zip file name ${filename}`) function fillSpine(epub, book)
renderProgress(`Saved to ${filename}`) {
let url = window.URL.createObjectURL(file) for (let i in book.book_info.chapters){
return browser.downloads.download({ "filename" : filename, url : url}) let url = book.book_info.chapters[i]
}) let full_path = book.chapter_info[url].full_path
.then(() =>{ epub.meta.addSpine(full_path)
page.renderDone(); }
}) }
}, onError); function fillGuide(epub, book)
} {}
document.addEventListener('DOMContentLoaded', function() { function createEpub(book, epub){
console.log("Start safari book hunter."); epub.addMetaInfoFile("book.json", JSON.stringify(book, null, '\t'))
$('#loading').hide();
$('#error-message').hide(); // OPF file info.
$('#book-info').hide(); fillManifest(epub, book)
fillSpine(epub, book)
$('#download-book-button').click(() => { fillGuide(epub, book)
onDownloadBookClicked(); fillMetadata(epub, book)
});
// NCX and NAV files.
fillToc(epub, book)
$('#deselect-all-button').show() for (let url in book.book_files){
$('#download-button').show() file = book.book_files[url]
$('#download-section').hide(); epub.addFile(file.filename, file.body)
// let bookInfo = new BookInfo(); }
}) }
function onDownloadBookClicked(){
console.info("Begin book download.");
page = new SidebarPage()
getCurrentTab()
.then(extractBookId, onError)
.then((book_id) => {
epub = new EpubWriter();
book = new Book(book_id);
book.downloadBookInfo()
.then(() => { page.renderInfo(book); })
// .then(() => { return book.downloadChapterList(); })
// .then(() => { return page.renderChapterList(book); })
.then(() => { return book.downloadMetaContent(); })
.then(() => { return book.downloadContent(); })
.then(() => { return createEpub(book, epub); })
.then(() => { return epub.generateAsync(); })
.then((file) => {
let title = book.book_info.title
let filename = "books/"
+ title.replace(/[^a-z0-9]/gi, '_').toLowerCase()
+ ".epub"
console.log(`Zip file name ${filename}`)
renderProgress(`Saved to ${filename}`)
let url = window.URL.createObjectURL(file)
return browser.downloads.download({ "filename" : filename, url : url})
})
.then(() =>{
page.renderDone();
})
}, onError);
}
document.addEventListener('DOMContentLoaded', function() {
console.log("Start safari book hunter.");
$('#loading').hide();
$('#error-message').hide();
$('#book-info').hide();
$('#download-book-button').click(() => {
onDownloadBookClicked();
});
$('#deselect-all-button').show()
$('#download-button').show()
$('#download-section').hide();
// let bookInfo = new BookInfo();
})

View File

@@ -1,6 +1,7 @@
let fs = require("fs"); var fs = require("fs");
let JSZip = require('jszip'); var JSZip = require('jszip');
let EpubWriter = require('../src/epub') var EpubWriter = require('../src/epub')
let zip = new JSZip() let zip = new JSZip()
let epub = new EpubWriter() let epub = new EpubWriter()
@@ -134,6 +135,9 @@ function fillSpine(epub, book_info)
for (let i in book_info.book_info.chapters){ for (let i in book_info.book_info.chapters){
let s = book_info.book_info.chapters[i].split('/') let s = book_info.book_info.chapters[i].split('/')
epub.meta.addSpine(s[s.length-1]) epub.meta.addSpine(s[s.length-1])
// let url = book_info.book_info.chapters[i]
// let full_name = book_info.chapter_info[url].full_name
// epub.meta.addSpine(full_name)
} }
} }