Generating epub.

This commit is contained in:
Vahagn Khachatryan
2019-01-10 22:58:31 +00:00
parent d050d282ba
commit 510d8c083a
4 changed files with 366 additions and 309 deletions

View File

@@ -1,13 +1,12 @@
var isNode=new Function("try {return this===global;}catch(e){return false;}");
if (isNode()){
var JSZip = require('jszip');
var StringDecoder = require('string_decoder').StringDecoder;
var XMLSerializer = require('xmlserializer')
var DOMParser = require('xmldom').DOMParser
var generatedBufferType = 'uint8array'
} else {
var generatedBufferType = 'uint8array'
}
// var JSZip = require('jszip');
// var StringDecoder = require('string_decoder').StringDecoder;
// let html = new StringDecoder("utf-8").write(content)
var textDecoder = new TextDecoder("utf-8");
// var xmlSerializer = require('xmlserializer')
var xmlSerializer = new XMLSerializer();
// var DOMParser = require('xmldom').DOMParser
var domParser = new DOMParser();
var generatedBufferType = 'blob'
class EpubXhtml{
constructor(filename) {
@@ -18,9 +17,9 @@ class EpubXhtml{
}
convert(content){
let html = new StringDecoder("utf-8").write(content)
let dom = new DOMParser().parseFromString(html, 'text/html');
let xml = XMLSerializer.serializeToString(dom);
let html = textDecoder.decode(content);
let dom = domParser.parseFromString(html, 'text/html');
let xml = xmlSerializer.serializeToString(dom);
// let cnt = `<?xml version="1.0" encoding="utf-8" standalone="no"?>
// <!DOCTYPE html>
let cnt = `<?xml version="1.0" encoding="utf-8" standalone="no"?>
@@ -159,8 +158,6 @@ class EpubNav{
class EpubMeta{
constructor() {
this.zip = new JSZip();
this.metadata = {
book_id: 'book_id',
title: 'title',
@@ -201,7 +198,7 @@ class EpubMeta{
addManifest(filename, mime){
let key = filename
if (mime.startsWith('text/html')){
if (mime && mime.startsWith('text/html')){
let o = new EpubXhtml(filename)
this.override[key] = o
filename = o.filename
@@ -253,7 +250,7 @@ class EpubMeta{
}
class EpubWriter{
constructor(book_id) {
constructor() {
this.zip = new JSZip();
this.meta = new EpubMeta();
this.override = {}
@@ -272,6 +269,10 @@ class EpubWriter{
` );
}
addMetaInfoFile(filename, content){
this.zip.file("META-INF/"+filename, content)
}
addFile(filename, content){
console.log(`epub ${filename}:`)
@@ -294,6 +295,4 @@ class EpubWriter{
}
}
if (isNode()){
module.exports = EpubWriter
}
// module.exports = EpubWriter

View File

@@ -16,7 +16,8 @@
"activeTab",
"tabs",
"downloads",
"*://*.safaribooksonline.com/*"
"*://*.safaribooksonline.com/*",
"*://*.oreilly.com/*"
],
"browser_action": {

View File

@@ -1,285 +1,338 @@
/**
 * Ask the browser for the active tab of the current window.
 *
 * @returns {Promise<string>} resolves with that tab's URL. Rejects with the
 *     string 'Failed to get active tab.' when the query does not yield
 *     exactly one tab. If the query itself rejects, onError is called and
 *     the promise resolves with whatever onError returns.
 */
function getCurrentTab() {
    console.debug("Querying active tab.");
    const pickUrl = (tabs) => {
        // Guard first: anything other than a single tab is an error.
        if (tabs.length != 1) {
            console.error(`Expected 1 active tab, received: ${tabs}`);
            throw 'Failed to get active tab.';
        }
        const activeUrl = tabs[0].url;
        console.info(`Active URL: ${activeUrl}`);
        return activeUrl;
    };
    return browser.tabs
        .query({ active: true, currentWindow: true })
        .then(pickUrl, onError);
}
/**
 * Pull the book id out of a library URL.
 *
 * @param {string} url - URL of the page being viewed.
 * @returns {string} the path segment that follows the title slug.
 * @throws {string} 'Failed to extract book id.' when the URL has no
 *     "/library/view/<title>/<book id>" path.
 */
function extractBookId(url){
    console.debug(`Extracting book id from ${url}`);
    // match a url like:
    // https://www.safaribooksonline.com/library/view/startup-opportunities-2nd/9781119378181/
    // https://www.safaribooksonline.com/library/view/startup-growth-engines/77961SEM00001/
    // The id may be followed by "/", "?", "#" or the end of the URL, so a
    // link without the trailing slash is accepted as well.
    let match = url.match(/\/library\/view\/[^\/]+\/(\w+)(?:[\/?#]|$)/);
    let bookId = match && match[1];
    if (bookId) {
        console.debug(`Extracted book id: ${bookId}`);
        return bookId;
    }
    // Only the path is checked, never the domain, so the message describes
    // the expected path shape.
    console.error('Could not extract book id from url, expected a path like '
        + '"/library/view/<title>/<book id>/".');
    throw 'Failed to extract book id.';
}
/**
 * In-memory model of one book: the metadata returned by the book API plus
 * an entry for every file (cover, chapter HTML, images, stylesheets) that
 * has to be downloaded for it.
 */
class Book{
    /**
     * @param {string} book_id - id inserted into the book-info API URL.
     * @param {*} [epub] - accepted for callers that pass it; not used here.
     */
    constructor(book_id, epub) {
        this.book_id = book_id
        this.raw_book = {}
        this.chapter_list = []
        this.chapter_info = {}      // chapter URL -> chapter JSON
        this.book_files = {}        // file URL -> entry made by insertBookFile
        this.book_info = null
        this.book_toc = null
        this.book_flattoc = null
    }

    /**
     * Fetch a URL with credentials included.
     *
     * @param {string} url
     * @returns {Promise<Response>} the response; if fetch rejects, onError is
     *     called and the promise resolves with whatever onError returns.
     */
    downloadResource(url){
        console.info(`Downloading ${url}`)
        return fetch(url, {
            credentials: 'include'
        }).then((res) => {
            // console.log(`Downloaded.`)
            return res;
        }, onError)
    }

    /**
     * Fetch a URL and parse the response body as JSON.
     *
     * @param {string} url
     * @returns {Promise<*>} the parsed JSON.
     */
    downloadJson(url){
        return this.downloadResource(url)
            .then((res) => {
                return res.json();
            }, onError);
    }

    /**
     * Download the book-info record for this.book_id into this.book_info.
     *
     * @returns {Promise<void>}
     */
    downloadBookInfo(){
        console.info(`Downloading book info for ${this.book_id}`);
        let url = `https://www.safaribooksonline.com/api/v1/book/${this.book_id}/`;
        return this.downloadJson(url)
            .then((book_info) => {
                this.book_info = book_info;
            }, onError);
    }

    /**
     * Download every page of the chapter list, following each page's `next`
     * link, and append the `results` of each page to this.chapter_list.
     *
     * @returns {Promise<void>}
     */
    downloadChapterList(){
        function helper(book, url){
            console.info(`Downloading chapter list ${url}`);
            return book.downloadJson(url)
                .then((chapter_list) => {
                    book.chapter_list
                        = book.chapter_list.concat(chapter_list.results);
                    if (chapter_list.next != null){
                        return helper(book, chapter_list.next);
                    }
                }, onError);
        }
        return helper(this, this.book_info.chapter_list)
            .then(() => {
                console.info(`Chapter List Downloaded.`);
            }, onError);
    }

    /**
     * Register the cover, download both tables of contents and download the
     * JSON of every chapter, registering each chapter's assets as it arrives.
     *
     * @returns {Promise<Array>} settles once all three downloads have finished.
     */
    downloadMetaContent(){
        let downloads = []
        this.insertBookFile(
            this.book_info.cover,
            "cover.img")
        downloads.push(
            this.downloadJson(this.book_info.toc)
                .then((json) => {
                    this.book_toc=json
                }))
        downloads.push(
            this.downloadJson(this.book_info.flat_toc)
                .then((json) => {
                    this.book_flattoc=json
                }))
        // NOTE(review): Promise.map with a concurrency option is not part of
        // the standard Promise API; presumably Bluebird is loaded - confirm.
        downloads.push(
            Promise.map(this.book_info.chapters, (chapter) => {
                renderProgress(`${chapter}`)
                return this.downloadJson(chapter)
                    .then((json) => {
                        this.chapter_info[chapter] = json
                        return this.extractChapterAssets(json)
                    })
            },{concurrency: 10}))
        return Promise.all(downloads)
    }

    /**
     * Register the files one chapter needs: its HTML, its images and its
     * stylesheets.
     *
     * @param {Object} json - chapter record; asset_base_url and content are
     *     required, images and stylesheets are iterated when present.
     * @throws {string} "Missing data." when a required field is absent.
     */
    extractChapterAssets(json){
        if (!json.asset_base_url || !json.content){
            throw "Missing data."
        }
        // Html
        this.insertBookFile(
            json.content,
            json.full_path)
        // List of images.
        for (let idx in json.images){
            this.insertBookFile(
                json.asset_base_url + json.images[idx],
                json.images[idx])
        }
        // List of stylesheets.
        for (let idx in json.stylesheets){
            this.insertBookFile(
                json.stylesheets[idx].original_url,
                json.stylesheets[idx].full_path)
        }
    }

    /**
     * Download every registered file and store its headers, MIME type and
     * body on the matching book_files entry. Responses that are not ok leave
     * their entry untouched.
     *
     * @returns {Promise<Array>} settles after every body has been stored.
     */
    downloadContent(){
        return Promise.map(Object.keys(this.book_files), (url) => {
            renderProgress(`${this.book_files[url].filename}`)
            return this.downloadResource(url)
                .then((res) => {
                    if (res.ok){
                        const entry = this.book_files[url]
                        entry.headers = res.headers
                        entry.mime = res.headers.get('Content-Type')
                        // res.blob() is asynchronous: store the resolved
                        // Blob, not the pending promise, and keep the chain
                        // waiting until it is available.
                        return res.blob().then((blob) => {
                            entry.body = blob
                        })
                    }
                })
        },{concurrency: 10})
    }

    /**
     * Register a file to download, keyed by its URL. Registering the same
     * URL again replaces the earlier entry.
     *
     * @param {string} url - where the file is fetched from.
     * @param {string} filename - name the file gets inside the archive.
     */
    insertBookFile(url, filename){
        this.book_files[url] = {
            url: url,
            filename: filename,
            headers: null,
            mime: null,
            body: null
        }
    }
}
/**
 * Thin jQuery wrapper around the page elements that show download state.
 */
class SidebarPage{
    /** Reset the page for a new download: spinner on, old results cleared. */
    constructor() {
        $('#loading').show();
        for (const selector of ['#error-message', '#book-info']) {
            $(selector).hide();
        }
        $("#book-file-list").empty();
    }

    /**
     * Show the book's title and cover.
     * @param {Object} book - object whose book_info has title and cover.
     */
    renderInfo(book){
        const info = book.book_info;
        $("#book-name").text(info.title);
        $("#book-cover").attr("src", info.cover);
        $('#book-info').show();
    }

    /** Hide the spinner. */
    renderDone(){
        $('#loading').hide();
    }

    /**
     * Append one list item per entry of book.chapter_list, then hide the
     * spinner.
     * @param {Object} book - object carrying chapter_list.
     */
    renderChapterList(book){
        const chapters = book.chapter_list;
        for (const key in chapters) {
            // NOTE(review): the title is inserted as HTML - confirm it is
            // trusted markup.
            const item = $("<li></li>")
                .addClass("list-group-item")
                .html(chapters[key].title)
                .attr("chapterIndex", key);
            $("#book-chapter-list").append(item);
        }
        $('#loading').hide();
    }
}
/**
 * Report a failure: log it, show it in the error box, and hide the spinner
 * and the book info.
 *
 * @param {*} error - anything that can be interpolated into a string.
 */
function onError(error) {
    const message = `Error: ${error}`;
    console.error(message);
    $('#error-message').text(message).show();
    ['#loading', '#book-info'].forEach((selector) => {
        $(selector).hide();
    });
}
/**
 * Put a progress line at the top of the file list.
 *
 * @param {string} txt - plain text to show (a URL, a file name, a status).
 */
function renderProgress(txt){
    // The text comes from downloaded data (URLs and file names), so it is
    // inserted as text rather than parsed as HTML.
    const progress_dom = $("<li></li>")
        .addClass("list-group-item")
        .text(txt)
    $("#book-file-list").prepend(progress_dom);
}
/**
 * Copy a downloaded book into the archive writer: first a JSON dump of the
 * book object as "book.json", then every entry of book.book_files.
 *
 * @param {Object} book - object carrying book_files (url -> {filename, body}).
 * @param {Object} epub - writer exposing addFile(filename, content).
 */
function createEpub(book, epub){
    epub.addFile("book.json", JSON.stringify(book, null, '\t'))
    for (let url in book.book_files){
        // Declared locally so no global `file` is created (an undeclared
        // assignment also throws in strict mode).
        const file = book.book_files[url]
        epub.addFile(file.filename, file.body)
    }
}
/**
 * Click handler: find the book shown in the active tab, download its
 * metadata and files, pack them into an archive and hand the archive to the
 * browser's download manager.
 *
 * Any failure along the way is reported once through onError, and no later
 * step runs after a failure.
 */
function onDownloadBookClicked(){
    console.info("Begin book download.");
    // Kept local: assigning to undeclared names would create globals (and
    // throws a ReferenceError in strict mode).
    const page = new SidebarPage()
    getCurrentTab()
        .then(extractBookId)
        .then((book_id) => {
            const epub = new EpubWriter();
            const book = new Book(book_id);
            // Returned so a failure in any step below reaches the catch at
            // the end instead of becoming an unhandled rejection.
            return book.downloadBookInfo()
                .then(() => { page.renderInfo(book); })
                // .then(() => { return book.downloadChapterList(); })
                // .then(() => { return page.renderChapterList(book); })
                .then(() => { return book.downloadMetaContent(); })
                .then(() => { return book.downloadContent(); })
                .then(() => { return createEpub(book, epub); })
                .then(() => { return epub.generateAsync(); })
                .then((file) => {
                    let title = book.book_info.title
                    let filename = "books/"
                        + title.replace(/[^a-z0-9]/gi, '_').toLowerCase()
                        + ".zip"
                    console.log(`Zip file name ${filename}`)
                    renderProgress(`Saved to ${filename}`)
                    let url = window.URL.createObjectURL(file)
                    return browser.downloads.download({ "filename" : filename, url : url})
                })
                .then(() =>{
                    page.renderDone();
                })
        })
        // One handler at the end: an intermediate onError would turn the
        // failure into a success and let the pipeline run without a book id.
        .catch(onError);
}
// Page start-up: once the DOM is ready, put the page into its idle state and
// wire the download button.
document.addEventListener('DOMContentLoaded', () => {
    console.log("Start safari book hunter.");
    // Nothing has been requested yet: no spinner, no error, no book info.
    for (const selector of ['#loading', '#error-message', '#book-info']) {
        $(selector).hide();
    }
    $('#download-book-button').click(() => {
        onDownloadBookClicked();
    });
    ['#deselect-all-button', '#download-button'].forEach((selector) => {
        $(selector).show()
    });
    $('#download-section').hide();
    // let bookInfo = new BookInfo();
})
/**
 * Ask the browser for the active tab of the current window.
 *
 * @returns {Promise<string>} resolves with that tab's URL. Rejects with the
 *     string 'Failed to get active tab.' when the query does not yield
 *     exactly one tab. If the query itself rejects, onError is called and
 *     the promise resolves with whatever onError returns.
 */
function getCurrentTab() {
    console.debug("Querying active tab.");
    const pickUrl = (tabs) => {
        // Guard first: anything other than a single tab is an error.
        if (tabs.length != 1) {
            console.error(`Expected 1 active tab, received: ${tabs}`);
            throw 'Failed to get active tab.';
        }
        const activeUrl = tabs[0].url;
        console.info(`Active URL: ${activeUrl}`);
        return activeUrl;
    };
    return browser.tabs
        .query({ active: true, currentWindow: true })
        .then(pickUrl, onError);
}
/**
 * Pull the book id out of a library URL.
 *
 * @param {string} url - URL of the page being viewed.
 * @returns {string} the path segment that follows the title slug.
 * @throws {string} 'Failed to extract book id.' when the URL has no
 *     "/library/view/<title>/<book id>" path.
 */
function extractBookId(url){
    console.debug(`Extracting book id from ${url}`);
    // match a url like:
    // https://www.safaribooksonline.com/library/view/startup-opportunities-2nd/9781119378181/
    // https://www.safaribooksonline.com/library/view/startup-growth-engines/77961SEM00001/
    // The id may be followed by "/", "?", "#" or the end of the URL, so a
    // link without the trailing slash is accepted as well.
    let match = url.match(/\/library\/view\/[^\/]+\/(\w+)(?:[\/?#]|$)/);
    let bookId = match && match[1];
    if (bookId) {
        console.debug(`Extracted book id: ${bookId}`);
        return bookId;
    }
    // Only the path is checked, never the domain, so the message describes
    // the expected path shape instead of naming a single host.
    console.error('Could not extract book id from url, expected a path like '
        + '"/library/view/<title>/<book id>/".');
    throw 'Failed to extract book id.';
}
/**
 * In-memory model of one book: the metadata returned by the book API plus
 * an entry for every file (cover, chapter HTML, images, stylesheets) that
 * has to be downloaded for it.
 */
class Book{
    /**
     * @param {string} book_id - id inserted into the book-info API URL.
     * @param {*} [epub] - accepted for callers that pass it; not used here.
     */
    constructor(book_id, epub) {
        // Property order is kept as-is because the whole object is later
        // serialised with JSON.stringify.
        Object.assign(this, {
            book_id: book_id,
            raw_book: {},
            chapter_list: [],
            chapter_info: {},
            book_files: {},
            book_info: null,
            book_toc: null,
            book_flattoc: null
        })
    }

    /**
     * Fetch a URL with credentials included.
     *
     * @param {string} url
     * @returns {Promise<Response>} the response; if fetch rejects, onError is
     *     called and the promise resolves with whatever onError returns.
     */
    downloadResource(url){
        console.info(`Downloading ${url}`)
        // NOTE(review): "no-cors" normally yields opaque cross-origin
        // responses - confirm the extension's host permissions make the
        // body readable.
        const options = {
            credentials: 'include',
            mode: "no-cors"
        }
        return fetch(url, options).then((res) => res, onError)
    }

    /**
     * Fetch a URL and parse the response body as JSON.
     *
     * @param {string} url
     * @returns {Promise<*>} the parsed JSON.
     */
    downloadJson(url){
        const parse = (res) => res.json()
        return this.downloadResource(url).then(parse, onError);
    }

    /**
     * Download the book-info record for this.book_id into this.book_info.
     *
     * @returns {Promise<void>}
     */
    downloadBookInfo(){
        console.info(`Downloading book info for ${this.book_id}`);
        const infoUrl = `https://learning.oreilly.com/api/v1/book/${this.book_id}/`;
        const store = (book_info) => {
            this.book_info = book_info;
        }
        return this.downloadJson(infoUrl).then(store, onError);
    }

    /**
     * Register the cover, download both tables of contents and download the
     * JSON of every chapter, registering each chapter's assets as it arrives.
     *
     * @returns {Promise<Array>} settles once all three downloads have finished.
     */
    downloadMetaContent(){
        const info = this.book_info
        this.insertBookFile(info.cover, "cover.img")

        const tocReady = this.downloadJson(info.toc)
            .then((json) => { this.book_toc = json })
        const flatTocReady = this.downloadJson(info.flat_toc)
            .then((json) => { this.book_flattoc = json })

        const loadChapter = (chapter) => {
            renderProgress(`${chapter}`)
            return this.downloadJson(chapter).then((json) => {
                this.chapter_info[chapter] = json
                return this.extractChapterAssets(json)
            })
        }
        const chaptersReady =
            Promise.map(info.chapters, loadChapter, {concurrency: 10})

        return Promise.all([tocReady, flatTocReady, chaptersReady])
    }

    /**
     * Register the files one chapter needs: its HTML, its images and its
     * stylesheets.
     *
     * @param {Object} json - chapter record; asset_base_url and content are
     *     required, images and stylesheets are iterated when present.
     * @throws {string} "Missing data." when a required field is absent.
     */
    extractChapterAssets(json){
        if (!(json.asset_base_url && json.content)){
            throw "Missing data."
        }
        // Chapter HTML.
        this.insertBookFile(json.content, json.full_path)
        // Images are stored relative to the chapter's asset base URL.
        Object.keys(json.images || {}).forEach((idx) => {
            const image = json.images[idx]
            this.insertBookFile(json.asset_base_url + image, image)
        })
        // Stylesheets carry their own absolute URL.
        Object.keys(json.stylesheets || {}).forEach((idx) => {
            const sheet = json.stylesheets[idx]
            this.insertBookFile(sheet.original_url, sheet.full_path)
        })
    }

    /**
     * Download every registered file and store its headers, MIME type and
     * body (an ArrayBuffer) on the matching book_files entry. Responses that
     * are not ok leave their entry untouched.
     *
     * @returns {Promise<Array>} settles after every body has been stored.
     */
    downloadContent(){
        const fetchOne = (url) => {
            renderProgress(`${this.book_files[url].filename}`)
            return this.downloadResource(url).then((res) => {
                if (!res.ok){
                    return
                }
                const entry = this.book_files[url]
                entry.headers = res.headers
                entry.mime = res.headers.get('Content-Type')
                return res.arrayBuffer().then((arrBuffer) => {
                    this.book_files[url].body = arrBuffer
                })
            })
        }
        return Promise.map(Object.keys(this.book_files), fetchOne, {concurrency: 10})
    }

    /**
     * Register a file to download, keyed by its URL. Registering the same
     * URL again replaces the earlier entry.
     *
     * @param {string} url - where the file is fetched from.
     * @param {string} filename - name the file gets inside the archive.
     */
    insertBookFile(url, filename){
        const entry = { url, filename, headers: null, mime: null, body: null }
        this.book_files[url] = entry
    }
}
/**
 * Thin jQuery wrapper around the page elements that show download state.
 */
class SidebarPage{
    /** Reset the page for a new download: spinner on, old results cleared. */
    constructor() {
        $('#loading').show();
        for (const selector of ['#error-message', '#book-info']) {
            $(selector).hide();
        }
        $("#book-file-list").empty();
    }

    /**
     * Show the book's title and cover.
     * @param {Object} book - object whose book_info has title and cover.
     */
    renderInfo(book){
        const info = book.book_info;
        $("#book-name").text(info.title);
        $("#book-cover").attr("src", info.cover);
        $('#book-info').show();
    }

    /** Hide the spinner. */
    renderDone(){
        $('#loading').hide();
    }

    /**
     * Append one list item per entry of book.chapter_list, then hide the
     * spinner.
     * @param {Object} book - object carrying chapter_list.
     */
    renderChapterList(book){
        const chapters = book.chapter_list;
        for (const key in chapters) {
            // NOTE(review): the title is inserted as HTML - confirm it is
            // trusted markup.
            const item = $("<li></li>")
                .addClass("list-group-item")
                .html(chapters[key].title)
                .attr("chapterIndex", key);
            $("#book-chapter-list").append(item);
        }
        $('#loading').hide();
    }
}
/**
 * Report a failure: log it, show it in the error box, and hide the spinner
 * and the book info.
 *
 * @param {*} error - anything that can be interpolated into a string.
 */
function onError(error) {
    const message = `Error: ${error}`;
    console.error(message);
    $('#error-message').text(message).show();
    ['#loading', '#book-info'].forEach((selector) => {
        $(selector).hide();
    });
}
/**
 * Put a progress line at the top of the file list.
 *
 * @param {string} txt - plain text to show (a URL, a file name, a status).
 */
function renderProgress(txt){
    // The text comes from downloaded data (URLs and file names), so it is
    // inserted as text rather than parsed as HTML.
    const progress_dom = $("<li></li>")
        .addClass("list-group-item")
        .text(txt)
    $("#book-file-list").prepend(progress_dom);
}
/**
 * Copy title, language, ISBN, authors and publisher names from the book
 * info into the writer's metadata.
 *
 * @param {Object} epub - writer whose `meta` exposes addTitle, addLanguage,
 *     addBookId, addAuthor and addPublisher.
 * @param {Object} book - object carrying book_info.
 */
function fillMetadata(epub, book)
{
    const info = book.book_info
    const meta = epub.meta

    // Per the original note here: the metadata element must contain at least
    // one identifier, one title and one language element from the Dublin
    // Core tag set.
    meta.addTitle(info.title)
    meta.addLanguage(info.language)
    meta.addBookId(info.isbn)

    // Authors are handed over as-is; publishers by name only.
    Object.values(info.authors || {}).forEach((author) => {
        meta.addAuthor(author)
    })
    Object.values(info.publishers || {}).forEach((publisher) => {
        meta.addPublisher(publisher.name)
    })

    // Not filled in yet (kept from the original notes): text direction,
    // cover page, author roles, generic and unique metadata entries.
}
/**
 * Add every downloaded file to the manifest and mark the cover.
 *
 * @param {Object} epub - writer whose `meta` exposes addManifest and addCover.
 * @param {Object} book - object carrying book_files (keyed by URL) and
 *     book_info.cover (the cover's URL).
 */
function fillManifest(epub, book)
{
    const files = book.book_files
    Object.keys(files).forEach((key) => {
        const entry = files[key]
        epub.meta.addManifest(entry.filename, entry.mime)
    })
    // The cover is looked up by the same URL it was registered under.
    const cover = files[book.book_info.cover]
    epub.meta.addCover(cover.filename)
}
/**
 * Add one table-of-contents entry per item of book.book_toc.
 *
 * @param {Object} epub - writer whose `meta` exposes addToc.
 * @param {Object} book - object carrying book_toc; nothing is added when it
 *     is empty or not set.
 */
function fillToc(epub, book)
{
    Object.values(book.book_toc || {}).forEach((entry) => {
        const { filename, label, depth } = entry
        epub.meta.addToc(filename, label, depth)
    })
}
/**
 * Add the chapters to the spine in the order given by book_info.chapters.
 *
 * @param {Object} epub - writer whose `meta` exposes addSpine.
 * @param {Object} book - object carrying book_info.chapters (chapter URLs)
 *     and chapter_info (chapter URL -> record with full_path).
 */
function fillSpine(epub, book)
{
    Object.values(book.book_info.chapters || {}).forEach((chapterUrl) => {
        const chapter = book.chapter_info[chapterUrl]
        epub.meta.addSpine(chapter.full_path)
    })
}
/**
 * Placeholder for filling the guide section; currently does nothing.
 *
 * @param {Object} epub - writer object (unused).
 * @param {Object} book - downloaded book (unused).
 */
function fillGuide(epub, book)
{}
/**
 * Copy a downloaded book into the epub writer: a JSON dump of the book as
 * META-INF "book.json", then manifest, spine, guide, metadata and table of
 * contents, and finally every entry of book.book_files.
 *
 * @param {Object} book - object carrying book_files (url -> {filename, body}).
 * @param {Object} epub - writer exposing addMetaInfoFile and addFile.
 */
function createEpub(book, epub){
    epub.addMetaInfoFile("book.json", JSON.stringify(book, null, '\t'))
    // OPF file info.
    fillManifest(epub, book)
    fillSpine(epub, book)
    fillGuide(epub, book)
    fillMetadata(epub, book)
    // NCX and NAV files.
    fillToc(epub, book)
    for (let url in book.book_files){
        // Declared locally so no global `file` is created (an undeclared
        // assignment also throws in strict mode).
        const file = book.book_files[url]
        epub.addFile(file.filename, file.body)
    }
}
/**
 * Click handler: find the book shown in the active tab, download its
 * metadata and files, pack them into an epub and hand the file to the
 * browser's download manager.
 *
 * Any failure along the way is reported once through onError, and no later
 * step runs after a failure.
 */
function onDownloadBookClicked(){
    console.info("Begin book download.");
    // Kept local: assigning to undeclared names would create globals (and
    // throws a ReferenceError in strict mode).
    const page = new SidebarPage()
    getCurrentTab()
        .then(extractBookId)
        .then((book_id) => {
            const epub = new EpubWriter();
            const book = new Book(book_id);
            // Returned so a failure in any step below reaches the catch at
            // the end instead of becoming an unhandled rejection.
            return book.downloadBookInfo()
                .then(() => { page.renderInfo(book); })
                // .then(() => { return book.downloadChapterList(); })
                // .then(() => { return page.renderChapterList(book); })
                .then(() => { return book.downloadMetaContent(); })
                .then(() => { return book.downloadContent(); })
                .then(() => { return createEpub(book, epub); })
                .then(() => { return epub.generateAsync(); })
                .then((file) => {
                    let title = book.book_info.title
                    let filename = "books/"
                        + title.replace(/[^a-z0-9]/gi, '_').toLowerCase()
                        + ".epub"
                    console.log(`Zip file name ${filename}`)
                    renderProgress(`Saved to ${filename}`)
                    let url = window.URL.createObjectURL(file)
                    return browser.downloads.download({ "filename" : filename, url : url})
                })
                .then(() =>{
                    page.renderDone();
                })
        })
        // One handler at the end: an intermediate onError would turn the
        // failure into a success and let the pipeline run without a book id.
        .catch(onError);
}
// Page start-up: once the DOM is ready, put the page into its idle state and
// wire the download button.
document.addEventListener('DOMContentLoaded', () => {
    console.log("Start safari book hunter.");
    // Nothing has been requested yet: no spinner, no error, no book info.
    for (const selector of ['#loading', '#error-message', '#book-info']) {
        $(selector).hide();
    }
    $('#download-book-button').click(() => {
        onDownloadBookClicked();
    });
    ['#deselect-all-button', '#download-button'].forEach((selector) => {
        $(selector).show()
    });
    $('#download-section').hide();
    // let bookInfo = new BookInfo();
})

View File

@@ -1,6 +1,7 @@
let fs = require("fs");
let JSZip = require('jszip');
let EpubWriter = require('../src/epub')
var fs = require("fs");
var JSZip = require('jszip');
var EpubWriter = require('../src/epub')
let zip = new JSZip()
let epub = new EpubWriter()
@@ -134,6 +135,9 @@ function fillSpine(epub, book_info)
for (let i in book_info.book_info.chapters){
let s = book_info.book_info.chapters[i].split('/')
epub.meta.addSpine(s[s.length-1])
// let url = book_info.book_info.chapters[i]
// let full_name = book_info.chapter_info[url].full_name
// epub.meta.addSpine(full_name)
}
}