Escaping & and removing HTML from the description.

This commit is contained in:
Vahagn Khachatryan
2019-01-22 23:22:59 +00:00
parent 7550b71f4d
commit 20b2657440

View File

@@ -1,380 +1,399 @@
// var JSZip = require('jszip'); // var JSZip = require('jszip');
// var StringDecoder = require('string_decoder').StringDecoder; // var StringDecoder = require('string_decoder').StringDecoder;
// let html = new StringDecoder("utf-8").write(content) // let html = new StringDecoder("utf-8").write(content)
var textDecoder = new TextDecoder("utf-8"); var textDecoder = new TextDecoder("utf-8");
// var xmlSerializer = require('xmlserializer') // var xmlSerializer = require('xmlserializer')
var xmlSerializer = new XMLSerializer(); var xmlSerializer = new XMLSerializer();
// var DOMParser = require('xmldom').DOMParser // var DOMParser = require('xmldom').DOMParser
var domParser = new DOMParser(); var domParser = new DOMParser();
var generatedBufferType = 'blob' var generatedBufferType = 'blob'
class EpubXhtml{ /**
constructor(filename) { * I hope JS is single threaded and not preemptive.
this.filename = filename * Escape XML entities. Ex. & -> &amp;
this.mime = 'application/xhtml+xml' * @param {*} text
} */
var escape = function (text){
convert(content){ return $('<div>').text(text).html()
let html = textDecoder.decode(content); }
let dom = domParser.parseFromString(html, 'text/html');
let xml = xmlSerializer.serializeToString(dom); var html2txt = function (html) {
html = html.replace(/<\/p>/gi, "\n");
let css_link = '' html = html.replace(/<\/li>/gi, "\n");
if (this.css) { html = html.replace(/<li>/gi, "-");
for (let i in this.css){ html = html.replace(/<b>/gi, "*");
css_link += ` <link href="${this.css[i]}" rel="stylesheet" type="text/css"/>\n` html = html.replace(/<\/b>/gi, "*");
} return $('<div>').html(html).text()
} }
let title = ''
if (this.title) { class EpubXhtml{
title = this.title constructor(filename) {
} this.filename = filename
this.mime = 'application/xhtml+xml'
let cnt = `<?xml version="1.0" encoding="utf-8" standalone="no"?> }
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" convert(content){
xmlns:epub="http://www.idpf.org/2007/ops"> let html = textDecoder.decode(content);
<head> let dom = domParser.parseFromString(html, 'text/html');
<title>${title}</title> let xml = xmlSerializer.serializeToString(dom);
${css_link}
</head> let css_link = ''
<body> if (this.css) {
${xml} for (let i in this.css){
</body> css_link += ` <link href="${this.css[i]}" rel="stylesheet" type="text/css"/>\n`
</html> }
` }
return cnt
} let title = ''
} if (this.title) {
title = this.title
class EpubOpf{ }
constructor(filename, meta) {
this.filename = filename let cnt = `<?xml version="1.0" encoding="utf-8" standalone="no"?>
this.meta = meta <!DOCTYPE html>
} <html xmlns="http://www.w3.org/1999/xhtml"
xmlns:epub="http://www.idpf.org/2007/ops">
generate(){ <head>
let m = this.meta <title>${title}</title>
let cnt = `<?xml version="1.0" encoding="utf-8"?> ${css_link}
<package xmlns="http://www.idpf.org/2007/opf" </head>
xmlns:dc="http://purl.org/dc/elements/1.1/" <body>
xmlns:dcterms="http://purl.org/dc/terms/" ${xml}
unique-identifier="pub-id" </body>
version="3.0"> </html>
<metadata> `
<meta property="dcterms:modified">2013-03-22T12:24:00Z</meta> return cnt
<dc:rights>Please read the legal notice included in this e-book and/or check the copyright status in your country.</dc:rights> }
<dc:identifier id="pub-id">${m.metadata.book_id}</dc:identifier> }
<dc:title>${m.metadata.title}</dc:title>
<dc:language>${m.metadata.language}</dc:language> class EpubOpf{
` constructor(filename, meta) {
this.filename = filename
if (m.metadata.description){ this.meta = meta
cnt += ` <dc:description>${m.metadata.description}</dc:description>\n` }
}
if (m.metadata.isbn){ generate(){
cnt += ` <dc:identifier id="isbn">${m.metadata.isbn}</dc:identifier>\n` let m = this.meta
let onixcode = m.metadata.isbn.length > 10 ? '15' : '02' let cnt = `<?xml version="1.0" encoding="utf-8"?>
cnt += ` <meta refines="#isbn" property="identifier-type" scheme="onix:codelist5">${onixcode}</meta>\n` <package xmlns="http://www.idpf.org/2007/opf"
} xmlns:dc="http://purl.org/dc/elements/1.1/"
if (m.metadata.publisher){ xmlns:dcterms="http://purl.org/dc/terms/"
cnt += ` <dc:publisher>${m.metadata.publisher}</dc:publisher>\n` unique-identifier="pub-id"
} version="3.0">
if (m.metadata.cover_image){ <metadata>
cnt += ` <meta content="cover-image" name="${m.metadata.cover_image}"/>\n` <meta property="dcterms:modified">2013-03-22T12:24:00Z</meta>
} <dc:rights>Please read the legal notice included in this e-book and/or check the copyright status in your country.</dc:rights>
for (let s in m.metadata.subject){ <dc:identifier id="pub-id">${m.metadata.book_id}</dc:identifier>
cnt += ` <dc:subject>${m.metadata.subject[s]}</dc:subject>\n` <dc:title>${m.metadata.title}</dc:title>
} <dc:language>${m.metadata.language}</dc:language>
for (let a in m.metadata.author){ `
cnt += ` <dc:creator id="creator-${a}">${m.metadata.author[a]}</dc:creator>\n`
cnt += ` <meta refines="#creator-${a}" property="role" schema="marc:relators">aut</meta>\n` if (m.metadata.description){
} cnt += ` <dc:description>${m.metadata.description}</dc:description>\n`
}
cnt +=' </metadata>\n <manifest>\n' if (m.metadata.isbn){
cnt += ` <dc:identifier id="isbn">${m.metadata.isbn}</dc:identifier>\n`
for (let id in m.manifest){ let onixcode = m.metadata.isbn.length > 10 ? '15' : '02'
let f = m.manifest[id] cnt += ` <meta refines="#isbn" property="identifier-type" scheme="onix:codelist5">${onixcode}</meta>\n`
if (f.hasOwnProperty('property')){ }
cnt += ` <item id="${f.id}" media-type="${f.mime}" href="${f.filename}" properties="${f.property}"/>\n` if (m.metadata.publisher){
}else{ cnt += ` <dc:publisher>${m.metadata.publisher}</dc:publisher>\n`
cnt += ` <item id="${f.id}" media-type="${f.mime}" href="${f.filename}"/>\n` }
} if (m.metadata.cover_image){
} cnt += ` <meta content="cover-image" name="${m.metadata.cover_image}"/>\n`
cnt += ` </manifest>\n <spine toc="${m.getId(m.ncx().filename)}">\n` }
for (let i in m.spine){ for (let s in m.metadata.subject){
cnt += ` <itemref idref="${m.spine[i]}" />\n` cnt += ` <dc:subject>${m.metadata.subject[s]}</dc:subject>\n`
} }
cnt += ' </spine>\n <guide>\n' for (let a in m.metadata.author){
cnt += ` <reference href="${m.nav().filename}" title="Table of Contents" type="toc"/>\n` cnt += ` <dc:creator id="creator-${a}">${m.metadata.author[a]}</dc:creator>\n`
cnt += ' </guide>\n</package>\n' cnt += ` <meta refines="#creator-${a}" property="role" schema="marc:relators">aut</meta>\n`
return cnt }
}
} cnt +=' </metadata>\n <manifest>\n'
class EpubNcx{ for (let id in m.manifest){
constructor(filename, meta) { let f = m.manifest[id]
this.filename = filename if (f.hasOwnProperty('property')){
this.meta = meta cnt += ` <item id="${f.id}" media-type="${f.mime}" href="${f.filename}" properties="${f.property}"/>\n`
this.mime = 'application/x-dtbncx+xml' }else{
} cnt += ` <item id="${f.id}" media-type="${f.mime}" href="${f.filename}"/>\n`
}
generate(){ }
let m = this.meta cnt += ` </manifest>\n <spine toc="${m.getId(m.ncx().filename)}">\n`
for (let i in m.spine){
let max_depth = 0; cnt += ` <itemref idref="${m.spine[i]}" />\n`
let toc = '' }
for (let i = 0; i < m.toc.length; ++i) { cnt += ' </spine>\n <guide>\n'
let toc_item = m.toc[i] cnt += ` <reference href="${m.nav().filename}" title="Table of Contents" type="toc"/>\n`
toc += cnt += ' </guide>\n</package>\n'
` <navPoint class="chapter" id="navpoint-${i}" playOrder="${i}">\n` return cnt
+` <navLabel><text>${toc_item.label}</text></navLabel>\n` }
+` <content src="${toc_item.href}"/>\n` }
if (max_depth < toc_item.depth) { class EpubNcx{
max_depth = toc_item.depth constructor(filename, meta) {
} this.filename = filename
this.meta = meta
let next_depth = 1 this.mime = 'application/x-dtbncx+xml'
if (i+1 < m.toc.length) { }
next_depth = m.toc[i+1].depth
} generate(){
let m = this.meta
for (let current_dept = toc_item.depth;
current_dept >= next_depth; let max_depth = 0;
current_dept--) { let toc = ''
toc += ' </navPoint>\n' for (let i = 0; i < m.toc.length; ++i) {
} let toc_item = m.toc[i]
} toc +=
` <navPoint class="chapter" id="navpoint-${i}" playOrder="${i}">\n`
return `<?xml version="1.0" encoding="UTF-8"?> +` <navLabel><text>${toc_item.label}</text></navLabel>\n`
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="en" version="2005-1"> +` <content src="${toc_item.href}"/>\n`
<head>
<meta name="dtb:uid" content="${m.metadata.book_id}"/> if (max_depth < toc_item.depth) {
<meta name="dtb:depth" content="${max_depth}"/> max_depth = toc_item.depth
<meta name="dtb:totalPageCount" content="0"/> }
<meta name="dtb:maxPageNumber" content="0"/>
</head> let next_depth = 1
<docTitle> if (i+1 < m.toc.length) {
<text>${m.metadata.title}</text> next_depth = m.toc[i+1].depth
</docTitle> }
<navMap>
${toc} for (let current_dept = toc_item.depth;
</navMap> current_dept >= next_depth;
</ncx> current_dept--) {
` toc += ' </navPoint>\n'
} }
} }
class EpubNav{ return `<?xml version="1.0" encoding="UTF-8"?>
constructor(filename, meta) { <ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="en" version="2005-1">
this.filename = filename <head>
this.meta = meta <meta name="dtb:uid" content="${m.metadata.book_id}"/>
this.mime = 'application/xhtml+xml' <meta name="dtb:depth" content="${max_depth}"/>
} <meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
generate(){ </head>
let m = this.meta <docTitle>
<text>${m.metadata.title}</text>
let toc = '' </docTitle>
for (let i = 0; i < m.toc.length; ++i){ <navMap>
let toc_item = m.toc[i] ${toc}
toc += ` <li><a href="${toc_item.href}">${toc_item.label}</a>` </navMap>
</ncx>
let next_depth = 1 `
if (i+1 < m.toc.length){ }
next_depth = m.toc[i+1].depth }
}
let current_dept = toc_item.depth class EpubNav{
constructor(filename, meta) {
if (current_dept == next_depth){ this.filename = filename
toc += ' </li>\n' this.meta = meta
} this.mime = 'application/xhtml+xml'
else if (current_dept < next_depth){ }
for (; current_dept < next_depth; current_dept++){
toc += ' <ol>\n' generate(){
} let m = this.meta
}
else if (current_dept > next_depth){ let toc = ''
for (; current_dept > next_depth; current_dept--){ for (let i = 0; i < m.toc.length; ++i){
toc += ' </li></ol>\n' let toc_item = m.toc[i]
} toc += ` <li><a href="${toc_item.href}">${toc_item.label}</a>`
toc += ' </li>\n'
} let next_depth = 1
} if (i+1 < m.toc.length){
next_depth = m.toc[i+1].depth
return `<?xml version="1.0" encoding="utf-8" standalone="no"?> }
<!DOCTYPE html> let current_dept = toc_item.depth
<html xmlns="http://www.w3.org/1999/xhtml"
xmlns:epub="http://www.idpf.org/2007/ops"> if (current_dept == next_depth){
<head> toc += ' </li>\n'
<title>${m.metadata.title}</title> }
</head> else if (current_dept < next_depth){
<body> for (; current_dept < next_depth; current_dept++){
<h1>${m.metadata.title}</h1> toc += ' <ol>\n'
<nav epub:type="toc" id="toc"> }
<h2>Table Of Content</h2> }
<ol> else if (current_dept > next_depth){
${toc} for (; current_dept > next_depth; current_dept--){
</ol> toc += ' </li></ol>\n'
</nav> }
</body> toc += ' </li>\n'
</html> }
` }
}
} return `<?xml version="1.0" encoding="utf-8" standalone="no"?>
<!DOCTYPE html>
class EpubMeta{ <html xmlns="http://www.w3.org/1999/xhtml"
constructor() { xmlns:epub="http://www.idpf.org/2007/ops">
this.metadata = { <head>
book_id: 'book_id', <title>${m.metadata.title}</title>
isbn: null, </head>
title: 'title', <body>
language: 'en', <h1>${m.metadata.title}</h1>
author: [], <nav epub:type="toc" id="toc">
publisher: null, <h2>Table Of Content</h2>
cover_image: null, <ol>
subject: [], ${toc}
description: null </ol>
} </nav>
this.manifest = {} </body>
this.spine = [] </html>
this.guide = [] `
this.toc = [] }
this.override = {} }
this.addManifest(this.ncx().filename, this.ncx().mime) class EpubMeta{
this.addManifest(this.nav().filename, this.nav().mime, 'nav') constructor() {
this.manifest[this.nav().filename].property = 'nav' //TODO this.metadata = {
} book_id: 'book_id',
isbn: null,
addBookId(book_id){ title: 'title',
this.metadata.book_id = book_id language: 'en',
} author: [],
addTitle(title){ publisher: null,
this.metadata.title = title cover_image: null,
} subject: [],
addAuthor(author){ description: null
this.metadata.author.push(author) }
} this.manifest = {}
addPublisher(publisher){ this.spine = []
this.metadata.publisher = publisher this.guide = []
} this.toc = []
addSubject(subject){ this.override = {}
this.metadata.subject.push(subject)
} this.addManifest(this.ncx().filename, this.ncx().mime)
addLanguage(lang){ this.addManifest(this.nav().filename, this.nav().mime, 'nav')
this.metadata.language = lang this.manifest[this.nav().filename].property = 'nav' //TODO
} }
addDescription(description){
this.metadata.description = description addBookId(book_id){
} this.metadata.book_id = escape(book_id)
addIsbn(isbn){ }
this.metadata.isbn = isbn addTitle(title){
} this.metadata.title = escape(title)
}
addMetaData(){ addAuthor(author){
} this.metadata.author.push(escape(author))
}
addManifest(filename, mime){ addPublisher(publisher){
if (mime && mime.startsWith('text/html')){ this.metadata.publisher = escape(publisher)
let o = new EpubXhtml(filename) }
this.override[filename] = o addSubject(subject){
mime = o.mime this.metadata.subject.push(escape(subject))
} }
let id = 'id_' + filename.replace(/[^a-z0-9]/gi, '_').toLowerCase() addLanguage(lang){
this.manifest[filename] = this.metadata.language = escape(lang)
{ }
filename : filename, addDescription(description){
mime : mime, this.metadata.description = escape(html2txt(description))
id : id }
} addIsbn(isbn){
} this.metadata.isbn = escape(isbn)
}
addHtmlComponents(filename, title, css){
if (filename in this.override){ addMetaData(){
this.override[filename].title = title }
this.override[filename].css = css
} addManifest(filename, mime){
} if (mime && mime.startsWith('text/html')){
let o = new EpubXhtml(filename)
getId(filename){ this.override[filename] = o
return this.manifest[filename].id mime = o.mime
} }
let id = 'id_' + filename.replace(/[^a-z0-9]/gi, '_').toLowerCase()
addSpine(filename){ this.manifest[filename] =
this.spine.push(this.manifest[filename].id) {
} filename : filename,
mime : mime,
addCover(filename){ id : id
this.manifest[filename].property = 'cover-image' }
} }
addToc(filename, label, depth){ addHtmlComponents(filename, title, css){
this.toc.push({ if (filename in this.override){
href: filename, this.override[filename].title = title
label: label, this.override[filename].css = css
depth: depth }
}) }
}
getId(filename){
opf(){ return this.manifest[filename].id
return new EpubOpf("content.opf", this) }
}
addSpine(filename){
ncx(){ this.spine.push(this.manifest[filename].id)
return new EpubNcx("content.ncx", this) }
}
addCover(filename){
nav(){ this.manifest[filename].property = 'cover-image'
return new EpubNav("content.xhtml", this) }
}
} addToc(filename, label, depth){
this.toc.push({
class EpubWriter{ href: filename,
constructor() { label: label,
this.zip = new JSZip(); depth: depth
this.meta = new EpubMeta(); })
this.override = {} }
this.createEpubStruct();
} opf(){
return new EpubOpf("content.opf", this)
createEpubStruct(){ }
this.zip.file("mimetype", "application/epub+zip", {compression: "STORE"});
this.zip.file("META-INF/container.xml", ncx(){
`<?xml version='1.0' encoding='utf-8'?> return new EpubNcx("content.ncx", this)
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0"> }
<rootfiles>
<rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/> nav(){
</rootfiles> return new EpubNav("content.xhtml", this)
</container> }
` ); }
}
class EpubWriter{
addMetaInfoFile(filename, content){ constructor() {
this.zip.file("META-INF/"+filename, content) this.zip = new JSZip();
} this.meta = new EpubMeta();
this.override = {}
addFile(filename, content){ this.createEpubStruct();
console.log(`epub ${filename}:`) }
if (filename in this.meta.override){ createEpubStruct(){
let o = this.meta.override[filename] this.zip.file("mimetype", "application/epub+zip", {compression: "STORE"});
content = o.convert(content) this.zip.file("META-INF/container.xml",
} `<?xml version='1.0' encoding='utf-8'?>
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
this.zip.file("EPUB/"+filename, content) <rootfiles>
} <rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/>
</rootfiles>
generateAsync(){ </container>
this.addFile(this.meta.ncx().filename, this.meta.ncx().generate()) ` );
this.addFile(this.meta.nav().filename, this.meta.nav().generate()) }
this.addFile(this.meta.opf().filename, this.meta.opf().generate())
return this.zip.generateAsync({ addMetaInfoFile(filename, content){
type: generatedBufferType, this.zip.file("META-INF/"+filename, content)
compression: "DEFLATE"}) }
}
} addFile(filename, content){
console.log(`epub ${filename}:`)
// module.exports = EpubWriter
if (filename in this.meta.override){
let o = this.meta.override[filename]
content = o.convert(content)
}
this.zip.file("EPUB/"+filename, content)
}
generateAsync(){
this.addFile(this.meta.ncx().filename, this.meta.ncx().generate())
this.addFile(this.meta.nav().filename, this.meta.nav().generate())
this.addFile(this.meta.opf().filename, this.meta.opf().generate())
return this.zip.generateAsync({
type: generatedBufferType,
compression: "DEFLATE"})
}
}
// module.exports = EpubWriter