Escaping & + removing HTML from description.

This commit is contained in:
Vahagn Khachatryan
2019-01-22 23:22:59 +00:00
parent 7550b71f4d
commit 20b2657440

View File

@@ -1,380 +1,399 @@
// Shared, file-level helpers used by the EPUB classes below.
// The commented-out require() lines are presumably the Node.js alternatives
// to the browser globals used here — TODO confirm before deleting them.
// var JSZip = require('jszip');
// var StringDecoder = require('string_decoder').StringDecoder;
// let html = new StringDecoder("utf-8").write(content)
// Decodes raw page content as UTF-8 text (used by EpubXhtml.convert).
var textDecoder = new TextDecoder("utf-8");
// var xmlSerializer = require('xmlserializer')
// Serializes the parsed DOM back to XML markup (used by EpubXhtml.convert).
var xmlSerializer = new XMLSerializer();
// var DOMParser = require('xmldom').DOMParser
// Parses HTML text into a DOM (used by EpubXhtml.convert).
var domParser = new DOMParser();
// Value passed as the `type` option to JSZip's generateAsync in EpubWriter.
var generatedBufferType = 'blob'
/**
 * Converts a downloaded HTML page into a standalone XHTML content document.
 *
 * Optional `title` and `css` properties may be assigned on the instance
 * before `convert` is called; they fill the generated `<head>`.
 */
class EpubXhtml {
  /**
   * @param {string} filename - Name of the document inside the EPUB.
   */
  constructor(filename) {
    this.filename = filename;
    this.mime = 'application/xhtml+xml';
  }

  /**
   * Builds the XHTML document for this page.
   *
   * @param {ArrayBuffer|ArrayBufferView|string} content - Raw UTF-8 bytes of
   *   the HTML page, or the already-decoded HTML text.
   * @returns {string} A complete XHTML document.
   */
  convert(content) {
    // TextDecoder only accepts buffers; pass decoded text straight through.
    const html = typeof content === 'string' ? content : textDecoder.decode(content);
    const dom = domParser.parseFromString(html, 'text/html');
    // Serialize only the parsed <body>. Serializing the whole document would
    // nest a second <html>/<head> (and possibly a DOCTYPE) inside the wrapper
    // body below, which is not a valid XHTML content document.
    const body = xmlSerializer.serializeToString(dom.body);

    let css_link = '';
    if (this.css) {
      // A lone string is one stylesheet; otherwise take the own values only
      // (for...in would also pick up inherited enumerable properties).
      const hrefs = typeof this.css === 'string' ? [this.css] : Object.values(this.css);
      for (const href of hrefs) {
        css_link += ` <link href="${href}" rel="stylesheet" type="text/css"/>\n`;
      }
    }
    const title = this.title ? this.title : '';

    return `<?xml version="1.0" encoding="utf-8" standalone="no"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"
xmlns:epub="http://www.idpf.org/2007/ops">
<head>
<title>${title}</title>
${css_link}
</head>
${body}
</html>
`;
  }
}
/**
 * Generator for the EPUB package document (OPF): metadata, manifest, spine
 * and guide, rendered from the data held by `meta`.
 */
class EpubOpf {
  /**
   * @param {string} filename - Name of the package file inside the EPUB.
   * @param {object} meta - Provides `metadata`, `manifest`, `spine`,
   *   `getId()`, `ncx()` and `nav()`.
   */
  constructor(filename, meta) {
    this.filename = filename;
    this.meta = meta;
  }

  /**
   * Renders the package document. Values are interpolated verbatim, so they
   * must already be XML-safe.
   *
   * @returns {string} The OPF XML.
   */
  generate() {
    const m = this.meta;
    const md = m.metadata;
    // dcterms:modified is the publication's last-modified time. Use the value
    // supplied in metadata.modified when present, otherwise the current UTC
    // time in CCYY-MM-DDThh:mm:ssZ form (no fractional seconds).
    const modified = md.modified
      ? md.modified
      : new Date().toISOString().replace(/\.\d+Z$/, 'Z');

    let cnt = `<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://www.idpf.org/2007/opf"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
unique-identifier="pub-id"
version="3.0">
<metadata>
<meta property="dcterms:modified">${modified}</meta>
<dc:rights>Please read the legal notice included in this e-book and/or check the copyright status in your country.</dc:rights>
<dc:identifier id="pub-id">${md.book_id}</dc:identifier>
<dc:title>${md.title}</dc:title>
<dc:language>${md.language}</dc:language>
`;
    if (md.description) {
      cnt += ` <dc:description>${md.description}</dc:description>\n`;
    }
    if (md.isbn) {
      cnt += ` <dc:identifier id="isbn">${md.isbn}</dc:identifier>\n`;
      // ONIX code list 5: 15 = ISBN-13, 02 = ISBN-10.
      const onixcode = String(md.isbn).length > 10 ? '15' : '02';
      cnt += ` <meta refines="#isbn" property="identifier-type" scheme="onix:codelist5">${onixcode}</meta>\n`;
    }
    if (md.publisher) {
      cnt += ` <dc:publisher>${md.publisher}</dc:publisher>\n`;
    }
    if (md.cover_image) {
      // NOTE(review): the usual EPUB 2 form is <meta name="cover" content="ID"/>;
      // name/content look swapped here — confirm before changing.
      cnt += ` <meta content="cover-image" name="${md.cover_image}"/>\n`;
    }
    for (const subject of Object.values(md.subject || [])) {
      cnt += ` <dc:subject>${subject}</dc:subject>\n`;
    }
    for (const [key, author] of Object.entries(md.author || [])) {
      cnt += ` <dc:creator id="creator-${key}">${author}</dc:creator>\n`;
      // The attribute is `scheme` (as on the ISBN meta above), not `schema`.
      cnt += ` <meta refines="#creator-${key}" property="role" scheme="marc:relators">aut</meta>\n`;
    }

    cnt += ' </metadata>\n <manifest>\n';
    for (const f of Object.values(m.manifest || {})) {
      if (Object.prototype.hasOwnProperty.call(f, 'property')) {
        cnt += ` <item id="${f.id}" media-type="${f.mime}" href="${f.filename}" properties="${f.property}"/>\n`;
      } else {
        cnt += ` <item id="${f.id}" media-type="${f.mime}" href="${f.filename}"/>\n`;
      }
    }

    cnt += ` </manifest>\n <spine toc="${m.getId(m.ncx().filename)}">\n`;
    for (const idref of Object.values(m.spine || [])) {
      cnt += ` <itemref idref="${idref}" />\n`;
    }

    cnt += ' </spine>\n <guide>\n';
    cnt += ` <reference href="${m.nav().filename}" title="Table of Contents" type="toc"/>\n`;
    cnt += ' </guide>\n</package>\n';
    return cnt;
  }
}
/**
 * Generator for the NCX table of contents (`toc.ncx`-style navigation map),
 * rendered from the `toc` entries and metadata held by `meta`.
 */
class EpubNcx {
  /**
   * @param {string} filename - Name of the NCX file inside the EPUB.
   * @param {object} meta - Provides `metadata` (book_id, title) and `toc`
   *   (array of `{href, label, depth}`).
   */
  constructor(filename, meta) {
    this.filename = filename;
    this.meta = meta;
    this.mime = 'application/x-dtbncx+xml';
  }

  /**
   * Renders the NCX document. Labels and hrefs are interpolated verbatim, so
   * they must already be XML-safe.
   *
   * @returns {string} The NCX XML.
   */
  generate() {
    const m = this.meta;
    const entries = m.toc;
    // Depths of the navPoints that are currently open (innermost last).
    const openDepths = [];
    let max_depth = 0;
    let toc = '';

    for (let i = 0; i < entries.length; ++i) {
      const entry = entries[i];
      // playOrder is 1-based: the first value in an NCX document must be 1.
      toc += ` <navPoint class="chapter" id="navpoint-${i}" playOrder="${i + 1}">\n`
        + ` <navLabel><text>${entry.label}</text></navLabel>\n`
        + ` <content src="${entry.href}"/>\n`;
      openDepths.push(entry.depth);
      if (max_depth < entry.depth) {
        max_depth = entry.depth;
      }

      // Close every open navPoint that is not an ancestor of the next entry;
      // after the last entry close everything. Tracking what is actually open
      // keeps the XML balanced even when depths skip levels or do not start
      // at 1.
      const isLast = i + 1 === entries.length;
      while (openDepths.length > 0
        && (isLast || openDepths[openDepths.length - 1] >= entries[i + 1].depth)) {
        openDepths.pop();
        toc += ' </navPoint>\n';
      }
    }

    return `<?xml version="1.0" encoding="UTF-8"?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="en" version="2005-1">
<head>
<meta name="dtb:uid" content="${m.metadata.book_id}"/>
<meta name="dtb:depth" content="${max_depth}"/>
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>
<docTitle>
<text>${m.metadata.title}</text>
</docTitle>
<navMap>
${toc}
</navMap>
</ncx>
`;
  }
}
/**
 * Generator for the XHTML navigation document (`<nav epub:type="toc">`),
 * rendered from the `toc` entries and title held by `meta`.
 */
class EpubNav {
  /**
   * @param {string} filename - Name of the navigation file inside the EPUB.
   * @param {object} meta - Provides `metadata.title` and `toc`
   *   (array of `{href, label, depth}`).
   */
  constructor(filename, meta) {
    this.filename = filename;
    this.meta = meta;
    this.mime = 'application/xhtml+xml';
  }

  /**
   * Renders the navigation document. Labels and hrefs are interpolated
   * verbatim, so they must already be XML-safe.
   *
   * @returns {string} The XHTML navigation document.
   */
  generate() {
    const m = this.meta;
    const entries = m.toc;
    // Depths of the <li> items that currently have a nested <ol> open.
    const parentDepths = [];
    let toc = '';

    for (let i = 0; i < entries.length; ++i) {
      const entry = entries[i];
      toc += ` <li><a href="${entry.href}">${entry.label}</a>`;
      const isLast = i + 1 === entries.length;

      if (!isLast && entry.depth < entries[i + 1].depth) {
        // The next entry is a child: keep this <li> open and start exactly
        // one nested list, even if the depth skips levels (an <ol> directly
        // inside an <ol> would be invalid).
        parentDepths.push(entry.depth);
        toc += ' <ol>\n';
        continue;
      }

      // Climb out of every nested list whose parent is not an ancestor of
      // the next entry; after the last entry close them all. Tracking the
      // open lists keeps the markup balanced for any depth sequence.
      while (parentDepths.length > 0
        && (isLast || parentDepths[parentDepths.length - 1] >= entries[i + 1].depth)) {
        parentDepths.pop();
        toc += ' </li></ol>\n';
      }
      toc += ' </li>\n';
    }

    return `<?xml version="1.0" encoding="utf-8" standalone="no"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"
xmlns:epub="http://www.idpf.org/2007/ops">
<head>
<title>${m.metadata.title}</title>
</head>
<body>
<h1>${m.metadata.title}</h1>
<nav epub:type="toc" id="toc">
<h2>Table Of Content</h2>
<ol>
${toc}
</ol>
</nav>
</body>
</html>
`;
  }
}
/**
 * Collects everything needed to describe an EPUB: Dublin Core metadata, the
 * manifest of files, the spine (reading order) and the table of contents.
 * All values are stored as given; nothing is escaped here.
 */
class EpubMeta {
  constructor() {
    this.metadata = {
      book_id: 'book_id',
      isbn: null,
      title: 'title',
      language: 'en',
      author: [],
      publisher: null,
      cover_image: null,
      subject: [],
      description: null,
    };
    this.manifest = {};
    this.spine = [];
    this.guide = [];
    this.toc = [];
    this.override = {};
    // The NCX and navigation documents are always part of the manifest.
    const ncx = this.ncx();
    const nav = this.nav();
    this.addManifest(ncx.filename, ncx.mime);
    this.addManifest(nav.filename, nav.mime, 'nav');
  }

  /** @param {string} book_id - Unique identifier of the publication. */
  addBookId(book_id) {
    this.metadata.book_id = book_id;
  }

  /** @param {string} title */
  addTitle(title) {
    this.metadata.title = title;
  }

  /** @param {string} author - Appended to the list of authors. */
  addAuthor(author) {
    this.metadata.author.push(author);
  }

  /** @param {string} publisher */
  addPublisher(publisher) {
    this.metadata.publisher = publisher;
  }

  /** @param {string} subject - Appended to the list of subjects. */
  addSubject(subject) {
    this.metadata.subject.push(subject);
  }

  /** @param {string} lang - Language code of the publication. */
  addLanguage(lang) {
    this.metadata.language = lang;
  }

  /** @param {string} description */
  addDescription(description) {
    this.metadata.description = description;
  }

  /** @param {string} isbn */
  addIsbn(isbn) {
    this.metadata.isbn = isbn;
  }

  /** Placeholder; currently does nothing. */
  addMetaData() {
  }

  /**
   * Registers a file in the manifest. Files with a `text/html` MIME type get
   * an EpubXhtml converter in `override` and are listed with its XHTML MIME
   * type instead.
   *
   * @param {string} filename - Path of the file inside the EPUB; also the manifest key.
   * @param {string} mime - MIME type of the file.
   * @param {string} [property] - Optional manifest `properties` value
   *   (e.g. 'nav'). When omitted, the entry has no `property` key at all.
   */
  addManifest(filename, mime, property) {
    if (mime && mime.startsWith('text/html')) {
      const converter = new EpubXhtml(filename);
      this.override[filename] = converter;
      mime = converter.mime;
    }
    const id = 'id_' + filename.replace(/[^a-z0-9]/gi, '_').toLowerCase();
    const entry = { filename: filename, mime: mime, id: id };
    // Only define the key when a value is given: the OPF generator decides
    // whether to emit `properties` by checking for the key's presence.
    if (property !== undefined) {
      entry.property = property;
    }
    this.manifest[filename] = entry;
  }

  /**
   * Sets the title and stylesheets of a page that has an XHTML converter.
   * Does nothing for files without one.
   *
   * @param {string} filename
   * @param {string} title
   * @param {string[]} css - Stylesheet hrefs.
   */
  addHtmlComponents(filename, title, css) {
    if (filename in this.override) {
      this.override[filename].title = title;
      this.override[filename].css = css;
    }
  }

  /**
   * @param {string} filename - A file previously passed to addManifest.
   * @returns {string} The manifest id of that file.
   */
  getId(filename) {
    return this.manifest[filename].id;
  }

  /** @param {string} filename - Manifest file to append to the reading order. */
  addSpine(filename) {
    this.spine.push(this.manifest[filename].id);
  }

  /** @param {string} filename - Manifest file to flag as the cover image. */
  addCover(filename) {
    this.manifest[filename].property = 'cover-image';
  }

  /**
   * Appends an entry to the table of contents.
   *
   * @param {string} filename - Target href of the entry.
   * @param {string} label - Text shown for the entry.
   * @param {number} depth - Nesting level of the entry.
   */
  addToc(filename, label, depth) {
    this.toc.push({
      href: filename,
      label: label,
      depth: depth,
    });
  }

  /** @returns {EpubOpf} Generator for the package document. */
  opf() {
    return new EpubOpf("content.opf", this);
  }

  /** @returns {EpubNcx} Generator for the NCX table of contents. */
  ncx() {
    return new EpubNcx("content.ncx", this);
  }

  /** @returns {EpubNav} Generator for the XHTML navigation document. */
  nav() {
    return new EpubNav("content.xhtml", this);
  }
}
/**
 * Assembles an EPUB archive: owns the zip container and the EpubMeta that
 * describes its contents.
 */
class EpubWriter {
  constructor() {
    this.zip = new JSZip();
    this.meta = new EpubMeta();
    this.override = {};
    this.createEpubStruct();
  }

  /**
   * Adds the fixed container files: the uncompressed `mimetype` entry and
   * `META-INF/container.xml`, which points at the package document.
   */
  createEpubStruct() {
    const mimetypeOptions = {compression: "STORE"};
    this.zip.file("mimetype", "application/epub+zip", mimetypeOptions);
    const containerXml = `<?xml version='1.0' encoding='utf-8'?>
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
<rootfiles>
<rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/>
</rootfiles>
</container>
`;
    this.zip.file("META-INF/container.xml", containerXml);
  }

  /**
   * Stores a file under `META-INF/`.
   * @param {string} filename
   * @param {*} content
   */
  addMetaInfoFile(filename, content) {
    const path = "META-INF/" + filename;
    this.zip.file(path, content);
  }

  /**
   * Stores a file under `EPUB/`. When the metadata holds a converter for the
   * file, the content is run through it first.
   * @param {string} filename
   * @param {*} content
   */
  addFile(filename, content) {
    console.log(`epub ${filename}:`);
    const converters = this.meta.override;
    const payload = (filename in converters)
      ? converters[filename].convert(content)
      : content;
    this.zip.file("EPUB/" + filename, payload);
  }

  /**
   * Adds the generated NCX, navigation and package documents, then asks the
   * zip container to build the archive.
   * @returns {*} Whatever the zip container's generateAsync returns.
   */
  generateAsync() {
    for (const part of ['ncx', 'nav', 'opf']) {
      this.addFile(this.meta[part]().filename, this.meta[part]().generate());
    }
    const options = {
      type: generatedBufferType,
      compression: "DEFLATE",
    };
    return this.zip.generateAsync(options);
  }
}
// module.exports = EpubWriter
// Shared, file-level helpers used by the EPUB classes below.
// The commented-out require() lines are presumably the Node.js alternatives
// to the browser globals used here — TODO confirm before deleting them.
// var JSZip = require('jszip');
// var StringDecoder = require('string_decoder').StringDecoder;
// let html = new StringDecoder("utf-8").write(content)
// Decodes raw page content as UTF-8 text (used by EpubXhtml.convert).
var textDecoder = new TextDecoder("utf-8");
// var xmlSerializer = require('xmlserializer')
// Serializes the parsed DOM back to XML markup (used by EpubXhtml.convert).
var xmlSerializer = new XMLSerializer();
// var DOMParser = require('xmldom').DOMParser
// Parses HTML text into a DOM (used by EpubXhtml.convert).
var domParser = new DOMParser();
// Value passed as the `type` option to JSZip's generateAsync in EpubWriter.
var generatedBufferType = 'blob'
/**
 * Escapes text for safe inclusion in XML, both as element content and as an
 * attribute value: & < > " ' become &amp; &lt; &gt; &quot; &#39;.
 *
 * Implemented as a pure string replacement (no DOM, no jQuery). Going through
 * HTML serialization left quotes unescaped, although the result is also
 * interpolated into attributes, and turned U+00A0 into `&nbsp;`, which is not
 * a defined entity in XML.
 *
 * @param {*} text - Value to escape; null and undefined yield ''.
 * @returns {string} The escaped text.
 */
var escape = function (text) {
  if (text === null || text === undefined) {
    return '';
  }
  var entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  return String(text).replace(/[&<>"']/g, function (ch) {
    return entities[ch];
  });
}
/**
 * Reduces an HTML fragment to plain text, keeping a little structure:
 * paragraph and list-item ends become newlines, list items get a leading "-"
 * and bold text is wrapped in "*".
 *
 * The remaining markup is stripped by parsing it with the shared DOMParser.
 * That produces a separate document, unlike assigning the markup to an
 * element of the live page via jQuery `.html()`, which can run scripts or
 * load resources contained in an untrusted description.
 *
 * @param {string} html - HTML fragment; null and undefined yield ''.
 * @returns {string} The text content of the fragment.
 */
var html2txt = function (html) {
  if (html === null || html === undefined) {
    return '';
  }
  var marked = String(html)
    .replace(/<\/p>/gi, "\n")
    .replace(/<\/li>/gi, "\n")
    .replace(/<li>/gi, "-")
    .replace(/<b>/gi, "*")
    .replace(/<\/b>/gi, "*");
  return domParser.parseFromString(marked, 'text/html').body.textContent;
}
/**
 * Converts a downloaded HTML page into a standalone XHTML content document.
 *
 * Optional `title` and `css` properties may be assigned on the instance
 * before `convert` is called; they fill the generated `<head>`.
 */
class EpubXhtml {
  /**
   * @param {string} filename - Name of the document inside the EPUB.
   */
  constructor(filename) {
    this.filename = filename;
    this.mime = 'application/xhtml+xml';
  }

  /**
   * Builds the XHTML document for this page.
   *
   * @param {ArrayBuffer|ArrayBufferView|string} content - Raw UTF-8 bytes of
   *   the HTML page, or the already-decoded HTML text.
   * @returns {string} A complete XHTML document.
   */
  convert(content) {
    // TextDecoder only accepts buffers; pass decoded text straight through.
    const html = typeof content === 'string' ? content : textDecoder.decode(content);
    const dom = domParser.parseFromString(html, 'text/html');
    // Serialize only the parsed <body>. Serializing the whole document would
    // nest a second <html>/<head> (and possibly a DOCTYPE) inside the wrapper
    // body below, which is not a valid XHTML content document.
    const body = xmlSerializer.serializeToString(dom.body);

    let css_link = '';
    if (this.css) {
      // A lone string is one stylesheet; otherwise take the own values only
      // (for...in would also pick up inherited enumerable properties).
      const hrefs = typeof this.css === 'string' ? [this.css] : Object.values(this.css);
      for (const href of hrefs) {
        css_link += ` <link href="${href}" rel="stylesheet" type="text/css"/>\n`;
      }
    }
    const title = this.title ? this.title : '';

    return `<?xml version="1.0" encoding="utf-8" standalone="no"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"
xmlns:epub="http://www.idpf.org/2007/ops">
<head>
<title>${title}</title>
${css_link}
</head>
${body}
</html>
`;
  }
}
/**
 * Generator for the EPUB package document (OPF): metadata, manifest, spine
 * and guide, rendered from the data held by `meta`.
 */
class EpubOpf {
  /**
   * @param {string} filename - Name of the package file inside the EPUB.
   * @param {object} meta - Provides `metadata`, `manifest`, `spine`,
   *   `getId()`, `ncx()` and `nav()`.
   */
  constructor(filename, meta) {
    this.filename = filename;
    this.meta = meta;
  }

  /**
   * Renders the package document. Values are interpolated verbatim, so they
   * must already be XML-safe.
   *
   * @returns {string} The OPF XML.
   */
  generate() {
    const m = this.meta;
    const md = m.metadata;
    // dcterms:modified is the publication's last-modified time. Use the value
    // supplied in metadata.modified when present, otherwise the current UTC
    // time in CCYY-MM-DDThh:mm:ssZ form (no fractional seconds).
    const modified = md.modified
      ? md.modified
      : new Date().toISOString().replace(/\.\d+Z$/, 'Z');

    let cnt = `<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://www.idpf.org/2007/opf"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
unique-identifier="pub-id"
version="3.0">
<metadata>
<meta property="dcterms:modified">${modified}</meta>
<dc:rights>Please read the legal notice included in this e-book and/or check the copyright status in your country.</dc:rights>
<dc:identifier id="pub-id">${md.book_id}</dc:identifier>
<dc:title>${md.title}</dc:title>
<dc:language>${md.language}</dc:language>
`;
    if (md.description) {
      cnt += ` <dc:description>${md.description}</dc:description>\n`;
    }
    if (md.isbn) {
      cnt += ` <dc:identifier id="isbn">${md.isbn}</dc:identifier>\n`;
      // ONIX code list 5: 15 = ISBN-13, 02 = ISBN-10.
      const onixcode = String(md.isbn).length > 10 ? '15' : '02';
      cnt += ` <meta refines="#isbn" property="identifier-type" scheme="onix:codelist5">${onixcode}</meta>\n`;
    }
    if (md.publisher) {
      cnt += ` <dc:publisher>${md.publisher}</dc:publisher>\n`;
    }
    if (md.cover_image) {
      // NOTE(review): the usual EPUB 2 form is <meta name="cover" content="ID"/>;
      // name/content look swapped here — confirm before changing.
      cnt += ` <meta content="cover-image" name="${md.cover_image}"/>\n`;
    }
    for (const subject of Object.values(md.subject || [])) {
      cnt += ` <dc:subject>${subject}</dc:subject>\n`;
    }
    for (const [key, author] of Object.entries(md.author || [])) {
      cnt += ` <dc:creator id="creator-${key}">${author}</dc:creator>\n`;
      // The attribute is `scheme` (as on the ISBN meta above), not `schema`.
      cnt += ` <meta refines="#creator-${key}" property="role" scheme="marc:relators">aut</meta>\n`;
    }

    cnt += ' </metadata>\n <manifest>\n';
    for (const f of Object.values(m.manifest || {})) {
      if (Object.prototype.hasOwnProperty.call(f, 'property')) {
        cnt += ` <item id="${f.id}" media-type="${f.mime}" href="${f.filename}" properties="${f.property}"/>\n`;
      } else {
        cnt += ` <item id="${f.id}" media-type="${f.mime}" href="${f.filename}"/>\n`;
      }
    }

    cnt += ` </manifest>\n <spine toc="${m.getId(m.ncx().filename)}">\n`;
    for (const idref of Object.values(m.spine || [])) {
      cnt += ` <itemref idref="${idref}" />\n`;
    }

    cnt += ' </spine>\n <guide>\n';
    cnt += ` <reference href="${m.nav().filename}" title="Table of Contents" type="toc"/>\n`;
    cnt += ' </guide>\n</package>\n';
    return cnt;
  }
}
/**
 * Generator for the NCX table of contents (`toc.ncx`-style navigation map),
 * rendered from the `toc` entries and metadata held by `meta`.
 */
class EpubNcx {
  /**
   * @param {string} filename - Name of the NCX file inside the EPUB.
   * @param {object} meta - Provides `metadata` (book_id, title) and `toc`
   *   (array of `{href, label, depth}`).
   */
  constructor(filename, meta) {
    this.filename = filename;
    this.meta = meta;
    this.mime = 'application/x-dtbncx+xml';
  }

  /**
   * Renders the NCX document. Labels and hrefs are interpolated verbatim, so
   * they must already be XML-safe.
   *
   * @returns {string} The NCX XML.
   */
  generate() {
    const m = this.meta;
    const entries = m.toc;
    // Depths of the navPoints that are currently open (innermost last).
    const openDepths = [];
    let max_depth = 0;
    let toc = '';

    for (let i = 0; i < entries.length; ++i) {
      const entry = entries[i];
      // playOrder is 1-based: the first value in an NCX document must be 1.
      toc += ` <navPoint class="chapter" id="navpoint-${i}" playOrder="${i + 1}">\n`
        + ` <navLabel><text>${entry.label}</text></navLabel>\n`
        + ` <content src="${entry.href}"/>\n`;
      openDepths.push(entry.depth);
      if (max_depth < entry.depth) {
        max_depth = entry.depth;
      }

      // Close every open navPoint that is not an ancestor of the next entry;
      // after the last entry close everything. Tracking what is actually open
      // keeps the XML balanced even when depths skip levels or do not start
      // at 1.
      const isLast = i + 1 === entries.length;
      while (openDepths.length > 0
        && (isLast || openDepths[openDepths.length - 1] >= entries[i + 1].depth)) {
        openDepths.pop();
        toc += ' </navPoint>\n';
      }
    }

    return `<?xml version="1.0" encoding="UTF-8"?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" xml:lang="en" version="2005-1">
<head>
<meta name="dtb:uid" content="${m.metadata.book_id}"/>
<meta name="dtb:depth" content="${max_depth}"/>
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>
<docTitle>
<text>${m.metadata.title}</text>
</docTitle>
<navMap>
${toc}
</navMap>
</ncx>
`;
  }
}
/**
 * Generator for the XHTML navigation document (`<nav epub:type="toc">`),
 * rendered from the `toc` entries and title held by `meta`.
 */
class EpubNav {
  /**
   * @param {string} filename - Name of the navigation file inside the EPUB.
   * @param {object} meta - Provides `metadata.title` and `toc`
   *   (array of `{href, label, depth}`).
   */
  constructor(filename, meta) {
    this.filename = filename;
    this.meta = meta;
    this.mime = 'application/xhtml+xml';
  }

  /**
   * Renders the navigation document. Labels and hrefs are interpolated
   * verbatim, so they must already be XML-safe.
   *
   * @returns {string} The XHTML navigation document.
   */
  generate() {
    const m = this.meta;
    const entries = m.toc;
    // Depths of the <li> items that currently have a nested <ol> open.
    const parentDepths = [];
    let toc = '';

    for (let i = 0; i < entries.length; ++i) {
      const entry = entries[i];
      toc += ` <li><a href="${entry.href}">${entry.label}</a>`;
      const isLast = i + 1 === entries.length;

      if (!isLast && entry.depth < entries[i + 1].depth) {
        // The next entry is a child: keep this <li> open and start exactly
        // one nested list, even if the depth skips levels (an <ol> directly
        // inside an <ol> would be invalid).
        parentDepths.push(entry.depth);
        toc += ' <ol>\n';
        continue;
      }

      // Climb out of every nested list whose parent is not an ancestor of
      // the next entry; after the last entry close them all. Tracking the
      // open lists keeps the markup balanced for any depth sequence.
      while (parentDepths.length > 0
        && (isLast || parentDepths[parentDepths.length - 1] >= entries[i + 1].depth)) {
        parentDepths.pop();
        toc += ' </li></ol>\n';
      }
      toc += ' </li>\n';
    }

    return `<?xml version="1.0" encoding="utf-8" standalone="no"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml"
xmlns:epub="http://www.idpf.org/2007/ops">
<head>
<title>${m.metadata.title}</title>
</head>
<body>
<h1>${m.metadata.title}</h1>
<nav epub:type="toc" id="toc">
<h2>Table Of Content</h2>
<ol>
${toc}
</ol>
</nav>
</body>
</html>
`;
  }
}
/**
 * Collects everything needed to describe an EPUB: Dublin Core metadata, the
 * manifest of files, the spine (reading order) and the table of contents.
 * Metadata values are XML-escaped when they are stored.
 *
 * NOTE(review): `addToc` and `addHtmlComponents` store their label/title
 * unescaped although the generators interpolate them into XML — confirm
 * whether callers pre-escape them.
 */
class EpubMeta {
  constructor() {
    this.metadata = {
      book_id: 'book_id',
      isbn: null,
      title: 'title',
      language: 'en',
      author: [],
      publisher: null,
      cover_image: null,
      subject: [],
      description: null,
    };
    this.manifest = {};
    this.spine = [];
    this.guide = [];
    this.toc = [];
    this.override = {};
    // The NCX and navigation documents are always part of the manifest.
    const ncx = this.ncx();
    const nav = this.nav();
    this.addManifest(ncx.filename, ncx.mime);
    this.addManifest(nav.filename, nav.mime, 'nav');
  }

  /** @param {string} book_id - Unique identifier of the publication. */
  addBookId(book_id) {
    this.metadata.book_id = escape(book_id);
  }

  /** @param {string} title */
  addTitle(title) {
    this.metadata.title = escape(title);
  }

  /** @param {string} author - Appended to the list of authors. */
  addAuthor(author) {
    this.metadata.author.push(escape(author));
  }

  /** @param {string} publisher */
  addPublisher(publisher) {
    this.metadata.publisher = escape(publisher);
  }

  /** @param {string} subject - Appended to the list of subjects. */
  addSubject(subject) {
    this.metadata.subject.push(escape(subject));
  }

  /** @param {string} lang - Language code of the publication. */
  addLanguage(lang) {
    this.metadata.language = escape(lang);
  }

  /**
   * Stores the description as escaped plain text (HTML markup is removed).
   * @param {?string} description - HTML or plain text; null/undefined clears it.
   */
  addDescription(description) {
    // html2txt calls String methods on its input, so guard missing values
    // instead of letting them throw.
    if (description === null || description === undefined) {
      this.metadata.description = null;
      return;
    }
    this.metadata.description = escape(html2txt(description));
  }

  /** @param {string} isbn */
  addIsbn(isbn) {
    this.metadata.isbn = escape(isbn);
  }

  /** Placeholder; currently does nothing. */
  addMetaData() {
  }

  /**
   * Registers a file in the manifest. Files with a `text/html` MIME type get
   * an EpubXhtml converter in `override` and are listed with its XHTML MIME
   * type instead.
   *
   * @param {string} filename - Path of the file inside the EPUB; also the manifest key.
   * @param {string} mime - MIME type of the file.
   * @param {string} [property] - Optional manifest `properties` value
   *   (e.g. 'nav'). When omitted, the entry has no `property` key at all.
   */
  addManifest(filename, mime, property) {
    if (mime && mime.startsWith('text/html')) {
      const converter = new EpubXhtml(filename);
      this.override[filename] = converter;
      mime = converter.mime;
    }
    const id = 'id_' + filename.replace(/[^a-z0-9]/gi, '_').toLowerCase();
    const entry = { filename: filename, mime: mime, id: id };
    // Only define the key when a value is given: the OPF generator decides
    // whether to emit `properties` by checking for the key's presence.
    if (property !== undefined) {
      entry.property = property;
    }
    this.manifest[filename] = entry;
  }

  /**
   * Sets the title and stylesheets of a page that has an XHTML converter.
   * Does nothing for files without one.
   *
   * @param {string} filename
   * @param {string} title
   * @param {string[]} css - Stylesheet hrefs.
   */
  addHtmlComponents(filename, title, css) {
    if (filename in this.override) {
      this.override[filename].title = title;
      this.override[filename].css = css;
    }
  }

  /**
   * @param {string} filename - A file previously passed to addManifest.
   * @returns {string} The manifest id of that file.
   */
  getId(filename) {
    return this.manifest[filename].id;
  }

  /** @param {string} filename - Manifest file to append to the reading order. */
  addSpine(filename) {
    this.spine.push(this.manifest[filename].id);
  }

  /** @param {string} filename - Manifest file to flag as the cover image. */
  addCover(filename) {
    this.manifest[filename].property = 'cover-image';
  }

  /**
   * Appends an entry to the table of contents.
   *
   * @param {string} filename - Target href of the entry.
   * @param {string} label - Text shown for the entry.
   * @param {number} depth - Nesting level of the entry.
   */
  addToc(filename, label, depth) {
    this.toc.push({
      href: filename,
      label: label,
      depth: depth,
    });
  }

  /** @returns {EpubOpf} Generator for the package document. */
  opf() {
    return new EpubOpf("content.opf", this);
  }

  /** @returns {EpubNcx} Generator for the NCX table of contents. */
  ncx() {
    return new EpubNcx("content.ncx", this);
  }

  /** @returns {EpubNav} Generator for the XHTML navigation document. */
  nav() {
    return new EpubNav("content.xhtml", this);
  }
}
/**
 * Assembles an EPUB archive: owns the zip container and the EpubMeta that
 * describes its contents.
 */
class EpubWriter {
  constructor() {
    this.zip = new JSZip();
    this.meta = new EpubMeta();
    this.override = {};
    this.createEpubStruct();
  }

  /**
   * Adds the fixed container files: the uncompressed `mimetype` entry and
   * `META-INF/container.xml`, which points at the package document.
   */
  createEpubStruct() {
    const mimetypeOptions = {compression: "STORE"};
    this.zip.file("mimetype", "application/epub+zip", mimetypeOptions);
    const containerXml = `<?xml version='1.0' encoding='utf-8'?>
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
<rootfiles>
<rootfile media-type="application/oebps-package+xml" full-path="EPUB/content.opf"/>
</rootfiles>
</container>
`;
    this.zip.file("META-INF/container.xml", containerXml);
  }

  /**
   * Stores a file under `META-INF/`.
   * @param {string} filename
   * @param {*} content
   */
  addMetaInfoFile(filename, content) {
    const path = "META-INF/" + filename;
    this.zip.file(path, content);
  }

  /**
   * Stores a file under `EPUB/`. When the metadata holds a converter for the
   * file, the content is run through it first.
   * @param {string} filename
   * @param {*} content
   */
  addFile(filename, content) {
    console.log(`epub ${filename}:`);
    const converters = this.meta.override;
    const payload = (filename in converters)
      ? converters[filename].convert(content)
      : content;
    this.zip.file("EPUB/" + filename, payload);
  }

  /**
   * Adds the generated NCX, navigation and package documents, then asks the
   * zip container to build the archive.
   * @returns {*} Whatever the zip container's generateAsync returns.
   */
  generateAsync() {
    for (const part of ['ncx', 'nav', 'opf']) {
      this.addFile(this.meta[part]().filename, this.meta[part]().generate());
    }
    const options = {
      type: generatedBufferType,
      compression: "DEFLATE",
    };
    return this.zip.generateAsync(options);
  }
}
// module.exports = EpubWriter