From 20b2657440fd801eeb89467d3853c43023bbd2ea Mon Sep 17 00:00:00 2001 From: Vahagn Khachatryan Date: Tue, 22 Jan 2019 23:22:59 +0000 Subject: [PATCH] Escaping & + removing html from description. --- src/epub.js | 779 +++++++++++++++++++++++++++------------------------- 1 file changed, 399 insertions(+), 380 deletions(-) diff --git a/src/epub.js b/src/epub.js index 75b2d95..25c29e3 100644 --- a/src/epub.js +++ b/src/epub.js @@ -1,380 +1,399 @@ -// var JSZip = require('jszip'); -// var StringDecoder = require('string_decoder').StringDecoder; -// let html = new StringDecoder("utf-8").write(content) -var textDecoder = new TextDecoder("utf-8"); -// var xmlSerializer = require('xmlserializer') -var xmlSerializer = new XMLSerializer(); -// var DOMParser = require('xmldom').DOMParser -var domParser = new DOMParser(); -var generatedBufferType = 'blob' - -class EpubXhtml{ - constructor(filename) { - this.filename = filename - this.mime = 'application/xhtml+xml' - } - - convert(content){ - let html = textDecoder.decode(content); - let dom = domParser.parseFromString(html, 'text/html'); - let xml = xmlSerializer.serializeToString(dom); - - let css_link = '' - if (this.css) { - for (let i in this.css){ - css_link += ` \n` - } - } - - let title = '' - if (this.title) { - title = this.title - } - - let cnt = ` - - - - ${title} -${css_link} - - -${xml} - - -` - return cnt - } -} - -class EpubOpf{ - constructor(filename, meta) { - this.filename = filename - this.meta = meta - } - - generate(){ - let m = this.meta - let cnt = ` - - - 2013-03-22T12:24:00Z - Please read the legal notice included in this e-book and/or check the copyright status in your country. - ${m.metadata.book_id} - ${m.metadata.title} - ${m.metadata.language} -` - - if (m.metadata.description){ - cnt += ` ${m.metadata.description}\n` - } - if (m.metadata.isbn){ - cnt += ` ${m.metadata.isbn}\n` - let onixcode = m.metadata.isbn.length > 10 ? 
'15' : '02' - cnt += ` ${onixcode}\n` - } - if (m.metadata.publisher){ - cnt += ` ${m.metadata.publisher}\n` - } - if (m.metadata.cover_image){ - cnt += ` \n` - } - for (let s in m.metadata.subject){ - cnt += ` ${m.metadata.subject[s]}\n` - } - for (let a in m.metadata.author){ - cnt += ` ${m.metadata.author[a]}\n` - cnt += ` aut\n` - } - - cnt +=' \n \n' - - for (let id in m.manifest){ - let f = m.manifest[id] - if (f.hasOwnProperty('property')){ - cnt += ` \n` - }else{ - cnt += ` \n` - } - } - cnt += ` \n \n` - for (let i in m.spine){ - cnt += ` \n` - } - cnt += ' \n \n' - cnt += ` \n` - cnt += ' \n\n' - return cnt - } -} - -class EpubNcx{ - constructor(filename, meta) { - this.filename = filename - this.meta = meta - this.mime = 'application/x-dtbncx+xml' - } - - generate(){ - let m = this.meta - - let max_depth = 0; - let toc = '' - for (let i = 0; i < m.toc.length; ++i) { - let toc_item = m.toc[i] - toc += - ` \n` - +` ${toc_item.label}\n` - +` \n` - - if (max_depth < toc_item.depth) { - max_depth = toc_item.depth - } - - let next_depth = 1 - if (i+1 < m.toc.length) { - next_depth = m.toc[i+1].depth - } - - for (let current_dept = toc_item.depth; - current_dept >= next_depth; - current_dept--) { - toc += ' \n' - } - } - - return ` - - - - - - - - - ${m.metadata.title} - - -${toc} - - -` - } -} - -class EpubNav{ - constructor(filename, meta) { - this.filename = filename - this.meta = meta - this.mime = 'application/xhtml+xml' - } - - generate(){ - let m = this.meta - - let toc = '' - for (let i = 0; i < m.toc.length; ++i){ - let toc_item = m.toc[i] - toc += `
  • ${toc_item.label}` - - let next_depth = 1 - if (i+1 < m.toc.length){ - next_depth = m.toc[i+1].depth - } - let current_dept = toc_item.depth - - if (current_dept == next_depth){ - toc += '
  • \n' - } - else if (current_dept < next_depth){ - for (; current_dept < next_depth; current_dept++){ - toc += '
      \n' - } - } - else if (current_dept > next_depth){ - for (; current_dept > next_depth; current_dept--){ - toc += '
    \n' - } - toc += ' \n' - } - } - - return ` - - - - ${m.metadata.title} - - -

    ${m.metadata.title}

    - - - -` - } -} - -class EpubMeta{ - constructor() { - this.metadata = { - book_id: 'book_id', - isbn: null, - title: 'title', - language: 'en', - author: [], - publisher: null, - cover_image: null, - subject: [], - description: null - } - this.manifest = {} - this.spine = [] - this.guide = [] - this.toc = [] - this.override = {} - - this.addManifest(this.ncx().filename, this.ncx().mime) - this.addManifest(this.nav().filename, this.nav().mime, 'nav') - this.manifest[this.nav().filename].property = 'nav' //TODO - } - - addBookId(book_id){ - this.metadata.book_id = book_id - } - addTitle(title){ - this.metadata.title = title - } - addAuthor(author){ - this.metadata.author.push(author) - } - addPublisher(publisher){ - this.metadata.publisher = publisher - } - addSubject(subject){ - this.metadata.subject.push(subject) - } - addLanguage(lang){ - this.metadata.language = lang - } - addDescription(description){ - this.metadata.description = description - } - addIsbn(isbn){ - this.metadata.isbn = isbn - } - - addMetaData(){ - } - - addManifest(filename, mime){ - if (mime && mime.startsWith('text/html')){ - let o = new EpubXhtml(filename) - this.override[filename] = o - mime = o.mime - } - let id = 'id_' + filename.replace(/[^a-z0-9]/gi, '_').toLowerCase() - this.manifest[filename] = - { - filename : filename, - mime : mime, - id : id - } - } - - addHtmlComponents(filename, title, css){ - if (filename in this.override){ - this.override[filename].title = title - this.override[filename].css = css - } - } - - getId(filename){ - return this.manifest[filename].id - } - - addSpine(filename){ - this.spine.push(this.manifest[filename].id) - } - - addCover(filename){ - this.manifest[filename].property = 'cover-image' - } - - addToc(filename, label, depth){ - this.toc.push({ - href: filename, - label: label, - depth: depth - }) - } - - opf(){ - return new EpubOpf("content.opf", this) - } - - ncx(){ - return new EpubNcx("content.ncx", this) - } - - nav(){ - return new 
EpubNav("content.xhtml", this) - } -} - -class EpubWriter{ - constructor() { - this.zip = new JSZip(); - this.meta = new EpubMeta(); - this.override = {} - this.createEpubStruct(); - } - - createEpubStruct(){ - this.zip.file("mimetype", "application/epub+zip", {compression: "STORE"}); - this.zip.file("META-INF/container.xml", -` - - - - - -` ); - } - - addMetaInfoFile(filename, content){ - this.zip.file("META-INF/"+filename, content) - } - - addFile(filename, content){ - console.log(`epub ${filename}:`) - - if (filename in this.meta.override){ - let o = this.meta.override[filename] - content = o.convert(content) - } - - this.zip.file("EPUB/"+filename, content) - } - - generateAsync(){ - this.addFile(this.meta.ncx().filename, this.meta.ncx().generate()) - this.addFile(this.meta.nav().filename, this.meta.nav().generate()) - this.addFile(this.meta.opf().filename, this.meta.opf().generate()) - return this.zip.generateAsync({ - type: generatedBufferType, - compression: "DEFLATE"}) - } -} - -// module.exports = EpubWriter +// var JSZip = require('jszip'); +// var StringDecoder = require('string_decoder').StringDecoder; +// let html = new StringDecoder("utf-8").write(content) +var textDecoder = new TextDecoder("utf-8"); +// var xmlSerializer = require('xmlserializer') +var xmlSerializer = new XMLSerializer(); +// var DOMParser = require('xmldom').DOMParser +var domParser = new DOMParser(); +var generatedBufferType = 'blob' + +/** + * I hope JS is single threaded and not preemptive. + * Escape XML entities. Ex. & -> &amp; + * @param {*} text + */ +var escape = function (text){ + return $('
    ').text(text).html() +} + +var html2txt = function (html) { + html = html.replace(/<\/p>/gi, "\n"); + html = html.replace(/<\/li>/gi, "\n"); + html = html.replace(/
  • /gi, "-"); + html = html.replace(//gi, "*"); + html = html.replace(/<\/b>/gi, "*"); + return $('
    ').html(html).text() +} + + +class EpubXhtml{ + constructor(filename) { + this.filename = filename + this.mime = 'application/xhtml+xml' + } + + convert(content){ + let html = textDecoder.decode(content); + let dom = domParser.parseFromString(html, 'text/html'); + let xml = xmlSerializer.serializeToString(dom); + + let css_link = '' + if (this.css) { + for (let i in this.css){ + css_link += ` \n` + } + } + + let title = '' + if (this.title) { + title = this.title + } + + let cnt = ` + + + + ${title} +${css_link} + + +${xml} + + +` + return cnt + } +} + +class EpubOpf{ + constructor(filename, meta) { + this.filename = filename + this.meta = meta + } + + generate(){ + let m = this.meta + let cnt = ` + + + 2013-03-22T12:24:00Z + Please read the legal notice included in this e-book and/or check the copyright status in your country. + ${m.metadata.book_id} + ${m.metadata.title} + ${m.metadata.language} +` + + if (m.metadata.description){ + cnt += ` ${m.metadata.description}\n` + } + if (m.metadata.isbn){ + cnt += ` ${m.metadata.isbn}\n` + let onixcode = m.metadata.isbn.length > 10 ? 
'15' : '02' + cnt += ` ${onixcode}\n` + } + if (m.metadata.publisher){ + cnt += ` ${m.metadata.publisher}\n` + } + if (m.metadata.cover_image){ + cnt += ` \n` + } + for (let s in m.metadata.subject){ + cnt += ` ${m.metadata.subject[s]}\n` + } + for (let a in m.metadata.author){ + cnt += ` ${m.metadata.author[a]}\n` + cnt += ` aut\n` + } + + cnt +=' \n \n' + + for (let id in m.manifest){ + let f = m.manifest[id] + if (f.hasOwnProperty('property')){ + cnt += ` \n` + }else{ + cnt += ` \n` + } + } + cnt += ` \n \n` + for (let i in m.spine){ + cnt += ` \n` + } + cnt += ' \n \n' + cnt += ` \n` + cnt += ' \n\n' + return cnt + } +} + +class EpubNcx{ + constructor(filename, meta) { + this.filename = filename + this.meta = meta + this.mime = 'application/x-dtbncx+xml' + } + + generate(){ + let m = this.meta + + let max_depth = 0; + let toc = '' + for (let i = 0; i < m.toc.length; ++i) { + let toc_item = m.toc[i] + toc += + ` \n` + +` ${toc_item.label}\n` + +` \n` + + if (max_depth < toc_item.depth) { + max_depth = toc_item.depth + } + + let next_depth = 1 + if (i+1 < m.toc.length) { + next_depth = m.toc[i+1].depth + } + + for (let current_dept = toc_item.depth; + current_dept >= next_depth; + current_dept--) { + toc += ' \n' + } + } + + return ` + + + + + + + + + ${m.metadata.title} + + +${toc} + + +` + } +} + +class EpubNav{ + constructor(filename, meta) { + this.filename = filename + this.meta = meta + this.mime = 'application/xhtml+xml' + } + + generate(){ + let m = this.meta + + let toc = '' + for (let i = 0; i < m.toc.length; ++i){ + let toc_item = m.toc[i] + toc += `
  • ${toc_item.label}` + + let next_depth = 1 + if (i+1 < m.toc.length){ + next_depth = m.toc[i+1].depth + } + let current_dept = toc_item.depth + + if (current_dept == next_depth){ + toc += '
  • \n' + } + else if (current_dept < next_depth){ + for (; current_dept < next_depth; current_dept++){ + toc += '
      \n' + } + } + else if (current_dept > next_depth){ + for (; current_dept > next_depth; current_dept--){ + toc += '
    \n' + } + toc += ' \n' + } + } + + return ` + + + + ${m.metadata.title} + + +

    ${m.metadata.title}

    + + + +` + } +} + +class EpubMeta{ + constructor() { + this.metadata = { + book_id: 'book_id', + isbn: null, + title: 'title', + language: 'en', + author: [], + publisher: null, + cover_image: null, + subject: [], + description: null + } + this.manifest = {} + this.spine = [] + this.guide = [] + this.toc = [] + this.override = {} + + this.addManifest(this.ncx().filename, this.ncx().mime) + this.addManifest(this.nav().filename, this.nav().mime, 'nav') + this.manifest[this.nav().filename].property = 'nav' //TODO + } + + addBookId(book_id){ + this.metadata.book_id = escape(book_id) + } + addTitle(title){ + this.metadata.title = escape(title) + } + addAuthor(author){ + this.metadata.author.push(escape(author)) + } + addPublisher(publisher){ + this.metadata.publisher = escape(publisher) + } + addSubject(subject){ + this.metadata.subject.push(escape(subject)) + } + addLanguage(lang){ + this.metadata.language = escape(lang) + } + addDescription(description){ + this.metadata.description = escape(html2txt(description)) + } + addIsbn(isbn){ + this.metadata.isbn = escape(isbn) + } + + addMetaData(){ + } + + addManifest(filename, mime){ + if (mime && mime.startsWith('text/html')){ + let o = new EpubXhtml(filename) + this.override[filename] = o + mime = o.mime + } + let id = 'id_' + filename.replace(/[^a-z0-9]/gi, '_').toLowerCase() + this.manifest[filename] = + { + filename : filename, + mime : mime, + id : id + } + } + + addHtmlComponents(filename, title, css){ + if (filename in this.override){ + this.override[filename].title = title + this.override[filename].css = css + } + } + + getId(filename){ + return this.manifest[filename].id + } + + addSpine(filename){ + this.spine.push(this.manifest[filename].id) + } + + addCover(filename){ + this.manifest[filename].property = 'cover-image' + } + + addToc(filename, label, depth){ + this.toc.push({ + href: filename, + label: label, + depth: depth + }) + } + + opf(){ + return new EpubOpf("content.opf", this) + } + + ncx(){ + return 
new EpubNcx("content.ncx", this) + } + + nav(){ + return new EpubNav("content.xhtml", this) + } +} + +class EpubWriter{ + constructor() { + this.zip = new JSZip(); + this.meta = new EpubMeta(); + this.override = {} + this.createEpubStruct(); + } + + createEpubStruct(){ + this.zip.file("mimetype", "application/epub+zip", {compression: "STORE"}); + this.zip.file("META-INF/container.xml", +` + + + + + +` ); + } + + addMetaInfoFile(filename, content){ + this.zip.file("META-INF/"+filename, content) + } + + addFile(filename, content){ + console.log(`epub ${filename}:`) + + if (filename in this.meta.override){ + let o = this.meta.override[filename] + content = o.convert(content) + } + + this.zip.file("EPUB/"+filename, content) + } + + generateAsync(){ + this.addFile(this.meta.ncx().filename, this.meta.ncx().generate()) + this.addFile(this.meta.nav().filename, this.meta.nav().generate()) + this.addFile(this.meta.opf().filename, this.meta.opf().generate()) + return this.zip.generateAsync({ + type: generatedBufferType, + compression: "DEFLATE"}) + } +} + +// module.exports = EpubWriter