2020-03-22 13:51:40 +00:00
|
|
|
require("dotenv").config();
|
|
|
|
const http = require('http');
|
|
|
|
const fs = require("fs");
|
2020-04-08 10:19:26 +00:00
|
|
|
const axios = require('axios');
|
|
|
|
const uuid4 = require('uuid4');
|
2020-03-22 13:51:40 +00:00
|
|
|
const puppeteer = require('puppeteer');
|
2020-04-08 10:19:26 +00:00
|
|
|
const JSZip = require('jszip');
|
2020-03-22 13:51:40 +00:00
|
|
|
|
|
|
|
/**
 * Picks the Content-Type header for a static file based on its extension.
 *
 * @param {string} fileName - Path or name of the file being served.
 * @returns {string} MIME type; anything that is not .js/.json/.png is served as HTML.
 */
function staticContentType(fileName) {
  if (fileName.endsWith(".js")) {
    return 'application/javascript';
  }
  if (fileName.endsWith(".json")) {
    return 'application/json';
  }
  if (fileName.endsWith(".png")) {
    return 'image/png';
  }
  return 'text/html';
}

/**
 * Maps a request pathname onto a file below `rootDir`.
 *
 * @param {string} rootDir - Directory the static files live in.
 * @param {string} pathname - Path component of the request (no query string).
 * @returns {string|null} File path to read, or null when the path contains a
 *   ".." segment and could therefore escape `rootDir`.
 */
function resolveStaticFile(rootDir, pathname) {
  // Treat both separators as segment boundaries so "..\" cannot slip through.
  const segments = pathname.split(/[\\/]/);
  if (segments.includes("..")) {
    return null;
  }
  const relativePath = pathname === "/" || pathname === "" ? "/index.html" : pathname;
  return rootDir + relativePath;
}

/**
 * Parses the user-supplied `website` query value.
 *
 * @param {string} rawWebsite - Value of the `website` query parameter.
 * @returns {URL|null} Parsed URL, or null when the value is not a valid
 *   http(s) URL. Other schemes (e.g. file:) are rejected because the URL is
 *   later opened by a headless browser / HTTP client on this machine.
 */
function parseTargetWebsite(rawWebsite) {
  let parsed;
  try {
    parsed = new URL(rawWebsite);
  } catch (err) {
    return null;
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    return null;
  }
  return parsed;
}

/**
 * Streams a file from disk as the HTTP response.
 *
 * @param {http.ServerResponse} res - Response to write to.
 * @param {string} filePath - File to read.
 * @param {number} statusCode - Status to send when the file was read.
 * @param {string} contentType - Content-Type to send when the file was read.
 *   If the file cannot be read, a 404 with the serialised error is sent instead.
 */
function sendFile(res, filePath, statusCode, contentType) {
  fs.readFile(filePath, (err, data) => {
    if (err) {
      res.writeHead(404);
      res.end(JSON.stringify(err));
      return;
    }
    res.writeHead(statusCode, { 'Content-Type': contentType });
    res.end(data);
  });
}

/**
 * HTTP entry point.
 *
 * - `?website=<url>`: converts the page (or forwards the PDF) and answers with
 *   success.html (200) or failure.html (500; 400 when the URL is unusable).
 * - anything else: serves a static file from this directory.
 */
const server = http.createServer(async (req, res) => {
  let incomingURL;
  try {
    incomingURL = new URL(`http://localhost:8000${req.url}`);
  } catch (err) {
    // A request target that does not form a URL cannot be routed.
    res.writeHead(400);
    res.end();
    return;
  }

  const requestedWebsite = incomingURL.searchParams.get("website");
  if (requestedWebsite) {
    const website = parseTargetWebsite(requestedWebsite);
    if (website === null) {
      sendFile(res, __dirname + "/failure.html", 400, 'text/html');
      return;
    }
    console.log(`Fetching '${website.toString()}'`);

    const fn = website.toString().endsWith(".pdf") ? sendPDF : sendPage;

    // The workers may still reject (e.g. the browser fails to launch); treat
    // that as a failed delivery instead of leaving the request hanging.
    let delivered = false;
    try {
      delivered = await fn(website);
    } catch (err) {
      console.log(err);
    }

    if (delivered) {
      sendFile(res, __dirname + "/success.html", 200, 'text/html');
    } else {
      sendFile(res, __dirname + "/failure.html", 500, 'text/html');
    }
    return;
  }

  // Use the parsed pathname: it carries no query string and has dot segments
  // already collapsed by the URL parser.
  const filePath = resolveStaticFile(__dirname, incomingURL.pathname);
  if (filePath === null) {
    res.writeHead(404);
    res.end();
    return;
  }
  sendFile(res, filePath, 200, staticContentType(filePath));
});
|
|
|
|
|
|
|
|
// Start accepting connections on port 8000 (the same port used as the base when parsing request URLs).
server.listen(8000);
|
|
|
|
|
2020-04-08 10:19:26 +00:00
|
|
|
/**
 * Derives a human-readable document title from a PDF URL: the text between
 * the last "/" and the last "." of the serialised URL, percent-decoded.
 *
 * @param {URL|string} website - Location of the PDF.
 * @returns {string} Decoded title; the raw (still encoded) text is returned
 *   when it contains malformed percent escapes.
 */
function pdfTitleFromURL(website) {
  const href = website.toString();
  const rawTitle = href.substring(href.lastIndexOf("/") + 1, href.lastIndexOf("."));
  try {
    // URL serialisation percent-encodes spaces etc.; undo that for display.
    return decodeURIComponent(rawTitle);
  } catch (err) {
    return rawTitle;
  }
}

/**
 * Downloads a PDF and forwards it via `sendToRemarkable`.
 *
 * @param {URL|string} website - Location of the PDF.
 * @param {number} [tries=0] - Number of attempts already used; the download
 *   and upload are retried until this counter reaches 5.
 * @returns {Promise<boolean>} true once an attempt succeeded, false when
 *   every attempt failed (each failure is logged).
 */
async function sendPDF(website, tries = 0) {
  for (let attempt = tries; ; attempt += 1) {
    try {
      const response = await axios.get(website.toString(), {
        responseType: 'arraybuffer',
      });
      const title = pdfTitleFromURL(website);
      await sendToRemarkable(title, Buffer.from(response.data, 'binary'));

      return true;
    } catch (ex) {
      console.log(ex);
      if (attempt >= 5) {
        return false;
      }
    }
  }
}
|
|
|
|
|
2020-03-22 13:51:40 +00:00
|
|
|
/**
 * Renders a web page to PDF in a headless browser and hands the PDF to
 * `sendToRemarkable`, using the page's title as the document title.
 *
 * Steps: launch the browser, open `website`, inject a print stylesheet,
 * clean the page up in-page (reader view where applicable), render an A4 PDF
 * and upload it.
 *
 * @param {URL|string} website - Page to convert; `toString()` is passed to `page.goto`.
 * @param {number} [tries=0] - Attempts already used; on failure the function
 *   calls itself again while this is below 5.
 * @returns {Promise<boolean>} true when the PDF was produced and uploaded,
 *   false once the retries are exhausted.
 *
 * NOTE(review): `puppeteer.launch` runs before the `try`, so a launch failure
 * rejects the returned promise instead of resulting in `false`.
 * NOTE(review): the retry call happens inside `catch`, i.e. before `finally`
 * closes this attempt's browser, so a new browser is launched while the
 * previous one is still open.
 */
async function sendPage(website, tries = 0) {
|
|
|
|
// Browser binary comes from the CHROMIUM_PATH environment variable.
const browser = await puppeteer.launch({
|
2020-04-08 10:19:26 +00:00
|
|
|
ignoreHTTPSErrors: true,
|
2020-03-22 13:51:40 +00:00
|
|
|
executablePath: process.env.CHROMIUM_PATH,
|
|
|
|
args: ['--disable-dev-shm-usage', '--no-sandbox']
|
|
|
|
});
|
|
|
|
try {
|
|
|
|
const page = await browser.newPage();
|
2020-03-26 17:24:53 +00:00
|
|
|
// The navigation is sent with a Google referer header.
await page.goto(website.toString(), { referer: "https://www.google.com/" });
|
2020-03-22 13:51:40 +00:00
|
|
|
const title = await page.title()
|
|
|
|
console.log("Page loaded. Title - " + title)
|
|
|
|
|
2020-04-08 10:19:26 +00:00
|
|
|
// Inject a stylesheet that overrides fonts, colours, link rendering and page-break behaviour before the PDF is rendered.
await page.addStyleTag({ content: `
|
|
|
|
body {
|
|
|
|
font-family: Georgia, serif;
|
|
|
|
font-size: 18pt;
|
|
|
|
background: none;
|
|
|
|
color: black;
|
|
|
|
text-align: left;
|
|
|
|
}
|
|
|
|
h1, h2, h3, h4, h5 {
|
|
|
|
page-break-after: avoid;
|
|
|
|
}
|
|
|
|
table, figure, ul, img {
|
|
|
|
page-break-inside: avoid;
|
|
|
|
}
|
|
|
|
a {
|
|
|
|
color: black;
|
|
|
|
}
|
|
|
|
a:after {
|
|
|
|
content: " [" attr(href) "] ";
|
|
|
|
font-size: 0.7em;
|
|
|
|
}
|
|
|
|
a[href^="#"]:after, a[href^="/"]:after {
|
|
|
|
content: "";
|
|
|
|
}
|
|
|
|
blockquote {
|
|
|
|
margin: 10px 2px;
|
|
|
|
line-height: 2em;
|
|
|
|
border: 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
code {
|
|
|
|
background: none !important;
|
|
|
|
font-family: monospace;
|
|
|
|
}
|
|
|
|
|
|
|
|
ul li {
|
|
|
|
list-style: disc !important;
|
|
|
|
}
|
|
|
|
|
|
|
|
h1 {
|
|
|
|
font-size: 1.7em;
|
|
|
|
}
|
|
|
|
|
|
|
|
p {
|
|
|
|
margin-bottom: 12px;
|
|
|
|
}
|
|
|
|
|
|
|
|
header {
|
|
|
|
margin-bottom: 14px;
|
|
|
|
border-bottom: 8px solid black;
|
|
|
|
text-align: center;
|
|
|
|
}
|
|
|
|
`});
|
|
|
|
|
|
|
|
// Everything inside this callback runs in the page, not in Node; the promise settles once the image appended at the end has loaded or failed.
await page.evaluate(async () => {
|
|
|
|
return await new Promise(resolve => {
|
|
|
|
// Inlined, minified definitions of `isProbablyReaderable` and `Readability`, used further down.
// NOTE(review): in this copy the minified text is hard-wrapped inside string literals and stops mid-expression, so the definitions are incomplete here — restore them from the original source before editing.
var REGEXPS={unlikelyCandidates:/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,okMaybeItsACandidate:/and|article|body|column|content|main|shadow/i};function isNodeVisible(node){return(!node.style||node.style.display!="none")&&!node.hasAttribute("hidden")&&(!node.hasAttribute("aria-hidden")||node.getAttribute("aria-hidden")!="true"||(node.className&&node.className.indexOf&&node.className.indexOf("fallback-image")!==-1))}function isProbablyReaderable(doc,isVisible){if(!isVisible){isVisible=isNodeVisible}var nodes=doc.querySelectorAll("p, pre");var brNodes=doc.querySelectorAll("div > br");if(brNodes.length){var set=new Set(nodes);[].forEach.call(brNodes,function(node){set.add(node.parentNode)});nodes=Array.from(set)}var score=0;return[].some.call(nodes,function(node){if(!isVisible(node)){return false}var matchString=node.className+" "+node.id;if(REGEXPS.unlikelyCandidates.test(matchString)&&!REGEXPS.okMaybeItsACandidate.test(matchString)){return false}if(node.matches("li p")){return false}var textContentLength=node.textContent.trim().length;if(textContentLength<140){return false}score+=Math.sqrt(textContentLength-140);if(score>20){return true}return false})}if(typeof exports==="object"){exports.isProbablyReaderable=isProbablyReaderable}
|
|
|
|
function Readability(doc,options){if(options&&options.documentElement){doc=options;options=arguments[2]}else if(!doc||!doc.documentElement){throw new Error("First argument to Readability constructor should be a document object.")}options=options||{};this._doc=doc;this._docJSDOMParser=this._doc.firstChild.__JSDOMParser__;this._articleTitle=null;this._articleByline=null;this._articleDir=null;this._articleSiteName=null;this._attempts=[];this._debug=!!options.debug;this._maxElemsToParse=options.maxElemsToParse||this.DEFAULT_MAX_ELEMS_TO_PARSE;this._nbTopCandidates=options.nbTopCandidates||this.DEFAULT_N_TOP_CANDIDATES;this._charThreshold=options.charThreshold||this.DEFAULT_CHAR_THRESHOLD;this._classesToPreserve=this.CLASSES_TO_PRESERVE.concat(options.classesToPreserve||[]);this._keepClasses=!!options.keepClasses;this._flags=this.FLAG_STRIP_UNLIKELYS|this.FLAG_WEIGHT_CLASSES|this.FLAG_CLEAN_CONDITIONALLY;var logEl;if(this._debug){logEl=function(e){var rv=e.nodeName+" ";if(e.nodeType==e.TEXT_NODE){return rv+'("'+e.textContent+'")'}var classDesc=e.className&&("."+e.className.replace(/ /g,"."));var elDesc="";if(e.id){elDesc="(#"+e.id+classDesc+")"}else if(classDesc){elDesc="("+classDesc+")"}return rv+elDesc};this.log=function(){if(typeof dump!=="undefined"){var msg=Array.prototype.map.call(arguments,function(x){return(x&&x.nodeName)?logEl(x):x}).join(" ");dump("Reader: (Readability) "+msg+"\n")}else if(typeof console!=="undefined"){var args=["Reader: (Readability) 
"].concat(arguments);console.log.apply(console,args)}}}else{this.log=function(){}}}Readability.prototype={FLAG_STRIP_UNLIKELYS:0x1,FLAG_WEIGHT_CLASSES:0x2,FLAG_CLEAN_CONDITIONALLY:0x4,ELEMENT_NODE:1,TEXT_NODE:3,DEFAULT_MAX_ELEMS_TO_PARSE:0,DEFAULT_N_TOP_CANDIDATES:5,DEFAULT_TAGS_TO_SCORE:"section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),DEFAULT_CHAR_THRESHOLD:500,REGEXPS:{unlikelyCandidates:/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,okMaybeItsACandidate:/and|article|body|column|content|main|shadow/i,positive:/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,negative:/hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,extraneous:/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,byline:/byline|author|dateline|writtenby|p-author/i,replaceFonts:/<(\/?)font[^>]*>/gi,normalize:/\s{2,}/g,videos:/\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i,shareElements:/(\b|_)(share|sharedaddy)(\b|_)/i,nextLink:/(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i,prevLink:/(prev|earl|old|new|<|«)/i,whitespace:/^\s*$/,hasContent:/\S$/},DIV_TO_P_ELEMS:["A","BLOCKQUOTE","DL","DIV","IMG","OL","P","PRE","TABLE","UL","SELECT"],ALTER_TO_DIV_EXCEPTIONS:["DIV","ARTICLE","SECTION","P"],PRESENTATIONAL_ATTRIBUTES:["align","background","bgcolor","border","cellpadding","cellspacing","frame","hspace","rules","style","valign","vspace"],DEPRECATED_SIZE_ATTRIBUTE_ELEMS:["TABLE","TH","TD","HR","PRE"],PHRASING_ELEMS:["ABBR","AUDIO","B","BDO","BR","BUTTON","CITE","CODE","DATA","DATALIST","
DFN","EM","EMBED","I","IMG","INPUT","KBD","LABEL","MARK","MATH","METER","NOSCRIPT","OBJECT","OUTPUT","PROGRESS","Q","RUBY","SAMP","SCRIPT","SELECT","SMALL","SPAN","STRONG","SUB","SUP","TEXTAREA","TIME","VAR","WBR"],CLASSES_TO_PRESERVE:["page"],_postProcessContent:function(articleContent){this._fixRelativeUris(articleContent);if(!this._keepClasses){this._cleanClasses(articleContent)}},_removeNodes:function(nodeList,filterFn){if(this._docJSDOMParser&&nodeList._isLiveNodeList){throw new Error("Do not pass live node lists to _removeNodes")}for(var i=nodeList.length-1;i>=0;i-=1){var node=nodeList[i
|
|
|
|
|
|
|
|
// Expand every <details> element so its content is part of the rendered page.
[...document.querySelectorAll('details')].forEach(details => details.setAttribute('open', ''));
|
|
|
|
|
|
|
|
// Reset fixed/sticky positioning on every element that has it.
[...document.querySelectorAll('*')].forEach(node => {
|
|
|
|
const pos = window.getComputedStyle(node).getPropertyValue("position");
|
|
|
|
if (pos == "fixed" || pos == "sticky") {
|
|
|
|
node.style.position = "unset";
|
|
|
|
}
|
|
|
|
});
|
|
|
|
|
|
|
|
// When the readerable check passes, replace the body with a header (title, optional byline, optional time[datetime] value) followed by the parsed article content; both the check and the parse work on clones of the document.
if (isProbablyReaderable(document.cloneNode(true))) {
|
|
|
|
var documentClone = document.cloneNode(true);
|
|
|
|
var article = new Readability(documentClone).parse();
|
|
|
|
var postedDate = document.querySelector('time[datetime]');
|
|
|
|
var content = `
|
|
|
|
<header>
|
|
|
|
<h1>${article.title}</h1>
|
|
|
|
${article.byline ? `<blockquote>${article.byline}</blockquote>` : ""}
|
|
|
|
${postedDate && postedDate.getAttribute('datetime') ? `<blockquote>${postedDate.getAttribute('datetime')}</blockquote>` : ""}
|
|
|
|
</header>
|
|
|
|
` + article.content;
|
|
|
|
document.body.innerHTML = content;
|
2020-03-22 13:51:40 +00:00
|
|
|
}
|
2020-04-08 10:19:26 +00:00
|
|
|
|
|
|
|
|
|
|
// Add an absolutely positioned image (top-right) fetched from qr.cluster.fun for the current page URL; the promise resolves when it loads, or after removing it when it fails.
// NOTE(review): the page URL is interpolated into the query string without encoding — confirm the service copes with URLs that carry their own query parameters.
var im = document.createElement("img");
|
|
|
|
im.src = `https://qr.cluster.fun/?website=${window.location.toString()}`;
|
|
|
|
im.style = "position:absolute;top:0;right:0;z-index:99999999";
|
|
|
|
im.onload = resolve;
|
|
|
|
im.onerror = () => {
|
|
|
|
document.body.removeChild(im);
|
|
|
|
resolve();
|
|
|
|
}
|
|
|
|
document.body.appendChild(im);
|
2020-03-22 13:51:40 +00:00
|
|
|
})
|
2020-04-08 10:19:26 +00:00
|
|
|
});
|
2020-03-22 13:51:40 +00:00
|
|
|
|
|
2020-04-08 10:19:26 +00:00
|
|
|
// Render the modified page as an A4 PDF with a margin of 40 on every side.
const myPDF = await page.pdf({ format: 'A4', margin: {top: 40, bottom: 40, left: 40, right: 40} });
|
2020-03-22 13:51:40 +00:00
|
|
|
console.log("Saved to PDF")
|
|
|
|
|
2020-04-08 10:19:26 +00:00
|
|
|
// Upload under the title that was read right after navigation.
await sendToRemarkable(title, myPDF);
|
2020-03-22 13:51:40 +00:00
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
} catch (ex) {
|
|
|
|
console.log(ex);
|
|
|
|
// Retry from scratch (new browser, new navigation) while the attempt counter is below 5.
if (tries < 5) {
|
|
|
|
return await sendPage(website, ++tries);
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
// Runs on success and on failure, so this attempt's browser is always closed.
await browser.close();
|
|
|
|
}
|
|
|
|
}
|
2020-04-08 10:19:26 +00:00
|
|
|
|
|
|
|
/**
 * Uploads a PDF as a new document through the reMarkable cloud endpoints.
 *
 * Sequence: exchange the configured token (REMARKABLE_TOKEN) for a fresh one,
 * look up the storage host, request an upload slot for a new document ID,
 * upload a zip containing the PDF plus its `.content` / `.pagedata` entries,
 * then publish the document metadata.
 *
 * @param {string} title - Name to give the document.
 * @param {Buffer} myPDF - PDF bytes to upload.
 * @returns {Promise<void>} Resolves once the metadata update succeeded.
 * @throws Rethrows whatever error interrupted the sequence (after logging it).
 */
async function sendToRemarkable(title, myPDF) {
  try {
    // Refresh token
    const refreshResponse = await axios.post(
      "https://my.remarkable.com/token/json/2/user/new",
      {},
      {
        headers: {
          'Authorization': `Bearer ${process.env.REMARKABLE_TOKEN}`,
        },
      }
    );
    const token = refreshResponse.data;
    // Deliberately not logging the token itself: it is a bearer credential.
    console.log("Refreshed token");

    const authHeaders = { 'Authorization': `Bearer ${token}` };

    // Get storage endpoint
    const discoveryResponse = await axios.get(
      "https://service-manager-production-dot-remarkable-production.appspot.com/service/json/1/document-storage?environment=production&group=auth0%7C5a68dc51cb30df3877a1d7c4&apiVer=2",
      { headers: { ...authHeaders } }
    );
    const storageHost = discoveryResponse.data.Host;
    console.log(`Got storage host: ${storageHost}`);

    // Generate upload request
    const ID = uuid4();
    const uploadRequestResponse = await axios.put(
      `https://${storageHost}/document-storage/json/2/upload/request`,
      [{
        "ID": ID,
        "Type": "DocumentType",
        "Version": 1,
      }],
      { headers: { ...authHeaders } }
    );
    const uploadURL = uploadRequestResponse.data[0].BlobURLPut;
    // The upload URL is not logged either; it is handed out per upload request.
    console.log("Got upload URL");

    // Build zip to upload
    const zip = new JSZip();
    zip.file(`${ID}.content`, JSON.stringify({
      extraMetadata: {},
      fileType: 'pdf',
      lastOpenedPage: 0,
      lineHeight: -1,
      margins: 180,
      pageCount: 0,
      textScale: 1,
      transform: {},
    }));
    zip.file(`${ID}.pagedata`, []);
    zip.file(`${ID}.pdf`, myPDF);
    const zipContent = await zip.generateAsync({ type: 'nodebuffer' });

    // Upload zip
    await axios.put(
      uploadURL,
      zipContent,
      {
        headers: {
          'Content-Type': '',
          ...authHeaders,
        },
      }
    );
    console.log("Uploaded");

    // Populate metadata
    // One timestamp for both fields so they cannot differ by a clock tick.
    const modifiedAt = new Date().toISOString();
    await axios.put(
      `https://${storageHost}/document-storage/json/2/upload/update-status`,
      [{
        ID: ID,
        deleted: false,
        lastModified: modifiedAt,
        ModifiedClient: modifiedAt,
        metadatamodified: false,
        modified: false,
        parent: '',
        pinned: false,
        synced: true,
        type: "DocumentType",
        version: 1,
        // NOTE(review): key spelling kept exactly as in the original request
        // body — presumably what the service expects; confirm before renaming.
        VissibleName: title,
      }],
      { headers: { ...authHeaders } }
    );
    console.log("Upload complete");
  } catch (error) {
    if (error.response) {
      // Log status and body only: the full response object also carries the
      // request config, including the Authorization header.
      console.error(`Request failed with status ${error.response.status}`, error.response.data);
    } else {
      console.error(error.message);
    }
    throw error;
  }
}
|