Files
SkillMate/backend/scripts/extract-pdf.js
2025-09-24 00:28:00 +02:00

26 Zeilen
970 B
JavaScript

// Simple helper to extract text from the root PDF for inspection
const fs = require('fs')
const path = require('path')
const pdfParse = require('pdf-parse')
/**
 * Extracts the text of `Organigramm_ohne_Namen.pdf`, located two directories
 * above this script's directory, and writes it to `organigramm_text.txt` in
 * that same directory.
 *
 * Also prints the extracted text length, the page count, the output path and
 * up to the first 200 non-empty (trimmed) lines to stdout.
 *
 * Exits the process with code 1 if the PDF file does not exist.
 *
 * @returns {Promise<void>} Resolves once the text file is written and the
 *   preview has been printed; rejects if reading, parsing or writing fails.
 */
async function main() {
  // Input and output both live in the directory two levels up from here.
  const baseDir = path.resolve(__dirname, '..', '..')
  const sourcePdf = path.resolve(baseDir, 'Organigramm_ohne_Namen.pdf')
  const targetTxt = path.resolve(baseDir, 'organigramm_text.txt')

  if (!fs.existsSync(sourcePdf)) {
    console.error('PDF not found at', sourcePdf)
    process.exit(1)
  }

  const parsed = await pdfParse(fs.readFileSync(sourcePdf))
  const { text, numpages } = parsed

  fs.writeFileSync(targetTxt, text, 'utf8')

  console.log('Extracted text length:', text.length)
  console.log('Pages:', numpages)
  console.log('Saved to:', targetTxt)

  // Gather the first 200 non-empty lines (whitespace-trimmed) for a quick look.
  const preview = []
  for (const rawLine of text.split('\n')) {
    if (preview.length === 200) {
      break
    }
    const line = rawLine.trim()
    if (line) {
      preview.push(line)
    }
  }

  console.log('--- First lines ---')
  console.log(preview.join('\n'))
}
// Run the script; any rejection from main() is logged and reported to the
// caller through a non-zero exit code.
main().catch((error) => {
  console.error(error)
  process.exit(1)
})