// Simple helper to extract text from the root PDF for inspection.
const fs = require('fs');
const path = require('path');
const pdfParse = require('pdf-parse');

// The input PDF and the generated text file both live two directories above this script.
const ROOT_DIR = path.resolve(__dirname, '..', '..');

// Upper bound on the number of non-empty lines echoed to stdout as a preview.
const PREVIEW_LINE_COUNT = 200;

/**
 * Extracts the text of `Organigramm_ohne_Namen.pdf` and writes it to
 * `organigramm_text.txt` next to it, then prints a short summary (text
 * length, page count, output path) and a preview of the first non-empty
 * lines to stdout.
 *
 * If the PDF does not exist, an error is logged and the process exits with
 * status 1. Any other failure rejects the returned promise.
 *
 * @returns {Promise<void>} Resolves once the text file is written and the
 *   preview has been printed.
 */
async function main() {
  const pdfPath = path.resolve(ROOT_DIR, 'Organigramm_ohne_Namen.pdf');

  // Read directly instead of checking existence first: a separate
  // existsSync() check can go stale before the read happens.
  let pdfBuffer;
  try {
    pdfBuffer = fs.readFileSync(pdfPath);
  } catch (err) {
    if (err.code !== 'ENOENT') {
      throw err;
    }
    console.error('PDF not found at', pdfPath);
    process.exit(1);
  }

  const parsed = await pdfParse(pdfBuffer);

  const outPath = path.resolve(ROOT_DIR, 'organigramm_text.txt');
  fs.writeFileSync(outPath, parsed.text, 'utf8');

  console.log('Extracted text length:', parsed.text.length);
  console.log('Pages:', parsed.numpages);
  console.log('Saved to:', outPath);

  // Print the first lines to stdout for a quick view; blank and
  // whitespace-only lines are dropped before counting.
  const lines = parsed.text
    .split('\n')
    .map((line) => line.trim())
    .filter(Boolean);
  console.log('--- First lines ---');
  console.log(lines.slice(0, PREVIEW_LINE_COUNT).join('\n'));
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});