26 Zeilen
970 B
JavaScript
26 Zeilen
970 B
JavaScript
// Simple helper to extract text from the root PDF for inspection
|
|
const fs = require('fs')
|
|
const path = require('path')
|
|
const pdfParse = require('pdf-parse')
|
|
|
|
/**
 * Extracts the text of the organigram PDF located two directories above this
 * script, writes it as a UTF-8 text file into that same directory, and prints
 * a short summary followed by a preview of the extracted lines.
 *
 * Terminates the process with exit code 1 when the PDF file does not exist.
 *
 * @returns {Promise<void>} Settles after the text file has been written and
 *   the preview has been printed.
 */
async function main() {
  // Both the input PDF and the output text file live two levels above this script.
  const rootDir = path.resolve(__dirname, '..', '..')
  const sourcePdf = path.resolve(rootDir, 'Organigramm_ohne_Namen.pdf')

  const pdfExists = fs.existsSync(sourcePdf)
  if (pdfExists === false) {
    console.error('PDF not found at', sourcePdf)
    process.exit(1)
  }

  const pdfBytes = fs.readFileSync(sourcePdf)
  const parsed = await pdfParse(pdfBytes)

  const targetTxt = path.resolve(rootDir, 'organigramm_text.txt')
  fs.writeFileSync(targetTxt, parsed.text, 'utf8')

  console.log('Extracted text length:', parsed.text.length)
  console.log('Pages:', parsed.numpages)
  console.log('Saved to:', targetTxt)

  // Build the preview from whitespace-trimmed lines, dropping the blank ones.
  const nonEmptyLines = []
  for (const rawLine of parsed.text.split('\n')) {
    const trimmed = rawLine.trim()
    if (trimmed) {
      nonEmptyLines.push(trimmed)
    }
  }

  // Only the first 200 remaining lines are echoed to stdout for a quick look.
  const preview = nonEmptyLines.slice(0, 200)
  console.log('--- First lines ---')
  console.log(preview.join('\n'))
}
|
|
|
|
// Script entry point: run main() and turn any rejection into a logged error
// plus a non-zero exit code, so failures are visible to the calling shell.
main().catch((err) => {
  console.error(err)
  process.exit(1)
})
|