Rollback-Punkt – Ansicht klappt nur teilweise
Dieser Commit ist enthalten in:
25
backend/scripts/extract-pdf.js
Normale Datei
25
backend/scripts/extract-pdf.js
Normale Datei
@ -0,0 +1,25 @@
|
||||
// Simple helper to extract text from the root PDF for inspection
|
||||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
const pdfParse = require('pdf-parse')
|
||||
|
||||
/**
 * Extracts the text of `Organigramm_ohne_Namen.pdf` for inspection.
 *
 * The PDF is looked up two directories above this script. Its extracted text
 * is written to `organigramm_text.txt` in that same directory, and a short
 * summary (text length, page count, output path) followed by the first 200
 * non-empty lines is printed to stdout.
 *
 * If the PDF does not exist, an error is logged and the process exit code is
 * set to 1; no output file is written in that case.
 *
 * @returns {Promise<void>} Resolves once the text file has been written and
 *   the preview printed (or the missing-file error reported). Rejects on any
 *   other read, parse, or write failure.
 */
async function main() {
  const pdfPath = path.resolve(__dirname, '..', '..', 'Organigramm_ohne_Namen.pdf')

  // Read directly and react to ENOENT instead of checking existence first:
  // a separate exists-check can go stale before the read happens.
  let buf
  try {
    buf = fs.readFileSync(pdfPath)
  } catch (err) {
    if (err.code !== 'ENOENT') throw err
    console.error('PDF not found at', pdfPath)
    // Set the exit code and return rather than calling process.exit(), so
    // the message above is flushed before the process ends.
    process.exitCode = 1
    return
  }

  const res = await pdfParse(buf)

  const outPath = path.resolve(__dirname, '..', '..', 'organigramm_text.txt')
  fs.writeFileSync(outPath, res.text, 'utf8')

  console.log('Extracted text length:', res.text.length)
  console.log('Pages:', res.numpages)
  console.log('Saved to:', outPath)

  // Print first 200 non-empty, trimmed lines to stdout for a quick view
  const lines = res.text.split('\n').map(s => s.trim()).filter(Boolean)
  console.log('--- First lines ---')
  console.log(lines.slice(0, 200).join('\n'))
}
|
||||
|
||||
// Script entry point: run the extraction; if it rejects, log the error and
// terminate with a non-zero exit status.
main().catch((error) => {
  console.error(error)
  process.exit(1)
})
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren