From d9c99421974c13719a368fa98bc759be5988de99 Mon Sep 17 00:00:00 2001 From: Claude Project Manager Date: Sat, 12 Jul 2025 22:29:41 +0200 Subject: [PATCH] Initial commit --- .claude/settings.local.json | 12 + CLAUDE_PROJECT_README.md | 78 ++ install_dependencies.bat | 6 + main.py | 37 + requirements.txt | 12 + src/__init__.py | 0 src/core/__init__.py | 0 src/core/__pycache__/__init__.cpython-310.pyc | Bin 0 -> 187 bytes .../__pycache__/web_crawler.cpython-310.pyc | Bin 0 -> 23816 bytes src/core/web_crawler.py | 1027 +++++++++++++++++ src/resources/icons/check.svg | 3 + src/resources/icons/download.svg | 4 + src/resources/icons/folder.svg | 4 + src/resources/icons/gear.svg | 5 + src/resources/icons/globe.svg | 4 + src/resources/icons/moon.svg | 4 + src/resources/icons/sun.svg | 7 + src/resources/logo/intelsight-full-dark.svg | 54 + src/resources/logo/intelsight-full-light.svg | 54 + .../logo/intelsight-icon-transparent-dark.svg | 40 + src/resources/logo/intelsight-name-dark.svg | 53 + src/resources/logo/intelsight-name-light.svg | 53 + .../logo/intelsight-name-transparent-dark.svg | 53 + .../__pycache__/dark_theme.cpython-310.pyc | Bin 0 -> 6641 bytes .../__pycache__/light_theme.cpython-310.pyc | Bin 0 -> 6554 bytes src/resources/styles/dark_theme.py | 337 ++++++ src/resources/styles/light_theme.py | 347 ++++++ src/ui/__init__.py | 1 + src/ui/__pycache__/__init__.cpython-310.pyc | Bin 0 -> 185 bytes .../custom_widgets.cpython-310.pyc | Bin 0 -> 1584 bytes .../__pycache__/main_window.cpython-310.pyc | Bin 0 -> 15024 bytes src/ui/custom_widgets.py | 54 + src/ui/main_window.py | 604 ++++++++++ src/utils/__init__.py | 0 .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 188 bytes .../__pycache__/local_server.cpython-310.pyc | Bin 0 -> 2626 bytes .../__pycache__/pdf_report.cpython-310.pyc | Bin 0 -> 9048 bytes src/utils/local_server.py | 86 ++ src/utils/pdf_report.py | 418 +++++++ start.bat | 4 + 40 files changed, 3361 insertions(+) create mode 100644 
.claude/settings.local.json create mode 100644 CLAUDE_PROJECT_README.md create mode 100644 install_dependencies.bat create mode 100644 main.py create mode 100644 requirements.txt create mode 100644 src/__init__.py create mode 100644 src/core/__init__.py create mode 100644 src/core/__pycache__/__init__.cpython-310.pyc create mode 100644 src/core/__pycache__/web_crawler.cpython-310.pyc create mode 100644 src/core/web_crawler.py create mode 100644 src/resources/icons/check.svg create mode 100644 src/resources/icons/download.svg create mode 100644 src/resources/icons/folder.svg create mode 100644 src/resources/icons/gear.svg create mode 100644 src/resources/icons/globe.svg create mode 100644 src/resources/icons/moon.svg create mode 100644 src/resources/icons/sun.svg create mode 100644 src/resources/logo/intelsight-full-dark.svg create mode 100644 src/resources/logo/intelsight-full-light.svg create mode 100644 src/resources/logo/intelsight-icon-transparent-dark.svg create mode 100644 src/resources/logo/intelsight-name-dark.svg create mode 100644 src/resources/logo/intelsight-name-light.svg create mode 100644 src/resources/logo/intelsight-name-transparent-dark.svg create mode 100644 src/resources/styles/__pycache__/dark_theme.cpython-310.pyc create mode 100644 src/resources/styles/__pycache__/light_theme.cpython-310.pyc create mode 100644 src/resources/styles/dark_theme.py create mode 100644 src/resources/styles/light_theme.py create mode 100644 src/ui/__init__.py create mode 100644 src/ui/__pycache__/__init__.cpython-310.pyc create mode 100644 src/ui/__pycache__/custom_widgets.cpython-310.pyc create mode 100644 src/ui/__pycache__/main_window.cpython-310.pyc create mode 100644 src/ui/custom_widgets.py create mode 100644 src/ui/main_window.py create mode 100644 src/utils/__init__.py create mode 100644 src/utils/__pycache__/__init__.cpython-310.pyc create mode 100644 src/utils/__pycache__/local_server.cpython-310.pyc create mode 100644 
src/utils/__pycache__/pdf_report.cpython-310.pyc create mode 100644 src/utils/local_server.py create mode 100644 src/utils/pdf_report.py create mode 100644 start.bat diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..f35455e --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,12 @@ +{ + "permissions": { + "allow": [ + "Bash(mkdir:*)", + "Bash(cp:*)", + "Bash(python:*)", + "Bash(find:*)", + "Bash(rm:*)" + ], + "deny": [] + } +} \ No newline at end of file diff --git a/CLAUDE_PROJECT_README.md b/CLAUDE_PROJECT_README.md new file mode 100644 index 0000000..d6005d1 --- /dev/null +++ b/CLAUDE_PROJECT_README.md @@ -0,0 +1,78 @@ +# Toolbox-Webseiten-Crawler + +*This README was automatically generated by Claude Project Manager* + +## Project Overview + +- **Path**: `C:/Users/hendr/Desktop/IntelSight/Projektablage/Toolbox-Webseiten-Crawler` +- **Files**: 23 files +- **Size**: 98.9 KB +- **Last Modified**: 2025-07-12 20:11 + +## Technology Stack + +### Languages +- Batch +- Python + +## Project Structure + +``` +CLAUDE_PROJECT_README.md +install_dependencies.bat +main.py +requirements.txt +start.bat +src/ + ├── __init__.py + ├── core/ + │ ├── web_crawler.py + │ └── __init__.py + ├── resources/ + │ ├── icons/ + │ │ ├── check.svg + │ │ ├── download.svg + │ │ ├── folder.svg + │ │ ├── gear.svg + │ │ ├── globe.svg + │ │ ├── moon.svg + │ │ └── sun.svg + │ └── styles/ + │ ├── dark_theme.py + │ └── light_theme.py + ├── ui/ + │ ├── custom_widgets.py + │ ├── main_window.py + │ └── __init__.py + └── utils/ + ├── local_server.py + ├── pdf_report.py + └── __init__.py +``` + +## Key Files + +- `requirements.txt` + +## Claude Integration + +This project is managed with Claude Project Manager. To work with this project: + +1. Open Claude Project Manager +2. Click on this project's tile +3. 
Claude will open in the project directory + +## Notes + +*Add your project-specific notes here* + +--- + +## Development Log + +- README generated on 2025-07-11 21:27:29 +- README updated on 2025-07-11 21:27:36 +- README updated on 2025-07-12 12:18:47 +- README updated on 2025-07-12 20:10:48 +- README updated on 2025-07-12 20:11:07 +- README updated on 2025-07-12 20:11:21 diff --git a/install_dependencies.bat b/install_dependencies.bat new file mode 100644 index 0000000..0e9dcdd --- /dev/null +++ b/install_dependencies.bat @@ -0,0 +1,6 @@ +@echo off +echo Installing dependencies for IntelSight Webseiten-Crawler... +pip install -r requirements.txt +echo. +echo Installation complete! +pause \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..5d6e235 --- /dev/null +++ b/main.py @@ -0,0 +1,37 @@ +import sys +import os +from PyQt6.QtWidgets import QApplication +from PyQt6.QtGui import QIcon, QFontDatabase +from PyQt6.QtCore import Qt + +# Füge src zum Python-Pfad hinzu +sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src')) + +from ui.main_window import WebsiteCrawlerWindow + + +def main(): + # High DPI Support + if hasattr(Qt.HighDpiScaleFactorRoundingPolicy, 'PassThrough'): + QApplication.setHighDpiScaleFactorRoundingPolicy( + Qt.HighDpiScaleFactorRoundingPolicy.PassThrough + ) + + app = QApplication(sys.argv) + app.setApplicationName("IntelSight Webseiten-Crawler") + app.setOrganizationName("IntelSight") + + # Setze App-Icon wenn vorhanden + icon_path = os.path.join(os.path.dirname(__file__), 'src', 'resources', 'icons', 'globe.svg') + if os.path.exists(icon_path): + app.setWindowIcon(QIcon(icon_path)) + + # Hauptfenster erstellen und anzeigen + window = WebsiteCrawlerWindow() + window.show() + + sys.exit(app.exec()) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..491470f --- /dev/null +++ b/requirements.txt 
@@ -0,0 +1,12 @@ +PyQt6==6.6.1 +PyQt6-Qt6==6.6.1 +PyQt6-sip==13.6.0 +beautifulsoup4==4.12.3 +requests==2.31.0 +lxml==5.1.0 +selenium==4.18.1 +wget==3.2 +urllib3==2.2.1 +pywebcopy==7.0.2 +reportlab==4.0.9 +chardet==5.2.0 \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/core/__init__.py b/src/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/core/__pycache__/__init__.cpython-310.pyc b/src/core/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..967027e65c4016db2cbe7d4fa1f826728f837cc5 GIT binary patch literal 187 zcmd1j<>g`k0=I zlaZR2QWWEoTAW>yUl8M&SCX0&oSB|c5))9ApOu7ZnpI9sw?*~D02$BMp#4Z571%jj{k(6Xhlq^z`wW7RP;0}OY>Y(1PRXfxZ_=ChC4G5+(l6qqTp$@hnwksdL&;Dzt|QqY>Ix^r z;@O$(#M7ILglqXSuE$y7t5uX;`qxvmn~)rP5^;Ynj5+kLa1C2VU$oG zOqEnWM+zmbj}IfmCohT^Eus7&g-V5p3W?oj`u>jl+%$vz|y8}X7c(1wb}gF ziol&^F8fJdYNDMAvVB*Ppe5k3+Vy8b+9nr zg6stA(nBn&hZa@V&3aI-gZ1h@)`v1G>t_SV6=s8o8NzQEu|77!wjs8Yjk4{iKf-p@ z^Y3IDQo7hl5r^DtOr%A1->_#fz;?4eXrVhO0oT1qxeY(cHLfdc0>2o3`|zXIqijE$ zMDCt?&ORNOp&lQLvjp1dV^b(Qjl3$GVFwV?&rXS!52KDFdI0qV(CSfk4D}2ICHy-W zJjC82@=)3Kb{plC#U6G$n?<=nb{yk=hZA~KpTCQz*K#_);6#?o`E(&QuP>)BWr}=3 zZWe-n z+T|mMV%i-gOfPiiZ5eaw@WG>rgGaQ9x7~B${`)4iTxLbr?$#GpiZSi3WnRqdQ!@up z0sYg?rx(*aQ!VoUvB8Op9c4r3sq~UwK%Rk=p_?h5#>~m58VOE! 
zi5HhRilr8?SPn1F>`G0hNx(#0}g&Y^NlsN}Vsrj^_Q)R%D=TiA}sgx-!#bie{^xUH8!Al;# z6CJsczH9d4!|01~aT(R{i>GyC#VnRCih(?zSz0zPKFEt%eZ|CPi~e4`P%P%=i$sFhp5R$c$2{ecceu}35jpgqF&WaVp8g`P@Q-SFBHlbG1fv6ZXL_q_j_ zguTL#H6JLCp%Bgt%hdEtK`&@ldGXa3bGZx#QzKGnchIm}Z#DJH=G3y8&rO1Uxy(YE zj-IK@6tn+wdu%>;eD%b1;`Yf*9#onF2}+auruK=*W7Z(kLi`ST1H3 zblw9Z@ooUezpSGaHymHNM8|4O<~tCd3^iuDm`X0ts2m-#x8Y~>0fgm%B)9wv|Jd5q zJj`3K9)BJy2VL8ALJ-qKqyPOO0F1bYhj1ErY)OnU_I29ZUy{IS@P6c|;>!u4Hbk9p zO3!0vx{LB4)#(H9NfAlCX$>~x*p$!lUP|Z6x5kT)Q%e&7z2Ca&%(QD+e zlK5^UfXOUm7K=RZDEai|m?|bQpQ3Wp0ATirdhNp#PPy!gNIZ^w1}zMq9FkS3M+qxY zDXd1NKBaZCG?j6}3q_oX*ptoml8$Ajn`K_uG^YcoRgBEj2xLHSCVxmGN&|u!1q3?{ z7-S(pu%LjQ0!COD!co=@*dt)CfPJhVF#~K6aEJ{9g82jl>j}7n?F1ZV+t_xt6Hkrp zdfxX*iB=ICyGCQR+d6tDPQXk)Z)z-~Yp1dFGdLnJJQ`*j#t$!Ay1af9i{Ow`%`4;W zC}g|?K*q|FZ>qBVx~j;psjB>cRj>Rvs!zV5`sE+10r^L2Q2wDBlK)!mkbj_t<^Q90 z%HLNb^7qs(`LEQd{9UzM{*K(!IC6Z~14HtkS^Jwe)SrNj6Toaql^}FFehLvQ6%>0c z#VycYTKl6Trp}L8*qWGBQStLaR*^p;ReZMZwC9O<#cz$7w7L}A&(vp>4|G=oc7UXt zifa2S-k|4ElvF*c=PJbYMhVqkJjQ=Lu(!zt(84| z!@5h<@4YI2;84Bppf$pLPzxxPkR8I#9t4-vVRtb9u!sMg9R#N&!iZJfP{sSGr_kxv zZwIqp5`J0e85w;FRJ@P1U|9$N!}V4S3mP!=t^bPTse~nu)rXVW&pK@Bmpv?O2JG-v zgkDhTM0gZ^0-eH7g-Jc&hSntNkMSbKSZl1a(rE@aLUyN(6Q>ffBcS9xl`gw$qk~1R zN&L4~UopeBY3q(w;sIx zA?wAP%=#)lz_G`U+KSzCO{w(Sz5IuwE`-Fhk2s5K@)2;$WV;0Q2Y|EaW%H;~zh*qoN=9ER)hM3m_N=r%*Fw4BpN4X=kusejI7!od zo_KRpyX*Yi!uaQJ}ZTPTUA#9xByMXz$8j?Cvdaqj#f0_Ol6 zKi5mS^a819d^+bvZdfB{^<^jqG$5SU?$ZsN{g;XbLR`Bc&uTYzVC`L;czo>Q_!Fll z9)BmE6R{`u$J~gEu^V1&KCIPFi4H&3vk94pWGQ?Or(qgg$@?R}G*QtVl+gF`A zlAaWDDNw*%1EDa)pfzo>{`9=N+bZaX(^(ULUFIp2D?*TVw`EA^Zm9jF=}v)9hQJJD z@mtK~^gLQOYycHUOF#+8vJ{2biDO2(?X|E<$4x+$rLaUP0Xc-U2&G?v93cB7`O4_` z)hm(62*qB(u|y?gRi^wQlnBWq3JJ0FQ{@QS*nu?pN}m#zCT>QR2`Y8d8f`v)+EHwg zaA_)&7$W#V;!TB&(+5r+r-k6e2TXZG!Mh3>7dsQ}GfcrATK&b;Ha`3QblI5K*YqWq zw`R~NEH{b0+De&Qy7nM)>4k3}6n|ek#b?koHUhy<3l^FmMhqtwo|Bvc>4pTs_5m15 z8N}d&T}O< z`1st(*)jXnz9$?h#UDcoxHuC1V}7x`ctK>w`plGgmcoirp64{zcoCo`U=aR}Pw26k 
zDAP`^o|KV>b|hozVzSa|{5;ASq$~+wQqrW2C{au?RkpUbkgUBN?E?^?oAft5Hmga~tK`(GLcM440o~)yW2)&zKVv=}m zZ#G(dFG0k3vfW@6qALn9Y5e;XyU_z7suzcS59_061p=&~$*-Ed**+ZmGV8bfm`s5} zs2(#wahO)#N&s;Kc7P4CA)=HyxG`i)8^hN;*QAXREM5;AevdrtsRV)7HexF(ZY`sB z2$OOIDPYcwKNW2%{7=!x?HeHoX`{es2iuN3JCG-g(mU<2VC90p?6EqFf!zXQk3^wiCGCx^;i8B^Mi_i-^(ePV}NmAD6bk zD~q1)szl5&yOTswHU>7TQ!skFZLo60EHW@iE^}N63KohrvrvUkqORR5GV1z1>e^EQ z8;G!FN7)_``>AGHR>Ewr9liucj6Z9KLD{{)={B&7d%-S73!_#)1Y}R{Heww&#!+$t zwC=Hc*!VD&dC?73qF|MM5@r{+-(8jHV;*ZS<=u$cQRM&Wdh?a$-k{zSZoPsc6JY5h zRcZ)awxq{jHrMc&r!e}a%gRQmxh&Xiu`A4BWn$IFX#Kt5u2h%CGT(1^K1pZ)kLYxq2g1KL`KGp(SPwI%4fm|jRE5<+QqozC%>q@06*wN+N;mcLjzpmP%P`(4Zk*V&xH#QYBwaX-__78e-H9GQX&~3DIZ)V zB<33XW?T@0!R3r$riv@qN!jHdso}et^hS>7hIHJO81AMVX#&J^b5aiQcLG?Ud(S^m z6G8&SMMHOd;`&YextyoGHg9#`Qy`0sY-YUb@e4}zbg$l1HscO>vLU?$N!XE_4 zJco{F{t1B}0$5S%WW2%nYy1)9vIfBZ&x2h6S7j{Lr5No8q$1SfS~Jl==`yLSlNg6r zKS#t|Smp=|ZJM6Ztl>uOdF^!d0!Tl&&X8F&AsN)?TS{my!GiKhD%(Z5 zLwZfNGLk_WP*)a%AYqh>xDw$R%GHG#S-mk4=}G?ucTMnRB-dmk;_8H0B$zG!oAmlV zfMiF;5LzxV|9!X*OlJzoa1E~%h?gWAhZNDz36OLm#IFifAoOPMqR>Ajutq@8MUdn& zUY$Vg4uG#yYU^DBUq#%%$ItlB03j&^jxiud!9DhXdmK?hQjZjsL&^xA5LaY~LJ)_B z6jdI;6-|%SCr3ms=(Lp~ln$e82&JMTegM2E>XHYPknBTBSlV$@zJf4x;1T5NlVsEv zMmbk>g5U%J$jt{)N)Drz9XHi0VU?aCl!_`5=+NX!J|ja>bsSt?5SJ@HD5~zYgADc;LJ6lZTt@PQiE9>!E#UXEUYuJ2 zJbmEhLZlw6gbQBN55-mpbrZJ-#TodUAo$KsyOZB*f?wX~z{#tCFARf|_Ft2!g*ne^ z#P)(i*+r^F2#4UPGY}Ag&50&#KDY^+^4|iR&MGz^Y{rI^bZri&281%L7UNyu-yYr5o*BImqeZ5|L_JC z9_qXoD!aZ7g>_$(j6bsb>~6>wf4urN(M}(=BWjI;22f9Ci4*Usst)_@e%6cJKVrek z091$ltPgKDSwG%ns3!+%V`C4v)IiTh-vGzHDvn=+y0I(Ox%v^?hZ2i|Lb$47L)Fo` zy)tMIvO&z4A$tgzW5yugJ+~O8p_Vx_)Iz!8I$mrmyr`8q@U*LwwhEr=&jltUVys@S z4BNw)zk{OfPuatp@Ok!cflqY~qc@-05+AezzLVPBhIVB>xa6sfpzUoKksioFG>a}& zw%OY@daqFoWTMKbJ&K$WdsIkA%@q5_&w!!KgIi%pGn`rimLnT|Y&&E^U`5;^sQ5b+ zX9IKhjqOA#s7d$>Di;KOHPMT;%65A@R_QMBR;cW-chr0RLDB0SZ=%=l+th2BU;LSR z{q#@SYkBoZQ?Kz>?=|p8ujSRf`0WroX)|`qG8{!d;79!0lxy^MQ@P^?BR>tiu1N7^mH0WL~Xk zwpJgfQ`K>zRiW^s_Kvnyq3~_D!1#?;>F(+(MU7iwOsxp)ehf_96{+yDt!--iv7f2! 
z@|JC*70h*NcaP}vXRuQ!{4;2M55{koy{o!EDf}b1Xnj}9SnX=rpKAL7YP?13dqmH_ zCq@KvI%dzl%9uUo(&2rc_j-WMx6FZG^ejS#XX8%?*u-hbl~-wezJ*yg$YN|C=AB&G z&Gx_7gR^$>1;yT->O>twHpUohRl&-o6FiP9oP;>Nf4#ED-XqTJz4l)8aEkhFyl(Gh z)6gfVJZA3!2C$vPyuHo74KWe>Hk3FBTJ2#oCp`6>@Z|w zm8U6WhcQ05K-+>>5bcWgJR2iIlN3WOp^a_gq*0AkdPAKiJ5q@i+&ZFm%sm+q|86_R zjzV=Bb88o{9~Wpb@FC4SNu!;>#wa^RcA<2_p@!ebcn-2-*uD3$S$iM5ea^#<<2eWV z1+v@Ghu@+nbTd2beb-0}*)2+(K#BbbQT%?2xA!;lh#UjRG09HalNjMZb|-3b=!t!K zlE&K9?7&rAx5ZAKyU{@$V?rorxufPXg_TW;c;fZW6Te4dk?kpVG6@Q2%&$wHQYVvZ zVbrIbOR8jakPH~hMQ$!YjFyB6K5sELzg%l|b1T^FtAbYof9O(8%-5~B!f z;w_nc30Ph<_P_D{8h@1PdItegg7b?6gwXsrLXOW61{6?!^MwmZ8632d$uA{+c!gws zLz=uHX+os8xn zH%K0SH%8$JKTQ6l_d=8X2v*UmOxhV?cqOO}dq+7j%|DCGXz0s$xPzb}LNQn5v)b+h2M?S+eDJtdd!L>@edz4j<60@r$c}GTJ1||kd|V@W znJfaA3bWb*E>m=VT$?ZA5|PJY)R!q6v)VD_vj!eNLrR)?9an3nZfK8?p$e{1CER1A zL56>Wj#?cJQRT*`-I8u_@uU2E(nv!iJ10ey$Wg*4W6am zXw({+9FLKQ)E}!6@eRM6S3rFs9=ellNztjf6Uk<}v2?GEDp}CII0(9!uaJm#%dS zOA|9~RCTGZX_-ZBqE^)#4Su7^=JgH?&cG@G>*CJ!ds(BR%`0$odvDYl<$kkEu2!?# ziv&*bVlD@p|KcMVeQn~&ns?5uHg4pLMRR$)eL~eLpy%f4xOHrM9nJ5};n*%t$35vs z;n{$_R;aUi4sk+N4fXrhm9A5T!y2Y8e9tvNd!oai+yF0ebX?LC-Q?gh3@~ z)nVdvmCR$6tPEW?jNyf3K34IX3UuRMGU7oll7@ZIr2B1u)?1Z~y|7}LF@1>f!)nF< z3>gRoU>gEGJ;{KpuqJ`T7C6WM71|Rs_5%Me6cqE8xZs$S3_uE5qFEmciv5TWP>!z? 
z5PJ^E4DJ>LrW6|juAjs@zy`t?%Iu)@#=8GA%H$;;%y8F;Sk1IBlEHznj z02<|odkF0L(hFuv6#NyERr-)X9#lPaSAts<-05IG!#b$dWu&>B6ExD7bk}k&T;uGi zy_S)n%f%3Wcj0GzOaf2-3iObQaIk`g6IzUki%LbUc&kRu8qpkD5TwBY#Lwgvg-@Aq z!P-y-^x80OhDHDqKd7(3^eO;jV2q<{_p{oy8em=+1>+th3riB_vt>ZP8G_RY84L%^ zj*T$1yq#n@g!j-3LEK^}u0@Zn03VoDqumaS_M@v00vA6VC&IWBfr&NiM5`gx6@raH zWJTpg+%v#mmvxf`4qRJ8xCQC4{iwH>Ze3WP2=~9>hh8Iy+JbC=Le-w&HU;%{z-p+2 z^z>|4Sg3?yJrX6OYGIMlUAIsn!|06`n+ljBvk}))rDXxbVT{rl4lyQ|b3NJMdXoG z&uFwmpA5~>L(9P>?Zg<<$0!0a+eyJd&XLepm+QtJzHl~v3^|BVaFggB^u*XDupgwC z7$e4+JW5WCt!0?Ge1hqh;FcB_FO%StVKE2U)POyxeqtsuU8~B>m^uAqgL%a@eENCp zFmqFGirb)&f!xLk0XnSP;j97D|AbH2rJMR#OP*)AX-3Ee}3615H3% zbgRywg>eVYdVE!<0*T6rG+XeMDPw6&pxq zX0bJGi_RWM)4$eib<7WLs=ZCiF>|23dGbazmeaV#1wph!rOVn8{P@y*dSZG~tNu?M zY~tk`U0$AP>uQbaRW-~O{fuLD&0?NN(T<6jH3m#Mp|#QTjUG1IOJKPbZ_(tjW5*gr zXkNObTyJkPX`0%D>*+08Bol-!EH@{b7Wep&i#3CsjcO4zQQSqGz+uv zY-?4}Y14^=Mk9OEK-S4RyA1p0_7&4!4-|dV)9LAjg{|sZKF~HaHG*zM>}qe}qdlu_ zo)K-dm^s`|tfnb5lQ_Kflqu)h@X0}278jf5R^wA2;-=lGL8x4&tuhxyp{~5;{smR0 zw#7nk8ypZ7w(1Rvx_fzxPIS{``3Zx4V9UYIW!~%{ADX6r4U`&mNVk!pHpWMe%pY8A zJ0af0qNuOlwq@JY;FdVonr)w2jdBUHN{W+vNE^mYQ)fDHtYz*HzqpjI3meQtCuTR0v6Ta>EASHRr2Gq7U8*SUbHpJwHyHLO$(p%wzg{p+Xu5w^{qEOx9Yj6DjPI6<+5pW z#0OPbGTc^Z3%RsmoEWP!1yvr4DxX`PISJve`T>XryM?M|8hq7UQ!RAvjh(z(H>zrK ztyX+ivsUM(kh4*)PX6W!#MEd)L7;(|RH)%uAE2?5E`vL(&235-R2(~Lok#Cc_GI-@ zZ}E+4J&iAS;Ml))rIEcXu{WRC`8%L3eBdN*6m%H;)(Br?|1net8YSk+;NSWGAl{y& z-!e(G8ntr$L4@ERok(Op4J+Go);=Op?QbrZY1C1Vto6YfY#Xmy4r{W7;I-)H;jV8& z7_3Aawd(ns`%IB@Q-qH-(a1pEa#O?%o11dyI66-fOPh(YHQJcTMqEf!8tx^8goSC^ zWB{Dks*)F^DQL`^gr{n`SuNHl^pTFbmd>s4ZxLn5l-5;ElAe)&lK?3xU5BEV5ONeq zY&Nk=zK{|6LQ*Ei6r)A$;FOCl4=_t=_k}GU6+wprAgH zha4%-dY}k{n(d1Gn(PaB)xVLM`n+#R{b+bO@R0zA5h_%0-btuS!rEG-B}0 z1{NfPhqm}o%`meq9iNJ zshm)Glm2j%$pY=8!hD33flFkv&_N~(%mZ^qG0zOlu;=7e`r?Kxw2$~AnV+u4Va#C# zC}5CT$A6_}{u_b+4gkZF96ex=LYRvcqFdw<=_HXx^cA zT*E;Q#s;P(MKwQfFa|#_J(Ad+$?w%$tF>X>47N5tpwI^L_GEz6IUw}o0 z>0XEYIc5BgNEY|nm=*3Gu$9HUL(amjRCs{Mbp+*QXjREgI1K;G9$3i_!HORGeQ37_ 
zu$@I=MNiJYuwX}y2(~)cIk?@;x3%&GXrk(WqX>oYdmDbnQ&1+u19OCQt=BM3@WDHw zN2Lu1A#dHk(pL}pt0DMurUGPv1>Zz$R<4I8tivVvBnDtt2n{H5gz@1Cd;$;KTZF0P zfmHSA^1%MuE4Dj)nqpC2f<-#>!9R{H%{ySF`!Ux0^e@eThF>3Z#}#O*jxL6gy=_Kw%?9a{XAizogENn8d5U-8=jpRyqX}HdX%{|XXfwJoNECvdoSf#YWjkH?-Cjj|u(aSE2J@UImi&&D1XOSC&+4?sIUKvv{vNx4MU<0tHnjlFp9 zV!lDoqU+BW7PJh*a(SXgSK+hQ$-Kg6uhYDZ4V&YrgM0$R+dOPU`0ROw&mR1QH@fVh z>S5V~Hl9N*-T^qG3!h8VgfFD)y;>PI_igOAhecj||FWLvIn>*iorH}yo*Qv-zz%IB zuF)Fkvqfxb675WhmC!aLjfV&WvRM?fi)`tCBt{)p;S__pzQdeG&0aC~J6$h65wD4O z+#6v|>}rZ1qmt$f%J?=8T*DyRTMg%@A1y^U;JsSIfN)moiw z+lmTYekgM8X~YhZ_32)>HP|y!Jq=86Yr=HA2_yKP0;Af#a1f;qVRnJ35Oc)+aL8eJ z8J?`pd-6$$uvl%zyX(ABUfd{%InMS8??=pbaU}0QcY%Kldy#9d*LR*iFsQe>q6tSi z!Czfxl=JTsc!t0~An*YK;;j87g+4^!!vy{zfnNf!IvQ?nt`i;qHA)kLN18$nf4^U) z*B1$hGaL@4un`NLIB^1!kCD#fVr~)gDe(%3vpUakaD4dMBIHZ(>Vew#s>)rcq2`H) z%yd{;UhUzB?la2$ND8&2L4%KkSxeP$abQ7m01F|$n}P}SfC=V0^mf%$J^bPt2l$&n)b+0xl_LwX?A!g5~FbO`RPfFbLpy=~j@t&VR!c zyPW_X8?L3pKco*CA!~D|}+D+gnfx`soTus8Queg$d zSc3i)aXk{HlwJZq03b7517GqbO^d;|QS|Esh%JbXHgN<()cHIm{VsvuBk%5C{3eMhe`mP9((=WttHQfP6 z>kevFPVGtJQNDmos!Cpd?mv92L?Q0PM?QYIrB`8PKt6T!iIT7^<6b|4yJFY-k3N19 zmV2a8;#$aB;0nI7<)>SG_4SaCzBB zuAugi24U}?h29>3Wx)_xKae*cIH*x@OChq8K(4R?&q44?WQTz^U$eF~F{JIi#P=I7 zfWsxHJNL_elJJC%$*zPK^IgDSH9SNg8KoP= ziz*cJO-ygGV#H(>%f`EiFRz;Ja=DjszMH^R0%H36DYQyJtcxIp#LO47RIJ7bEy>0h zijYS6Iss_8iBB5`Jm4+|BApSnPwg2P7!ACw$E!s>?~+HNujAWgH|sxq2@SmH>z;Rc z-ZgTf2RTM}$&*OMzvyeNxsVgzOp}$!Yoe{#pu15}n@MHfIOLjDg)y7qyIZw;$6Tr^ zAV^k$Y1J}5&7Dj7X$m73_fdM5fLOw`q8%T;WeEE?E{GAJG(m?Dg*pg)ia?kE^?55| ezyRJIB^+Cz2<>_S@x7Cp#SP7;QT#CPl>Y}p@=d4! 
literal 0 HcmV?d00001 diff --git a/src/core/web_crawler.py b/src/core/web_crawler.py new file mode 100644 index 0000000..3ae7249 --- /dev/null +++ b/src/core/web_crawler.py @@ -0,0 +1,1027 @@ +import os +import requests +from bs4 import BeautifulSoup +from urllib.parse import urljoin, urlparse, urlunparse +import shutil +from pathlib import Path +import re +import time +import random +from typing import Optional, Callable, Set, Dict, Tuple +import hashlib +from datetime import datetime + + +class WebCrawler: + def __init__(self, human_behavior: bool = True): + self.session = requests.Session() + self.human_behavior = human_behavior + # Verschiedene User Agents für zufällige Auswahl + self.user_agents = [ + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' + ] + self._set_random_user_agent() + self.progress_callback: Optional[Callable[[int], None]] = None + self.status_callback: Optional[Callable[[str], None]] = None + self.visited_urls: Set[str] = set() + self.downloaded_resources: Dict[str, str] = {} + self.skipped_urls: Dict[str, str] = {} # URL -> Grund + self.start_time: Optional[datetime] = None + self.end_time: Optional[datetime] = None + self.current_base_url: Optional[str] = None + self.url_mapping: Dict[str, str] = {} # Original URL -> lokaler Pfad für Links + + def _set_random_user_agent(self): + """Setzt einen zufälligen User Agent""" + user_agent = random.choice(self.user_agents) + self.session.headers.update({ + 'User-Agent': user_agent, + 'Accept': 
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'de-DE,de;q=0.9,en;q=0.8', + 'Accept-Encoding': 'gzip, deflate, br', + 'DNT': '1', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1' + }) + + def _emit_status(self, message: str): + if self.status_callback: + self.status_callback(message) + + def _emit_progress(self, value: int): + if self.progress_callback: + self.progress_callback(value) + + def _human_delay(self, base_delay: float = 1.0, variation: float = 0.5): + """Simuliert menschliche Verzögerungen mit Zufälligkeit""" + if not self.human_behavior: + # Minimale Verzögerung wenn menschliches Verhalten deaktiviert + time.sleep(0.1) + return + + # Basis-Verzögerung mit zufälliger Variation + delay = base_delay + random.uniform(-variation, variation) + + # Gelegentlich längere Pausen (simuliert Lesen/Nachdenken) + if random.random() < 0.1: # 10% Chance auf längere Pause + delay += random.uniform(2, 5) + # Keine Status-Meldung mehr für künstliche Pausen + + # Sehr selten extra lange Pausen (simuliert Ablenkung) + if random.random() < 0.02: # 2% Chance + delay += random.uniform(5, 10) + # Keine Status-Meldung mehr für künstliche Pausen + + time.sleep(max(0.5, delay)) # Mindestens 0.5 Sekunden + + def _get_extension_from_content_type(self, content_type: str) -> str: + """Bestimmt die Dateiendung basierend auf dem Content-Type""" + return { + 'image/jpeg': '.jpg', + 'image/png': '.png', + 'image/gif': '.gif', + 'image/svg+xml': '.svg', + 'image/webp': '.webp', + 'image/x-icon': '.ico', + 'image/vnd.microsoft.icon': '.ico', + 'text/css': '.css', + 'application/javascript': '.js', + 'text/javascript': '.js', + 'application/x-javascript': '.js', + 'video/mp4': '.mp4', + 'video/webm': '.webm', + 'font/woff': '.woff', + 'font/woff2': '.woff2', + 'font/ttf': '.ttf', + 'font/otf': '.otf', + 'application/font-woff': '.woff', + 'application/font-woff2': '.woff2', + 'application/x-font-ttf': '.ttf', + 
'application/x-font-otf': '.otf', + 'application/x-font-woff': '.woff', + 'application/vnd.ms-fontobject': '.eot' + }.get(content_type, '.dat') + + def _process_css_file(self, css_path: str, css_url: str, base_path: str): + """Verarbeitet CSS-Dateien und lädt referenzierte Ressourcen herunter""" + css_content = None + used_encoding = None + + # Versuche verschiedene Encodings + encodings = ['utf-8', 'latin-1', 'iso-8859-1', 'windows-1252', 'cp1252'] + + for encoding in encodings: + try: + with open(css_path, 'r', encoding=encoding) as f: + css_content = f.read() + used_encoding = encoding + break + except UnicodeDecodeError: + continue + + # Falls kein Encoding funktioniert, lies als Binärdatei + if css_content is None: + try: + with open(css_path, 'rb') as f: + # Versuche automatische Erkennung oder nutze latin-1 als Fallback + raw_content = f.read() + try: + # Versuche chardet wenn verfügbar + import chardet + detected = chardet.detect(raw_content) + if detected['encoding']: + css_content = raw_content.decode(detected['encoding']) + used_encoding = detected['encoding'] + except: + # Fallback: latin-1 kann alles decodieren + css_content = raw_content.decode('latin-1', errors='replace') + used_encoding = 'latin-1' + except Exception as e: + self._emit_status(f"Fehler beim Lesen von CSS {css_path}: {str(e)}") + return + + if not css_content: + return + + try: + # Finde alle URLs in der CSS-Datei + url_pattern = r'url\(["\']?([^"\'()]+)["\']?\)' + urls = re.findall(url_pattern, css_content) + + for url in urls: + # Skip data URLs + if url.startswith('data:'): + continue + + abs_url = urljoin(css_url, url) + + # Bestimme Ressourcentyp basierend auf Erweiterung + ext = os.path.splitext(urlparse(abs_url).path)[1].lower() + if ext in ['.woff', '.woff2', '.ttf', '.otf', '.eot']: + resource_type = 'fonts' + elif ext in ['.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.ico']: + resource_type = 'images' + else: + resource_type = 'css' # Für andere CSS-Dateien (@import) + 
+ local_path = self._download_resource(abs_url, base_path, resource_type) + if local_path: + # Ersetze URL in CSS + rel_path = os.path.relpath(local_path, os.path.dirname(css_path)).replace('\\', '/') + css_content = css_content.replace(url, rel_path) + + # Schreibe aktualisierte CSS zurück mit gleichem Encoding + with open(css_path, 'w', encoding=used_encoding or 'utf-8') as f: + f.write(css_content) + + except Exception as e: + self._emit_status(f"Fehler beim Verarbeiten von CSS {css_path}: {str(e)}") + + def _simulate_mouse_movement(self): + """Simuliert Mausbewegungen durch zufällige kurze Pausen""" + if self.human_behavior and random.random() < 0.3: # 30% Chance + time.sleep(random.uniform(0.1, 0.3)) + + def _sanitize_filename(self, url: str) -> str: + parsed = urlparse(url) + path = parsed.path.strip('/') + + if not path: + path = 'index' + + # Ersetze ungültige Zeichen + path = re.sub(r'[<>:"|?*]', '_', path) + + # Füge .html hinzu wenn keine Erweiterung vorhanden + if not os.path.splitext(path)[1]: + path += '.html' + + return path + + def _download_resource(self, url: str, base_path: str, resource_type: str = 'page') -> Optional[str]: + if url in self.downloaded_resources: + return self.downloaded_resources[url] + + try: + # Menschenähnliche Verzögerung vor dem Request + if resource_type == 'page': + self._human_delay(base_delay=1.5, variation=0.8) + else: + # Ressourcen werden schneller geladen, aber trotzdem mit Variation + self._human_delay(base_delay=0.3, variation=0.2) + + # Gelegentlich User Agent wechseln + if self.human_behavior and random.random() < 0.05: # 5% Chance + self._set_random_user_agent() + + # Simuliere gelegentliches Neuladen der Seite + if self.human_behavior and resource_type == 'page' and random.random() < 0.03: # 3% Chance + self._emit_status("Seite wird neu geladen...") + time.sleep(random.uniform(0.5, 1.5)) + + response = self.session.get(url, timeout=30) + + # Bei 403/404 Fehlern überspringen statt abbrechen + if 
response.status_code in [403, 404]: + self._emit_status(f"Überspringe {url} (HTTP {response.status_code})") + self.skipped_urls[url] = f"HTTP {response.status_code}" + return None + + response.raise_for_status() + + # Simuliere Scroll-Verhalten nach dem Laden + if resource_type == 'page': + self._simulate_mouse_movement() + + # Bestimme den lokalen Pfad + parsed = urlparse(url) + + # Prüfe ob es eine externe Ressource ist + is_external = parsed.netloc and parsed.netloc != urlparse(self.current_base_url).netloc + + if is_external: + # Für externe Ressourcen: Erstelle Ordnerstruktur nach Domain + domain = parsed.netloc.replace(':', '_').replace('.', '_') + if resource_type == 'page': + resource_dir = os.path.join(base_path, 'external', domain) + else: + resource_dir = os.path.join(base_path, 'resources', 'external', domain, resource_type) + os.makedirs(resource_dir, exist_ok=True) + + # Hash für eindeutige Dateinamen bei externen Ressourcen + url_hash = hashlib.md5(url.encode()).hexdigest()[:8] + filename = self._sanitize_filename(url) + ext = os.path.splitext(filename)[1] + + # Bestimme Erweiterung aus Content-Type wenn nötig + if not ext and resource_type != 'page': + content_type = response.headers.get('Content-Type', '').split(';')[0] + ext = self._get_extension_from_content_type(content_type) + + filename = f"{url_hash}{ext}" + local_path = os.path.join(resource_dir, filename) + else: + # Interne Ressourcen + filename = self._sanitize_filename(url) + if resource_type != 'page': + # Für Ressourcen einen separaten Ordner verwenden + resource_dir = os.path.join(base_path, 'resources', resource_type) + os.makedirs(resource_dir, exist_ok=True) + + # Hash für eindeutige Dateinamen + url_hash = hashlib.md5(url.encode()).hexdigest()[:8] + ext = os.path.splitext(filename)[1] or '.dat' + filename = f"{url_hash}{ext}" + local_path = os.path.join(resource_dir, filename) + else: + local_path = os.path.join(base_path, filename) + + # Schreibe Datei + 
os.makedirs(os.path.dirname(local_path), exist_ok=True) + + if resource_type == 'page': + # HTML verarbeiten + content = response.text + with open(local_path, 'w', encoding='utf-8') as f: + f.write(content) + elif resource_type == 'css': + # CSS-Dateien mit korrektem Encoding speichern + # Versuche das Encoding aus der Response zu bekommen + encoding = response.encoding + if not encoding or encoding == 'ISO-8859-1': + # Oft wird ISO-8859-1 als Standard zurückgegeben, auch wenn es nicht stimmt + # Versuche das Encoding aus dem Content-Type Header zu bekommen + content_type = response.headers.get('Content-Type', '') + if 'charset=' in content_type: + encoding = content_type.split('charset=')[-1].strip() + else: + # Fallback: Versuche zu erkennen + try: + import chardet + detected = chardet.detect(response.content) + if detected['encoding']: + encoding = detected['encoding'] + except: + encoding = 'utf-8' + + # Speichere CSS als Text mit erkanntem Encoding + try: + content = response.content.decode(encoding) + with open(local_path, 'w', encoding='utf-8') as f: + f.write(content) + except: + # Fallback: Speichere als Binärdatei + with open(local_path, 'wb') as f: + f.write(response.content) + else: + # Andere Ressourcen als Binärdaten + with open(local_path, 'wb') as f: + f.write(response.content) + + self.downloaded_resources[url] = local_path + self._emit_status(f"Heruntergeladen: {url}") + + # CSS-Dateien parsen für weitere Ressourcen + if resource_type == 'css' and local_path: + self._process_css_file(local_path, url, base_path) + + # Aktualisiere Fortschritt beim erfolgreichen Download + if local_path and hasattr(self, 'processed_resources'): + self.processed_resources += 1 + progress = min(95, int((self.processed_resources / self.total_resources_estimate) * 90) + 5) + self._emit_progress(progress) + + return local_path + + except requests.exceptions.HTTPError as e: + if e.response.status_code in [403, 404, 401, 429, 503]: + self._emit_status(f"Überspringe {url} 
(HTTP {e.response.status_code})") + else: + self._emit_status(f"HTTP-Fehler bei {url}: {str(e)}") + return None + except requests.exceptions.ConnectionError: + self._emit_status(f"Verbindungsfehler bei {url} - Überspringe...") + return None + except requests.exceptions.Timeout: + self._emit_status(f"Zeitüberschreitung bei {url} - Überspringe...") + return None + except Exception as e: + self._emit_status(f"Fehler beim Download von {url}: {str(e)}") + return None + + def _process_html(self, html_content: str, base_url: str, base_path: str, + download_images: bool, download_css: bool, download_js: bool, + download_videos: bool = True) -> str: + soup = BeautifulSoup(html_content, 'html.parser') + + # Favicon herunterladen + for link in soup.find_all('link', rel=lambda x: x and ('icon' in str(x) or 'shortcut' in str(x))): + if link.get('href'): + href_url = link['href'] + if href_url.startswith('/'): + parsed_base = urlparse(base_url) + abs_url = f"{parsed_base.scheme}://{parsed_base.netloc}{href_url}" + else: + abs_url = urljoin(base_url, href_url) + local_path = self._download_resource(abs_url, base_path, 'images') + if local_path: + link['href'] = os.path.relpath(local_path, base_path).replace('\\', '/') + + # CSS verarbeiten + if download_css: + for link in soup.find_all('link', {'rel': 'stylesheet'}): + if link.get('href'): + href_url = link['href'] + if href_url.startswith('/'): + parsed_base = urlparse(base_url) + abs_url = f"{parsed_base.scheme}://{parsed_base.netloc}{href_url}" + else: + abs_url = urljoin(base_url, href_url) + local_path = self._download_resource(abs_url, base_path, 'css') + if local_path: + # Stelle sicher, dass der Pfad korrekt relativ ist + rel_path = os.path.relpath(local_path, base_path) + # Konvertiere Windows-Pfade zu Web-Pfaden + link['href'] = rel_path.replace('\\', '/') + + # Inline CSS mit @import Regeln und background-images + for style in soup.find_all('style'): + if style.string: + css_content = style.string + + # Verarbeite 
@import Regeln + imports = re.findall(r'@import\s+url\(["\']?([^"\'()]+)["\']?\)', css_content) + for imp_url in imports: + abs_url = urljoin(base_url, imp_url) + local_path = self._download_resource(abs_url, base_path, 'css') + if local_path: + rel_path = os.path.relpath(local_path, base_path).replace('\\', '/') + css_content = css_content.replace(imp_url, rel_path) + + # Verarbeite alle URLs in Inline-CSS (inkl. background-images) + urls = re.findall(r'url\(["\']?([^"\'()]+)["\']?\)', css_content) + for url in urls: + if url.startswith('data:') or url in imports: + continue + + abs_url = urljoin(base_url, url) + # Bestimme Ressourcentyp + ext = os.path.splitext(urlparse(abs_url).path)[1].lower() + if ext in ['.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.ico']: + resource_type = 'images' + elif ext in ['.woff', '.woff2', '.ttf', '.otf', '.eot']: + resource_type = 'fonts' + else: + resource_type = 'css' + + local_path = self._download_resource(abs_url, base_path, resource_type) + if local_path: + rel_path = os.path.relpath(local_path, base_path).replace('\\', '/') + css_content = css_content.replace(url, rel_path) + + style.string = css_content + + # JavaScript verarbeiten + if download_js: + for script in soup.find_all('script', {'src': True}): + src_url = script['src'] + if src_url.startswith('/'): + parsed_base = urlparse(base_url) + abs_url = f"{parsed_base.scheme}://{parsed_base.netloc}{src_url}" + else: + abs_url = urljoin(base_url, src_url) + local_path = self._download_resource(abs_url, base_path, 'js') + if local_path: + script['src'] = os.path.relpath(local_path, base_path).replace('\\', '/') + + # Bilder verarbeiten + if download_images: + for img in soup.find_all('img'): + if img.get('src'): + src_url = img['src'] + # Behandle relative und absolute Pfade + if src_url.startswith('/'): + # Absoluter Pfad zur Domain - füge Domain hinzu + parsed_base = urlparse(base_url) + abs_url = f"{parsed_base.scheme}://{parsed_base.netloc}{src_url}" + else: + # 
Relativer Pfad oder vollständige URL + abs_url = urljoin(base_url, src_url) + + local_path = self._download_resource(abs_url, base_path, 'images') + if local_path: + img['src'] = os.path.relpath(local_path, base_path).replace('\\', '/') + + # srcset für responsive Bilder + if img.get('srcset'): + new_srcset = [] + for src_desc in img['srcset'].split(','): + parts = src_desc.strip().split(' ') + if parts: + src_url = parts[0] + # Behandle relative und absolute Pfade + if src_url.startswith('/'): + parsed_base = urlparse(base_url) + abs_url = f"{parsed_base.scheme}://{parsed_base.netloc}{src_url}" + else: + abs_url = urljoin(base_url, src_url) + + local_path = self._download_resource(abs_url, base_path, 'images') + if local_path: + rel_path = os.path.relpath(local_path, base_path).replace('\\', '/') + parts[0] = rel_path + new_srcset.append(' '.join(parts)) + else: + new_srcset.append(src_desc) + img['srcset'] = ', '.join(new_srcset) + + # Picture source tags verarbeiten + for picture in soup.find_all('picture'): + for source in picture.find_all('source'): + if source.get('srcset'): + new_srcset = [] + for src_desc in source['srcset'].split(','): + parts = src_desc.strip().split(' ') + if parts: + src_url = parts[0] + # Behandle relative und absolute Pfade + if src_url.startswith('/'): + parsed_base = urlparse(base_url) + abs_url = f"{parsed_base.scheme}://{parsed_base.netloc}{src_url}" + else: + abs_url = urljoin(base_url, src_url) + + local_path = self._download_resource(abs_url, base_path, 'images') + if local_path: + rel_path = os.path.relpath(local_path, base_path).replace('\\', '/') + parts[0] = rel_path + new_srcset.append(' '.join(parts)) + else: + new_srcset.append(src_desc) + source['srcset'] = ', '.join(new_srcset) + + # Auch Background-Images in style-Attributen + for element in soup.find_all(style=True): + style = element['style'] + urls = re.findall(r'url\(["\']?([^"\'()]+)["\']?\)', style) + for url in urls: + # Behandle relative und absolute Pfade + 
if url.startswith('/'): + parsed_base = urlparse(base_url) + abs_url = f"{parsed_base.scheme}://{parsed_base.netloc}{url}" + else: + abs_url = urljoin(base_url, url) + + local_path = self._download_resource(abs_url, base_path, 'images') + if local_path: + rel_path = os.path.relpath(local_path, base_path).replace('\\', '/') + style = style.replace(url, rel_path) + element['style'] = style + + # Videos verarbeiten + if download_videos: + # HTML5 video tags + for video in soup.find_all('video'): + # Video source tags + for source in video.find_all('source'): + if source.get('src'): + src_url = source['src'] + if src_url.startswith('/'): + parsed_base = urlparse(base_url) + abs_url = f"{parsed_base.scheme}://{parsed_base.netloc}{src_url}" + else: + abs_url = urljoin(base_url, src_url) + local_path = self._download_resource(abs_url, base_path, 'video') + if local_path: + source['src'] = os.path.relpath(local_path, base_path).replace('\\', '/') + # Direct video src + if video.get('src'): + src_url = video['src'] + if src_url.startswith('/'): + parsed_base = urlparse(base_url) + abs_url = f"{parsed_base.scheme}://{parsed_base.netloc}{src_url}" + else: + abs_url = urljoin(base_url, src_url) + local_path = self._download_resource(abs_url, base_path, 'video') + if local_path: + video['src'] = os.path.relpath(local_path, base_path).replace('\\', '/') + # Video poster + if video.get('poster') and download_images: + poster_url = video['poster'] + if poster_url.startswith('/'): + parsed_base = urlparse(base_url) + abs_url = f"{parsed_base.scheme}://{parsed_base.netloc}{poster_url}" + else: + abs_url = urljoin(base_url, poster_url) + local_path = self._download_resource(abs_url, base_path, 'images') + if local_path: + video['poster'] = os.path.relpath(local_path, base_path).replace('\\', '/') + + # iframe embeds (YouTube, Vimeo, etc.) 
+ for iframe in soup.find_all('iframe'): + src = iframe.get('src') + if src and any(domain in src for domain in ['youtube.com', 'vimeo.com', 'dailymotion.com']): + # Für eingebettete Videos erstellen wir einen Platzhalter + placeholder = soup.new_tag('div', style='background-color: #232D53; color: #00D4FF; padding: 20px; text-align: center; border-radius: 8px;') + placeholder.string = f'[Eingebettetes Video: {src}]' + iframe.replace_with(placeholder) + + # Links zu anderen Seiten konvertieren + for a in soup.find_all('a', href=True): + href = a['href'] + + # Spezialbehandlung für JavaScript-basierte Links (z.B. Galerie) + if href.startswith('javascript:') or href == '#': + # Prüfe ob es ein onclick-Attribut gibt + onclick = a.get('onclick', '') + if onclick: + # Füge ein data-Attribut hinzu um den ursprünglichen onclick zu behalten + a['data-original-onclick'] = onclick + # Entferne onclick für bessere Offline-Kompatibilität + del a['onclick'] + + # Wenn href nur "#" ist und es einen data-href oder ähnliches gibt + if href == '#': + # Prüfe alternative href-Attribute + for attr in ['data-href', 'data-url', 'data-link']: + if a.get(attr): + a['href'] = a[attr] + break + continue + + # Skip andere spezielle Links + if href.startswith(('mailto:', 'tel:', 'data:')): + continue + + abs_url = urljoin(base_url, href) + parsed = urlparse(abs_url) + + # Wenn es eine interne Seite ist, konvertiere zu lokalem Pfad + if parsed.netloc == urlparse(base_url).netloc or not parsed.netloc: + # Erstelle lokalen Dateinamen für die verlinkte Seite + if abs_url in self.url_mapping: + local_path = self.url_mapping[abs_url] + a['href'] = os.path.relpath(local_path, base_path).replace('\\', '/') + else: + # Für noch nicht heruntergeladene Seiten + filename = self._sanitize_filename(abs_url) + if parsed.path.endswith('/') or not parsed.path: + filename = 'index.html' + elif not os.path.splitext(filename)[1]: + filename += '.html' + a['href'] = os.path.relpath(os.path.join(base_path, 
filename), base_path).replace('\\', '/') + + # Meta-Tags für bessere Offline-Darstellung + if not soup.find('meta', {'http-equiv': 'Content-Type'}): + meta = soup.new_tag('meta') + meta['http-equiv'] = 'Content-Type' + meta['content'] = 'text/html; charset=utf-8' + if soup.head: + soup.head.insert(0, meta) + else: + head = soup.new_tag('head') + soup.insert(0, head) + head.insert(0, meta) + + # Füge JavaScript für Offline-Galerie-Navigation hinzu + gallery_script = soup.new_tag('script') + gallery_script.string = ''' + // Offline Gallery Navigation Fix + document.addEventListener('DOMContentLoaded', function() { + // Finde alle Links mit gespeicherten onclick-Events + var links = document.querySelectorAll('a[data-original-onclick]'); + links.forEach(function(link) { + link.style.cursor = 'pointer'; + link.addEventListener('click', function(e) { + e.preventDefault(); + // Versuche die ursprüngliche onclick-Funktion auszuführen + try { + eval(this.getAttribute('data-original-onclick')); + } catch(err) { + console.log('Gallery navigation not available in offline mode'); + } + }); + }); + + // Behandle Hash-Links für Galerie-Navigation + var hashLinks = document.querySelectorAll('a[href^="#"]'); + hashLinks.forEach(function(link) { + link.addEventListener('click', function(e) { + var targetId = this.getAttribute('href').substring(1); + if (targetId) { + var target = document.getElementById(targetId); + if (target) { + e.preventDefault(); + target.scrollIntoView({behavior: 'smooth'}); + } + } + }); + }); + }); + ''' + if soup.body: + soup.body.append(gallery_script) + + return str(soup) + + def _extract_links(self, html_content: str, base_url: str) -> Set[str]: + soup = BeautifulSoup(html_content, 'html.parser') + links = set() + + for a in soup.find_all('a', href=True): + abs_url = urljoin(base_url, a['href']) + parsed = urlparse(abs_url) + + # Nur Links zur gleichen Domain + if parsed.netloc == urlparse(base_url).netloc: + # Entferne Fragment + clean_url = 
urlunparse(parsed._replace(fragment='')) + links.add(clean_url) + + return links + + def _create_navigation_index(self, save_path: str, original_url: str): + """Erstellt eine Index-Datei mit allen heruntergeladenen Seiten für einfache Navigation""" + try: + # Sammle alle heruntergeladenen HTML-Dateien + html_files = [] + for root, dirs, files in os.walk(save_path): + for file in files: + if file.endswith('.html') and file != '_navigation_index.html': + rel_path = os.path.relpath(os.path.join(root, file), save_path) + html_files.append(rel_path.replace('\\', '/')) + + # Erstelle HTML für Navigation + nav_html = f''' + + + + + Navigation - {urlparse(original_url).netloc} + + + +
+

Webseiten-Navigation

+
Gesicherte Webseite: {original_url}
+ +
+ Seiten: {len(html_files)} + Datum: {datetime.now().strftime("%d.%m.%Y %H:%M")} +
+ + → Zur Hauptseite + +

Alle gesicherten Seiten

+
    +''' + + # Sortiere Dateien für bessere Übersicht + html_files.sort() + + for file in html_files: + if file == 'index.html': + continue # Hauptseite bereits oben verlinkt + + # Erstelle einen lesbaren Titel aus dem Dateipfad + title = file.replace('.html', '').replace('/', ' → ') + if len(title) > 80: + title = '...' + title[-77:] + + nav_html += f'''
  • + + {title} +
    {file}
    +
    +
  • +''' + + nav_html += '''
+
+ +''' + + # Speichere Navigation Index + nav_path = os.path.join(save_path, '_navigation_index.html') + with open(nav_path, 'w', encoding='utf-8') as f: + f.write(nav_html) + + self._emit_status(f"Navigations-Index erstellt: {nav_path}") + + except Exception as e: + self._emit_status(f"Fehler beim Erstellen des Navigations-Index: {str(e)}") + + def _pre_scan_website(self, url: str, follow_links: bool = False, max_depth: int = 1) -> int: + """Führt eine Vorab-Prüfung durch um die Anzahl der Ressourcen zu schätzen""" + try: + self._emit_status("Analysiere Webseite...") + response = self.session.get(url, timeout=10) + response.raise_for_status() + + soup = BeautifulSoup(response.text, 'html.parser') + resource_count = 1 # Die HTML-Seite selbst + + # Zähle Bilder + resource_count += len(soup.find_all('img')) + + # Zähle CSS-Dateien + resource_count += len(soup.find_all('link', {'rel': 'stylesheet'})) + + # Zähle JS-Dateien + resource_count += len(soup.find_all('script', {'src': True})) + + # Zähle Videos + resource_count += len(soup.find_all('video')) + resource_count += len(soup.find_all('source')) + + # Schätze inline CSS Ressourcen + for style in soup.find_all('style'): + if style.string: + resource_count += len(re.findall(r'url\(["\']?[^"\'()]+["\']?\)', style.string)) + + # Schätze style-Attribut Ressourcen + for element in soup.find_all(style=True): + resource_count += len(re.findall(r'url\(["\']?[^"\'()]+["\']?\)', element['style'])) + + if follow_links: + # Füge geschätzte Anzahl von verlinkten Seiten hinzu + links = soup.find_all('a', href=True) + internal_links = [l for l in links if not l['href'].startswith(('http://', 'https://', '#', 'mailto:', 'tel:'))] + resource_count += min(len(internal_links), 20) # Maximal 20 zusätzliche Seiten schätzen + + return max(resource_count, 10) # Mindestens 10 für realistische Fortschrittsanzeige + + except Exception as e: + self._emit_status(f"Vorab-Prüfung fehlgeschlagen: {str(e)}") + return 50 # Fallback-Schätzung + + 
def download_website(self, url: str, save_path: str, download_images: bool = True,
                     download_css: bool = True, download_js: bool = True,
                     download_videos: bool = True, follow_links: bool = False,
                     max_depth: int = 1, **kwargs) -> bool:
    """Download ``url`` (and optionally same-site linked pages) into ``save_path``.

    The start page is always stored as ``index.html``; every other page is
    stored under the name returned by ``self._sanitize_filename``. Each page is
    rewritten by ``self._process_html`` before it is written to disk, and a
    navigation index is generated once the crawl has finished.

    Args:
        url: Start page of the crawl.
        save_path: Target directory; created if it does not exist.
        download_images: Forwarded to ``_process_html``.
        download_css: Forwarded to ``_process_html``.
        download_js: Forwarded to ``_process_html``.
        download_videos: Forwarded to ``_process_html``.
        follow_links: If true, pages returned by ``_extract_links`` are
            queued as well, up to ``max_depth`` levels below the start page.
        max_depth: Maximum link depth that is still expanded.
        **kwargs: Accepted for call compatibility; not used here.

    Returns:
        ``True`` when the crawl ran to completion (individual pages may have
        been skipped), ``False`` when an unexpected exception aborted it.
    """
    # Function-scope imports keep this block self-contained.
    from collections import deque
    from itertools import islice

    skip_status_codes = (403, 404, 401, 429, 503)
    max_pages = 100          # safety limit while following links
    max_listed_skips = 10    # number of skipped URLs shown in the summary

    try:
        self.start_time = datetime.now()
        self.visited_urls.clear()
        self.downloaded_resources.clear()
        self.skipped_urls.clear()
        self.url_mapping.clear()
        self.current_base_url = url

        # Create the output directory.
        os.makedirs(save_path, exist_ok=True)

        # Pre-scan so the progress bar has a resource estimate to work with.
        self.total_resources_estimate = self._pre_scan_website(url, follow_links, max_depth)
        self.processed_resources = 0

        self._emit_status(f"Starte Download von {url}")
        self._emit_progress(5)  # 5% once the pre-scan is done

        # FIFO queue of (url, depth); deque gives O(1) pops from the left.
        to_download = deque([(url, 0)])
        processed = 0

        while to_download and (not follow_links or processed < max_pages):
            current_url, depth = to_download.popleft()

            if current_url in self.visited_urls:
                continue
            self.visited_urls.add(current_url)

            try:
                # Human-like delay before the request.
                if self.human_behavior:
                    self._human_delay(base_delay=1.5, variation=0.8)

                response = self.session.get(current_url, timeout=30)

                # Skip pages that answer with one of the known "give up" codes.
                if response.status_code in skip_status_codes:
                    self._emit_status(f"Überspringe {current_url} (HTTP {response.status_code})")
                    self.skipped_urls[current_url] = f"HTTP {response.status_code}"
                    processed += 1
                    continue

                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                self._emit_status(f"Fehler bei {current_url}: {str(e)} - Überspringe...")
                self.skipped_urls[current_url] = str(e)
                processed += 1
                continue

            # Rewrite the page so it references the locally stored resources.
            processed_html = self._process_html(
                response.text, current_url, save_path,
                download_images, download_css, download_js, download_videos
            )

            # The start page is always stored as index.html.
            if current_url == url:
                filename = 'index.html'
            else:
                filename = self._sanitize_filename(current_url)

            html_path = os.path.join(save_path, filename)
            os.makedirs(os.path.dirname(html_path), exist_ok=True)

            # Remember where the page went so links can be converted later.
            self.url_mapping[current_url] = html_path

            with open(html_path, 'w', encoding='utf-8') as f:
                f.write(processed_html)

            # Queue linked pages if requested and the depth limit allows it.
            if follow_links and depth < max_depth:
                for link in self._extract_links(response.text, current_url):
                    if link not in self.visited_urls:
                        to_download.append((link, depth + 1))

            processed += 1
            # A page counts as roughly five resources for the progress estimate.
            self.processed_resources += 5
            progress = min(95, int((self.processed_resources / self.total_resources_estimate) * 90) + 5)
            self._emit_progress(progress)

            # Human-like browsing pattern. Only worth doing while pages are
            # still queued; otherwise the crawl would sleep right before exiting.
            # NOTE(review): the original indentation is not visible in this view;
            # the session renewal is assumed to belong to the human_behavior branch.
            if self.human_behavior and to_download:
                if processed % 5 == 0:
                    # Simulated break every five pages (no status message).
                    time.sleep(random.uniform(10, 20))

                if processed % 20 == 0:
                    # Quietly renew the session (simulated browser restart),
                    # carrying the cookies over.
                    old_cookies = self.session.cookies
                    self.session.close()
                    self.session = requests.Session()
                    self.session.cookies = old_cookies
                    self._set_random_user_agent()

        self._emit_progress(100)

        # Summary
        self._emit_status("\n=== Zusammenfassung ===")
        if self.skipped_urls:
            self._emit_status(f"Erfolgreich: {len(self.downloaded_resources)} Dateien")
            self._emit_status(f"Übersprungen: {len(self.skipped_urls)} URLs")
            self._emit_status("\nÜbersprungene URLs:")
            # Use dedicated loop names: reusing `url` here would overwrite the
            # start URL that the navigation index below still needs.
            for skipped_url, reason in islice(self.skipped_urls.items(), max_listed_skips):
                self._emit_status(f" - {skipped_url}: {reason}")
            if len(self.skipped_urls) > max_listed_skips:
                self._emit_status(f" ... und {len(self.skipped_urls) - max_listed_skips} weitere")
        else:
            self._emit_status(f"Alle {len(self.downloaded_resources)} Dateien erfolgreich heruntergeladen!")

        self._emit_status("\nDownload abgeschlossen!")
        self.end_time = datetime.now()

        # Build the navigation index for the start URL.
        self._create_navigation_index(save_path, url)

        return True

    except Exception as e:
        # Top-level boundary: report the failure and signal it via the return value.
        self._emit_status(f"Fehler: {str(e)}")
        self.end_time = datetime.now()
        return False
100644 index 0000000..1c0898f --- /dev/null +++ b/src/resources/icons/sun.svg @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/src/resources/logo/intelsight-full-dark.svg b/src/resources/logo/intelsight-full-dark.svg new file mode 100644 index 0000000..0e225d0 --- /dev/null +++ b/src/resources/logo/intelsight-full-dark.svg @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + IntelSight + SICHERHEIT MADE IN GERMANY + + \ No newline at end of file diff --git a/src/resources/logo/intelsight-full-light.svg b/src/resources/logo/intelsight-full-light.svg new file mode 100644 index 0000000..353b693 --- /dev/null +++ b/src/resources/logo/intelsight-full-light.svg @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + IntelSight + SICHERHEIT MADE IN GERMANY + + \ No newline at end of file diff --git a/src/resources/logo/intelsight-icon-transparent-dark.svg b/src/resources/logo/intelsight-icon-transparent-dark.svg new file mode 100644 index 0000000..fcbed9f --- /dev/null +++ b/src/resources/logo/intelsight-icon-transparent-dark.svg @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/resources/logo/intelsight-name-dark.svg b/src/resources/logo/intelsight-name-dark.svg new file mode 100644 index 0000000..5efe807 --- /dev/null +++ b/src/resources/logo/intelsight-name-dark.svg @@ -0,0 +1,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + IntelSight + + \ No newline at end of file diff --git a/src/resources/logo/intelsight-name-light.svg b/src/resources/logo/intelsight-name-light.svg new file mode 100644 index 0000000..7e5c2dd --- /dev/null +++ b/src/resources/logo/intelsight-name-light.svg @@ -0,0 +1,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + IntelSight + + \ No newline at end of file diff --git a/src/resources/logo/intelsight-name-transparent-dark.svg 
b/src/resources/logo/intelsight-name-transparent-dark.svg new file mode 100644 index 0000000..5efe807 --- /dev/null +++ b/src/resources/logo/intelsight-name-transparent-dark.svg @@ -0,0 +1,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + IntelSight + + \ No newline at end of file diff --git a/src/resources/styles/__pycache__/dark_theme.cpython-310.pyc b/src/resources/styles/__pycache__/dark_theme.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..02d6ea496cf7c017803d003a9886b2c7fb51c598 GIT binary patch literal 6641 zcmbVR&2HO95LR*u4CGp%w*`d2NnqNPWIGO96t!i$4jLDBAr2*%5#Ic%ucczi!0j+1IVs-#940 z9>Cyx_>+&|BWnHJiXIGF&mPXcf}1yXo|7LEKB5UD@2H6BC}Am?r4c!zGM3&TzfYJX z&v!PSlixN7ysmkgd)IUtC-(vIXqF|+llL-b(=Bq8#OcjB4F_s`3U_Xi%>f&8M&7>K z+#+xIi06EZ9D<-^i%6PEPcji-f3xxX2C8yKXIXyDQdmx)(93fgr|;u5;5^>UtpphYn(%wwKj z#W^ro1FaZp1RYpGYSM0Pf7#ZIG;4#7wSn1JoMy9JAzveIcP(-CyFHlSt(C8q)ZWnt zhXN{+J{1x1w6laT2R<@SJ)DFa)qNV1D!1Dmif@hZhznq^CxA&a8IX=+gkhv5iPqx^ zc?WSqM+Zekm`a!-enfMs#*Hpp@hdGqASq9o)`-t?2qwTzJq3amKj*${etOqmwVN!ue7Y5R1VV*C;!q^NuNa^TdRit|j=TRX4ci;Y==3 z`mndRZ(>soP24g;me7z*xWYrb!BXq*YQsPpI+{WJO;y^vyqL*}9ye-Kp>rJ&mf{6H zYS4Ecs8K-x9oRl8Cd&ZsssPo9b(PCVzG7FwgnxwSq4B8+6P6+iPi2BhxrCtY&SB@Y zcZgIk3yWfjk)oN{b75(zxq8M~tH3VxJ23Bsh#|<|8#Al;Rp8Oi^wkwMZ~Ol7?&+y^ z9cyp9XEcUYtB7AsGFDl@_m2)v_U9#)>YEb0^STEsZRGneU-tVt95rgU+GJgd?>PnQ zO@JoT!Rnkt1$M>9W60VDX{=~UN=)Gnb3+q1>xE!^ts0=1DSXU1j`J}W;?Tiv<0zj5 zr0}nLuba}lnp{U*B($^L|eH%I(V(bx?%WUf5ZMGRtKscH#? 
zbG0$rVFK})K(s~u?edUx(5g!kdF z)ajZNlfP}*R^A^Kg5Op|s9Cc+!<~xero6IVto>i5PCmxW5A6YcK)(=r0>@2^X?73g zPR9KT9Tp;mJ1kmlisMPn?sAVN@i+}g2qzvUw2PJ_nx*+=N!0)xaF*UwG!6(JI;il* zcoc#KpT6`XHr|3JgG%?rf1s!sBJOV z;T62lmTK-bw@3Jf_yYDe|F&HyG|&|_E8>{iG463u{5p}K z=$WW*zT$_9I>oZkS@MwEZeJJ{ip$WgnZIl>wl3!az^R~9Mg|PJVk-S3lhs^Xy1D-s z2e6K^;(*W^M%|3joiI~{AVIVjVa!4KdWfSY$#p%#di&X)lOc-gIj!qn6*{BAop0H5 zF6e&_5fpMZu_@P#aoD(na=T_RX`-!5Ym~}pvtB8A9$3kt9*!j38RoB` zDfTXAOSvMY*_O0W9<- z0kzT*VW9$Q2)(X+aGp3{ftERElF(1j9(>t2K78}zhpV%b^OHYY;vxKWqCWmf`oZum z{AeME6P89|c+BJtM2X=ms6LVbw$6u8X#B!%a%gmblMk<8_alDiy=NoIpjnxEeL-&% sCWcV04_&oBRJ4)9h>DvJ`Gif`c6R^e(YJ6=oF(i#{1Hc@7LQv00-txl5C8xG literal 0 HcmV?d00001 diff --git a/src/resources/styles/__pycache__/light_theme.cpython-310.pyc b/src/resources/styles/__pycache__/light_theme.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d781b914d03742edd9cb7f6bca4311ab245fb97 GIT binary patch literal 6554 zcmb_h-EP}P7Iu0Sxaj>}P9YRd0@0>_UKfVhZPZUYnoiXl0YSW_HghN|r? z`X>7VeTVK3bo*{L9zVn@*8S~FR+unlDe{RI$Uw_%! 
z`nOv6pC_>REBr5?!jE+8hpqIyC~E@?8+>vMQ>NOmS{ z&Kddc?aq+=jmNy=L-Ga`$WoGg09MH#tIN8l+vq9niU_iBIUPM zeN2;^x!_=7kno&~hzw@0W+yZHT3d&E2m1%3gA*;*m+=+3`jDk_R&ADM46<)uWCd6> z7axBwQl<^tAf^)FTS)~gs$Rpu-#2ARE*MR%w?^myK((6&^0v<{@g+^ute6LYZ^f4p zIS9);1oh)TVkTs=0Gulbu7dR)jafcefM)noS73ME^Rmne2|@FteTvQY>$yo)c27prnW&oSQ-4P1h;4?(WIw4nahabki{bLk| zyf{u?j$Wq5x+N9!tOzPzf@-bUlgg4RiM)@{UNJXDJ!56P7SYZ^aSwI1oU(m4vq(c`SL6PsYq^MNue5rUYr$ z!dA{~j-<4rdfl7JtsFt~3SKZ{5nokMv4F^CONGBvM-*jH-yW|C+2#kNn<>t9g$Akl zYO!)`X(PJ9*hsP*ILy+=S#2;sgER!#XDr8x^bX)TOS4DR%R0gh0wgRwN^NmoBY;by zJ`fN_EatAz*erBuv4k{U*y3}5RKG&oqZvNRb-g1DzdkxYxBTpAEgri&$l^3a zWlj^e;2O7I8l8xy@hFChV-P-FL47WCL%n*xk_)p&+a5Nc3l8M8mvRpvZm>}>I>H5@*c$>|NkF`h9g8vNF#6ZziHoTr~X@TxxNY(_Ff=T%Ron5Ko!ZPMMUbv>Q1*Mz3X@t zHnG4N0c$a-J=+lIb^mI@ju$MsIpue{)w+ej$&F}cZ0Vz;KO(u}?9hN60)wHl9K}Op zEz&HZ6~|g(QL4d2x9goX+?^s!2GvRJ_<;mb26t5sq-;?eSlU>qnjq8)1 zD8L`8On&xvIOjixS~G_^W+n1xQY3p&EBQ(!Ozvec#mPObYFGX|w`6uP(K8qr2xpNA z%=;LA{Jk$r8buRLdt484IpJ)89d8)K-1f^{C$GGmI(EafHT?h3@>SGOTC^&ISaf|xfhY0m{+ zGBROsccr_sqMqIxNVoT>+*NZ7N{7y1lFwYZnCmG!3#iUxSgy)~QfHQG)vKuM%CFIl z%_&b`?Rag;uPrU+scdjP2@jEA-6fPTXs!f%qg|_IzkS}h5z_`6nhKmJ!#fpNd0u0C zg@ge+#(WubG{PGdLQu+6VT_&}spZTNyBuuJg+9S-r}*~ylRrLv_x78M z>mRQ#&Mwb>+Y+C^pE&f7KbPZZ`W^gnK~5K}NX7JPCU2k~Oy9yNkWbV_^%Oe%Pwb|G z`vJ)Bsd6~xcfki1O9uCSMKBihHfLfA6ZO QWidget { + background-color: #f8f9fa; +} + +/* Widget Hintergründe */ +QWidget { + background-color: transparent; + color: #212529; +} + +/* App Header */ +QWidget#appHeader { + background-color: transparent; + padding-bottom: 20px; +} + +/* Überschriften */ +QLabel#heading { + font-family: 'Poppins', sans-serif; + font-size: 32px; + font-weight: 700; + color: #212529 !important; + letter-spacing: -0.5px; + background-color: transparent; +} + +QLabel#subheading { + font-size: 16px; + color: #6c757d; + margin-top: -5px; +} + +QLabel#sectionTitle { + font-size: 18px; + font-weight: 600; + color: #212529; + margin-bottom: 10px; +} + +QLabel#inputLabel { + font-size: 14px; + font-weight: 600; + 
color: #495057; + margin-bottom: 4px; +} + +/* Content Card - Hauptcontainer */ +QWidget#contentCard { + background-color: #ffffff; + border: 1px solid #e9ecef; + border-radius: 12px; + padding: 32px; +} + +/* Tabellen-Style */ +QTableWidget#dataTable { + background-color: transparent; + border: none; + outline: none; +} + +QTableWidget#dataTable::item { + background-color: transparent; + color: #495057; + font-weight: 600; + padding: 12px 16px; + border-bottom: 1px solid #e9ecef; +} + +QTableWidget#dataTable::item:selected { + background-color: transparent; +} + +/* Eingabefelder */ +QLineEdit { + background-color: #f8f9fa; + border: 1px solid #ced4da; + border-radius: 8px; + padding: 12px 16px; + color: #212529; + font-size: 14px; + min-height: 24px; +} + +QLineEdit:focus { + border-color: #00D4FF; + background-color: #ffffff; + outline: none; +} + +QLineEdit::placeholder { + color: #adb5bd; +} + +/* Buttons */ +QPushButton { + background-color: #ffffff; + color: #212529; + border: 1px solid #ced4da; + border-radius: 24px; + padding: 0 24px; + min-height: 40px; + font-size: 14px; + font-weight: 600; +} + +QPushButton:hover { + background-color: #f8f9fa; + border-color: #00D4FF; +} + +QPushButton:pressed { + background-color: #e9ecef; +} +QPushButton:disabled { + background-color: #f8f9fa; + color: rgba(0, 0, 0, 0.3); + border: 1px solid rgba(0, 0, 0, 0.1); +} + +/* Primary Button */ +QPushButton#primaryButton { + background-color: #00D4FF; + color: #212529; + border: none; +} + +QPushButton#primaryButton:hover { + background-color: #00B8E6; + color: #ffffff; +} + +QPushButton#primaryButton:pressed { + background-color: #0099CC; +} + +QPushButton#primaryButton:disabled { + background-color: #e9ecef; + color: #adb5bd; +} + +/* Mode Toggle Button */ +QPushButton#modeToggle { + background-color: rgba(0, 0, 0, 0.05); + border: none; + border-radius: 20px; + padding: 8px; + min-width: 40px; + min-height: 40px; + max-width: 40px; + max-height: 40px; +} + 
+QPushButton#modeToggle:hover { + background-color: rgba(0, 0, 0, 0.1); +} + +/* CheckBox */ +QCheckBox { + spacing: 10px; + color: #212529; + font-size: 14px; + padding: 4px 0; +} + +QCheckBox::indicator { + width: 20px; + height: 20px; + border-radius: 4px; + border: 2px solid #adb5bd; + background-color: #ffffff; +} + +QCheckBox::indicator:hover { + border: 2px solid #6c757d; + background-color: #f8f9fa; +} + +QCheckBox::indicator:checked { + background-color: #00D4FF; + border: 2px solid #00D4FF; + image: url(src/resources/icons/check.svg); + padding: 2px; +} + +/* SpinBox */ +QSpinBox { + background-color: #f8f9fa; + border: 1px solid #ced4da; + border-radius: 8px; + padding: 8px 12px; + color: #212529; + font-size: 14px; + min-width: 60px; +} + +QSpinBox:focus { + border-color: #00D4FF; + background-color: #ffffff; +} + +QSpinBox::up-button, QSpinBox::down-button { + background-color: transparent; + border: none; + width: 20px; +} + +QSpinBox::up-button:hover, QSpinBox::down-button:hover { + background-color: rgba(0, 212, 255, 0.1); +} + +QSpinBox::up-arrow, QSpinBox::down-arrow { + image: none; + width: 0; + height: 0; +} + +/* Progress Bar */ +QProgressBar { + background-color: #e9ecef; + border: none; + border-radius: 6px; + height: 12px; + text-align: center; + font-size: 12px; + color: #212529; +} + +QProgressBar::chunk { + background: linear-gradient(90deg, #00D4FF 0%, #00B8E6 100%); + border-radius: 6px; +} + +/* Text Edit für Status */ +QTextEdit#statusLog { + background-color: #f8f9fa; + border: 1px solid #e9ecef; + border-radius: 8px; + padding: 12px; + color: #495057; + font-family: 'SF Mono', Monaco, 'Cascadia Code', monospace; + font-size: 13px; +} + +/* Scrollbar */ +QScrollBar:vertical { + background-color: transparent; + width: 10px; + border-radius: 5px; +} + +QScrollBar::handle:vertical { + background-color: #ced4da; + border-radius: 5px; + min-height: 30px; +} + +QScrollBar::handle:vertical:hover { + background-color: #adb5bd; +} + 
+QScrollBar::add-line:vertical, QScrollBar::sub-line:vertical { + height: 0; +} + +/* Frame Separator */ +QFrame#separator { + background-color: #e9ecef; + max-height: 1px; + margin: 20px 0; +} + +/* Status Bar */ +QStatusBar { + background-color: #f8f9fa; + color: #6c757d; + border-top: 1px solid #e9ecef; + font-size: 13px; + padding: 4px; +} + +/* Message Box */ +QMessageBox { + background-color: #ffffff; + color: #212529; +} + +QMessageBox QPushButton { + min-width: 80px; +} + +/* ComboBox */ +QComboBox { + background-color: #f8f9fa; + border: 1px solid #ced4da; + border-radius: 8px; + padding: 10px 16px; + color: #212529; + font-size: 14px; +} + +QComboBox:hover { + border-color: #00D4FF; +} + +QComboBox::drop-down { + border: none; + width: 24px; +} + +QComboBox::down-arrow { + width: 0; + height: 0; + border-style: solid; + border-width: 6px 4px 0 4px; + border-color: #00D4FF transparent transparent transparent; +} + +QComboBox QAbstractItemView { + background-color: #ffffff; + border: 1px solid #ced4da; + selection-background-color: #e3f2fd; + color: #212529; +} + +/* Tool Tips */ +QToolTip { + background-color: #212529; + color: #ffffff; + border: none; + border-radius: 6px; + padding: 8px 12px; + font-size: 13px; +} +""" \ No newline at end of file diff --git a/src/ui/__init__.py b/src/ui/__init__.py new file mode 100644 index 0000000..ed45bbc --- /dev/null +++ b/src/ui/__init__.py @@ -0,0 +1 @@ +# UI Module \ No newline at end of file diff --git a/src/ui/__pycache__/__init__.cpython-310.pyc b/src/ui/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..76328b70d0757afb6e51187df036f2bacd5c8c56 GIT binary patch literal 185 zcmd1j<>g`k0`;cC3~nI(7{oyaj6jY95Erumi4=xl22Do4l?+87VFd9j#@Q++v^ce> zI3^=CFQq8PCAB!aB)=fWGp{5yCpa@bqa-GvC_gJTyCg9wCow%WCL}*UCn>)|H#{|| zI5o2*HBZ;MD6u>zwJ4^zC^@DyGbTPhGcU6wK3=b&@)n0pZhlH>PO2Tq)?y|g!NLFl Db2~7v literal 0 HcmV?d00001 diff --git 
a/src/ui/__pycache__/custom_widgets.cpython-310.pyc b/src/ui/__pycache__/custom_widgets.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..759a229938bf50cc00dd362a20b97ca2b0e86771 GIT binary patch literal 1584 zcmZuxOK%%D5GMDn)x(ahy6)@LlTyUUq3|IHiY9WL_E1-foCY8SEM$?kSJ{U`QnhTz zhe83;bAN$!^k3RziynOIIq0D?lx)jDDR4A1T+V(o^UdbfYDi%H{oA|KpFBeTLSuC~ zV0;f-wLoyfX+l!UXlgMlwV6%PXC+SRGB@>@XKXw1(|`qOiIvijg=v|UDS1h_!`=6U zyMj(_R^eWUH2iCbN*Z<%v^&E%d!CPr&2~qhC*tR!j`Qrs=`EZMtFYR-aNDm$UpE}M z)#}BW77|?hd8P}e-4a>hwVQd8%Ra_lU6>0l^$>(0jB>&(?tLJ~lv~_>Ut+e=5of@h z55ytNMU+LgIZ`@Lmv9dHmWi_vsN2h})L)#=qUefzJZe7azE(o2?y1PQ?CuHmR_DX+kC1QDiBC>- zwV**i;w>?+ysj^b|LEiQ16qB<02b9%xwKXAAdTC9pnXRpXGjOp8+MAL~dqQUZ z#6}ymU`}Qw9S+Ka%EX5M>Yz3ubLZ0e-?KGy=IV8b<4wHT18#~7+6K2wL~FJ=@s_dc zb0U8M1fS?F{VBJnlsi-NR2JTP-8(P%wkMAMZ1DNu3+~N<(#GR1J^s@3<(w?q*$57< zv2ceM{@0Hm?|r+!e~sk>YuBug^qUd#;m_ayXnZBBpcUZ|Y5L+wi6DwL5DsKg(o*p(=o~6_1f_7lQdMMW4zY0gIp>Xj&ZH|iye6Ej=} zO;KIWbzdY&6fLkCzy`WOU52!*sl; y+?+RaDNMB`Mk&fS{+FZJ(9U%Au0`erD`;+OSOiZOIekYVjzTWf;q$N!p#B5Q#+PpZ literal 0 HcmV?d00001 diff --git a/src/ui/__pycache__/main_window.cpython-310.pyc b/src/ui/__pycache__/main_window.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..14c7143b9ed706b60df32bf97b0b416d73f5f73f GIT binary patch literal 15024 zcmb7L>u(&_b>G>??Cg`vr$mXe)_Pehi}*Kujic+eZ(o~1!q(r zb;k5DXIvk5CiDqsQlE6D^eN|ne!w}XA9SYmX@&ZV+J~IO`eA2ApK*@pN1Vs>$DE`3 zQ7((wMdxw-ap#zR%sH+fcTVUhoRj)V=ahbm%j2l~g#HBM3H!8u8toBKg)Q^e$IJbf1dHQ{ettN z{vzWU`z7Z~`j;5b+VjrK`pZh>O2o(+`41vS-kiA~)6XM4Vib@rmSigY4icj zeQDayEMKo!jd!eu(cJWt%kNmmhUp>q*2U(%Yn82Ln-W)t33d5eWzDqx)bcf}VO};Y zD!8v#+x@z9@m^!ADvyf@o@H^$yWwmKQDC zyku4E<^~eiK~dQ-sU*F;;#IshSJ0MMT2@2mu2$9t{9dzMFG!4lwSMAt&vg9kGAL~` zC@zFkT#f;n6~h#_EOXOWmls9FF~QkN%eFkv6kukhDw?)^L70_N!jCR{n5oqoQih*y zZN2NQSR0LsjjF4b1A0_7J#5X1P`IYJRfW@~mdV^^?_Ldg6ZEHSKlrEV>fU5qKWp zQ^ik}%T>GLy5;hxksTTiYhUwjV^7!3J07+f_U!G| zX4774-kZg|x~AosjoDzxw_Q=a-L`H!ICJGqK6G=fE%6LGeGGptiO95~@^4z{OzoPf 
zx&FYakdT9)1h5?b7Vzi(2qhiYgY-0_AJT;3uO6*qy6Q?f4wwjVm2&DyKn;JX^{9o( ztfxOwzN>62owHu%eqwYF^iN%Rxig}`9@G_CcM7wbkh+6}QD%ogO$tr3zuHg57;xYA?(WFW^ ze&rdbR8&#_Yb;$*^2#wqdq4WcSS+thDN(>7E#X7jtP+1;MZH5&(Eod9bhnfM52ZXw z!3NYAq+q&ScA7@pCOlg%zuT_ZvP2wVo}p;O1z_R|0TLsAd<_CjT&4770wkZrbt=gP zaLRLjN|xDum+oi20P(TkYLA_3?^P$ zPrIZtNobPHG&Xk$V5rDgZABTz7zT>WQ%P#yCk01xIa$|e++NB^d`Rjp-Hr4zqzFlh zl+-0ZJybt>em6sY%BYAtwCpBCj34*5T4pILo(3PhmVUk}6=2zuR!jl9&I>lV)RlS8 z8S2igO@=}lS`cP}IEt2h%?+#^KXJEWw@q=A+P_6bN!Gw^>r+J@B;Fl^bHY57LFX0# z`ua}AS((6J=g98C$l2Xr`OTodia(xa71*~TJt#jCyTCA=_{AjdKrka6rRnoxP;E+A z>0vt&9k7I_(sT!@)G&oQP;QhTC=R8U?XqI?a^2{C1u8Gr%NwfvrCyL8kz$`~#Jllc z!Qhs?QBrc-iEaWlQv92w8s6A_@KzrGR-``On=sNwW+zJAA~gw4rs@Y69=sR{Mm0^N zKr4rchItz^vS?ZDs^}}nqd;j_4Y0#J>KX3k2Fz22nN-E(# zC4QE1>Xlc5O6#+|qbyEI!0}#0?%6@^V|#N^I!CP##~A-PFNd6S;Blfm5#stNawfZz z1Fk2~C$ARxD>8piq25y=t{>;zr;+<~DEAoWK7-t6Lb=B|_gUmV+s{SsG}@6WyD5sh7OFUEa=xx6Dav)bwN@*@X&*iTm{YvjuSf49A;w7l@ScTam1BIJ=kg86<~Pyi8$?Cl-Zg_)n$_b|OP!gadMuJc z+2w~lFrYN7zlFPL6`?y;9~6;(Fae;aDy^0b2MAK))>*&@i@-Xg4>hAFYcMI{=pzP2 zy0*EoVVi9-OjlrPo5CFST(p~aDt2-Gv!9CMTkGpKf+w?jdqY_3>w4O4uk{EsBO^ z7AtqWHmpChxI$A}gk4{}XxRv#G>YwpQCw^ar{Y;A%+QTe=fV}!tvDd-w_S9ZNwfzK zz%0U;z*OA9h>LGiz(Xz*H+0=ZZ;jGJW)?q`5twh9A zC@Mw$!g?EKD)|}h7F$E`s7#L|PNo4bKZ$v$-ht&$#z@GH*UsA&XU(X*`p?u41tg{- zojDr9UZZa=Fm&e5vwh^g3Pe_YB;UfM&H@f|ZZOl#p4gzG8iCy$i8}KBh4SwJbTT*L z)v@2#nD0!;*)L+hwP^}%IEIbR#4XD(o9>?6>4lY**-LPjFtdA#if>fzRuJd1THgK? 
ziwLEdtqPp2rsz!cH`grI+jFZmfl!&Ab4ANK3rek_y%g^9++gmVuO$6!=5rbn)PEk)xs@0)Dp+1AWt4(2ba26}J4E`Zn zd827t)h%(Ds*UW))kg;z7u+S&gmbPRLNm8a;aRYp2c@zRy@0X>oO6Nb8^>G)?Q2XI zF;CnGiiI(E3sNGu*DGSfYPb*=rnl0nR7vjSfWNg?$FwiO+Y{t+2r`KMG@_`@vQ4pC zS{857x@Q_IR>$Pj1sf4d2i@zr0lJ9L6{eg@u9@ptOh#`JlZdLphh9gpp;o4LAtVh$ zvZ<$mu6V-qum?z9`56+0WshZ{o~&ZZaKdoZnvDk7^s}-=E`eWYwnk@{@5`t9U6(H|%RQXISr>gHvtUn!=ck{P)KXuvp5nOAucHqOe{K;LQ(7q~ zT1<_l5!T8paPfg@u|gl?(!6pIv7s#T)5zDNS}aTX$VmgIJg&uGjVSmJr9^?UexBN| zg)ar%sAjkvDFouE7Qw056!%Uv)WWb-wo6pOFZfXrh9es4O^b3J4(buF@;4o&GVmRx zUNFLkpbP_|Z&08W9%X0dS6LHPGCBG`{rivwWN^R)!6CADI4)tp;weQtS9F|f?;NZt z#CEH9JT}yB4vd7^w>aJjoxfIbVVWV7T&%R$5$F}gT}I$iDJr_?NBkxLaE^k~qJ|=U zyx6~iyyvjaE^$|gcFyiQoJYvhU8WH=K&p<1cLIpV#vFn;Q3}%tx?za+BmZRFv4J05 zYLg8ZvU!V}pM8R0cB2#*6nNApAf_+h!$ly}(*N8C`F1!Qg%L*TF}mQQ;1q6L z6nEN-_Yz&c!P$w)XkRz#o*3lD2Dy>C+Dq;r#*H>K!thN6-*oWJ(APky%TT|sZpRua z;7R^N3{~c#mTc!OBh^rhbT?*X&PPz1wX;r^Zfbz%Y}~AGC*aD(dn4eMLr4fYCAtb6 zQUrx?&w~IF0z*-J5qHJvAt2;#8RPi&dwAcNpdJtyQa_3qV_l753O7HA?X)+J8=gsA z@TE~VgE~{a12U8b{2=f+JA{$?bSVE&2#Ib8p5>@o(kdV5S|I)M?yG4 zXfnhHK8rGh<{iUzWu_auPm$Z>$Uj~`(L0H+3=~oN2~M;7&*ddf%TNxLKgnqs&Oy32 z%b_{UOL>k@z~qvie5WEBaI-o`F49GHal>505nq9iX2NhN!fzyGu1%*(QUqsYKm+Qb z*EqLo{TxPUNfq0uAl@VJ4Fc~2=vnM-ySZ5=cg@ASi^{#Sfoqi7BD=|YoLntk>$@|4 zCZwmt6KDW&Xbt)k4uorV)RS8$os&i4Y(0|MV9utk7vOP8?U0^k_= zEe?*~r(~jPn-#&=AmS@Tbb)|JMI(VF+*iT!CMFl;N<)dSWc<;-5mg@CTwSIXGHjkn zla1yIPQB1kfXF|~@!gl)JRdv*xHusb95bM&d0zMqQO^de!S_hw56}<@Ex}2B1YOg0 zK>u!iuk8E|$X_AZDS(g(5Rc+7faw^NJ(RD8JFKXpeICuwSN^J>Po=U@k@=VgMNKzl zW3dS+VanrQ`6W2afnv_XxRAe4(y`9*eWf(dNx46LyTml9ATpy_O#d4Y9Lk7j5+v~~ z-o^DxRNgF5UOgtNo#(wcqH<7DbqH+a(WN>rZe)2~^HRMu3uRE(Nube}F4QCSoV=I# zFh!T_IMQ^bPBChEBSAf>-RST_yfK5z?$NC^Ty{exbf6MJk@UtOVD?^{$KH$awK=}Q zwRxmB&Y~lN+XcS6=qphcBN`%QmjYj=xM>L(25XQbqDgM|+}zw^s2KOu6!{5Llv1H< z+4GpTJ$JA2s`fP$=sDaQHAUHx?|N1{hc3XoM<=%BC9_|pJU-BT$QLPj1;9_i4|cHa@!3Hdh6W$2 zH;CX(0@n!eEf)hel)jC;@6kb^*Ck~CYB5~+Vq4RNuEu`^Y4fZ9sb=U3EUy&#A~=n0 zul@WH2FEU6>;GXd4x?i%pv1>ut8eIiZ6E%BfV>|Q|7_?fofG>WtUa>p(frJyIXVG- 
z`xW>u<4(rK;)126ieL>rxcQLy`Yu4gsGkfr(ca1Kf#xT~ES+XD#vd_xyM*dP%u*1A z#ej;YB(sBqAEishzz%`kHpq{M@I(k#LwGWTYau)p!qXu<6T-70Jl@NZSb@+`gu#Mi z$VZ_!YVd0tZ>%>CD-xC;*^7+>pqmh$H%V3%;X8`(@IBaX7xdGhS7C919wmo*hox1+ zVghef7RA>wF}iwLz!KQ#oC!8UakDK9SR9;-5L~r32rO=x9E*J1SLJc;tY3n|F_bo3 zkzW zB5JTlQu{b5<31T}Gx*m$b z77Bhw15<&hU^*Y|@SVUz0rgOT_91rZSTnmM+KJsPodd!YQqGjK6Q$kTiv=n#9()t@ z<((W|;wK8Qa%mLeHUScXf>;!H36RaQ?d@ zE?dfYeYB3pym%q>7a$OHm~`^G=_#xzBGs4VDe8(}oa{Xcv=aY8b4|B$DpVpA<`=~n zROc7j*aUgvC-4?!2cNiEf_!=3t9s4o7R zz~2y{jVm`Z?NC2~+az4Zyi2*iPvCI?UA5dLa%)u=H#oQ^JWQD*7u*)2NjGuH*`%)Y zkvC1V70T4dRz;=Z4s-((O|0M$d9~doeBC48g_n|SP5LQB=;U){JwG^+d_eRZEdie^ zntDCONjh%2+Tyq65XsDEStj?MC=zS`O(%+M;u-i$DxIciKAI)}N{PcX)dDOYlLl)1 zm)Yb@>=E)Ud?UmL@I#hV3?1=Nn*zcQxCgSD4?`5&GuC;jAA>;H61y^_$!Bk@SBzq_ zhG51;v(ffCcsT7lncVMpfOiA6`kLWy&h!kcZiV25^ouoLOhj^My^C)9D@({bBm>zOU@%B+P@!21*uOr~V z14qE4Ll@zp3Gm~u!^Y?wc$18w{^jW1rl?iu_{=Sp(t8XKDQ{1pWgV~G&Boip7N;nc zTNh1-AN$jPS+D^kK1IR4dhS~&I72f<+Ik{7rHn-%A#L~G2<@6MT7J@$JYjzageNR5 zb(fqVcYF}ogp7s6!qjw5p$w9iAcZgSVuwPAk^4~aWs-YpFNI(!se6??Y?RB8DU97N z{9!}=n1iHvCXUwNIcK)M0?!$SL5F&SjHCyoc-I5z%sw{yqUW)Lh`8BXZyVIvR$!q&sgSrU%_~Y1qaH=u4LD&pv6ddl2Qtt0bDm z22PB@jIx){PrfgjaIaVqh~kCDCK5PR#3yd`THc)a5py5*#pOgu)iGe#ollqY(oP=A zTUPuOrIXw2*MQ-lEzq!-qs%`A&_{Ut;NDJ#&xKf|Tn _|BZ&ZXEGp^lCp3`*%?1 zY1*u`>nAwO6pg9~31#UHDHeyn9i=;*U*?mi*sp$sI1Ar=J|xKomgfz0LYq&9=t=&>7oC5aSn*>5w+XN*dy6oRFMdMUp8@#s%?e)Ns}=l5K&yZA;U~iPYA3K7!HBXt9*&R3XFQ>|0L27 z5YW<*Z)n%FxR%hqrp-Xz#kEtKswK5d{tWVxg(HIMlwS2mZfq@kFU&0mk1#S85O-V-k)&)3ew kAHcsryh=LUEdtn)nPd@G4@IG~$_aL;g9tPB`NZP?0uR!Vi2wiq literal 0 HcmV?d00001 diff --git a/src/ui/custom_widgets.py b/src/ui/custom_widgets.py new file mode 100644 index 0000000..f164e0d --- /dev/null +++ b/src/ui/custom_widgets.py @@ -0,0 +1,54 @@ +from PyQt6.QtWidgets import QSpinBox, QStyleOptionSpinBox, QStyle +from PyQt6.QtCore import Qt, QRect +from PyQt6.QtGui import QPainter, QFont, QPen, QColor + + +class CustomSpinBox(QSpinBox): + def __init__(self, parent=None): + super().__init__(parent) 
+ + def paintEvent(self, event): + # Standard paint event + super().paintEvent(event) + + # Zeichne custom + und - Zeichen + painter = QPainter(self) + painter.setRenderHint(QPainter.RenderHint.Antialiasing) + + # Button-Bereiche berechnen + option = QStyleOptionSpinBox() + self.initStyleOption(option) + + up_rect = self.style().subControlRect( + QStyle.ComplexControl.CC_SpinBox, + option, + QStyle.SubControl.SC_SpinBoxUp, + self + ) + + down_rect = self.style().subControlRect( + QStyle.ComplexControl.CC_SpinBox, + option, + QStyle.SubControl.SC_SpinBoxDown, + self + ) + + # Font für Symbole + font = QFont() + font.setPixelSize(14) + font.setBold(True) + painter.setFont(font) + + # Farbe + if self.isEnabled(): + painter.setPen(QPen(QColor("#00D4FF"), 2)) + else: + painter.setPen(QPen(QColor("#666666"), 2)) + + # + Zeichen zeichnen + painter.drawText(up_rect, Qt.AlignmentFlag.AlignCenter, "+") + + # - Zeichen zeichnen + painter.drawText(down_rect, Qt.AlignmentFlag.AlignCenter, "−") # Unicode minus + + painter.end() \ No newline at end of file diff --git a/src/ui/main_window.py b/src/ui/main_window.py new file mode 100644 index 0000000..d239a81 --- /dev/null +++ b/src/ui/main_window.py @@ -0,0 +1,604 @@ +from PyQt6.QtWidgets import (QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, + QLabel, QLineEdit, QPushButton, QGroupBox, + QComboBox, QTextEdit, QProgressBar, QCheckBox, + QFileDialog, QMessageBox, QStatusBar, QSpinBox, + QTabWidget, QListWidget, QListWidgetItem, + QTableWidget, QTableWidgetItem, QHeaderView, + QFrame, QSplitter, QScrollArea) +from PyQt6.QtCore import Qt, QThread, pyqtSignal, QTimer, QSettings +from PyQt6.QtGui import QIcon, QPixmap, QPalette, QColor, QDesktopServices, QPainter +from PyQt6.QtCore import QUrl +import os +import sys +from datetime import datetime + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from resources.styles.dark_theme import DARK_THEME +from resources.styles.light_theme import LIGHT_THEME +from 
# File: src/ui/main_window.py
"""Main window and worker thread of the IntelSight website crawler GUI."""
from PyQt6.QtWidgets import (QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
                             QLabel, QLineEdit, QPushButton, QGroupBox,
                             QComboBox, QTextEdit, QProgressBar, QCheckBox,
                             QFileDialog, QMessageBox, QStatusBar, QSpinBox,
                             QTabWidget, QListWidget, QListWidgetItem,
                             QTableWidget, QTableWidgetItem, QHeaderView,
                             QFrame, QSplitter, QScrollArea)
from PyQt6.QtCore import Qt, QThread, pyqtSignal, QTimer, QSettings
from PyQt6.QtGui import QIcon, QPixmap, QPalette, QColor, QDesktopServices, QPainter
from PyQt6.QtCore import QUrl
import os
import sys
from datetime import datetime
from urllib.parse import urlparse

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from resources.styles.dark_theme import DARK_THEME
from resources.styles.light_theme import LIGHT_THEME
from core.web_crawler import WebCrawler
from utils.pdf_report import PDFReport
from ui.custom_widgets import CustomSpinBox
from utils.local_server import LocalWebServer


class CrawlerThread(QThread):
    """Worker thread that runs one crawl and then writes a PDF report.

    Signals:
        progress(int): forwarded from the crawler's progress callback.
        status(str): human-readable status lines for the log.
        finished(bool): emitted once at the end with the success flag.
        report_ready(str): path of a PDF report that was written.
    """

    progress = pyqtSignal(int)
    status = pyqtSignal(str)
    # NOTE(review): this name shadows QThread's built-in ``finished`` signal.
    # It is kept because existing code connects to ``finished`` with a bool.
    finished = pyqtSignal(bool)
    report_ready = pyqtSignal(str)

    def __init__(self, url, save_path, options):
        super().__init__()
        self.url = url
        self.save_path = save_path
        self.options = options
        self.crawler = None

    def _write_report(self, success, end_time, status_prefix):
        """Generate the PDF report and announce its path via the signals."""
        report_path = PDFReport().generate_report(
            url=self.url,
            save_path=self.save_path,
            start_time=self.crawler.start_time,
            end_time=end_time,
            downloaded_resources=self.crawler.downloaded_resources,
            skipped_urls=self.crawler.skipped_urls,
            options=self.options,
            success=success
        )
        self.status.emit(f"{status_prefix}: {report_path}")
        self.report_ready.emit(report_path)

    def run(self):
        """Crawl the site, write the report and emit ``finished``."""
        try:
            # Human behavior is always enabled.
            self.crawler = WebCrawler(human_behavior=True)
            self.crawler.progress_callback = self.progress.emit
            self.crawler.status_callback = self.status.emit

            success = self.crawler.download_website(
                self.url,
                self.save_path,
                **self.options
            )

            # A failing report must not turn a finished download into an error.
            try:
                self._write_report(success, self.crawler.end_time,
                                   "PDF-Bericht erstellt")
            except Exception as e:
                self.status.emit(f"Fehler beim Erstellen des PDF-Berichts: {str(e)}")

            self.finished.emit(success)
        except Exception as e:
            self.status.emit(f"Fehler: {str(e)}")

            # Best effort: still try to produce an error report.
            if self.crawler and self.crawler.start_time:
                try:
                    self._write_report(False,
                                       self.crawler.end_time or datetime.now(),
                                       "Fehler-PDF-Bericht erstellt")
                except Exception as report_error:
                    # Previously swallowed by a bare ``except: pass``.
                    self.status.emit(
                        f"Fehler beim Erstellen des PDF-Berichts: {str(report_error)}"
                    )

            self.finished.emit(False)


class WebsiteCrawlerWindow(QMainWindow):
    """Main application window: input form, progress log and action buttons."""

    def __init__(self):
        super().__init__()
        self.crawler_thread = None
        self.last_report_path = None
        self.last_save_path = None
        self.local_server = None
        self.settings = QSettings('IntelSight', 'WebsiteCrawler')
        self.dark_mode = self.settings.value('dark_mode', True, type=bool)
        self.init_ui()

    # ------------------------------------------------------------------ UI --

    @staticmethod
    def _resource_path(*parts):
        """Return an absolute path below ``src/resources``."""
        base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        return os.path.join(base_path, 'resources', *parts)

    @staticmethod
    def _labelled_section(title):
        """Create a widget with a vertical layout headed by an input label."""
        widget = QWidget()
        layout = QVBoxLayout(widget)
        layout.setSpacing(8)
        label = QLabel(title)
        label.setObjectName("inputLabel")
        layout.addWidget(label)
        return widget, layout

    def init_ui(self):
        """Build the complete window content."""
        self.setWindowTitle("IntelSight Webseiten-Crawler")
        self.setGeometry(100, 100, 1200, 800)

        self.apply_theme()

        # The scroll area is the central widget so small screens stay usable.
        scroll_area = QScrollArea()
        scroll_area.setWidgetResizable(True)
        scroll_area.setHorizontalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAsNeeded)
        scroll_area.setVerticalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAsNeeded)
        self.setCentralWidget(scroll_area)

        central_widget = QWidget()
        scroll_area.setWidget(central_widget)

        main_layout = QVBoxLayout(central_widget)
        main_layout.setContentsMargins(32, 32, 32, 32)
        main_layout.setSpacing(20)

        main_layout.addWidget(self._build_header())

        desc_label = QLabel("Sichere Webseiten lokal für Offline-Zugriff")
        desc_label.setObjectName("subheading")
        main_layout.addWidget(desc_label)

        main_layout.addWidget(self._build_content_card())
        main_layout.addStretch()

        self.status_bar = QStatusBar()
        self.setStatusBar(self.status_bar)
        self.status_bar.showMessage("Bereit")

    def _build_header(self):
        """Create the header row: logo, title and theme toggle."""
        header_widget = QWidget()
        header_widget.setObjectName("appHeader")
        header_layout = QHBoxLayout(header_widget)
        header_layout.setContentsMargins(0, 0, 0, 20)

        self.logo_label = QLabel()
        self.logo_label.setFixedSize(300, 70)  # fixed size for a stable layout
        self.logo_label.setAlignment(Qt.AlignmentFlag.AlignLeft | Qt.AlignmentFlag.AlignVCenter)
        self.refresh_logo()
        header_layout.addWidget(self.logo_label)

        # The title omits "IntelSight" because the logo already shows it.
        title_label = QLabel("Webseiten-Crawler")
        title_label.setObjectName("heading")
        header_layout.addWidget(title_label)

        header_layout.addStretch()

        self.mode_toggle = QPushButton()
        self.mode_toggle.setObjectName("modeToggle")
        self.mode_toggle.clicked.connect(self.toggle_theme)
        self.update_mode_icon()
        header_layout.addWidget(self.mode_toggle)

        return header_widget

    def _build_content_card(self):
        """Create the card holding the form, progress area and buttons."""
        content_widget = QWidget()
        content_widget.setObjectName("contentCard")
        content_layout = QVBoxLayout(content_widget)
        content_layout.setSpacing(20)

        # URL input
        url_widget, url_layout = self._labelled_section("URL:")
        self.url_input = QLineEdit()
        self.url_input.setPlaceholderText("https://example.com")
        url_layout.addWidget(self.url_input)
        content_layout.addWidget(url_widget)

        # Target folder
        save_widget, save_layout = self._labelled_section("Speicherort:")
        save_input_layout = QHBoxLayout()
        save_input_layout.setContentsMargins(0, 0, 0, 0)
        self.save_path_input = QLineEdit()
        self.save_path_input.setPlaceholderText("C:\\Downloads\\Webseite")
        self.browse_button = QPushButton("Durchsuchen")
        self.browse_button.clicked.connect(self.browse_folder)
        save_input_layout.addWidget(self.save_path_input)
        save_input_layout.addWidget(self.browse_button)
        save_layout.addLayout(save_input_layout)
        content_layout.addWidget(save_widget)

        # Backup type
        backup_type_widget, backup_type_layout = self._labelled_section("Sicherungsart:")
        self.snapshot_mode = QCheckBox("Webseiten-Snapshot erstellen (nur die aktuelle Seite mit Bildern und Formatierung)")
        self.snapshot_mode.setChecked(True)
        self.full_backup_mode = QCheckBox("Gesamte Webseite sichern (alle verlinkten Unterseiten und Medien)")
        backup_type_layout.addWidget(self.snapshot_mode)
        backup_type_layout.addWidget(self.full_backup_mode)
        content_layout.addWidget(backup_type_widget)

        # Exactly one mode is always selected. Mirroring every toggle (not
        # only "checked") prevents the state where both boxes are unchecked.
        # setChecked() with an unchanged value emits nothing, so no loop.
        self.snapshot_mode.toggled.connect(
            lambda checked: self.full_backup_mode.setChecked(not checked))
        self.full_backup_mode.toggled.connect(
            lambda checked: self.snapshot_mode.setChecked(not checked))
        self.full_backup_mode.toggled.connect(self.toggle_resource_options)

        # Resource options, only visible for a full backup
        self.resources_widget, resources_layout = self._labelled_section("Ressourcen:")
        self.download_images = QCheckBox("Bilder herunterladen")
        self.download_videos = QCheckBox("Videos herunterladen")
        self.download_css = QCheckBox("CSS-Dateien herunterladen")
        self.download_js = QCheckBox("JavaScript-Dateien herunterladen")
        for checkbox in (self.download_images, self.download_videos,
                         self.download_css, self.download_js):
            checkbox.setChecked(True)
            resources_layout.addWidget(checkbox)
        content_layout.addWidget(self.resources_widget)
        self.resources_widget.setVisible(False)  # hidden by default

        separator = QFrame()
        separator.setFrameShape(QFrame.Shape.HLine)
        separator.setObjectName("separator")
        content_layout.addWidget(separator)

        content_layout.addWidget(self._build_progress_section())
        content_layout.addWidget(self._build_button_row())
        return content_widget

    def _build_progress_section(self):
        """Create the progress bar and the read-only status log."""
        progress_widget = QWidget()
        progress_layout = QVBoxLayout(progress_widget)

        progress_label = QLabel("Download-Fortschritt")
        progress_label.setObjectName("sectionTitle")
        progress_layout.addWidget(progress_label)

        self.progress_bar = QProgressBar()
        self.progress_bar.setTextVisible(True)
        progress_layout.addWidget(self.progress_bar)

        self.status_text = QTextEdit()
        self.status_text.setReadOnly(True)
        self.status_text.setMaximumHeight(120)
        self.status_text.setObjectName("statusLog")
        progress_layout.addWidget(self.status_text)

        return progress_widget

    def _build_button_row(self):
        """Create the right-aligned row of action buttons."""
        button_widget = QWidget()
        button_layout = QHBoxLayout(button_widget)
        button_layout.setContentsMargins(0, 20, 0, 0)
        button_layout.addStretch()

        self.view_button = QPushButton("Webseite lokal anzeigen")
        self.view_button.setEnabled(False)
        self.view_button.clicked.connect(self.view_website)

        self.report_button = QPushButton("Download Bericht (PDF)")
        self.report_button.setEnabled(False)
        self.report_button.clicked.connect(self.open_last_report)

        self.stop_button = QPushButton("Abbrechen")
        self.stop_button.setEnabled(False)
        self.stop_button.clicked.connect(self.stop_download)

        self.start_button = QPushButton("Download starten")
        self.start_button.setObjectName("primaryButton")
        self.start_button.clicked.connect(self.start_download)

        for button in (self.view_button, self.report_button,
                       self.stop_button, self.start_button):
            button_layout.addWidget(button)
        return button_widget

    def _set_inputs_enabled(self, enabled):
        """Enable or disable every input that must not change mid-download."""
        for widget in (self.url_input, self.save_path_input, self.browse_button,
                       self.snapshot_mode, self.full_backup_mode,
                       self.download_images, self.download_videos,
                       self.download_css, self.download_js,
                       self.mode_toggle):
            widget.setEnabled(enabled)

    def toggle_resource_options(self, checked):
        """Show the resource options only while full backup is selected."""
        self.resources_widget.setVisible(checked)

    def browse_folder(self):
        """Let the user pick the target folder."""
        folder = QFileDialog.getExistingDirectory(self, "Speicherort wählen")
        if folder:
            self.save_path_input.setText(folder)

    # ------------------------------------------------------------ download --

    def start_download(self):
        """Validate the form, derive the target folder and start the worker."""
        url = self.url_input.text().strip()
        save_path = self.save_path_input.text().strip()

        if not url:
            QMessageBox.warning(self, "Warnung", "Bitte geben Sie eine URL ein.")
            return

        if not save_path:
            QMessageBox.warning(self, "Warnung", "Bitte wählen Sie einen Speicherort.")
            return

        full_backup = self.full_backup_mode.isChecked()
        if full_backup:
            options = {
                'download_images': self.download_images.isChecked(),
                'download_videos': self.download_videos.isChecked(),
                'download_css': self.download_css.isChecked(),
                'download_js': self.download_js.isChecked(),
                'follow_links': True,
                'max_depth': 999  # very deep, to capture the whole site
            }
        else:
            # Snapshot mode: only the current page with minimal resources.
            options = {
                'download_images': True,
                'download_videos': False,
                'download_css': True,
                'download_js': False,
                'follow_links': False,
                'max_depth': 0
            }

        options['backup_type'] = 'full' if full_backup else 'snapshot'

        # Target folder name: <YYMMDD>_<host>_<kind>
        date_str = datetime.now().strftime("%y%m%d")
        website_name = urlparse(url).netloc.replace('www.', '').replace(':', '_')
        if not website_name:
            website_name = 'website'
        backup_type = 'complete' if full_backup else 'snapshot'
        dir_name = f"{date_str}_{website_name}_{backup_type}"
        final_save_path = os.path.join(save_path, dir_name)

        # Lock the UI for the duration of the download.
        self.start_button.setEnabled(False)
        self.stop_button.setEnabled(True)
        self.view_button.setEnabled(False)
        self.report_button.setEnabled(False)
        self._set_inputs_enabled(False)

        self.progress_bar.setValue(0)
        self.status_text.clear()

        self.crawler_thread = CrawlerThread(url, final_save_path, options)
        self.crawler_thread.progress.connect(self.update_progress)
        self.crawler_thread.status.connect(self.update_status)
        self.crawler_thread.finished.connect(self.download_finished)
        self.crawler_thread.report_ready.connect(self.open_report)
        self.crawler_thread.start()

        # Remember the dated sub-folder; view_website() serves this directory.
        self.last_save_path = final_save_path

    def stop_download(self):
        """Abort a running download and write an abort report if possible."""
        thread = self.crawler_thread
        if not (thread and thread.isRunning()):
            return

        self.update_status("Download wird abgebrochen...")

        # Try to write the report before the thread is killed.
        if thread.crawler:
            try:
                pdf_report = PDFReport()
                report_path = pdf_report.generate_report(
                    url=thread.url,
                    save_path=thread.save_path,
                    start_time=thread.crawler.start_time,
                    end_time=datetime.now(),
                    downloaded_resources=thread.crawler.downloaded_resources,
                    skipped_urls=thread.crawler.skipped_urls,
                    options=thread.options,
                    success=False,
                    error_message="Download manuell abgebrochen"
                )
                self.last_report_path = report_path
                self.report_button.setEnabled(True)
                self.update_status(f"Abbruch-Bericht erstellt: {report_path}")
            except Exception as e:
                self.update_status(f"Fehler beim Erstellen des Abbruch-Berichts: {str(e)}")

        thread.terminate()
        # Wait until the thread is really gone before re-enabling the UI
        # (closeEvent does the same).
        thread.wait()
        self.download_finished(False)

    def update_progress(self, value):
        """Show the crawler's progress value."""
        self.progress_bar.setValue(value)

    def update_status(self, message):
        """Append a line to the status log and mirror it in the status bar."""
        self.status_text.append(message)
        self.status_bar.showMessage(message)

    def download_finished(self, success):
        """Unlock the UI and tell the user how the download ended."""
        self.start_button.setEnabled(True)
        self.stop_button.setEnabled(False)
        self._set_inputs_enabled(True)

        # self.last_save_path is deliberately left alone here. It already
        # holds the dated sub-folder chosen in start_download(); overwriting
        # it with the text field made "view website" serve the parent folder.

        if success:
            self.view_button.setEnabled(True)
            self.progress_bar.setValue(100)
            QMessageBox.information(self, "Erfolg", "Website wurde erfolgreich heruntergeladen!")
        else:
            # Offer the report even after a failure, if one was written.
            if self.last_report_path and os.path.exists(self.last_report_path):
                self.report_button.setEnabled(True)
            QMessageBox.warning(self, "Fehler", "Der Download wurde unterbrochen oder es ist ein Fehler aufgetreten.\nDetails finden Sie im PDF-Bericht.")

        self.status_bar.showMessage("Bereit")

    # --------------------------------------------------------------- theme --

    def apply_theme(self):
        """Apply the stylesheet matching the current mode."""
        self.setStyleSheet(DARK_THEME if self.dark_mode else LIGHT_THEME)

    def toggle_theme(self):
        """Switch between dark and light mode and persist the choice."""
        self.dark_mode = not self.dark_mode
        self.settings.setValue('dark_mode', self.dark_mode)
        self.apply_theme()
        self.update_mode_icon()
        self.refresh_logo()

    def update_mode_icon(self):
        """Show the sun icon in dark mode and the moon icon in light mode."""
        icon_name = 'sun.svg' if self.dark_mode else 'moon.svg'
        icon_path = self._resource_path('icons', icon_name)
        if os.path.exists(icon_path):
            self.mode_toggle.setIcon(QIcon(icon_path))

    def refresh_logo(self):
        """Load the theme-specific logo, falling back to plain text."""
        if self.dark_mode:
            logo_filename = 'intelsight-name-dark.svg'
        else:
            logo_filename = 'intelsight-name-light.svg'
        logo_path = self._resource_path('logo', logo_filename)

        if not os.path.exists(logo_path):
            print(f"Logo-Datei nicht gefunden: {logo_path}")
            self.logo_label.setText("IntelSight")  # fallback text
            return

        pixmap = QPixmap(logo_path)
        if pixmap.isNull():
            print(f"Fehler beim Laden des Logos: {logo_path}")
            self.logo_label.setText("IntelSight")  # fallback text
            return

        # Slightly smaller than the label so there is some padding.
        scaled_pixmap = pixmap.scaled(
            290, 65,
            Qt.AspectRatioMode.KeepAspectRatio,
            Qt.TransformationMode.SmoothTransformation
        )
        self.logo_label.setPixmap(scaled_pixmap)

        print(f"Logo erfolgreich geladen: {logo_filename} (Dark Mode: {self.dark_mode})")

    # ------------------------------------------------------ report / view --

    def open_report(self, report_path: str):
        """Remember the report path and enable the button (opens nothing)."""
        self.last_report_path = report_path
        self.report_button.setEnabled(True)

    def open_last_report(self):
        """Open the most recent PDF report with the system viewer."""
        if self.last_report_path and os.path.exists(self.last_report_path):
            QDesktopServices.openUrl(QUrl.fromLocalFile(self.last_report_path))
        else:
            QMessageBox.information(self, "Info", "Kein Bericht vorhanden.")

    def view_website(self):
        """Serve the last saved site on a local web server and open it."""
        if not self.last_save_path or not os.path.exists(self.last_save_path):
            QMessageBox.information(self, "Info", "Keine gesicherte Webseite vorhanden.")
            return

        try:
            # Stop a previous server first, if there is one.
            if self.local_server:
                self.local_server.stop()

            self.local_server = LocalWebServer(self.last_save_path)
            url = self.local_server.start()
            self.local_server.open_in_browser()

            QMessageBox.information(
                self,
                "Webserver gestartet",
                f"Die Webseite wird auf {url} bereitgestellt.\n\n"
                "Der Server läuft im Hintergrund und wird beim Beenden der Anwendung gestoppt."
            )
        except Exception as e:
            QMessageBox.critical(self, "Fehler", f"Fehler beim Starten des Webservers:\n{str(e)}")

    def closeEvent(self, event):
        """Stop the server and any running download, then persist settings."""
        if self.local_server:
            self.local_server.stop()

        self.settings.setValue('dark_mode', self.dark_mode)

        if self.crawler_thread and self.crawler_thread.isRunning():
            self.crawler_thread.terminate()
            self.crawler_thread.wait()

        event.accept()
zlaZR2QWWEoTAW>yUl8M&SCX0&oSB|c5))9ApOuUVH-{8{>N9cG2LA8e;!cbS`laV9cgO@>=wjm5LveoC5I zp5u9FZC>C-Xmh*-C#@vqZ=#MDJQVG^kdK8N--a#k`eE1W^o4YSXxj^*h3~Qc?sQ)~ zmk#%Zb1R$*zbBlYh<(&m3pHPf@a4@92uX6>m!hL0xtp+2Bvr>e<^}c*csJpVKL(-5 zjPBC|dPru(HD(4#NCZeo14u{$7)&2jEQ!mKtQ2t+fXNsQY2lQW+L)Bm)>VBeJYF#q z7K@;Jr1%Gs1<-%h+1zM-54v%yFG4O`cSO9cqEYL9s6kHE3X}RJ<)1J zQP7T_T-M=0KQC`eZvp{o#j?{HD?f-^0mjo!EvSxm6WewD&{wXDN5(dYGPU5Br_*yM zV_#ipb|`2q9&{F0={WrY9Yqf4j0}uJfR8c?TbIWjFO>mbV^Y6H7#%;+J;k3A&e z1x8eEw?+y6W`x0aZeE4ZyEf`_2B55}JkRXYpXehJ(h1oqsKTHKU)ccQ{WF~AmN}kT zc2`gyoH>NOcox_HN`5mSFZT4ky7B-7%HblM<0d@9O(P*M(JTMci-b&TUkg8Ux>EQ6 znk_(vGoAP`peS^%)oL|o9CGIi?DRwz`XYp@V8Ql1puYmBF0 z%6?nFD`g~;+}$S~F;adMCb<+*e%MQ_h6YzN#hTlVqyQ{Rpd7#<_rws?GckswVo0Q< zB->BV)!0i#5$5H)AXedxKLSxSZPPFfx<(l-(lVgLrtFRJ+W6;e-iA@pn68~djXvZA zWH8XntA)ElqKL%Q0dz2R&@&E9okZXp%{j1d9k4%2^S8Y34=OATqaF9hibs<$b&(As zAF`r7JLM4EHrVJh^avAR8F}!7y`H}HAG@Q`3A52;WCe}%3((P+BZ#m)s^|ft8ZbXY ziZsSRFyzIvZZ|~80Us^yq~i~r+cKI!S|x_HVy5}bPei+&&L=Dy0jB18F~>4E?ZOHw zz`b?nD{8P&j2u%0Zy24vf6A9r>OkF+;n{h-{-HM%uAAgtcNp<;fc>KD?u@-){w3?W zJnF!-87rBXeyC&#R%>KP2Ybkm$81C@ks3T0FN|`GBbzX~X4sa!UbG~BX{5Iy=Z#h5 zqKzGl@!~-Xm(21b41j^5DD46FB24y71x00MV*If@!AQ3=D%U|n!)I#dNRI5h102J4 ztH>}~kD)&3WDsJH2+!2XgjB5YpKwUtfoH)LKm-5B7DrX)sJbi@tT)i&mh8nFx`%r% zi_M0!)%CbD9S@x!goG;9>4$#o41L75JXWyg`~*b4++-Q44QBx^oEmxx`AKuz99(+I z^!)Os;J$MP^u}FB}xWjO=6EHZ2=KH1HbuP~%Mz`ekaomQ_$d6c0VP4z65*;j{*Eoq!ADu(#oic2z$@ z%0E%}oa;b`i`d_&&_-poSwL2Z{F>pWxVxcjLqiM3^wB43FjKxWkddM_$|4(|dOVc`^1#$UUUW=Tuyk${x1LryO!hPIF3StCHd$5XY9ic7CsC z0Fn?@Nrp8K{qf%G?)TNxujvd9reb)`e$H23yB3T6hAKOsI4YO%x}O8E7?Z4+E$Na( zW!aMLxE>efxTV+$Jz=Z5D(Z?gU~9T&C-tPLC#;m6*3))I&)9?dpq(y;@u@DcGxq8J5l$&+e3$)u!ukJJqt3oT@`5ZvyGG4a{ zV8(Qb#dMiTO#URM$61^y_*IgLWP2R+iVFulY$}-+rylxJ_i>=4}SXuI8 z8}eq1R1I*NZ01Jxx(xxxN&1)}uKa;4>Zj$IC$Ka&E*HLdDrF+K_GXU6Qzz4u&` zyZ2Vi2IKeUOn23Dn)hx%>Q<>Bs@eVdY}vfQVMU8C;rVT}o_12ifd@57qt!#cPp)}bEOBpYRi*^xdj zM|=HwHn!c*UV=p&Yn_7h=D7<+T3+60)Xb*gqLvrl=H{zk{EFw9*~nitdA+jY1?gE~ 
zDNA+FGIi~`X|Q^u_WIU1*t7?ZpMB%(-1#@Qj-Uo7R~m+G25QsbaLz!h!b?l_Hay)k zA9=GBLxPk`o_?k3!P}FDRj)N{!p(-Glv>Lk4HT%04z~?!i>w2~yg7T}{DrwKGW1}u zWLC&oX(}e~P3i2`FnYH#VkZ0;e)w?$FB6y|Z~`Dmi}A0TRfn5_cB4^smN2)iH2N^k zxwp=rIe(#$*pvPhqEF6q?r=dvb_kk!iJrU_D(S@mu-AW%AFNrW6fS! zIDZCVi$dNk)UlPM0phdoy!#w8eZUF@uWJt-&OSht4YDN&$l9jXK)qNP;G~%#ZZO6Z zC~=As0rd7r$e)vt_$Yyc1jyY(aF|LH0MzNO0cdeq(xmVOe5R%T?{)1PExC>TqMA(H zdze(qeA7O)9_YcQ=Y-SG6nWPL%<44Ip#cUSPqPYA?~%=llwG!=G(@>T<)+u-jS3k{_c@Ki zAYKuWK8H#8Nq5XcNZLdoX40nANi*3`{~TfIX}pv1;`fz}gfIJSob|dMfmEvw?}&x-5cA*s6)vw1{5u zQ(maJrwm0Z47V5*yPU@ zcKB&O|05xeH{y-@iFIkce}tnu`qlpan${Wh_W7eMFLd-0!J5pDeJXeM`TI6<-hOtR zz09V(v5f=l#HLhL*emQLS_+UqwY`eA1Up47-q`)|E?!Lr=4))471`;UwhlYN76pC! z*gnFNM>;&SLx>Z|&yo4{S`Z8TLb1?S6kh`{AmLc|2klVKqPMjIm3wgRz}@tx25& zzG;~={X9O80?UEL(=zhtwZp4&;q<|CYi2 z^Ur(Wyz8aB5iz5i?AE5d+b(Ki`-bKZunR)-i#s$w?vD%2-`+#hci4|>$_{;wzfhm* z6Pf>gq{+K|(hhY{g2loj5j&Qi#s$WEJ>Mk^(%k#o7E+IOCVX|{&}M8d_R;=(a!2YM z^bh(IYZ6AfQ;Yc%{=tg^PQ+ko!sedH4`#hde}bsiCD6SGx`UlV(Y!wJ4`TG+!8{I$ zd3^XZA?5)Ki!A66)7ejchR-h0j&!7m?w*fXN1k}Li;J;3At&~eRRweUEW-Y%+d3Y@ zXulh+wMQqg`qm0wWk2ym;SWUb;q_Qg&M8)Yp)}$D7~!Y=(yYap@k zs(>{CSAL;&CRv@`|0L#5&OvIO!`_kgSm%hJh-6B>`p5oZ|9Gfp|7cb07p&1aCdC@^llX%-{bQI%*+0ze-8kqA zb+~s_aCTcE_NFQSa5R^FVlF5A6Bzx^{V7=3aS?M*P)x0f8GyU*Poe+U@T{rGv&8Nf z#ne~)S6=8@P4?iqSj%hhkFMtZS48Ou`2VJ7Gw!y3MAqi9)>0d&-IL(+UwePt`wIONvhye!_pHhv+?evSf;Z)Q{e8eeMoP)hK<8z73W*v^4HL=r{{Uqv1;4|3q8AFu2`69WrdO$jdnag zo!65$jb+oK^B6x5if7sDrnQEoQN@_P>R7Cx>^K|JiBK2E+(jIG+XE%f@LKLnTY2y9 z&6#$l)TCid93i}!b_$)`Ya=@{(;k^(#VNZu^9GbRz!Z7Z8KnB!NJKd5rDOL~Qb4_PWp3`q;x)lX z5b(D^h0ejNx0jY~-+EEMo~HEPuHKs<+@jMhrk4?kzUR968|bdRA)bGETOTAtKv)k9sZd*2NWhPLnEz4@}zgoAD>BtM4 zN8ZH3k+v}t40X@9<+@G&wxG-J7`n0u=WXwQzs^j@eKE|NWv1XJiyx#Qs5z{@y`)+&amhlMlvCRBZ-K?53LDDuJm6_S}R)m#BhI#xl6t6AZ zUR)~O(C2s91gQ-^cAI2O78hE|-LUn}^R2CYXxqaB+9Nj_E~Ekh=O4D9_(onhi*S$p z7r)x!GBf-Pc+~#-uju)Qx7+cH%a_{f#na1|@+aE^f}k+7C=9AaiOPTf_?P*%RxB3t za7>i)4=Lrz&GzW?9@;+I_0_z&9eMJYSm?^d!azGwT=8tHtXcpa(6hMt^rR$G*5gj1HBhU?X9=B~stZC#|? 
# File: src/utils/local_server.py
"""Small local HTTP server used to preview a saved website in the browser."""
import functools
import http.server
import os
import socketserver
import threading
import webbrowser
from pathlib import Path


class LocalWebServer:
    """Simple local web server for displaying saved websites.

    The server runs in a daemon thread and serves ``directory`` on the first
    free port starting at ``port``. It listens on the loopback interface
    only, matching the ``http://localhost:<port>`` URL it hands out.
    """

    # Upper bound of the port search (inclusive).
    MAX_PORT = 9000

    def __init__(self, directory: str, port: int = 8000):
        self.directory = Path(directory).resolve()
        self.port = port
        self.server = None
        self.server_thread = None

    def _url(self) -> str:
        """Return the URL under which the server is reachable."""
        return f"http://localhost:{self.port}"

    def start(self):
        """Start the server in a background thread and return its URL.

        Raises:
            OSError: if no port up to ``MAX_PORT`` could be bound.
        """
        if self.server is not None:
            # Already running, so do not bind a second socket.
            return self._url()

        # Serve the directory through the handler's ``directory`` argument
        # instead of os.chdir(), which would change the working directory of
        # the whole process.
        handler = functools.partial(
            http.server.SimpleHTTPRequestHandler,
            directory=str(self.directory),
        )

        # Find a free port.
        while True:
            try:
                self.server = socketserver.TCPServer(("127.0.0.1", self.port), handler)
                break
            except OSError:
                self.port += 1
                if self.port > self.MAX_PORT:
                    raise OSError(
                        "Kein freier Port zwischen 8000 und 9000 gefunden"
                    ) from None

        self.server_thread = threading.Thread(target=self.server.serve_forever)
        self.server_thread.daemon = True
        self.server_thread.start()

        return self._url()

    def stop(self):
        """Stop the server and release its socket; safe to call repeatedly."""
        server, thread = self.server, self.server_thread
        self.server = None
        self.server_thread = None
        if server is None:
            return
        server.shutdown()
        if thread is not None:
            thread.join()
        # Without server_close() the listening socket stays open and the
        # port remains occupied until the process exits.
        server.server_close()

    def open_in_browser(self):
        """Open the served site in the default browser and return the URL."""
        url = self._url()
        webbrowser.open(url)
        return url


def serve_website(directory: str, auto_open: bool = True):
    """Serve a saved website until the user presses Ctrl+C.

    Args:
        directory: path of the directory containing the saved website
        auto_open: open the browser automatically
    """
    server = LocalWebServer(directory)
    url = server.start()

    print(f"Webserver gestartet auf {url}")
    print("Drücken Sie Strg+C zum Beenden")

    if auto_open:
        server.open_in_browser()

    try:
        # Sleep in join() instead of spinning in ``while True: pass``.
        # The timeout keeps Ctrl+C responsive.
        while server.server_thread.is_alive():
            server.server_thread.join(timeout=0.5)
    except KeyboardInterrupt:
        print("\nServer wird beendet...")
        server.stop()


if __name__ == "__main__":
    import sys
    if len(sys.argv) > 1:
        serve_website(sys.argv[1])
    else:
        print("Verwendung: python local_server.py ")
import os
from datetime import datetime
from typing import Dict, List, Optional, Set, Tuple
from urllib.parse import urlparse
from xml.sax.saxutils import escape


class PDFReport:
    """Render the backup report of a crawl run as a PDF with ReportLab.

    The ReportLab names used below (``colors``, ``A4``, ``Paragraph``,
    ``Table``, ``TA_CENTER`` ...) are the module-level imports of
    ``pdf_report.py``.
    """

    # Display category -> lower-case file extensions belonging to it.
    # The last entry collects everything that matches no other category.
    FILE_CATEGORIES = {
        'HTML/Webseiten': ('.html', '.htm', '.xhtml', '.php', '.asp', '.aspx', '.jsp'),
        'Bilder': ('.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.ico', '.bmp', '.tiff'),
        'CSS/Stylesheets': ('.css', '.scss', '.sass', '.less'),
        'JavaScript': ('.js', '.mjs', '.jsx', '.ts', '.tsx'),
        'Videos': ('.mp4', '.webm', '.ogg', '.avi', '.mov', '.flv', '.wmv', '.m4v'),
        'Schriften': ('.woff', '.woff2', '.ttf', '.otf', '.eot'),
        'Dokumente': ('.pdf', '.doc', '.docx', '.txt', '.rtf'),
        'Sonstige': (),
    }
    FALLBACK_CATEGORY = 'Sonstige'
    MAX_FILES_PER_CATEGORY = 20
    MAX_URLS_PER_REASON = 10

    def __init__(self):
        self.styles = getSampleStyleSheet()
        self._create_custom_styles()

    def _create_custom_styles(self):
        """Register the paragraph styles used throughout the report."""
        brand = colors.HexColor('#232D53')
        # (style name, parent style name, attributes)
        custom_styles = [
            ('CustomTitle', 'Heading1',
             dict(fontSize=24, textColor=brand, spaceAfter=30, alignment=TA_CENTER)),
            ('CustomSubtitle', 'Normal',
             dict(fontSize=14, textColor=colors.HexColor('#6C757D'),
                  spaceAfter=20, alignment=TA_CENTER)),
            ('SectionHeader', 'Heading2',
             dict(fontSize=16, textColor=brand, spaceAfter=12, spaceBefore=20)),
            ('InfoText', 'Normal',
             dict(fontSize=11, textColor=colors.HexColor('#495057'),
                  alignment=TA_JUSTIFY)),
            ('ErrorText', 'Normal',
             dict(fontSize=10, textColor=colors.HexColor('#FF4444'), leftIndent=20)),
            ('SuccessText', 'Normal',
             dict(fontSize=10, textColor=colors.HexColor('#4CAF50'), leftIndent=20)),
            # wordWrap='CJK' lets long URLs break at any character.
            ('TableCell', 'Normal', dict(fontSize=9, leading=11, wordWrap='CJK')),
            ('SmallTableCell', 'Normal', dict(fontSize=8, leading=10, wordWrap='CJK')),
        ]
        for name, parent, attrs in custom_styles:
            self.styles.add(
                ParagraphStyle(name=name, parent=self.styles[parent], **attrs)
            )

    # ------------------------------------------------------------------
    # Pure helpers (no ReportLab involved)
    # ------------------------------------------------------------------

    @staticmethod
    def _markup_safe(text) -> str:
        """Escape ``&``, ``<`` and ``>`` so Paragraph treats *text* literally.

        Paragraph parses its text as XML-like markup; raw URLs with query
        strings or arbitrary error messages would otherwise be misparsed.
        """
        return escape(str(text))

    @staticmethod
    def _extension_of(url: str) -> str:
        """Return the lower-cased extension of the URL's path ('' if none).

        Only the path is inspected, so ``style.css?v=1.2`` yields ``.css``
        instead of ``.2``.
        """
        try:
            path = urlparse(url).path
        except ValueError:
            # Malformed URL (e.g. broken IPv6 literal): fall back to the raw text.
            path = url
        return os.path.splitext(path)[1].lower()

    @classmethod
    def _categorize_resources(
        cls, downloaded_resources: Dict[str, str]
    ) -> Tuple[Dict[str, List[Tuple[str, str]]], Set[str]]:
        """Sort downloaded resources into ``FILE_CATEGORIES``.

        Returns:
            A pair ``(categorized, numeric_extensions)``: ``categorized`` maps
            every category to its ``(url, local_path)`` entries in input
            order; ``numeric_extensions`` holds purely numeric extensions
            such as ``.0`` that were encountered.
        """
        category_of = {
            ext: category
            for category, extensions in cls.FILE_CATEGORIES.items()
            for ext in extensions
        }
        categorized = {category: [] for category in cls.FILE_CATEGORIES}
        numeric_extensions = set()

        for resource_url, local_path in downloaded_resources.items():
            ext = cls._extension_of(resource_url)
            if ext[1:].isdigit():
                numeric_extensions.add(ext)
            category = category_of.get(ext, cls.FALLBACK_CATEGORY)
            categorized[category].append((resource_url, local_path))

        return categorized, numeric_extensions

    @staticmethod
    def _group_by_reason(skipped_urls: Dict[str, str]) -> Dict[str, List[str]]:
        """Group skipped URLs by their reason, ordered by reason text."""
        groups = {}
        for skipped_url, reason in skipped_urls.items():
            groups.setdefault(reason, []).append(skipped_url)
        return dict(sorted(groups.items(), key=lambda item: str(item[0])))

    # ------------------------------------------------------------------
    # Report sections; each returns a list of flowables
    # ------------------------------------------------------------------

    def _summary_section(self, url, save_path, start_time, end_time,
                         downloaded_resources, skipped_urls,
                         success, error_message) -> List:
        """Build the headline and the key/value summary table."""
        cell = self.styles['TableCell']
        # Right-aligned bold variant for the label column.
        label = ParagraphStyle(
            name='LabelCell',
            parent=cell,
            alignment=TA_RIGHT,
            fontName='Helvetica-Bold'
        )

        status_text = 'Erfolgreich' if success else 'Fehlgeschlagen'
        if error_message:
            status_text += f' - {error_message}'
        duration = end_time - start_time

        rows = [
            ('Status:', self._markup_safe(status_text)),
            ('URL:', self._markup_safe(url)),
            ('Speicherort:', self._markup_safe(save_path)),
            ('Startzeit:', start_time.strftime('%d.%m.%Y %H:%M:%S')),
            ('Endzeit:', end_time.strftime('%d.%m.%Y %H:%M:%S')),
            ('Dauer:', f"{duration.total_seconds():.1f} Sekunden"),
            ('Gesicherte Dateien:', str(len(downloaded_resources))),
            ('Übersprungene URLs:', str(len(skipped_urls))),
        ]
        summary_data = [
            [Paragraph(name, label), Paragraph(value, cell)] for name, value in rows
        ]

        # Wide label column: the German labels are fairly long.
        summary_table = Table(summary_data, colWidths=[4.5*cm, 11*cm])
        summary_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (0, -1), colors.HexColor('#F8F9FA')),
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#E9ECEF')),
            ('ROWBACKGROUNDS', (0, 0), (-1, -1), [colors.white, colors.HexColor('#F8F9FA')]),
            ('TOPPADDING', (0, 0), (-1, -1), 8),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
            ('LEFTPADDING', (0, 0), (0, -1), 5),
            ('RIGHTPADDING', (0, 0), (0, -1), 10),
        ]))

        return [
            Paragraph("IntelSight Webseiten-Crawler", self.styles['CustomTitle']),
            Paragraph("Sicherungsbericht", self.styles['CustomSubtitle']),
            Spacer(1, 0.5*inch),
            Paragraph("Zusammenfassung", self.styles['SectionHeader']),
            summary_table,
            Spacer(1, 0.3*inch),
        ]

    def _error_section(self, error_message: str) -> List:
        """Build the highlighted error block shown for failed runs."""
        error_style = ParagraphStyle(
            name='ErrorDetail',
            parent=self.styles['Normal'],
            fontSize=12,
            textColor=colors.HexColor('#DC3545'),
            leftIndent=20,
            spaceBefore=6,
            spaceAfter=6
        )
        return [
            Paragraph("Fehlerdetails", self.styles['SectionHeader']),
            Paragraph(self._markup_safe(error_message), error_style),
            Spacer(1, 0.3*inch),
        ]

    def _options_section(self, options: Dict) -> List:
        """Build the table describing the download options of the run."""
        cell = self.styles['TableCell']

        def yes_no(key):
            return Paragraph('Ja' if options.get(key, False) else 'Nein', cell)

        backup_type = options.get('backup_type', 'snapshot')
        backup_type_text = (
            'Webseiten-Snapshot' if backup_type == 'snapshot' else 'Gesamte Webseite'
        )
        options_data = [['Sicherungsart:', Paragraph(backup_type_text, cell)]]

        # The per-resource switches only apply to a full-site backup.
        if backup_type == 'full':
            options_data.extend([
                ['Bilder herunterladen:', yes_no('download_images')],
                ['Videos herunterladen:', yes_no('download_videos')],
                ['CSS-Dateien herunterladen:', yes_no('download_css')],
                ['JavaScript herunterladen:', yes_no('download_js')],
                ['Maximale Tiefe:', Paragraph(str(options.get('max_depth', 0)), cell)],
            ])

        options_table = Table(options_data, colWidths=[6*cm, 9.5*cm])
        options_table.setStyle(TableStyle([
            ('FONTSIZE', (0, 0), (-1, -1), 10),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#E9ECEF')),
            ('TOPPADDING', (0, 0), (-1, -1), 6),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
        ]))

        return [
            Paragraph("Download-Optionen", self.styles['SectionHeader']),
            options_table,
            Spacer(1, 0.3*inch),
        ]

    def _files_section(self, downloaded_resources: Dict[str, str]) -> List:
        """Build the per-category overview and listing of saved files."""
        info = self.styles['InfoText']
        small = self.styles['SmallTableCell']
        categorized, numeric_extensions = self._categorize_resources(downloaded_resources)
        # Only categories that actually contain files are shown.
        filled = [(cat, files) for cat, files in categorized.items() if files]

        flowables = [
            PageBreak(),
            Paragraph("Gesicherte Dateien", self.styles['SectionHeader']),
            Paragraph(
                f"Insgesamt wurden {len(downloaded_resources)} Dateien erfolgreich gesichert.",
                info
            ),
            Spacer(1, 0.3*inch),
            Paragraph("Übersicht nach Kategorie:", info),
        ]

        if filled:
            category_table = Table(
                [[category, str(len(files))] for category, files in filled],
                colWidths=[6*cm, 2*cm]
            )
            category_table.setStyle(TableStyle([
                ('FONTSIZE', (0, 0), (-1, -1), 10),
                ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#E9ECEF')),
                ('TOPPADDING', (0, 0), (-1, -1), 4),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 4),
                ('BACKGROUND', (0, 0), (0, -1), colors.HexColor('#F8F9FA')),
                ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
            ]))
            flowables.append(category_table)

        if numeric_extensions:
            flowables.append(Spacer(1, 0.2*inch))
            flowables.append(Paragraph(
                f"Hinweis: Dateien mit numerischen Endungen ({', '.join(sorted(numeric_extensions))}) " +
                "sind vermutlich versionierte Ressourcen oder Chunks von größeren Dateien.",
                info
            ))

        limit = self.MAX_FILES_PER_CATEGORY
        for category, files in filled:
            flowables.append(Spacer(1, 0.3*inch))
            flowables.append(Paragraph(f"{category} ({len(files)} Dateien):", info))
            if len(files) > limit:
                flowables.append(Paragraph(
                    f"(Zeige erste {limit} von {len(files)} Dateien)",
                    self.styles['CustomSubtitle']
                ))

            file_data = [
                [
                    Paragraph(self._markup_safe(resource_url), small),
                    Paragraph(self._markup_safe(os.path.basename(local_path)), small),
                ]
                for resource_url, local_path in files[:limit]
            ]
            # Most of the width goes to the URL column.
            file_table = Table(file_data, colWidths=[12*cm, 3.5*cm])
            file_table.setStyle(TableStyle([
                ('FONTSIZE', (0, 0), (-1, -1), 8),
                ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#E9ECEF')),
                ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                ('TOPPADDING', (0, 0), (-1, -1), 2),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 2),
            ]))
            flowables.append(file_table)

        return flowables

    def _skipped_section(self, skipped_urls: Dict[str, str]) -> List:
        """Build the listing of skipped URLs, grouped by skip reason."""
        info = self.styles['InfoText']
        small = self.styles['SmallTableCell']
        limit = self.MAX_URLS_PER_REASON

        flowables = [
            PageBreak(),
            Paragraph("Übersprungene URLs", self.styles['SectionHeader']),
            Paragraph(f"Es wurden {len(skipped_urls)} URLs übersprungen.", info),
            Spacer(1, 0.3*inch),
        ]

        for reason, urls in self._group_by_reason(skipped_urls).items():
            flowables.append(Paragraph(
                f"{self._markup_safe(reason)} ({len(urls)} URLs):", info
            ))
            if len(urls) > limit:
                flowables.append(Paragraph(
                    f"(Zeige erste {limit} von {len(urls)} URLs)",
                    self.styles['CustomSubtitle']
                ))

            skip_table = Table(
                [[Paragraph(self._markup_safe(skipped), small)] for skipped in urls[:limit]],
                colWidths=[15.5*cm]
            )
            skip_table.setStyle(TableStyle([
                ('FONTSIZE', (0, 0), (-1, -1), 8),
                ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#E9ECEF')),
                ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                ('TOPPADDING', (0, 0), (-1, -1), 2),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 2),
            ]))
            flowables.append(skip_table)
            flowables.append(Spacer(1, 0.2*inch))

        return flowables

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------

    def generate_report(self,
                       url: str,
                       save_path: str,
                       start_time: datetime,
                       end_time: datetime,
                       downloaded_resources: Dict[str, str],
                       skipped_urls: Dict[str, str],
                       options: Dict,
                       success: bool,
                       output_path: Optional[str] = None,
                       error_message: Optional[str] = None) -> str:
        """Generate the PDF report and return the path it was written to.

        Args:
            url: Start URL of the crawl.
            save_path: Folder the website was saved to; also the default
                location of the report.
            start_time: Start of the run.
            end_time: End of the run.
            downloaded_resources: Maps each downloaded URL to its local path.
            skipped_urls: Maps each skipped URL to the reason it was skipped.
            options: Crawl options (``backup_type``, ``download_images``,
                ``download_videos``, ``download_css``, ``download_js``,
                ``max_depth``).
            success: Whether the run completed successfully.
            output_path: Target file; defaults to a timestamped
                ``crawler_report_*.pdf`` inside ``save_path``.
            error_message: Optional error text shown in the summary and,
                for failed runs, in a separate error section.
        """
        downloaded_resources = downloaded_resources or {}
        skipped_urls = skipped_urls or {}
        options = options or {}

        if not output_path:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_path = os.path.join(save_path, f"crawler_report_{timestamp}.pdf")

        # A failed run may not have created the target folder yet.
        target_dir = os.path.dirname(output_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        doc = SimpleDocTemplate(
            output_path,
            pagesize=A4,
            rightMargin=2*cm,
            leftMargin=2*cm,
            topMargin=2*cm,
            bottomMargin=2*cm
        )

        story = self._summary_section(
            url, save_path, start_time, end_time,
            downloaded_resources, skipped_urls, success, error_message
        )
        if not success and error_message:
            story.extend(self._error_section(error_message))
        story.extend(self._options_section(options))
        if downloaded_resources:
            story.extend(self._files_section(downloaded_resources))
        # Skipped URLs are listed once, after the saved files.
        if skipped_urls:
            story.extend(self._skipped_section(skipped_urls))

        story.append(Spacer(1, 0.5*inch))
        story.append(Paragraph(
            f"Bericht erstellt am {datetime.now().strftime('%d.%m.%Y um %H:%M:%S')}",
            self.styles['CustomSubtitle']
        ))

        doc.build(story)

        return output_path