From dc4bad51e9c60bf62ebb90cd01be13c52d097cad Mon Sep 17 00:00:00 2001 From: Yuvi9587 <114073886+Yuvi9587@users.noreply.github.com> Date: Tue, 6 May 2025 22:08:27 +0530 Subject: [PATCH] commit --- .gitattributes | 2 - Kemono.ico | Bin 0 -> 67646 bytes Known.txt | 1 + kemono_downloader_config.txt | 0 main.py | 2787 ++++++++++++++++++++++++++-------- readme.md | 82 +- 6 files changed, 2212 insertions(+), 660 deletions(-) delete mode 100644 .gitattributes create mode 100644 Kemono.ico create mode 100644 Known.txt delete mode 100644 kemono_downloader_config.txt diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index dfe0770..0000000 --- a/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -# Auto detect text files and perform LF normalization -* text=auto diff --git a/Kemono.ico b/Kemono.ico new file mode 100644 index 0000000000000000000000000000000000000000..3d977c5d3efe04631511188aff15499d3c07a182 GIT binary patch literal 67646 zcmeHQ2YeM(x}OvkyCOC$lypFvh@uDxNDa-p`V^jhZCms`%dQIRx~tDZx2_6eS-Y#K zh^W|5v4ILICCN>p2m*rCkmS7o|C{sOOorQY69Q)PJDGB4&Ybi8zjnS;#&J^cH)f23 z|1F)|CMnKgj^pGa;Sw*(bsqBQqXQgC?s?;UwV8F>iIu7TJ+pbB0=0lb{*@FX47Vyl2FAPBHe-XSEMVEhPNJA-1*!;FKlYe)!?5iodsV{byAxg<=nK@IA^_ToTeQnq-OMA zn4Xhgl9oNFC?%_Zv6G21We>uBU{Mlk+L(n>AUnU*$pYrt(B-#ZZyF}1* zyvGdKh631=V%n9Iw!;gYT$GhIsO{OSv>J{3I4O`#pA7mvJHJrrnFjlvHtn_b$muJj zkDVsYdwV&}zCY>UZUuks`n!_XNBw2xvU{hlGc!KkWPU8%X3Do$nax|O%yoCKHZ4vs zQp_unZw1o!FsTAOQk5|vU^<>vYF?PR!K_%d-aPQscc%9RZ&V%7@%gg-JHPvS+hL_Q zb{kc7Lgx!A_d4Ms06}cLKox>f{p_3<_;u(Hg2&{AZti5?YNWU;!k@H zJsW^-{|t|R?d<7%x3AN3(V%1c7Ck=v56c68e`t+)^TSQ1c=>iyQ3;J;OjTu-sla~w zqCj~?k5Q!+&AJye?S zZY!5-=AF+rn>NFW4fw>g8Co0_?@+t}9x!=B0tWnJM*U@lS^ujr82?qw{X_B&GX~ip2kkT$ zchQ|fs}5%tr`-9kwa)1K*X)@&FmQI$Qwlx??p2eX{>7|bOVKu!z?zs>Qs$W|kszM@ z%{=|a2Em;=Ja1@pnTNiK$RFgR!+RE*L02v_r7O$j9eBUl4*T-RzrU$Gy4&RKDH-D` zk3RJclQr;1lQU!)Y)HxMtU*hMXAWMnN5-J<;TsmIK7f96^+DF)k{}&^YX>=qy%6^- z#9^|Am-09bIy$xIs8R>MTNC*7R~&g-;SyXocRjYY>X%=k)j@K~xZ*89;2*KDy5Ed_ 
zVx2hxSn)aJ7ZF#=Hf0>Xn#g%>1mBQ6a8&O?13on07i^PPmzPr&82AU9%q2I@H0hal zSMJ+(()Jd;W>x|FD)0f`U$iiLP~oVYA!U2!z?Vh2gb%Xv3mj)bcbE5Z{kODH{{{U! zOOGRaE_Dt%X>mGyy#EF!ODS)N*;FkpDW||!5d8`q5o_zTS@;r{!Jd zuoE7wXwm!QitPLnv^h#qj>7Me@44-Um!)(VA^w517jVt;!0zuG$@+&lP=)XHU#_?Vjj@JotsV^bf%cxg*MUOI1MsH^}SOzq2~vP+Xrz znjh1r$Q*x8NyVr0x45|DKka&`&5QjX?^+eap16cdiam7;+lkzm4->aFV7Xz}E;mQ? zDiq%_Tt3tn^exanFmHddNuH(eK^~xE!tJU5{9u|L|B%@;`#y76mxs(TJ>Rc{ZwP%r zU$lsAkq5Cq5PQ;vZASV&LL>6Q?zAB`f_8imf5<%J@M8bZMV(g?d+6VaewQt)U^|W* z>8ZHL15g^3Tt5ai=|? zJ`f(@wcTOU`q+JXj*I*LIG>8dwprzoy$UNv{BgNiyWSOc(lOGezTT~qevVo0Df@Vj zK|D~h4rNo6r#5b`H0R&2LVW&Mc!0X#gD+iMpk-Gc*syV%IsZ>@8mHA*)3nnB)2z#5 zruFIHROOX;CPp10mi{uew(xn%=1o2i;;-^c3@I@ z2C(0-(PAG)-zz^TeaB5(XAV0RdK$Wed4)^r2hzWr@}Kq9*lUKqz4E}8tyO6AuQZ4C zh-?d}2WSVl|7zh@H-B3PI<4v(-g4g@lbSK!9Ncjd_}~F^>}j8{orqXu5#x|7|2Igv zNTgA^_{j8=kTM4@*`@e^F;80$?!DNv-(@or+vVGjhQ059#j>E>EA4%d1Wy(F7vKI> zWSKONjn?2mJx{ z4JSP@&+L&s0X#4nX`(s0$GhYKVy`@~9>-@O2@hbu%lQP^r;83#{9(_EVC&`D9jFiM zS^?ZDNv{)rS7J(5xb|LkdI&h=lG5`_$QbP|+w3aaHs2aLfV{zb82@_#{K733?Bz!M zW1EtG26^u4J61*2HPl1o=f0PgnROdn1;FRIald`Ldu{4V3(a0R&B}dTj@05{J)#&O{7X=%aEi zCn5eR9qAt+o-b{1%ACz}hdr?;kFd|@)%P|^j`3ixMNl5#`~UoZtE2OPjT2sS>raw} zmRs{v9+>v>LbE4$fOg;@;ej_T4-^r9)diDtd&37n`xw`%^#Iz94RK?e1?AL~+F^u@ zz1a)bUPU5CKHKYTyG643#2)>_;+N0(sJd;Y7{t3LhKl2j zV_hD|i(m^_Hu?->;h%8x^S1K9Lr;HW(lW=PUGShesQox|)TwV-TOe&i<$xbW}x3oXfLB55xO+U0}Ts*iRaN*Mfq}vm88Jl?}6hPNWprLCe+D{ zoig6WP2;w565^lA-OuYP4Epy)rq!pjc{6`WlzYbINdFIQTP&1rA&y=={jVjzMdJbX zIa9{@PJO?3&+Ekhw2RBkubW)EuXW-3cq{>J9AEUmZy`P~)*RXedZ5EZ)3VR!p?KiA z_Pu`Om`2wYAeNHVK>Cv5-vM`N>ybZGTMrRk^^&Crb-vJu@#C1B2M1WD^Q|z4j*T0^_~hRIcR&|HePL&MaHy`lSBfT-@9{?|O8t z^cjTX0ks4Fd}OUWpmovrXuELZ=5lk=8PAx#auFNod_R1GhftRvphMVaAaQ1;;p6C2 zYt_G7Q|fcT!-;`Z4GZK~44zgKw)EZZ)) zM)LQGKiduo$juWNq712#UzJ_p*f zP`)Vj5bzOOu&5}szEd6W%@3>1A)V<9K7cmC1anx|XMlZ;xQ6-zY(tU8>8*#k_BL}T z^&J8amIHQP+WL%N^ZZ66%Da}=m3`<#^yQkDX1VP!zbq(iy?e!l@^d(2FkyTy@)p}& 
z81E~O`|;vM#h$TxmXG$E{scFDk3M%XeE0pkE;U21S#D@|aaO;=?}E0f_T(JYFoD%0A<;-7hS4?K))K=3iq^*<)Pys=u$2to^*D z*i+wAM^G-=kN?8V(D;wMd8OEHiA`O;%>1-wM;_4fkw4I1CANn;$X`Cu0f<4+E@{6; zD4$1dLH|)NAwD!7b^v++@qvu~3%quKG-aYa=z&C<3+?fqdPU#9oA0MT!hIIfDkQ}} zNDGyLssE{|ZzJ)6V)!n`a4~il&c^b6^atr{oqk!F(Xn5|oN}$$zw+)zxd(mBf(7}7 zF+^^D`6vF=>GV^%?|n&G&>!GftT z95>m<1tyroPkI4)Gd5IX>?&zngmk3#A?Gl@R!?n0(fic-g90fz7@voJJo4a&w!R!! z8^m7yGRnv*D@Q6j5%IV7^>+B#^clmowe(%&56ZN*xk*lyvwy#dv0$(d^dESi>WgUj z^IqB<=F6B8J7ui0Sx- z?Yb8``>xY^I7);j;!jc%-0 z<=W5l{&V%msK4o_sU8kT+jy@hihsQC@!n5$1e~o zu714z=jyXR^xQ8|``x`Xj`etF;7>o2`2RBoT2xr&@aS{jd2GN0#D(WV2h?Z_klsrB zGT?Kl#Yp;zlG#T@A0yN97v4!en_}s$I;aNr{4E@v{doJ&6@QM;l(9Eq;y(8MJG?jA z^>F1FT(Ep^r>VSbodi*C63ut!y1JW;) zIp9Zw`6QuaPO*5{;>G@AnNLt#9pL3B9G{Hj<+)I05nq{oz3mZ0TWpzcci|_62gxDmUeEf8F(5+kw;HEiijzF&@l#z$DY^^shnN9ee;U z9$P@GJfxt!)&_rfo}z10Y@Sb9ER^3uVj^un;NS7g;$ZwdX8dh@jd2#Xwc}}H+p>i$ zlm50d@E7}!w#;*H)r>hS-u}I|0~Zypb^8T7vTtw##thF1=L4|L>IaO+vG@V9t`&|G z-Ppg3yj1UJ<5wn7bb`Z@jx!*LMc~qzC(+IChnr*3pl< zo~x|ou@Z|Nrwi{eRY7+eaL8%Q$W_eSrGLpYLNF`l4Ixn8(<z&aHEtT(ZH-q+9WtVP!r z@_4qk1C|NT@L8M3GuSilh<=~FN3f#Ivfg7Jh^@`z0Ytair~YTdl`4Rx$-fJ<+FJBa^LGFK$5{Z|=L8^CdRVgu;M#WRP@#`(o} zfUm*%**{s@DsSn#!tUv}v)`TbIuqHn9f-9*B98xx1#|B`^@+du{g8nh$Ho@-@zP20 zzs!R_$An>xA<#Ue|EE}}eO{V9q^ODLajroH9*A|beSIqX=UDp1Ec?7)cpqaf+6^xb z&hd(s_elr6mt%3U7L3Rd^Ymhw$g$%8ylsoo&!oN4v1u3D@jtvTk^DnF^8?nR;})wA z%zJfg=*a6;|4%f>_WCfI{ns+9oc2a}#0Joxi*>VO2^9ZFZJkuF=sTkBwcV?2e+XH=penfX3$0AEh)hqsYg*_*&>HA)aIkZxz7=2&4 z8UIuVY~ETWW3o98r?zrQ9l|ktv`c?N`+{>jIj@ZSKi;`Y)^}oitv0%bwyzc1``zHX za7__zS{FHq<6p1yp2vJqHwK6`XHl*l#sY<+zhd=8Hr6`NKBJ7|^_O?7AQ}rWq^jN~29%F!!PV6K0<_<1s(zaf|^d0G&hj0r}~#STP>l3W+uVm_{ET znCY%Tp)sFJNH3(gY`=X*%B1ap2cC_6-jAP7YB&D%;`*fZ2{AswvQeJ|6V7XkJ=Y82 z8f5IhH+{e@fYkqVcV}f zKsx)!XUAy!cHXE?p-}{W@WYIQ|I_Sbu6N)^p;VZ{|z+tF7&hw_a@H zv44v505~^*8~L*~*z2>~uN zTY9J-3H6ScuTe&iLR*k)YAK!MAc23Rk@|#RVf{|d{o<^QA?5(ZN_W)(JuIeaTz56nhWpg_|Fgwk{fJoK z8TLNnLSHc4x)^?%MWdzq5sVKoMkFzpo`n)C%RTnqvG%=5!(aWt;QB972uA#) 
z@c`|J+7hlMcFV+{W$q@&ThlI(=ZG8M&11^1tyjc7@8vU;b>&;iJFjs*_hO7s;YNM0 z@2@rZ$UsTb=K{HPb?_S-z9DQw!H{@TnOvYZ27Z``6t)|>kFDSMW0|3Sv3*c|Ohkga0 z;j?^>Him1<2kiQ^#L4^4WMHql|Jxr|W4ssV|2&NKVi$)S^BD?n|24&blHwn!4PfK` z>i@Cb!2DE)`14NaoOCB$IZu*Ni#sS%9NnKC7PCdnMPH>HIuJNs#SLf)pWbAeB z$CwFN7Yb|0AKR1b!bR=xjf8{Z&;ENG*9jj3ZXEZ2S{L{;PwD{5)LgrU zzE=k*{#&4dy>s zE0DfP0oL%OPvPH~r=MT__au6*W!RQU$mABhCnQhOJb}Hwq)@(MddOT{JWA8Qh zJo1H*HQKo8P_!p+@i{M#g}N`)HDayL?#K_ozI^`HF;Z*Z1|k;03G&HVN^bMYWZE@`B<&2gj7@h~q%y zcZ?Oy8CppEzei#oNtb`-OC7-RVgCMcjeXDgav5UZ6VUJV5bz&ojvMm6`AND|!haVa zVdC%P2er<#J}$wW-+N&9InJk6c3<^?;{Pev-A77^^zAX=l9CDAkN$7szmPZ~)d~Le z0d#!;U1Nzdpgzzuul*?NqaK3T_u*Y1hkv#-tk1ZU;j8uQ)#eSl=tc1SBfy^aKBj(N z#W~zw@t?!~{~Y*sk^FyP?c{Y|jPG$t9C&_R;?KOP1L$|t_u}U2W7ioNBXi%zVg3up zJ$+=_4quLVTS3Hilxht`Eixd6+HV_AHY)pXeGq``-;1?AYOMbm4%b+(EB>zi$9FWj zUSqh8Jc9K@ghzl+o%#QkucRIBUAvF+ulE0}t6n#08Ta7(NzY3Am-+&IQsOgwyC}AK zH1Xh2+j|+eBM-0+@ow~edYto=!MA16kC7C;ul%9-&j8J7|0T8)5&!D>Ub8Gtnuq3@ z)IBjJwmMIUwEJZP)>Z8NIOaV~GMNLa*T9puJKE>@S1${$!Il8IU{mQSmjr+uW z44;LYUW@o3epFlc*z@0-w5&;pJ7A5E+O_xm*l7FzS;Zf;4hKK^Bgi8-ey%?7S3i(_ zdAiO1vQnrDqI@JO-5a*n2sLkojsJ;oecJ>(u>>1)e&x z6l3rWz5}(zwCVqhY0;P8Qw|;bBG1ro!1uq!XACkQfOKdy0IK^pjJ65?Fj?!EAvU>oVX0oi?}-KY3>vH9sZl6c-5*S+j- zp*+9$**bIPwezv=1N!c9Eku2S*QhsW1GvUS*~-{#t`~>e9;@z`aT-{wk8w(_*O=0J zoVjJ(0?D3nMgOKe@rN<9%z<5In|32spiL9^*sdTQBG5>2+lG5GkcfY3INPt+agW3Z zu|EW94HDm_dByVH1n(&}=U!hBmoB6ZVEiD``s#lAC+l2w|E*)^1lQ~39Gxa@9yKFx zcIP}QkLWvfY{v_4EHDRlem$y=v$Ri;p24_X0BZm=k#zuot>PSRk9T6-ph6_+1N_pK zLG?(YWr^vz*8L;Pd?U1y+u zx=LaJe1~cP<%PwCt4v1k>CjyzQFUCX`o?ih?e;r(Mr5D+Xq&xPZNL=!zB=^*?K>n6 zpvST9=?ijPA?Ixhy_Mi{RyY4thttlx>od9Q^^*sV>HR&{j9+fH+VKM96~=|QZP~KL z^cgt|;~$n_Oj2myHr9H_aGzp!AMPdo;@3yRU%u&5-3GYazDIc}COM9wY)Q?BV|F;c z{JpvM8@z4^xo*Hmt}4rCT{T_&J$~o=Abh{8`6;hL7nPab7nfGYrvSY>CZ)<;dD|zZ z*(nRr)+>X}PkLL=PgBM0Y>R&y+RoA7cX9{RSnIJct001b;pwIsk15{qCPDbFm;A-#x1PxkkJD-8NVDk39KRV7m;q zsK8u^??Q0X?}Yq&I>wis`fXjd^Zc@}Sj}sR@9}mz-~BFYUjOLwPd)%WnRbc~kSPA* 
z2VlLxy7U9Ie{V6r(IDUJ!}r=C_f0!Jh`#I}fPX2*G!)1fWaX-vAFVXU_E`vgN^RRF zejhghnj~cVi9NSG^bvUQKi|Gz?TMe)8q7J{0-U?g@tWLpOp#(y+r7&EpKhNWlzo1y z>!?%TmT{VS=(lHkl-~#8Hhb0A(3iij{YI3m<5!JvgF~PIA z-j*8>*0y>q7)QEC$7#rVJQ?Hcw^{hTR;0%qXvOE9;j2<<@EC*27&P~ZP_#gOCqPKIufjt>X)5SznI>sbG$f@gB!yT0G^ew|l9B*(b=ggMZ!Rg0FVm zFXQ3PwXJ{oUuH?4ezVRK3}bxm8azA2wnw6$;!zH4g5QHo#&_WCx-sMdwFA>F4>ag_ zkn%txV+6Nj?m4$fPt3!(E_dziBf7l|>_dKgClW68^_yZb71)Tp!UvK*{HQ`6<%Twr zpXdO^zq`d>d9J?d7}tBW9r(13#l<>4OtF7sb|L!pCrLZ+$dg~T=pk0`L$_}S*^)d8GO&T=Z>#are&zh8Yot`o*B7W~!MpF4kr zX@;>}`?Vcwj_N+WA!AQEs@>Ny-f6k+8m=`ktj!761hwObR0rhXy}w!>Q2wiPOzrz( zdXM^m!S8NF@c|Y4Z+=*9j_&rb*(Yx-<{!-R*!u=2_oRc;eii)l=GI4dr)KKySM59Pm@aES-Q+T6=H5ZC|V8Zgl|)%o)lt~SS<{Gg2IYTftKhU({g z>8!N>BWOka3~Y!|a`e8J30j@GjQKOUYXl4q?2Y4ZkXRm#J9*2tQ;*gEZ5tJ&@~cCw(qXvgx6=_oIwqMX5OhF>lkw$H^GB)C*nGjyZnB zjtkYX0eu=W{_G3xa!v{66YkgsWPhHv@t%5Rf!V(u)*3i*I%4T{HT_QiHWU*ON{s`~;d z^NRa6OgOsX&UZ0)<6r2<`^t{TW?dSjt@qMLX?-*CKuYI#-+5fu8`qDt?@T%2xKHt2 z0j@0@n2W^nsZR2ib*CH){#eVu^V!A0Z!mByp7!H^_u~q4+WD`TeLKF1v19C~T~;4s zGLu0IrSZ(vYd1Ss&m*Y!xqerDY!-j=fwl+OH}DG*>!-RY8FfuK58_Y#f8u%7-(931 zxOvM~bN9nv<6C-fndZH|m-#ntxf{I9lNRb{Eyr;S8=FgIjK4N<{&;zY-x9<7HEDaG zL5&gM{4x3Nz@;Tv->DKhzrwsUv($7y|8p6KOaCr!Xz8vZ^TeKV#Y7&c9@k}Yh!?}! 
z-p}|F$^rc&-w9A2XfSO;;?H#+hh9@=o}aPC47hwL=8OJ_{`nFar@0H*c)#vbo6mKb zN&9sA<2=s6H-2`;CVHp8;Lmd4`yvt_n1W-LoqZAhI_#8lt~GdUU#>S)@8d9&RbQp$ zIM7tEcVm1UqaXj+o@DZh9Vf^#acwd9BE{4NTyKVTSDsB49TLiiZ92w-Y?*eoy<-3D zF=zgO^_gUCHpF0LJ-1{+nQ&g9tLP!@Q!~zhUC5X3MZa(9-5_{i7sq~j=D?yf@GyP- zpue9mU`c*3v${=4K1iib!u~*{kC7;EjXV&)EmXOmfp*f+JctcJMH_eOfrhK z%dWM%W}uV}+%2UB61(+R_(SJGAQN z>5P20O@Q(XuU_k1#q|W6;QS>d)dLM8K48mJ-`GZbA8Dhya%$Tl#i@wvQQrr#=eUlB zyk!guwg;@fDoSnLuYkw8o*3g-8pnu6kKfYoq>0A%{*IU={V4Xc1nqrR|I&sGeXwkq z!-D=GacB8u?m6~XBPoCD{a=@dDND0@g!g zMBQ;*ibPtl-oIsgK$%zU7vq@gEl_6CF?R#s;6!{4DYuUM^y-Rj+Md0Nb@3+ffQ(OQ zhU@ARc=cX5`Pdz=d+p#f(Bpuh+;bgfo9YUi`plPg$)Y`Df-L|~r)IeQm6zgN`;ENx z*d6iKIw@1xRV!lr!8k7{X)T?JV1Ze>Vm-0IDQ$4^q?)+UHQPT z=bi?}hoQct2W?}})?+=;-3j_&UD!XMeU<2+6u%{#KVvsMFznRBYFi-wAZZ}<;G8As1kN|V0m;h;>MJWf8zLXD zZ1m?@RxM`{j;}?1_wm~KJm^CD_0)&di@((kw6Xjzdu=D;8zkThu3s`Za0wE1rWbF@ zRYPezD&~G2^*PFT0WjY?%bi2Xy3zLy+WAOpG5^;1LGX5EpZp+vKw8Q7=v>`!Dy~0{ zv=)iHpz@`9!K-V%yprr=zE}B=@Aj63{`@0I-LT$QO6EX}#~tL>1$=-y1^wUO8oV*r zSM{MQ|4KXTInK!KUuX*Yo`du}66x*7SLKoVfjFryR(|o{AL^X@#kX;NIpJYkleL$mUH2Q-^D#4ky%BWt+65ldKS*mgU@4A^b$zM>aIFv0{~>*V zv<4{@o_fuTDRK2DKglB z$xHkO;!LCoNLL`8jC4@;0PrT}*JTew*@ns5n&eGs!?!sF{oeeJ2FrzbSL2pE3S?oN z3Hv`YdSiV$(%1D9EbWU@=}$4?8`k(ulYGncVBDLH)D@{O(g-BP-2xZd^gHZ_BT<)} zfRu@ZXZWm}kK}3F==hE8HsZ~9r(o?rr**%@ERU0eb(0#mx~G6U2EEwH9E!Qgq8CK| zsT))li4SmMY>6p$9Sq!|yO$!#bBc8kZ|bDXKCnY>JiG2I+xUidx{TCjcMuL>HbgO;k>Gs_xIG% z{_~14-rrMtMmyi;z1?wU`_8BM&&zAxKgD-GYD3N6hebQD<_3ANL%ioT;BcIrc+Y>X zS>7BEH}aRL=lk8(ac8o&8}_w(-#=PkVe?I;gB z$d6yX{PO3Q&jsH6A-^>M*~=&Y=kBN1ZMJ$Lo9Uw@kc`n#*&%|t!7`rmIK{Pts;eSw=jG_r-@ z;L)reHlKYmuKh}p((`w}eXF{gC8)N4=5KgI`rmk~yKOwwah|o-ORo6rtEuqWUwtt8 G{{I18D&DIA literal 0 HcmV?d00001 diff --git a/Known.txt b/Known.txt new file mode 100644 index 0000000..c35c647 --- /dev/null +++ b/Known.txt @@ -0,0 +1 @@ +Hinata diff --git a/kemono_downloader_config.txt b/kemono_downloader_config.txt deleted file mode 100644 index e69de29..0000000 diff --git a/main.py b/main.py index bb931c7..61db148 
100644 --- a/main.py +++ b/main.py @@ -3,702 +3,1351 @@ import os import time import requests import re +import threading +# import uvicorn # Removed uvicorn import +import queue +import hashlib # Import hashlib for hashing +from concurrent.futures import ThreadPoolExecutor, Future, CancelledError + +from PyQt5.QtGui import QIcon from PyQt5.QtWidgets import ( QApplication, QWidget, QLabel, QLineEdit, QTextEdit, QPushButton, QVBoxLayout, QHBoxLayout, QFileDialog, QMessageBox, QListWidget, - QRadioButton, QButtonGroup, QCheckBox + QRadioButton, QButtonGroup, QCheckBox, QMainWindow ) -from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker + +# Import QObject before other Qt classes that inherit from it if needed +from PyQt5.QtCore import Qt, QThread, pyqtSignal, QMutex, QMutexLocker, QObject from urllib.parse import urlparse +# Import Image module correctly +try: + from PIL import Image +except ImportError: + print("ERROR: Pillow library not found. Please install it: pip install Pillow") + Image = None # Set to None to handle gracefully later + +from PyQt5.QtGui import QIcon + +app = QApplication(sys.argv) +app.setWindowIcon(QIcon("Kemono.ico")) # Taskbar + window icon + +class MainWindow(QMainWindow): + def __init__(self): + super().__init__() + self.setWindowTitle("My App") + +from io import BytesIO + + +# Removed try-except block for kemono_api +fastapi_app = None # Set to None directly + KNOWN_NAMES = [] +# --- Helper Functions --- + def clean_folder_name(name): - return "".join(c for c in name if c.isalnum() or c in (' ', '_', '-')).strip().replace(' ', '_') + """Removes invalid characters for folder names and replaces spaces with underscores.""" + if not isinstance(name, str): name = str(name) # Ensure input is string + # Allow spaces and common separators, remove others + cleaned = re.sub(r'[^\w\s\-\_]', '', name) + return cleaned.strip().replace(' ', '_') def clean_filename(name): - return "".join(c for c in name if c.isalnum() or c in (' ', 
'_', '-', '.')).strip().replace(' ', '_') + """Removes invalid characters for filenames and replaces spaces with underscores.""" + if not isinstance(name, str): name = str(name) # Ensure input is string + # Allow dots for file extensions, and common filename characters + cleaned = re.sub(r'[^\w\s\-\_\.]', '', name) + return cleaned.strip().replace(' ', '_') + def extract_folder_name_from_title(title, unwanted_keywords): + """ + Tries to find a suitable folder name from the title's first valid token. + Falls back to 'Uncategorized' if no suitable name is found. + """ + if not title: return 'Uncategorized' title_lower = title.lower() tokens = title_lower.split() for token in tokens: clean_token = clean_folder_name(token) + # Check if token is not empty and not just unwanted keywords if clean_token and clean_token not in unwanted_keywords: return clean_token - return 'Uncategorized' + return 'Uncategorized' # Fallback if no suitable token found + def match_folders_from_title(title, known_names, unwanted_keywords): - title_lower = title.lower() - folders = [] + """ + Matches known names (phrases/keywords) within the cleaned title. + Returns a list of *cleaned* known names found. 
+ """ + if not title: return [] + cleaned_title = clean_folder_name(title.lower()) + matched_cleaned_names = set() + for name in known_names: - cleaned_name = clean_folder_name(name.lower()) - if not cleaned_name: - continue + cleaned_name_for_match = clean_folder_name(name.lower()) + if not cleaned_name_for_match: continue # Skip empty known names - pattern = re.compile(r'\b' + re.escape(cleaned_name) + r'\b') + # Check if the cleaned known name is a substring of the cleaned title + if cleaned_name_for_match in cleaned_title: + # Ensure the match itself isn't an unwanted keyword + if cleaned_name_for_match not in unwanted_keywords: + matched_cleaned_names.add(cleaned_name_for_match) - if pattern.search(title_lower): - folders.append(cleaned_name) + return list(matched_cleaned_names) - folders = [f for f in folders if f not in unwanted_keywords] - return folders def is_image(filename): - return filename.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')) and not filename.lower().endswith('.gif') + if not filename: return False + return filename.lower().endswith(('.png', '.jpg', '.jpeg', '.webp', '.gif')) # Added gif def is_video(filename): - return filename.lower().endswith(('.mp4', '.mov', '.mkv', '.webm', '.gif')) + if not filename: return False + # Consider common video formats + return filename.lower().endswith(('.mp4', '.mov', '.mkv', '.webm', '.avi', '.wmv')) def is_zip(filename): + if not filename: return False return filename.lower().endswith('.zip') def is_rar(filename): + if not filename: return False return filename.lower().endswith('.rar') def is_post_url(url): - return '/post/' in url and url.startswith("https://kemono.su/api/v1/") + if not isinstance(url, str): return False + # Simple check for '/post/' segment in the path + return '/post/' in urlparse(url).path -def extract_post_info(api_url): - parts = api_url.rstrip('/').split('/') - try: - post_index = parts.index('post') - user_index = parts[post_index::-1].index('user') - user_index_absolute = 
post_index - user_index - if user_index_absolute > 0: - if post_index - user_index_absolute >= 1: - service = parts[user_index_absolute - 1] - user_id = parts[user_index_absolute + 1] - post_id = parts[post_index + 1] - if service and user_id and post_id: - return service, user_id, post_id - except ValueError: - pass - try: - user_index = parts.index('user') - if user_index > 0 and user_index + 1 < len(parts): - service = parts[user_index - 1] - user_id = parts[user_index + 1] - return service, user_id, None - except ValueError: - pass +def extract_post_info(url_string): + """ + Extracts service, user_id, and post_id from Kemono/Coomer URLs. + Returns (service, user_id, post_id) or (None, None, None). + """ + service, user_id, post_id = None, None, None + if not isinstance(url_string, str) or not url_string.strip(): + return None, None, None + try: + parsed_url = urlparse(url_string.strip()) + domain = parsed_url.netloc.lower() + path_parts = [part for part in parsed_url.path.strip('/').split('/') if part] + + # Check for known domains + is_kemono = 'kemono.su' in domain or 'kemono.party' in domain # Added kemono.party + is_coomer = 'coomer.su' in domain or 'coomer.party' in domain # Added coomer.party + if not (is_kemono or is_coomer): + return None, None, None # Unknown domain + + # Pattern: //user/ + if len(path_parts) >= 3 and path_parts[1].lower() == 'user': + service = path_parts[0] + user_id = path_parts[2] + # Pattern: //user//post/ + if len(path_parts) >= 5 and path_parts[3].lower() == 'post': + post_id = path_parts[4] + return service, user_id, post_id + + # API Pattern: /api/v1//user/ + if len(path_parts) >= 5 and path_parts[0].lower() == 'api' and path_parts[1].lower() == 'v1' and path_parts[3].lower() == 'user': + service = path_parts[2] + user_id = path_parts[4] + # API Pattern: /api/v1//user//post/ + if len(path_parts) >= 7 and path_parts[5].lower() == 'post': + post_id = path_parts[6] + return service, user_id, post_id + + except ValueError: # 
Handle potential errors during URL parsing + print(f"Debug: ValueError parsing URL '{url_string}'") + return None, None, None + except Exception as e: # Catch other unexpected errors + print(f"Debug: Exception during extract_post_info for URL '{url_string}': {e}") + return None, None, None + + # If no pattern matched return None, None, None -def fetch_single_post(service, user_id, post_id, logger): - api_url = f"https://kemono.su/api/v1/{service}/user/{user_id}/post/{post_id}" - logger(f"šŸ”„ Fetching single post: {post_id}...") - headers = {'User-Agent': 'Mozilla/5.0'} - try: - response = requests.get(api_url, headers=headers, timeout=15) - response.raise_for_status() - return [response.json()] - except requests.exceptions.RequestException as e: - logger(f"āŒ Error fetching specific post {post_id}: {e}") - return [] - except Exception as e: - logger(f"āŒ Unexpected error fetching post {post_id}: {e}") - return [] -def fetch_posts_paginated(api_url, headers, offset, logger): - paginated_url = f'{api_url}?o={offset}' +def fetch_posts_paginated(api_url_base, headers, offset, logger): + """Fetches a single page of posts from the creator API.""" + paginated_url = f'{api_url_base}?o={offset}' + logger(f" Fetching: {paginated_url}") try: - response = requests.get(paginated_url, headers=headers, timeout=15) - response.raise_for_status() + response = requests.get(paginated_url, headers=headers, timeout=45) # Increased timeout + response.raise_for_status() # Check for 4xx/5xx errors + # Check content type before parsing JSON + if 'application/json' not in response.headers.get('Content-Type', ''): + raise RuntimeError(f"Unexpected content type received: {response.headers.get('Content-Type')}. 
Body: {response.text[:200]}") return response.json() + except requests.exceptions.Timeout: + raise RuntimeError(f"Timeout fetching page offset {offset}") except requests.exceptions.RequestException as e: - raise RuntimeError(f"Error fetching page at offset {offset}: {e}") + # Provide more context on request errors + err_msg = f"Error fetching page offset {offset}: {e}" + if e.response is not None: + err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})" + raise RuntimeError(err_msg) + except ValueError as e: # JSONDecodeError inherits from ValueError + raise RuntimeError(f"Error decoding JSON response for offset {offset}: {e}. Body: {response.text[:200]}") except Exception as e: - raise RuntimeError(f"Unexpected error fetching page at offset {offset}: {e}") + raise RuntimeError(f"Unexpected error processing page offset {offset}: {e}") -def download_from_api(api_url, logger=print): - headers = {'User-Agent': 'Mozilla/5.0'} - service, user_id, post_id = extract_post_info(api_url) - if service and user_id and post_id: - posts = fetch_single_post(service, user_id, post_id, logger) - if posts: - logger(f"šŸ“¦ Found 1 post (specific URL).") - yield posts +def download_from_api(api_url_input, logger=print): + """ + Generator function yielding batches of posts from the API. + Handles pagination and single post fetching. + """ + headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'} + service, user_id, target_post_id = extract_post_info(api_url_input) + + if not service or not user_id: + logger(f"āŒ Invalid or unrecognized URL: {api_url_input}. 
Cannot fetch.") + return # Stop generator + + parsed_input = urlparse(api_url_input) + api_domain = parsed_input.netloc if ('kemono.su' in parsed_input.netloc.lower() or 'coomer.su' in parsed_input.netloc.lower() or 'kemono.party' in parsed_input.netloc.lower() or 'coomer.party' in parsed_input.netloc.lower()) else "kemono.su" # Added .party domains + api_base_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}" + + offset = 0 + page = 1 + processed_target_post = False # Flag to stop if target is found + + while True: + if target_post_id and processed_target_post: + logger(f"āœ… Target post {target_post_id} found and processed. Stopping.") + break + + logger(f"\nšŸ”„ Fetching page {page} (offset {offset}) for user {user_id} on {api_domain}...") + try: + posts_batch = fetch_posts_paginated(api_base_url, headers, offset, logger) + if not isinstance(posts_batch, list): + logger(f"āŒ API Error: Expected a list of posts, got {type(posts_batch)}. Response: {str(posts_batch)[:200]}") + break # Stop if response format is wrong + except RuntimeError as e: + logger(f"āŒ {e}") + logger(" Aborting pagination due to error.") + break + except Exception as e: + logger(f"āŒ Unexpected error during fetch loop: {e}") + break + + if not posts_batch: # Empty list means end of posts + if page == 1 and not target_post_id: + logger("šŸ˜• No posts found for this creator.") + elif not target_post_id: + logger("āœ… Reached end of posts.") + break # Stop pagination + + logger(f"šŸ“¦ Found {len(posts_batch)} posts on page {page}.") + + if target_post_id: + # Search for the target post in this batch + matching_post = next((post for post in posts_batch if str(post.get('id')) == str(target_post_id)), None) + + if matching_post: + logger(f"šŸŽÆ Found target post {target_post_id} on page {page}.") + yield [matching_post] # Yield only the target post + processed_target_post = True # Set flag to stop after this + else: + logger(f" Target post {target_post_id} not found on this page.") + 
# Assumption: If target_post_id is given, it should be on the first few pages. + # For now, continue pagination until the end if target not found. + pass else: - logger(f"āŒ Could not fetch specific post {post_id}.") - return - elif service and user_id: - api_base_url = f"https://kemono.su/api/v1/{service}/user/{user_id}" - offset = 0 - page = 1 - while True: - logger(f"\nšŸ”„ Fetching page {page} from {api_base_url} (offset={offset})...") - try: - posts_batch = fetch_posts_paginated(api_base_url, headers, offset, logger) - except RuntimeError as e: - logger(f"āŒ {e}") - break - - if not posts_batch: - logger("āœ… No more posts to fetch.") - break - - logger(f"šŸ“¦ Found {len(posts_batch)} posts on this page.") + # If not looking for a target post, yield the whole batch yield posts_batch - offset += 50 + # Increment offset only if we are not processing a target post that we just found + if not (target_post_id and processed_target_post): + # Determine offset increment (Kemono uses 50, but check API docs if possible) + page_size = 50 + offset += page_size page += 1 - else: - logger(f"āŒ Invalid URL format: {api_url}. 
Please provide a user page or specific post URL.") + time.sleep(0.6) # Slightly increased delay between page fetches -def process_posts(posts, download_root, known_names, filter_character, - unwanted_keywords, logger, filter_mode, skip_zip, skip_rar, use_subfolders, thread): - total_downloaded_batch = 0 - total_skipped_batch = 0 - headers = {'User-Agent': 'Mozilla.5.0'} - url_pattern = re.compile(r'https?://[^\s<>"]+|www\.[^\s<>"]+') - - for post in posts: - if thread.isInterruptionRequested(): - logger("āš ļø Cancellation requested.") - return total_downloaded_batch, total_skipped_batch, True - - title = post.get('title', 'untitled_post') - post_id = post.get('id', 'unknown_id') - post_file = post.get('file') - attachments = post.get('attachments', []) - post_content = post.get('content', '') - - if not isinstance(attachments, list): - logger(f"āš ļø Unexpected attachment format for post {post_id}: {type(attachments)}. Skipping attachments list.") - attachments = [] - - valid_folder_paths = [] - if use_subfolders: - folder_names_for_post = [] - if filter_character: - clean_char = clean_folder_name(filter_character.lower()) - matched_folders = match_folders_from_title(title, known_names, unwanted_keywords) - if clean_char in matched_folders: - folder_names_for_post = [clean_char] - logger(f"✨ Filter match for post '{title}': Using folder '{clean_char}'.") - else: - continue - else: - matched_folders = match_folders_from_title(title, known_names, unwanted_keywords) - if matched_folders: - logger(f"šŸŽ­ Found known character(s) in title '{title}': Using folder(s) {matched_folders}.") - folder_names_for_post = matched_folders - else: - folder_name = extract_folder_name_from_title(title, unwanted_keywords) - logger(f"šŸ“ No known characters found in title '{title}'. 
Using folder name derived from title: '{folder_name}'.") - folder_names_for_post = [folder_name] - - for folder in folder_names_for_post: - try: - folder_path_full = os.path.join(download_root, folder) - os.makedirs(folder_path_full, exist_ok=True) - valid_folder_paths.append(folder_path_full) - except OSError as e: - logger(f"āŒ Could not create directory {folder_path_full}: {e}") - else: - valid_folder_paths = [download_root] - try: - os.makedirs(download_root, exist_ok=True) - except OSError as e: - logger(f"āŒ Could not access download directory {download_root}: {e}") - continue + # Final check if target post was specified but never found + if target_post_id and not processed_target_post: + logger(f"āŒ Target post ID {target_post_id} was not found for this creator.") - if not valid_folder_paths: - logger(f"āš ļø No valid folders/root directory available for post {post_id}. Skipping file processing and link extraction.") - continue - if post_content: - found_links = url_pattern.findall(post_content) - if found_links: - logger(f"šŸ”— Links found in Post: {title} (ID: {post_id})") - for link in found_links: - logger(f" - {link}") - - all_files_to_process = [] - if post_file and isinstance(post_file, dict) and post_file.get('path') and (post_file.get('name') or os.path.basename(urlparse(post_file.get('path')).path)): - all_files_to_process.append(post_file) - - if attachments: - all_files_to_process.extend(attachments) - - if not all_files_to_process: - continue - - for file_info in all_files_to_process: - if thread.isInterruptionRequested(): - logger("āš ļø Cancellation requested.") - return total_downloaded_batch, total_skipped_batch, True - - if hasattr(thread, 'skip_current_file') and thread.skip_current_file: - logger(f"ā­ļø Skipping file: {file_info.get('name', 'unknown_file')}") - total_skipped_batch += 1 - thread.skip_current_file = False - continue - - if not isinstance(file_info, dict): - logger(f"āš ļø Skipping invalid file entry in post 
{post_id}: {file_info}") - continue - - file_url_path = file_info.get('path') - filename = file_info.get('name') - - if not filename and file_url_path: - try: - filename = os.path.basename(urlparse(file_url_path).path) - except Exception: - filename = None - - if not file_url_path or not filename: - logger(f"āš ļø Missing path or name for a file in post '{title}'. Skipping.") - continue - - is_img = is_image(filename) - is_vid = is_video(filename) - is_zip_file = is_zip(filename) - is_rar_file = is_rar(filename) - - if filter_mode == 'image' and not is_img: - total_skipped_batch += 1 - continue - elif filter_mode == 'video' and not is_vid: - total_skipped_batch += 1 - continue - elif skip_zip and is_zip_file: - logger(f"ā­ļø Skipping zip file based on user preference: {filename}") - total_skipped_batch += 1 - continue - elif skip_rar and is_rar_file: - logger(f"ā­ļø Skipping rar file based on user preference: {filename}") - total_skipped_batch += 1 - continue - - - full_url = f"https://kemono.su/data/{file_url_path.lstrip('/')}" - - for folder_path in valid_folder_paths: - if thread.isInterruptionRequested(): - logger("āš ļø Cancellation requested.") - return total_downloaded_batch, total_skipped_batch, True - - if hasattr(thread, 'skip_current_file') and thread.skip_current_file: - logger(f"ā­ļø Skipping file download to {os.path.basename(folder_path)}: {filename}") - total_skipped_batch += 1 - thread.skip_current_file = False - break - - save_filename = f"{clean_filename(title)}_{clean_filename(filename)}" - if len(save_filename) > 200: - save_filename = f"{post_id}_{clean_filename(filename)}" - - save_path = os.path.join(folder_path, save_filename) - - if os.path.exists(save_path) and os.path.getsize(save_path) > 0: - total_skipped_batch += 1 - continue - else: - try: - logger(f"ā¬‡ļø Downloading {save_filename} to {os.path.basename(folder_path)}...") - thread.current_download_path = save_path - thread.is_downloading_file = True - - with 
requests.get(full_url, headers=headers, timeout=60, stream=True) as r: - r.raise_for_status() - - with open(save_path, 'wb') as f: - for chunk in r.iter_content(chunk_size=8192): - if thread.isInterruptionRequested() or (hasattr(thread, 'skip_current_file') and thread.skip_current_file): - logger("āš ļø Download interrupted or skipped.") - if os.path.exists(save_path): - try: os.remove(save_path) - except OSError: pass - thread.current_download_path = None - thread.is_downloading_file = False - thread.skip_current_file = False - if thread.isInterruptionRequested(): - return total_downloaded_batch, total_skipped_batch + 1, True - else: - total_skipped_batch += 1 - break - - if chunk: - f.write(chunk) - - if not (hasattr(thread, 'skip_current_file') and thread.skip_current_file): - total_downloaded_batch += 1 - logger(f"āœ… Saved in {os.path.basename(folder_path)}: {save_filename}") - time.sleep(0.5) - - thread.current_download_path = None - thread.is_downloading_file = False - thread.skip_current_file = False - - if not (hasattr(thread, 'skip_current_file') and thread.skip_current_file): - break - - except requests.exceptions.RequestException as e: - logger(f"āŒ Failed download {save_filename} to {os.path.basename(folder_path)}: {e}") - if os.path.exists(save_path): - try: os.remove(save_path) - except OSError: pass - thread.current_download_path = None - thread.is_downloading_file = False - thread.skip_current_file = False - except IOError as e: - logger(f"āŒ Failed save {save_filename} to {os.path.basename(folder_path)}: {e}") - thread.current_download_path = None - thread.is_downloading_file = False - self.skip_current_file = False - except Exception as e: - logger(f"āŒ Unexpected error for {save_filename} in {os.path.basename(folder_path)}: {e}") - thread.current_download_path = None - thread.is_downloading_file = False - thread.skip_current_file = False - - return total_downloaded_batch, total_skipped_batch, False - -class DownloadThread(QThread): +# --- 
Worker Object for ThreadPoolExecutor --- +class PostProcessorSignals(QObject): + """Defines signals emitted by worker threads.""" progress_signal = pyqtSignal(str) - add_character_prompt_signal = pyqtSignal(str) - add_character_result_signal = pyqtSignal(bool) - file_download_status_signal = pyqtSignal(bool) + file_download_status_signal = pyqtSignal(bool) # True=start, False=end + # No request_cancel_signal needed as workers check shared event - def __init__(self, api_url, output_dir, known_names_copy, - filter_character=None, filter_mode='all', skip_zip=True, skip_rar=True, use_subfolders=True): - super().__init__() - self.api_url = api_url - self.output_dir = output_dir - self.known_names = list(known_names_copy) +class PostProcessorWorker: + """Processes a single post within a ThreadPoolExecutor.""" + def __init__(self, post_data, download_root, known_names, filter_character, + unwanted_keywords, filter_mode, skip_zip, skip_rar, + use_subfolders, target_post_id_from_initial_url, custom_folder_name, + compress_images, download_thumbnails, service, user_id, + api_url_input, cancellation_event, signals, + downloaded_files, downloaded_file_hashes, downloaded_files_lock, downloaded_file_hashes_lock, + skip_words_list=None): # ADDED skip_words_list + # Store all arguments passed + self.post = post_data + self.download_root = download_root + self.known_names = known_names self.filter_character = filter_character + self.unwanted_keywords = unwanted_keywords self.filter_mode = filter_mode self.skip_zip = skip_zip self.skip_rar = skip_rar self.use_subfolders = use_subfolders - self.mutex = QMutex() - self._add_character_response = None - self.skip_current_file = False - self.current_download_path = None + self.target_post_id_from_initial_url = target_post_id_from_initial_url + self.custom_folder_name = custom_folder_name + self.compress_images = compress_images + self.download_thumbnails = download_thumbnails + self.service = service + self.user_id = user_id + 
self.api_url_input = api_url_input # Needed for domain/URL construction + self.cancellation_event = cancellation_event # Shared threading.Event + self.signals = signals # Shared PostProcessorSignals instance + self.skip_current_file_flag = threading.Event() # Event for skipping self.is_downloading_file = False + self.current_download_path = None + self.downloaded_files = downloaded_files # Shared set (filenames) + self.downloaded_file_hashes = downloaded_file_hashes # Shared set (hashes) # ADDED + self.downloaded_files_lock = downloaded_files_lock # Use passed lock + self.downloaded_file_hashes_lock = downloaded_file_hashes_lock # Use passed lock # ADDED + self.skip_words_list = skip_words_list if skip_words_list is not None else [] # ADDED - def run(self): - unwanted_keywords = {'spicy', 'hd', 'nsfw'} - grand_total_downloaded = 0 - grand_total_skipped = 0 - cancelled_during_processing = False + # Ensure Pillow is available if compression is enabled + if self.compress_images and Image is None: + self.logger("āš ļø Image compression enabled, but Pillow library is not loaded. 
Disabling compression.") + self.compress_images = False - if self.filter_character and self.use_subfolders: - clean_char = clean_folder_name(self.filter_character.lower()) - if clean_char not in (n.lower() for n in self.known_names): - with QMutexLocker(self.mutex): - self._add_character_response = None + def logger(self, message): + """Emit progress messages safely via signals.""" + if self.signals and hasattr(self.signals, 'progress_signal'): + self.signals.progress_signal.emit(message) + else: + print(f"(Worker Log): {message}") # Fallback - self.add_character_prompt_signal.emit(clean_char) - - while self._add_character_response is None: - if self.isInterruptionRequested(): - self.progress_signal.emit("āš ļø Download cancelled while waiting for user input.") - return - self.msleep(100) - - if self._add_character_response: - self.known_names.append(clean_char) - else: - self.progress_signal.emit(f"āŒ Character '{clean_char}' not added by user. Aborting task.") - return - elif self.filter_character and not self.use_subfolders: - clean_char = clean_folder_name(self.filter_character.lower()) - if clean_char not in (n.lower() for n in self.known_names): - self.progress_signal.emit(f"ā„¹ļø Character filter '{clean_char}' will be applied, but files will go to the single output folder as 'Download to Separate Folders' is unchecked.") - - - try: - post_generator = download_from_api(self.api_url, logger=self.update_progress) - - for posts_batch in post_generator: - if self.isInterruptionRequested(): - self.progress_signal.emit("āš ļø Download cancelled.") - cancelled_during_processing = True - break - - self.file_download_status_signal.emit(True) - - downloaded, skipped, cancelled_in_batch = process_posts( - posts=posts_batch, - download_root=self.output_dir, - known_names=self.known_names, - filter_character=self.filter_character, - unwanted_keywords=unwanted_keywords, - logger=self.update_progress, - filter_mode=self.filter_mode, - skip_zip=self.skip_zip, - 
skip_rar=self.skip_rar, - use_subfolders=self.use_subfolders, - thread=self - ) - grand_total_downloaded += downloaded - grand_total_skipped += skipped - - self.file_download_status_signal.emit(False) - - - if cancelled_in_batch: - cancelled_during_processing = True - break - - if not cancelled_during_processing: - self.progress_signal.emit(f"\nšŸŽ‰ Finished! Total downloaded: {grand_total_downloaded}, Skipped: {grand_total_skipped}") - else: - self.progress_signal.emit(f"\nāš ļø Download cancelled. Total downloaded: {grand_total_downloaded}, Skipped: {grand_total_skipped}") - - - except Exception as e: - self.progress_signal.emit(f"\nāŒ An unexpected error occurred in download thread: {e}") - self.file_download_status_signal.emit(False) - - - def update_progress(self, message): - self.progress_signal.emit(message) - - def receive_add_character_result(self, result): - with QMutexLocker(self.mutex): - self._add_character_response = result - - def cancel(self): - self.requestInterruption() + def check_cancel(self): + """Checks the shared cancellation event.""" + is_cancelled = self.cancellation_event.is_set() + # No need to log cancellation detection repeatedly here, parent thread handles final status. + return is_cancelled def skip_file(self): - if self.isRunning() and self.is_downloading_file: - self.skip_current_file = True - self.progress_signal.emit("ā­ļø Skip requested for the current file.") + """Sets the skip flag (not typically called directly on worker).""" + # This method is primarily for the single-threaded QThread context. + # Skip logic for workers relies on checking skip_current_file_flag. + pass + + def process(self): + """Processes the single post assigned to this worker. 
Returns (downloaded, skipped).""" + if self.check_cancel(): return 0, 0 + + total_downloaded_post = 0 + total_skipped_post = 0 + # Standard headers, consider customizing if needed + headers = {'User-Agent': 'Mozilla/5.0', 'Referer': f'https://{urlparse(self.api_url_input).netloc}/'} + url_pattern = re.compile(r'https?://[^\s<>"]+|www\.[^\s<>"]+') + # LOCAL_API_BASE = "http://127.0.0.1:8000" # Removed LOCAL_API_BASE + LARGE_THUMBNAIL_THRESHOLD = 1 * 1024 * 1024 # 1MB + + post = self.post + api_title = post.get('title', '') # Default to empty string + title = api_title if api_title else 'untitled_post' + post_id = post.get('id', 'unknown_id') + post_file_info = post.get('file') + attachments = post.get('attachments', []) + post_content = post.get('content', '') + # is_target_post used for custom folder logic primarily + is_target_post = (self.target_post_id_from_initial_url is not None) and (str(post_id) == str(self.target_post_id_from_initial_url)) + + self.logger(f"\n--- Processing Post {post_id} ('{title[:50]}...') (Thread: {threading.current_thread().name}) ---") + + # --- NEW: Skip Words Check for Post Title --- + if self.skip_words_list: + title_lower = title.lower() + for skip_word in self.skip_words_list: + if skip_word.lower() in title_lower: + self.logger(f" -> Skip Post (Title): Post {post_id} title ('{title[:30]}...') contains skip word '{skip_word}'. Skipping entire post.") + return 0, 1 # 0 downloaded, 1 skipped (the whole post) + # --- END NEW --- + if not isinstance(attachments, list): + self.logger(f"āš ļø Corrupt attachment data for post {post_id}. 
Skipping attachments.") + attachments = [] + + # --- Determine Download Folder(s) --- + valid_folder_paths = [] + folder_decision_reason = "" + api_domain = urlparse(self.api_url_input).netloc if ('kemono.su' in urlparse(self.api_url_input).netloc.lower() or 'coomer.su' in urlparse(self.api_url_input).netloc.lower() or 'kemono.party' in urlparse(self.api_url_input).netloc.lower() or 'coomer.party' in urlparse(self.api_url_input).netloc.lower()) else "kemono.su" + + # 1. Custom Folder for Single Target Post (Highest Priority if applicable) + if is_target_post and self.custom_folder_name and self.use_subfolders: + # custom_folder_name should already be cleaned by GUI logic + folder_path_full = os.path.join(self.download_root, self.custom_folder_name) + valid_folder_paths = [folder_path_full] + folder_decision_reason = f"Using custom folder for target post: '{self.custom_folder_name}'" + + + # 2. Subfolders Enabled (Character Filter or Automatic) - Only if custom wasn't used + if not valid_folder_paths and self.use_subfolders: + folder_names_for_post = [] # Cleaned folder names derived for this post + + # a) Character Filter Applied + if self.filter_character: + clean_char_filter = clean_folder_name(self.filter_character.lower()) + # Match against known names found *in this post's title* + matched_names_in_title = match_folders_from_title(title, self.known_names, self.unwanted_keywords) + + if clean_char_filter and clean_char_filter in matched_names_in_title: + # Use only the filtered character's folder name + folder_names_for_post = [clean_char_filter] + folder_decision_reason = f"Character filter '{self.filter_character}' matched title. Using folder '{clean_char_filter}'." 
+ else: + # Filter specified but doesn't match this post -> SKIP POST + self.logger(f" -> Filter Skip Post {post_id}: Character filter '{self.filter_character}' not found in title matches ({matched_names_in_title}).") + return 0, 1 # 0 downloaded, 1 skipped (the whole post) + + # b) No Character Filter -> Automatic Naming + else: + matched_folders = match_folders_from_title(title, self.known_names, self.unwanted_keywords) + if matched_folders: + folder_names_for_post = matched_folders # Use all matched known names as folders + folder_decision_reason = f"Found known name(s) in title: {matched_folders}" + else: + # Try extracting a generic name from title + extracted_folder = extract_folder_name_from_title(title, self.unwanted_keywords) + folder_names_for_post = [extracted_folder] + folder_decision_reason = f"No known names in title. Using derived folder: '{extracted_folder}'" + + # Create full paths for the determined folder names + for folder_name in folder_names_for_post: + folder_path_full = os.path.join(self.download_root, folder_name) + valid_folder_paths.append(folder_path_full) + + + # 3. Fallback: Subfolders disabled OR no specific folder determined above + if not valid_folder_paths: + valid_folder_paths = [self.download_root] # Save directly to root + if not folder_decision_reason: # Add reason if not already set + folder_decision_reason = "Subfolders disabled or no specific folder determined. Using root download directory." + + + self.logger(f" Folder Decision: {folder_decision_reason}") + if not valid_folder_paths: + self.logger(f" ERROR: No valid folder paths determined for post {post_id}. Skipping.") + return 0, 1 # Skip post + + + # --- Link Extraction from Content --- + if post_content: + try: + # More robust link finding, avoid javascript: etc. 
+ found_links = re.findall(r'href=["\'](https?://[^"\']+)["\']', post_content) + if found_links: + self.logger(f"šŸ”— Links found in post content:") + unique_links = sorted(list(set(found_links))) # Remove duplicates + for link in unique_links[:10]: # Log max 10 links + # Basic filtering of common unwanted links + if not any(x in link for x in ['.css', '.js', 'javascript:']): + self.logger(f" - {link}") + if len(unique_links) > 10: + self.logger(f" - ... ({len(unique_links) - 10} more links not shown)") + except Exception as e: + self.logger(f"āš ļø Error parsing content for links in post {post_id}: {e}") + + + # --- Identify Files/Attachments/Thumbnails --- + files_to_process_for_download = [] + api_domain = urlparse(self.api_url_input).netloc if ('kemono.su' in urlparse(self.api_url_input).netloc.lower() or 'coomer.su' in urlparse(self.api_url_input).netloc.lower() or 'kemono.party' in urlparse(self.api_url_input).netloc.lower() or 'coomer.party' in urlparse(self.api_url_input).netloc.lower()) else "kemono.su" + + if self.download_thumbnails: + # Thumbnail download attempt (original logic, modified to not use local API) + self.logger(f" Mode: Attempting to download thumbnail...") # Modified log + # The original code relied on a local API for thumbnails. + # Since we removed the API, this section needs to be adapted or removed. + # For now, we'll simulate that thumbnail download is not available without the API. + self.logger(" Thumbnail download via API is disabled as the local API is not used.") + # If download_thumbnails is true, and we can't get it, then we should skip the post + # as per the original logic's intention. 
+ self.logger(f" -> Skipping Post {post_id}: Thumbnail download requested but API is disabled.") + return 0, 1 # 0 downloaded, 1 skipped post + + else: # Normal file download mode + self.logger(f" Mode: Downloading post file/attachments.") + # Process main post file ('file' field) + if post_file_info and isinstance(post_file_info, dict) and post_file_info.get('path'): + main_file_path = post_file_info['path'].lstrip('/') + # Use provided name or derive from path + main_file_name = post_file_info.get('name') or os.path.basename(main_file_path) + if main_file_name: + file_url = f"https://{api_domain}/data/{main_file_path}" + files_to_process_for_download.append({ + 'url': file_url, 'name': main_file_name, + '_is_thumbnail': False, '_source': 'post_file' + }) + else: + self.logger(f" āš ļø Skipping main post file: Missing filename (Path: {main_file_path})") + + # Process attachments + # Use a counter for attachments within the same post for unique naming + attachment_counter = 0 + for idx, attachment in enumerate(attachments): + if isinstance(attachment, dict) and attachment.get('path'): + attach_path = attachment['path'].lstrip('/') + attach_name = attachment.get('name') or os.path.basename(attach_path) + if attach_name: + # Construct a unique name including post ID and attachment index + base, ext = os.path.splitext(clean_filename(attach_name)) + # Ensure index is added consistently for attachments + final_attach_name = f"{post_id}_{attachment_counter}{ext}" + # Add cleaned original name for readability/lookup if needed, but base on post+index for uniqueness + if base and base != f"{post_id}_{attachment_counter}": # Avoid doubling if base is already post_id_index + final_attach_name = f"{post_id}_{attachment_counter}_{base}{ext}" + + + attach_url = f"https://{api_domain}/data/{attach_path}" + files_to_process_for_download.append({ + 'url': attach_url, 'name': final_attach_name, # Use the unique name here + '_is_thumbnail': False, '_source': f'attachment_{idx+1}', + 
'_original_name_for_log': attach_name # Keep original for logging + }) + attachment_counter += 1 # Increment counter + + else: + self.logger(f" āš ļø Skipping attachment {idx+1}: Missing filename (Path: {attach_path})") + else: + self.logger(f" āš ļø Skipping invalid attachment entry {idx+1}: {str(attachment)[:100]}") + + + if not files_to_process_for_download: + self.logger(f" No files found to download for post {post_id}.") + return 0, 0 # No files, no action needed, not skipped post + + self.logger(f" Files identified for download: {len(files_to_process_for_download)}") + + + # --- File Download Loop --- + post_download_count = 0 + post_skip_count = 0 # Files skipped within this post + + # Use a local set and lock for filenames *within this post's processing* + # This is secondary to the global hash check but helps with filename conflicts within the same post + local_processed_filenames = set() + local_filenames_lock = threading.Lock() + + + for file_info in files_to_process_for_download: + if self.check_cancel(): break # Check cancellation before each file + + # Check skip flag (set by GUI for single-thread mode, usually not used here) + if self.skip_current_file_flag.is_set(): + original_name_for_log = file_info.get('_original_name_for_log', file_info.get('name', 'unknown_file')) + self.logger(f"ā­ļø File skip requested: {original_name_for_log}") + post_skip_count += 1 + self.skip_current_file_flag.clear() # Reset flag + continue + + file_url = file_info.get('url') + original_filename = file_info.get('name') # This is the constructed unique name if applicable + is_thumbnail = file_info.get('_is_thumbnail', False) + original_name_for_log = file_info.get('_original_name_for_log', original_filename) # Use original for log if available + + if not file_url or not original_filename: + self.logger(f"āš ļø Skipping file entry due to missing URL or name: {str(file_info)[:100]}") + post_skip_count += 1 + continue + + cleaned_save_filename = 
clean_filename(original_filename) # Clean the potentially unique name + + # --- NEW: Skip Words Check for Filename --- + if self.skip_words_list: + filename_lower = cleaned_save_filename.lower() + file_skipped_by_word = False + for skip_word in self.skip_words_list: + if skip_word.lower() in filename_lower: + self.logger(f" -> Skip File (Filename): File '{original_name_for_log}' contains skip word '{skip_word}'.") + post_skip_count += 1 + file_skipped_by_word = True + break + if file_skipped_by_word: + continue # Skip to next file in the post + # --- END NEW --- + + + # --- Apply File Type Filters (if not in thumbnail mode) --- + if not self.download_thumbnails: # This condition will always be true now + file_skipped_by_filter = False + is_img = is_image(cleaned_save_filename) + is_vid = is_video(cleaned_save_filename) # Using updated is_video + is_zip_file = is_zip(cleaned_save_filename) + is_rar_file = is_rar(cleaned_save_filename) + + if self.filter_mode == 'image' and not is_img: + self.logger(f" -> Filter Skip: '{original_name_for_log}' (Not image/gif)") + file_skipped_by_filter = True + elif self.filter_mode == 'video' and not is_vid: + self.logger(f" -> Filter Skip: '{original_name_for_log}' (Not video)") + file_skipped_by_filter = True + elif self.skip_zip and is_zip_file: + self.logger(f" -> Pref Skip: '{original_name_for_log}' (Zip)") + file_skipped_by_filter = True + elif self.skip_rar and is_rar_file: + self.logger(f" -> Pref Skip: '{original_name_for_log}' (RAR)") + file_skipped_by_filter = True + + if file_skipped_by_filter: + post_skip_count += 1 + continue # Skip to next file + + # --- Attempt Download to Each Valid Folder --- + file_downloaded_or_exists = False + for folder_path in valid_folder_paths: + if self.check_cancel(): break # Check cancellation before each folder attempt + + # --- Ensure Directory Exists --- + try: + os.makedirs(folder_path, exist_ok=True) + except OSError as e: + self.logger(f"āŒ Error ensuring directory exists 
{folder_path}: {e}. Skipping path.") + continue # Try next folder path if available + except Exception as e: + self.logger(f"āŒ Unexpected error creating dir {folder_path}: {e}. Skipping path.") + continue + + save_path = os.path.join(folder_path, cleaned_save_filename) + folder_basename = os.path.basename(folder_path) # For logging + + # --- Check if File Already Exists on Disk OR Downloaded (Filename Check) --- + # Check size > 0 to avoid re-downloading empty files from previous failures + with local_filenames_lock: # Use local lock for filename set within post + if os.path.exists(save_path) and os.path.getsize(save_path) > 0: + self.logger(f" -> Exists Skip: '{original_name_for_log}' in '{folder_basename}'") + post_skip_count += 1 # Count exists as skipped for this post's summary + file_downloaded_or_exists = True + # Add to global filename set just in case + with self.downloaded_files_lock: + self.downloaded_files.add(cleaned_save_filename) + break # Don't try other folders if it exists in one valid location + elif cleaned_save_filename in local_processed_filenames: + self.logger(f" -> Local Skip: '{original_name_for_log}' in '{folder_basename}' (already processed in this post)") + post_skip_count += 1 + file_downloaded_or_exists = True + # Add to global filename set just in case + with self.downloaded_files_lock: + self.downloaded_files.add(cleaned_save_filename) + break # Don't try other folders + # Global filename check (less critical with hash check, but for consistency) + with self.downloaded_files_lock: + if cleaned_save_filename in self.downloaded_files: + self.logger(f" -> Global Filename Skip: '{original_name_for_log}' in '{folder_basename}' (filename already downloaded globally)") + post_skip_count += 1 + file_downloaded_or_exists = True + break # Don't try other folders + + + # --- Actual Download Attempt --- + try: + self.logger(f"ā¬‡ļø Downloading '{original_name_for_log}' to '{folder_basename}'...") + self.current_download_path = save_path # 
Still set the potential path + self.is_downloading_file = True + self.signals.file_download_status_signal.emit(True) # Signal START + + # Use stream=True for large files, adjust timeout + response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True) # (connect_timeout, read_timeout) + response.raise_for_status() # Check for HTTP errors + + # --- Download Content in Chunks and Calculate Hash --- + file_content_bytes = BytesIO() + downloaded_size = 0 + chunk_count = 0 + md5_hash = hashlib.md5() # Initialize hash object + + for chunk in response.iter_content(chunk_size=32 * 1024): # 32KB chunks + if self.check_cancel(): break # Check cancellation frequently + # Skip flag check (less relevant for worker, but for consistency) + if self.skip_current_file_flag.is_set(): break + + if chunk: # filter out keep-alive new chunks + file_content_bytes.write(chunk) + md5_hash.update(chunk) # Update hash with chunk + downloaded_size += len(chunk) + chunk_count += 1 + # Optional: Add progress reporting per chunk if needed + + # Check again if loop was broken by cancellation/skip + if self.check_cancel() or self.skip_current_file_flag.is_set(): + self.logger(f" āš ļø Download interrupted {'(cancelled)' if self.cancellation_event.is_set() else '(skipped)'} for {original_name_for_log}.") + # Clean up partial file - not needed here as we haven't saved yet + # Ensure this file is marked as skipped if interrupted by skip flag + if self.skip_current_file_flag.is_set(): + post_skip_count += 1 + self.skip_current_file_flag.clear() + # Need to break from the folder loop as well + break # Break from trying other folders + + + # --- Process Downloaded Content (Hash Check, Compression, Save) --- + final_save_path = save_path # May change if compressed + current_filename_for_log = cleaned_save_filename # May change + file_content_bytes.seek(0) # Rewind the BytesIO object to the beginning + + if downloaded_size == 0 and chunk_count > 0: + self.logger(f"āš ļø Warning: 
Downloaded 0 bytes despite receiving chunks for {original_name_for_log}. Skipping save.") + post_skip_count += 1 + break # Treat as failure for this folder + + if downloaded_size > 0: + calculated_hash = md5_hash.hexdigest() # Get the final hash + + # --- Content Hash Check --- + with self.downloaded_file_hashes_lock: # Use lock for hash set + if calculated_hash in self.downloaded_file_hashes: + self.logger(f" -> Content Skip: '{original_name_for_log}' (Hash: {calculated_hash}) already downloaded.") + post_skip_count += 1 + file_downloaded_or_exists = True # Mark as handled + # Add filename to global set just in case filename checks are used elsewhere + with self.downloaded_files_lock: + self.downloaded_files.add(cleaned_save_filename) + # Add filename to local set as well + with local_filenames_lock: + local_processed_filenames.add(cleaned_save_filename) + # No need to save or compress, break from folder loop + break + else: + # Hash not found, proceed with saving and adding hash later + pass + + + if not file_downloaded_or_exists: # Only proceed if not skipped by hash check + final_bytes_to_save = file_content_bytes + + # --- Image Compression --- + # Re-check if it's an image *after* download, just in case + is_img_for_compress = is_image(cleaned_save_filename) + if is_img_for_compress and not is_thumbnail and self.compress_images and Image and downloaded_size > 1500 * 1024: + self.logger(f" Compressing large image ({downloaded_size / 1024:.2f} KB)...") + try: + # Open image from bytes + with Image.open(file_content_bytes) as img: + original_format = img.format + # Handle palette/mode issues for saving to WebP + if img.mode == 'P': img = img.convert('RGBA') + elif img.mode not in ['RGB', 'RGBA', 'L']: img = img.convert('RGB') + + compressed_bytes = BytesIO() + img.save(compressed_bytes, format='WebP', quality=75, method=4) # Adjust quality/method + compressed_size = compressed_bytes.getbuffer().nbytes + + # Only save if significantly smaller (e.g., > 10% 
reduction) + if compressed_size < downloaded_size * 0.90: + self.logger(f" Compression success: {compressed_size / 1024:.2f} KB (WebP Q75)") + compressed_bytes.seek(0) + final_bytes_to_save = compressed_bytes + # Update filename and save path + base, _ = os.path.splitext(cleaned_save_filename) + current_filename_for_log = base + '.webp' + final_save_path = os.path.join(folder_path, current_filename_for_log) + self.logger(f" Updated filename: {current_filename_for_log}") + else: + self.logger(f" Compression skipped: WebP not significantly smaller ({compressed_size / 1024:.2f} KB).") + file_content_bytes.seek(0) # Rewind original bytes + final_bytes_to_save = file_content_bytes + + except Exception as comp_e: + self.logger(f"āŒ Image compression failed for {original_name_for_log}: {comp_e}. Saving original.") + file_content_bytes.seek(0) # Rewind original + final_bytes_to_save = file_content_bytes + final_save_path = save_path # Ensure original path + + elif is_img_for_compress and not is_thumbnail and self.compress_images: + # Log reason if compression enabled but size too small + self.logger(f" Skipping compression: Image size ({downloaded_size / 1024:.2f} KB) below threshold.") + file_content_bytes.seek(0) + final_bytes_to_save = file_content_bytes + + elif is_thumbnail and downloaded_size > LARGE_THUMBNAIL_THRESHOLD: # This is_thumbnail check is less relevant now + self.logger(f"āš ļø Downloaded thumbnail '{current_filename_for_log}' ({downloaded_size / 1024:.2f} KB) is large.") + file_content_bytes.seek(0) + final_bytes_to_save = file_content_bytes + else: # Ensure stream is rewound if no compression happened + file_content_bytes.seek(0) + final_bytes_to_save = file_content_bytes + + + # --- Save to Disk --- + # Check existence again before writing (reduce race slightly), though hash check is primary now. + # Also check filename sets again. 
+ save_file = False + with self.downloaded_files_lock: # Lock for global filename set + with local_filenames_lock: # Lock for local filename set + if os.path.exists(final_save_path) and os.path.getsize(final_save_path) > 0: + self.logger(f" -> Exists Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}'") + post_skip_count += 1 + file_downloaded_or_exists = True + elif current_filename_for_log in self.downloaded_files: + self.logger(f" -> Global Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}' (already downloaded globally)") + post_skip_count += 1 + file_downloaded_or_exists = True + elif current_filename_for_log in local_processed_filenames: + self.logger(f" -> Local Skip (pre-write): '{current_filename_for_log}' in '{folder_basename}' (already processed in this post)") + post_skip_count += 1 + file_downloaded_or_exists = True + else: + save_file = True # OK to save + + + if save_file: + try: + with open(final_save_path, 'wb') as f: + # Write in chunks to handle potentially large compressed streams + while True: + chunk = final_bytes_to_save.read(64 * 1024) # 64KB write chunks + if not chunk: break + f.write(chunk) + + # File saved successfully, now add hash and filename to sets + with self.downloaded_file_hashes_lock: + self.downloaded_file_hashes.add(calculated_hash) # ADD HASH + with self.downloaded_files_lock: + self.downloaded_files.add(current_filename_for_log) # Add filename + with local_filenames_lock: + local_processed_filenames.add(current_filename_for_log) # Add filename locally + + post_download_count += 1 + file_downloaded_or_exists = True + self.logger(f"āœ… Saved: '{current_filename_for_log}' ({downloaded_size / 1024:.1f} KB, Hash: {calculated_hash[:8]}...) in '{folder_basename}'") + time.sleep(0.05) # Tiny delay after successful save + + except IOError as io_err: + self.logger(f"āŒ Save Fail: '{current_filename_for_log}' to '{folder_basename}'. 
Error: {io_err}") + post_skip_count += 1 # Count save failure as skip + # Attempt cleanup of potentially partial file + if os.path.exists(final_save_path): + try: os.remove(final_save_path) + except OSError: pass + # Continue to next folder? Probably not useful if save failed. Break folder loop. + break + except Exception as save_err: + self.logger(f"āŒ Unexpected Save Error: '{current_filename_for_log}' in '{folder_basename}'. Error: {save_err}") + post_skip_count += 1 + if os.path.exists(final_save_path): + try: os.remove(final_save_path) + except OSError: pass + break # Break folder loop on unexpected error + + # Clean up BytesIO streams + final_bytes_to_save.close() + # Only close original if it's different from the one saved + if file_content_bytes is not final_bytes_to_save: + file_content_bytes.close() + + # If downloaded/exists/saved successfully, break from folder loop + if file_downloaded_or_exists: + break + + # --- Error Handling for Download Attempt --- + except requests.exceptions.RequestException as e: + self.logger(f"āŒ Download Fail: {original_name_for_log}. Error: {e}") + # Clean up file if it was created partially/empty - not needed with BytesIO first + post_skip_count += 1 + # Break folder loop: If download failed once, it will likely fail again. + break + except IOError as e: + # This might happen if folder becomes inaccessible between check and write + self.logger(f"āŒ File I/O Error: {original_name_for_log} in '{folder_basename}'. 
Error: {e}") + post_skip_count += 1 + break # Break folder loop + except Exception as e: + self.logger(f"āŒ Unexpected Error during download/save for {original_name_for_log}: {e}") + import traceback + self.logger(f" Traceback: {traceback.format_exc(limit=2)}") + post_skip_count += 1 + break # Break folder loop on unexpected error + + finally: + # --- Cleanup after each folder attempt --- + self.is_downloading_file = False + self.current_download_path = None + self.signals.file_download_status_signal.emit(False) # Signal END + + # --- End of Folder Loop --- + if self.check_cancel(): break # Check cancellation after trying all folders + + # Reset skip flag if it was processed + if self.skip_current_file_flag.is_set(): + self.skip_current_file_flag.clear() + + # If the file wasn't handled (downloaded/exists/skipped) in any folder, log it. + # The skip count should already reflect failures or explicit skips. + if not file_downloaded_or_exists: + # Check if it wasn't skipped for other reasons already counted + # This log might be redundant if errors were already logged. 
+ # self.logger(f" -> File '{original_name_for_log}' not downloaded/found in any target folder.") + pass + + + # --- End of File Loop for Post --- + if self.check_cancel(): + self.logger(f" Post {post_id} processing cancelled.") + # Return counts accumulated *before* cancellation + return post_download_count, post_skip_count + + + self.logger(f" Post {post_id} Summary: Downloaded={post_download_count}, Skipped={post_skip_count}") + return post_download_count, post_skip_count + + + +# --- Main Application Class --- class DownloaderApp(QWidget): + # Signals for cross-thread communication + character_prompt_response_signal = pyqtSignal(bool) + log_signal = pyqtSignal(str) + add_character_prompt_signal = pyqtSignal(str) + file_download_status_signal = pyqtSignal(bool) # Combined start/end + overall_progress_signal = pyqtSignal(int, int) # total, processed + finished_signal = pyqtSignal(int, int, bool) # downloaded, skipped, cancelled + + def __init__(self): super().__init__() - self.config_file = "kemono_downloader_config.txt" - self.load_known_names() - self.setWindowTitle("Kemono Downloader") - self.setGeometry(200, 200, 900, 580) - self.setStyleSheet(self.get_dark_theme()) - self.init_ui() + # Initialize core attributes first + self.config_file = "Known.txt" self.download_thread = None + self.thread_pool = None + self.cancellation_event = threading.Event() + self.active_futures = [] + self.total_posts_to_process = 0 + self.processed_posts_count = 0 + self.download_counter = 0 + self.skip_counter = 0 + self.worker_signals = PostProcessorSignals() # Single instance for workers + self.prompt_mutex = QMutex() + self._add_character_response = None + # self.api_server = None # Removed api_server + # self.api_thread = None # Removed api_thread + self.downloaded_files = set() # Shared set for tracking downloaded filenames (secondary check) + self.downloaded_files_lock = threading.Lock() # Lock for filenames set + self.downloaded_file_hashes = set() # Shared set for tracking 
downloaded file hashes (primary check) # ADDED + self.downloaded_file_hashes_lock = threading.Lock() # Lock for hashes set # ADDED + + # Load configuration *after* initializing essential attributes + self.load_known_names() # Load KNOWN_NAMES global + + # Setup UI last + self.setWindowTitle("Kemono Downloader v2.3 (Content Dedupe & Skip)") # Updated Title + self.setGeometry(150, 150, 1050, 820) # Adjusted size for new field + self.setStyleSheet(self.get_dark_theme()) + self.init_ui() # Initialize UI elements + + # Connect signals + self._connect_signals() + + # Start API server if configured - Removed call to start_api_server + # self.start_api_server() + self.log_signal.emit("ā„¹ļø Local API server functionality has been removed.") + + + def _connect_signals(self): + """Connect all signals for clarity.""" + # Signals from worker helper (for multi-threading) + self.worker_signals.progress_signal.connect(self.log) + self.worker_signals.file_download_status_signal.connect(self.update_skip_button_state) + + # Internal signals for GUI updates and thread communication + self.log_signal.connect(self.log) + self.add_character_prompt_signal.connect(self.prompt_add_character) + self.character_prompt_response_signal.connect(self.receive_add_character_result) + self.overall_progress_signal.connect(self.update_progress_display) + self.finished_signal.connect(self.download_finished) + + # Connect search bar signal + self.character_search_input.textChanged.connect(self.filter_character_list) # CONNECTED + + + # --- Config Loading/Saving --- def load_known_names(self): + """Loads known names from the config file into the global KNOWN_NAMES list.""" global KNOWN_NAMES + loaded_names = [] if os.path.exists(self.config_file): try: with open(self.config_file, 'r', encoding='utf-8') as f: - KNOWN_NAMES = [line.strip() for line in f if line.strip()] + # Read lines, strip whitespace, filter empty lines + raw_names = [line.strip() for line in f] + loaded_names = 
sorted(list(set(filter(None, raw_names)))) + log_msg = f"ā„¹ļø Loaded {len(loaded_names)} known names from {self.config_file}" except Exception as e: - print(f"Error loading config '{self.config_file}': {e}") - QMessageBox.warning(self, "Config Load Error", f"Could not load character list from {self.config_file}:\n{e}") - KNOWN_NAMES = [] + log_msg = f"āŒ Error loading config '{self.config_file}': {e}" + QMessageBox.warning(self, "Config Load Error", f"Could not load list from {self.config_file}:\n{e}") + loaded_names = [] # Start empty on error else: - print(f"Config file '{self.config_file}' not found. Starting with empty character list.") - KNOWN_NAMES = [] + log_msg = f"ā„¹ļø Config file '{self.config_file}' not found. Starting empty." + loaded_names = [] + + KNOWN_NAMES = loaded_names # Update global list + + # Log the message (use signal if UI ready, otherwise print) + # Check if log_output exists before emitting signal during init + if hasattr(self, 'log_output'): + self.log_signal.emit(log_msg) + else: + print(log_msg) + def save_known_names(self): + """Saves the current global KNOWN_NAMES list to the config file.""" + global KNOWN_NAMES try: + # Ensure uniqueness and sort before saving + unique_sorted_names = sorted(list(set(filter(None, KNOWN_NAMES)))) with open(self.config_file, 'w', encoding='utf-8') as f: - for name in sorted(KNOWN_NAMES): + for name in unique_sorted_names: f.write(name + '\n') - except Exception as e: - QMessageBox.warning(self, "Config Save Error", f"Could not save character list to {self.config_file}:\n{e}") + # Update global list to cleaned version (consistency) + KNOWN_NAMES = unique_sorted_names + + # Use log_signal safely + if hasattr(self, 'log_signal'): + self.log_signal.emit(f"šŸ’¾ Saved {len(unique_sorted_names)} known names to {self.config_file}") + else: + print(f"Saved {len(unique_sorted_names)} names to {self.config_file}") + + except Exception as e: + log_msg = f"āŒ Error saving config '{self.config_file}': {e}" + 
if hasattr(self, 'log_signal'): + self.log_signal.emit(log_msg) + else: + print(log_msg) + QMessageBox.warning(self, "Config Save Error", f"Could not save list to {self.config_file}:\n{e}") + + # --- Event Handling --- def closeEvent(self, event): - self.save_known_names() - if self.download_thread and self.download_thread.isRunning(): + """Handles application closing: saves config, checks for running downloads.""" + self.save_known_names() # Save names first + should_exit = True + + # Check if download is active (either mode) + is_downloading = (self.download_thread and self.download_thread.isRunning()) or (self.thread_pool is not None) + + if is_downloading: reply = QMessageBox.question(self, "Confirm Exit", - "A download is in progress. Are you sure you want to exit? This will cancel the download.", + "Download in progress. Are you sure you want to exit and cancel?", QMessageBox.Yes | QMessageBox.No, QMessageBox.No) if reply == QMessageBox.Yes: - self.download_thread.cancel() - self.download_thread.wait(2000) - event.accept() + self.log_signal.emit("āš ļø Cancelling active download due to application exit...") + self.cancel_download() # Request cancellation + # Allow some time for cancellation signal to propagate? Maybe not needed. else: - event.ignore() - else: - event.accept() + should_exit = False + self.log_signal.emit("ā„¹ļø Application exit cancelled.") + event.ignore() # Prevent closing + return + if should_exit: + self.log_signal.emit("ā„¹ļø Application closing.") # Removed "Stopping API server..." + # self._shutdown_api_server() # Removed call to shutdown API server + self.log_signal.emit("šŸ‘‹ Exiting application.") + event.accept() # Allow closing + + # Removed _shutdown_api_server method + # def _shutdown_api_server(self): ... 
+ + + # --- UI Initialization --- def init_ui(self): + """Sets up all the UI widgets and layouts.""" main_layout = QHBoxLayout() left_layout = QVBoxLayout() + right_layout = QVBoxLayout() - self.link_label = QLabel("šŸ”— Kemono Creator Page or Post URL:") + # --- Left Side Controls --- + left_layout.addWidget(QLabel("šŸ”— Kemono Creator/Post URL:")) self.link_input = QLineEdit() - self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/67890") - left_layout.addWidget(self.link_label) + self.link_input.setPlaceholderText("e.g., https://kemono.su/patreon/user/12345 or .../post/98765") + self.link_input.textChanged.connect(self.update_custom_folder_visibility) left_layout.addWidget(self.link_input) - self.dir_label = QLabel("šŸ“ Download Location:") + left_layout.addWidget(QLabel("šŸ“ Download Location:")) self.dir_input = QLineEdit() - self.dir_button = QPushButton("Browse") + self.dir_input.setPlaceholderText("Select folder where downloads will be saved") + self.dir_button = QPushButton("Browse...") self.dir_button.clicked.connect(self.browse_directory) dir_layout = QHBoxLayout() - dir_layout.addWidget(self.dir_input) + dir_layout.addWidget(self.dir_input, 1) # Input takes more space dir_layout.addWidget(self.dir_button) - left_layout.addWidget(self.dir_label) left_layout.addLayout(dir_layout) - self.character_label = QLabel("šŸŽÆ Filter by Character (optional):") - self.character_input = QLineEdit() - self.character_input.setPlaceholderText("Enter character name exactly as in list (case insensitive match)") - left_layout.addWidget(self.character_label) - left_layout.addWidget(self.character_input) + # Custom Folder Input (Visible only for single posts + subfolders) + self.custom_folder_widget = QWidget() + custom_folder_layout = QVBoxLayout(self.custom_folder_widget) + custom_folder_layout.setContentsMargins(0, 5, 0, 0) # Add top margin + self.custom_folder_label = QLabel("šŸ—„ļø Custom Folder Name (Single Post Only):") + 
self.custom_folder_input = QLineEdit() + self.custom_folder_input.setPlaceholderText("Optional: Save this post to specific folder") + custom_folder_layout.addWidget(self.custom_folder_label) + custom_folder_layout.addWidget(self.custom_folder_input) + self.custom_folder_widget.setVisible(False) # Initially hidden + left_layout.addWidget(self.custom_folder_widget) + # Character Filter Input (Visible only with subfolders) + self.character_filter_widget = QWidget() + character_filter_layout = QVBoxLayout(self.character_filter_widget) + character_filter_layout.setContentsMargins(0, 5, 0, 0) # Add top margin + self.character_label = QLabel("šŸŽÆ Filter by Show/Character Name:") + self.character_input = QLineEdit() + self.character_input.setPlaceholderText("Only download posts matching this known name in title") + character_filter_layout.addWidget(self.character_label) + character_filter_layout.addWidget(self.character_input) + self.character_filter_widget.setVisible(True) # Initially visible, controlled by subfolder checkbox + left_layout.addWidget(self.character_filter_widget) + + # --- NEW: Skip Words Input Field --- + left_layout.addWidget(QLabel("🚫 Skip Posts/Files with Words (comma-separated):")) + self.skip_words_input = QLineEdit() + self.skip_words_input.setPlaceholderText("e.g., WM, WIP, sketch, preview") + left_layout.addWidget(self.skip_words_input) + # --- END NEW --- + + + # --- Options Row 1 --- + options_layout_1 = QHBoxLayout() + options_layout_1.addWidget(QLabel("Filter Files:")) self.radio_group = QButtonGroup(self) - self.radio_all = QRadioButton("All Files") - self.radio_images = QRadioButton("Images Only (no GIFs)") - self.radio_videos = QRadioButton("Videos Only (includes GIFs)") + self.radio_all = QRadioButton("All") + self.radio_images = QRadioButton("Images/GIFs") + self.radio_videos = QRadioButton("Videos") self.radio_all.setChecked(True) + # Add radios to group for exclusivity self.radio_group.addButton(self.radio_all) 
self.radio_group.addButton(self.radio_images) self.radio_group.addButton(self.radio_videos) - radio_layout = QHBoxLayout() - radio_layout.addWidget(self.radio_all) - radio_layout.addWidget(self.radio_images) - radio_layout.addWidget(self.radio_videos) - left_layout.addLayout(radio_layout) + options_layout_1.addWidget(self.radio_all) + options_layout_1.addWidget(self.radio_images) + options_layout_1.addWidget(self.radio_videos) + options_layout_1.addStretch(1) + left_layout.addLayout(options_layout_1) - # Create a new horizontal layout for the checkboxes - checkbox_layout = QHBoxLayout() - self.skip_zip_checkbox = QCheckBox("Skip Zip Files") - self.skip_zip_checkbox.setChecked(True) - checkbox_layout.addWidget(self.skip_zip_checkbox) - - self.skip_rar_checkbox = QCheckBox("Skip RAR Files") - self.skip_rar_checkbox.setChecked(True) - checkbox_layout.addWidget(self.skip_rar_checkbox) - - self.use_subfolders_checkbox = QCheckBox("Download to Separate Folders") + # --- Options Row 2 (Checkboxes) --- + options_layout_2 = QHBoxLayout() + self.use_subfolders_checkbox = QCheckBox("Separate Folders by Name/Title") self.use_subfolders_checkbox.setChecked(True) - checkbox_layout.addWidget(self.use_subfolders_checkbox) + self.use_subfolders_checkbox.toggled.connect(self.update_ui_for_subfolders) + options_layout_2.addWidget(self.use_subfolders_checkbox) - # Add the horizontal checkbox layout to the main left layout - left_layout.addLayout(checkbox_layout) + self.download_thumbnails_checkbox = QCheckBox("Download Thumbnails Only") # Removed (via API) + self.download_thumbnails_checkbox.setChecked(False) + self.download_thumbnails_checkbox.setToolTip("Thumbnail download functionality is currently limited without the API.") # Updated tooltip + options_layout_2.addWidget(self.download_thumbnails_checkbox) + options_layout_2.addStretch(1) + left_layout.addLayout(options_layout_2) + + # --- Options Row 3 (Checkboxes) --- + options_layout_3 = QHBoxLayout() + self.skip_zip_checkbox = 
QCheckBox("Skip .zip") + self.skip_zip_checkbox.setChecked(True) + options_layout_3.addWidget(self.skip_zip_checkbox) + self.skip_rar_checkbox = QCheckBox("Skip .rar") + self.skip_rar_checkbox.setChecked(True) + options_layout_3.addWidget(self.skip_rar_checkbox) + + self.compress_images_checkbox = QCheckBox("Compress Large Images (to WebP)") + self.compress_images_checkbox.setChecked(False) + self.compress_images_checkbox.setToolTip("Compress images > 1.5MB to WebP format (requires Pillow).") + options_layout_3.addWidget(self.compress_images_checkbox) + options_layout_3.addStretch(1) + left_layout.addLayout(options_layout_3) + + # --- Options Row 4 (Threading) --- + options_layout_4 = QHBoxLayout() + self.use_multithreading_checkbox = QCheckBox(f"Use Multithreading ({4} Threads)") # Use constant + self.use_multithreading_checkbox.setChecked(True) # Default to on + self.use_multithreading_checkbox.setToolTip("Speeds up downloads for full creator pages.\nSingle post URLs always use one thread.") + options_layout_4.addWidget(self.use_multithreading_checkbox) + options_layout_4.addStretch(1) + left_layout.addLayout(options_layout_4) + # --- Action Buttons --- btn_layout = QHBoxLayout() self.download_btn = QPushButton("ā¬‡ļø Start Download") + self.download_btn.setStyleSheet("padding: 8px 15px; font-weight: bold;") # Make prominent self.download_btn.clicked.connect(self.start_download) - self.cancel_btn = QPushButton("āŒ Cancel Download") - self.cancel_btn.clicked.connect(self.cancel_download) + self.cancel_btn = QPushButton("āŒ Cancel") self.cancel_btn.setEnabled(False) - + self.cancel_btn.clicked.connect(self.cancel_download) self.skip_file_btn = QPushButton("ā­ļø Skip Current File") - self.skip_file_btn.clicked.connect(self.skip_current_file) self.skip_file_btn.setEnabled(False) - + self.skip_file_btn.setToolTip("Only available in single-thread mode during file download.") + self.skip_file_btn.clicked.connect(self.skip_current_file) 
btn_layout.addWidget(self.download_btn) btn_layout.addWidget(self.cancel_btn) btn_layout.addWidget(self.skip_file_btn) - left_layout.addLayout(btn_layout) + left_layout.addSpacing(10) # Add space before list - self.log_output = QTextEdit() - self.log_output.setReadOnly(True) - left_layout.addWidget(QLabel("šŸ“œ Progress Log:")) - left_layout.addWidget(self.log_output) - right_layout = QVBoxLayout() - right_layout.addWidget(QLabel("šŸŽ­ Known Characters:")) + # --- Known Names List with Search --- + known_chars_label_layout = QHBoxLayout() + self.known_chars_label = QLabel("šŸŽ­ Known Shows/Characters (for Folder Names):") + self.character_search_input = QLineEdit() # ADDED search bar + self.character_search_input.setPlaceholderText("Search characters...") # ADDED placeholder + known_chars_label_layout.addWidget(self.known_chars_label, 1) # Label takes more space + known_chars_label_layout.addWidget(self.character_search_input) # ADDED search bar + + left_layout.addLayout(known_chars_label_layout) # Use the new layout + self.character_list = QListWidget() - self.character_list.addItems(sorted(KNOWN_NAMES)) - right_layout.addWidget(self.character_list) + # Load names *after* list widget is created + self.character_list.addItems(KNOWN_NAMES) + self.character_list.setSelectionMode(QListWidget.ExtendedSelection) + left_layout.addWidget(self.character_list, 1) # Allow list to stretch vertically + # Add/Delete Known Names Controls + char_manage_layout = QHBoxLayout() self.new_char_input = QLineEdit() - self.new_char_input.setPlaceholderText("Add new character name") + self.new_char_input.setPlaceholderText("Add new show/character name") self.add_char_button = QPushButton("āž• Add") self.delete_char_button = QPushButton("šŸ—‘ļø Delete Selected") self.add_char_button.clicked.connect(self.add_new_character) + # Allow adding via Enter key self.new_char_input.returnPressed.connect(self.add_char_button.click) 
self.delete_char_button.clicked.connect(self.delete_selected_character) - char_button_layout = QHBoxLayout() - char_button_layout.addWidget(self.new_char_input, 2) - char_button_layout.addWidget(self.add_char_button, 1) - char_button_layout.addWidget(self.delete_char_button, 1) - right_layout.addLayout(char_button_layout) + char_manage_layout.addWidget(self.new_char_input, 2) # Input wider + char_manage_layout.addWidget(self.add_char_button, 1) + char_manage_layout.addWidget(self.delete_char_button, 1) + left_layout.addLayout(char_manage_layout) - main_layout.addLayout(left_layout, 3) - main_layout.addLayout(right_layout, 2) + + # --- Right Side Log & Progress --- + right_layout.addWidget(QLabel("šŸ“œ Progress Log:")) + self.log_output = QTextEdit() + self.log_output.setReadOnly(True) + self.log_output.setMinimumWidth(450) # Ensure decent width + self.log_output.setLineWrapMode(QTextEdit.WidgetWidth) # Wrap lines + right_layout.addWidget(self.log_output, 1) # Log area stretches + + # Progress Label + self.progress_label = QLabel("Progress: Idle") + self.progress_label.setStyleSheet("padding-top: 5px; font-style: italic;") + right_layout.addWidget(self.progress_label) + # Consider adding QProgressBar if desired + + + # --- Assemble Main Layout --- + main_layout.addLayout(left_layout, 5) # Left side takes 5 parts width + main_layout.addLayout(right_layout, 4) # Right side takes 4 parts width self.setLayout(main_layout) + # Initial UI state updates based on defaults + self.update_ui_for_subfolders(self.use_subfolders_checkbox.isChecked()) + self.update_custom_folder_visibility() + + def get_dark_theme(self): + # Dark theme CSS (improved readability slightly) return """ QWidget { - background-color: #2b2b2b; - color: #f0f0f0; + background-color: #2E2E2E; /* Slightly lighter dark */ + color: #E0E0E0; /* Lighter text */ font-family: Segoe UI, Arial, sans-serif; font-size: 10pt; } QLineEdit, QTextEdit, QListWidget { - background-color: #3c3f41; - border: 1px solid #555; + 
background-color: #3C3F41; + border: 1px solid #5A5A5A; /* Slightly lighter border */ padding: 5px; - color: #f0f0f0; - border-radius: 3px; + color: #F0F0F0; /* Bright text in inputs */ + border-radius: 4px; /* Slightly rounder corners */ + } + QTextEdit { + font-family: Consolas, Courier New, monospace; /* Monospace for log */ + font-size: 9.5pt; } QPushButton { background-color: #555; - color: #f0f0f0; - border: 1px solid #666; + color: #F0F0F0; + border: 1px solid #6A6A6A; padding: 6px 12px; - border-radius: 3px; - min-height: 20px; + border-radius: 4px; + min-height: 22px; /* Ensure clickable height */ } QPushButton:hover { - background-color: #666; - border: 1px solid #777; + background-color: #656565; /* Lighter hover */ + border: 1px solid #7A7A7A; } QPushButton:pressed { - background-color: #444; + background-color: #4A4A4A; /* Darker pressed */ } QPushButton:disabled { - background-color: #444; + background-color: #404040; /* More distinct disabled */ color: #888; border-color: #555; } QLabel { font-weight: bold; padding-top: 4px; + padding-bottom: 2px; + color: #C0C0C0; /* Slightly muted labels */ } - QRadioButton { + QRadioButton, QCheckBox { spacing: 5px; - color: #f0f0f0; + color: #E0E0E0; + padding-top: 4px; + padding-bottom: 4px; } - QRadioButton::indicator { - width: 13px; - height: 13px; + QRadioButton::indicator, QCheckBox::indicator { + width: 14px; /* Slightly larger indicators */ + height: 14px; } QListWidget { - alternate-background-color: #333; - border: 1px solid #555; + alternate-background-color: #353535; /* Subtle alternating color */ + border: 1px solid #5A5A5A; } QListWidget::item:selected { - background-color: #0078d7; - color: #ffffff; + background-color: #007ACC; /* Standard blue selection */ + color: #FFFFFF; } - QCheckBox { - color: #f0f0f0; - spacing: 5px; + QToolTip { + background-color: #4A4A4A; + color: #F0F0F0; + border: 1px solid #6A6A6A; + padding: 4px; + border-radius: 3px; } - QCheckBox::indicator { - width: 13px; - 
height: 13px; - } """ + # --- UI Interaction Methods --- def browse_directory(self): - folder = QFileDialog.getExistingDirectory(self, "Select Download Folder") + # Suggest last used directory? QSettings could store this. + current_dir = self.dir_input.text() if os.path.isdir(self.dir_input.text()) else "" + folder = QFileDialog.getExistingDirectory(self, "Select Download Folder", current_dir) if folder: self.dir_input.setText(folder) def log(self, message): - self.log_output.append(message) - self.log_output.verticalScrollBar().setValue(self.log_output.verticalScrollBar().maximum()) + """Safely appends messages to the log output widget (called via log_signal).""" + try: + safe_message = str(message).replace('\x00', '[NULL]') # Ensure string, sanitize nulls + self.log_output.append(safe_message) + # Auto-scroll only if near the bottom + scrollbar = self.log_output.verticalScrollBar() + if scrollbar.value() >= scrollbar.maximum() - 30: # Threshold + scrollbar.setValue(scrollbar.maximum()) + except Exception as e: + # Fallback if GUI logging fails + print(f"GUI Log Error: {e}") + print(f"Original Message: {message}") + def get_filter_mode(self): if self.radio_images.isChecked(): @@ -708,181 +1357,1057 @@ class DownloaderApp(QWidget): return 'all' def add_new_character(self): + """Adds anew name to the known names list and updates UI.""" global KNOWN_NAMES - name = self.new_char_input.text().strip() - if name: - if name.lower() not in (n.lower() for n in KNOWN_NAMES): - KNOWN_NAMES.append(name) - self.character_list.clear() - self.character_list.addItems(sorted(KNOWN_NAMES)) - self.log(f"āœ… Added '{name}' to known characters.") - self.new_char_input.clear() - self.save_known_names() - else: - QMessageBox.warning(self, "Duplicate", f"'{name}' is already in the list.") + name_to_add = self.new_char_input.text().strip() + if not name_to_add: + QMessageBox.warning(self, "Input Error", "Name cannot be empty.") + return + + # Check for duplicates using case-insensitive 
comparison + # Store original case, but compare lower-case + name_lower = name_to_add.lower() + is_duplicate = any(existing.lower() == name_lower for existing in KNOWN_NAMES) + + if not is_duplicate: + # Add the name with its original casing + KNOWN_NAMES.append(name_to_add) + # Sort case-insensitively for display consistency + KNOWN_NAMES.sort(key=str.lower) + # Update the list widget and apply current filter + self.character_list.clear() + self.character_list.addItems(KNOWN_NAMES) + self.filter_character_list(self.character_search_input.text()) # Apply current filter # MODIFIED + self.log_signal.emit(f"āœ… Added '{name_to_add}' to known names list.") + self.new_char_input.clear() + self.save_known_names() # Save changes immediately else: - QMessageBox.warning(self, "Input Error", "Character name cannot be empty.") + QMessageBox.warning(self, "Duplicate Name", f"The name '{name_to_add}' (or similar) already exists in the list.") + def delete_selected_character(self): + """Removes selected names from the known names list.""" global KNOWN_NAMES selected_items = self.character_list.selectedItems() if not selected_items: - QMessageBox.warning(self, "Selection Error", "Please select a character to delete.") + QMessageBox.warning(self, "Selection Error", "Please select one or more names to delete.") return + names_to_remove = {item.text() for item in selected_items} confirm = QMessageBox.question(self, "Confirm Deletion", - f"Are you sure you want to delete {len(selected_items)} selected character(s)?", + f"Are you sure you want to delete {len(names_to_remove)} selected name(s)?", QMessageBox.Yes | QMessageBox.No, QMessageBox.No) if confirm == QMessageBox.Yes: - names_to_remove = [item.text() for item in selected_items] original_count = len(KNOWN_NAMES) - KNOWN_NAMES = [n for n in KNOWN_NAMES if n.lower() not in (rem.lower() for rem in names_to_remove)] + # Filter list, keeping names NOT in the removal set + KNOWN_NAMES = [n for n in KNOWN_NAMES if n not in 
names_to_remove] removed_count = original_count - len(KNOWN_NAMES) + if removed_count > 0: - self.log(f"šŸ—‘ļø Removed {removed_count} character(s).") + self.log_signal.emit(f"šŸ—‘ļø Removed {removed_count} name(s) from the list.") + # Update UI list (ensure sorted after removal) self.character_list.clear() - self.character_list.addItems(sorted(KNOWN_NAMES)) - self.save_known_names() + KNOWN_NAMES.sort(key=str.lower) # Re-sort remaining names + self.character_list.addItems(KNOWN_NAMES) + self.filter_character_list(self.character_search_input.text()) # Apply current filter # MODIFIED + self.save_known_names() # Save changes else: - self.log("🤷 No matching characters found to remove.") + # This shouldn't happen if items were selected, but handle just in case + self.log_signal.emit("ā„¹ļø No names were removed (selection might have changed?).") + + + def update_custom_folder_visibility(self, url_text=None): + """Shows/hides the custom folder input based on URL and subfolder setting.""" + # If called by signal without text, get it from input widget + if url_text is None: + url_text = self.link_input.text() + + _, _, post_id = extract_post_info(url_text.strip()) + # Show only if it's a single post URL AND subfolders are enabled + should_show = bool(post_id) and self.use_subfolders_checkbox.isChecked() + + self.custom_folder_widget.setVisible(should_show) + if not should_show: + self.custom_folder_input.clear() # Clear input if hiding + + + def update_ui_for_subfolders(self, checked): + """Updates related UI elements when 'Separate Folders' checkbox changes.""" + # Show/hide the character filter input + self.character_filter_widget.setVisible(checked) + # Re-evaluate custom folder visibility (depends on both subfolders and URL type) + self.update_custom_folder_visibility() + # Clear character filter input if subfolders are disabled + if not checked: + self.character_input.clear() + + # ADDED method to filter the character list + def filter_character_list(self, 
search_text): + """Filters the character list based on the search text.""" + search_text = search_text.lower() + for i in range(self.character_list.count()): + item = self.character_list.item(i) + # Check if the item's text contains the search text (case-insensitive) + if search_text in item.text().lower(): + item.setHidden(False) + else: + item.setHidden(True) + + + def update_progress_display(self, total_posts, processed_posts): + """Updates the progress label based on processed posts.""" + if total_posts > 0: + try: + percent = (processed_posts / total_posts) * 100 + self.progress_label.setText(f"Progress: {processed_posts} / {total_posts} posts ({percent:.1f}%)") + except ZeroDivisionError: + self.progress_label.setText(f"Progress: {processed_posts} / {total_posts} posts") # Handle rare case + elif processed_posts > 0: # E.g., single post mode might not set total + self.progress_label.setText(f"Progress: Processing post {processed_posts}...") + else: + self.progress_label.setText("Progress: Starting...") + + + + # --- Download Logic Orchestration --- def start_download(self): - if self.download_thread and self.download_thread.isRunning(): - self.log("āš ļø Download already in progress.") + """Validates inputs and starts the download in single or multi-threaded mode.""" + # Check if already running + is_running = (self.download_thread and self.download_thread.isRunning()) or (self.thread_pool is not None) + if is_running: + self.log_signal.emit("āš ļø Download already in progress.") + QMessageBox.warning(self, "Busy", "A download is already running.") return + # --- Gather Inputs --- api_url = self.link_input.text().strip() output_dir = self.dir_input.text().strip() - filter_character = self.character_input.text().strip() filter_mode = self.get_filter_mode() skip_zip = self.skip_zip_checkbox.isChecked() skip_rar = self.skip_rar_checkbox.isChecked() use_subfolders = self.use_subfolders_checkbox.isChecked() + compress_images = 
self.compress_images_checkbox.isChecked() + download_thumbnails = self.download_thumbnails_checkbox.isChecked() + use_multithreading = self.use_multithreading_checkbox.isChecked() + num_threads = 4 # Define number of threads + # --- NEW: Get Skip Words --- + raw_skip_words = self.skip_words_input.text().strip() + skip_words_list = [] + if raw_skip_words: + # Split by comma, strip whitespace from each word, and filter out empty strings + skip_words_list = [word.strip() for word in raw_skip_words.split(',') if word.strip()] + # --- END NEW --- + + + # --- Input Validation --- + service, user_id, post_id_from_url = extract_post_info(api_url) if not api_url: - QMessageBox.warning(self, "Input Error", "Please enter a Kemono creator page or post URL.") + QMessageBox.critical(self, "Input Error", "Please enter a Kemono/Coomer URL.") return - + if not service or not user_id: + QMessageBox.critical(self, "Input Error", "Invalid or unsupported URL format.\nPlease provide a valid creator page or post URL.") + self.log_signal.emit(f"āŒ Invalid URL detected: {api_url}") + return if not output_dir: - QMessageBox.warning(self, "Input Error", "Please select a download location.") - return - - if filter_character and use_subfolders and clean_folder_name(filter_character.lower()) not in (n.lower() for n in KNOWN_NAMES): - self.log(f"ā„¹ļø Character '{filter_character}' not found in known list. 
Will prompt to add (only if using separate folders).") - elif filter_character and not use_subfolders: - self.log(f"ā„¹ļø Character filter '{filter_character}' will be applied, but files will go to the single output folder as 'Download to Separate Folders' is unchecked.") + QMessageBox.critical(self, "Input Error", "Please select a download directory.") + return + if not os.path.isdir(output_dir): + reply = QMessageBox.question(self, "Directory Not Found", + f"The directory '{output_dir}' does not exist.\n\nCreate it?", + QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) + if reply == QMessageBox.Yes: + try: + os.makedirs(output_dir) + self.log_signal.emit(f"ā„¹ļø Created download directory: {output_dir}") + except Exception as e: + QMessageBox.critical(self, "Directory Error", f"Could not create directory:\n{e}") + self.log_signal.emit(f"āŒ Failed to create directory: {output_dir} - {e}") + return + else: # User chose not to create + return + + + # Pillow Check for Compression + if compress_images and Image is None: + QMessageBox.warning(self, "Dependency Missing", "Image compression requires the Pillow library, but it's not installed.\nPlease run: pip install Pillow\n\nCompression will be disabled for this session.") + self.log_signal.emit("āŒ Cannot compress images: Pillow library not found.") + compress_images = False # Disable for this run + + + # --- Gather Filters --- + filter_character = None + if use_subfolders and self.character_filter_widget.isVisible(): + filter_character = self.character_input.text().strip() or None + + custom_folder_name = None + if use_subfolders and post_id_from_url and self.custom_folder_widget.isVisible(): + raw_custom_name = self.custom_folder_input.text().strip() + if raw_custom_name: + cleaned_custom = clean_folder_name(raw_custom_name) + if cleaned_custom: + custom_folder_name = cleaned_custom + else: + QMessageBox.warning(self, "Input Warning", f"Custom folder name '{raw_custom_name}' is invalid and will be ignored.") + 
self.log_signal.emit(f"āš ļø Invalid custom folder name ignored: {raw_custom_name}") + + + # --- Character Filter Pre-Validation --- + if use_subfolders and filter_character and not post_id_from_url: # Only validate filter if for whole creator + clean_char_filter = clean_folder_name(filter_character.lower()) + known_names_lower = {name.lower() for name in KNOWN_NAMES} + + if not clean_char_filter: + self.log_signal.emit(f"āŒ Filter name '{filter_character}' is invalid. Aborting.") + QMessageBox.critical(self, "Filter Error", "The provided filter name is invalid (contains only spaces or special characters).") + return + elif filter_character.lower() not in known_names_lower: + # Prompt user to add the name before starting threads + reply = QMessageBox.question(self, "Add Filter Name?", + f"The filter name '{filter_character}' is not in your known names list.\n\nAdd it now and continue?", + QMessageBox.Yes | QMessageBox.No | QMessageBox.Cancel, QMessageBox.Yes) + + if reply == QMessageBox.Yes: + # Add the name and save (use existing method) + self.new_char_input.setText(filter_character) # Pre-fill input for user convenience? No, just add it. + self.add_new_character() # This adds, sorts, saves, updates UI + # Check if adding failed (e.g., duplicate detected by add_new_character) + if filter_character.lower() not in {name.lower() for name in KNOWN_NAMES}: + self.log_signal.emit(f"āš ļø Failed to add '{filter_character}' automatically. Please add manually if needed.") + # Don't abort here, let download proceed without guaranteed filter match yet + else: + self.log_signal.emit(f"āœ… Added filter '{filter_character}' to list.") + elif reply == QMessageBox.No: + self.log_signal.emit(f"ā„¹ļø Proceeding without adding '{filter_character}'. 
Posts matching it might not be saved to a specific folder unless name is derived.") + # Allow proceeding, but filter might not work as expected if name isn't known + # Filter logic inside worker will handle skipping if name doesn't match title + else: # Cancel + self.log_signal.emit("āŒ Download cancelled by user during filter check.") + return # Abort download + + + # --- Reset State & Log Start --- self.log_output.clear() - self.log(f"šŸš€ Starting download from {api_url}...") - self.log(f"šŸ“ Saving to: {output_dir}") - if filter_character: - self.log(f"šŸŽÆ Filtering by Character: {filter_character}") - self.log(f"šŸ“„ File Type Filter: {filter_mode}") - self.log(f"🤐 Skip Zip Files: {'Yes' if skip_zip else 'No'}") - self.log(f"šŸ“¦ Skip RAR Files: {'Yes' if skip_rar else 'No'}") - self.log(f"šŸ“‚ Download Location Mode: {'Separate Folders' if use_subfolders else 'Single Folder'}") + self.cancellation_event.clear() # Reset cancellation flag + self.active_futures = [] + self.total_posts_to_process = 0 + self.processed_posts_count = 0 + self.download_counter = 0 + self.skip_counter = 0 + # Clear both downloaded sets + with self.downloaded_files_lock: + self.downloaded_files.clear() # Clear downloaded files set (filenames) + with self.downloaded_file_hashes_lock: + self.downloaded_file_hashes.clear() # Clear downloaded file hashes set # ADDED + + self.progress_label.setText("Progress: Initializing...") + + # Log settings clearly before starting + self.log_signal.emit("="*40) + self.log_signal.emit(f"šŸš€ Starting Download Task @ {time.strftime('%Y-%m-%d %H:%M:%S')}") + self.log_signal.emit(f" URL: {api_url}") + self.log_signal.emit(f" Save Location: {output_dir}") + mode = "Single Post" if post_id_from_url else "Creator Feed" + self.log_signal.emit(f" Mode: {mode}") + self.log_signal.emit(f" Subfolders: {'Enabled' if use_subfolders else 'Disabled'}") + if use_subfolders: + if custom_folder_name: + self.log_signal.emit(f" Custom Folder (Post): 
'{custom_folder_name}'") + elif filter_character: + self.log_signal.emit(f" Character Filter: '{filter_character}'") + else: + self.log_signal.emit(f" Folder Naming: Automatic (Known Names > Title Extraction)") + self.log_signal.emit(f" File Type Filter: {filter_mode}") + self.log_signal.emit(f" Skip: {'.zip' if skip_zip else ''}{', ' if skip_zip and skip_rar else ''}{'.rar' if skip_rar else ''}{'None' if not (skip_zip or skip_rar) else ''}") + # --- NEW: Log Skip Words --- + if skip_words_list: + self.log_signal.emit(f" Skip Words (Title/Filename): {', '.join(skip_words_list)}") + else: + self.log_signal.emit(f" Skip Words (Title/Filename): None") + # --- END NEW --- + self.log_signal.emit(f" Compress Images: {'Enabled' if compress_images else 'Disabled'}") + self.log_signal.emit(f" Thumbnails Only: {'Enabled' if download_thumbnails else 'Disabled'}") - self.download_thread = DownloadThread( - api_url=api_url, - output_dir=output_dir, - known_names_copy=list(KNOWN_NAMES), - filter_character=filter_character if filter_character else None, - filter_mode=filter_mode, - skip_zip=skip_zip, - skip_rar=skip_rar, - use_subfolders=use_subfolders - ) - - self.download_thread.progress_signal.connect(self.log) - self.download_thread.add_character_prompt_signal.connect(self.prompt_add_character) - self.download_thread.add_character_result_signal.connect(self.download_thread.receive_add_character_result) - self.download_thread.finished.connect(self.download_finished) - self.download_thread.file_download_status_signal.connect(self.update_skip_button_state) + # --- Determine Execution Mode --- + # Always single-thread for single post URLs + should_use_multithreading = use_multithreading and not post_id_from_url + self.log_signal.emit(f" Threading: {'Multi-threaded' if should_use_multithreading else 'Single-threaded'}") + self.log_signal.emit("="*40) - self.download_btn.setEnabled(False) + # --- Disable UI & Enable Cancel --- + self.set_ui_enabled(False) 
self.cancel_btn.setEnabled(True) - self.link_input.setEnabled(False) - self.dir_input.setEnabled(False) - self.dir_button.setEnabled(False) - self.character_input.setEnabled(False) - self.radio_all.setEnabled(False) - self.radio_images.setEnabled(False) - self.radio_videos.setEnabled(False) - self.skip_zip_checkbox.setEnabled(False) - self.skip_rar_checkbox.setEnabled(False) - self.use_subfolders_checkbox.setEnabled(False) - self.character_list.setEnabled(False) - self.new_char_input.setEnabled(False) - self.add_char_button.setEnabled(False) - self.delete_char_button.setEnabled(False) + # --- Start Execution --- + try: + # Collect arguments common to both modes + common_args = { + 'api_url': api_url, + 'output_dir': output_dir, + 'known_names_copy': list(KNOWN_NAMES), # Pass a copy + 'filter_character': filter_character, + 'filter_mode': filter_mode, + 'skip_zip': skip_zip, + 'skip_rar': skip_rar, + 'use_subfolders': use_subfolders, + 'compress_images': compress_images, + 'download_thumbnails': download_thumbnails, + 'service': service, + 'user_id': user_id, + # Pass shared state and locks + 'downloaded_files': self.downloaded_files, + 'downloaded_files_lock': self.downloaded_files_lock, + 'downloaded_file_hashes': self.downloaded_file_hashes, # ADDED + 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, # ADDED + 'skip_words_list': skip_words_list, # --- NEW: Pass skip words --- + } - self.download_thread.start() + if should_use_multithreading: + self.log_signal.emit(" Initializing multi-threaded download...") + # Add multi-threading specific args + multi_args = common_args.copy() + multi_args['num_threads'] = num_threads + self.start_multi_threaded_download(**multi_args) + else: + # Single post or multi-threading disabled + self.log_signal.emit(" Initializing single-threaded download...") + # Add single-thread specific args + single_args = common_args.copy() + single_args['custom_folder_name'] = custom_folder_name + single_args['single_post_id'] = 
post_id_from_url + self.start_single_threaded_download(**single_args) + except Exception as e: + self.log_signal.emit(f"āŒ CRITICAL ERROR preparing download task: {e}") + import traceback + self.log_signal.emit(traceback.format_exc()) + QMessageBox.critical(self, "Start Error", f"Failed to start download task:\n{e}") + # Ensure UI is reset if start fails critically + self.download_finished(0, 0, False) # Reset UI state + + + def start_single_threaded_download(self, **kwargs): + """Starts the download using the dedicated QThread.""" + try: + self.download_thread = DownloadThread( + cancellation_event = self.cancellation_event, # Pass the shared event + # Pass all other relevant kwargs collected in start_download + **kwargs + ) + + if self.download_thread._init_failed: + # Error already logged by thread's init + QMessageBox.critical(self, "Thread Error", "Failed to initialize the download thread.\nCheck the log for details.") + self.download_finished(0, 0, False) # Reset UI + return + + # Connect signals from this specific thread instance + # These replace the direct connections used in previous versions + self.download_thread.progress_signal.connect(self.log_signal) # Use log_signal slot + self.download_thread.add_character_prompt_signal.connect(self.add_character_prompt_signal) # Forward signal + self.download_thread.file_download_status_signal.connect(self.file_download_status_signal) # Forward signal + self.download_thread.finished_signal.connect(self.finished_signal) # Forward signal + + # Connect response signal *to* the thread instance + self.character_prompt_response_signal.connect(self.download_thread.receive_add_character_result) + + self.download_thread.start() + self.log_signal.emit("āœ… Single download thread started.") + + except Exception as e: + self.log_signal.emit(f"āŒ CRITICAL ERROR starting single-thread task: {e}") + import traceback + self.log_signal.emit(traceback.format_exc()) + QMessageBox.critical(self, "Thread Start Error", f"Failed to 
start download thread:\n{e}") + self.download_finished(0, 0, False) # Reset UI state + + + + def start_multi_threaded_download(self, **kwargs): + """Starts download using ThreadPoolExecutor and a fetcher thread.""" + num_threads = kwargs['num_threads'] + self.thread_pool = ThreadPoolExecutor(max_workers=num_threads, thread_name_prefix='Downloader_') + self.active_futures = [] + self.processed_posts_count = 0 + self.total_posts_to_process = 0 # Updated by fetcher + self.download_counter = 0 + self.skip_counter = 0 + + # Prepare arguments for worker threads (PostProcessorWorker) + # Remove args not needed by worker's init or handled differently + worker_args_template = kwargs.copy() + del worker_args_template['num_threads'] + # api_url is used by fetcher, but also needed by worker for domain/URL construction + # service/user_id are passed explicitly + # output_dir needs to be mapped to download_root + # Pass shared state explicitly + + # Start the fetcher thread + fetcher_thread = threading.Thread( + target=self._fetch_and_queue_posts, + args=(kwargs['api_url'], worker_args_template), + daemon=True, + name="PostFetcher" + ) + fetcher_thread.start() + self.log_signal.emit(f"āœ… Post fetcher thread started. 
{num_threads} worker threads initializing...") + + + def _fetch_and_queue_posts(self, api_url_input, worker_args_template): + """(Runs in fetcher thread) Fetches posts and submits tasks to the pool.""" + all_posts = [] + fetch_error = False + try: + self.log_signal.emit(" Starting post fetch...") + # Define logger for the generator + def fetcher_logger(msg): + # Add prefix to distinguish fetcher logs if needed + self.log_signal.emit(f"[Fetcher] {msg}") + + post_generator = download_from_api(api_url_input, logger=fetcher_logger) + + for posts_batch in post_generator: + if self.cancellation_event.is_set(): + self.log_signal.emit("āš ļø Post fetching cancelled by user.") + fetch_error = True # Treat cancellation during fetch as an error state for cleanup + break + # Basic validation of batch + if isinstance(posts_batch, list): + all_posts.extend(posts_batch) + self.total_posts_to_process = len(all_posts) + # Emit progress less frequently to avoid flooding log + if self.total_posts_to_process % 250 == 0: # Log every 250 posts + self.log_signal.emit(f" Fetched {self.total_posts_to_process} posts...") + else: + self.log_signal.emit(f"āŒ API returned non-list batch: {type(posts_batch)}. Stopping fetch.") + fetch_error = True + break + + if not fetch_error: + self.log_signal.emit(f"āœ… Finished fetching. 
Total posts found: {self.total_posts_to_process}") + + + except Exception as e: + self.log_signal.emit(f"āŒ Unexpected Error during post fetching: {e}") + import traceback + self.log_signal.emit(traceback.format_exc(limit=3)) + fetch_error = True + + + # --- Handle Fetch Completion / Errors --- + if self.cancellation_event.is_set() or fetch_error: + # If cancelled or error during fetch, signal completion immediately + self.finished_signal.emit(self.download_counter, self.skip_counter, self.cancellation_event.is_set()) + # Ensure pool is cleaned up if fetch fails before submitting + if self.thread_pool: + self.thread_pool.shutdown(wait=False, cancel_futures=True) + self.thread_pool = None + return # Stop fetcher thread + + + if self.total_posts_to_process == 0: + self.log_signal.emit("šŸ˜• No posts found or fetched successfully.") + self.finished_signal.emit(0, 0, False) # Signal completion with zero counts + return + + + # --- Submit Tasks to Worker Pool --- + self.log_signal.emit(f" Submitting {self.total_posts_to_process} post tasks to worker pool...") + self.processed_posts_count = 0 # Reset counter before submitting + self.overall_progress_signal.emit(self.total_posts_to_process, 0) # Update progress display + + + # Extract arguments needed explicitly for the worker + common_worker_args = { + 'download_root': worker_args_template['output_dir'], # **FIXED HERE** + 'known_names': worker_args_template['known_names_copy'], + 'filter_character': worker_args_template['filter_character'], + 'unwanted_keywords': {'spicy', 'hd', 'nsfw', '4k', 'preview'}, # Define unwanted keywords here + 'filter_mode': worker_args_template['filter_mode'], + 'skip_zip': worker_args_template['skip_zip'], + 'skip_rar': worker_args_template['skip_rar'], + 'use_subfolders': worker_args_template['use_subfolders'], + # target_post_id is likely None here, but pass it for consistency + 'target_post_id_from_initial_url': worker_args_template.get('single_post_id'), + # custom_folder_name is 
likely None here + 'custom_folder_name': worker_args_template.get('custom_folder_name'), + 'compress_images': worker_args_template['compress_images'], + 'download_thumbnails': worker_args_template['download_thumbnails'], + 'service': worker_args_template['service'], + 'user_id': worker_args_template['user_id'], + 'api_url_input': worker_args_template['api_url'], # Pass original URL + 'cancellation_event': self.cancellation_event, + 'signals': self.worker_signals, # Pass the shared signals object + # Pass shared state and locks + 'downloaded_files': self.downloaded_files, + 'downloaded_files_lock': self.downloaded_files_lock, + 'downloaded_file_hashes': self.downloaded_file_hashes, # ADDED + 'downloaded_file_hashes_lock': self.downloaded_file_hashes_lock, # ADDED + 'skip_words_list': worker_args_template['skip_words_list'], # --- NEW: Pass skip words --- + } + + for post_data in all_posts: + if self.cancellation_event.is_set(): + self.log_signal.emit("āš ļø Cancellation detected during task submission.") + break # Stop submitting new tasks + + if not isinstance(post_data, dict): + self.log_signal.emit(f"āš ļø Skipping invalid post data item (type: {type(post_data)}).") + self.processed_posts_count += 1 # Count as processed (skipped) + self.total_posts_to_process -=1 # Adjust total if skipping invalid data + continue + + # Create worker with specific post and common args + worker = PostProcessorWorker(post_data=post_data, **common_worker_args) + + # Submit the worker's process method to the pool + try: + if self.thread_pool: # Check if pool still exists + future = self.thread_pool.submit(worker.process) + future.add_done_callback(self._handle_future_result) + self.active_futures.append(future) + else: # Pool was shut down prematurely + self.log_signal.emit("āš ļø Thread pool shutdown before submitting all tasks.") + break + except RuntimeError as e: # Handle pool shutdown error + self.log_signal.emit(f"āš ļø Error submitting task (pool might be shutting down): 
{e}") + break + except Exception as e: + self.log_signal.emit(f"āŒ Unexpected error submitting task: {e}") + # Decide whether to continue or break on submission error + break + + + # Log after submission loop completes or breaks + submitted_count = len(self.active_futures) + self.log_signal.emit(f" {submitted_count} / {self.total_posts_to_process} tasks submitted.") + # If cancelled during submission, remaining tasks won't run. + # Fetcher thread's job is done. Callbacks handle the rest. + # If submission loop finished and submitted_count == total_posts_to_process, check if pool needs shutdown signal? + # No, shutdown happens in finished_signal handler or closeEvent. + + + + def _handle_future_result(self, future: Future): + """(Callback) Handles results from worker threads.""" + # This runs in one of the ThreadPoolExecutor threads, use signals for UI updates + self.processed_posts_count += 1 + downloaded_res, skipped_res = 0, 0 # Default results + + try: + if future.cancelled(): + # Task was cancelled before/during execution + # Count cancelled task as skipped only if it wasn't already counted within the worker + # The worker's process method should return counts before checking cancel state at the end. + # Let's assume worker reports counts correctly up to cancellation point. + # We don't increment skipped_res here, as the worker should have handled it. + pass + elif future.exception(): + exc = future.exception() + self.log_signal.emit(f"āŒ Error in worker thread: {exc}") + # Log traceback snippet if helpful + # import traceback + # self.log_signal.emit(traceback.format_exc(limit=2)) + # Count errored task as skipped only if it wasn't already handled in worker. + # Assume worker reports counts correctly up to error point. + # We don't increment skipped_res here. + pass + else: + # Task completed, get results + downloaded, skipped = future.result() # Result from worker.process() + downloaded_res = downloaded + skipped_res = skipped + # Log task completion? 
Might be too verbose. + + # --- Safely update shared counters (using main thread or locks if needed) --- + # These counters are primarily for the final summary. + # Direct update is acceptable for progress display with potential minor races. + # A more robust way uses thread-safe counters (e.g., threading.Lock with counters) + # Let's use locks for absolute accuracy in the final count. + + with threading.Lock(): # Use a temporary lock for updating these counters + self.download_counter += downloaded_res + self.skip_counter += skipped_res + + + # --- Update Progress --- + self.overall_progress_signal.emit(self.total_posts_to_process, self.processed_posts_count) + + except Exception as e: + # Catch errors within the callback itself + self.log_signal.emit(f"āŒ Error in result callback handling: {e}") + + # --- Check for Overall Completion --- + # This check is tricky due to callbacks running concurrently. + # Rely on the processed_posts_count reaching total_posts_to_process + # Ensure this check is race-safe if possible. Using a simple counter is okay for a check-and-signal pattern. + if self.processed_posts_count >= self.total_posts_to_process and self.total_posts_to_process > 0: + # Add a small delay to ensure all pending callbacks (if any) have a chance to run + # This is heuristic and not guaranteed thread-safe completion detection. + # A more robust solution involves tracking active futures explicitly. + # Given the current structure, this simple counter check is the most practical. 
+ # time.sleep(0.1) # Short delay before final check + + # Re-check count just in case + if self.processed_posts_count >= self.total_posts_to_process: + self.log_signal.emit("šŸ All submitted tasks have completed or failed.") + cancelled = self.cancellation_event.is_set() + # Use the final accumulated counters (updated under lock) + self.finished_signal.emit(self.download_counter, self.skip_counter, cancelled) + + + # --- UI State Management --- + def set_ui_enabled(self, enabled): + """Enable/disable UI controls based on download state.""" + # Controls to disable during download + self.download_btn.setEnabled(enabled) + self.link_input.setEnabled(enabled) + self.dir_input.setEnabled(enabled) + self.dir_button.setEnabled(enabled) + self.radio_all.setEnabled(enabled) + self.radio_images.setEnabled(enabled) + self.radio_videos.setEnabled(enabled) + self.skip_zip_checkbox.setEnabled(enabled) + self.skip_rar_checkbox.setEnabled(enabled) + self.use_subfolders_checkbox.setEnabled(enabled) + self.compress_images_checkbox.setEnabled(enabled) + self.download_thumbnails_checkbox.setEnabled(enabled) + self.use_multithreading_checkbox.setEnabled(enabled) + self.skip_words_input.setEnabled(enabled) # --- NEW: Enable/disable skip words input --- + # self.character_list.setEnabled(enabled) # Keep enabled for Browse + self.character_search_input.setEnabled(enabled) # ADDED + self.new_char_input.setEnabled(enabled) + self.add_char_button.setEnabled(enabled) + self.delete_char_button.setEnabled(enabled) + + + # Enable/disable conditional controls + subfolders_on = self.use_subfolders_checkbox.isChecked() + self.custom_folder_widget.setEnabled(enabled and subfolders_on) + self.character_filter_widget.setEnabled(enabled and subfolders_on) + + + # Update visibility if enabling UI + if enabled: + self.update_ui_for_subfolders(subfolders_on) + self.update_custom_folder_visibility() # Update based on current URL + + # Cancel button is enabled only when download is running (UI disabled) 
+ self.cancel_btn.setEnabled(not enabled) + + # Skip button state is handled separately, reset when UI enabled + if enabled: + self.skip_file_btn.setEnabled(False) + + + # --- Actions --- def cancel_download(self): - if self.download_thread and self.download_thread.isRunning(): - self.log("āš ļø Requesting cancellation...") - self.download_thread.cancel() + """Requests cancellation of the ongoing download (single or multi-thread).""" + if not self.cancel_btn.isEnabled(): return # Prevent double clicks + + self.log_signal.emit("āš ļø Requesting cancellation...") + self.cancellation_event.set() # Set the shared event + + # Disable cancel button immediately to prevent multiple signals + self.cancel_btn.setEnabled(False) + self.progress_label.setText("Progress: Cancelling...") + + # Optional: Try to actively cancel futures if using thread pool + # This might help stop tasks quicker if they check the future status, + # but reliance on the cancellation_event is the primary mechanism. + if self.thread_pool and self.active_futures: + cancelled_count = 0 + for future in self.active_futures: + if future.cancel(): # Attempts to cancel + cancelled_count += 1 + if cancelled_count > 0: + self.log_signal.emit(f" Attempted to cancel {cancelled_count} pending/running tasks.") + def skip_current_file(self): + """Signals the active download thread (if single-threaded) to skip the current file.""" + # Check if single-threaded mode is active if self.download_thread and self.download_thread.isRunning(): - self.download_thread.skip_file() - - def update_skip_button_state(self, is_downloading): - self.skip_file_btn.setEnabled(is_downloading) + self.download_thread.skip_file() # Call method on the QThread instance + elif self.thread_pool: + self.log_signal.emit("ā„¹ļø Skipping individual files is not supported in multi-threaded mode.") + QMessageBox.information(self, "Action Not Supported", "Skipping individual files is only available in single-threaded mode.") + else: + 
self.log_signal.emit("ā„¹ļø Skip requested, but no download is active.") - def download_finished(self): - self.log("Download thread finished.") - self.download_btn.setEnabled(True) + def update_skip_button_state(self, is_downloading_active): + """Enables/disables the skip button based on download state.""" + # Enable only if: download running (UI disabled), AND single-thread mode, AND file download active + can_skip = (not self.download_btn.isEnabled()) and \ + (self.download_thread and self.download_thread.isRunning()) and \ + is_downloading_active + + # Explicitly disable if multi-threading was used for this run + # Check if thread_pool was initialized and is not None + if self.thread_pool is not None: + can_skip = False + + self.skip_file_btn.setEnabled(can_skip) + + + def download_finished(self, total_downloaded, total_skipped, cancelled): + """Cleans up resources and resets UI after download completion/cancellation.""" + # Log final status + self.log_signal.emit("="*40) + status = "Cancelled" if cancelled else "Finished" + self.log_signal.emit(f"šŸ Download {status}!") + self.log_signal.emit(f" Summary: Downloaded={total_downloaded}, Skipped={total_skipped}") + self.progress_label.setText(f"{status}: {total_downloaded} downloaded, {total_skipped} skipped.") + self.log_signal.emit("="*40) + + + # --- Cleanup Resources --- + # QThread cleanup + if self.download_thread: + # Disconnect response signal *from* the thread instance + try: + self.character_prompt_response_signal.disconnect(self.download_thread.receive_add_character_result) + except TypeError: pass # Ignore if not connected + # QThread should exit naturally after finished signal + self.download_thread = None + + + # ThreadPoolExecutor cleanup + if self.thread_pool: + self.log_signal.emit(" Shutting down worker thread pool...") + # Shutdown non-blockingly, attempt to cancel any remaining futures + self.thread_pool.shutdown(wait=False, cancel_futures=True) + self.thread_pool = None + 
self.active_futures = [] # Clear future list + + + # Reset cancellation event for the next run + self.cancellation_event.clear() + + # --- Reset UI --- + self.set_ui_enabled(True) + # Ensure cancel/skip buttons are disabled self.cancel_btn.setEnabled(False) self.skip_file_btn.setEnabled(False) - self.link_input.setEnabled(True) - self.dir_input.setEnabled(True) - self.dir_button.setEnabled(True) - self.character_input.setEnabled(True) - self.radio_all.setEnabled(True) - self.radio_images.setEnabled(True) - self.radio_videos.setEnabled(True) - self.skip_zip_checkbox.setEnabled(True) - self.skip_rar_checkbox.setEnabled(True) - self.use_subfolders_checkbox.setEnabled(True) - self.character_list.setEnabled(True) - self.new_char_input.setEnabled(True) - self.add_char_button.setEnabled(True) - self.delete_char_button.setEnabled(True) - - self.download_thread = None + # --- Character Prompt Handling --- + # Runs in GUI thread, triggered by signal from DownloadThread def prompt_add_character(self, character_name): - if self.download_thread and self.download_thread.use_subfolders: - reply = QMessageBox.question(self, "Add Character?", - f"Character '{character_name}' was found in a post title but is not in your known list.\n\nAdd '{character_name}' to your known characters list and download to its folder?", - QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) + # Ensure prompt mutex is available if needed, though GUI thread access is usually safe + # with QMutexLocker(self.prompt_mutex): # Might not be strictly necessary here - result = (reply == QMessageBox.Yes) - self.download_thread.add_character_result_signal.emit(result) + reply = QMessageBox.question(self, "Add Filter Name?", + f"The filter name '{character_name}' is not in your known list.\n\nAdd it now and continue download?", + QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) + result = (reply == QMessageBox.Yes) - if result: - global KNOWN_NAMES - if character_name.lower() not in (n.lower() for n in 
KNOWN_NAMES): - KNOWN_NAMES.append(character_name) - self.character_list.clear() - self.character_list.addItems(sorted(KNOWN_NAMES)) - self.log(f"āœ… Added '{character_name}' to known characters (via prompt).") - self.save_known_names() - else: - self.download_thread.add_character_result_signal.emit(False) + if result: + # Use the existing add method to handle adding, UI update, save + self.new_char_input.setText(character_name) # Pre-fill for clarity if needed? No, just add. + # Find the item in the list to simulate adding it if Add button was used + # Check if already added by another mechanism + if character_name.lower() not in {n.lower() for n in KNOWN_NAMES}: + self.add_new_character() # Add the name + # Verify it was added successfully + if character_name.lower() not in {n.lower() for n in KNOWN_NAMES}: + self.log_signal.emit(f"āš ļø Failed to add '{character_name}' via prompt. Check for errors.") + result = False # Treat as failure if not added + else: + self.log_signal.emit(f"ā„¹ļø Filter name '{character_name}' was already present or added.") + + # Signal the result back to the waiting DownloadThread + self.character_prompt_response_signal.emit(result) + # Slot to receive the result for the waiting DownloadThread + def receive_add_character_result(self, result): + """Slot to receive the boolean result from the GUI prompt.""" + # This runs in the thread's event loop, triggered by the signal connection + with QMutexLocker(self.prompt_mutex): + self._add_character_response = result + self.log_signal.emit(f" Received prompt response: {'Yes' if result else 'No'}") + + + # Removed start_api_server method + # def start_api_server(self): ... + + +# --- Modified DownloadThread for Single-Threaded Mode --- +# This class handles the entire download process when multi-threading is off or for single posts. 
+class DownloadThread(QThread): + # Signals emitted by this thread back to the GUI + progress_signal = pyqtSignal(str) + add_character_prompt_signal = pyqtSignal(str) # Ask GUI to prompt user + file_download_status_signal = pyqtSignal(bool) # File download start/end + finished_signal = pyqtSignal(int, int, bool) # download_count, skip_count, cancelled_flag + + + def __init__(self, api_url, output_dir, known_names_copy, + cancellation_event, single_post_id=None, # Use shared cancellation event + filter_character=None, filter_mode='all', skip_zip=True, skip_rar=True, + use_subfolders=True, custom_folder_name=None, compress_images=False, + download_thumbnails=False, service=None, user_id=None, + downloaded_files=None, downloaded_files_lock=None, + downloaded_file_hashes=None, downloaded_file_hashes_lock=None, + skip_words_list=None): # --- NEW: Accept skip_words_list --- + super().__init__() + self._init_failed = False + self.api_url_input = api_url + self.output_dir = output_dir + self.known_names = list(known_names_copy) + self.cancellation_event = cancellation_event + self.initial_target_post_id = single_post_id + self.filter_character = filter_character + self.filter_mode = filter_mode + self.skip_zip = skip_zip + self.skip_rar = skip_rar + self.use_subfolders = use_subfolders + self.custom_folder_name = custom_folder_name + self.compress_images = compress_images + self.download_thumbnails = download_thumbnails + self.service = service # Use passed value + self.user_id = user_id # Use passed value + self.skip_words_list = skip_words_list if skip_words_list is not None else [] # --- NEW: Store skip_words_list --- + + + # Shared state and locks (passed from DownloaderApp) + self.downloaded_files = downloaded_files if downloaded_files is not None else set() + self.downloaded_files_lock = downloaded_files_lock if downloaded_files_lock is not None else threading.Lock() + self.downloaded_file_hashes = downloaded_file_hashes if downloaded_file_hashes is not None else 
set() # ADDED + self.downloaded_file_hashes_lock = downloaded_file_hashes_lock if downloaded_file_hashes_lock is not None else threading.Lock() # ADDED + + + # Internal state + self.skip_current_file_flag = threading.Event() + self.is_downloading_file = False + self.current_download_path = None + self._add_character_response = None # Stores response from GUI prompt + self.prompt_mutex = QMutex() # Protects access to _add_character_response + + + # Basic validation in init + if not self.service or not self.user_id: + log_msg = f"āŒ Thread Init Error: Missing service ('{self.service}') or user ID ('{self.user_id}') for URL '{api_url}'" + print(log_msg) # Print error as signals might not be connected yet + # Try emitting signal as well, might work if called after main init + try: self.progress_signal.emit(log_msg) + except RuntimeError: pass # Ignore if signal connection fails during init + self._init_failed = True
+
+
+    def run(self):
+        """Fetch every post for the configured URL and process them one at a time.
+
+        Each post is handed to a single, reused ``PostProcessorWorker`` whose
+        signals are routed back to this thread's own signals. A failure while
+        processing one post is logged and counted as one skipped item; it does
+        not stop the run.
+
+        ``finished_signal`` is emitted exactly once per call, as
+        ``(downloaded, skipped, cancelled_by_user)``: either from the
+        init-failure guard below, or from the ``finally`` clause.
+        """
+        if self._init_failed:
+            # Error already logged in __init__; report completion with zero counts.
+            self.finished_signal.emit(0, 0, False)
+            return
+
+        import traceback  # Local import: only used on the error paths below
+
+        unwanted_keywords = {'spicy', 'hd', 'nsfw', '4k', 'preview'}  # Example unwanted keywords
+        grand_total_downloaded = 0
+        grand_total_skipped = 0
+        cancelled_by_user = False
+
+        try:
+            # --- Character Filter Pre-Check ---
+            # Done here because the thread runs independently of the GUI-side checks.
+            if self.use_subfolders and self.filter_character and not self.custom_folder_name:
+                if not self._check_and_prompt_filter_character():
+                    # The reason was already logged by the check method.
+                    # Do NOT emit finished_signal here: a `return` inside `try`
+                    # still runs the `finally` clause, which emits (0, 0, False).
+                    # Emitting here as well delivered the signal twice.
+                    return
+
+            # --- Setup Worker Instance ---
+            # Adapter signals object that routes the worker's output to this thread's signals.
+            worker_signals_adapter = PostProcessorSignals()
+            worker_signals_adapter.progress_signal.connect(self.progress_signal)  # Route log messages
+            worker_signals_adapter.file_download_status_signal.connect(self.file_download_status_signal)  # Route file status
+
+            post_worker = PostProcessorWorker(
+                post_data=None,  # Set per post in the loop below
+                download_root=self.output_dir,
+                known_names=self.known_names,  # Thread's (potentially updated) list
+                filter_character=self.filter_character,
+                unwanted_keywords=unwanted_keywords,
+                filter_mode=self.filter_mode,
+                skip_zip=self.skip_zip,
+                skip_rar=self.skip_rar,
+                use_subfolders=self.use_subfolders,
+                target_post_id_from_initial_url=self.initial_target_post_id,
+                custom_folder_name=self.custom_folder_name,
+                compress_images=self.compress_images,
+                download_thumbnails=self.download_thumbnails,
+                service=self.service,
+                user_id=self.user_id,
+                api_url_input=self.api_url_input,
+                cancellation_event=self.cancellation_event,  # Shared cancellation event
+                signals=worker_signals_adapter,
+                # Shared state and the locks that guard it
+                downloaded_files=self.downloaded_files,
+                downloaded_files_lock=self.downloaded_files_lock,
+                downloaded_file_hashes=self.downloaded_file_hashes,
+                downloaded_file_hashes_lock=self.downloaded_file_hashes_lock,
+                skip_words_list=self.skip_words_list,
+            )
+            # Let the worker observe this thread's skip flag directly.
+            post_worker.skip_current_file_flag = self.skip_current_file_flag
+
+            # --- Fetch and Process Posts ---
+            self.progress_signal.emit(" Starting post fetch...")
+
+            def thread_logger(msg):
+                # Logger handed to the API fetcher; forwards to the progress signal.
+                self.progress_signal.emit(msg)
+
+            post_generator = download_from_api(self.api_url_input, logger=thread_logger)
+
+            for posts_batch in post_generator:
+                if self.isInterruptionRequested():  # QThread interrupt OR cancellation_event
+                    self.progress_signal.emit("⚠️ Download cancelled before processing batch.")
+                    cancelled_by_user = True
+                    break
+
+                for post in posts_batch:
+                    if self.isInterruptionRequested():
+                        self.progress_signal.emit("⚠️ Download cancelled during post processing.")
+                        cancelled_by_user = True
+                        break
+
+                    post_worker.post = post  # Re-target the shared worker at this post
+                    try:
+                        downloaded, skipped = post_worker.process()
+                        grand_total_downloaded += downloaded
+                        grand_total_skipped += skipped
+                    except Exception as proc_e:
+                        post_id_err = post.get('id', 'N/A') if isinstance(post, dict) else 'N/A'
+                        self.progress_signal.emit(f"❌ Error processing post {post_id_err}: {proc_e}")
+                        self.progress_signal.emit(traceback.format_exc(limit=2))
+                        grand_total_skipped += 1  # Count the post as skipped on error
+
+                    # Brief pause between posts to yield control.
+                    self.msleep(20)  # 20 milliseconds
+
+                if cancelled_by_user:
+                    break  # Leave the outer batch loop as well
+
+            # --- Finished Processing ---
+            if not cancelled_by_user:
+                self.progress_signal.emit("✅ Post fetching and processing complete.")
+
+        except Exception as e:
+            # Top-level boundary of the thread: log the failure instead of letting it escape run().
+            log_msg = f"\n❌ An critical error occurred in download thread: {e}"
+            self.progress_signal.emit(log_msg)
+            tb_str = traceback.format_exc()
+            self.progress_signal.emit("--- Traceback ---")
+            for line in tb_str.splitlines():
+                self.progress_signal.emit(" " + line)
+            self.progress_signal.emit("--- End Traceback ---")
+            cancelled_by_user = False  # Stopped by an error, not by the user
+
+        finally:
+            # Single point of emission for every path that entered the try block.
+            self.finished_signal.emit(grand_total_downloaded, grand_total_skipped, cancelled_by_user)
+
+
+    def _check_and_prompt_filter_character(self):
+        """Validate the character filter, asking the GUI to add it when unknown.
+
+        Returns:
+            True when the download may proceed (the filter is already in
+            ``known_names``, or the user agreed to add it). False when the
+            filter cleans to an empty name, the user declined, or cancellation
+            was requested while waiting for the answer.
+        """
+        if not clean_folder_name(self.filter_character.lower()):
+            self.progress_signal.emit(f"❌ Filter name '{self.filter_character}' is invalid. Aborting.")
+            return False
+
+        # Case-insensitive lookup against the current known-names list.
+        known_names_lower = {name.lower() for name in self.known_names}
+        if self.filter_character.lower() in known_names_lower:
+            return True  # Valid and already known
+
+        self.progress_signal.emit(f"❓ Filter '{self.filter_character}' not found in known list.")
+
+        # Clear any stale answer before asking, then signal the GUI.
+        with QMutexLocker(self.prompt_mutex):
+            self._add_character_response = None
+        self.add_character_prompt_signal.emit(self.filter_character)
+
+        # Poll for the answer delivered through receive_add_character_result().
+        self.progress_signal.emit(" Waiting for user confirmation to add filter name...")
+        while True:
+            # Read under the same mutex the writer uses, so both sides agree on the value.
+            with QMutexLocker(self.prompt_mutex):
+                response = self._add_character_response
+            if response is not None:
+                break
+            if self.isInterruptionRequested():
+                self.progress_signal.emit("⚠️ Cancelled while waiting for user input on filter name.")
+                return False
+            self.msleep(200)  # Check every 200 ms
+
+        if not response:
+            self.progress_signal.emit(f"❌ User declined to add filter '{self.filter_character}'. Aborting download.")
+            return False
+
+        self.progress_signal.emit(f"✅ User confirmed adding '{self.filter_character}'. Continuing.")
+        # Mirror the addition in this thread's own list.
+        if self.filter_character not in self.known_names:
+            self.known_names.append(self.filter_character)
+        return True
+
+
+    def skip_file(self):
+        """Request that the file currently being downloaded is skipped.
+
+        Does nothing when the thread is not running. Otherwise it either sets
+        ``skip_current_file_flag`` or logs that no file download is active.
+        """
+        if not self.isRunning():
+            return
+
+        # NOTE(review): nothing in this part of the class sets is_downloading_file
+        # to True - confirm it is updated elsewhere, otherwise this branch never runs.
+        if self.is_downloading_file:
+            self.progress_signal.emit("⏭️ Skip requested for current file.")
+            self.skip_current_file_flag.set()  # Observed by the worker's download loop
+        else:
+            self.progress_signal.emit("ℹ️ Skip requested, but no file download active.")
+
+
+    def receive_add_character_result(self, result):
+        """Slot: store the boolean answer from the GUI's add-character prompt."""
+        # NOTE(review): run() is overridden and never calls exec_(), so this thread
+        # has no event loop of its own. The slot therefore presumably executes in the
+        # thread that owns this QThread object (typically the GUI thread) - confirm
+        # against the connect() call. The mutex keeps the hand-off safe either way.
+        with QMutexLocker(self.prompt_mutex):
+            self._add_character_response = result
+        self.progress_signal.emit(f" Received prompt response: {'Yes' if result else 'No'}")
+
+
+    def isInterruptionRequested(self):
+        """Return True if QThread's interruption flag or the shared cancellation event is set."""
+        return super().isInterruptionRequested() or self.cancellation_event.is_set()
+
+
+
+# --- Main Execution Block ---
 if __name__ == '__main__':
-    app = QApplication(sys.argv)
-    downloader = DownloaderApp()
-    downloader.show()
-    sys.exit(app.exec_())
\ No newline at end of file
+    # Set high DPI scaling attribute if needed (Qt 5.6+)
+    # QApplication.setAttribute(Qt.AA_EnableHighDpiScaling, True)
+    # QApplication.setAttribute(Qt.AA_UseHighDpiPixmaps, True)
+
+    qt_app = QApplication(sys.argv)
+    # Optional: Apply a style like Fusion for consistency
+    # qt_app.setStyle('Fusion')
+
+    downloader = DownloaderApp()  # Create the main application window
+    downloader.show()  # Show the window
+
+    # Start the Qt event loop
+    exit_code = qt_app.exec_()
+
+    # Code here runs after the application window is closed
+    print(f"Application finished with exit code: {exit_code}")
+    sys.exit(exit_code)  # Exit the script with the application's exit code
\ No newline at end of file
diff --git a/readme.md b/readme.md index 004a207..576f86f 100644 --- a/readme.md +++ b/readme.md @@ -1,21 +1,34 @@ # Kemono Downloader -A simple, multi-platform GUI application built with PyQt5 to download content from Kemono.su creator pages or specific posts, with options for filtering and organizing downloads. +A simple, multi-platform GUI application built with PyQt5 to download content from Kemono.su or Coomer.party creator pages or specific posts, with options for filtering and organizing downloads. ## Features * **GUI Interface:** Easy-to-use graphical interface. -* **URL Support:** Download from a creator's main page (paginated) or a specific post URL. +* **URL Support:** Download from a creator's main page (paginated) or a specific post URL from Kemono or Coomer sites. * **Download Location:** Select your desired output directory. -* **Character Filtering:** Optionally filter posts and organize downloads into folders based on character names detected in post titles. -* **Known Characters List:** Manage a persistent list of known characters for better folder organization. -* **File Type Filtering:** Download All Files, Images Only (PNG, JPG, JPEG, WEBP, excluding GIFs), or Videos Only (MP4, MOV, MKV, WEBM, including GIFs). -* **Archive Skipping:** Options to skip `.zip` and `.rar` files. -* **Folder Organization:** Choose to download files into separate folders (based on character/title) or all into a single selected folder. -* **Progress Log:** View download progress and status messages. +* **Subfolder Organization:** + * Organize downloads into folders based on character/artist names found in post titles (using your "Known Names" list). + * Option to create a custom folder for single post downloads. + * Automatic folder naming based on post title if no known names are matched.
+* **Known Names List:** Manage a persistent list of known names (artists, characters, series) for improved folder organization and filtering. +* **Content Filtering:** + * **Character/Name Filter:** Only download posts where the specified known name is found in the title. + * **File Type Filter:** Download All Files, Images/GIFs Only, or Videos Only. + * **Skip Words Filter:** Specify a list of comma-separated words to skip posts or files if these words appear in their titles or filenames. +* **Archive Skipping:** Options to skip `.zip` and `.rar` files (enabled by default). +* **Image Compression:** Optionally compress large images (larger than 1.5MB) to WebP format to save space (requires Pillow library). +* **Thumbnail Downloading:** Option to download thumbnails. (Note: The previous local API method for enhanced thumbnail fetching has been removed. Thumbnail availability might depend on the source.) +* **Duplicate Prevention:** + * Avoids re-downloading files with the same content hash. + * Checks for existing filenames in the target directory. +* **Multithreading:** Utilizes multithreading for faster downloads from full creator pages (single posts are processed in a single thread). +* **Progress Log:** View detailed download progress, status messages, and errors. * **Dark Theme:** Built-in dark theme for comfortable use. -* **Download Cancellation:** Ability to cancel an ongoing download. -* **Skip Current File:** Option to skip the specific file currently being downloaded within a larger batch. +* **Download Management:** + * Ability to cancel an ongoing download process. + * Option to skip the specific file currently being downloaded (in single-thread mode). +* **Persistent Configuration:** Saves the "Known Names" list to a local file. ## Prerequisites @@ -24,12 +37,13 @@ A simple, multi-platform GUI application built with PyQt5 to download content fr ## Installation -1. Clone or download this repository to your local machine. +1. 
Clone or download this repository/script to your local machine. 2. Navigate to the script's directory in your terminal or command prompt. 3. Install the required Python libraries: ```bash - pip install PyQt5 requests + pip install PyQt5 requests Pillow ``` + *(Pillow is required for image compression and potentially for basic image handling.)* ## How to Run @@ -42,17 +56,31 @@ A simple, multi-platform GUI application built with PyQt5 to download content fr ## How to Use -1. **URL Input:** Enter the URL of the Kemono creator page (e.g., `https://kemono.su/patreon/user/12345`) or a specific post (e.g., `https://kemono.su/api/v1/patreon/user/12345/post/67890`) into the "Kemono Creator Page or Post URL" field. Note that while the API URL format is shown, the GUI can usually handle the standard web page URL format as well (`https://kemono.su/patreon/user/12345` or `https://kemono.su/patreon/user/12345/post/67890`). +1. **URL Input:** Enter the URL of the Kemono/Coomer creator page (e.g., `https://kemono.su/patreon/user/12345`) or a specific post (e.g., `https://kemono.su/patreon/user/12345/post/67890`) into the "Kemono Creator/Post URL" field. 2. **Download Location:** Use the "Browse" button to select the root directory where you want to save the downloaded content. -3. **Filter by Character (optional):** Enter a character name from your "Known Characters" list to only download posts tagged with that character. If left empty, it will try to find any known character in the title or default to a name derived from the title. -4. **File Type Filter:** Select the type of files you want to download (All Files, Images Only, or Videos Only). -5. **Skip Archives:** Check "Skip Zip Files" and/or "Skip RAR Files" to prevent downloading these archive formats. These are checked by default. -6. **Folder Organization:** Check "Download to Separate Folders" to create subfolders within your download location based on character names or derived titles. 
Uncheck it to download all files directly into the selected Download Location folder. This is checked by default. -7. **Known Characters:** The list on the right shows characters the application knows about (saved in `kemono_downloader_config.txt`). You can manually add or delete characters here. If a new character is detected in a post title while "Download to Separate Folders" is enabled and is not in your list, the application may prompt you to add it. -8. **Start Download:** Click the "Start Download" button to begin fetching and processing content. -9. **Cancel Download:** Click "Cancel Download" to stop the process. Note that the current file download might finish before the cancellation takes effect. -10. **Skip Current File:** Click "Skip Current File" to immediately stop downloading the file currently in progress and move to the next one. This button is only enabled when a file is actively being downloaded. -11. **Progress Log:** Monitor the download status, file saves, and any errors in the "Progress Log" area. +3. **Custom Folder Name (Single Post Only):** If downloading a single post and "Separate Folders" is enabled, you can specify a custom folder name for that post's content. +4. **Filter by Show/Character Name (Optional):** If "Separate Folders" is enabled, enter a name from your "Known Names" list. Only posts with titles matching this name will be downloaded into a folder named accordingly. If empty, the script will try to match any known name or derive a folder name from the post title. +5. **Skip Posts/Files with Words:** Enter comma-separated words (e.g., `WIP, sketch, preview`). Posts or files containing these words in their title/filename will be skipped. +6. **File Type Filter:** + * **All:** Downloads all files. + * **Images/GIFs:** Downloads common image formats and GIFs. + * **Videos:** Downloads common video formats. +7. 
**Options (Checkboxes):** + * **Separate Folders by Name/Title:** Enables creation of subfolders based on known names or post titles. Controls visibility of "Filter by Show/Character Name" and "Custom Folder Name". (Default: On) + * **Download Thumbnails Only:** Attempts to download only thumbnails for posts. (Default: Off) + * **Skip .zip / Skip .rar:** Prevents downloading of these archive types. (Default: On) + * **Compress Large Images (to WebP):** Compresses images larger than 1.5MB. (Default: Off) + * **Use Multithreading:** Enables faster downloads for full creator pages. (Default: On) +8. **Known Names List:** + * The list on the left ("Known Shows/Characters") displays names used for folder organization and filtering. This list is saved in `Known.txt`. + * Use the input field below the list and the "➕ Add" button to add new names. + * Select names and click "🗑️ Delete Selected" to remove them. + * A search bar above the list allows you to filter the displayed names. +9. **Start Download:** Click "⬇️ Start Download" to begin. +10. **Cancel / Skip:** + * **❌ Cancel:** Stops the entire download process. + * **⏭️ Skip Current File:** (Only in single-thread mode during file download) Skips the currently downloading file and moves to the next. +11. **Progress Log:** The area on the right shows detailed logs of the download process, including fetched posts, saved files, skips, and errors. ## Building an Executable (Optional) @@ -62,19 +90,19 @@ You can create a standalone `.exe` file for Windows using `PyInstaller`. 2. Obtain an icon file (`.ico`). Place it in the same directory as `main.py`. 3. Open your terminal in the script's directory and run: ```bash - pyinstaller --onefile --windowed --icon="your_icon_name.ico" --name="Kemono Downloader" main.py + pyinstaller --name "YourAppName" --onefile --windowed --icon="your_icon.ico" main.py ``` - Replace `"your_icon_name.ico"` with the actual name of your icon file.
+ Replace `"YourAppName"` with your desired application name and `"your_icon.ico"` with the actual name of your icon file. 4. The executable will be found in the `./dist` folder. ## Configuration -The application saves your list of known characters to a file named `kemono_downloader_config.txt` in the same directory as the script (`main.py`). Each character name is stored on a new line. You can manually edit this file if needed, but be mindful of the format (one name per line). +The application saves your list of known names (characters, artists, series, etc.) to a file named `Known.txt` in the same directory as the script (`main.py`). Each name is stored on a new line. You can manually edit this file if needed. ## Dark Theme -The application comes with a simple dark theme applied via stylesheets. +The application uses a built-in dark theme for the user interface. ## Contributing -Contributions are welcome! If you find a bug or have a feature request, please open an issue on the GitHub repository. If you want to contribute code, please fork the repository and create a pull request. \ No newline at end of file +Contributions are welcome! If you find a bug or have a feature request, please open an issue on the GitHub repository (if applicable). If you want to contribute code, please fork the repository and create a pull request. \ No newline at end of file