Skip to content

Commit 26faa34

Browse files
committed
0.02.2
verify tag
1 parent 6afc591 commit 26faa34

17 files changed

+368
-290
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.02.1
1+
0.02.2

changelog

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
0.02.2 (2018-10-11)
2+
fb2/cleanFB2.pm -> fb2/Clean.pm
3+
- verify list fb2 tag
4+
cleanFB2.pl -> fb2clean.pl
5+
fb2images.pl -> fb2/Images.pm
6+
fb2normalize.pl -> fb2/Normalize.pm
7+
18
0.02.1 (2018-09-13)
29
fb2/cleanFB2.pm
310
- removing harmful spaces

scripts/cleanFB2.pl

Lines changed: 0 additions & 6 deletions
This file was deleted.
Lines changed: 29 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
2424
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2525

26-
package fb2::cleanFB2;
26+
package fb2::Clean;
2727
#Подчистка Fb2
2828

2929
use XML::Parser;
@@ -32,34 +32,35 @@ use Encode;
3232
use utf8;
3333
#
3434
use Math::Random;
35+
# Quoted on purpose: a bare literal with two dots is parsed by Perl as a
# v-string (the characters chr(0).chr(2).chr(2)), not as the text "0.02.2".
our $VERSION='0.02.2';
3536

3637
%encodings=(
3738
'ru'=>'utf-8',
3839
'rus'=>'windows-1251',
39-
'en'=>'windows-1252',
40-
'eng'=>'windows-1251',
41-
'urk'=>'windows-1251',
42-
'bel'=>'windows-1251',
40+
'en'=>'utf-8',
41+
'eng'=>'utf-8',
42+
'urk'=>'utf-8',
43+
'bel'=>'utf-8',
44+
'fr'=>'iso-8859-1',
4345
'fra'=>'iso-8859-1',
4446
'fre'=>'iso-8859-1',
45-
'fr'=>'iso-8859-1',
4647
'de'=>'iso-8859-1',
4748
'deu'=>'iso-8859-1',
4849
'ger'=>'iso-8859-1',
4950
'ara'=>'utf-8',
50-
'cze'=>'iso-8859-2',
5151
'cs'=>'iso-8859-2',
52+
'cze'=>'iso-8859-2',
5253
'ces'=>'iso-8859-2',
53-
'est'=>'iso-8859-2',
5454
'et'=>'iso-8859-2',
55-
'fin'=>'iso-8859-1',
55+
'est'=>'iso-8859-2',
5656
'fi'=>'iso-8859-1',
57+
'fin'=>'iso-8859-1',
5758
'ita'=>'iso-8859-1',
59+
'sl'=>'iso-8859-2',
5860
'slk'=>'iso-8859-2',
5961
'slo'=>'iso-8859-2',
60-
'sl'=>'iso-8859-2',
61-
'swe'=>'iso-8859-1',
62-
'sv'=>'iso-8859-1'
62+
'sv'=>'iso-8859-1',
63+
'swe'=>'iso-8859-1'
6364
);
6465
my %oldjenres=(
6566
Action=>'literature_adv',
@@ -151,9 +152,11 @@ sub GenerateDocInfo{
151152
<version>1.0</version>
152153
</document-info>};
153154
}
155+
154156
# Return one random lowercase ASCII letter ('a'..'z').
sub RandChar{
	# random_uniform() yields a real number between 97 and 122 which chr()
	# truncates, so 'z' (122) was practically never produced.
	# random_uniform_integer() draws integers with both bounds included.
	return chr(Math::Random::random_uniform_integer(1,97,122));
}
159+
157160
sub CleanupFB2{
158161
my $FileToParce=shift;
159162
my $BookLang;
@@ -164,7 +167,7 @@ sub CleanupFB2{
164167
my %NotesLinks;
165168
my %RealImages;
166169
my %RealNotes;
167-
170+
168171
print "Cleaning the file up...\n$FileToParce\n";
169172
my $CleanupParser=new XML::Parser(Handlers => {
170173
Start => sub {
@@ -239,7 +242,7 @@ sub CleanupFB2{
239242
$InLang=1 if $elem eq 'lang';
240243
$InSRCLang=1 if $elem eq 'src-lang';
241244
$InJenre=1 if $elem eq 'genre';
242-
245+
243246
# Remember, where we are
244247
unshift(@Elems,$elem);
245248
},
@@ -259,7 +262,7 @@ sub CleanupFB2{
259262
if ($LangFixes{$XText}){$XText=$LangFixes{$XText};print "Lang changed to: $XText\n"}
260263
}
261264
$BookLang.=$XText if $InLang;
262-
$XMLBody.=xmlescapeLite($XText) if ($InHead or $Elems[0]=~/\A(v|p|subtitle|td|text-author|cite|a|style|strong|emphasis|binary)\Z/);
265+
$XMLBody.=xmlescapeLite($XText) if ($InHead or $Elems[0]=~/\A(a|p|v|subtitle|style|th|td|text-author|cite|strong|emphasis|strikethrough|sub|sup|binary)\Z/);
263266
},
264267
End => sub {
265268
# if ($_[1] eq 'description'){
@@ -299,7 +302,7 @@ sub CleanupFB2{
299302
print "Performing final text cleanup...\n" unless $Mute;
300303

301304
# finall cleanup
302-
305+
303306
$XMLBody=~s/\A\s+//;
304307
print "step 1\n";
305308
$XMLBody=~s/\s+\Z//;
@@ -364,21 +367,22 @@ sub CleanupFB2{
364367
push(@ImgWrong,$_) if !$RealImages{$_};
365368
}
366369
die "Image links points to inexistent ID!\n".join("\n",@ImgWrong) if @ImgWrong;
367-
368-
# $!=16;
369-
# my @NotesWrong;
370-
# for (keys(%NotesLinks)){
371-
# s/\A#//;
372-
# push(@NotesWrong,$_) if !$RealNotes{$_};
373-
# }
374-
# die "Notes point to inexistent section ID!\n".join("\n",@NotesWrong) if @NotesWrong;
370+
371+
$!=16;
372+
my @NotesWrong;
373+
for (keys(%NotesLinks)){
374+
s/\A#//;
375+
push(@NotesWrong,$_) if !$RealNotes{$_};
376+
}
377+
die "Notes point to inexistent section ID!\n".join("\n",@NotesWrong) if @NotesWrong;
375378

376379
$!=17;
377380
my @ImgWrong;
378381
for (keys(%RealImages)){
379382
push(@ImgWrong,$_) if !$ImgLinks{'#'.$_};
380383
}
381384
die "Unused images 'detected!\n".join("\n",@ImgWrong) if @ImgWrong;
385+
382386
$!=0;
383387

384388
print "Lang: $BookLang\n";
@@ -395,14 +399,14 @@ sub CleanupFB2{
395399
print FILETOUPDATE $doc->toString(0) or die "error writing XML to file!\$!";
396400
close FILETOUPDATE;
397401
}
402+
398403
# Escape the XML special characters &, < and > in a string.
#   argument - the text to escape
# Returns the escaped copy; every matched character is replaced by its entry
# in %escapesLite (defined outside this sub).
sub xmlescapeLite {
	my ($text)=@_;
	# Work on a lexical copy: assigning to the globals $b and $_ would
	# clobber the caller's $_ and the $b used by sort.
	$text=~s/([&<>])/$escapesLite{$1}/gs;
	return $text;
}
404409

405-
406410
sub xmlescape {
407411
$b=shift;
408412
$_=$b;

scripts/fb2/Images.pm

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
# Copyright 2006-2011 by Swami Dhyan Nataraj (Nikolay Shaplov)
2+
#
3+
# Redistribution and use in source and binary forms, with or without
4+
# modification, are permitted provided that the following conditions are met:
5+
# 1. Redistributions of source code must retain the above copyright
6+
# notice, this list of conditions and the following disclaimer.
7+
# 2. Redistributions in binary form must reproduce the above copyright
8+
# notice, this list of conditions and the following disclaimer in the
9+
# documentation and/or other materials provided with the distribution.
10+
# 3. The name of the author may not be used to endorse or promote products
11+
# derived from this software without specific prior written permission.
12+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
13+
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
14+
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
15+
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
16+
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
17+
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
18+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
19+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
21+
22+
package fb2::Images;
23+
#manipulate embedded images in the FictionBook file
24+
25+
use XML::LibXML;
26+
use File::MMagic;
27+
use MIME::Base64 qw(encode_base64);
28+
use Encode;
29+
# Quoted on purpose: a bare literal with two dots is parsed by Perl as a
# v-string (the characters chr(0).chr(2).chr(2)), not as the text "0.02.2".
our $VERSION='0.02.2';
30+
31+
# Print every entry of an image list on its own line to the currently
# selected output handle.
#   $List - reference to an array of image ids
sub printImageList
{
    my ($List) = @_;

    for my $image_id (@{$List})
    {
        print "$image_id\n";
    }
}
39+
40+
# Collect the ids of the <binary> elements found under the document element.
#   $doc - document object providing getDocumentElement()
# Returns a reference to an array of ids; elements whose id is missing or
# false (empty string, "0") are left out.
sub getImageList
{
    my ($doc) = @_;
    my @ids;

    for my $binary ($doc->getDocumentElement()->getElementsByTagName('binary' ,0))
    {
        my $id = $binary->getAttribute('id');
        next unless $id;
        push @ids, $id;
    }

    return \@ids;
}
52+
53+
# Collect the id attributes of the elements that
# getElementsByTagName('*', 1) returns for the document element.
#   $doc - document object providing getDocumentElement()
# Returns a reference to an array of ids; elements whose id is missing or
# false (empty string, "0") are left out.
sub getUsedIdList
{
    my ($doc) = @_;
    my @ids;

    for my $element ($doc->getDocumentElement()->getElementsByTagName('*' ,1))
    {
        my $id = $element->getAttribute('id');
        next unless $id;
        push @ids, $id;
    }

    return \@ids;
}
65+
66+
# Embed image files into the document as <binary> elements.
#   $doc   - document object; createElement(), createTextNode() and
#            getDocumentElement() are called on it
#   $opts  - hash reference; $opts->{'add'} is a reference to an array of
#            image file names to embed
#   $flags - hash reference; $flags->{'changed'} is set to 1 after every
#            image that is actually added
# The file name itself becomes the id of the new element.  A name that is
# already reported by getImageList() or getUsedIdList() is announced on
# STDERR and skipped.  Dies if an image file cannot be opened.
sub AddImages
{
    my ($doc, $opts, $flags) = @_;

    foreach my $ImageName (@{$opts->{'add'}})
    {
        print "Adding image $ImageName ...\n";

        if (grep { $_ eq $ImageName } @{getImageList($doc)})
        {
            print STDERR "Image $ImageName already exist\n";
            next;
        }
        if (grep { $_ eq $ImageName } @{getUsedIdList($doc)})
        {
            print STDERR "Object $ImageName already exist\n";
            next;
        }

        my $mm = File::MMagic->new;
        my $MimeType = $mm->checktype_filename($ImageName);

        # Three-argument open keeps characters such as '>' or '|' in the
        # file name from being taken as an open mode, and the :raw layer
        # stops newline translation from corrupting binary image data.
        open(my $image_fh, '<:raw', $ImageName)
            or die "Cannot open image '$ImageName': $!";
        my $raw = do { local $/; <$image_fh> };   # slurp the whole file
        close($image_fh);
        my $Encoded = encode_base64(defined $raw ? $raw : '');

        my $NewNode = $doc->createElement('binary');
        $NewNode->setAttribute('id', $ImageName);
        $NewNode->setAttribute('content-type', $MimeType);
        $NewNode->appendChild($doc->createTextNode("\n".$Encoded));

        my $root = $doc->getDocumentElement();
        $root->appendChild($NewNode);
        # Trailing newline so the next element starts on its own line.
        $root->appendChild($doc->createTextNode("\n"));
        $flags->{'changed'} = 1;
    }
}
116+
117+
# Remove embedded images from the document.
#   $doc   - document object providing getDocumentElement()
#   $opts  - hash reference; $opts->{'remove'} is a reference to an array
#            of image ids to delete
#   $flags - hash reference; $flags->{'changed'} is set to 1 whenever an
#            element is removed
# Every <binary> element whose id equals a requested name is removed from
# the document element, together with the whitespace-only text nodes right
# before it.  Progress ("Done" / "Not Found!") is printed for each name.
sub RemoveImages
{
    my ($doc, $opts, $flags) = @_;
    my $root = $doc->getDocumentElement();

    foreach my $ImageName (@{$opts->{'remove'}})
    {
        print "Removing image '$ImageName'... ";
        my $removed = 0;

        foreach my $binary ($root->getElementsByTagName('binary' ,0))
        {
            next unless $binary->getAttribute('id') eq $ImageName;

            # Strip the whitespace-only text nodes directly in front of the
            # element so no blank lines stay behind.
            my $prev = $binary->getPreviousSibling();
            while (defined($prev)
                   && $prev->nodeType() == XML_TEXT_NODE
                   && $prev->getData() =~ /^\s*$/)
            {
                $root->removeChild($prev);
                $prev = $binary->getPreviousSibling();
            }

            $root->removeChild($binary);
            print "Done\n";
            $removed = 1;
            $flags->{'changed'} = 1;
        }

        print "Not Found!\n" unless $removed;
    }
}
149+
150+
# Concatenate the text found below an element.
#   $elem - node providing getChildNodes()
# Child element nodes are descended into recursively and the data of text
# nodes is appended in order; all other node types are ignored.
# Returns the collected string ('' when nothing was found).
sub getText
{
    my $elem = shift;
    my $collected = '';

    foreach my $child ($elem->getChildNodes())
    {
        my $type = $child->nodeType();
        if ($type == XML_ELEMENT_NODE)
        {
            $collected .= getText($child);
            next;
        }
        $collected .= $child->getData() if $type == XML_TEXT_NODE;
    }

    return $collected;
}
167+
168+
# Write embedded images out to files.
#   $doc   - document object providing getDocumentElement()
#   $opts  - hash reference; $opts->{'extract'} is a reference to an array
#            of image ids to extract
#   $flags - accepted for a signature parallel to the other image subs;
#            not used here
# For each requested id the first <binary> element with that id is base64
# decoded and written to a file named after the id.  Progress ("Done" /
# "Not Found!") is printed for each name.  Dies if the file cannot be
# opened, written or closed.
sub ExtractImages
{
    my ($doc,$opts,$flags) = @_;

    my $root=$doc->getDocumentElement();
    foreach my $ImageName (@{$opts->{'extract'}})
    {
        print "Extracting image '$ImageName'... ";
        my $flag=1;
        foreach my $binary ($root->getElementsByTagName('binary' ,0))
        {
            next unless $binary->getAttribute('id') eq $ImageName;

            my $data = MIME::Base64::decode_base64( getText( $binary ) );

            # :raw keeps newline translation from corrupting the decoded
            # binary data on platforms that distinguish text and binary.
            open(my $out_fh, '>:raw', $ImageName)
                or die "Cannot create image file '$ImageName': $!";
            print {$out_fh} $data
                or die "Cannot write image file '$ImageName': $!";
            # Buffered write errors only surface when the handle is closed.
            close($out_fh)
                or die "Cannot finish writing image file '$ImageName': $!";

            print "Done\n";
            $flag=0;
            last;
        }
        print "Not Found!\n" if $flag;
    }
}

0 commit comments

Comments
 (0)