# Downloaded sample pages (inputs).
DATA := a.html b.html c.html d.html e.html
# Result files, one per page and per tool. They are derived from DATA so the
# lists cannot drift out of sync with the inputs:
#   X.html -> X.t1.txt (PHP stripper), X.t2.txt (Perl stripper),
#             X.t3.txt (boilerpipe).
T1_RES := $(DATA:.html=.t1.txt)
T2_RES := $(DATA:.html=.t2.txt)
T3_RES := $(DATA:.html=.t3.txt)
# Default goal: build every result file for all three tools.
# Declared phony so that a stray file named "all" cannot mask it.
.PHONY: all
all: $(T1_RES) $(T3_RES) $(T2_RES)
# Build X.t1.txt by feeding X.html to ./strip-html-tags.php on stdin.
# Static pattern rule: the stem (%) ties each result to its own page only, so
# changing one page no longer rebuilds every result, and the source file is
# chosen by make instead of by substring-matching names in a shell loop.
# If the filter fails, the partial output is removed and the recipe fails, so
# a broken result is never mistaken for an up-to-date one.
$(T1_RES): %.t1.txt: %.html
	./strip-html-tags.php < $< > $@ || { rm -f $@; exit 1; }
# Build X.t2.txt by feeding X.html to ./strip-html-tags.pl on stdin.
# Static pattern rule: the stem (%) ties each result to its own page only, so
# changing one page no longer rebuilds every result, and the source file is
# chosen by make instead of by substring-matching names in a shell loop.
# If the filter fails, the partial output is removed and the recipe fails, so
# a broken result is never mistaken for an up-to-date one.
$(T2_RES): %.t2.txt: %.html
	./strip-html-tags.pl < $< > $@ || { rm -f $@; exit 1; }
# Build X.t3.txt by feeding X.html to the boilerpipe wrapper script on stdin.
# Static pattern rule: the stem (%) ties each result to its own page only, so
# changing one page no longer rebuilds every result, and the source file is
# chosen by make instead of by substring-matching names in a shell loop.
# If the filter fails, the partial output is removed and the recipe fails, so
# a broken result is never mistaken for an up-to-date one.
$(T3_RES): %.t3.txt: %.html
	../../trunk/tools/boilerpipe/boilerPlateRemover.sh < $< > $@ || { rm -f $@; exit 1; }
# Download sample page a.html from cs.wikipedia.org.
# wget -O creates the output file even when the download fails; remove it in
# that case so a later run retries instead of treating it as up to date.
a.html:
	wget 'http://cs.wikipedia.org/wiki/%C5%A0e%C5%99%C3%ADk_obecn%C3%BD' -O $@ || { rm -f $@; exit 1; }
# Download sample page b.html from cs.wikipedia.org.
# wget -O creates the output file even when the download fails; remove it in
# that case so a later run retries instead of treating it as up to date.
b.html:
	wget 'http://cs.wikipedia.org/wiki/Turecko' -O $@ || { rm -f $@; exit 1; }
# Download sample page c.html from cs.wikipedia.org.
# wget -O creates the output file even when the download fails; remove it in
# that case so a later run retries instead of treating it as up to date.
c.html:
	wget 'http://cs.wikipedia.org/wiki/St%C5%99edozemn%C3%AD_mo%C5%99e' -O $@ || { rm -f $@; exit 1; }
# Download sample page d.html from cs.wikipedia.org.
# wget -O creates the output file even when the download fails; remove it in
# that case so a later run retries instead of treating it as up to date.
d.html:
	wget 'http://cs.wikipedia.org/wiki/Slovn%C3%AD_z%C3%A1soba' -O $@ || { rm -f $@; exit 1; }
# Download sample page e.html from cs.wikipedia.org.
# wget -O creates the output file even when the download fails; remove it in
# that case so a later run retries instead of treating it as up to date.
e.html:
	wget 'http://cs.wikipedia.org/wiki/Neologismus' -O $@ || { rm -f $@; exit 1; }
# Remove all result files and all downloaded pages in this directory.
# Phony: this is a command, not a file, so it must run even if a file named
# "clean" exists. Plain -f (no -r) because only regular files are expected to
# match; a directory matching the glob is left alone rather than deleted
# recursively.
.PHONY: clean
clean:
	rm -f *.txt *.html
# Remove result files only, keeping the downloaded pages.
# clean-res removes the results of all three tools; clean-tN removes the
# results of one tool. All are phony (commands, not files). Plain -f (no -r)
# so a directory that happens to match a glob is never deleted recursively.
.PHONY: clean-res clean-t1 clean-t2 clean-t3
clean-res: clean-t1 clean-t2 clean-t3
clean-t1:
	rm -f *.t1.txt
clean-t2:
	rm -f *.t2.txt
clean-t3:
	rm -f *.t3.txt