improve report-status.sh
Bug #667013 reported by
siznax
This bug affects 1 person
Affects | Status | Importance | Assigned to | Milestone | |
---|---|---|---|---|---|
Archive Widecrawl |
Fix Committed
|
Low
|
Kenji Nagahashi |
Bug Description
I had intended the bash script to be a working model, to be rewritten in Python.
Either 1) apply Kenji's bash suggestions, or 2) rewrite in Python.
Changed in archivewidecrawl: | |
assignee: | nobody → Kenji Nagahashi (knagahashi) |
importance: | Undecided → Low |
status: | New → Fix Committed |
To post a comment you must log in.
On 10/25/10 2:04 PM, Kenji Nagahashi wrote:
> Hi Steve,
>
> Reading report-status*.sh, I came up with some ideas to simplify the code.
> As it is easier to present actual code, I attached a patch for those files.
>
> Changes also include adding "--max-time 30" option to curl invocation,
> as it seems reading the job status page takes sooo long when it is actively crawling.
>
> Hope you find this useful.
> --Kenji
diff --git a/report-status-cluster.sh b/report-status-cluster.sh
index 7b7e339..d7a8e01 100755
--- a/report-status-cluster.sh
+++ b/report-status-cluster.sh
@@ -2,18 +2,14 @@
# siznax 2010
cluster=/home/steve/crawling/live/cluster.txt
-status=/home/steve/crawling/live/report-status.sh
-nodes=`cut -d ' ' -f 1 $cluster`
+status=`dirname $0`/report-status.sh
-self=`echo $0 | tr '/' ' ' | awk '{print $NF}'`
+self=`basename $0`
echo $self `date`
-for node in $nodes
-do
- host=`grep $node $cluster | awk '{print $1}'`
- port=`grep $node $cluster | awk '{print $2}'`
- job=` grep $node $cluster | awk '{print $3}'`
- auth=`grep $node $cluster | awk '{print $4}'`
- $status $host $port $job $auth 0
-done
+exec 3<$cluster
+while read -u 3 host port job auth; do
+ $status $host $port $job $auth 0
+done
+exec 3<&-
diff --git a/report-status.sh b/report-status.sh
index ec978e8..0470905 100755
--- a/report-status.sh
+++ b/report-status.sh
@@ -17,6 +17,8 @@ version="r0"
LC_ALL=en_US.UTF-8 # for thousands separator
hs_errs=/home/steve/crawling/live/hs_errs.txt
+verbose=false
+
if [ $# -lt 5 ]
then
echo "Usage: $script host port job auth verbose"
@@ -26,19 +28,19 @@ else
port=$2
job=$3
auth=$4
- verbose=$5
+ [ $5 == 0 ] || verbose=true
fi
-tags=( '<statusDescription>'\
- '<totalUriCount>'\
- '<downloadedUriCount>'\
- '<novel>'\
- '<currentDocsPerSecond>'\
- '<averageDocsPerSecond>'\
- '<currentKiBPerSec>'\
- '<averageKiBPerSec>'\
- '<elapsedPretty>'\
- '<launchCount>')
+tags=( 'statusDescription'\
+ 'totalUriCount'\
+ 'downloadedUriCount'\
+ 'novel'\
+ 'currentDocsPerSecond'\
+ 'averageDocsPerSecond'\
+ 'currentKiBPerSec'\
+ 'averageKiBPerSec'\
+ 'elapsedPretty'\
+ 'launchCount')
keys=(status\
total\
downloaded\
@@ -53,18 +55,20 @@ keys=(status\
#---------------------------------------------------------------
function get_tag_value {
- cmd="grep '$tag' $tmpfile\
- | tr '<' ' '\
- | tr '>' ' '\
- | tr -s ' '\
- | cut -d ' ' -f 3"
- value=`eval $cmd`
+ value=`sed -ne "\%.*<$tag>\(.*\)</$tag>.*%{s//\1/p;q}" $tmpfile`
+# cmd="grep '$tag' $tmpfile | tr -s '<>' ' '
+# | tr '<' ' '\
+# | tr '>' ' '\
+# | tr -s ' '\
+# | cut -d ' ' -f 3"
+# value=`eval $cmd`
}
function get_status {
- value=`grep statusDescription $tmpfile\
- | grep -o '>[^\<]*'\
- | awk '{print $NF}'\
- | head -1`
+ value=`sed -ne '/.*<statusDescription>.*: \([^<]*\).*/{s//\1/p;q}' $tmpfile`
+# value=`grep statusDescription $tmpfile\
+# | grep -o '>[^\<]*'\
+# | awk '{print $NF}'\
+# ...