dump.sh (1034B)
#!/bin/bash
#
# dump.sh - mirror a numbered series of resources into local files.
#
# Usage: dump.sh URL DEST START LIMIT EXPR
#   URL    base URL without trailing slash; index i is requested as URL/i
#   DEST   destination directory without trailing slash; index i is saved
#          as DEST/i (the directory is created if needed)
#   START  first index to fetch (integer)
#   LIMIT  max consecutive failures tolerated (integer); the run stops as
#          soon as MORE than LIMIT consecutive indices have failed
#   EXPR   grep pattern a stored response must match to count as a success
#
# Environment:
#   DUMP_DELAY  seconds to pause after every request (default: 1)
#
# Every request sends "Accept: application/json". An index "fails" when its
# response does not match EXPR, or when no non-empty response could be
# obtained at all. Progress is reported on stdout.
#
# Exit status: 0 when the failure limit ended the run, 1 if DEST cannot be
# created, 2 on a usage error.

# No `set -e`: the status of main is deliberately left to become the
# script's exit status, and curl failures are handled by inspecting the
# downloaded file rather than curl's return code.
set -u

# Number of additional attempts after the first request for one index.
MAX_RETRIES="5"

# Pause between requests, to go easy on the server.
DUMP_DELAY="${DUMP_DELAY:-1}"

# Print a short usage summary to stderr.
usage() {
  printf 'Usage: %s URL DEST START LIMIT EXPR\n' "${0##*/}" >&2
  printf '  fetch URL/i into DEST/i from i=START until more than LIMIT\n' >&2
  printf '  consecutive indices fail to match the grep pattern EXPR\n' >&2
}

# Succeed when $1 is an optionally signed decimal integer.
is_int() {
  [[ "$1" =~ ^-?[0-9]+$ ]]
}

# Fetch one index, retrying while the response is empty.
# Arguments: $1 - base URL, $2 - destination directory, $3 - index
# Returns:   0 if DEST/index now holds a non-empty response,
#            1 if every attempt came back empty (the file is removed).
fetch_one() {
  local url="$1"
  local dest="$2"
  local num="$3"
  local target="$dest/$num"
  local retries=0

  while true; do
    # curl runs without --fail, so an HTTP error response that carries a
    # body is stored like any other response; only an empty file is
    # treated as a transient fetch problem.
    curl -s -H "Accept: application/json" "$url/$num" > "$target"
    sleep "$DUMP_DELAY"
    if [ -s "$target" ]; then
      return 0
    fi
    # -ge (not -gt): allow exactly MAX_RETRIES retries, so the progress
    # message never runs past "(MAX_RETRIES/MAX_RETRIES)".
    if [ "$retries" -ge "$MAX_RETRIES" ]; then
      # too many attempts, give up
      rm -f -- "$target"
      return 1
    fi
    # transient fetch problems happen
    retries=$((retries + 1))
    echo "error fetching $target, retrying (${retries}/${MAX_RETRIES})"
  done
}

# Entry point: validate the arguments, then walk the indices upward from
# START until the consecutive-failure limit is exceeded.
main() {
  if [ "$#" -ne 5 ]; then
    usage
    return 2
  fi

  local url="$1"
  local dest="$2"
  local start="$3"
  local limit="$4"   # max consecutive failures
  local expr="$5"
  local num
  local fail=0

  # A non-numeric LIMIT would make the stop test below error out and never
  # fire, so reject bad numbers up front.
  if ! is_int "$start" || ! is_int "$limit"; then
    echo "START and LIMIT must be integers" >&2
    usage
    return 2
  fi

  if ! mkdir -p -- "$dest"; then
    echo "cannot create destination directory $dest" >&2
    return 1
  fi

  num="$start"
  while true; do
    if fetch_one "$url" "$dest" "$num"; then
      echo "retrieved $dest/$num"
      # -e keeps a pattern that starts with '-' from being read as an option
      if grep -q -e "$expr" -- "$dest/$num"; then
        fail=0
      else
        fail=$((fail + 1))
      fi
    else
      echo "could not fetch $dest/$num, removed it"
      # An index that cannot be fetched counts as a failure too; otherwise
      # a source that only returns empty responses would loop forever.
      fail=$((fail + 1))
    fi
    if [ "$fail" -gt "$limit" ]; then
      break
    fi
    num=$((num + 1))
  done
  return 0
}

main "$@"