republique

helper scripts for www.republique-numerique.fr
git clone https://a3nm.net/git/republique/
Log | Files | Refs | README

dump.sh (1034B)


      #!/bin/bash

      # Dump a numbered series of documents from a web API into a directory.
      #
      # Usage: dump.sh URL DEST START LIMIT EXPR
      #
      #   URL    base URL, no trailing slash; document $i is fetched from URL/$i
      #   DEST   destination directory, no trailing slash (created if missing);
      #          document $i is saved as DEST/$i
      #   START  number of the first document to fetch (non-negative integer)
      #   LIMIT  maximum number of consecutive failures tolerated: the dump stops
      #          as soon as more than LIMIT documents in a row have failed
      #   EXPR   grep pattern that a valid document is expected to match
      #
      # A document is a failure when it does not match EXPR, or when it could not
      # be fetched at all (empty response after all retries). Requests are sent
      # with an "Accept: application/json" header.

      if [ "$#" -lt 5 ]
      then
        echo "usage: $0 URL DEST START LIMIT EXPR" >&2
        exit 2
      fi

      URL="$1"
      DEST="$2"
      START="$3"
      LIMIT="$4" # max consecutive failures
      EXPR="$5"
      MAX_RETRIES="5" # max additional attempts per document after the first one

      # START and LIMIT are used in integer comparisons and arithmetic; an empty
      # or non-numeric value would make the stop condition error out and never
      # become true, so reject it up front.
      for VALUE in "$START" "$LIMIT"
      do
        case "$VALUE" in
          ''|*[!0-9]*)
            echo "$0: START and LIMIT must be non-negative integers" >&2
            exit 2
            ;;
        esac
      done

      # 10# forces base 10 so that values with leading zeros (e.g. 08) are not
      # parsed as octal.
      NUM=$((10#$START - 1))
      LIMIT=$((10#$LIMIT))
      FAIL="0"

      mkdir -p "$DEST" || exit 1

      while true
      do
        NUM=$((NUM + 1))
        RETRIES="0"
        while true
        do
          curl -s -H "Accept: application/json" "$URL/$NUM" > "$DEST/$NUM"
          # pause after every request to avoid hammering the server
          sleep 1
          if [ -s "$DEST/$NUM" ]
          then
            # got a non-empty response
            break
          fi
          if [ "$RETRIES" -ge "$MAX_RETRIES" ]
          then
            # too many attempts, give up
            rm -f "$DEST/$NUM"
            break
          fi
          # transient fetch problems happen
          RETRIES=$((RETRIES + 1))
          echo "error fetching $DEST/$NUM, retrying (${RETRIES}/${MAX_RETRIES})"
        done
        if [ ! -f "$DEST/$NUM" ]
        then
          echo "could not fetch $DEST/$NUM, removed it"
          # count it as a failure: otherwise a server that is down, or that
          # answers with empty bodies past the last document, loops forever
          FAIL=$((FAIL + 1))
        else
          echo "retrieved $DEST/$NUM"
          # -e keeps a pattern starting with '-' from being read as an option
          if grep -q -e "$EXPR" -- "$DEST/$NUM"
          then
            FAIL="0"
          else
            FAIL=$((FAIL + 1))
          fi
        fi
        if [ "$FAIL" -gt "$LIMIT" ]
        then
          break
        fi
      done