farside/.github/workflows/update-instances.yml
Ben Busby 73a53f8dbe
Add cf filter to nightly build, update services
The cloudflare filter has been added back into the nightly build. Now
that the filtering method uses direct querying of the instance IP(s), it
should be more reliable than the namespace lookup (and more accurate).

services.json has been updated with the latest filtered results from
services-full.json as well.
2023-01-04 10:03:03 -07:00

223 lines
8.8 KiB
YAML

# Refreshes the instance lists tracked by farside.
#
# Runs on the '0 0 * * *' cron schedule and on manual dispatch. Each upstream
# instance list is fetched and written into the matching entry of
# services-full.json, tools/un-cloudflare.sh then derives services.json from
# it, and any resulting changes to those two files are committed and pushed.
on:
  workflow_dispatch:
  schedule:
    - cron: '0 0 * * *'

jobs:
  update-instances:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Install dependencies
        # Refresh the package index first so the install does not fail
        # against a stale index on the runner image.
        run: |
          sudo apt-get update
          sudo apt-get install -y jq dnsutils

      # Loads the deploy key used by the `git push` at the end of the job.
      - uses: webfactory/ssh-agent@v0.5.3
        with:
          ssh-private-key: |
            ${{ secrets.PRIMARY_REPO_KEY }}

      - name: Fetch instances
        run: |
          # Fail fast. The default runner shell only enables -e; pipefail is
          # needed so that a failed download in a `curl | jq` pipeline stops
          # the job instead of overwriting an instance list with empty data.
          # All curl calls use -f so HTTP error responses count as failures.
          set -euo pipefail

          # Promote services-tmp.json to services-full.json, remove the
          # remaining *-tmp.json scratch files, and normalize instance URLs.
          function apply_update() {
            mv services-tmp.json services-full.json
            rm -f *-tmp.json

            # Ensure no trailing slashes for any instance
            sed -i 's/\/"/"/g' services-full.json
          }

          # ==============================================================
          # Git config
          # ==============================================================
          git config --global user.name github-actions
          # Quoted so the shell never treats "[bot]" as a glob pattern.
          git config --global user.email '41898282+github-actions[bot]@users.noreply.github.com'
          git remote set-url origin git@github.com:benbusby/farside.git
          git checkout main

          # ==============================================================
          # Bibliogram update
          # NOTE: Bibliogram has been discontinued and the official instance
          # at bibliogram.art is no longer around. Unless a new "official"
          # instance pops up, Bibliogram will likely be removed from farside
          # ==============================================================
          #curl -s https://bibliogram.art/api/instances | \
          #jq '[
          #  .data |
          #  .[] |
          #  select(.onion_site | not) |
          #  .address
          #] | sort' > bibliogram-tmp.json

          #jq --slurpfile bibliogram bibliogram-tmp.json \
          #  '( .[] | select(.type == "bibliogram") )
          #  .instances |= $bibliogram[0]' services-full.json > services-tmp.json

          #apply_update

          # ==============================================================
          # searx update
          # ==============================================================
          curl -fsS https://searx.space/data/instances.json | \
            jq '[
              .instances |
              to_entries[] |
              select(.value.network_type == "normal") |
              select(.value.version | . != null) |
              select(.value.network.asn_privacy == 0) |
              select(.value.http.error == null) |
              select(.value.tls.grade == "A+" or .value.tls.grade == "A") |
              select(.value.http.grade == "A+" or .value.http.grade == "A") |
              select(.value.html.grade == "V" or .value.html.grade == "F") |
              select(.key | contains(".i2p") | not) |
              .key
            ] | sort' > searx-tmp.json

          # Echo the filtered list into the job log.
          jq . searx-tmp.json

          jq --slurpfile searx searx-tmp.json \
            '( .[] | select(.type == "searx") )
            .instances |= $searx[0]' services-full.json > services-tmp.json

          apply_update

          # ==============================================================
          # SearXNG update
          # Same source and filters as searx, restricted to entries whose
          # generator field contains "searxng".
          # ==============================================================
          curl -fsS https://searx.space/data/instances.json | \
            jq '[
              .instances |
              to_entries[] |
              select(.value.network_type == "normal") |
              select(.value.generator | . != null) |
              select(.value.generator | contains("searxng")) |
              select(.value.version | . != null) |
              select(.value.network.asn_privacy == 0) |
              select(.value.http.error == null) |
              select(.value.tls.grade == "A+" or .value.tls.grade == "A") |
              select(.value.http.grade == "A+" or .value.http.grade == "A") |
              select(.value.html.grade == "V" or .value.html.grade == "F") |
              select(.key | contains(".i2p") | not) |
              .key
            ] | sort' > searxng-tmp.json

          # Echo the filtered list into the job log.
          jq . searxng-tmp.json

          jq --slurpfile searxng searxng-tmp.json \
            '( .[] | select(.type == "searxng") )
            .instances |= $searxng[0]' services-full.json > services-tmp.json

          apply_update

          # ==============================================================
          # SimplyTranslate update
          # ==============================================================
          curl -fsS https://simple-web.org/instances/simplytranslate > st-out
          # Prefix every fetched line with the https:// scheme.
          sed -i -e 's/^/https:\/\//' st-out
          # Turn the sorted lines into a JSON array of strings.
          sort st-out | jq -ncR '[inputs]' > st-tmp.json

          jq --slurpfile simplytranslate st-tmp.json \
            '( .[] | select(.type == "simplytranslate") )
            .instances |= $simplytranslate[0]' services-full.json > services-tmp.json

          apply_update

          # ==============================================================
          # Whoogle update
          # ==============================================================
          curl -fsS https://raw.githubusercontent.com/benbusby/whoogle-search/main/misc/instances.txt > whoogle-out
          # Turn the sorted lines into a JSON array of strings.
          sort whoogle-out | jq -ncR '[inputs]' > whoogle-tmp.json

          jq --slurpfile whoogle whoogle-tmp.json \
            '( .[] | select(.type == "whoogle") )
            .instances |= $whoogle[0]' services-full.json > services-tmp.json

          apply_update

          # ==============================================================
          # Invidious update
          # ==============================================================
          curl -fsS https://api.invidious.io/instances.json | \
            jq '[
              .[] |
              select(.[1].type | contains("https")) ] |
              [.[][1].uri] |
              sort' > invidious-tmp.json

          jq --slurpfile invidious invidious-tmp.json \
            '( .[] | select(.type == "invidious") )
            .instances |= $invidious[0]' services-full.json > services-tmp.json

          apply_update

          # ==============================================================
          # Scribe update
          # The upstream file is used as-is, without filtering or sorting.
          # ==============================================================
          curl -fsS https://git.sr.ht/~edwardloveall/scribe/blob/main/docs/instances.json > scribe-tmp.json

          jq --slurpfile scribe scribe-tmp.json \
            '( .[] | select(.type == "scribe") )
            .instances |= $scribe[0]' services-full.json > services-tmp.json

          apply_update

          # ==============================================================
          # teddit update
          # ==============================================================
          curl -fsS https://codeberg.org/teddit/teddit/raw/branch/main/instances.json | \
            jq '[
              .[] |
              select(.url | contains("https")) |
              .url ] |
              sort' > teddit-tmp.json

          jq --slurpfile teddit teddit-tmp.json \
            '( .[] | select(.type == "teddit") )
            .instances |= $teddit[0]' services-full.json > services-tmp.json

          apply_update

          # ==============================================================
          # Wikiless update
          # NOTE: Codeberg has removed the Wikiless repo until the maintainer
          # makes changes. See https://orenom.fi for updates.
          # ==============================================================
          #curl -s https://codeberg.org/orenom/Wikiless/raw/branch/main/instances.json | \
          #jq '[
          #  .[] |
          #  select((.url | contains(".onion") | not) and (.url | contains("https"))).url ] |
          #  sort' > wikiless-tmp.json

          #jq --slurpfile wikiless wikiless-tmp.json \
          #  '( .[] | select(.type == "wikiless") )
          #  .instances |= $wikiless[0]' services-full.json > services-tmp.json

          #apply_update

          # ==============================================================
          # libreddit update
          # ==============================================================
          curl -fsS https://raw.githubusercontent.com/libreddit/libreddit-instances/master/instances.json | \
            jq '[
              .instances[] |
              select(.url) |
              .url ] |
              sort' > libreddit-tmp.json

          jq --slurpfile libreddit libreddit-tmp.json \
            '( .[] | select(.type == "libreddit") )
            .instances |= $libreddit[0]' services-full.json > services-tmp.json

          apply_update

          # ==============================================================
          # TODO: Update instances for other services
          # ==============================================================

          # ==============================================================
          # Filter out instances behind Cloudflare from services-full.json
          # into services.json
          # ==============================================================
          ./tools/un-cloudflare.sh

          # ==============================================================
          # Push changes
          # ==============================================================
          git add services.json services-full.json
          # A failed commit (e.g. nothing changed) ends the step successfully
          # without pushing.
          git commit -m '[CI] Auto update instances' || exit 0
          git push