Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 70 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,16 @@ jobs:
# Steps represent a sequence of tasks that will be executed as part of the job
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
# fetch-depth: 0 is required so Docusaurus can read git log per file to
# emit accurate <lastmod> dates in sitemap.xml. Without it every file gets
# the same date (the latest commit) and the Google Indexing diff submits
# every URL on every deploy, burning the full daily quota each time.
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Set Node.js 22.x
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
node-version: 22.x

Expand Down Expand Up @@ -153,3 +159,66 @@ jobs:
echo " curl -sS -X POST \"https://api.indexnow.org/indexnow\" -H 'Content-Type: application/json' \\"
echo " -d '{\"host\":\"${INDEXNOW_HOST}\",\"key\":\"${INDEXNOW_KEY}\",\"keyLocation\":\"${INDEXNOW_KEY_LOC}\",\"urlList\":[\"https://${INDEXNOW_HOST}/docs/\"]}'"
exit 1

# Restore the sitemap from the previous successful deploy so the Google
# Indexing step can diff and only submit URLs that are new since last run.
# key includes github.sha so a fresh entry is saved after every deploy;
# restore-keys provides a fallback to the most-recent previous run.
- name: Restore previous sitemap cache
uses: actions/cache@v4
with:
path: .sitemap-prev.xml
key: sitemap-prev-${{ github.sha }}
restore-keys: |
sitemap-prev-

# Submit new/changed URLs to Google Indexing API and ping the GSC sitemap
# endpoint. Uses a cached copy of the previous sitemap (restored above) to
# diff so we only burn quota on pages that actually changed.
#
# Required secret: GOOGLE_SERVICE_ACCOUNT_JSON
# — paste the full JSON key of a Google Cloud service account that has
# been granted "Owner" access in GSC (Search Console → Settings →
# Users and permissions → Add user).
# — The service account also needs the "Indexing API" enabled on its
# Cloud project (APIs & Services → Enable APIs → Web Search Indexing API).
#
# Optional secret: GSC_SITE_URL (default: https://keploy.io/)
# — must match the property URL exactly as it appears in GSC.
- name: Submit changed URLs to Google Indexing API
env:
GOOGLE_SERVICE_ACCOUNT_JSON: ${{ secrets.GOOGLE_SERVICE_ACCOUNT_JSON }}
GSC_SITE_URL: ${{ secrets.GSC_SITE_URL || 'https://keploy.io/' }}
continue-on-error: true
run: |
set -euo pipefail

SITEMAP="build/docs/sitemap.xml"
if [ ! -f "$SITEMAP" ]; then
echo "::notice::Sitemap not found at $SITEMAP, skipping Google Indexing submission"
exit 0
fi

if [ -z "${GOOGLE_SERVICE_ACCOUNT_JSON:-}" ]; then
echo "::notice::GOOGLE_SERVICE_ACCOUNT_JSON secret not set, skipping Google Indexing submission"
exit 0
fi

# Install only the auth library — pinned to major version so breaking
# changes in a future release don't silently break this step.
npm install --no-save google-auth-library@10

# Use || so set -e doesn't abort on a non-zero exit before cp runs.
# SCRIPT_EXIT defaults to 0 (success); || only fires on failure.
SCRIPT_EXIT=0
node scripts/google-index.js \
--sitemap "$SITEMAP" \
--prev-sitemap .sitemap-prev.xml \
--sitemap-url "https://keploy.io/docs/sitemap.xml" \
--site-url "${GSC_SITE_URL}" || SCRIPT_EXIT=$?

# Always update the cache baseline, even if some submissions failed,
# so the next deploy diffs against today's sitemap, not a stale one.
cp "$SITEMAP" .sitemap-prev.xml

exit $SCRIPT_EXIT
4 changes: 4 additions & 0 deletions docusaurus.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,10 @@ module.exports = {
changefreq: "weekly",
priority: 0.5,
filename: "sitemap.xml",
// Emit <lastmod> using the git commit date of each file so the
// Google Indexing API step can diff by date and only resubmit
// pages whose content actually changed since the last deploy.
lastmod: "date",
// Differentiate docs sitemap priorities by content type so
// search engines spend crawl budget proportional to how
// canonical each page is. Priority buckets:
Expand Down
303 changes: 303 additions & 0 deletions scripts/google-index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,303 @@
#!/usr/bin/env node
// Submits new/changed docs URLs to Google Indexing API and pings the GSC sitemap endpoint.
// Reads GOOGLE_SERVICE_ACCOUNT_JSON from env; uses a previous-sitemap file for smart diffing
// so only URLs that are new or have a changed <lastmod> date consume quota.
//
// Usage:
//   node scripts/google-index.js \
//     --sitemap build/docs/sitemap.xml \
//     --prev-sitemap .sitemap-prev.xml \
//     --sitemap-url https://keploy.io/docs/sitemap.xml \
//     --site-url https://keploy.io/
// Every flag is optional; the values shown above are the built-in defaults.
// Add --all to force-submit every URL (ignores prev-sitemap).

'use strict';

const { GoogleAuth } = require('google-auth-library');
const fs = require('fs');

// URL that submitOne() POSTs each { url, type } notification to.
const INDEXING_ENDPOINT =
'https://indexing.googleapis.com/v3/urlNotifications:publish';
// Base URL that pingSitemap() extends with the encoded site URL and
// "/sitemaps/<encoded sitemap URL>" before issuing its PUT request.
const GSC_SITEMAPS_API =
'https://www.googleapis.com/webmasters/v3/sites';

// DAILY_QUOTA caps how many notifications submitUrls() sends per run:
// URL_UPDATED entries are taken first and URL_DELETED entries only use what is
// left of the same budget.
// NOTE(review): 200/day is taken to be Google's default publish quota and
// 10 requests per burst its short-term allowance — confirm both against the
// quota page of the Cloud project before relying on them.
const DAILY_QUOTA = 200;
// submitBatch() pauses after every BURST_SIZE requests.
const BURST_SIZE = 10;
// Maximum number of attempts submitOne() makes per URL, counting the first try
// (so at most MAX_RETRIES - 1 actual retries).
const MAX_RETRIES = 3;

// ── helpers ───────────────────────────────────────────────────────────────────

/**
 * Parses the command-line flags understood by this script from process.argv.
 *
 * A value flag falls back to its default when it is absent, when it is the
 * last argument, or when the token after it is itself a `--flag` (for example
 * `--prev-sitemap --all`), so a forgotten value can no longer swallow the
 * next flag.
 *
 * @returns {{sitemap: string, prevSitemap: string, sitemapUrl: string,
 *            siteUrl: string, all: boolean}} Resolved options.
 */
function parseArgs() {
  const argv = process.argv.slice(2);

  // Returns the token following `flag`, or null when no usable value follows.
  const get = (flag) => {
    const i = argv.indexOf(flag);
    if (i === -1) return null;
    const value = argv[i + 1];
    if (value === undefined || value.startsWith('--')) return null;
    return value;
  };

  return {
    sitemap: get('--sitemap') || 'build/docs/sitemap.xml',
    prevSitemap: get('--prev-sitemap') || '.sitemap-prev.xml',
    sitemapUrl: get('--sitemap-url') || 'https://keploy.io/docs/sitemap.xml',
    siteUrl: get('--site-url') || 'https://keploy.io/',
    all: argv.includes('--all'),
  };
}

/**
 * Reads a sitemap file and returns its entries as Map<url, lastmod|null>.
 *
 * - A missing file yields an empty Map (used for the "no previous sitemap" case).
 * - Each <url>…</url> block is matched as a unit so <loc> and <lastmod> stay paired.
 * - <loc> is trimmed and its predefined XML entities (&amp; &apos; &quot; &gt;
 *   &lt;) are decoded, because sitemap files carry URLs in escaped form while
 *   the APIs need the real URL.
 * - lastmod is the trimmed raw string from <lastmod>, or null when the tag is
 *   absent; it is only ever compared for equality.
 *
 * @param {string} filepath Path to the sitemap XML file.
 * @returns {Map<string, string|null>} URL → lastmod (or null).
 */
function parseSitemap(filepath) {
  if (!fs.existsSync(filepath)) return new Map();
  const content = fs.readFileSync(filepath, 'utf8');

  const entities = {
    '&amp;': '&',
    '&apos;': "'",
    '&quot;': '"',
    '&gt;': '>',
    '&lt;': '<',
  };
  // Single pass so that "&amp;lt;" becomes "&lt;" and is not decoded twice.
  const decodeXml = (text) =>
    text.replace(/&(?:amp|apos|quot|gt|lt);/g, (entity) => entities[entity]);

  const result = new Map();
  const urlBlocks = content.match(/<url>[\s\S]*?<\/url>/g) || [];
  for (const block of urlBlocks) {
    const locMatch = block.match(/<loc>([^<]+)<\/loc>/);
    const lastmodMatch = block.match(/<lastmod>([^<]+)<\/lastmod>/);
    if (locMatch) {
      result.set(
        decodeXml(locMatch[1].trim()),
        lastmodMatch ? lastmodMatch[1].trim() : null
      );
    }
  }
  return result;
}

/**
 * Decides whether a sitemap URL is eligible for submission.
 *
 * The exclusions are meant to match the ones applied by the IndexNow step so
 * that both pipelines submit the same URL set.
 *
 * @param {string} url Absolute page URL from the sitemap.
 * @returns {boolean} false for tag listing pages (URL contains "/tags/") and
 *   for versioned docs paths ("/docs/N.M.P/"); true for everything else.
 */
function filterUrl(url) {
  if (url.includes('/tags/')) {
    return false;
  }
  const versionedDocsPath = /\/docs\/\d+\.\d+\.\d+\//;
  return !versionedDocsPath.test(url);
}

/**
 * Promise-based delay.
 *
 * @param {number} ms Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<undefined>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

// ── Google Indexing API ───────────────────────────────────────────────────────

/**
 * Sends one notification ({ url, type }) to INDEXING_ENDPOINT, making up to
 * MAX_RETRIES attempts.
 *
 * Retry policy:
 *  - HTTP 429, HTTP 5xx and thrown (network-level) errors are retried, waiting
 *    1000 ms before the second attempt and doubling the wait after each one.
 *  - Any other non-2xx status (e.g. 403, 404) is treated as permanent and
 *    returned immediately.
 *
 * @param {string} token OAuth bearer token.
 * @param {string} url   Page URL being announced.
 * @param {string} type  Notification type, e.g. 'URL_UPDATED' or 'URL_DELETED'.
 * @returns {Promise<{ok: true} | {ok: false, status: number, body: string}>}
 *   On HTTP failure `status` is the response status and `body` the first 200
 *   characters of the response text; on a thrown error `status` is 0 and
 *   `body` is the error message.
 */
async function submitOne(token, url, type) {
  let backoffMs = 1000;
  let attempt = 1;

  while (attempt <= MAX_RETRIES) {
    const isFinalAttempt = attempt === MAX_RETRIES;
    let reason;

    try {
      const response = await fetch(INDEXING_ENDPOINT, {
        method: 'POST',
        headers: {
          Authorization: `Bearer ${token}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ url, type }),
      });

      if (response.ok) {
        return { ok: true };
      }

      // An unreadable body is not fatal; fall back to an empty string.
      const text = await response.text().catch(() => '');
      const { status } = response;
      const transient = status >= 500 || status === 429;

      if (isFinalAttempt || !transient) {
        return { ok: false, status, body: text.slice(0, 200) };
      }
      reason = `HTTP ${status}`;
    } catch (error) {
      // Thrown by fetch itself: DNS failure, connection reset, and similar.
      if (isFinalAttempt) {
        return { ok: false, status: 0, body: error.message };
      }
      reason = error.message;
    }

    console.log(` retry ${attempt}/${MAX_RETRIES} after ${backoffMs}ms (${reason})`);
    await sleep(backoffMs);
    backoffMs *= 2;
    attempt += 1;
  }

  // Only reachable when MAX_RETRIES is not a positive number; keeps the
  // return type consistent instead of yielding undefined.
  return { ok: false, status: 0, body: 'max retries exceeded' };
}

/**
 * Submits `urls` one at a time through submitOne() and tallies the outcomes.
 *
 * Logging: every failure is logged (status plus response snippet); successes
 * are logged only for the first five URLs, every tenth index, and the last
 * URL, to keep CI output short.
 *
 * Pacing: after each group of BURST_SIZE requests the loop waits 1100 ms,
 * unless the list is already exhausted.
 *
 * @param {string} token   OAuth bearer token forwarded to submitOne().
 * @param {string[]} urls  URLs to notify, in submission order.
 * @param {string} type    Notification type forwarded to submitOne().
 * @returns {Promise<{ok: number, fail: number}>} Accepted and failed counts.
 */
async function submitBatch(token, urls, type) {
  const total = urls.length;
  const tally = { ok: 0, fail: 0 };

  for (const [index, target] of urls.entries()) {
    const position = index + 1;
    const outcome = await submitOne(token, target, type);

    if (!outcome.ok) {
      console.log(` [${position}/${total}] ✗ HTTP ${outcome.status} — ${target}`);
      console.log(` Response: ${outcome.body}`);
      tally.fail += 1;
    } else {
      tally.ok += 1;
      const worthLogging = index < 5 || index % 10 === 0 || position === total;
      if (worthLogging) {
        console.log(` [${position}/${total}] ✓ ${target}`);
      }
    }

    // Stay under the burst limit: pause once a full burst has been sent.
    const burstComplete = position % BURST_SIZE === 0;
    if (burstComplete && position < total) {
      await sleep(1100);
    }
  }

  return tally;
}

/**
 * Sends URL_UPDATED and then URL_DELETED notifications within one shared
 * budget of DAILY_QUOTA requests.
 *
 * Updates take priority: at most DAILY_QUOTA updated URLs are submitted, and
 * deletions only get whatever part of the budget the updates did not use.
 * Anything over budget is dropped for this run and reported with a
 * `::warning::` line.
 *
 * NOTE(review): the workflow step that runs this script copies the new
 * sitemap over the cached baseline after every run, so URLs dropped here for
 * quota reasons may not reappear in the next diff — confirm that the
 * "next deploy" wording in the deletion warning still holds.
 *
 * @param {string} token          OAuth bearer token.
 * @param {string[]} updatedUrls  New or changed URLs.
 * @param {string[]} deletedUrls  URLs that disappeared from the sitemap.
 * @returns {Promise<number>} Total number of failed notifications.
 */
async function submitUrls(token, updatedUrls, deletedUrls) {
  let totalFail = 0;

  // ── URL_UPDATED ─────────────────────────────────────────────────────────────
  const toUpdate = updatedUrls.slice(0, DAILY_QUOTA);
  if (updatedUrls.length > DAILY_QUOTA) {
    console.log(
      `::warning::${updatedUrls.length} URLs to update but daily quota is ${DAILY_QUOTA}; ` +
        `submitting first ${DAILY_QUOTA}. Request a quota increase at console.cloud.google.com.`
    );
  }

  if (toUpdate.length > 0) {
    console.log(`\nSubmitting ${toUpdate.length} URL_UPDATED notification(s)…`);
    const { ok, fail } = await submitBatch(token, toUpdate, 'URL_UPDATED');
    console.log(`URL_UPDATED: ${ok} accepted, ${fail} failed.`);
    totalFail += fail;
  } else {
    console.log('No new/changed URLs to submit (URL_UPDATED).');
  }

  // ── URL_DELETED ─────────────────────────────────────────────────────────────
  // Deletions draw from the same budget as updates, so only send as many as
  // the updates left room for.
  const deletionBudget = Math.max(0, DAILY_QUOTA - toUpdate.length);
  const toDelete = deletedUrls.slice(0, deletionBudget);

  if (toDelete.length > 0) {
    console.log(`\nSubmitting ${toDelete.length} URL_DELETED notification(s)…`);
    const { ok, fail } = await submitBatch(token, toDelete, 'URL_DELETED');
    console.log(`URL_DELETED: ${ok} accepted, ${fail} failed.`);
    totalFail += fail;
  }

  if (deletedUrls.length > toDelete.length) {
    console.log(
      `::warning::${deletedUrls.length - toDelete.length} deleted URL(s) skipped — ` +
        `quota exhausted. They will be signalled on the next deploy.`
    );
  }

  return totalFail;
}

// ── GSC Sitemap ping ──────────────────────────────────────────────────────────

/**
 * Issues a PUT to `<GSC_SITEMAPS_API>/<siteUrl>/sitemaps/<sitemapUrl>` (both
 * path parts URI-encoded) to notify Search Console about the sitemap.
 *
 * Best-effort by design: every outcome is only logged. A non-2xx response or
 * a thrown error produces a `::warning::` line and never rejects, so a failed
 * ping cannot fail the caller.
 *
 * @param {string} token       OAuth bearer token.
 * @param {string} siteUrl     Search Console property URL.
 * @param {string} sitemapUrl  Public URL of the sitemap.
 * @returns {Promise<void>}
 */
async function pingSitemap(token, siteUrl, sitemapUrl) {
  try {
    const endpoint =
      `${GSC_SITEMAPS_API}/${encodeURIComponent(siteUrl)}` +
      `/sitemaps/${encodeURIComponent(sitemapUrl)}`;

    const res = await fetch(endpoint, {
      method: 'PUT',
      headers: { Authorization: `Bearer ${token}` },
    });

    // `res.ok` already covers every 2xx status, including 204 No Content.
    if (res.ok) {
      console.log(`\nGSC sitemap ping: OK (HTTP ${res.status}) — ${sitemapUrl}`);
    } else {
      const body = await res.text().catch(() => '');
      console.log(
        `\n::warning::GSC sitemap ping returned HTTP ${res.status}. Body: ${body.slice(0, 300)}`
      );
    }
  } catch (err) {
    console.log(`\n::warning::GSC sitemap ping failed: ${err.message}`);
  }
}

// ── main ─────────────────────────────────────────────────────────────────────

/**
 * Script entry point.
 *
 * Steps:
 *  1. Read CLI flags and the GOOGLE_SERVICE_ACCOUNT_JSON environment variable
 *     (exit 1 if it is missing or not valid JSON).
 *  2. Exit 0 with a notice when the current sitemap file does not exist.
 *  3. Obtain one access token covering both the indexing and webmasters scopes.
 *  4. Diff the current sitemap against the previous one:
 *       - updated = URL absent from the previous sitemap, or present with a
 *         different <lastmod> (only when the new entry has a lastmod);
 *       - deleted = URL present in the previous sitemap but not the new one.
 *     URLs rejected by filterUrl() are ignored on both sides. With --all, or
 *     with no previous sitemap file, every current URL counts as updated.
 *  5. Submit the notifications, then ping the Search Console sitemap endpoint.
 *  6. Exit 1 if any notification failed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = parseArgs();

  const saJson = process.env.GOOGLE_SERVICE_ACCOUNT_JSON;
  if (!saJson) {
    console.log(
      '::error::GOOGLE_SERVICE_ACCOUNT_JSON is not set. ' +
        'Add the service account key JSON as a GitHub secret.'
    );
    process.exit(1);
  }

  let credentials;
  try {
    credentials = JSON.parse(saJson);
  } catch {
    console.log('::error::GOOGLE_SERVICE_ACCOUNT_JSON is not valid JSON.');
    process.exit(1);
  }

  if (!fs.existsSync(args.sitemap)) {
    console.log(`::notice::Sitemap not found at ${args.sitemap} — skipping.`);
    process.exit(0);
  }

  // Fetch the token once — valid for 1 hour, well beyond the ~22s runtime
  // for 200 URLs. No need to call getAccessToken() per request.
  const auth = new GoogleAuth({
    credentials,
    scopes: [
      'https://www.googleapis.com/auth/indexing',
      'https://www.googleapis.com/auth/webmasters',
    ],
  });
  const client = await auth.getClient();
  const { token } = await client.getAccessToken();
  if (!token) {
    console.log('::error::Failed to obtain an access token. Check that the service account key is valid and the Indexing API is enabled on its Cloud project.');
    process.exit(1);
  }

  const newMap = parseSitemap(args.sitemap);
  // With --all the previous sitemap is ignored, so every URL looks new.
  const prevMap = args.all ? new Map() : parseSitemap(args.prevSitemap);

  const hasPrev = !args.all && fs.existsSync(args.prevSitemap);
  if (!hasPrev) {
    console.log(
      args.all
        ? '--all flag set: submitting every URL in the sitemap.'
        : 'No previous sitemap cached — submitting all current URLs (first run).'
    );
  } else {
    console.log(`Prev sitemap: ${prevMap.size} URLs | New sitemap: ${newMap.size} URLs`);
  }

  // URLs to update: new URL OR same URL with a different lastmod value.
  const updatedUrls = [];
  for (const [url, lastmod] of newMap) {
    if (!filterUrl(url)) continue;
    if (!prevMap.has(url)) {
      updatedUrls.push(url); // new page
    } else if (lastmod && prevMap.get(url) !== lastmod) {
      updatedUrls.push(url); // existing page with updated content
    }
  }

  // URLs to delete: present in previous sitemap but gone from the new one.
  const deletedUrls = [];
  for (const [url] of prevMap) {
    if (!filterUrl(url)) continue;
    if (!newMap.has(url)) {
      deletedUrls.push(url);
    }
  }

  console.log(
    `Changed/new: ${updatedUrls.length} | Deleted: ${deletedUrls.length}`
  );

  const failures = await submitUrls(token, updatedUrls, deletedUrls);
  await pingSitemap(token, args.siteUrl, args.sitemapUrl);

  if (failures > 0) process.exit(1);
}

// Run the script. Any rejection that escapes main() is reported as a GitHub
// Actions error annotation and turns into a non-zero exit code.
main().catch((error) => {
  const { message } = error;
  console.log(`::error::Unhandled error: ${message}`);
  process.exit(1);
});
Loading