Creating a sitemap generator for Next.js

From sitemap creation to search engine index requests


Next.js

In this story I want to share the SEO work I did while migrating the Volla bridge page from Vue.js to Next.js, and how I built a dynamic sitemap generator for Next.js along the way.

1. Create a sitemap generator script that follows the Next.js folder structure

Next.js automatically creates URLs based on the folder and file names inside the pages directory. So I'll write the XML generator to walk those names while excluding Next.js's special files (_document.js, _app.js, etc.) using globby.

Create a scripts folder in the root directory and add sitemap-common.js:

const fs = require("fs");
const globby = require("globby");
const prettier = require("prettier");

// Timestamp written into every <lastmod>: the moment this script runs.
const getDate = new Date().toISOString();

const YOUR_AWESOME_DOMAIN = "https://website.com";

// Folder that the gzip step and the sitemap-index step read from.
const OUTPUT_DIR = "../public/sitemap";

// Pretty-prints the generated markup with Prettier's HTML parser.
const formatted = sitemap => prettier.format(sitemap, { parser: "html" });

/**
 * Turns a globbed page file into the route it is served under.
 * "../pages/index.tsx" -> "", "../pages/blog/index.tsx" -> "blog".
 */
const toRoutePath = page => {
  const path = page.replace("../pages/", "").replace(/\.tsx$/, "");
  // Only a trailing "/index" segment collapses into its parent route;
  // anchoring the pattern keeps names such as "docs/indexing" intact.
  return path === "index" ? "" : path.replace(/\/index$/, "");
};

(async () => {
  // Paths are relative to the scripts folder, so run this script from there.
  const pages = await globby([
    // include
    "../pages/**/*.tsx",
    "../pages/*.tsx",
    // exclude
    "!../pages/_*.tsx"
  ]);

  const pagesSitemap = pages
    // Dynamic routes ([id].tsx) have no single URL; they need a data-driven
    // sitemap instead of a literal "[id]" entry.
    .filter(page => !page.includes("["))
    .map(page => {
      return `
          <url>
            <loc>${YOUR_AWESOME_DOMAIN}/${toRoutePath(page)}</loc>
            <lastmod>${getDate}</lastmod>
          </url>
        `;
    })
    .join("");

  const generatedSitemap = `
    <?xml version="1.0" encoding="UTF-8"?>
    <urlset
      xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
    >
      ${pagesSitemap}
    </urlset>
  `;

  // `await` is a no-op when Prettier returns a string and resolves the
  // Promise when it returns one, so both API flavours work.
  const formattedSitemap = await formatted(generatedSitemap);

  // writeFileSync needs a string or Buffer; passing an array throws on
  // current Node.js versions.
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });
  fs.writeFileSync(`${OUTPUT_DIR}/sitemap-common.xml`, formattedSitemap, "utf8");
})().catch(err => {
  // Make a failed generation visible to CI instead of leaving a dangling rejection.
  console.error(err);
  process.exitCode = 1;
});

Running this script produces an XML file like this (example):

<?xml version="1.0" encoding="UTF-8"?>
<urlset
  xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
>
  <url>
    <loc>https://website.com/</loc>
    <lastmod>2020-04-03T08:19:25.691Z</lastmod>
  </url>
  <url>
    <loc>https://website.com/about</loc>
    <lastmod>2020-04-03T08:19:25.691Z</lastmod>
  </url>
  <url>
    <loc>https://website.com/blog</loc>
    <lastmod>2020-04-03T08:19:25.691Z</lastmod>
  </url>
  <url>
    <loc>https://website.com/profile</loc>
    <lastmod>2020-04-03T08:19:25.691Z</lastmod>
  </url>
  <url>
    <loc>https://website.com/teams</loc>
    <lastmod>2020-04-03T08:19:25.691Z</lastmod>
  </url>
</urlset>

2. A sitemap generator script for an external API

It's relatively easy to generate sitemaps for static pages, as in the example above. For dynamic pages (e.g., pages that use a userId), we need a slightly different script. In the example I used JSONPlaceholder's API.

I wrote sitemap-posts.js:

const fs = require("fs");
const fetch = require("node-fetch");
const prettier = require("prettier");

// Timestamp written into every <lastmod>: the moment this script runs.
const getDate = new Date().toISOString();

const fetchUrl = "https://jsonplaceholder.typicode.com/posts";
const YOUR_AWESOME_DOMAIN = "https://website.com";

// Folder that the gzip step and the sitemap-index step read from.
const OUTPUT_DIR = "../public/sitemap";

// Pretty-prints the generated markup with Prettier's HTML parser.
const formatted = sitemap => prettier.format(sitemap, { parser: "html" });

(async () => {
  // Fail loudly on network or HTTP errors: writing a sitemap from a missing
  // or partial response would publish wrong data.
  const response = await fetch(fetchUrl);
  if (!response.ok) {
    throw new Error(`Request to ${fetchUrl} failed with HTTP ${response.status}`);
  }

  const posts = await response.json();
  if (!Array.isArray(posts)) {
    throw new Error(`Expected an array of posts from ${fetchUrl}`);
  }

  const postListSitemap = posts
    .map(({ id }) => {
      return `
          <url>
            <loc>${YOUR_AWESOME_DOMAIN}/post/${id}</loc>
            <lastmod>${getDate}</lastmod>
          </url>`;
    })
    .join("");

  const generatedSitemap = `
    <?xml version="1.0" encoding="UTF-8"?>
    <urlset
      xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
    >
      ${postListSitemap}
    </urlset>
  `;

  // `await` is a no-op when Prettier returns a string and resolves the
  // Promise when it returns one, so both API flavours work.
  const formattedSitemap = await formatted(generatedSitemap);

  // writeFileSync needs a string or Buffer; passing an array throws on
  // current Node.js versions.
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });
  fs.writeFileSync(`${OUTPUT_DIR}/sitemap-posts.xml`, formattedSitemap, "utf8");
})().catch(err => {
  // Make a failed generation visible to CI instead of leaving a dangling rejection.
  console.error(err);
  process.exitCode = 1;
});

Running this script produces an XML file like this (example):

<?xml version="1.0" encoding="UTF-8"?>
<urlset
  xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
>
  <url>
    <loc>https://website.com/post/1</loc>
    <lastmod>2020-04-03T16:03:19.388Z</lastmod>
  </url>
  <url>
    <loc>https://website.com/post/2</loc>
    <lastmod>2020-04-03T16:03:19.388Z</lastmod>
  </url>
  <url>
    <loc>https://website.com/post/3</loc>
    <lastmod>2020-04-03T16:03:19.388Z</lastmod>
  </url>
  <url>
    <loc>https://website.com/post/4</loc>
    <lastmod>2020-04-03T16:03:19.388Z</lastmod>
  </url>
  <url>
    <loc>https://website.com/post/5</loc>
    <lastmod>2020-04-03T16:03:19.388Z</lastmod>
  </url>
  <url>
    <loc>https://website.com/post/6</loc>
    <lastmod>2020-04-03T16:03:19.388Z</lastmod>
  </url>
  <url>
    <loc>https://website.com/post/7</loc>
    <lastmod>2020-04-03T16:03:19.388Z</lastmod>
  </url>
  <url>
    <loc>https://website.com/post/8</loc>
    <lastmod>2020-04-03T16:03:19.388Z</lastmod>
  </url>
  <url>
    <loc>https://website.com/post/9</loc>
    <lastmod>2020-04-03T16:03:19.388Z</lastmod>
  </url>
  <url>
    <loc>https://website.com/post/10</loc>
    <lastmod>2020-04-03T16:03:19.388Z</lastmod>
  </url>
</urlset>

3. A script that compresses all sitemap files to gzip

Sitemaps compressed to gzip (.gz) work the same way as XML sitemaps but are much smaller. Using zlib, I'll compress the XML files into gzip:

const fs = require("fs");
const { pipeline } = require("stream");
const zlib = require("zlib");

// Directories whose .xml files get a .gz sibling written next to them.
// Paths are relative to the scripts folder, so run this script from there.
const dirs = ["../public/sitemap"];

dirs.forEach((dir) => {
  fs.readdirSync(dir)
    .filter((file) => file.endsWith(".xml"))
    .forEach((file) => {
      const source = `${dir}/${file}`;

      // pipeline() forwards an error from any of the three streams to the
      // callback and destroys the others, so a failed read no longer goes
      // unnoticed and no file handle is left open.
      pipeline(
        fs.createReadStream(source),
        zlib.createGzip(),
        fs.createWriteStream(`${source}.gz`),
        (err) => {
          if (err) {
            console.error(err);
            // Let the surrounding build notice the failed compression.
            process.exitCode = 1;
          }
        }
      );
    });
});

This script compresses every XML file generated in steps 1 and 2 into .gz format.

4. A sitemap-index generator script for the files created above

Submitting multiple sitemaps to search engines (Google Search Console, Naver Search Advisor, etc.) requires a sitemap index file.

I created separate sitemaps for various dynamic URL patterns such as /seller/[_id], /product/[_id], and /video/[_id]. Because I wanted to submit just one sitemap URL to Google Search Console, I wrote this script to build a sitemap index that points to all of them:

const fs = require("fs");
const globby = require("globby");
const prettier = require("prettier");

// Timestamp written into every <lastmod>: the moment this script runs.
const getDate = new Date().toISOString();

const webrootDomain = "https://website.com";

// Pretty-prints the generated markup with Prettier's HTML parser.
const formatted = sitemap => prettier.format(sitemap, { parser: "html" });

(async () => {
  // Paths are relative to the scripts folder, so run this script from there.
  const pages = await globby(["../public/sitemap/*.gz"]);

  if (pages.length === 0) {
    // An empty index is still written, but it almost certainly means the
    // gzip step has not run yet.
    console.warn("No .gz sitemaps found in ../public/sitemap; the index will be empty.");
  }

  const sitemapIndex = pages
    .map(page => {
      // Files under public/ are served from the web root.
      const path = page.replace("../public/", "");

      return `
          <sitemap>
            <loc>${webrootDomain}/${path}</loc>
            <lastmod>${getDate}</lastmod>
          </sitemap>`;
    })
    .join("");

  const sitemap = `
    <?xml version="1.0" encoding="UTF-8"?>
    <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
      ${sitemapIndex}
    </sitemapindex>
  `;

  // `await` is a no-op when Prettier returns a string and resolves the
  // Promise when it returns one, so both API flavours work.
  const formattedSitemap = await formatted(sitemap);

  // writeFileSync needs a string or Buffer; passing an array throws on
  // current Node.js versions.
  fs.writeFileSync("../public/sitemap.xml", formattedSitemap, "utf8");
})().catch(err => {
  // Make a failed generation visible to CI instead of leaving a dangling rejection.
  console.error(err);
  process.exitCode = 1;
});

Running it produces an XML file like this (example):

<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <sitemap>
    <loc>https://website.com/sitemap/sitemap-common.xml.gz</loc>
    <lastmod>2020-04-03T08:19:46.858Z</lastmod>
  </sitemap>
  <sitemap>
    <loc>https://website.com/sitemap/sitemap-posts.xml.gz</loc>
    <lastmod>2020-04-03T08:19:46.858Z</lastmod>
  </sitemap>
</sitemapindex>

5. A bash script that builds new sitemaps on every master deployment, plus a script that pings Google Search Console from GitHub Actions

# yarn sitemap
# Start from an empty public/sitemap folder so files from a previous run never linger.
$ cd public
$ rm -rf sitemap
$ mkdir sitemap
$ cd ..
# The generator scripts use paths relative to the scripts folder ("../pages", "../public").
$ cd scripts
$ node ./sitemap-common.js
$ node ./sitemap-posts.js
# NOTE(review): the gzip script from step 3 presumably has to run here; the index generator only picks up *.gz files. Confirm its file name and add it.
$ node ./sitemap.js

If you want Google Search Console to reindex your pages, just add this line at the end of the script:

# Quote the URL so the shell does not treat "?" as a glob character, and call
# https://www.google.com directly because curl does not follow redirects by default.
# NOTE(review): Google announced in 2023 that the sitemap ping endpoint is being retired; verify it still responds before relying on it.
$ curl "https://www.google.com/ping?sitemap=https://website.com/sitemap.xml"

I wrote a bash script to generate the XML sitemaps, compress them to gzip, and remove unused XML files automatically.

Bash Script

I then updated the workflow file to run these scripts via GitHub Actions on each master deployment.

# Create a sitemap and submit it to Google.
- run: yarn sitemap
  name: ping sitemap

Takeaway

I hope you enjoyed this short tutorial on generating sitemaps for Next.js. Feel free to reach out via LinkedIn or by email at ghsspower@gmail.com. Thanks!

Title : Creating a sitemap generator for Next.js
Date : August 30, 2020
Writer : Hyouk Seo (Spemer)