XML Sitemap documentation for the dotCMS Content Management System

You can create an XML Sitemap for your site using a Scripted API endpoint. A scripted endpoint gives you dynamic, fine-grained control over which content on your site is included in, or excluded from, the sitemap. An example scripted endpoint that supplies an XML Sitemap is included in the dotCMS starter site, and is implemented on the dotCMS Demo Site.

If you have created your site from the dotCMS starter site in a recent dotCMS version, the appropriate files to implement XML Sitemaps will already exist in your site. If the /application/apivtl/sitemap/get.vtl file already exists on your site, you may skip directly to step 3 in the steps below.

Implementing the XML Sitemap Scripted API

If you do not already have the get.vtl file on your site, perform the following steps to implement an XML Sitemap:

  1. Create a new folder on your site named /application/apivtl/sitemap.
  2. Create a new file named get.vtl in the new folder, and place the code below in the file.
  3. Access the /api/vtl/sitemap URL from the front-end of your site to see the XML Sitemap results.
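  4. Modify your robots.txt file (/robots.txt) to point to the API path https://{yoursite}/api/vtl/sitemap. The Sitemap directive must contain the fully qualified URL, so replace {yoursite} with your site's hostname. Note that the Disallow: / rule shown in this example blocks crawlers from the entire site; adjust it to match the content you want indexed.
    User-agent: *
    Disallow: /
    Sitemap: https://{yoursite}/api/vtl/sitemap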
    

The get.vtl File

To create the get.vtl file for your site, either copy the following code into a new file, or download the get.vtl file from the dotCMS Demo Site.

## Scripted API endpoint body: writes a sitemaps.org <urlset> document.
## The response content type is set to XML before any markup is written.
$response.setContentType("application/xml")
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
## Entry for the site root, listed first with the highest priority (1).
## NOTE(review): ${host} is rendered as-is after "https://" - confirm that it
## resolves to the bare hostname in this context.
    <url>
        <loc>https://${host}</loc>
        <changefreq>always</changefreq>
        <priority>1</priority>
    </url>
##Folders with pages:
## Writes one <url> entry per distinct parent folder of the content matched by
## +baseType:5. The search asks for no hits ("size": 0); only the "folders"
## terms aggregation over parentpath_dotraw (at most 2000 buckets) is read.
#set($esQuery = '
{
    "query": {
        "query_string": {
            "query": "+baseType:5"
        }
    },
    "aggs": {
        "folders": {
            "terms": {
                "field": "parentpath_dotraw",
                "size" : 2000
            }
        }
    },
    "size": 0
}
')
## The query text is passed through $render.eval before it is sent to the search tool.
#set($rawResults = $estool.search($render.eval($esQuery)))
#set($results = $json.generate($rawResults.response))
## Each bucket key is appended to the site address exactly as returned (no escaping is applied here).
#set($pageFolderBuckets = $results.aggregations.get("asMap").folders.buckets)
#foreach($pageFolderBucket in $pageFolderBuckets)
    <url>
        <loc>https://${host}${pageFolderBucket.key}</loc>
        <changefreq>always</changefreq>
        <priority>0.9</priority>
    </url>
#end


##Pages:
## Writes one <url> entry per distinct path of the content matched by
## +baseType:5. The search asks for no hits ("size": 0); only the terms
## aggregation over path_dotraw (at most 2000 buckets) is read. The aggregation
## is still named "folders" in the query, although its keys here are page paths.
#set($esQuery = '
{
    "query": {
        "query_string": {
            "query": "+baseType:5"
        }
    },
    "aggs": {
        "folders": {
            "terms": {
                "field": "path_dotraw",
                "size" : 2000
            }
        }
    },
    "size": 0
}
')
## The query text is passed through $render.eval before it is sent to the search tool.
#set($rawResults = $estool.search($render.eval($esQuery)))
#set($results = $json.generate($rawResults.response))
## Each bucket key is appended to the site address exactly as returned (no escaping is applied here).
#set($pagePathBuckets = $results.aggregations.get("asMap").folders.buckets)
#foreach($pagePathBucket in $pagePathBuckets)
    <url>
        <loc>https://${host}${pagePathBucket.key}</loc>
        <changefreq>always</changefreq>
        <priority>0.9</priority>
    </url>
#end



##Folders with pdfs:
## Writes one <url> entry per distinct parent folder of the content whose path
## matches the regexp below. The search asks for no hits ("size": 0); only the
## "folders" terms aggregation over parentpath_dotraw (at most 2000 buckets) is read.
## The pattern ends in a literal ".pdf": the dot is written as [.] so that it
## cannot match an arbitrary character (the earlier "(.*?).pdf" also accepted
## paths ending in, for example, "xpdf").
#set($esQuery = '
{
    "query": {
        "regexp": {
            "path": ".*[.]pdf"
        }
    },
    "aggs": {
        "folders": {
            "terms": {
                "field": "parentpath_dotraw",
                "size" : 2000
            }
        }
    },
    "size": 0
}
')
## The query text is passed through $render.eval before it is sent to the search tool.
#set($rawResults = $estool.search($render.eval($esQuery)))
#set($results = $json.generate($rawResults.response))
## Each bucket key is appended to the site address exactly as returned (no escaping is applied here).
#foreach($folder in $results.aggregations.get("asMap").folders.buckets)
    <url>
        <loc>https://${host}${folder.key}</loc>
        <changefreq>always</changefreq>
        <priority>0.9</priority>
    </url>
#end


##PDFs:
## Writes one <url> entry per distinct path of the content whose path matches
## the regexp below. The search asks for no hits ("size": 0); only the terms
## aggregation over path_dotraw (at most 2000 buckets) is read. The aggregation
## is still named "folders" in the query, although its keys here are file paths.
## The pattern ends in a literal ".pdf": the dot is written as [.] so that it
## cannot match an arbitrary character (the earlier "(.*?).pdf" also accepted
## paths ending in, for example, "xpdf").
#set($esQuery = '
{
    "query": {
        "regexp": {
            "path": ".*[.]pdf"
        }
    },
    "aggs": {
        "folders": {
            "terms": {
                "field": "path_dotraw",
                "size" : 2000
            }
        }
    },
    "size": 0
}
')
## The query text is passed through $render.eval before it is sent to the search tool.
#set($rawResults = $estool.search($render.eval($esQuery)))
#set($results = $json.generate($rawResults.response))
## Each bucket key is appended to the site address exactly as returned (no escaping is applied here).
#foreach($pdfBucket in $results.aggregations.get("asMap").folders.buckets)
    <url>
        <loc>https://${host}${pdfBucket.key}</loc>
        <changefreq>always</changefreq>
        <priority>0.9</priority>
    </url>
#end


##URLMaps:
## Writes one <url> entry per distinct urlmap_dotraw value of the content
## matched by +baseType:1. The search asks for no hits ("size": 0); only the
## terms aggregation (at most 2000 buckets) is read. The aggregation is still
## named "folders" in the query, although its keys here are URL map values.
#set($esQuery = '
{
    "query": {
        "query_string": {
            "query": "+baseType:1"
        }
    },
    "aggs": {
        "folders": {
            "terms": {
                "field": "urlmap_dotraw",
                "size" : 2000
            }
        }
    },
    "size": 0
}
')
## The query text is passed through $render.eval before it is sent to the search tool.
#set($rawResults = $estool.search($render.eval($esQuery)))
#set($results = $json.generate($rawResults.response))
## Each bucket key is appended to the site address exactly as returned (no escaping is applied here).
#set($urlMapBuckets = $results.aggregations.get("asMap").folders.buckets)
#foreach($urlMapBucket in $urlMapBuckets)
    <url>
        <loc>https://${host}${urlMapBucket.key}</loc>
        <changefreq>always</changefreq>
        <priority>0.9</priority>
    </url>
#end
## Closes the <urlset> element opened at the top of the file.
</urlset>

Changing What's Included

The get.vtl file contains several Elasticsearch queries that determine which content is included in the generated sitemap. To change what is included, modify any of these queries.

For example, if you wish to exclude Pages within a particular folder named /privateFolder from the sitemap, you can modify the query in the ##Pages portion of the get.vtl file to exclude that path, as follows:

{
    "query": {
        "query_string": {
            "query": "+baseType:5 -path:/privateFolder/*"
        }
    },
    "aggs": {
        "folders": {
            "terms": {
                "field": "path_dotraw",
                "size" : 2000
            }
        }
    },
    "size": 0
}

References

For more information about the sitemap protocol, please see the Sitemaps.org protocol document.