/*
 * Licensed to DuraSpace under one or more contributor license agreements.
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership.
 *
 * DuraSpace licenses this file to you under the Apache License,
 * Version 2.0 (the "License"); you may not use this file except in
 * compliance with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fcrepo.http.api;

import static org.apache.commons.lang3.StringUtils.isEmpty;
import static org.slf4j.LoggerFactory.getLogger;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.fcrepo.kernel.api.exception.ExternalMessageBodyException;
import org.fcrepo.kernel.api.utils.AutoReloadingConfiguration;
import org.slf4j.Logger;

/**
 * Validates external content paths to ensure that they are within a configured allowed list of paths.
 * <p>
 * The allowed list is read from the file at {@code configPath}, one entry per line. Entries and candidate
 * paths are both lower-cased and have their {@code file:} prefix normalized before being compared, and a
 * candidate is accepted when it starts with any valid entry.
 *
 * @author bbpennel
 */
public class ExternalContentPathValidator extends AutoReloadingConfiguration {

    private static final Logger LOGGER = getLogger(ExternalContentPathValidator.class);

    private static final Set<String> ALLOWED_SCHEMES = new HashSet<>(Arrays.asList("file", "http", "https"));

    // An allowance must start with one of the supported schemes followed by ":/"
    private static final Pattern SCHEME_PATTERN = Pattern.compile("^(http|https|file):/.*");

    // Pattern to check that an http uri contains a / after the domain if a domain is present
    private static final Pattern HTTP_DOMAIN_PATTERN = Pattern.compile("^(http|https)://([^/]+/.*|$)");

    // Matches any path containing a ".." segment
    private static final Pattern RELATIVE_MOD_PATTERN = Pattern.compile(".*(^|/)\\.\\.($|/).*");

    // Matches the two- and three-slash forms of the file scheme prefix
    private static final Pattern NORMALIZE_FILE_URI = Pattern.compile("^file:/{2,3}");

    // Replaced wholesale by loadConfiguration(); null until the first load.
    // NOTE(review): loadConfiguration() is synchronized but validate() reads this field without
    // synchronization - confirm whether reloads happen on another thread and the field should be volatile.
    private List<String> allowedList;

    /**
     * Validates that an external path is valid. The path must be an HTTP or file URI within the allow list of paths,
     * be absolute, and contain no relative modifier.
     *
     * @param extPath external binary path to validate
     * @throws ExternalMessageBodyException thrown if the path is invalid.
     */
    public void validate(final String extPath) throws ExternalMessageBodyException {
        // Read the field once so that a reload reassigning it mid-call cannot affect this validation
        final List<String> allowances = allowedList;
        if (allowances == null || allowances.isEmpty()) {
            throw new ExternalMessageBodyException("External content is disallowed by the server");
        }

        if (isEmpty(extPath)) {
            throw new ExternalMessageBodyException("External content path was empty");
        }

        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale
        final String path = normalizePath(extPath.toLowerCase(Locale.ROOT));

        final URI uri;
        try {
            // Ensure that the path is a valid URL
            uri = new URI(path);
            uri.toURL();
        } catch (final Exception e) {
            throw new ExternalMessageBodyException("Path was not a valid URI: " + extPath);
        }

        // Opaque URIs (e.g. "http:foo" or "file:tmp/x") have no path component at all; reject them here
        // rather than letting the null reach the pattern matcher below
        final String decodedPath = uri.getPath();
        if (decodedPath == null) {
            throw new ExternalMessageBodyException("Path was not absolute: " + extPath);
        }

        // Ensure that the decoded path does not contain modifiers
        if (RELATIVE_MOD_PATTERN.matcher(decodedPath).matches()) {
            throw new ExternalMessageBodyException("Path was not absolute: " + extPath);
        }

        // Require that the path is absolute
        if (!uri.isAbsolute()) {
            throw new ExternalMessageBodyException("Path was not absolute: " + extPath);
        }

        // Ensure that an accept scheme was provided
        final String scheme = uri.getScheme();
        if (!ALLOWED_SCHEMES.contains(scheme)) {
            throw new ExternalMessageBodyException("Path did not provide an allowed scheme: " + extPath);
        }

        // If a file, verify that it exists
        if ("file".equals(scheme) && !fileExists(uri)) {
            throw new ExternalMessageBodyException("Path did not match any allowed external content paths: " +
                    extPath);
        }

        // Check that the uri is within an allowed path
        if (allowances.stream().anyMatch(path::startsWith)) {
            return;
        }
        throw new ExternalMessageBodyException("Path did not match any allowed external content paths: " + extPath);
    }

    /**
     * Checks whether a file URI refers to an existing file or directory.
     *
     * @param uri file URI to check
     * @return true if the URI could be converted to a path that exists; false if it does not exist or if the
     *         URI cannot be converted to a path (for example because it has a query, fragment or authority
     *         component)
     */
    private static boolean fileExists(final URI uri) {
        try {
            return Paths.get(uri).toFile().exists();
        } catch (final IllegalArgumentException e) {
            LOGGER.debug("URI {} could not be converted to a file path: {}", uri, e.getMessage());
            return false;
        }
    }

    /**
     * Rewrites a path starting with {@code file://} so that its prefix is {@code file:/}; any other path is
     * returned unchanged.
     *
     * @param path path to normalize
     * @return the normalized path
     */
    private String normalizePath(final String path) {
        // file uris can have between 1 and 3 slashes depending on if the authority is present
        if (path.startsWith("file://")) {
            return NORMALIZE_FILE_URI.matcher(path).replaceFirst("file:/");
        }
        return path;
    }

    /**
     * Loads the allowed list. Each line of the configuration file is trimmed, lower-cased and normalized;
     * lines which are not valid allowances are logged and skipped.
     *
     * @throws IOException thrown if the allowed list configuration file cannot be read.
     */
    @Override
    protected synchronized void loadConfiguration() throws IOException {
        LOGGER.info("Loading list of allowed external content locations from {}", configPath);
        try (final Stream<String> stream = Files.lines(Paths.get(configPath))) {
            allowedList = stream.map(line -> normalizePath(line.trim().toLowerCase(Locale.ROOT)))
                    .filter(this::isAllowanceValid)
                    .collect(Collectors.toList());
        }
    }

    /**
     * Determines whether a normalized line from the configuration file is a usable allowance. Rejected
     * entries are logged at error level.
     *
     * @param allowance normalized allowance to check
     * @return true if the allowance has a supported scheme, contains no relative modifier, and satisfies
     *         the scheme specific rules; false otherwise
     */
    private boolean isAllowanceValid(final String allowance) {
        final Matcher schemeMatcher = SCHEME_PATTERN.matcher(allowance);
        final boolean schemeMatches = schemeMatcher.matches();
        if (!schemeMatches || RELATIVE_MOD_PATTERN.matcher(allowance).matches()) {
            LOGGER.error("Invalid path {} specified in external path configuration {}",
                    allowance, configPath);
            return false;
        }

        final String protocol = schemeMatcher.group(1);
        if ("file".equals(protocol)) {
            final URI allowanceUri;
            try {
                allowanceUri = URI.create(allowance);
            } catch (final IllegalArgumentException e) {
                // A malformed entry (e.g. one containing a space) must only disqualify itself, not abort
                // loading of the remaining entries
                LOGGER.error("Invalid path {} specified in external path configuration {}",
                        allowance, configPath);
                return false;
            }

            // If a file uri ends with / it must be a directory, otherwise it must be a file.
            final File allowing = new File(allowanceUri.getPath());
            final boolean declaredDirectory = allowance.endsWith("/");
            if ((declaredDirectory && !allowing.isDirectory()) || (!declaredDirectory && !allowing.isFile())) {
                LOGGER.error("Invalid path {} in configuration {}, directories must end with a '/'",
                        allowance, configPath);
                return false;
            }
        } else if ("http".equals(protocol) || "https".equals(protocol)) {
            if (!HTTP_DOMAIN_PATTERN.matcher(allowance).matches()) {
                LOGGER.error("Invalid path {} in configuration {}, domain must end with a '/'",
                        allowance, configPath);
                return false;
            }
        }
        return true;
    }
}