/*
 * Licensed to DuraSpace under one or more contributor license agreements.
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership.
 *
 * DuraSpace licenses this file to you under the Apache License,
 * Version 2.0 (the "License"); you may not use this file except in
 * compliance with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fcrepo.http.api;

import static javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE;
import static org.fcrepo.kernel.api.FedoraExternalContent.COPY;
import static org.fcrepo.kernel.api.FedoraExternalContent.REDIRECT;
import static org.fcrepo.kernel.api.FedoraExternalContent.PROXY;
import static org.apache.http.HttpHeaders.CONTENT_TYPE;
import static org.apache.http.HttpStatus.SC_OK;
import static org.slf4j.LoggerFactory.getLogger;

import org.apache.http.Header;
import org.apache.http.client.methods.HttpHead;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.fcrepo.kernel.api.exception.ExternalContentAccessException;
import org.fcrepo.kernel.api.exception.ExternalMessageBodyException;
import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
import javax.ws.rs.core.Link;
import javax.ws.rs.core.MediaType;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.Locale;
import java.util.Map;
043import org.slf4j.Logger; 044 045/** 046 * This class is a helper for dealing with the External Content Link header and External Content itself, in the case 047 * of handling="copy". This class will verify that an External Content Link header is formatted correctly and 048 * help parse it, delivering parts of it when asked. 049 * 050 * @author bseeger 051 * @since 5/7/2018 052 */ 053public class ExternalContentHandler { 054 055 private static final Logger LOGGER = getLogger(FedoraLdp.class); 056 057 private final static String HANDLING = "handling"; 058 private final static String EXT_CONTENT_TYPE = "type"; 059 060 private final Link link; 061 private final String handling; 062 private final String type; 063 private final MediaType contentType; 064 065 /* link header for external content should look like this: 066 Link: <http://example.org/some/content>; 067 rel="http://fedora.info/definitions/fcrepo#ExternalContent"; 068 handling="proxy"; 069 type="image/tiff" 070 */ 071 072 /** 073 * Construct an ExternalContentHandler (helper) 074 * 075 * @param linkHeader actual link header from request 076 */ 077 protected ExternalContentHandler(final String linkHeader) { 078 // if it parses, then we're mostly good to go. 079 link = parseLinkHeader(linkHeader); 080 081 final Map<String, String> map = link.getParams(); 082 // handling will be in the map, where as content type may not be 083 handling = map.get(HANDLING).toLowerCase(); 084 type = map.get(EXT_CONTENT_TYPE) != null ? map.get(EXT_CONTENT_TYPE).toLowerCase() : null; 085 contentType = type != null ? MediaType.valueOf(type) : findContentType(getURL()); 086 } 087 088 /** 089 * Returns the content type located in the link header. 
090 * @return content type if in Link header, else null 091 */ 092 public MediaType getContentType() { 093 return contentType; 094 } 095 096 /** 097 * Retrieve handling information 098 * @return a String containing the type of handling requested ["proxy", "copy" or "redirect"] 099 */ 100 public String getHandling() { 101 return handling; 102 } 103 104 /** 105 * Retrieve url in link header 106 * @return a String of the URL that was in the Link header 107 */ 108 public String getURL() { 109 return link != null ? link.getUri().toString() : null; 110 } 111 112 /** 113 * Returns whether or not the handling parameter is "copy" 114 * @return boolean value representing whether or not the content handling is "copy" 115 */ 116 public boolean isCopy() { 117 return handling != null && handling.equals(COPY); 118 } 119 120 /** 121 * Returns whether or not the handling parameter is "redirect" 122 * @return boolean value representing whether or not the content handling is "redirect" 123 */ 124 public boolean isRedirect() { 125 return handling != null && handling.equals(REDIRECT); 126 } 127 128 /** 129 * Returns whether or not the handling parameter is "proxy" 130 * @return boolean value representing whether or not the content handling is "proxy" 131 */ 132 public boolean isProxy() { 133 return handling != null && handling.equals(PROXY); 134 } 135 136 /** 137 * Fetch the external content 138 * @return InputStream containing the external content 139 */ 140 public InputStream fetchExternalContent() { 141 142 final URI uri = link.getUri(); 143 final String scheme = uri.getScheme(); 144 LOGGER.debug("scheme is {}", scheme); 145 if (scheme != null) { 146 try { 147 if (scheme.equals("file")) { 148 return new FileInputStream(uri.getPath()); 149 } else if (scheme.equals("http") || scheme.equals("https")) { 150 return uri.toURL().openStream(); 151 } 152 } catch (final IOException e) { 153 throw new ExternalContentAccessException("Failed to read external content from " + uri, e); 154 } 155 } 
156 return null; 157 } 158 159 /** 160 * Validate that an external content link header is appropriately formatted 161 * @param link to be validated 162 * @return Link object if the header is formatted correctly, else null 163 * @throws ExternalMessageBodyException on error 164 */ 165 private Link parseLinkHeader(final String link) throws ExternalMessageBodyException { 166 final Link realLink = Link.valueOf(link); 167 168 try { 169 final String handling = realLink.getParams().get(HANDLING); 170 if (handling == null || !handling.matches("(?i)" + PROXY + "|" + COPY + "|" + REDIRECT)) { 171 // error 172 throw new ExternalMessageBodyException( 173 "Link header formatted incorrectly: 'handling' parameter incorrect or missing"); 174 } 175 } catch (final Exception e) { 176 throw new ExternalMessageBodyException("External content link header url is malformed"); 177 } 178 return realLink; 179 } 180 181 /** 182 * Find the content type for a remote resource 183 * @param url of remote resource 184 * @return the content type reported by remote system or "application/octet-stream" if not supplied 185 */ 186 private MediaType findContentType(final String url) { 187 if (url == null) { 188 return null; 189 } 190 191 if (url.startsWith("file")) { 192 return APPLICATION_OCTET_STREAM_TYPE; 193 } else if (url.startsWith("http")) { 194 try (CloseableHttpClient httpClient = HttpClients.createDefault()) { 195 final HttpHead httpHead = new HttpHead(url); 196 try (CloseableHttpResponse response = httpClient.execute(httpHead)) { 197 if (response.getStatusLine().getStatusCode() == SC_OK) { 198 final Header contentType = response.getFirstHeader(CONTENT_TYPE); 199 if (contentType != null) { 200 return MediaType.valueOf(contentType.getValue()); 201 } 202 } 203 } 204 } catch (final IOException e) { 205 LOGGER.warn("Unable to retrieve external content from {} due to {}", url, e.getMessage()); 206 } catch (final Exception e) { 207 throw new RepositoryRuntimeException(e); 208 } 209 } 210 
LOGGER.debug("Defaulting to octet stream for media type"); 211 return APPLICATION_OCTET_STREAM_TYPE; 212 } 213}