001/* 002 * Licensed to DuraSpace under one or more contributor license agreements. 003 * See the NOTICE file distributed with this work for additional information 004 * regarding copyright ownership. 005 * 006 * DuraSpace licenses this file to you under the Apache License, 007 * Version 2.0 (the "License"); you may not use this file except in 008 * compliance with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.fcrepo.http.api; 019 020import static javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE; 021import static org.apache.http.HttpHeaders.CONTENT_LENGTH; 022import static org.apache.http.HttpHeaders.CONTENT_TYPE; 023import static org.apache.http.HttpStatus.SC_OK; 024import static org.slf4j.LoggerFactory.getLogger; 025 026import org.apache.http.Header; 027import org.apache.http.client.methods.HttpHead; 028import org.apache.http.client.methods.CloseableHttpResponse; 029import org.apache.http.impl.client.CloseableHttpClient; 030import org.apache.http.impl.client.HttpClients; 031import org.fcrepo.kernel.api.exception.ExternalContentAccessException; 032import org.fcrepo.kernel.api.exception.ExternalMessageBodyException; 033import javax.ws.rs.core.Link; 034 035import java.io.FileInputStream; 036import java.io.IOException; 037import java.io.InputStream; 038import java.net.URI; 039import java.nio.file.Files; 040import java.nio.file.Path; 041import java.nio.file.Paths; 042import java.util.Map; 043 044import org.fcrepo.kernel.api.models.ExternalContent; 045import org.slf4j.Logger; 046 047/** 048 * This class is a helper for dealing with the External Content Link header and External Content itself, in the case 049 * of handling="copy". This class will verify that an External Content Link header is formatted correctly and 050 * help parse it, delivering parts of it when asked. 051 * 052 * @author bseeger 053 * @since 5/7/2018 054 */ 055public class ExternalContentHandler implements ExternalContent { 056 057 private static final Logger LOGGER = getLogger(FedoraLdp.class); 058 059 private final static String HANDLING = "handling"; 060 private final static String EXT_CONTENT_TYPE = "type"; 061 062 private final Link link; 063 private final String handling; 064 private String contentType; 065 private Long contentSize; 066 067 /* link header for external content should look like this: 068 Link: <http://example.org/some/content>; 069 rel="http://fedora.info/definitions/fcrepo#ExternalContent"; 070 handling="proxy"; 071 type="image/tiff" 072 */ 073 074 /** 075 * Construct an ExternalContentHandler (helper) 076 * 077 * @param linkHeader actual link header from request 078 */ 079 protected ExternalContentHandler(final String linkHeader) { 080 // if it parses, then we're mostly good to go. 081 link = parseLinkHeader(linkHeader); 082 083 final Map<String, String> map = link.getParams(); 084 // handling will be in the map, where as content type may not be 085 handling = map.get(HANDLING).toLowerCase(); 086 // Retrieve details directly from the content 087 retrieveContentDetails(); 088 final var type = map.get(EXT_CONTENT_TYPE) != null ? map.get(EXT_CONTENT_TYPE).toLowerCase() : null; 089 if (type != null) { 090 contentType = type; 091 } else if (contentType == null) { 092 LOGGER.debug("Defaulting to octet stream for media type"); 093 contentType = APPLICATION_OCTET_STREAM_TYPE.toString(); 094 } 095 096 if (contentSize == null) { 097 contentSize = -1L; 098 } 099 } 100 101 @Override 102 public String getContentType() { 103 return contentType; 104 } 105 106 @Override 107 public long getContentSize() { 108 return contentSize; 109 } 110 111 @Override 112 public String getHandling() { 113 return handling; 114 } 115 116 @Override 117 public String getURL() { 118 return link.getUri().toString(); 119 } 120 121 @Override 122 public URI getURI() { 123 return link.getUri(); 124 } 125 126 @Override 127 public boolean isCopy() { 128 return COPY.equals(handling); 129 } 130 131 @Override 132 public boolean isRedirect() { 133 return REDIRECT.equals(handling); 134 } 135 136 @Override 137 public boolean isProxy() { 138 return PROXY.equals(handling); 139 } 140 141 @Override 142 public InputStream fetchExternalContent() { 143 144 final URI uri = link.getUri(); 145 final String scheme = uri.getScheme(); 146 LOGGER.debug("scheme is {}", scheme); 147 if (scheme != null) { 148 try { 149 if (scheme.equals("file")) { 150 return new FileInputStream(uri.getPath()); 151 } else if (scheme.equals("http") || scheme.equals("https")) { 152 return uri.toURL().openStream(); 153 } 154 } catch (final IOException e) { 155 throw new ExternalContentAccessException("Failed to read external content from " + uri, e); 156 } 157 } 158 return null; 159 } 160 161 /** 162 * Validate that an external content link header is appropriately formatted 163 * @param link to be validated 164 * @return Link object if the header is formatted correctly, else null 165 * @throws ExternalMessageBodyException on error 166 */ 167 private Link parseLinkHeader(final String link) throws ExternalMessageBodyException { 168 final Link realLink; 169 170 try { 171 realLink = Link.valueOf(link); 172 final String handling = realLink.getParams().get(HANDLING); 173 if (handling == null || !handling.matches("(?i)" + PROXY + "|" + COPY + "|" + REDIRECT)) { 174 // error 175 throw new ExternalMessageBodyException( 176 "Link header formatted incorrectly: 'handling' parameter incorrect or missing"); 177 } 178 } catch (final Exception e) { 179 throw new ExternalMessageBodyException("External content link header url is malformed"); 180 } 181 return realLink; 182 } 183 184 private void retrieveContentDetails() { 185 final URI uri = getURI(); 186 final String scheme = uri.getScheme().toLowerCase(); 187 188 if ("file".equals(scheme)) { 189 final Path path = Paths.get(uri); 190 try { 191 contentSize = Files.size(path); 192 } catch (final IOException e) { 193 throw new ExternalMessageBodyException("Unable to access external binary at URI " + uri, e); 194 } 195 } else if ("http".equals(scheme) || "https".equals(scheme)) { 196 try (final CloseableHttpClient httpClient = HttpClients.createDefault()) { 197 final HttpHead httpHead = new HttpHead(uri); 198 httpHead.setHeader("Accept-Encoding", "identity"); 199 try (final CloseableHttpResponse response = httpClient.execute(httpHead)) { 200 if (response.getStatusLine().getStatusCode() != SC_OK) { 201 throw new ExternalMessageBodyException("Unable to access external binary at URI " + uri 202 + " received response " + response.getStatusLine().getStatusCode()); 203 } 204 205 final Header typeHeader = response.getFirstHeader(CONTENT_TYPE); 206 if (typeHeader != null) { 207 contentType = typeHeader.getValue(); 208 } 209 final Header sizeHeader = response.getFirstHeader(CONTENT_LENGTH); 210 if (sizeHeader != null) { 211 contentSize = Long.parseLong(sizeHeader.getValue()); 212 } 213 } 214 } catch (final IOException e) { 215 throw new ExternalMessageBodyException("Unable to access external binary at URI " + uri, e); 216 } 217 } 218 } 219}