001/*
002 * Licensed to DuraSpace under one or more contributor license agreements.
003 * See the NOTICE file distributed with this work for additional information
004 * regarding copyright ownership.
005 *
006 * DuraSpace licenses this file to you under the Apache License,
007 * Version 2.0 (the "License"); you may not use this file except in
008 * compliance with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.fcrepo.http.api;
019
import static javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE;
import static org.fcrepo.kernel.api.FedoraExternalContent.COPY;
import static org.fcrepo.kernel.api.FedoraExternalContent.REDIRECT;
import static org.fcrepo.kernel.api.FedoraExternalContent.PROXY;
import static org.apache.http.HttpHeaders.CONTENT_TYPE;
import static org.apache.http.HttpStatus.SC_OK;
import static org.slf4j.LoggerFactory.getLogger;

import org.apache.http.Header;
import org.apache.http.client.methods.HttpHead;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.fcrepo.kernel.api.exception.ExternalContentAccessException;
import org.fcrepo.kernel.api.exception.ExternalMessageBodyException;
import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
import javax.ws.rs.core.Link;
import javax.ws.rs.core.MediaType;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.Locale;
import java.util.Map;
import org.slf4j.Logger;
044
045/**
046 * This class is a helper for dealing with the External Content Link header and External Content itself, in the case
047 * of handling="copy". This class will verify that an External Content Link header is formatted correctly and
048 * help parse it, delivering parts of it when asked.
049 *
050 * @author bseeger
051 * @since 5/7/2018
052 */
053public class ExternalContentHandler {
054
055    private static final Logger LOGGER = getLogger(FedoraLdp.class);
056
057    private final static String HANDLING = "handling";
058    private final static String EXT_CONTENT_TYPE = "type";
059
060    private final Link link;
061    private final String handling;
062    private final String type;
063    private final MediaType contentType;
064
065    /* link header for external content should look like this:
066          Link: <http://example.org/some/content>;
067          rel="http://fedora.info/definitions/fcrepo#ExternalContent";
068          handling="proxy";
069          type="image/tiff"
070    */
071
072    /**
073     *  Construct an ExternalContentHandler (helper)
074     *
075     *  @param linkHeader actual link header from request
076     */
077    protected ExternalContentHandler(final String linkHeader) {
078        // if it parses, then we're mostly good to go.
079        link = parseLinkHeader(linkHeader);
080
081        final Map<String, String> map = link.getParams();
082        // handling will be in the map, where as content type may not be
083        handling = map.get(HANDLING).toLowerCase();
084        type = map.get(EXT_CONTENT_TYPE) != null ? map.get(EXT_CONTENT_TYPE).toLowerCase() : null;
085        contentType = type != null ? MediaType.valueOf(type) : findContentType(getURL());
086    }
087
088    /**
089     * Returns the content type located in the link header.
090     * @return content type if in Link header, else null
091     */
092    public MediaType getContentType() {
093        return contentType;
094    }
095
096    /**
097     * Retrieve handling information
098     * @return a String containing the type of handling requested ["proxy", "copy" or "redirect"]
099     */
100    public String getHandling() {
101        return handling;
102    }
103
104    /**
105     * Retrieve url in link header
106     * @return a String of the URL that was in the Link header
107     */
108    public String getURL() {
109        return link != null ? link.getUri().toString() : null;
110    }
111
112    /**
113     * Returns whether or not the handling parameter is "copy"
114     * @return boolean value representing whether or not the content handling is "copy"
115     */
116    public boolean isCopy() {
117        return handling != null && handling.equals(COPY);
118    }
119
120    /**
121     * Returns whether or not the handling parameter is "redirect"
122     * @return boolean value representing whether or not the content handling is "redirect"
123     */
124    public boolean isRedirect() {
125        return handling != null && handling.equals(REDIRECT);
126    }
127
128    /**
129     * Returns whether or not the handling parameter is "proxy"
130     * @return boolean value representing whether or not the content handling is "proxy"
131     */
132    public boolean isProxy() {
133        return handling != null && handling.equals(PROXY);
134    }
135
136    /**
137     * Fetch the external content
138     * @return InputStream containing the external content
139     */
140    public InputStream fetchExternalContent() {
141
142        final URI uri = link.getUri();
143        final String scheme = uri.getScheme();
144        LOGGER.debug("scheme is {}", scheme);
145        if (scheme != null) {
146            try {
147                if (scheme.equals("file")) {
148                    return new FileInputStream(uri.getPath());
149                } else if (scheme.equals("http") || scheme.equals("https")) {
150                    return uri.toURL().openStream();
151                }
152            } catch (final IOException e) {
153                throw new ExternalContentAccessException("Failed to read external content from " + uri, e);
154            }
155        }
156        return null;
157    }
158
159    /**
160     * Validate that an external content link header is appropriately formatted
161     * @param link to be validated
162     * @return Link object if the header is formatted correctly, else null
163     * @throws ExternalMessageBodyException on error
164     */
165    private Link parseLinkHeader(final String link) throws ExternalMessageBodyException {
166        final Link realLink = Link.valueOf(link);
167
168        try {
169            final String handling = realLink.getParams().get(HANDLING);
170            if (handling == null || !handling.matches("(?i)" + PROXY + "|" + COPY + "|" + REDIRECT)) {
171                // error
172                throw new ExternalMessageBodyException(
173                        "Link header formatted incorrectly: 'handling' parameter incorrect or missing");
174            }
175        } catch (final Exception e) {
176            throw new ExternalMessageBodyException("External content link header url is malformed");
177        }
178        return realLink;
179    }
180
181    /**
182     * Find the content type for a remote resource
183     * @param url of remote resource
184     * @return the content type reported by remote system or "application/octet-stream" if not supplied
185     */
186    private MediaType findContentType(final String url) {
187        if (url == null) {
188            return null;
189        }
190
191        if (url.startsWith("file")) {
192            return APPLICATION_OCTET_STREAM_TYPE;
193        } else if (url.startsWith("http")) {
194            try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
195                final HttpHead httpHead = new HttpHead(url);
196                try (CloseableHttpResponse response = httpClient.execute(httpHead)) {
197                    if (response.getStatusLine().getStatusCode() == SC_OK) {
198                        final Header contentType = response.getFirstHeader(CONTENT_TYPE);
199                        if (contentType != null) {
200                            return MediaType.valueOf(contentType.getValue());
201                        }
202                    }
203                }
204            } catch (final IOException e) {
205                LOGGER.warn("Unable to retrieve external content from {} due to {}", url, e.getMessage());
206            } catch (final Exception e) {
207                throw new RepositoryRuntimeException(e);
208            }
209        }
210        LOGGER.debug("Defaulting to octet stream for media type");
211        return APPLICATION_OCTET_STREAM_TYPE;
212    }
213}